@heyai-rules/pilo-masterkit 2.1.0 → 3.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (739):
  1. package/.agent/agents/PILO_MASTER.md +77 -77
  2. package/.agent/agents/architect.md +211 -211
  3. package/.agent/agents/backend-specialist.md +263 -263
  4. package/.agent/agents/build-error-resolver.md +114 -114
  5. package/.agent/agents/chief-of-staff.md +151 -151
  6. package/.agent/agents/code-archaeologist.md +106 -106
  7. package/.agent/agents/code-reviewer.md +237 -237
  8. package/.agent/agents/cpp-build-resolver.md +90 -90
  9. package/.agent/agents/cpp-reviewer.md +72 -72
  10. package/.agent/agents/csharp-reviewer.md +101 -101
  11. package/.agent/agents/dart-build-resolver.md +201 -201
  12. package/.agent/agents/database-architect.md +226 -226
  13. package/.agent/agents/database-reviewer.md +91 -91
  14. package/.agent/agents/debugger.md +225 -225
  15. package/.agent/agents/devops-engineer.md +242 -242
  16. package/.agent/agents/doc-updater.md +107 -107
  17. package/.agent/agents/docs-lookup.md +68 -68
  18. package/.agent/agents/documentation-writer.md +104 -104
  19. package/.agent/agents/e2e-runner.md +107 -107
  20. package/.agent/agents/explorer-agent.md +73 -73
  21. package/.agent/agents/flutter-reviewer.md +243 -243
  22. package/.agent/agents/frontend-specialist.md +593 -593
  23. package/.agent/agents/game-developer.md +162 -162
  24. package/.agent/agents/gan-evaluator.md +209 -209
  25. package/.agent/agents/gan-generator.md +131 -131
  26. package/.agent/agents/gan-planner.md +99 -99
  27. package/.agent/agents/go-build-resolver.md +94 -94
  28. package/.agent/agents/go-reviewer.md +76 -76
  29. package/.agent/agents/harness-optimizer.md +35 -35
  30. package/.agent/agents/healthcare-reviewer.md +83 -83
  31. package/.agent/agents/java-build-resolver.md +153 -153
  32. package/.agent/agents/java-reviewer.md +92 -92
  33. package/.agent/agents/kotlin-build-resolver.md +118 -118
  34. package/.agent/agents/kotlin-reviewer.md +159 -159
  35. package/.agent/agents/loop-operator.md +36 -36
  36. package/.agent/agents/mobile-developer.md +377 -377
  37. package/.agent/agents/opensource-forker.md +198 -198
  38. package/.agent/agents/opensource-packager.md +249 -249
  39. package/.agent/agents/opensource-sanitizer.md +188 -188
  40. package/.agent/agents/orchestrator.md +416 -416
  41. package/.agent/agents/penetration-tester.md +188 -188
  42. package/.agent/agents/performance-optimizer.md +446 -446
  43. package/.agent/agents/personas/athena-agent/agent.json +10 -10
  44. package/.agent/agents/personas/athena-agent/athena-backend-logic-architecture-profile.md +3 -3
  45. package/.agent/agents/personas/athena-agent/context-files/agents.md +1 -1
  46. package/.agent/agents/personas/athena-agent/context-files/identity.md +1 -1
  47. package/.agent/agents/personas/athena-agent/context-files/soul.md +1 -1
  48. package/.agent/agents/personas/athena-agent/context-files/user-predefined.md +1 -1
  49. package/.agent/agents/personas/athena-agent/user-context-files/system/bootstrap.md +1 -1
  50. package/.agent/agents/personas/athena-agent/user-context-files/system/user.md +1 -1
  51. package/.agent/agents/personas/da-vinci-agent/agent.json +10 -10
  52. package/.agent/agents/personas/da-vinci-agent/context-files/agents.md +1 -1
  53. package/.agent/agents/personas/da-vinci-agent/context-files/identity.md +1 -1
  54. package/.agent/agents/personas/da-vinci-agent/context-files/soul.md +1 -1
  55. package/.agent/agents/personas/da-vinci-agent/context-files/user-predefined.md +1 -1
  56. package/.agent/agents/personas/da-vinci-agent/da-vinci-frontend-ui-ux-design-profile.md +3 -3
  57. package/.agent/agents/personas/da-vinci-agent/user-context-files/system/bootstrap.md +1 -1
  58. package/.agent/agents/personas/da-vinci-agent/user-context-files/system/user.md +1 -1
  59. package/.agent/agents/personas/duong-tang-agent/agent.json +10 -10
  60. package/.agent/agents/personas/duong-tang-agent/context-files/agents.md +1 -1
  61. package/.agent/agents/personas/duong-tang-agent/context-files/identity.md +1 -1
  62. package/.agent/agents/personas/duong-tang-agent/context-files/soul.md +1 -1
  63. package/.agent/agents/personas/duong-tang-agent/context-files/user-predefined.md +1 -1
  64. package/.agent/agents/personas/duong-tang-agent/tang-monk-quality-testing-documentation-profile.md +3 -3
  65. package/.agent/agents/personas/duong-tang-agent/user-context-files/system/bootstrap.md +1 -1
  66. package/.agent/agents/personas/duong-tang-agent/user-context-files/system/user.md +1 -1
  67. package/.agent/agents/personas/gia-cat-luong-agent/agent.json +10 -10
  68. package/.agent/agents/personas/gia-cat-luong-agent/context-files/agents.md +1 -1
  69. package/.agent/agents/personas/gia-cat-luong-agent/context-files/identity.md +1 -1
  70. package/.agent/agents/personas/gia-cat-luong-agent/context-files/soul.md +1 -1
  71. package/.agent/agents/personas/gia-cat-luong-agent/context-files/user-predefined.md +1 -1
  72. package/.agent/agents/personas/gia-cat-luong-agent/kongming-research-strategy-analysis-profile.md +3 -3
  73. package/.agent/agents/personas/gia-cat-luong-agent/user-context-files/system/bootstrap.md +1 -1
  74. package/.agent/agents/personas/gia-cat-luong-agent/user-context-files/system/user.md +1 -1
  75. package/.agent/agents/personas/mihata-agent/agent.json +10 -10
  76. package/.agent/agents/personas/mihata-agent/context-files/agents.md +1 -1
  77. package/.agent/agents/personas/mihata-agent/context-files/identity.md +1 -1
  78. package/.agent/agents/personas/mihata-agent/context-files/soul.md +1 -1
  79. package/.agent/agents/personas/mihata-agent/context-files/user-predefined.md +1 -1
  80. package/.agent/agents/personas/mihata-agent/mihata-multi-agent-orchestration-profile.md +3 -3
  81. package/.agent/agents/personas/mihata-agent/user-context-files/system/bootstrap.md +1 -1
  82. package/.agent/agents/personas/mihata-agent/user-context-files/system/user.md +1 -1
  83. package/.agent/agents/personas/tesla-agent/agent.json +10 -10
  84. package/.agent/agents/personas/tesla-agent/context-files/agents.md +1 -1
  85. package/.agent/agents/personas/tesla-agent/context-files/identity.md +1 -1
  86. package/.agent/agents/personas/tesla-agent/context-files/soul.md +1 -1
  87. package/.agent/agents/personas/tesla-agent/context-files/user-predefined.md +1 -1
  88. package/.agent/agents/personas/tesla-agent/tesla-fullstack-system-optimization-profile.md +3 -3
  89. package/.agent/agents/personas/tesla-agent/user-context-files/system/bootstrap.md +1 -1
  90. package/.agent/agents/personas/tesla-agent/user-context-files/system/user.md +1 -1
  91. package/.agent/agents/personas/tu-ma-y-agent/agent.json +10 -10
  92. package/.agent/agents/personas/tu-ma-y-agent/context-files/agents.md +1 -1
  93. package/.agent/agents/personas/tu-ma-y-agent/context-files/identity.md +1 -1
  94. package/.agent/agents/personas/tu-ma-y-agent/context-files/soul.md +1 -1
  95. package/.agent/agents/personas/tu-ma-y-agent/context-files/user-predefined.md +1 -1
  96. package/.agent/agents/personas/tu-ma-y-agent/simayi-feasibility-risk-control-profile.md +3 -3
  97. package/.agent/agents/personas/tu-ma-y-agent/user-context-files/system/bootstrap.md +1 -1
  98. package/.agent/agents/personas/tu-ma-y-agent/user-context-files/system/user.md +1 -1
  99. package/.agent/agents/personas/venti-agent/agent.json +10 -10
  100. package/.agent/agents/personas/venti-agent/context-files/agents.md +1 -1
  101. package/.agent/agents/personas/venti-agent/context-files/identity.md +1 -1
  102. package/.agent/agents/personas/venti-agent/context-files/soul.md +1 -1
  103. package/.agent/agents/personas/venti-agent/context-files/user-predefined.md +1 -1
  104. package/.agent/agents/personas/venti-agent/user-context-files/system/bootstrap.md +1 -1
  105. package/.agent/agents/personas/venti-agent/user-context-files/system/user.md +1 -1
  106. package/.agent/agents/personas/venti-agent/venti-learning-communication-mentoring-profile.md +3 -3
  107. package/.agent/agents/planner.md +212 -212
  108. package/.agent/agents/product-manager.md +112 -112
  109. package/.agent/agents/product-owner.md +95 -95
  110. package/.agent/agents/project-planner.md +406 -406
  111. package/.agent/agents/python-reviewer.md +98 -98
  112. package/.agent/agents/pytorch-build-resolver.md +120 -120
  113. package/.agent/agents/qa-automation-engineer.md +103 -103
  114. package/.agent/agents/refactor-cleaner.md +85 -85
  115. package/.agent/agents/rust-build-resolver.md +148 -148
  116. package/.agent/agents/rust-reviewer.md +94 -94
  117. package/.agent/agents/security-auditor.md +170 -170
  118. package/.agent/agents/security-reviewer.md +108 -108
  119. package/.agent/agents/seo-specialist.md +111 -111
  120. package/.agent/agents/tdd-guide.md +91 -91
  121. package/.agent/agents/test-engineer.md +158 -158
  122. package/.agent/agents/typescript-reviewer.md +112 -112
  123. package/.agent/contexts/dev.md +20 -20
  124. package/.agent/contexts/research.md +26 -26
  125. package/.agent/contexts/review.md +22 -22
  126. package/.agent/hooks/hooks.json +395 -395
  127. package/.agent/hooks/readme.md +222 -222
  128. package/.agent/mcp-configs/mcp-servers.json +181 -181
  129. package/.agent/rules/ARCHITECTURAL_BLUEPRINTS.md +62 -62
  130. package/.agent/rules/CODE_CRAFTSMANSHIP.md +69 -69
  131. package/.agent/rules/CORE_RULES.md +72 -72
  132. package/.agent/rules/PROJECT_MAP.md +58 -58
  133. package/.agent/rules/QUALITY_ASSURANCE.md +54 -54
  134. package/.agent/rules/SECURITY_ARMOR.md +44 -44
  135. package/.agent/rules/VERSION_ORCHESTRATION.md +64 -64
  136. package/.agent/rules/WORKFLOW_ORCHESTRATION.md +55 -55
  137. package/.agent/rules/common/agents.md +50 -50
  138. package/.agent/rules/common/code-review.md +124 -124
  139. package/.agent/rules/common/coding-style.md +48 -48
  140. package/.agent/rules/common/development-workflow.md +44 -44
  141. package/.agent/rules/common/git-workflow.md +24 -24
  142. package/.agent/rules/common/hooks.md +30 -30
  143. package/.agent/rules/common/patterns.md +31 -31
  144. package/.agent/rules/common/performance.md +55 -55
  145. package/.agent/rules/common/security.md +29 -29
  146. package/.agent/rules/common/testing.md +29 -29
  147. package/.agent/rules/cpp/coding-style.md +44 -44
  148. package/.agent/rules/cpp/hooks.md +39 -39
  149. package/.agent/rules/cpp/patterns.md +51 -51
  150. package/.agent/rules/cpp/security.md +51 -51
  151. package/.agent/rules/cpp/testing.md +44 -44
  152. package/.agent/rules/csharp/coding-style.md +72 -72
  153. package/.agent/rules/csharp/hooks.md +25 -25
  154. package/.agent/rules/csharp/patterns.md +50 -50
  155. package/.agent/rules/csharp/security.md +58 -58
  156. package/.agent/rules/csharp/testing.md +46 -46
  157. package/.agent/rules/dart/coding-style.md +159 -159
  158. package/.agent/rules/dart/hooks.md +66 -66
  159. package/.agent/rules/dart/patterns.md +261 -261
  160. package/.agent/rules/dart/security.md +135 -135
  161. package/.agent/rules/dart/testing.md +215 -215
  162. package/.agent/rules/golang/coding-style.md +32 -32
  163. package/.agent/rules/golang/hooks.md +17 -17
  164. package/.agent/rules/golang/patterns.md +45 -45
  165. package/.agent/rules/golang/security.md +34 -34
  166. package/.agent/rules/golang/testing.md +31 -31
  167. package/.agent/rules/java/coding-style.md +114 -114
  168. package/.agent/rules/java/hooks.md +18 -18
  169. package/.agent/rules/java/patterns.md +146 -146
  170. package/.agent/rules/java/security.md +100 -100
  171. package/.agent/rules/java/testing.md +131 -131
  172. package/.agent/rules/kotlin/coding-style.md +86 -86
  173. package/.agent/rules/kotlin/hooks.md +17 -17
  174. package/.agent/rules/kotlin/patterns.md +146 -146
  175. package/.agent/rules/kotlin/security.md +82 -82
  176. package/.agent/rules/kotlin/testing.md +128 -128
  177. package/.agent/rules/perl/coding-style.md +46 -46
  178. package/.agent/rules/perl/hooks.md +22 -22
  179. package/.agent/rules/perl/patterns.md +76 -76
  180. package/.agent/rules/perl/security.md +69 -69
  181. package/.agent/rules/perl/testing.md +54 -54
  182. package/.agent/rules/php/coding-style.md +40 -40
  183. package/.agent/rules/php/hooks.md +24 -24
  184. package/.agent/rules/php/patterns.md +33 -33
  185. package/.agent/rules/php/security.md +37 -37
  186. package/.agent/rules/php/testing.md +39 -39
  187. package/.agent/rules/python/coding-style.md +42 -42
  188. package/.agent/rules/python/hooks.md +19 -19
  189. package/.agent/rules/python/patterns.md +39 -39
  190. package/.agent/rules/python/security.md +30 -30
  191. package/.agent/rules/python/testing.md +38 -38
  192. package/.agent/rules/readme.md +111 -111
  193. package/.agent/rules/rust/coding-style.md +151 -151
  194. package/.agent/rules/rust/hooks.md +16 -16
  195. package/.agent/rules/rust/patterns.md +168 -168
  196. package/.agent/rules/rust/security.md +141 -141
  197. package/.agent/rules/rust/testing.md +154 -154
  198. package/.agent/rules/swift/coding-style.md +47 -47
  199. package/.agent/rules/swift/hooks.md +20 -20
  200. package/.agent/rules/swift/patterns.md +66 -66
  201. package/.agent/rules/swift/security.md +33 -33
  202. package/.agent/rules/swift/testing.md +45 -45
  203. package/.agent/rules/typescript/coding-style.md +199 -199
  204. package/.agent/rules/typescript/hooks.md +22 -22
  205. package/.agent/rules/typescript/patterns.md +52 -52
  206. package/.agent/rules/typescript/security.md +28 -28
  207. package/.agent/rules/typescript/testing.md +18 -18
  208. package/.agent/rules/web/coding-style.md +96 -96
  209. package/.agent/rules/web/design-quality.md +63 -63
  210. package/.agent/rules/web/hooks.md +120 -120
  211. package/.agent/rules/web/patterns.md +79 -79
  212. package/.agent/rules/web/performance.md +64 -64
  213. package/.agent/rules/web/security.md +57 -57
  214. package/.agent/rules/web/testing.md +55 -55
  215. package/.agent/rules/zh/agents.md +50 -50
  216. package/.agent/rules/zh/code-review.md +124 -124
  217. package/.agent/rules/zh/coding-style.md +48 -48
  218. package/.agent/rules/zh/development-workflow.md +44 -44
  219. package/.agent/rules/zh/git-workflow.md +24 -24
  220. package/.agent/rules/zh/hooks.md +30 -30
  221. package/.agent/rules/zh/patterns.md +31 -31
  222. package/.agent/rules/zh/performance.md +55 -55
  223. package/.agent/rules/zh/readme.md +108 -108
  224. package/.agent/rules/zh/security.md +29 -29
  225. package/.agent/rules/zh/testing.md +29 -29
  226. package/.agent/scripts/auto_preview.py +148 -148
  227. package/.agent/scripts/checklist.py +217 -217
  228. package/.agent/scripts/session_manager.py +120 -120
  229. package/.agent/scripts/verify_all.py +327 -327
  230. package/.agent/skills/agent-eval/SKILL.md +145 -145
  231. package/.agent/skills/agent-harness-construction/SKILL.md +73 -73
  232. package/.agent/skills/agent-payment-x402/SKILL.md +178 -178
  233. package/.agent/skills/agentic-engineering/SKILL.md +63 -63
  234. package/.agent/skills/ai-first-engineering/SKILL.md +51 -51
  235. package/.agent/skills/ai-regression-testing/SKILL.md +385 -385
  236. package/.agent/skills/android-clean-architecture/SKILL.md +339 -339
  237. package/.agent/skills/api-design/SKILL.md +523 -523
  238. package/.agent/skills/api-patterns/SKILL.md +81 -81
  239. package/.agent/skills/api-patterns/api-style.md +42 -42
  240. package/.agent/skills/api-patterns/auth.md +24 -24
  241. package/.agent/skills/api-patterns/documentation.md +26 -26
  242. package/.agent/skills/api-patterns/graphql.md +41 -41
  243. package/.agent/skills/api-patterns/rate-limiting.md +31 -31
  244. package/.agent/skills/api-patterns/response.md +37 -37
  245. package/.agent/skills/api-patterns/rest.md +40 -40
  246. package/.agent/skills/api-patterns/scripts/api_validator.py +211 -211
  247. package/.agent/skills/api-patterns/security-testing.md +122 -122
  248. package/.agent/skills/api-patterns/trpc.md +41 -41
  249. package/.agent/skills/api-patterns/versioning.md +22 -22
  250. package/.agent/skills/app-builder/SKILL.md +75 -75
  251. package/.agent/skills/app-builder/agent-coordination.md +71 -71
  252. package/.agent/skills/app-builder/feature-building.md +53 -53
  253. package/.agent/skills/app-builder/project-detection.md +34 -34
  254. package/.agent/skills/app-builder/scaffolding.md +118 -118
  255. package/.agent/skills/app-builder/tech-stack.md +41 -41
  256. package/.agent/skills/app-builder/templates/SKILL.md +39 -39
  257. package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +76 -76
  258. package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +92 -92
  259. package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +88 -88
  260. package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +88 -88
  261. package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +83 -83
  262. package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +90 -90
  263. package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +90 -90
  264. package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +122 -122
  265. package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +122 -122
  266. package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +169 -169
  267. package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +134 -134
  268. package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +83 -83
  269. package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +119 -119
  270. package/.agent/skills/architecture/SKILL.md +55 -55
  271. package/.agent/skills/architecture/context-discovery.md +43 -43
  272. package/.agent/skills/architecture/examples.md +94 -94
  273. package/.agent/skills/architecture/pattern-selection.md +68 -68
  274. package/.agent/skills/architecture/patterns-reference.md +50 -50
  275. package/.agent/skills/architecture/trade-off-analysis.md +77 -77
  276. package/.agent/skills/architecture-decision-records/SKILL.md +179 -179
  277. package/.agent/skills/article-writing/SKILL.md +79 -79
  278. package/.agent/skills/autonomous-agent-harness/SKILL.md +267 -267
  279. package/.agent/skills/autonomous-loops/SKILL.md +610 -610
  280. package/.agent/skills/backend-patterns/SKILL.md +598 -598
  281. package/.agent/skills/bash-linux/SKILL.md +199 -199
  282. package/.agent/skills/behavioral-modes/SKILL.md +242 -242
  283. package/.agent/skills/benchmark/SKILL.md +93 -93
  284. package/.agent/skills/blueprint/SKILL.md +105 -105
  285. package/.agent/skills/brainstorming/SKILL.md +163 -163
  286. package/.agent/skills/brainstorming/dynamic-questioning.md +350 -350
  287. package/.agent/skills/brand-voice/SKILL.md +97 -97
  288. package/.agent/skills/brand-voice/references/voice-profile-schema.md +55 -55
  289. package/.agent/skills/browser-qa/SKILL.md +87 -87
  290. package/.agent/skills/bun-runtime/SKILL.md +84 -84
  291. package/.agent/skills/canary-watch/SKILL.md +99 -99
  292. package/.agent/skills/carrier-relationship-management/SKILL.md +212 -212
  293. package/.agent/skills/ck/SKILL.md +147 -147
  294. package/.agent/skills/ck/commands/forget.mjs +44 -44
  295. package/.agent/skills/ck/commands/info.mjs +24 -24
  296. package/.agent/skills/ck/commands/init.mjs +143 -143
  297. package/.agent/skills/ck/commands/list.mjs +40 -40
  298. package/.agent/skills/ck/commands/migrate.mjs +202 -202
  299. package/.agent/skills/ck/commands/resume.mjs +36 -36
  300. package/.agent/skills/ck/commands/save.mjs +210 -210
  301. package/.agent/skills/ck/commands/shared.mjs +387 -387
  302. package/.agent/skills/ck/hooks/session-start.mjs +224 -224
  303. package/.agent/skills/claude-api/SKILL.md +337 -337
  304. package/.agent/skills/claude-devfleet/SKILL.md +103 -103
  305. package/.agent/skills/clean-code/SKILL.md +201 -201
  306. package/.agent/skills/click-path-audit/SKILL.md +244 -244
  307. package/.agent/skills/clickhouse-io/SKILL.md +439 -439
  308. package/.agent/skills/code-review-checklist/SKILL.md +109 -109
  309. package/.agent/skills/codebase-onboarding/SKILL.md +233 -233
  310. package/.agent/skills/coding-standards/SKILL.md +530 -530
  311. package/.agent/skills/compose-multiplatform-patterns/SKILL.md +299 -299
  312. package/.agent/skills/configure-ecc/SKILL.md +367 -367
  313. package/.agent/skills/connections-optimizer/SKILL.md +189 -189
  314. package/.agent/skills/content-engine/SKILL.md +131 -131
  315. package/.agent/skills/content-hash-cache-pattern/SKILL.md +161 -161
  316. package/.agent/skills/context-budget/SKILL.md +135 -135
  317. package/.agent/skills/continuous-agent-loop/SKILL.md +45 -45
  318. package/.agent/skills/continuous-learning/SKILL.md +119 -119
  319. package/.agent/skills/continuous-learning/config.json +18 -18
  320. package/.agent/skills/continuous-learning/evaluate-session.sh +69 -69
  321. package/.agent/skills/continuous-learning-v2/SKILL.md +365 -365
  322. package/.agent/skills/continuous-learning-v2/agents/observer-loop.sh +271 -271
  323. package/.agent/skills/continuous-learning-v2/agents/observer.md +198 -198
  324. package/.agent/skills/continuous-learning-v2/agents/session-guardian.sh +150 -150
  325. package/.agent/skills/continuous-learning-v2/agents/start-observer.sh +244 -244
  326. package/.agent/skills/continuous-learning-v2/config.json +8 -8
  327. package/.agent/skills/continuous-learning-v2/hooks/observe.sh +428 -428
  328. package/.agent/skills/continuous-learning-v2/scripts/detect-project.sh +228 -228
  329. package/.agent/skills/continuous-learning-v2/scripts/instinct-cli.py +1426 -1426
  330. package/.agent/skills/continuous-learning-v2/scripts/test-parse-instinct.py +984 -984
  331. package/.agent/skills/cost-aware-llm-pipeline/SKILL.md +183 -183
  332. package/.agent/skills/cpp-coding-standards/SKILL.md +723 -723
  333. package/.agent/skills/cpp-testing/SKILL.md +324 -324
  334. package/.agent/skills/crosspost/SKILL.md +111 -111
  335. package/.agent/skills/csharp-testing/SKILL.md +321 -321
  336. package/.agent/skills/customer-billing-ops/SKILL.md +140 -140
  337. package/.agent/skills/customs-trade-compliance/SKILL.md +263 -263
  338. package/.agent/skills/dart-flutter-patterns/SKILL.md +563 -563
  339. package/.agent/skills/data-scraper-agent/SKILL.md +764 -764
  340. package/.agent/skills/database-design/SKILL.md +52 -52
  341. package/.agent/skills/database-design/database-selection.md +43 -43
  342. package/.agent/skills/database-design/indexing.md +39 -39
  343. package/.agent/skills/database-design/migrations.md +48 -48
  344. package/.agent/skills/database-design/optimization.md +36 -36
  345. package/.agent/skills/database-design/orm-selection.md +30 -30
  346. package/.agent/skills/database-design/schema-design.md +56 -56
  347. package/.agent/skills/database-design/scripts/schema_validator.py +172 -172
  348. package/.agent/skills/database-migrations/SKILL.md +429 -429
  349. package/.agent/skills/deep-research/SKILL.md +155 -155
  350. package/.agent/skills/deployment-patterns/SKILL.md +427 -427
  351. package/.agent/skills/deployment-procedures/SKILL.md +241 -241
  352. package/.agent/skills/design-system/SKILL.md +82 -82
  353. package/.agent/skills/django-patterns/SKILL.md +734 -734
  354. package/.agent/skills/django-security/SKILL.md +593 -593
  355. package/.agent/skills/django-tdd/SKILL.md +729 -729
  356. package/.agent/skills/django-verification/SKILL.md +469 -469
  357. package/.agent/skills/dmux-workflows/SKILL.md +191 -191
  358. package/.agent/skills/doc.md +177 -177
  359. package/.agent/skills/docker-patterns/SKILL.md +364 -364
  360. package/.agent/skills/documentation-lookup/SKILL.md +90 -90
  361. package/.agent/skills/documentation-templates/SKILL.md +194 -194
  362. package/.agent/skills/dotnet-patterns/SKILL.md +321 -321
  363. package/.agent/skills/e2e-testing/SKILL.md +326 -326
  364. package/.agent/skills/energy-procurement/SKILL.md +228 -228
  365. package/.agent/skills/enterprise-agent-ops/SKILL.md +50 -50
  366. package/.agent/skills/eval-harness/SKILL.md +270 -270
  367. package/.agent/skills/exa-search/SKILL.md +103 -103
  368. package/.agent/skills/fal-ai-media/SKILL.md +284 -284
  369. package/.agent/skills/flutter-dart-code-review/SKILL.md +435 -435
  370. package/.agent/skills/foundation-models-on-device/SKILL.md +243 -243
  371. package/.agent/skills/frontend-design/SKILL.md +452 -452
  372. package/.agent/skills/frontend-design/animation-guide.md +331 -331
  373. package/.agent/skills/frontend-design/color-system.md +311 -311
  374. package/.agent/skills/frontend-design/decision-trees.md +418 -418
  375. package/.agent/skills/frontend-design/motion-graphics.md +306 -306
  376. package/.agent/skills/frontend-design/scripts/accessibility_checker.py +183 -183
  377. package/.agent/skills/frontend-design/scripts/ux_audit.py +722 -722
  378. package/.agent/skills/frontend-design/typography-system.md +345 -345
  379. package/.agent/skills/frontend-design/ux-psychology.md +1116 -1116
  380. package/.agent/skills/frontend-design/visual-effects.md +383 -383
  381. package/.agent/skills/frontend-patterns/SKILL.md +642 -642
  382. package/.agent/skills/frontend-slides/SKILL.md +184 -184
  383. package/.agent/skills/frontend-slides/style-presets.md +330 -330
  384. package/.agent/skills/game-development/2d-games/SKILL.md +119 -119
  385. package/.agent/skills/game-development/3d-games/SKILL.md +135 -135
  386. package/.agent/skills/game-development/SKILL.md +167 -167
  387. package/.agent/skills/game-development/game-art/SKILL.md +185 -185
  388. package/.agent/skills/game-development/game-audio/SKILL.md +190 -190
  389. package/.agent/skills/game-development/game-design/SKILL.md +129 -129
  390. package/.agent/skills/game-development/mobile-games/SKILL.md +108 -108
  391. package/.agent/skills/game-development/multiplayer/SKILL.md +132 -132
  392. package/.agent/skills/game-development/pc-games/SKILL.md +144 -144
  393. package/.agent/skills/game-development/vr-ar/SKILL.md +123 -123
  394. package/.agent/skills/game-development/web-games/SKILL.md +150 -150
  395. package/.agent/skills/gan-style-harness/SKILL.md +278 -278
  396. package/.agent/skills/geo-fundamentals/SKILL.md +156 -156
  397. package/.agent/skills/geo-fundamentals/scripts/geo_checker.py +289 -289
  398. package/.agent/skills/git-workflow/SKILL.md +715 -715
  399. package/.agent/skills/golang-patterns/SKILL.md +674 -674
  400. package/.agent/skills/golang-testing/SKILL.md +720 -720
  401. package/.agent/skills/google-workspace-ops/SKILL.md +95 -95
  402. package/.agent/skills/healthcare-cdss-patterns/SKILL.md +245 -245
  403. package/.agent/skills/healthcare-emr-patterns/SKILL.md +159 -159
  404. package/.agent/skills/healthcare-eval-harness/SKILL.md +207 -207
  405. package/.agent/skills/healthcare-phi-compliance/SKILL.md +145 -145
  406. package/.agent/skills/hexagonal-architecture/SKILL.md +276 -276
  407. package/.agent/skills/i18n-localization/SKILL.md +154 -154
  408. package/.agent/skills/i18n-localization/scripts/i18n_checker.py +241 -241
  409. package/.agent/skills/intelligent-routing/SKILL.md +335 -335
  410. package/.agent/skills/inventory-demand-planning/SKILL.md +247 -247
  411. package/.agent/skills/investor-materials/SKILL.md +96 -96
  412. package/.agent/skills/investor-outreach/SKILL.md +91 -91
  413. package/.agent/skills/iterative-retrieval/SKILL.md +211 -211
  414. package/.agent/skills/java-coding-standards/SKILL.md +147 -147
  415. package/.agent/skills/jira-integration/SKILL.md +293 -293
  416. package/.agent/skills/jpa-patterns/SKILL.md +151 -151
  417. package/.agent/skills/kotlin-coroutines-flows/SKILL.md +284 -284
  418. package/.agent/skills/kotlin-exposed-patterns/SKILL.md +719 -719
  419. package/.agent/skills/kotlin-ktor-patterns/SKILL.md +689 -689
  420. package/.agent/skills/kotlin-patterns/SKILL.md +711 -711
  421. package/.agent/skills/kotlin-testing/SKILL.md +824 -824
  422. package/.agent/skills/laravel-patterns/SKILL.md +415 -415
  423. package/.agent/skills/laravel-plugin-discovery/SKILL.md +229 -229
  424. package/.agent/skills/laravel-security/SKILL.md +285 -285
  425. package/.agent/skills/laravel-tdd/SKILL.md +283 -283
  426. package/.agent/skills/laravel-verification/SKILL.md +179 -179
  427. package/.agent/skills/lead-intelligence/SKILL.md +321 -321
  428. package/.agent/skills/lead-intelligence/agents/enrichment-agent.md +85 -85
  429. package/.agent/skills/lead-intelligence/agents/mutual-mapper.md +75 -75
  430. package/.agent/skills/lead-intelligence/agents/outreach-drafter.md +98 -98
  431. package/.agent/skills/lead-intelligence/agents/signal-scorer.md +60 -60
  432. package/.agent/skills/lint-and-validate/SKILL.md +45 -45
  433. package/.agent/skills/lint-and-validate/scripts/lint_runner.py +184 -184
  434. package/.agent/skills/lint-and-validate/scripts/type_coverage.py +173 -173
  435. package/.agent/skills/liquid-glass-design/SKILL.md +279 -279
  436. package/.agent/skills/logistics-exception-management/SKILL.md +222 -222
  437. package/.agent/skills/manim-video/SKILL.md +89 -89
  438. package/.agent/skills/manim-video/assets/network-graph-scene.py +52 -52
  439. package/.agent/skills/market-research/SKILL.md +75 -75
  440. package/.agent/skills/mcp-server-patterns/SKILL.md +67 -67
  441. package/.agent/skills/mobile-design/SKILL.md +394 -394
  442. package/.agent/skills/mobile-design/decision-trees.md +516 -516
  443. package/.agent/skills/mobile-design/mobile-backend.md +491 -491
  444. package/.agent/skills/mobile-design/mobile-color-system.md +420 -420
  445. package/.agent/skills/mobile-design/mobile-debugging.md +122 -122
  446. package/.agent/skills/mobile-design/mobile-design-thinking.md +357 -357
  447. package/.agent/skills/mobile-design/mobile-navigation.md +458 -458
  448. package/.agent/skills/mobile-design/mobile-performance.md +767 -767
  449. package/.agent/skills/mobile-design/mobile-testing.md +356 -356
  450. package/.agent/skills/mobile-design/mobile-typography.md +433 -433
  451. package/.agent/skills/mobile-design/platform-android.md +666 -666
  452. package/.agent/skills/mobile-design/platform-ios.md +561 -561
  453. package/.agent/skills/mobile-design/scripts/mobile_audit.py +670 -670
  454. package/.agent/skills/mobile-design/touch-psychology.md +537 -537
  455. package/.agent/skills/nanoclaw-repl/SKILL.md +33 -33
  456. package/.agent/skills/nestjs-patterns/SKILL.md +230 -230
  457. package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +351 -351
  458. package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +240 -240
  459. package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +490 -490
  460. package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +264 -264
  461. package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +581 -581
  462. package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +432 -432
  463. package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +684 -684
  464. package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +150 -150
  465. package/.agent/skills/nextjs-react-expert/9-cache-components.md +103 -103
  466. package/.agent/skills/nextjs-react-expert/SKILL.md +293 -293
  467. package/.agent/skills/nextjs-react-expert/scripts/convert_rules.py +222 -222
  468. package/.agent/skills/nextjs-react-expert/scripts/react_performance_checker.py +252 -252
  469. package/.agent/skills/nextjs-turbopack/SKILL.md +44 -44
  470. package/.agent/skills/nodejs-best-practices/SKILL.md +333 -333
  471. package/.agent/skills/nutrient-document-processing/SKILL.md +167 -167
  472. package/.agent/skills/nuxt4-patterns/SKILL.md +100 -100
  473. package/.agent/skills/openclaw-persona-forge/SKILL.md +296 -296
  474. package/.agent/skills/openclaw-persona-forge/gacha.py +224 -224
  475. package/.agent/skills/openclaw-persona-forge/gacha.sh +5 -5
  476. package/.agent/skills/openclaw-persona-forge/references/avatar-style.md +124 -124
  477. package/.agent/skills/openclaw-persona-forge/references/boundary-rules.md +53 -53
  478. package/.agent/skills/openclaw-persona-forge/references/error-handling.md +53 -53
  479. package/.agent/skills/openclaw-persona-forge/references/identity-tension.md +48 -48
  480. package/.agent/skills/openclaw-persona-forge/references/naming-system.md +39 -39
  481. package/.agent/skills/openclaw-persona-forge/references/output-template.md +166 -166
  482. package/.agent/skills/opensource-pipeline/SKILL.md +255 -255
  483. package/.agent/skills/parallel-agents/SKILL.md +175 -175
  484. package/.agent/skills/performance-profiling/SKILL.md +143 -143
  485. package/.agent/skills/performance-profiling/scripts/lighthouse_audit.py +76 -76
  486. package/.agent/skills/perl-patterns/SKILL.md +504 -504
  487. package/.agent/skills/perl-security/SKILL.md +503 -503
  488. package/.agent/skills/perl-testing/SKILL.md +475 -475
  489. package/.agent/skills/plan-writing/SKILL.md +152 -152
  490. package/.agent/skills/plankton-code-quality/SKILL.md +236 -236
  491. package/.agent/skills/postgres-patterns/SKILL.md +147 -147
  492. package/.agent/skills/powershell-windows/SKILL.md +167 -167
  493. package/.agent/skills/product-lens/SKILL.md +85 -85
  494. package/.agent/skills/production-scheduling/SKILL.md +238 -238
  495. package/.agent/skills/project-flow-ops/SKILL.md +111 -111
  496. package/.agent/skills/project-guidelines-example/SKILL.md +349 -349
  497. package/.agent/skills/prompt-optimizer/SKILL.md +397 -397
  498. package/.agent/skills/python-patterns/SKILL.md +750 -750
  499. package/.agent/skills/python-testing/SKILL.md +816 -816
  500. package/.agent/skills/pytorch-patterns/SKILL.md +396 -396
  501. package/.agent/skills/quality-nonconformance/SKILL.md +260 -260
  502. package/.agent/skills/ralphinho-rfc-pipeline/SKILL.md +67 -67
  503. package/.agent/skills/red-team-tactics/SKILL.md +199 -199
  504. package/.agent/skills/regex-vs-llm-structured-text/SKILL.md +220 -220
  505. package/.agent/skills/remotion-video-creation/SKILL.md +43 -43
  506. package/.agent/skills/remotion-video-creation/rules/3d.md +86 -86
  507. package/.agent/skills/remotion-video-creation/rules/animations.md +29 -29
  508. package/.agent/skills/remotion-video-creation/rules/assets/charts-bar-chart.tsx +173 -173
  509. package/.agent/skills/remotion-video-creation/rules/assets/text-animations-typewriter.tsx +100 -100
  510. package/.agent/skills/remotion-video-creation/rules/assets/text-animations-word-highlight.tsx +108 -108
  511. package/.agent/skills/remotion-video-creation/rules/assets.md +78 -78
  512. package/.agent/skills/remotion-video-creation/rules/audio.md +172 -172
  513. package/.agent/skills/remotion-video-creation/rules/calculate-metadata.md +104 -104
  514. package/.agent/skills/remotion-video-creation/rules/can-decode.md +75 -75
  515. package/.agent/skills/remotion-video-creation/rules/charts.md +58 -58
  516. package/.agent/skills/remotion-video-creation/rules/compositions.md +146 -146
  517. package/.agent/skills/remotion-video-creation/rules/display-captions.md +126 -126
  518. package/.agent/skills/remotion-video-creation/rules/extract-frames.md +229 -229
  519. package/.agent/skills/remotion-video-creation/rules/fonts.md +152 -152
  520. package/.agent/skills/remotion-video-creation/rules/get-audio-duration.md +58 -58
  521. package/.agent/skills/remotion-video-creation/rules/get-video-dimensions.md +68 -68
  522. package/.agent/skills/remotion-video-creation/rules/get-video-duration.md +58 -58
  523. package/.agent/skills/remotion-video-creation/rules/gifs.md +138 -138
  524. package/.agent/skills/remotion-video-creation/rules/images.md +130 -130
  525. package/.agent/skills/remotion-video-creation/rules/import-srt-captions.md +67 -67
  526. package/.agent/skills/remotion-video-creation/rules/lottie.md +67 -67
  527. package/.agent/skills/remotion-video-creation/rules/measuring-dom-nodes.md +34 -34
  528. package/.agent/skills/remotion-video-creation/rules/measuring-text.md +143 -143
  529. package/.agent/skills/remotion-video-creation/rules/sequencing.md +106 -106
  530. package/.agent/skills/remotion-video-creation/rules/tailwind.md +11 -11
  531. package/.agent/skills/remotion-video-creation/rules/text-animations.md +20 -20
  532. package/.agent/skills/remotion-video-creation/rules/timing.md +179 -179
  533. package/.agent/skills/remotion-video-creation/rules/transcribe-captions.md +19 -19
  534. package/.agent/skills/remotion-video-creation/rules/transitions.md +122 -122
  535. package/.agent/skills/remotion-video-creation/rules/trimming.md +52 -52
  536. package/.agent/skills/remotion-video-creation/rules/videos.md +171 -171
  537. package/.agent/skills/repo-scan/SKILL.md +63 -63
  538. package/.agent/skills/returns-reverse-logistics/SKILL.md +240 -240
  539. package/.agent/skills/rules-distill/SKILL.md +264 -264
  540. package/.agent/skills/rules-distill/scripts/scan-rules.sh +58 -58
  541. package/.agent/skills/rules-distill/scripts/scan-skills.sh +129 -129
  542. package/.agent/skills/rust-patterns/SKILL.md +499 -499
  543. package/.agent/skills/rust-pro/SKILL.md +175 -175
  544. package/.agent/skills/rust-testing/SKILL.md +500 -500
  545. package/.agent/skills/safety-guard/SKILL.md +75 -75
  546. package/.agent/skills/santa-method/SKILL.md +306 -306
  547. package/.agent/skills/search-first/SKILL.md +161 -161
  548. package/.agent/skills/security-review/SKILL.md +495 -495
  549. package/.agent/skills/security-review/cloud-infrastructure-security.md +361 -361
  550. package/.agent/skills/security-scan/SKILL.md +165 -165
  551. package/.agent/skills/seo-fundamentals/SKILL.md +129 -129
  552. package/.agent/skills/seo-fundamentals/scripts/seo_checker.py +219 -219
  553. package/.agent/skills/server-management/SKILL.md +161 -161
  554. package/.agent/skills/skill-comply/SKILL.md +58 -58
  555. package/.agent/skills/skill-comply/fixtures/compliant-trace.jsonl +5 -5
  556. package/.agent/skills/skill-comply/fixtures/noncompliant-trace.jsonl +3 -3
  557. package/.agent/skills/skill-comply/fixtures/tdd-spec.yaml +44 -44
  558. package/.agent/skills/skill-comply/prompts/classifier.md +24 -24
  559. package/.agent/skills/skill-comply/prompts/scenario-generator.md +62 -62
  560. package/.agent/skills/skill-comply/prompts/spec-generator.md +42 -42
  561. package/.agent/skills/skill-comply/pyproject.toml +15 -15
  562. package/.agent/skills/skill-comply/scripts/classifier.py +85 -85
  563. package/.agent/skills/skill-comply/scripts/grader.py +122 -122
  564. package/.agent/skills/skill-comply/scripts/parser.py +107 -107
  565. package/.agent/skills/skill-comply/scripts/report.py +170 -170
  566. package/.agent/skills/skill-comply/scripts/run.py +127 -127
  567. package/.agent/skills/skill-comply/scripts/runner.py +161 -161
  568. package/.agent/skills/skill-comply/scripts/scenario-generator.py +70 -70
  569. package/.agent/skills/skill-comply/scripts/spec-generator.py +72 -72
  570. package/.agent/skills/skill-comply/scripts/utils.py +13 -13
  571. package/.agent/skills/skill-comply/tests/test-grader.py +137 -137
  572. package/.agent/skills/skill-comply/tests/test-parser.py +90 -90
  573. package/.agent/skills/skill-stocktake/SKILL.md +193 -193
  574. package/.agent/skills/skill-stocktake/scripts/quick-diff.sh +87 -87
  575. package/.agent/skills/skill-stocktake/scripts/save-results.sh +56 -56
  576. package/.agent/skills/skill-stocktake/scripts/scan.sh +170 -170
  577. package/.agent/skills/social-graph-ranker/SKILL.md +154 -154
  578. package/.agent/skills/springboot-patterns/SKILL.md +314 -314
  579. package/.agent/skills/springboot-security/SKILL.md +272 -272
  580. package/.agent/skills/springboot-tdd/SKILL.md +158 -158
  581. package/.agent/skills/springboot-verification/SKILL.md +231 -231
  582. package/.agent/skills/strategic-compact/SKILL.md +131 -131
  583. package/.agent/skills/strategic-compact/suggest-compact.sh +54 -54
  584. package/.agent/skills/swift-actor-persistence/SKILL.md +143 -143
  585. package/.agent/skills/swift-concurrency-6-2/SKILL.md +216 -216
  586. package/.agent/skills/swift-protocol-di-testing/SKILL.md +190 -190
  587. package/.agent/skills/swiftui-patterns/SKILL.md +259 -259
  588. package/.agent/skills/systematic-debugging/SKILL.md +109 -109
  589. package/.agent/skills/tailwind-patterns/SKILL.md +269 -269
  590. package/.agent/skills/tdd-workflow/SKILL.md +463 -463
  591. package/.agent/skills/team-builder/SKILL.md +168 -168
  592. package/.agent/skills/testing-patterns/SKILL.md +178 -178
  593. package/.agent/skills/testing-patterns/scripts/test_runner.py +219 -219
  594. package/.agent/skills/token-budget-advisor/SKILL.md +133 -133
  595. package/.agent/skills/ui-demo/SKILL.md +465 -465
  596. package/.agent/skills/ui-ux-pro-max/SKILL.md +292 -292
  597. package/.agent/skills/ui-ux-pro-max/data/charts.csv +26 -26
  598. package/.agent/skills/ui-ux-pro-max/data/colors.csv +97 -97
  599. package/.agent/skills/ui-ux-pro-max/data/icons.csv +101 -101
  600. package/.agent/skills/ui-ux-pro-max/data/landing.csv +31 -31
  601. package/.agent/skills/ui-ux-pro-max/data/products.csv +96 -96
  602. package/.agent/skills/ui-ux-pro-max/data/react-performance.csv +45 -45
  603. package/.agent/skills/ui-ux-pro-max/data/stacks/astro.csv +54 -54
  604. package/.agent/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -53
  605. package/.agent/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -56
  606. package/.agent/skills/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -53
  607. package/.agent/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -53
  608. package/.agent/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -51
  609. package/.agent/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -59
  610. package/.agent/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -52
  611. package/.agent/skills/ui-ux-pro-max/data/stacks/react.csv +54 -54
  612. package/.agent/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -61
  613. package/.agent/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -54
  614. package/.agent/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -51
  615. package/.agent/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -50
  616. package/.agent/skills/ui-ux-pro-max/data/styles.csv +68 -68
  617. package/.agent/skills/ui-ux-pro-max/data/typography.csv +57 -57
  618. package/.agent/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -101
  619. package/.agent/skills/ui-ux-pro-max/data/ux-guidelines.csv +99 -99
  620. package/.agent/skills/ui-ux-pro-max/data/web-interface.csv +31 -31
  621. package/.agent/skills/ui-ux-pro-max/scripts/core.py +253 -253
  622. package/.agent/skills/ui-ux-pro-max/scripts/design_system.py +1067 -1067
  623. package/.agent/skills/ui-ux-pro-max/scripts/search.py +114 -114
  624. package/.agent/skills/verification-loop/SKILL.md +126 -126
  625. package/.agent/skills/video-editing/SKILL.md +310 -310
  626. package/.agent/skills/videodb/SKILL.md +374 -374
  627. package/.agent/skills/videodb/reference/api-reference.md +550 -550
  628. package/.agent/skills/videodb/reference/capture-reference.md +407 -407
  629. package/.agent/skills/videodb/reference/capture.md +101 -101
  630. package/.agent/skills/videodb/reference/editor.md +443 -443
  631. package/.agent/skills/videodb/reference/generative.md +331 -331
  632. package/.agent/skills/videodb/reference/rtstream-reference.md +564 -564
  633. package/.agent/skills/videodb/reference/rtstream.md +65 -65
  634. package/.agent/skills/videodb/reference/search.md +230 -230
  635. package/.agent/skills/videodb/reference/streaming.md +406 -406
  636. package/.agent/skills/videodb/reference/use-cases.md +118 -118
  637. package/.agent/skills/videodb/scripts/ws-listener.py +282 -282
  638. package/.agent/skills/visa-doc-translate/SKILL.md +117 -117
  639. package/.agent/skills/visa-doc-translate/readme.md +86 -86
  640. package/.agent/skills/vulnerability-scanner/SKILL.md +276 -276
  641. package/.agent/skills/vulnerability-scanner/checklists.md +121 -121
  642. package/.agent/skills/vulnerability-scanner/scripts/security_scan.py +458 -458
  643. package/.agent/skills/web-design-guidelines/SKILL.md +57 -57
  644. package/.agent/skills/webapp-testing/SKILL.md +187 -187
  645. package/.agent/skills/webapp-testing/scripts/playwright_runner.py +173 -173
  646. package/.agent/skills/workspace-surface-audit/SKILL.md +125 -125
  647. package/.agent/skills/x-api/SKILL.md +230 -230
  648. package/.agent/tasks/lessons.md +40 -40
  649. package/.agent/tasks/todo.md +33 -33
  650. package/.agent/tasks/two-track-merge-contract.md +1 -1
  651. package/.agent/workflows/aside.md +164 -164
  652. package/.agent/workflows/brainstorm.md +113 -113
  653. package/.agent/workflows/build-fix.md +62 -62
  654. package/.agent/workflows/checkpoint.md +74 -74
  655. package/.agent/workflows/claw.md +23 -23
  656. package/.agent/workflows/clean-memory.md +34 -34
  657. package/.agent/workflows/code-review.md +289 -289
  658. package/.agent/workflows/context-budget.md +23 -23
  659. package/.agent/workflows/cpp-build.md +173 -173
  660. package/.agent/workflows/cpp-review.md +132 -132
  661. package/.agent/workflows/cpp-test.md +251 -251
  662. package/.agent/workflows/create.md +59 -59
  663. package/.agent/workflows/debug.md +103 -103
  664. package/.agent/workflows/deploy.md +176 -176
  665. package/.agent/workflows/devfleet.md +23 -23
  666. package/.agent/workflows/docs.md +23 -23
  667. package/.agent/workflows/e2e.md +268 -268
  668. package/.agent/workflows/enhance.md +63 -63
  669. package/.agent/workflows/eval.md +23 -23
  670. package/.agent/workflows/evolve.md +178 -178
  671. package/.agent/workflows/flutter-build.md +164 -164
  672. package/.agent/workflows/flutter-review.md +116 -116
  673. package/.agent/workflows/flutter-test.md +144 -144
  674. package/.agent/workflows/gan-build.md +99 -99
  675. package/.agent/workflows/gan-design.md +35 -35
  676. package/.agent/workflows/go-build.md +183 -183
  677. package/.agent/workflows/go-review.md +148 -148
  678. package/.agent/workflows/go-test.md +268 -268
  679. package/.agent/workflows/gradle-build.md +70 -70
  680. package/.agent/workflows/harness-audit.md +73 -73
  681. package/.agent/workflows/init-docs.md +46 -46
  682. package/.agent/workflows/instinct-export.md +66 -66
  683. package/.agent/workflows/instinct-import.md +114 -114
  684. package/.agent/workflows/instinct-status.md +59 -59
  685. package/.agent/workflows/jira.md +106 -106
  686. package/.agent/workflows/kotlin-build.md +174 -174
  687. package/.agent/workflows/kotlin-review.md +140 -140
  688. package/.agent/workflows/kotlin-test.md +312 -312
  689. package/.agent/workflows/learn-eval.md +116 -116
  690. package/.agent/workflows/learn.md +70 -70
  691. package/.agent/workflows/loop-start.md +32 -32
  692. package/.agent/workflows/loop-status.md +24 -24
  693. package/.agent/workflows/model-route.md +26 -26
  694. package/.agent/workflows/multi-backend.md +158 -158
  695. package/.agent/workflows/multi-execute.md +315 -315
  696. package/.agent/workflows/multi-frontend.md +158 -158
  697. package/.agent/workflows/multi-plan.md +268 -268
  698. package/.agent/workflows/multi-workflow.md +191 -191
  699. package/.agent/workflows/orchestrate.md +135 -135
  700. package/.agent/workflows/plan.md +117 -117
  701. package/.agent/workflows/pm2.md +272 -272
  702. package/.agent/workflows/preview.md +81 -81
  703. package/.agent/workflows/projects.md +39 -39
  704. package/.agent/workflows/promote.md +41 -41
  705. package/.agent/workflows/prompt-optimize.md +23 -23
  706. package/.agent/workflows/prp-commit.md +112 -112
  707. package/.agent/workflows/prp-implement.md +385 -385
  708. package/.agent/workflows/prp-plan.md +502 -502
  709. package/.agent/workflows/prp-pr.md +184 -184
  710. package/.agent/workflows/prp-prd.md +447 -447
  711. package/.agent/workflows/prune.md +31 -31
  712. package/.agent/workflows/python-review.md +297 -297
  713. package/.agent/workflows/quality-gate.md +29 -29
  714. package/.agent/workflows/refactor-clean.md +80 -80
  715. package/.agent/workflows/resume-session.md +156 -156
  716. package/.agent/workflows/rules-distill.md +20 -20
  717. package/.agent/workflows/rust-build.md +187 -187
  718. package/.agent/workflows/rust-review.md +142 -142
  719. package/.agent/workflows/rust-test.md +308 -308
  720. package/.agent/workflows/santa-loop.md +175 -175
  721. package/.agent/workflows/save-session.md +275 -275
  722. package/.agent/workflows/sessions.md +333 -333
  723. package/.agent/workflows/setup-pm.md +80 -80
  724. package/.agent/workflows/skill-create.md +174 -174
  725. package/.agent/workflows/skill-health.md +54 -54
  726. package/.agent/workflows/status.md +86 -86
  727. package/.agent/workflows/tdd.md +231 -231
  728. package/.agent/workflows/test-coverage.md +69 -69
  729. package/.agent/workflows/test.md +144 -144
  730. package/.agent/workflows/ui-ux-pro-max.md +295 -295
  731. package/.agent/workflows/update-codemaps.md +72 -72
  732. package/.agent/workflows/update-docs.md +84 -84
  733. package/.agent/workflows/verify.md +23 -23
  734. package/LICENSE +176 -176
  735. package/README.md +144 -144
  736. package/package.json +1 -1
  737. package/scripts/release-check.js +55 -55
  738. package/src/bin/cli.js +424 -354
  739. package/src/lib/installer.js +223 -11
@@ -1,764 +1,764 @@
1
- ---
2
- name: data-scraper-agent
3
- description: Build a fully automated AI-powered data collection agent for any public source — job boards, prices, news, GitHub, sports, anything. Scrapes on a schedule, enriches data with a free LLM (Gemini Flash), stores results in Notion/Sheets/Supabase, and learns from user feedback. Runs 100% free on GitHub Actions. Use when the user wants to monitor, collect, or track any public data automatically.
4
- origin: community
5
- ---
6
-
7
- # Data Scraper Agent
8
-
9
- Build a production-ready, AI-powered data collection agent for any public data source.
10
- Runs on a schedule, enriches results with a free LLM, stores to a database, and improves over time.
11
-
12
- **Stack: Python · Gemini Flash (free) · GitHub Actions (free) · Notion / Sheets / Supabase**
13
-
14
- ## When to Activate
15
-
16
- - User wants to scrape or monitor any public website or API
17
- - User says "build a bot that checks...", "monitor X for me", "collect data from..."
18
- - User wants to track jobs, prices, news, repos, sports scores, events, listings
19
- - User asks how to automate data collection without paying for hosting
20
- - User wants an agent that gets smarter over time based on their decisions
21
-
22
- ## Core Concepts
23
-
24
- ### The Three Layers
25
-
26
- Every data scraper agent has three layers:
27
-
28
- ```
29
- COLLECT → ENRICH → STORE
30
- │ │ │
31
- Scraper AI (LLM) Database
32
- runs on scores/ Notion /
33
- schedule summarises Sheets /
34
- & classifies Supabase
35
- ```
36
-
37
- ### Free Stack
38
-
39
- | Layer | Tool | Why |
40
- |---|---|---|
41
- | **Scraping** | `requests` + `BeautifulSoup` | No cost, covers 80% of public sites |
42
- | **JS-rendered sites** | `playwright` (free) | When HTML scraping fails |
43
- | **AI enrichment** | Gemini Flash via REST API | 500 req/day, 1M tokens/day — free |
44
- | **Storage** | Notion API | Free tier, great UI for review |
45
- | **Schedule** | GitHub Actions cron | Free for public repos |
46
- | **Learning** | JSON feedback file in repo | Zero infra, persists in git |
47
-
48
- ### AI Model Fallback Chain
49
-
50
- Build agents to fall back automatically across Gemini models when a model's quota is exhausted:
51
-
52
- ```
53
- gemini-2.0-flash-lite (30 RPM) →
54
- gemini-2.0-flash (15 RPM) →
55
- gemini-2.5-flash (10 RPM) →
56
- gemini-flash-lite-latest (fallback)
57
- ```
58
-
59
- ### Batch API Calls for Efficiency
60
-
61
- Never call the LLM once per item. Always batch:
62
-
63
- ```python
64
- # BAD: 33 API calls for 33 items
65
- for item in items:
66
- result = call_ai(item) # 33 calls → hits rate limit
67
-
68
- # GOOD: 7 API calls for 33 items (batch size 5)
69
- for batch in chunks(items, size=5):
70
- results = call_ai(batch) # 7 calls → stays within free tier
71
- ```
72
-
73
- ---
74
-
75
- ## Workflow
76
-
77
- ### Step 1: Understand the Goal
78
-
79
- Ask the user:
80
-
81
- 1. **What to collect:** "What data source? URL / API / RSS / public endpoint?"
82
- 2. **What to extract:** "What fields matter? Title, price, URL, date, score?"
83
- 3. **How to store:** "Where should results go? Notion, Google Sheets, Supabase, or local file?"
84
- 4. **How to enrich:** "Do you want AI to score, summarise, classify, or match each item?"
85
- 5. **Frequency:** "How often should it run? Every hour, daily, weekly?"
86
-
87
- Common examples to suggest when prompting the user:
88
- - Job boards → score relevance to resume
89
- - Product prices → alert on drops
90
- - GitHub repos → summarise new releases
91
- - News feeds → classify by topic + sentiment
92
- - Sports results → extract stats to tracker
93
- - Events calendar → filter by interest
94
-
95
- ---
96
-
97
- ### Step 2: Design the Agent Architecture
98
-
99
- Generate this directory structure for the user:
100
-
101
- ```
102
- my-agent/
103
- ├── config.yaml # User customises this (keywords, filters, preferences)
104
- ├── profile/
105
- │ └── context.md # User context the AI uses (resume, interests, criteria)
106
- ├── scraper/
107
- │ ├── __init__.py
108
- │ ├── main.py # Orchestrator: scrape → enrich → store
109
- │ ├── filters.py # Rule-based pre-filter (fast, before AI)
110
- │ └── sources/
111
- │ ├── __init__.py
112
- │ └── source_name.py # One file per data source
113
- ├── ai/
114
- │ ├── __init__.py
115
- │ ├── client.py # Gemini REST client with model fallback
116
- │ ├── pipeline.py # Batch AI analysis
117
- │ ├── jd_fetcher.py # Fetch full content from URLs (optional)
118
- │ └── memory.py # Learn from user feedback
119
- ├── storage/
120
- │ ├── __init__.py
121
- │ └── notion_sync.py # Or sheets_sync.py / supabase_sync.py
122
- ├── data/
123
- │ └── feedback.json # User decision history (auto-updated)
124
- ├── .env.example
125
- ├── setup.py # One-time DB/schema creation
126
- ├── enrich_existing.py # Backfill AI scores on old rows
127
- ├── requirements.txt
128
- └── .github/
129
- └── workflows/
130
- └── scraper.yml # GitHub Actions schedule
131
- ```
132
-
133
- ---
134
-
135
- ### Step 3: Build the Scraper Source
136
-
137
- Template for any data source:
138
-
139
- ```python
140
- # scraper/sources/my_source.py
141
- """
142
- [Source Name] — scrapes [what] from [where].
143
- Method: [REST API / HTML scraping / RSS feed]
144
- """
145
- import requests
146
- from bs4 import BeautifulSoup
147
- from datetime import datetime, timezone
148
- from scraper.filters import is_relevant
149
-
150
- HEADERS = {
151
- "User-Agent": "Mozilla/5.0 (compatible; research-bot/1.0)",
152
- }
153
-
154
-
155
- def fetch() -> list[dict]:
156
- """
157
- Returns a list of items with consistent schema.
158
- Each item must have at minimum: name, url, date_found.
159
- """
160
- results = []
161
-
162
- # ---- REST API source ----
163
- resp = requests.get("https://api.example.com/items", headers=HEADERS, timeout=15)
164
- if resp.status_code == 200:
165
- for item in resp.json().get("results", []):
166
- if not is_relevant(item.get("title", "")):
167
- continue
168
- results.append(_normalise(item))
169
-
170
- return results
171
-
172
-
173
- def _normalise(raw: dict) -> dict:
174
- """Convert raw API/HTML data to the standard schema."""
175
- return {
176
- "name": raw.get("title", ""),
177
- "url": raw.get("link", ""),
178
- "source": "MySource",
179
- "date_found": datetime.now(timezone.utc).date().isoformat(),
180
- # add domain-specific fields here
181
- }
182
- ```
183
-
184
- **HTML scraping pattern:**
185
- ```python
186
- soup = BeautifulSoup(resp.text, "lxml")
187
- for card in soup.select("[class*='listing']"):
188
- title = card.select_one("h2, h3").get_text(strip=True)
189
- link = card.select_one("a")["href"]
190
- if not link.startswith("http"):
191
- link = f"https://example.com{link}"
192
- ```
193
-
194
- **RSS feed pattern:**
195
- ```python
196
- import xml.etree.ElementTree as ET
197
- root = ET.fromstring(resp.text)
198
- for item in root.findall(".//item"):
199
- title = item.findtext("title", "")
200
- link = item.findtext("link", "")
201
- ```
202
-
203
- ---
204
-
205
- ### Step 4: Build the Gemini AI Client
206
-
207
- ```python
208
- # ai/client.py
209
- import os, json, time, requests
210
-
211
- _last_call = 0.0
212
-
213
- MODEL_FALLBACK = [
214
- "gemini-2.0-flash-lite",
215
- "gemini-2.0-flash",
216
- "gemini-2.5-flash",
217
- "gemini-flash-lite-latest",
218
- ]
219
-
220
-
221
- def generate(prompt: str, model: str = "", rate_limit: float = 7.0) -> dict:
222
- """Call Gemini with auto-fallback on 429. Returns parsed JSON or {}."""
223
- global _last_call
224
-
225
- api_key = os.environ.get("GEMINI_API_KEY", "")
226
- if not api_key:
227
- return {}
228
-
229
- elapsed = time.time() - _last_call
230
- if elapsed < rate_limit:
231
- time.sleep(rate_limit - elapsed)
232
-
233
- models = [model] + [m for m in MODEL_FALLBACK if m != model] if model else MODEL_FALLBACK
234
- _last_call = time.time()
235
-
236
- for m in models:
237
- url = f"https://generativelanguage.googleapis.com/v1beta/models/{m}:generateContent?key={api_key}"
238
- payload = {
239
- "contents": [{"parts": [{"text": prompt}]}],
240
- "generationConfig": {
241
- "responseMimeType": "application/json",
242
- "temperature": 0.3,
243
- "maxOutputTokens": 2048,
244
- },
245
- }
246
- try:
247
- resp = requests.post(url, json=payload, timeout=30)
248
- if resp.status_code == 200:
249
- return _parse(resp)
250
- if resp.status_code in (429, 404):
251
- time.sleep(1)
252
- continue
253
- return {}
254
- except requests.RequestException:
255
- return {}
256
-
257
- return {}
258
-
259
-
260
- def _parse(resp) -> dict:
261
- try:
262
- text = (
263
- resp.json()
264
- .get("candidates", [{}])[0]
265
- .get("content", {})
266
- .get("parts", [{}])[0]
267
- .get("text", "")
268
- .strip()
269
- )
270
- if text.startswith("```"):
271
- text = text.split("\n", 1)[-1].rsplit("```", 1)[0]
272
- return json.loads(text)
273
- except (json.JSONDecodeError, KeyError):
274
- return {}
275
- ```
276
-
277
- ---
278
-
279
- ### Step 5: Build the AI Pipeline (Batch)
280
-
281
- ```python
282
- # ai/pipeline.py
283
- import json
284
- import yaml
285
- from pathlib import Path
286
- from ai.client import generate
287
-
288
- def analyse_batch(items: list[dict], context: str = "", preference_prompt: str = "") -> list[dict]:
289
- """Analyse items in batches. Returns items enriched with AI fields."""
290
- config = yaml.safe_load((Path(__file__).parent.parent / "config.yaml").read_text())
291
- model = config.get("ai", {}).get("model", "gemini-2.5-flash")
292
- rate_limit = config.get("ai", {}).get("rate_limit_seconds", 7.0)
293
- min_score = config.get("ai", {}).get("min_score", 0)
294
- batch_size = config.get("ai", {}).get("batch_size", 5)
295
-
296
- batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
297
- print(f" [AI] {len(items)} items → {len(batches)} API calls")
298
-
299
- enriched = []
300
- for i, batch in enumerate(batches):
301
- print(f" [AI] Batch {i + 1}/{len(batches)}...")
302
- prompt = _build_prompt(batch, context, preference_prompt, config)
303
- result = generate(prompt, model=model, rate_limit=rate_limit)
304
-
305
- analyses = result.get("analyses", [])
306
- for j, item in enumerate(batch):
307
- ai = analyses[j] if j < len(analyses) else {}
308
- if ai:
309
- score = max(0, min(100, int(ai.get("score", 0))))
310
- if min_score and score < min_score:
311
- continue
312
- enriched.append({**item, "ai_score": score, "ai_summary": ai.get("summary", ""), "ai_notes": ai.get("notes", "")})
313
- else:
314
- enriched.append(item)
315
-
316
- return enriched
317
-
318
-
319
- def _build_prompt(batch, context, preference_prompt, config):
320
- priorities = config.get("priorities", [])
321
- items_text = "\n\n".join(
322
- f"Item {i+1}: {json.dumps({k: v for k, v in item.items() if not k.startswith('_')})}"
323
- for i, item in enumerate(batch)
324
- )
325
-
326
- return f"""Analyse these {len(batch)} items and return a JSON object.
327
-
328
- # Items
329
- {items_text}
330
-
331
- # User Context
332
- {context[:800] if context else "Not provided"}
333
-
334
- # User Priorities
335
- {chr(10).join(f"- {p}" for p in priorities)}
336
-
337
- {preference_prompt}
338
-
339
- # Instructions
340
- Return: {{"analyses": [{{"score": <0-100>, "summary": "<2 sentences>", "notes": "<why this matches or doesn't>"}} for each item in order]}}
341
- Be concise. Score 90+=excellent match, 70-89=good, 50-69=ok, <50=weak."""
342
- ```
343
-
344
- ---
345
-
346
- ### Step 6: Build the Feedback Learning System
347
-
348
- ```python
349
- # ai/memory.py
350
- """Learn from user decisions to improve future scoring."""
351
- import json
352
- from pathlib import Path
353
-
354
- FEEDBACK_PATH = Path(__file__).parent.parent / "data" / "feedback.json"
355
-
356
-
357
- def load_feedback() -> dict:
358
- if FEEDBACK_PATH.exists():
359
- try:
360
- return json.loads(FEEDBACK_PATH.read_text())
361
- except (json.JSONDecodeError, OSError):
362
- pass
363
- return {"positive": [], "negative": []}
364
-
365
-
366
- def save_feedback(fb: dict):
367
- FEEDBACK_PATH.parent.mkdir(parents=True, exist_ok=True)
368
- FEEDBACK_PATH.write_text(json.dumps(fb, indent=2))
369
-
370
-
371
- def build_preference_prompt(feedback: dict, max_examples: int = 15) -> str:
372
- """Convert feedback history into a prompt bias section."""
373
- lines = []
374
- if feedback.get("positive"):
375
- lines.append("# Items the user LIKED (positive signal):")
376
- for e in feedback["positive"][-max_examples:]:
377
- lines.append(f"- {e}")
378
- if feedback.get("negative"):
379
- lines.append("\n# Items the user SKIPPED/REJECTED (negative signal):")
380
- for e in feedback["negative"][-max_examples:]:
381
- lines.append(f"- {e}")
382
- if lines:
383
- lines.append("\nUse these patterns to bias scoring on new items.")
384
- return "\n".join(lines)
385
- ```
386
-
387
- **Integration with your storage layer:** after each run, query your DB for items with positive/negative status and call `save_feedback()` with the extracted patterns.
388
-
389
- ---
390
-
391
- ### Step 7: Build Storage (Notion example)
392
-
393
- ```python
394
- # storage/notion_sync.py
395
- import os
396
- from notion_client import Client
397
- from notion_client.errors import APIResponseError
398
-
399
- _client = None
400
-
401
- def get_client():
402
- global _client
403
- if _client is None:
404
- _client = Client(auth=os.environ["NOTION_TOKEN"])
405
- return _client
406
-
407
- def get_existing_urls(db_id: str) -> set[str]:
408
- """Fetch all URLs already stored — used for deduplication."""
409
- client, seen, cursor = get_client(), set(), None
410
- while True:
411
- resp = client.databases.query(database_id=db_id, page_size=100, **{"start_cursor": cursor} if cursor else {})
412
- for page in resp["results"]:
413
- url = page["properties"].get("URL", {}).get("url", "")
414
- if url: seen.add(url)
415
- if not resp["has_more"]: break
416
- cursor = resp["next_cursor"]
417
- return seen
418
-
419
- def push_item(db_id: str, item: dict) -> bool:
420
- """Push one item to Notion. Returns True on success."""
421
- props = {
422
- "Name": {"title": [{"text": {"content": item.get("name", "")[:100]}}]},
423
- "URL": {"url": item.get("url")},
424
- "Source": {"select": {"name": item.get("source", "Unknown")}},
425
- "Date Found": {"date": {"start": item.get("date_found")}},
426
- "Status": {"select": {"name": "New"}},
427
- }
428
- # AI fields
429
- if item.get("ai_score") is not None:
430
- props["AI Score"] = {"number": item["ai_score"]}
431
- if item.get("ai_summary"):
432
- props["Summary"] = {"rich_text": [{"text": {"content": item["ai_summary"][:2000]}}]}
433
- if item.get("ai_notes"):
434
- props["Notes"] = {"rich_text": [{"text": {"content": item["ai_notes"][:2000]}}]}
435
-
436
- try:
437
- get_client().pages.create(parent={"database_id": db_id}, properties=props)
438
- return True
439
- except APIResponseError as e:
440
- print(f"[notion] Push failed: {e}")
441
- return False
442
-
443
- def sync(db_id: str, items: list[dict]) -> tuple[int, int]:
444
- existing = get_existing_urls(db_id)
445
- added = skipped = 0
446
- for item in items:
447
- if item.get("url") in existing:
448
- skipped += 1; continue
449
- if push_item(db_id, item):
450
- added += 1; existing.add(item["url"])
451
- else:
452
- skipped += 1
453
- return added, skipped
454
- ```
455
-
456
- ---
457
-
458
- ### Step 8: Orchestrate in main.py
459
-
460
- ```python
461
- # scraper/main.py
462
- import os, sys, yaml
463
- from pathlib import Path
464
- from dotenv import load_dotenv
465
-
466
- load_dotenv()
467
-
468
- from scraper.sources import my_source # add your sources
469
-
470
- # NOTE: This example uses Notion. If storage.provider is "sheets" or "supabase",
471
- # replace this import with storage.sheets_sync or storage.supabase_sync and update
472
- # the env var and sync() call accordingly.
473
- from storage.notion_sync import sync
474
-
475
- SOURCES = [
476
- ("My Source", my_source.fetch),
477
- ]
478
-
479
- def ai_enabled():
480
- return bool(os.environ.get("GEMINI_API_KEY"))
481
-
482
- def main():
483
- config = yaml.safe_load((Path(__file__).parent.parent / "config.yaml").read_text())
484
- provider = config.get("storage", {}).get("provider", "notion")
485
-
486
- # Resolve the storage target identifier from env based on provider
487
- if provider == "notion":
488
- db_id = os.environ.get("NOTION_DATABASE_ID")
489
- if not db_id:
490
- print("ERROR: NOTION_DATABASE_ID not set"); sys.exit(1)
491
- else:
492
- # Extend here for sheets (SHEET_ID) or supabase (SUPABASE_TABLE) etc.
493
- print(f"ERROR: provider '{provider}' not yet wired in main.py"); sys.exit(1)
494
-
495
- config = yaml.safe_load((Path(__file__).parent.parent / "config.yaml").read_text())
496
- all_items = []
497
-
498
- for name, fetch_fn in SOURCES:
499
- try:
500
- items = fetch_fn()
501
- print(f"[{name}] {len(items)} items")
502
- all_items.extend(items)
503
- except Exception as e:
504
- print(f"[{name}] FAILED: {e}")
505
-
506
- # Deduplicate by URL
507
- seen, deduped = set(), []
508
- for item in all_items:
509
- if (url := item.get("url", "")) and url not in seen:
510
- seen.add(url); deduped.append(item)
511
-
512
- print(f"Unique items: {len(deduped)}")
513
-
514
- if ai_enabled() and deduped:
515
- from ai.memory import load_feedback, build_preference_prompt
516
- from ai.pipeline import analyse_batch
517
-
518
- # load_feedback() reads data/feedback.json written by your feedback sync script.
519
- # To keep it current, implement a separate feedback_sync.py that queries your
520
- # storage provider for items with positive/negative statuses and calls save_feedback().
521
- feedback = load_feedback()
522
- preference = build_preference_prompt(feedback)
523
- context_path = Path(__file__).parent.parent / "profile" / "context.md"
524
- context = context_path.read_text() if context_path.exists() else ""
525
- deduped = analyse_batch(deduped, context=context, preference_prompt=preference)
526
- else:
527
- print("[AI] Skipped — GEMINI_API_KEY not set")
528
-
529
- added, skipped = sync(db_id, deduped)
530
- print(f"Done — {added} new, {skipped} existing")
531
-
532
- if __name__ == "__main__":
533
- main()
534
- ```
535
-
536
- ---
537
-
538
- ### Step 9: GitHub Actions Workflow
539
-
540
- ```yaml
541
- # .github/workflows/scraper.yml
542
- name: Data Scraper Agent
543
-
544
- on:
545
- schedule:
546
- - cron: "0 */3 * * *" # every 3 hours — adjust to your needs
547
- workflow_dispatch: # allow manual trigger
548
-
549
- permissions:
550
- contents: write # required for the feedback-history commit step
551
-
552
- jobs:
553
- scrape:
554
- runs-on: ubuntu-latest
555
- timeout-minutes: 20
556
-
557
- steps:
558
- - uses: actions/checkout@v4
559
-
560
- - uses: actions/setup-python@v5
561
- with:
562
- python-version: "3.11"
563
- cache: "pip"
564
-
565
- - run: pip install -r requirements.txt
566
-
567
- # Uncomment if Playwright is enabled in requirements.txt
568
- # - name: Install Playwright browsers
569
- # run: python -m playwright install chromium --with-deps
570
-
571
- - name: Run agent
572
- env:
573
- NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }}
574
- NOTION_DATABASE_ID: ${{ secrets.NOTION_DATABASE_ID }}
575
- GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
576
- run: python -m scraper.main
577
-
578
- - name: Commit feedback history
579
- run: |
580
- git config user.name "github-actions[bot]"
581
- git config user.email "github-actions[bot]@users.noreply.github.com"
582
- git add data/feedback.json || true
583
- git diff --cached --quiet || git commit -m "chore: update feedback history"
584
- git push
585
- ```
586
-
587
- ---
588
-
589
- ### Step 10: config.yaml Template
590
-
591
- ```yaml
592
- # Customise this file — no code changes needed
593
-
594
- # What to collect (pre-filter before AI)
595
- filters:
596
- required_keywords: [] # item must contain at least one
597
- blocked_keywords: [] # item must not contain any
598
-
599
- # Your priorities — AI uses these for scoring
600
- priorities:
601
- - "example priority 1"
602
- - "example priority 2"
603
-
604
- # Storage
605
- storage:
606
- provider: "notion" # notion | sheets | supabase | sqlite
607
-
608
- # Feedback learning
609
- feedback:
610
- positive_statuses: ["Saved", "Applied", "Interested"]
611
- negative_statuses: ["Skip", "Rejected", "Not relevant"]
612
-
613
- # AI settings
614
- ai:
615
- enabled: true
616
- model: "gemini-2.5-flash"
617
- min_score: 0 # drop items scoring below this value (0 disables the filter)
618
- rate_limit_seconds: 7 # seconds between API calls
619
- batch_size: 5 # items per API call
620
- ```
621
-
622
- ---
623
-
624
- ## Common Scraping Patterns
625
-
626
- ### Pattern 1: REST API (easiest)
627
- ```python
628
- resp = requests.get(url, params={"q": query}, headers=HEADERS, timeout=15)
629
- items = resp.json().get("results", [])
630
- ```
631
-
632
- ### Pattern 2: HTML Scraping
633
- ```python
634
- soup = BeautifulSoup(resp.text, "lxml")
635
- for card in soup.select(".listing-card"):
636
- title = card.select_one("h2").get_text(strip=True)
637
- href = card.select_one("a")["href"]
638
- ```
639
-
640
- ### Pattern 3: RSS Feed
641
- ```python
642
- import xml.etree.ElementTree as ET
643
- root = ET.fromstring(resp.text)
644
- for item in root.findall(".//item"):
645
- title = item.findtext("title", "")
646
- link = item.findtext("link", "")
647
- pub_date = item.findtext("pubDate", "")
648
- ```
649
-
650
- ### Pattern 4: Paginated API
651
- ```python
652
- page = 1
653
- while True:
654
- resp = requests.get(url, params={"page": page, "limit": 50}, timeout=15)
655
- data = resp.json()
656
- items = data.get("results", [])
657
- if not items:
658
- break
659
- for item in items:
660
- results.append(_normalise(item))
661
- if not data.get("has_more"):
662
- break
663
- page += 1
664
- ```
665
-
666
- ### Pattern 5: JS-Rendered Pages (Playwright)
667
- ```python
668
- from playwright.sync_api import sync_playwright
669
-
670
- with sync_playwright() as p:
671
- browser = p.chromium.launch()
672
- page = browser.new_page()
673
- page.goto(url)
674
- page.wait_for_selector(".listing")
675
- html = page.content()
676
- browser.close()
677
-
678
- soup = BeautifulSoup(html, "lxml")
679
- ```
680
-
681
- ---
682
-
683
- ## Anti-Patterns to Avoid
684
-
685
- | Anti-pattern | Problem | Fix |
686
- |---|---|---|
687
- | One LLM call per item | Hits rate limits instantly | Batch 5 items per call |
688
- | Hardcoded keywords in code | Not reusable | Move all config to `config.yaml` |
689
- | Scraping without rate limit | IP ban | Add `time.sleep(1)` between requests |
690
- | Storing secrets in code | Security risk | Always use `.env` + GitHub Secrets |
691
- | No deduplication | Duplicate rows pile up | Always check URL before pushing |
692
- | Ignoring `robots.txt` | Legal/ethical risk | Respect crawl rules; use public APIs when available |
693
- | JS-rendered sites with `requests` | Empty response | Use Playwright or look for the underlying API |
694
- | `maxOutputTokens` too low | Truncated JSON, parse error | Use 2048+ for batch responses |
695
-
696
- ---
697
-
698
- ## Free Tier Limits Reference
699
-
700
- | Service | Free Limit | Typical Usage |
701
- |---|---|---|
702
- | Gemini Flash Lite | 30 RPM, 1500 RPD | ~56 req/day at 3-hr intervals |
703
- | Gemini 2.0 Flash | 15 RPM, 1500 RPD | Good fallback |
704
- | Gemini 2.5 Flash | 10 RPM, 500 RPD | Use sparingly |
705
- | GitHub Actions | Unlimited (public repos) | ~20 min/day |
706
- | Notion API | Unlimited | ~200 writes/day |
707
- | Supabase | 500MB DB, 2GB transfer | Fine for most agents |
708
- | Google Sheets API | 300 req/min | Works for small agents |
709
-
710
- ---
711
-
712
- ## Requirements Template
713
-
714
- ```
715
- requests==2.31.0
716
- beautifulsoup4==4.12.3
717
- lxml==5.1.0
718
- python-dotenv==1.0.1
719
- pyyaml==6.0.2
720
- notion-client==2.2.1 # if using Notion
721
- # playwright==1.40.0 # uncomment for JS-rendered sites
722
- ```
723
-
724
- ---
725
-
726
- ## Quality Checklist
727
-
728
- Before marking the agent complete:
729
-
730
- - [ ] `config.yaml` controls all user-facing settings — no hardcoded values
731
- - [ ] `profile/context.md` holds user-specific context for AI matching
732
- - [ ] Deduplication by URL before every storage push
733
- - [ ] Gemini client has model fallback chain (4 models)
734
- - [ ] Batch size ≤ 5 items per API call
735
- - [ ] `maxOutputTokens` ≥ 2048
736
- - [ ] `.env` is in `.gitignore`
737
- - [ ] `.env.example` provided for onboarding
738
- - [ ] `setup.py` creates DB schema on first run
739
- - [ ] `enrich_existing.py` backfills AI scores on old rows
740
- - [ ] GitHub Actions workflow commits `feedback.json` after each run
741
- - [ ] README covers: setup in < 5 minutes, required secrets, customisation
742
-
743
- ---
744
-
745
- ## Real-World Examples
746
-
747
- ```
748
- "Build me an agent that monitors Hacker News for AI startup funding news"
749
- "Scrape product prices from 3 e-commerce sites and alert when they drop"
750
- "Track new GitHub repos tagged with 'llm' or 'agents' — summarise each one"
751
- "Collect Chief of Staff job listings from LinkedIn and Cutshort into Notion"
752
- "Monitor a subreddit for posts mentioning my company — classify sentiment"
753
- "Scrape new academic papers from arXiv on a topic I care about daily"
754
- "Track sports fixture results and keep a running table in Google Sheets"
755
- "Build a real estate listing watcher — alert on new properties under ₹1 Cr"
756
- ```
757
-
758
- ---
759
-
760
- ## Reference Implementation
761
-
762
- A complete working agent built with this exact architecture would scrape 4+ sources,
763
- batch Gemini calls, learn from Applied/Rejected decisions stored in Notion, and run
764
- 100% free on GitHub Actions. Follow Steps 1–10 above to build your own.
1
+ ---
2
+ name: data-scraper-agent
3
+ description: Build a fully automated AI-powered data collection agent for any public source — job boards, prices, news, GitHub, sports, anything. Scrapes on a schedule, enriches data with a free LLM (Gemini Flash), stores results in Notion/Sheets/Supabase, and learns from user feedback. Runs 100% free on GitHub Actions. Use when the user wants to monitor, collect, or track any public data automatically.
4
+ origin: community
5
+ ---
6
+
7
+ # Data Scraper Agent
8
+
9
+ Build a production-ready, AI-powered data collection agent for any public data source.
10
+ Runs on a schedule, enriches results with a free LLM, stores to a database, and improves over time.
11
+
12
+ **Stack: Python · Gemini Flash (free) · GitHub Actions (free) · Notion / Sheets / Supabase**
13
+
14
+ ## When to Activate
15
+
16
+ - User wants to scrape or monitor any public website or API
17
+ - User says "build a bot that checks...", "monitor X for me", "collect data from..."
18
+ - User wants to track jobs, prices, news, repos, sports scores, events, listings
19
+ - User asks how to automate data collection without paying for hosting
20
+ - User wants an agent that gets smarter over time based on their decisions
21
+
22
+ ## Core Concepts
23
+
24
+ ### The Three Layers
25
+
26
+ Every data scraper agent has three layers:
27
+
28
+ ```
29
+ COLLECT → ENRICH → STORE
30
+ │ │ │
31
+ Scraper AI (LLM) Database
32
+ runs on scores/ Notion /
33
+ schedule summarises Sheets /
34
+ & classifies Supabase
35
+ ```
36
+
37
+ ### Free Stack
38
+
39
+ | Layer | Tool | Why |
40
+ |---|---|---|
41
+ | **Scraping** | `requests` + `BeautifulSoup` | No cost, covers 80% of public sites |
42
+ | **JS-rendered sites** | `playwright` (free) | When HTML scraping fails |
43
+ | **AI enrichment** | Gemini Flash via REST API | 500 req/day, 1M tokens/day — free |
44
+ | **Storage** | Notion API | Free tier, great UI for review |
45
+ | **Schedule** | GitHub Actions cron | Free for public repos |
46
+ | **Learning** | JSON feedback file in repo | Zero infra, persists in git |
47
+
48
+ ### AI Model Fallback Chain
49
+
50
+ Build agents to auto-fallback across Gemini models on quota exhaustion:
51
+
52
+ ```
53
+ gemini-2.0-flash-lite (30 RPM) →
54
+ gemini-2.0-flash (15 RPM) →
55
+ gemini-2.5-flash (10 RPM) →
56
+ gemini-flash-lite-latest (fallback)
57
+ ```
58
+
59
+ ### Batch API Calls for Efficiency
60
+
61
+ Never call the LLM once per item. Always batch:
62
+
63
+ ```python
64
+ # BAD: 33 API calls for 33 items
65
+ for item in items:
66
+ result = call_ai(item) # 33 calls → hits rate limit
67
+
68
+ # GOOD: 7 API calls for 33 items (batch size 5)
69
+ for batch in chunks(items, size=5):
70
+ results = call_ai(batch) # 7 calls → stays within free tier
71
+ ```
72
+
73
+ ---
74
+
75
+ ## Workflow
76
+
77
+ ### Step 1: Understand the Goal
78
+
79
+ Ask the user:
80
+
81
+ 1. **What to collect:** "What data source? URL / API / RSS / public endpoint?"
82
+ 2. **What to extract:** "What fields matter? Title, price, URL, date, score?"
83
+ 3. **How to store:** "Where should results go? Notion, Google Sheets, Supabase, or local file?"
84
+ 4. **How to enrich:** "Do you want AI to score, summarise, classify, or match each item?"
85
+ 5. **Frequency:** "How often should it run? Every hour, daily, weekly?"
86
+
87
+ Common examples to prompt:
88
+ - Job boards → score relevance to resume
89
+ - Product prices → alert on drops
90
+ - GitHub repos → summarise new releases
91
+ - News feeds → classify by topic + sentiment
92
+ - Sports results → extract stats to tracker
93
+ - Events calendar → filter by interest
94
+
95
+ ---
96
+
97
+ ### Step 2: Design the Agent Architecture
98
+
99
+ Generate this directory structure for the user:
100
+
101
+ ```
102
+ my-agent/
103
+ ├── config.yaml # User customises this (keywords, filters, preferences)
104
+ ├── profile/
105
+ │ └── context.md # User context the AI uses (resume, interests, criteria)
106
+ ├── scraper/
107
+ │ ├── __init__.py
108
+ │ ├── main.py # Orchestrator: scrape → enrich → store
109
+ │ ├── filters.py # Rule-based pre-filter (fast, before AI)
110
+ │ └── sources/
111
+ │ ├── __init__.py
112
+ │ └── source_name.py # One file per data source
113
+ ├── ai/
114
+ │ ├── __init__.py
115
+ │ ├── client.py # Gemini REST client with model fallback
116
+ │ ├── pipeline.py # Batch AI analysis
117
+ │ ├── jd_fetcher.py # Fetch full content from URLs (optional)
118
+ │ └── memory.py # Learn from user feedback
119
+ ├── storage/
120
+ │ ├── __init__.py
121
+ │ └── notion_sync.py # Or sheets_sync.py / supabase_sync.py
122
+ ├── data/
123
+ │ └── feedback.json # User decision history (auto-updated)
124
+ ├── .env.example
125
+ ├── setup.py # One-time DB/schema creation
126
+ ├── enrich_existing.py # Backfill AI scores on old rows
127
+ ├── requirements.txt
128
+ └── .github/
129
+ └── workflows/
130
+ └── scraper.yml # GitHub Actions schedule
131
+ ```
132
+
133
+ ---
134
+
135
+ ### Step 3: Build the Scraper Source
136
+
137
+ Template for any data source:
138
+
139
+ ```python
140
+ # scraper/sources/my_source.py
141
+ """
142
+ [Source Name] — scrapes [what] from [where].
143
+ Method: [REST API / HTML scraping / RSS feed]
144
+ """
145
+ import requests
146
+ from bs4 import BeautifulSoup
147
+ from datetime import datetime, timezone
148
+ from scraper.filters import is_relevant
149
+
150
+ HEADERS = {
151
+ "User-Agent": "Mozilla/5.0 (compatible; research-bot/1.0)",
152
+ }
153
+
154
+
155
+ def fetch() -> list[dict]:
156
+ """
157
+ Returns a list of items with consistent schema.
158
+ Each item must have at minimum: name, url, date_found.
159
+ """
160
+ results = []
161
+
162
+ # ---- REST API source ----
163
+ resp = requests.get("https://api.example.com/items", headers=HEADERS, timeout=15)
164
+ if resp.status_code == 200:
165
+ for item in resp.json().get("results", []):
166
+ if not is_relevant(item.get("title", "")):
167
+ continue
168
+ results.append(_normalise(item))
169
+
170
+ return results
171
+
172
+
173
+ def _normalise(raw: dict) -> dict:
174
+ """Convert raw API/HTML data to the standard schema."""
175
+ return {
176
+ "name": raw.get("title", ""),
177
+ "url": raw.get("link", ""),
178
+ "source": "MySource",
179
+ "date_found": datetime.now(timezone.utc).date().isoformat(),
180
+ # add domain-specific fields here
181
+ }
182
+ ```
183
+
184
+ **HTML scraping pattern:**
185
+ ```python
186
+ soup = BeautifulSoup(resp.text, "lxml")
187
+ for card in soup.select("[class*='listing']"):
188
+ title = card.select_one("h2, h3").get_text(strip=True)
189
+ link = card.select_one("a")["href"]
190
+ if not link.startswith("http"):
191
+ link = f"https://example.com{link}"
192
+ ```
193
+
194
+ **RSS feed pattern:**
195
+ ```python
196
+ import xml.etree.ElementTree as ET
197
+ root = ET.fromstring(resp.text)
198
+ for item in root.findall(".//item"):
199
+ title = item.findtext("title", "")
200
+ link = item.findtext("link", "")
201
+ ```
202
+
203
+ ---
204
+
205
+ ### Step 4: Build the Gemini AI Client
206
+
207
+ ```python
208
+ # ai/client.py
209
+ import os, json, time, requests
210
+
211
+ _last_call = 0.0
212
+
213
+ MODEL_FALLBACK = [
214
+ "gemini-2.0-flash-lite",
215
+ "gemini-2.0-flash",
216
+ "gemini-2.5-flash",
217
+ "gemini-flash-lite-latest",
218
+ ]
219
+
220
+
221
+ def generate(prompt: str, model: str = "", rate_limit: float = 7.0) -> dict:
222
+ """Call Gemini, falling back to the next model on HTTP 429 or 404. Returns parsed JSON or {}."""
223
+ global _last_call
224
+
225
+ api_key = os.environ.get("GEMINI_API_KEY", "")
226
+ if not api_key:
227
+ return {}
228
+
229
+ elapsed = time.time() - _last_call
230
+ if elapsed < rate_limit:
231
+ time.sleep(rate_limit - elapsed)
232
+
233
+ models = [model] + [m for m in MODEL_FALLBACK if m != model] if model else MODEL_FALLBACK
234
+ _last_call = time.time()
235
+
236
+ for m in models:
237
+ url = f"https://generativelanguage.googleapis.com/v1beta/models/{m}:generateContent?key={api_key}"
238
+ payload = {
239
+ "contents": [{"parts": [{"text": prompt}]}],
240
+ "generationConfig": {
241
+ "responseMimeType": "application/json",
242
+ "temperature": 0.3,
243
+ "maxOutputTokens": 2048,
244
+ },
245
+ }
246
+ try:
247
+ resp = requests.post(url, json=payload, timeout=30)
248
+ if resp.status_code == 200:
249
+ return _parse(resp)
250
+ if resp.status_code in (429, 404):
251
+ time.sleep(1)
252
+ continue
253
+ return {}
254
+ except requests.RequestException:
255
+ return {}
256
+
257
+ return {}
258
+
259
+
260
+ def _parse(resp) -> dict:
261
+ try:
262
+ text = (
263
+ resp.json()
264
+ .get("candidates", [{}])[0]
265
+ .get("content", {})
266
+ .get("parts", [{}])[0]
267
+ .get("text", "")
268
+ .strip()
269
+ )
270
+ if text.startswith("```"):
271
+ text = text.split("\n", 1)[-1].rsplit("```", 1)[0]
272
+ return json.loads(text)
273
+ except (json.JSONDecodeError, KeyError):
274
+ return {}
275
+ ```
276
+
277
+ ---
278
+
279
+ ### Step 5: Build the AI Pipeline (Batch)
280
+
281
+ ```python
282
+ # ai/pipeline.py
283
+ import json
284
+ import yaml
285
+ from pathlib import Path
286
+ from ai.client import generate
287
+
288
+ def analyse_batch(items: list[dict], context: str = "", preference_prompt: str = "") -> list[dict]:
289
+ """Analyse items in batches. Returns items enriched with AI fields; items scoring below ai.min_score are dropped."""
290
+ config = yaml.safe_load((Path(__file__).parent.parent / "config.yaml").read_text())
291
+ model = config.get("ai", {}).get("model", "gemini-2.5-flash")
292
+ rate_limit = config.get("ai", {}).get("rate_limit_seconds", 7.0)
293
+ min_score = config.get("ai", {}).get("min_score", 0)
294
+ batch_size = config.get("ai", {}).get("batch_size", 5)
295
+
296
+ batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]
297
+ print(f" [AI] {len(items)} items → {len(batches)} API calls")
298
+
299
+ enriched = []
300
+ for i, batch in enumerate(batches):
301
+ print(f" [AI] Batch {i + 1}/{len(batches)}...")
302
+ prompt = _build_prompt(batch, context, preference_prompt, config)
303
+ result = generate(prompt, model=model, rate_limit=rate_limit)
304
+
305
+ analyses = result.get("analyses", [])
306
+ for j, item in enumerate(batch):
307
+ ai = analyses[j] if j < len(analyses) else {}
308
+ if ai:
309
+ score = max(0, min(100, int(ai.get("score", 0))))
310
+ if min_score and score < min_score:
311
+ continue
312
+ enriched.append({**item, "ai_score": score, "ai_summary": ai.get("summary", ""), "ai_notes": ai.get("notes", "")})
313
+ else:
314
+ enriched.append(item)
315
+
316
+ return enriched
317
+
318
+
319
+ def _build_prompt(batch, context, preference_prompt, config):
320
+ priorities = config.get("priorities", [])
321
+ items_text = "\n\n".join(
322
+ f"Item {i+1}: {json.dumps({k: v for k, v in item.items() if not k.startswith('_')})}"
323
+ for i, item in enumerate(batch)
324
+ )
325
+
326
+ return f"""Analyse these {len(batch)} items and return a JSON object.
327
+
328
+ # Items
329
+ {items_text}
330
+
331
+ # User Context
332
+ {context[:800] if context else "Not provided"}
333
+
334
+ # User Priorities
335
+ {chr(10).join(f"- {p}" for p in priorities)}
336
+
337
+ {preference_prompt}
338
+
339
+ # Instructions
340
+ Return: {{"analyses": [{{"score": <0-100>, "summary": "<2 sentences>", "notes": "<why this matches or doesn't>"}} for each item in order]}}
341
+ Be concise. Score 90+=excellent match, 70-89=good, 50-69=ok, <50=weak."""
342
+ ```
343
+
344
+ ---
345
+
346
+ ### Step 6: Build the Feedback Learning System
347
+
348
+ ```python
349
+ # ai/memory.py
350
+ """Learn from user decisions to improve future scoring."""
351
+ import json
352
+ from pathlib import Path
353
+
354
+ FEEDBACK_PATH = Path(__file__).parent.parent / "data" / "feedback.json"
355
+
356
+
357
+ def load_feedback() -> dict:
358
+ if FEEDBACK_PATH.exists():
359
+ try:
360
+ return json.loads(FEEDBACK_PATH.read_text())
361
+ except (json.JSONDecodeError, OSError):
362
+ pass
363
+ return {"positive": [], "negative": []}
364
+
365
+
366
+ def save_feedback(fb: dict):
367
+ FEEDBACK_PATH.parent.mkdir(parents=True, exist_ok=True)
368
+ FEEDBACK_PATH.write_text(json.dumps(fb, indent=2))
369
+
370
+
371
+ def build_preference_prompt(feedback: dict, max_examples: int = 15) -> str:
372
+ """Convert feedback history into a prompt bias section."""
373
+ lines = []
374
+ if feedback.get("positive"):
375
+ lines.append("# Items the user LIKED (positive signal):")
376
+ for e in feedback["positive"][-max_examples:]:
377
+ lines.append(f"- {e}")
378
+ if feedback.get("negative"):
379
+ lines.append("\n# Items the user SKIPPED/REJECTED (negative signal):")
380
+ for e in feedback["negative"][-max_examples:]:
381
+ lines.append(f"- {e}")
382
+ if lines:
383
+ lines.append("\nUse these patterns to bias scoring on new items.")
384
+ return "\n".join(lines)
385
+ ```
386
+
387
+ **Integration with your storage layer:** after each run, query your DB for items with positive/negative status and call `save_feedback()` with the extracted patterns.
388
+
389
+ ---
390
+
391
+ ### Step 7: Build Storage (Notion example)
392
+
393
+ ```python
394
+ # storage/notion_sync.py
395
+ import os
396
+ from notion_client import Client
397
+ from notion_client.errors import APIResponseError
398
+
399
+ _client = None
400
+
401
+ def get_client():
402
+ global _client
403
+ if _client is None:
404
+ _client = Client(auth=os.environ["NOTION_TOKEN"])
405
+ return _client
406
+
407
+ def get_existing_urls(db_id: str) -> set[str]:
408
+ """Fetch all URLs already stored — used for deduplication."""
409
+ client, seen, cursor = get_client(), set(), None
410
+ while True:
411
+ resp = client.databases.query(database_id=db_id, page_size=100, **{"start_cursor": cursor} if cursor else {})
412
+ for page in resp["results"]:
413
+ url = page["properties"].get("URL", {}).get("url", "")
414
+ if url: seen.add(url)
415
+ if not resp["has_more"]: break
416
+ cursor = resp["next_cursor"]
417
+ return seen
418
+
419
+ def push_item(db_id: str, item: dict) -> bool:
420
+ """Push one item to Notion. Returns True on success."""
421
+ props = {
422
+ "Name": {"title": [{"text": {"content": item.get("name", "")[:100]}}]},
423
+ "URL": {"url": item.get("url")},
424
+ "Source": {"select": {"name": item.get("source", "Unknown")}},
425
+ "Date Found": {"date": {"start": item.get("date_found")}},
426
+ "Status": {"select": {"name": "New"}},
427
+ }
428
+ # AI fields
429
+ if item.get("ai_score") is not None:
430
+ props["AI Score"] = {"number": item["ai_score"]}
431
+ if item.get("ai_summary"):
432
+ props["Summary"] = {"rich_text": [{"text": {"content": item["ai_summary"][:2000]}}]}
433
+ if item.get("ai_notes"):
434
+ props["Notes"] = {"rich_text": [{"text": {"content": item["ai_notes"][:2000]}}]}
435
+
436
+ try:
437
+ get_client().pages.create(parent={"database_id": db_id}, properties=props)
438
+ return True
439
+ except APIResponseError as e:
440
+ print(f"[notion] Push failed: {e}")
441
+ return False
442
+
443
+ def sync(db_id: str, items: list[dict]) -> tuple[int, int]:
444
+ existing = get_existing_urls(db_id)
445
+ added = skipped = 0
446
+ for item in items:
447
+ if item.get("url") in existing:
448
+ skipped += 1; continue
449
+ if push_item(db_id, item):
450
+ added += 1; existing.add(item["url"])
451
+ else:
452
+ skipped += 1
453
+ return added, skipped
454
+ ```
455
+
456
+ ---
457
+
458
+ ### Step 8: Orchestrate in main.py
459
+
460
+ ```python
461
+ # scraper/main.py
462
+ import os, sys, yaml
463
+ from pathlib import Path
464
+ from dotenv import load_dotenv
465
+
466
+ load_dotenv()
467
+
468
+ from scraper.sources import my_source # add your sources
469
+
470
+ # NOTE: This example uses Notion. If storage.provider is "sheets" or "supabase",
471
+ # replace this import with storage.sheets_sync or storage.supabase_sync and update
472
+ # the env var and sync() call accordingly.
473
+ from storage.notion_sync import sync
474
+
475
+ SOURCES = [
476
+ ("My Source", my_source.fetch),
477
+ ]
478
+
479
+ def ai_enabled():
480
+ return bool(os.environ.get("GEMINI_API_KEY"))
481
+
482
+ def main():
483
+ config = yaml.safe_load((Path(__file__).parent.parent / "config.yaml").read_text())
484
+ provider = config.get("storage", {}).get("provider", "notion")
485
+
486
+ # Resolve the storage target identifier from env based on provider
487
+ if provider == "notion":
488
+ db_id = os.environ.get("NOTION_DATABASE_ID")
489
+ if not db_id:
490
+ print("ERROR: NOTION_DATABASE_ID not set"); sys.exit(1)
491
+ else:
492
+ # Extend here for sheets (SHEET_ID) or supabase (SUPABASE_TABLE) etc.
493
+ print(f"ERROR: provider '{provider}' not yet wired in main.py"); sys.exit(1)
494
+
495
+ config = yaml.safe_load((Path(__file__).parent.parent / "config.yaml").read_text())
496
+ all_items = []
497
+
498
+ for name, fetch_fn in SOURCES:
499
+ try:
500
+ items = fetch_fn()
501
+ print(f"[{name}] {len(items)} items")
502
+ all_items.extend(items)
503
+ except Exception as e:
504
+ print(f"[{name}] FAILED: {e}")
505
+
506
+ # Deduplicate by URL
507
+ seen, deduped = set(), []
508
+ for item in all_items:
509
+ if (url := item.get("url", "")) and url not in seen:
510
+ seen.add(url); deduped.append(item)
511
+
512
+ print(f"Unique items: {len(deduped)}")
513
+
514
+ if ai_enabled() and deduped:
515
+ from ai.memory import load_feedback, build_preference_prompt
516
+ from ai.pipeline import analyse_batch
517
+
518
+ # load_feedback() reads data/feedback.json written by your feedback sync script.
519
+ # To keep it current, implement a separate feedback_sync.py that queries your
520
+ # storage provider for items with positive/negative statuses and calls save_feedback().
521
+ feedback = load_feedback()
522
+ preference = build_preference_prompt(feedback)
523
+ context_path = Path(__file__).parent.parent / "profile" / "context.md"
524
+ context = context_path.read_text() if context_path.exists() else ""
525
+ deduped = analyse_batch(deduped, context=context, preference_prompt=preference)
526
+ else:
527
+ print("[AI] Skipped — GEMINI_API_KEY not set")
528
+
529
+ added, skipped = sync(db_id, deduped)
530
+ print(f"Done — {added} new, {skipped} existing")
531
+
532
+ if __name__ == "__main__":
533
+ main()
534
+ ```
535
+
536
+ ---
537
+
538
+ ### Step 9: GitHub Actions Workflow
539
+
540
+ ```yaml
541
+ # .github/workflows/scraper.yml
542
+ name: Data Scraper Agent
543
+
544
+ on:
545
+ schedule:
546
+ - cron: "0 */3 * * *" # every 3 hours — adjust to your needs
547
+ workflow_dispatch: # allow manual trigger
548
+
549
+ permissions:
550
+ contents: write # required for the feedback-history commit step
551
+
552
+ jobs:
553
+ scrape:
554
+ runs-on: ubuntu-latest
555
+ timeout-minutes: 20
556
+
557
+ steps:
558
+ - uses: actions/checkout@v4
559
+
560
+ - uses: actions/setup-python@v5
561
+ with:
562
+ python-version: "3.11"
563
+ cache: "pip"
564
+
565
+ - run: pip install -r requirements.txt
566
+
567
+ # Uncomment if Playwright is enabled in requirements.txt
568
+ # - name: Install Playwright browsers
569
+ # run: python -m playwright install chromium --with-deps
570
+
571
+ - name: Run agent
572
+ env:
573
+ NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }}
574
+ NOTION_DATABASE_ID: ${{ secrets.NOTION_DATABASE_ID }}
575
+ GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
576
+ run: python -m scraper.main
577
+
578
+ - name: Commit feedback history
579
+ run: |
580
+ git config user.name "github-actions[bot]"
581
+ git config user.email "github-actions[bot]@users.noreply.github.com"
582
+ git add data/feedback.json || true
583
+ git diff --cached --quiet || git commit -m "chore: update feedback history"
584
+ git push
585
+ ```
586
+
587
+ ---
588
+
589
+ ### Step 10: config.yaml Template
590
+
591
+ ```yaml
592
+ # Customise this file — no code changes needed
593
+
594
+ # What to collect (pre-filter before AI)
595
+ filters:
596
+ required_keywords: [] # item must contain at least one
597
+ blocked_keywords: [] # item must not contain any
598
+
599
+ # Your priorities — AI uses these for scoring
600
+ priorities:
601
+ - "example priority 1"
602
+ - "example priority 2"
603
+
604
+ # Storage
605
+ storage:
606
+ provider: "notion" # notion | sheets | supabase | sqlite
607
+
608
+ # Feedback learning
609
+ feedback:
610
+ positive_statuses: ["Saved", "Applied", "Interested"]
611
+ negative_statuses: ["Skip", "Rejected", "Not relevant"]
612
+
613
+ # AI settings
614
+ ai:
615
+ enabled: true
616
+ model: "gemini-2.5-flash"
617
+ min_score: 0 # filter out items below this score
618
+ rate_limit_seconds: 7 # seconds between API calls
619
+ batch_size: 5 # items per API call
620
+ ```
621
+
622
+ ---
623
+
624
+ ## Common Scraping Patterns
625
+
626
+ ### Pattern 1: REST API (easiest)
627
+ ```python
628
+ resp = requests.get(url, params={"q": query}, headers=HEADERS, timeout=15)
629
+ items = resp.json().get("results", [])
630
+ ```
631
+
632
+ ### Pattern 2: HTML Scraping
633
+ ```python
634
+ soup = BeautifulSoup(resp.text, "lxml")
635
+ for card in soup.select(".listing-card"):
636
+ title = card.select_one("h2").get_text(strip=True)
637
+ href = card.select_one("a")["href"]
638
+ ```
639
+
640
+ ### Pattern 3: RSS Feed
641
+ ```python
642
+ import xml.etree.ElementTree as ET
643
+ root = ET.fromstring(resp.text)
644
+ for item in root.findall(".//item"):
645
+ title = item.findtext("title", "")
646
+ link = item.findtext("link", "")
647
+ pub_date = item.findtext("pubDate", "")
648
+ ```
649
+
650
+ ### Pattern 4: Paginated API
651
+ ```python
652
+ page = 1
653
+ while True:
654
+ resp = requests.get(url, params={"page": page, "limit": 50}, timeout=15)
655
+ data = resp.json()
656
+ items = data.get("results", [])
657
+ if not items:
658
+ break
659
+ for item in items:
660
+ results.append(_normalise(item))
661
+ if not data.get("has_more"):
662
+ break
663
+ page += 1
664
+ ```
665
+
666
+ ### Pattern 5: JS-Rendered Pages (Playwright)
667
+ ```python
668
+ from playwright.sync_api import sync_playwright
669
+
670
+ with sync_playwright() as p:
671
+ browser = p.chromium.launch()
672
+ page = browser.new_page()
673
+ page.goto(url)
674
+ page.wait_for_selector(".listing")
675
+ html = page.content()
676
+ browser.close()
677
+
678
+ soup = BeautifulSoup(html, "lxml")
679
+ ```
680
+
681
+ ---
682
+
683
+ ## Anti-Patterns to Avoid
684
+
685
+ | Anti-pattern | Problem | Fix |
686
+ |---|---|---|
687
+ | One LLM call per item | Hits rate limits instantly | Batch 5 items per call |
688
+ | Hardcoded keywords in code | Not reusable | Move all config to `config.yaml` |
689
+ | Scraping without rate limit | IP ban | Add `time.sleep(1)` between requests |
690
+ | Storing secrets in code | Security risk | Always use `.env` + GitHub Secrets |
691
+ | No deduplication | Duplicate rows pile up | Always check URL before pushing |
692
+ | Ignoring `robots.txt` | Legal/ethical risk | Respect crawl rules; use public APIs when available |
693
+ | JS-rendered sites with `requests` | Empty response | Use Playwright or look for the underlying API |
694
+ | `maxOutputTokens` too low | Truncated JSON, parse error | Use 2048+ for batch responses |
695
+
696
+ ---
697
+
698
+ ## Free Tier Limits Reference
699
+
700
+ | Service | Free Limit | Typical Usage |
701
+ |---|---|---|
702
+ | Gemini Flash Lite | 30 RPM, 1500 RPD | ~56 req/day at 3-hr intervals |
703
+ | Gemini 2.0 Flash | 15 RPM, 1500 RPD | Good fallback |
704
+ | Gemini 2.5 Flash | 10 RPM, 500 RPD | Use sparingly |
705
+ | GitHub Actions | Unlimited (public repos) | ~20 min/day |
706
+ | Notion API | Unlimited | ~200 writes/day |
707
+ | Supabase | 500MB DB, 2GB transfer | Fine for most agents |
708
+ | Google Sheets API | 300 req/min | Works for small agents |
709
+
710
+ ---
711
+
712
+ ## Requirements Template
713
+
714
+ ```
715
+ requests==2.31.0
716
+ beautifulsoup4==4.12.3
717
+ lxml==5.1.0
718
+ python-dotenv==1.0.1
719
+ pyyaml==6.0.2
720
+ notion-client==2.2.1 # if using Notion
721
+ # playwright==1.40.0 # uncomment for JS-rendered sites
722
+ ```
723
+
724
+ ---
725
+
726
+ ## Quality Checklist
727
+
728
+ Before marking the agent complete:
729
+
730
+ - [ ] `config.yaml` controls all user-facing settings — no hardcoded values
731
+ - [ ] `profile/context.md` holds user-specific context for AI matching
732
+ - [ ] Deduplication by URL before every storage push
733
+ - [ ] Gemini client has model fallback chain (4 models)
734
+ - [ ] Batch size ≤ 5 items per API call
735
+ - [ ] `maxOutputTokens` ≥ 2048
736
+ - [ ] `.env` is in `.gitignore`
737
+ - [ ] `.env.example` provided for onboarding
738
+ - [ ] `setup.py` creates DB schema on first run
739
+ - [ ] `enrich_existing.py` backfills AI scores on old rows
740
+ - [ ] GitHub Actions workflow commits `data/feedback.json` after each run
741
+ - [ ] README covers: setup in < 5 minutes, required secrets, customisation
742
+
743
+ ---
744
+
745
+ ## Real-World Examples
746
+
747
+ ```
748
+ "Build me an agent that monitors Hacker News for AI startup funding news"
749
+ "Scrape product prices from 3 e-commerce sites and alert when they drop"
750
+ "Track new GitHub repos tagged with 'llm' or 'agents' — summarise each one"
751
+ "Collect Chief of Staff job listings from LinkedIn and Cutshort into Notion"
752
+ "Monitor a subreddit for posts mentioning my company — classify sentiment"
753
+ "Scrape new academic papers from arXiv on a topic I care about daily"
754
+ "Track sports fixture results and keep a running table in Google Sheets"
755
+ "Build a real estate listing watcher — alert on new properties under ₹1 Cr"
756
+ ```
757
+
758
+ ---
759
+
760
+ ## Reference Implementation
761
+
762
+ A complete working agent built with this exact architecture would scrape 4+ sources,
763
+ batch Gemini calls, learn from Applied/Rejected decisions stored in Notion, and run
764
+ 100% free on GitHub Actions. Follow Steps 1–10 above to build your own.