@soleri/core 2.1.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/dist/brain/brain.d.ts +10 -1
  2. package/dist/brain/brain.d.ts.map +1 -1
  3. package/dist/brain/brain.js +116 -13
  4. package/dist/brain/brain.js.map +1 -1
  5. package/dist/brain/intelligence.d.ts +36 -1
  6. package/dist/brain/intelligence.d.ts.map +1 -1
  7. package/dist/brain/intelligence.js +119 -14
  8. package/dist/brain/intelligence.js.map +1 -1
  9. package/dist/brain/types.d.ts +34 -2
  10. package/dist/brain/types.d.ts.map +1 -1
  11. package/dist/cognee/client.d.ts +3 -0
  12. package/dist/cognee/client.d.ts.map +1 -1
  13. package/dist/cognee/client.js +17 -0
  14. package/dist/cognee/client.js.map +1 -1
  15. package/dist/cognee/sync-manager.d.ts +94 -0
  16. package/dist/cognee/sync-manager.d.ts.map +1 -0
  17. package/dist/cognee/sync-manager.js +293 -0
  18. package/dist/cognee/sync-manager.js.map +1 -0
  19. package/dist/control/identity-manager.d.ts +22 -0
  20. package/dist/control/identity-manager.d.ts.map +1 -0
  21. package/dist/control/identity-manager.js +233 -0
  22. package/dist/control/identity-manager.js.map +1 -0
  23. package/dist/control/intent-router.d.ts +32 -0
  24. package/dist/control/intent-router.d.ts.map +1 -0
  25. package/dist/control/intent-router.js +242 -0
  26. package/dist/control/intent-router.js.map +1 -0
  27. package/dist/control/types.d.ts +68 -0
  28. package/dist/control/types.d.ts.map +1 -0
  29. package/dist/control/types.js +9 -0
  30. package/dist/control/types.js.map +1 -0
  31. package/dist/curator/curator.d.ts +37 -1
  32. package/dist/curator/curator.d.ts.map +1 -1
  33. package/dist/curator/curator.js +199 -1
  34. package/dist/curator/curator.js.map +1 -1
  35. package/dist/errors/classify.d.ts +13 -0
  36. package/dist/errors/classify.d.ts.map +1 -0
  37. package/dist/errors/classify.js +97 -0
  38. package/dist/errors/classify.js.map +1 -0
  39. package/dist/errors/index.d.ts +6 -0
  40. package/dist/errors/index.d.ts.map +1 -0
  41. package/dist/errors/index.js +4 -0
  42. package/dist/errors/index.js.map +1 -0
  43. package/dist/errors/retry.d.ts +40 -0
  44. package/dist/errors/retry.d.ts.map +1 -0
  45. package/dist/errors/retry.js +97 -0
  46. package/dist/errors/retry.js.map +1 -0
  47. package/dist/errors/types.d.ts +48 -0
  48. package/dist/errors/types.d.ts.map +1 -0
  49. package/dist/errors/types.js +59 -0
  50. package/dist/errors/types.js.map +1 -0
  51. package/dist/facades/types.d.ts +1 -1
  52. package/dist/governance/governance.d.ts +42 -0
  53. package/dist/governance/governance.d.ts.map +1 -0
  54. package/dist/governance/governance.js +488 -0
  55. package/dist/governance/governance.js.map +1 -0
  56. package/dist/governance/index.d.ts +3 -0
  57. package/dist/governance/index.d.ts.map +1 -0
  58. package/dist/governance/index.js +2 -0
  59. package/dist/governance/index.js.map +1 -0
  60. package/dist/governance/types.d.ts +102 -0
  61. package/dist/governance/types.d.ts.map +1 -0
  62. package/dist/governance/types.js +3 -0
  63. package/dist/governance/types.js.map +1 -0
  64. package/dist/index.d.ts +52 -3
  65. package/dist/index.d.ts.map +1 -1
  66. package/dist/index.js +47 -1
  67. package/dist/index.js.map +1 -1
  68. package/dist/intake/content-classifier.d.ts +14 -0
  69. package/dist/intake/content-classifier.d.ts.map +1 -0
  70. package/dist/intake/content-classifier.js +125 -0
  71. package/dist/intake/content-classifier.js.map +1 -0
  72. package/dist/intake/dedup-gate.d.ts +17 -0
  73. package/dist/intake/dedup-gate.d.ts.map +1 -0
  74. package/dist/intake/dedup-gate.js +66 -0
  75. package/dist/intake/dedup-gate.js.map +1 -0
  76. package/dist/intake/intake-pipeline.d.ts +63 -0
  77. package/dist/intake/intake-pipeline.d.ts.map +1 -0
  78. package/dist/intake/intake-pipeline.js +373 -0
  79. package/dist/intake/intake-pipeline.js.map +1 -0
  80. package/dist/intake/types.d.ts +65 -0
  81. package/dist/intake/types.d.ts.map +1 -0
  82. package/dist/intake/types.js +3 -0
  83. package/dist/intake/types.js.map +1 -0
  84. package/dist/intelligence/loader.js +1 -1
  85. package/dist/intelligence/loader.js.map +1 -1
  86. package/dist/intelligence/types.d.ts +3 -1
  87. package/dist/intelligence/types.d.ts.map +1 -1
  88. package/dist/logging/logger.d.ts +37 -0
  89. package/dist/logging/logger.d.ts.map +1 -0
  90. package/dist/logging/logger.js +145 -0
  91. package/dist/logging/logger.js.map +1 -0
  92. package/dist/logging/types.d.ts +19 -0
  93. package/dist/logging/types.d.ts.map +1 -0
  94. package/dist/logging/types.js +2 -0
  95. package/dist/logging/types.js.map +1 -0
  96. package/dist/loop/loop-manager.d.ts +100 -0
  97. package/dist/loop/loop-manager.d.ts.map +1 -0
  98. package/dist/loop/loop-manager.js +379 -0
  99. package/dist/loop/loop-manager.js.map +1 -0
  100. package/dist/loop/types.d.ts +103 -0
  101. package/dist/loop/types.d.ts.map +1 -0
  102. package/dist/loop/types.js +11 -0
  103. package/dist/loop/types.js.map +1 -0
  104. package/dist/persistence/index.d.ts +3 -0
  105. package/dist/persistence/index.d.ts.map +1 -0
  106. package/dist/persistence/index.js +2 -0
  107. package/dist/persistence/index.js.map +1 -0
  108. package/dist/persistence/sqlite-provider.d.ts +25 -0
  109. package/dist/persistence/sqlite-provider.d.ts.map +1 -0
  110. package/dist/persistence/sqlite-provider.js +59 -0
  111. package/dist/persistence/sqlite-provider.js.map +1 -0
  112. package/dist/persistence/types.d.ts +36 -0
  113. package/dist/persistence/types.d.ts.map +1 -0
  114. package/dist/persistence/types.js +8 -0
  115. package/dist/persistence/types.js.map +1 -0
  116. package/dist/planning/gap-analysis.d.ts +72 -0
  117. package/dist/planning/gap-analysis.d.ts.map +1 -0
  118. package/dist/planning/gap-analysis.js +442 -0
  119. package/dist/planning/gap-analysis.js.map +1 -0
  120. package/dist/planning/gap-types.d.ts +29 -0
  121. package/dist/planning/gap-types.d.ts.map +1 -0
  122. package/dist/planning/gap-types.js +28 -0
  123. package/dist/planning/gap-types.js.map +1 -0
  124. package/dist/planning/planner.d.ts +421 -4
  125. package/dist/planning/planner.d.ts.map +1 -1
  126. package/dist/planning/planner.js +949 -21
  127. package/dist/planning/planner.js.map +1 -1
  128. package/dist/playbooks/generic/brainstorming.d.ts +9 -0
  129. package/dist/playbooks/generic/brainstorming.d.ts.map +1 -0
  130. package/dist/playbooks/generic/brainstorming.js +105 -0
  131. package/dist/playbooks/generic/brainstorming.js.map +1 -0
  132. package/dist/playbooks/generic/code-review.d.ts +11 -0
  133. package/dist/playbooks/generic/code-review.d.ts.map +1 -0
  134. package/dist/playbooks/generic/code-review.js +176 -0
  135. package/dist/playbooks/generic/code-review.js.map +1 -0
  136. package/dist/playbooks/generic/subagent-execution.d.ts +9 -0
  137. package/dist/playbooks/generic/subagent-execution.d.ts.map +1 -0
  138. package/dist/playbooks/generic/subagent-execution.js +68 -0
  139. package/dist/playbooks/generic/subagent-execution.js.map +1 -0
  140. package/dist/playbooks/generic/systematic-debugging.d.ts +9 -0
  141. package/dist/playbooks/generic/systematic-debugging.d.ts.map +1 -0
  142. package/dist/playbooks/generic/systematic-debugging.js +87 -0
  143. package/dist/playbooks/generic/systematic-debugging.js.map +1 -0
  144. package/dist/playbooks/generic/tdd.d.ts +9 -0
  145. package/dist/playbooks/generic/tdd.d.ts.map +1 -0
  146. package/dist/playbooks/generic/tdd.js +70 -0
  147. package/dist/playbooks/generic/tdd.js.map +1 -0
  148. package/dist/playbooks/generic/verification.d.ts +9 -0
  149. package/dist/playbooks/generic/verification.d.ts.map +1 -0
  150. package/dist/playbooks/generic/verification.js +74 -0
  151. package/dist/playbooks/generic/verification.js.map +1 -0
  152. package/dist/playbooks/index.d.ts +4 -0
  153. package/dist/playbooks/index.d.ts.map +1 -0
  154. package/dist/playbooks/index.js +5 -0
  155. package/dist/playbooks/index.js.map +1 -0
  156. package/dist/playbooks/playbook-registry.d.ts +42 -0
  157. package/dist/playbooks/playbook-registry.d.ts.map +1 -0
  158. package/dist/playbooks/playbook-registry.js +227 -0
  159. package/dist/playbooks/playbook-registry.js.map +1 -0
  160. package/dist/playbooks/playbook-seeder.d.ts +47 -0
  161. package/dist/playbooks/playbook-seeder.d.ts.map +1 -0
  162. package/dist/playbooks/playbook-seeder.js +104 -0
  163. package/dist/playbooks/playbook-seeder.js.map +1 -0
  164. package/dist/playbooks/playbook-types.d.ts +132 -0
  165. package/dist/playbooks/playbook-types.d.ts.map +1 -0
  166. package/dist/playbooks/playbook-types.js +12 -0
  167. package/dist/playbooks/playbook-types.js.map +1 -0
  168. package/dist/project/project-registry.d.ts +79 -0
  169. package/dist/project/project-registry.d.ts.map +1 -0
  170. package/dist/project/project-registry.js +274 -0
  171. package/dist/project/project-registry.js.map +1 -0
  172. package/dist/project/types.d.ts +28 -0
  173. package/dist/project/types.d.ts.map +1 -0
  174. package/dist/project/types.js +5 -0
  175. package/dist/project/types.js.map +1 -0
  176. package/dist/prompts/index.d.ts +4 -0
  177. package/dist/prompts/index.d.ts.map +1 -0
  178. package/dist/prompts/index.js +3 -0
  179. package/dist/prompts/index.js.map +1 -0
  180. package/dist/prompts/parser.d.ts +17 -0
  181. package/dist/prompts/parser.d.ts.map +1 -0
  182. package/dist/prompts/parser.js +47 -0
  183. package/dist/prompts/parser.js.map +1 -0
  184. package/dist/prompts/template-manager.d.ts +25 -0
  185. package/dist/prompts/template-manager.d.ts.map +1 -0
  186. package/dist/prompts/template-manager.js +71 -0
  187. package/dist/prompts/template-manager.js.map +1 -0
  188. package/dist/prompts/types.d.ts +26 -0
  189. package/dist/prompts/types.d.ts.map +1 -0
  190. package/dist/prompts/types.js +5 -0
  191. package/dist/prompts/types.js.map +1 -0
  192. package/dist/runtime/admin-extra-ops.d.ts +15 -0
  193. package/dist/runtime/admin-extra-ops.d.ts.map +1 -0
  194. package/dist/runtime/admin-extra-ops.js +595 -0
  195. package/dist/runtime/admin-extra-ops.js.map +1 -0
  196. package/dist/runtime/admin-ops.d.ts +15 -0
  197. package/dist/runtime/admin-ops.d.ts.map +1 -0
  198. package/dist/runtime/admin-ops.js +329 -0
  199. package/dist/runtime/admin-ops.js.map +1 -0
  200. package/dist/runtime/capture-ops.d.ts +15 -0
  201. package/dist/runtime/capture-ops.d.ts.map +1 -0
  202. package/dist/runtime/capture-ops.js +363 -0
  203. package/dist/runtime/capture-ops.js.map +1 -0
  204. package/dist/runtime/cognee-sync-ops.d.ts +12 -0
  205. package/dist/runtime/cognee-sync-ops.d.ts.map +1 -0
  206. package/dist/runtime/cognee-sync-ops.js +55 -0
  207. package/dist/runtime/cognee-sync-ops.js.map +1 -0
  208. package/dist/runtime/core-ops.d.ts +9 -3
  209. package/dist/runtime/core-ops.d.ts.map +1 -1
  210. package/dist/runtime/core-ops.js +693 -10
  211. package/dist/runtime/core-ops.js.map +1 -1
  212. package/dist/runtime/curator-extra-ops.d.ts +9 -0
  213. package/dist/runtime/curator-extra-ops.d.ts.map +1 -0
  214. package/dist/runtime/curator-extra-ops.js +71 -0
  215. package/dist/runtime/curator-extra-ops.js.map +1 -0
  216. package/dist/runtime/domain-ops.d.ts.map +1 -1
  217. package/dist/runtime/domain-ops.js +61 -15
  218. package/dist/runtime/domain-ops.js.map +1 -1
  219. package/dist/runtime/grading-ops.d.ts +14 -0
  220. package/dist/runtime/grading-ops.d.ts.map +1 -0
  221. package/dist/runtime/grading-ops.js +105 -0
  222. package/dist/runtime/grading-ops.js.map +1 -0
  223. package/dist/runtime/intake-ops.d.ts +14 -0
  224. package/dist/runtime/intake-ops.d.ts.map +1 -0
  225. package/dist/runtime/intake-ops.js +110 -0
  226. package/dist/runtime/intake-ops.js.map +1 -0
  227. package/dist/runtime/loop-ops.d.ts +14 -0
  228. package/dist/runtime/loop-ops.d.ts.map +1 -0
  229. package/dist/runtime/loop-ops.js +251 -0
  230. package/dist/runtime/loop-ops.js.map +1 -0
  231. package/dist/runtime/memory-cross-project-ops.d.ts +12 -0
  232. package/dist/runtime/memory-cross-project-ops.d.ts.map +1 -0
  233. package/dist/runtime/memory-cross-project-ops.js +165 -0
  234. package/dist/runtime/memory-cross-project-ops.js.map +1 -0
  235. package/dist/runtime/memory-extra-ops.d.ts +13 -0
  236. package/dist/runtime/memory-extra-ops.d.ts.map +1 -0
  237. package/dist/runtime/memory-extra-ops.js +173 -0
  238. package/dist/runtime/memory-extra-ops.js.map +1 -0
  239. package/dist/runtime/orchestrate-ops.d.ts +17 -0
  240. package/dist/runtime/orchestrate-ops.d.ts.map +1 -0
  241. package/dist/runtime/orchestrate-ops.js +246 -0
  242. package/dist/runtime/orchestrate-ops.js.map +1 -0
  243. package/dist/runtime/planning-extra-ops.d.ts +25 -0
  244. package/dist/runtime/planning-extra-ops.d.ts.map +1 -0
  245. package/dist/runtime/planning-extra-ops.js +663 -0
  246. package/dist/runtime/planning-extra-ops.js.map +1 -0
  247. package/dist/runtime/playbook-ops.d.ts +14 -0
  248. package/dist/runtime/playbook-ops.d.ts.map +1 -0
  249. package/dist/runtime/playbook-ops.js +141 -0
  250. package/dist/runtime/playbook-ops.js.map +1 -0
  251. package/dist/runtime/project-ops.d.ts +15 -0
  252. package/dist/runtime/project-ops.d.ts.map +1 -0
  253. package/dist/runtime/project-ops.js +186 -0
  254. package/dist/runtime/project-ops.js.map +1 -0
  255. package/dist/runtime/runtime.d.ts.map +1 -1
  256. package/dist/runtime/runtime.js +65 -3
  257. package/dist/runtime/runtime.js.map +1 -1
  258. package/dist/runtime/types.d.ts +29 -0
  259. package/dist/runtime/types.d.ts.map +1 -1
  260. package/dist/runtime/vault-extra-ops.d.ts +10 -0
  261. package/dist/runtime/vault-extra-ops.d.ts.map +1 -0
  262. package/dist/runtime/vault-extra-ops.js +536 -0
  263. package/dist/runtime/vault-extra-ops.js.map +1 -0
  264. package/dist/telemetry/telemetry.d.ts +48 -0
  265. package/dist/telemetry/telemetry.d.ts.map +1 -0
  266. package/dist/telemetry/telemetry.js +87 -0
  267. package/dist/telemetry/telemetry.js.map +1 -0
  268. package/dist/vault/playbook.d.ts +34 -0
  269. package/dist/vault/playbook.d.ts.map +1 -0
  270. package/dist/vault/playbook.js +60 -0
  271. package/dist/vault/playbook.js.map +1 -0
  272. package/dist/vault/vault.d.ts +97 -4
  273. package/dist/vault/vault.d.ts.map +1 -1
  274. package/dist/vault/vault.js +424 -65
  275. package/dist/vault/vault.js.map +1 -1
  276. package/package.json +7 -3
  277. package/src/__tests__/admin-extra-ops.test.ts +467 -0
  278. package/src/__tests__/admin-ops.test.ts +271 -0
  279. package/src/__tests__/brain-intelligence.test.ts +205 -0
  280. package/src/__tests__/brain.test.ts +134 -3
  281. package/src/__tests__/capture-ops.test.ts +509 -0
  282. package/src/__tests__/cognee-integration.test.ts +80 -0
  283. package/src/__tests__/cognee-sync-manager.test.ts +103 -0
  284. package/src/__tests__/core-ops.test.ts +292 -2
  285. package/src/__tests__/curator-extra-ops.test.ts +381 -0
  286. package/src/__tests__/domain-ops.test.ts +66 -0
  287. package/src/__tests__/errors.test.ts +388 -0
  288. package/src/__tests__/governance.test.ts +522 -0
  289. package/src/__tests__/grading-ops.test.ts +361 -0
  290. package/src/__tests__/identity-manager.test.ts +243 -0
  291. package/src/__tests__/intake-pipeline.test.ts +162 -0
  292. package/src/__tests__/intent-router.test.ts +222 -0
  293. package/src/__tests__/logger.test.ts +200 -0
  294. package/src/__tests__/loop-ops.test.ts +469 -0
  295. package/src/__tests__/memory-cross-project-ops.test.ts +248 -0
  296. package/src/__tests__/memory-extra-ops.test.ts +352 -0
  297. package/src/__tests__/orchestrate-ops.test.ts +289 -0
  298. package/src/__tests__/persistence.test.ts +225 -0
  299. package/src/__tests__/planner.test.ts +416 -7
  300. package/src/__tests__/planning-extra-ops.test.ts +706 -0
  301. package/src/__tests__/playbook-registry.test.ts +326 -0
  302. package/src/__tests__/playbook-seeder.test.ts +163 -0
  303. package/src/__tests__/playbook.test.ts +389 -0
  304. package/src/__tests__/project-ops.test.ts +381 -0
  305. package/src/__tests__/template-manager.test.ts +222 -0
  306. package/src/__tests__/vault-extra-ops.test.ts +482 -0
  307. package/src/brain/brain.ts +185 -16
  308. package/src/brain/intelligence.ts +179 -10
  309. package/src/brain/types.ts +40 -2
  310. package/src/cognee/client.ts +18 -0
  311. package/src/cognee/sync-manager.ts +389 -0
  312. package/src/control/identity-manager.ts +354 -0
  313. package/src/control/intent-router.ts +326 -0
  314. package/src/control/types.ts +102 -0
  315. package/src/curator/curator.ts +295 -1
  316. package/src/errors/classify.ts +102 -0
  317. package/src/errors/index.ts +5 -0
  318. package/src/errors/retry.ts +132 -0
  319. package/src/errors/types.ts +81 -0
  320. package/src/governance/governance.ts +698 -0
  321. package/src/governance/index.ts +18 -0
  322. package/src/governance/types.ts +111 -0
  323. package/src/index.ts +213 -2
  324. package/src/intake/content-classifier.ts +146 -0
  325. package/src/intake/dedup-gate.ts +92 -0
  326. package/src/intake/intake-pipeline.ts +503 -0
  327. package/src/intake/types.ts +69 -0
  328. package/src/intelligence/loader.ts +1 -1
  329. package/src/intelligence/types.ts +3 -1
  330. package/src/logging/logger.ts +154 -0
  331. package/src/logging/types.ts +21 -0
  332. package/src/loop/loop-manager.ts +448 -0
  333. package/src/loop/types.ts +115 -0
  334. package/src/persistence/index.ts +7 -0
  335. package/src/persistence/sqlite-provider.ts +62 -0
  336. package/src/persistence/types.ts +44 -0
  337. package/src/planning/gap-analysis.ts +775 -0
  338. package/src/planning/gap-types.ts +61 -0
  339. package/src/planning/planner.ts +1273 -24
  340. package/src/playbooks/generic/brainstorming.ts +110 -0
  341. package/src/playbooks/generic/code-review.ts +181 -0
  342. package/src/playbooks/generic/subagent-execution.ts +74 -0
  343. package/src/playbooks/generic/systematic-debugging.ts +92 -0
  344. package/src/playbooks/generic/tdd.ts +75 -0
  345. package/src/playbooks/generic/verification.ts +79 -0
  346. package/src/playbooks/index.ts +27 -0
  347. package/src/playbooks/playbook-registry.ts +284 -0
  348. package/src/playbooks/playbook-seeder.ts +119 -0
  349. package/src/playbooks/playbook-types.ts +162 -0
  350. package/src/project/project-registry.ts +370 -0
  351. package/src/project/types.ts +31 -0
  352. package/src/prompts/index.ts +3 -0
  353. package/src/prompts/parser.ts +59 -0
  354. package/src/prompts/template-manager.ts +77 -0
  355. package/src/prompts/types.ts +28 -0
  356. package/src/runtime/admin-extra-ops.ts +652 -0
  357. package/src/runtime/admin-ops.ts +340 -0
  358. package/src/runtime/capture-ops.ts +404 -0
  359. package/src/runtime/cognee-sync-ops.ts +63 -0
  360. package/src/runtime/core-ops.ts +787 -9
  361. package/src/runtime/curator-extra-ops.ts +85 -0
  362. package/src/runtime/domain-ops.ts +67 -15
  363. package/src/runtime/grading-ops.ts +130 -0
  364. package/src/runtime/intake-ops.ts +126 -0
  365. package/src/runtime/loop-ops.ts +277 -0
  366. package/src/runtime/memory-cross-project-ops.ts +191 -0
  367. package/src/runtime/memory-extra-ops.ts +186 -0
  368. package/src/runtime/orchestrate-ops.ts +278 -0
  369. package/src/runtime/planning-extra-ops.ts +718 -0
  370. package/src/runtime/playbook-ops.ts +169 -0
  371. package/src/runtime/project-ops.ts +202 -0
  372. package/src/runtime/runtime.ts +77 -3
  373. package/src/runtime/types.ts +29 -0
  374. package/src/runtime/vault-extra-ops.ts +606 -0
  375. package/src/telemetry/telemetry.ts +118 -0
  376. package/src/vault/playbook.ts +87 -0
  377. package/src/vault/vault.ts +575 -98
@@ -0,0 +1,503 @@
1
+ // ─── Intake Pipeline ──────────────────────────────────────────────
2
+ //
3
+ // 6-stage pipeline for ingesting PDF books into the vault:
4
+ // 1. Parse PDF + compute hash + create chunks → job record
5
+ // 2. Extract page text for each chunk
6
+ // 3. Classify chunk text via LLM
7
+ // 4. Dedup classified items against vault
8
+ // 5. Store unique items in vault
9
+ // 6. Finalize job with aggregate stats
10
+ //
11
+ // SQLite-backed job tracking for resumable processing.
12
+
13
+ import { createHash, randomUUID } from 'node:crypto';
14
+ import { readFileSync, statSync } from 'node:fs';
15
+ import type { PersistenceProvider } from '../persistence/types.js';
16
+ import type { Vault } from '../vault/vault.js';
17
+ import type { LLMClient } from '../llm/llm-client.js';
18
+ import type { IntelligenceEntry } from '../intelligence/types.js';
19
+ import type {
20
+ IntakeConfig,
21
+ IntakeChunk,
22
+ IntakeJobRecord,
23
+ IntakePreviewResult,
24
+ ClassifiedItem,
25
+ KnowledgeType,
26
+ } from './types.js';
27
+ import { classifyChunk } from './content-classifier.js';
28
+ import { dedupItems } from './dedup-gate.js';
29
+
30
+ // =============================================================================
31
+ // CONSTANTS
32
+ // =============================================================================
33
+
34
+ const DEFAULT_CHUNK_SIZE = 10;
35
+
36
/**
 * Translate an intake KnowledgeType into the `type` field of an IntelligenceEntry.
 *
 * 'pattern' and 'anti-pattern' pass through unchanged; every other knowledge
 * type is stored as a 'rule'.
 */
function mapKnowledgeType(kt: KnowledgeType): IntelligenceEntry['type'] {
  switch (kt) {
    case 'pattern':
      return 'pattern';
    case 'anti-pattern':
      return 'anti-pattern';
    default:
      return 'rule';
  }
}
45
+
46
+ // =============================================================================
47
+ // HELPERS
48
+ // =============================================================================
49
+
50
/**
 * Break the concatenated text of a PDF into one string per page.
 *
 * Form-feed characters are tried as page separators first. When they yield
 * at least `numPages` segments, the first `numPages` of them are returned.
 * Otherwise the text is cut into slices of equal length
 * (`ceil(text.length / numPages)` characters each) and the result is padded
 * with empty strings up to `numPages` entries.
 *
 * @param text - Full text of the document.
 * @param numPages - Expected page count; when it is zero or negative the
 *   whole text is returned as a single segment.
 * @returns The per-page segments.
 */
export function splitIntoPages(text: string, numPages: number): string[] {
  if (numPages <= 0) {
    return [text];
  }

  // Preferred strategy: explicit form-feed separators.
  const byFormFeed = text.split('\f');
  if (byFormFeed.length >= numPages) {
    return byFormFeed.slice(0, numPages);
  }

  // Fallback strategy: slices of equal length.
  const sliceLength = Math.ceil(text.length / numPages);
  const segments: string[] = [];
  let offset = 0;
  while (offset < text.length) {
    segments.push(text.slice(offset, offset + sliceLength));
    offset += sliceLength;
  }

  // Short (or empty) text can leave fewer slices than pages; pad the tail.
  while (segments.length < numPages) {
    segments.push('');
  }
  return segments;
}
77
+
78
+ // =============================================================================
79
+ // PIPELINE
80
+ // =============================================================================
81
+
82
+ export class IntakePipeline {
83
+ private provider: PersistenceProvider;
84
+ private vault: Vault;
85
+ private llm: LLMClient;
86
+
87
constructor(provider: PersistenceProvider, vault: Vault, llm: LLMClient) {
  // Keep references to the collaborators used by the pipeline stages.
  this.llm = llm;
  this.vault = vault;
  this.provider = provider;

  // Create the job/chunk tables up front (requires `provider` to be set).
  this.initSchema();
}
93
+
94
+ // ─── Schema ──────────────────────────────────────────────────────
95
+
96
/**
 * Create the `intake_jobs` and `intake_chunks` tables if they do not exist yet.
 * Both statements are sent to the provider in a single `execSql` call.
 */
private initSchema(): void {
  const schemaSql = `
    CREATE TABLE IF NOT EXISTS intake_jobs (
      id TEXT PRIMARY KEY,
      status TEXT NOT NULL,
      config TEXT NOT NULL,
      pdf_meta TEXT,
      toc TEXT,
      stats TEXT,
      created_at INTEGER,
      updated_at INTEGER,
      completed_at INTEGER
    );

    CREATE TABLE IF NOT EXISTS intake_chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      job_id TEXT NOT NULL REFERENCES intake_jobs(id),
      chunk_index INTEGER,
      title TEXT,
      page_start INTEGER,
      page_end INTEGER,
      status TEXT DEFAULT 'pending',
      items_extracted INTEGER DEFAULT 0,
      items_stored INTEGER DEFAULT 0,
      items_deduped INTEGER DEFAULT 0,
      error TEXT,
      processed_at INTEGER
    );
  `;
  this.provider.execSql(schemaSql);
}
126
+
127
+ // ─── Stage 1: Ingest Book ────────────────────────────────────────
128
+
129
/**
 * Stage 1 — register a PDF for intake.
 *
 * Reads the file at `config.pdfPath`, computes its SHA-256 hash, asks
 * `pdf-parse` for the page count and then, inside one provider transaction,
 * inserts a single `intake_jobs` row (status `initialized`) plus one
 * `intake_chunks` row (status `pending`) per fixed-size page window.
 *
 * @param config - Intake configuration. `pdfPath` and `title` are read here;
 *   `chunkPageSize` overrides the default window of DEFAULT_CHUNK_SIZE pages.
 * @returns The job record as read back from the database.
 * @throws RangeError when `chunkPageSize` is not a positive integer.
 * @throws Error when the job row cannot be read back after the insert.
 */
async ingestBook(config: IntakeConfig): Promise<IntakeJobRecord> {
  const chunkPageSize = config.chunkPageSize ?? DEFAULT_CHUNK_SIZE;
  // A window of 0 pages would make the chunk count Infinity (endless insert
  // loop); negative or fractional windows would yield no chunks or fractional
  // page numbers. Reject them before touching the file system or the database.
  if (!Number.isInteger(chunkPageSize) || chunkPageSize < 1) {
    throw new RangeError(
      `chunkPageSize must be a positive integer, received ${String(chunkPageSize)}`,
    );
  }

  const jobId = randomUUID();
  // Timestamps are stored as whole seconds since the Unix epoch.
  const now = Math.floor(Date.now() / 1000);

  // Read file
  const fileBuffer = readFileSync(config.pdfPath);
  const fileSize = statSync(config.pdfPath).size;
  const fileHash = createHash('sha256').update(fileBuffer).digest('hex');

  // Dynamic import of pdf-parse
  const pdfParse = (await import('pdf-parse')).default;
  const pdfData = await pdfParse(fileBuffer);
  const totalPages = pdfData.numpages;

  const pdfMeta = { totalPages, fileHash, fileSize };

  // Create chunk definitions (fixed N-page windows)
  const numChunks = Math.ceil(totalPages / chunkPageSize);

  this.provider.transaction(() => {
    // Insert job
    this.provider.run(
      `INSERT INTO intake_jobs (id, status, config, pdf_meta, toc, stats, created_at, updated_at, completed_at)
       VALUES (@id, @status, @config, @pdfMeta, @toc, @stats, @createdAt, @updatedAt, @completedAt)`,
      {
        id: jobId,
        status: 'initialized',
        config: JSON.stringify(config),
        pdfMeta: JSON.stringify(pdfMeta),
        toc: null,
        stats: null,
        createdAt: now,
        updatedAt: now,
        completedAt: null,
      },
    );

    // Insert chunk records; page numbers are 1-based and inclusive, and the
    // last window is clipped to the final page.
    for (let i = 0; i < numChunks; i++) {
      const pageStart = i * chunkPageSize + 1;
      const pageEnd = Math.min((i + 1) * chunkPageSize, totalPages);
      const chunkTitle = `${config.title} — pages ${pageStart}-${pageEnd}`;

      this.provider.run(
        `INSERT INTO intake_chunks (job_id, chunk_index, title, page_start, page_end, status)
         VALUES (@jobId, @chunkIndex, @title, @pageStart, @pageEnd, @status)`,
        {
          jobId,
          chunkIndex: i,
          title: chunkTitle,
          pageStart,
          pageEnd,
          status: 'pending',
        },
      );
    }
  });

  // Read the row back instead of asserting non-null, so a failed insert
  // surfaces as a clear error rather than a null leaking to the caller.
  const job = this.getJob(jobId);
  if (!job) {
    throw new Error(`Intake job ${jobId} could not be read back after creation`);
  }
  return job;
}
194
+
195
+ // ─── Stages 2-5: Process Chunks ──────────────────────────────────
196
+
197
/**
 * Stages 2-5 — process a batch of pending chunks for one job.
 *
 * For each selected chunk (lowest `chunk_index` first) this:
 *   2. slices the chunk's page range out of the re-parsed PDF text,
 *   3. classifies that text via `classifyChunk`,
 *   4. runs the classified items through `dedupItems` against the vault,
 *   5. adds every non-duplicate item to the vault,
 * and then records the per-chunk counters. A chunk that throws is marked
 * `failed` with the error message, and the batch continues with the next
 * chunk. When no pending chunks remain afterwards, the job is finalized
 * (stage 6).
 *
 * @param jobId - ID of the intake job whose chunks should be processed.
 * @param count - Maximum number of chunks to take in this call (default 5).
 *   Fractions are rounded down; negative or non-finite values are treated as 0.
 * @returns Totals for this call: chunks attempted (`processed`, failed chunks
 *   included), items stored, items rejected as duplicates, and the number of
 *   chunks still pending.
 */
async processChunks(
  jobId: string,
  count: number = 5,
): Promise<{
  processed: number;
  itemsStored: number;
  itemsDeduped: number;
  remaining: number;
}> {
  // The value is bound into a LIMIT clause. SQLite treats a negative LIMIT
  // as "no upper bound" and rejects non-integer values, so normalise it to a
  // non-negative integer first.
  const limit = Number.isFinite(count) ? Math.max(0, Math.floor(count)) : 0;

  // Get pending chunks
  const pendingChunks = this.provider.all<Record<string, unknown>>(
    `SELECT * FROM intake_chunks WHERE job_id = @jobId AND status = 'pending' ORDER BY chunk_index ASC LIMIT @limit`,
    { jobId, limit },
  );

  if (pendingChunks.length === 0) {
    const remaining = this.countPendingChunks(jobId);
    return { processed: 0, itemsStored: 0, itemsDeduped: 0, remaining };
  }

  // Resolve the job before changing any status so an unknown job ID leaves
  // the database untouched.
  const job = this.getJob(jobId);
  if (!job) {
    return { processed: 0, itemsStored: 0, itemsDeduped: 0, remaining: 0 };
  }

  // Mark job as processing
  this.provider.run(
    `UPDATE intake_jobs SET status = 'processing', updated_at = @now WHERE id = @id`,
    { id: jobId, now: Math.floor(Date.now() / 1000) },
  );

  // Re-parse the PDF; the page count stored at ingest time takes precedence
  // so page boundaries line up with the chunk records.
  const fileBuffer = readFileSync(job.config.pdfPath);
  const pdfParse = (await import('pdf-parse')).default;
  const pdfData = await pdfParse(fileBuffer);
  const totalPages = job.pdfMeta?.totalPages ?? pdfData.numpages;
  const pages = splitIntoPages(pdfData.text, totalPages);

  let totalStored = 0;
  let totalDeduped = 0;
  let processed = 0;

  for (const chunkRow of pendingChunks) {
    const chunkId = chunkRow.id as number;
    const chunkIndex = chunkRow.chunk_index as number;
    const pageStart = chunkRow.page_start as number;
    const pageEnd = chunkRow.page_end as number;

    try {
      // Mark chunk processing
      this.provider.run(`UPDATE intake_chunks SET status = 'processing' WHERE id = @id`, {
        id: chunkId,
      });

      // Stage 2: Extract page text (1-indexed → 0-indexed)
      const chunkText = pages.slice(pageStart - 1, pageEnd).join('\n\n');
      const citation = `${job.config.title}, pages ${pageStart}-${pageEnd}`;

      // Stage 3: Classify
      const classifiedItems = await classifyChunk(this.llm, chunkText, citation);

      // Stage 4: Dedup
      const dedupResults = dedupItems(classifiedItems, this.vault);
      const uniqueItems = dedupResults.filter((r) => !r.isDuplicate);
      const dupCount = dedupResults.filter((r) => r.isDuplicate).length;

      // Stage 5: Store unique items in vault
      let storedCount = 0;
      for (let itemIdx = 0; itemIdx < uniqueItems.length; itemIdx++) {
        const result = uniqueItems[itemIdx];
        const entry = classifiedItemToEntry(
          result.item,
          job.config.domain,
          jobId,
          chunkIndex,
          itemIdx,
          job.config.tags,
        );
        this.vault.add(entry);
        storedCount++;
      }

      // Update chunk record
      const now = Math.floor(Date.now() / 1000);
      this.provider.run(
        `UPDATE intake_chunks
         SET status = 'completed', items_extracted = @extracted, items_stored = @stored, items_deduped = @deduped, processed_at = @now
         WHERE id = @id`,
        {
          id: chunkId,
          extracted: classifiedItems.length,
          stored: storedCount,
          deduped: dupCount,
          now,
        },
      );

      totalStored += storedCount;
      totalDeduped += dupCount;
      processed++;
    } catch (err) {
      // Graceful degradation: mark chunk as failed, continue with others.
      // A failed chunk still counts as processed for this call.
      const errorMsg = err instanceof Error ? err.message : String(err);
      this.provider.run(
        `UPDATE intake_chunks SET status = 'failed', error = @error, processed_at = @now WHERE id = @id`,
        { id: chunkId, error: errorMsg, now: Math.floor(Date.now() / 1000) },
      );
      processed++;
    }
  }

  // Update job timestamp
  this.provider.run(`UPDATE intake_jobs SET updated_at = @now WHERE id = @id`, {
    id: jobId,
    now: Math.floor(Date.now() / 1000),
  });

  // Check remaining; once nothing is pending the job is finalized.
  const remaining = this.countPendingChunks(jobId);
  if (remaining === 0) {
    this.finalizeJob(jobId);
  }

  return { processed, itemsStored: totalStored, itemsDeduped: totalDeduped, remaining };
}
333
+
334
+ // ─── Preview ─────────────────────────────────────────────────────
335
+
336
/**
 * Classify a page range of a PDF without storing anything.
 *
 * Parses the file at `config.pdfPath`, joins the text of pages
 * `pageStart`..`pageEnd` (1-based, inclusive) and passes it to
 * `classifyChunk`. No job or chunk rows are written and the vault is not
 * touched, which makes this useful for previewing what the pipeline would
 * extract.
 *
 * @param config - Intake configuration; `pdfPath` and `title` are read here.
 * @param pageStart - First page to include (1-based).
 * @param pageEnd - Last page to include (1-based, inclusive).
 * @returns The classified items, the raw text that was classified, and the
 *   requested page range.
 * @throws RangeError when the range is not made of integers with
 *   `1 <= pageStart <= pageEnd`.
 */
async preview(
  config: IntakeConfig,
  pageStart: number,
  pageEnd: number,
): Promise<IntakePreviewResult> {
  // `pageStart - 1` is used as a slice start below. A value of 0 or less would
  // become a negative index that counts from the END of the page array, and an
  // inverted range would send empty text to the LLM, so reject both up front.
  if (
    !Number.isInteger(pageStart) ||
    !Number.isInteger(pageEnd) ||
    pageStart < 1 ||
    pageEnd < pageStart
  ) {
    throw new RangeError(
      `Invalid page range ${pageStart}-${pageEnd}: expected 1-based integers with pageStart <= pageEnd`,
    );
  }

  const fileBuffer = readFileSync(config.pdfPath);
  const pdfParse = (await import('pdf-parse')).default;
  const pdfData = await pdfParse(fileBuffer);
  const totalPages = pdfData.numpages;

  // 1-indexed inclusive range → 0-indexed half-open slice.
  const pages = splitIntoPages(pdfData.text, totalPages);
  const chunkText = pages.slice(pageStart - 1, pageEnd).join('\n\n');
  const citation = `${config.title}, pages ${pageStart}-${pageEnd}`;

  const items = await classifyChunk(this.llm, chunkText, citation);

  return {
    items,
    chunkText,
    pageRange: { start: pageStart, end: pageEnd },
  };
}
362
+
363
+ // ─── Queries ─────────────────────────────────────────────────────
364
+
365
/**
 * Look up a single intake job by its ID.
 *
 * @param jobId - Value matched against `intake_jobs.id`.
 * @returns The mapped job record, or `null` when the query yields no row.
 */
getJob(jobId: string): IntakeJobRecord | null {
  const jobRow = this.provider.get<Record<string, unknown>>(
    'SELECT * FROM intake_jobs WHERE id = @id',
    { id: jobId },
  );
  if (!jobRow) {
    return null;
  }
  return rowToJobRecord(jobRow);
}
375
+
376
/**
 * Return every intake job, ordered by `created_at` descending.
 *
 * @returns One mapped record per row of `intake_jobs`.
 */
listJobs(): IntakeJobRecord[] {
  const jobRows = this.provider.all<Record<string, unknown>>(
    'SELECT * FROM intake_jobs ORDER BY created_at DESC',
  );
  const jobs: IntakeJobRecord[] = [];
  for (const jobRow of jobRows) {
    jobs.push(rowToJobRecord(jobRow));
  }
  return jobs;
}
385
+
386
/**
 * Return all chunks that belong to a job, ordered by `chunk_index` ascending.
 *
 * @param jobId - Value matched against `intake_chunks.job_id`.
 * @returns One mapped chunk per matching row.
 */
getChunks(jobId: string): IntakeChunk[] {
  const chunkRows = this.provider.all<Record<string, unknown>>(
    'SELECT * FROM intake_chunks WHERE job_id = @jobId ORDER BY chunk_index ASC',
    { jobId },
  );
  const chunks: IntakeChunk[] = [];
  for (const chunkRow of chunkRows) {
    chunks.push(rowToChunk(chunkRow));
  }
  return chunks;
}
396
+
397
+ // ─── Stage 6: Finalize ──────────────────────────────────────────
398
+
399
/**
 * Aggregate per-chunk counters into job-level stats and mark the job as
 * completed.
 *
 * Only chunks with status `completed` contribute to the extracted / stored /
 * deduped totals. `itemsFailed` is incremented once per chunk with status
 * `failed`, so it counts failed chunks rather than individual items. Chunks
 * in any other status are ignored.
 *
 * @param jobId - ID of the job whose chunks are summed and whose row is updated.
 */
private finalizeJob(jobId: string): void {
  const chunkRows = this.provider.all<Record<string, unknown>>(
    'SELECT * FROM intake_chunks WHERE job_id = @jobId',
    { jobId },
  );

  // Key order matters: this object is serialised as-is into the `stats` column.
  const stats = { itemsExtracted: 0, itemsStored: 0, itemsDeduped: 0, itemsFailed: 0 };

  for (const chunkRow of chunkRows) {
    switch (chunkRow.status as string) {
      case 'completed':
        stats.itemsExtracted += (chunkRow.items_extracted as number) ?? 0;
        stats.itemsStored += (chunkRow.items_stored as number) ?? 0;
        stats.itemsDeduped += (chunkRow.items_deduped as number) ?? 0;
        break;
      case 'failed':
        stats.itemsFailed += 1;
        break;
      default:
        break;
    }
  }

  // Unix epoch in seconds; used for both updated_at and completed_at.
  const finishedAt = Math.floor(Date.now() / 1000);

  this.provider.run(
    `UPDATE intake_jobs SET status = 'completed', stats = @stats, updated_at = @now, completed_at = @now WHERE id = @id`,
    { id: jobId, stats: JSON.stringify(stats), now: finishedAt },
  );
}
432
+
433
+ // ─── Private helpers ─────────────────────────────────────────────
434
+
435
/**
 * Count the chunks of a job whose status is still `pending`.
 *
 * @param jobId - Value matched against `intake_chunks.job_id`.
 * @returns The count reported by the query, or 0 when no row or no count comes back.
 */
private countPendingChunks(jobId: string): number {
  const pendingRow = this.provider.get<{ count: number }>(
    `SELECT COUNT(*) as count FROM intake_chunks WHERE job_id = @jobId AND status = 'pending'`,
    { jobId },
  );
  if (pendingRow == null) {
    return 0;
  }
  return pendingRow.count ?? 0;
}
442
+ }
443
+
444
+ // =============================================================================
445
+ // ROW MAPPERS
446
+ // =============================================================================
447
+
448
/**
 * Map a raw `intake_jobs` row (snake_case columns, JSON stored as text) to an
 * `IntakeJobRecord`.
 *
 * `config` is always parsed. `pdf_meta`, `toc` and `stats` are parsed only
 * when the cell is truthy and become `null` otherwise. A nullish
 * `completed_at` becomes `null`.
 *
 * @param row - Row as returned by the persistence provider.
 * @returns The camelCase job record.
 * @throws SyntaxError if any parsed column does not contain valid JSON.
 */
function rowToJobRecord(row: Record<string, unknown>): IntakeJobRecord {
  // Optional JSON columns: an empty / NULL cell maps to null instead of being parsed.
  const parseOptionalJson = (cell: unknown) => (cell ? JSON.parse(cell as string) : null);

  const record = {
    id: row.id as string,
    status: row.status as IntakeJobRecord['status'],
    config: JSON.parse(row.config as string) as IntakeConfig,
    pdfMeta: parseOptionalJson(row.pdf_meta),
    toc: parseOptionalJson(row.toc),
    stats: parseOptionalJson(row.stats),
    createdAt: row.created_at as number,
    updatedAt: row.updated_at as number,
    completedAt: (row.completed_at as number) ?? null,
  };
  return record;
}
461
+
462
/**
 * Map a raw `intake_chunks` row (snake_case columns) to an `IntakeChunk`.
 *
 * Nullish counters default to 0; nullish `title`, `error` and `processed_at`
 * become `null`. All other columns are passed through unchanged.
 *
 * @param row - Row as returned by the persistence provider.
 * @returns The camelCase chunk record.
 */
function rowToChunk(row: Record<string, unknown>): IntakeChunk {
  const countOrZero = (cell: unknown): number => (cell as number) ?? 0;
  const textOrNull = (cell: unknown): string | null => (cell as string) ?? null;

  const {
    id,
    job_id: jobId,
    chunk_index: chunkIndex,
    page_start: pageStart,
    page_end: pageEnd,
    status,
  } = row;

  return {
    id: id as number,
    jobId: jobId as string,
    chunkIndex: chunkIndex as number,
    title: textOrNull(row.title),
    pageStart: pageStart as number,
    pageEnd: pageEnd as number,
    status: status as IntakeChunk['status'],
    itemsExtracted: countOrZero(row.items_extracted),
    itemsStored: countOrZero(row.items_stored),
    itemsDeduped: countOrZero(row.items_deduped),
    error: textOrNull(row.error),
    processedAt: (row.processed_at as number) ?? null,
  };
}
478
+
479
/**
 * Build the `IntelligenceEntry` that represents one classified item.
 *
 * The entry ID is derived from the job ID, chunk index and item index as
 * `intake-<jobId>-<chunkIndex>-<itemIndex>`. The item's knowledge type is
 * translated through `mapKnowledgeType`, its citation is stored as the
 * entry's `context`, and `extraTags` (if any) are appended after the item's
 * own tags without de-duplication.
 *
 * @param item - Classified item to convert.
 * @param domain - Domain assigned to the entry.
 * @param jobId - ID of the intake job, used in the entry ID.
 * @param chunkIndex - Index of the chunk, used in the entry ID.
 * @param itemIndex - Index of the item, used in the entry ID.
 * @param extraTags - Optional tags appended to `item.tags`.
 * @returns The entry built from the item.
 */
function classifiedItemToEntry(
  item: ClassifiedItem,
  domain: string,
  jobId: string,
  chunkIndex: number,
  itemIndex: number,
  extraTags?: string[],
): IntelligenceEntry {
  const type = mapKnowledgeType(item.type);
  const mergedTags = item.tags.concat(extraTags ?? []);
  const entryId = ['intake', jobId, chunkIndex, itemIndex].join('-');

  const entry: IntelligenceEntry = {
    id: entryId,
    type,
    domain,
    title: item.title,
    severity: item.severity,
    description: item.description,
    context: item.citation,
    tags: mergedTags,
  };
  return entry;
}
@@ -0,0 +1,69 @@
1
+ // ─── Intake Pipeline Types ────────────────────────────────────────
2
+
3
/** Lifecycle states an intake job can be in. */
export type IntakeJobStatus =
  | 'initialized'
  | 'processing'
  | 'completed'
  | 'failed'
  | 'cancelled';

/** Lifecycle states a single chunk of an intake job can be in. */
export type IntakeChunkStatus =
  | 'pending'
  | 'processing'
  | 'completed'
  | 'failed'
  | 'skipped';
5
/**
 * Category assigned to a classified item.
 *
 * This is wider than the entry types the vault accepts; the intake pipeline
 * maps it to an entry type before storing (see `mapKnowledgeType`).
 */
export type KnowledgeType =
  | 'pattern'
  | 'anti-pattern'
  | 'principle'
  | 'concept'
  | 'reference'
  | 'workflow'
  | 'idea'
  | 'roadmap';
14
+
15
/** Caller-supplied configuration for an intake job. */
export interface IntakeConfig {
  /** Filesystem path of the PDF to read. */
  pdfPath: string;
  /** Title of the document; used when building citation strings. */
  title: string;
  /** Optional author of the document. */
  author?: string;
  /** Knowledge domain — presumably assigned to the entries produced from this document (confirm against caller). */
  domain: string;
  /** Presumably the number of pages per chunk; the default is not visible here (TODO confirm). */
  chunkPageSize?: number;
  /** Optional tags — presumably added to every entry produced from this document (TODO confirm). */
  tags?: string[];
}
23
+
24
/** One chunk (page range) of an intake job, as read back from `intake_chunks`. */
export interface IntakeChunk {
  /** Numeric row ID of the chunk. */
  id: number;
  /** ID of the job this chunk belongs to. */
  jobId: string;
  /** Position of the chunk within its job; chunks are listed in ascending order of this value. */
  chunkIndex: number;
  /** Optional title of the chunk, `null` when none is stored. */
  title: string | null;
  /** First page covered by the chunk. */
  pageStart: number;
  /** Last page covered by the chunk. */
  pageEnd: number;
  /** Current processing state of the chunk. */
  status: IntakeChunkStatus;
  /** Number of items the classifier returned for this chunk. */
  itemsExtracted: number;
  /** Number of items stored from this chunk. */
  itemsStored: number;
  /** Number of items recorded as duplicates for this chunk. */
  itemsDeduped: number;
  /** Error message recorded when processing failed, otherwise `null`. */
  error: string | null;
  /** Unix epoch (seconds) at which the chunk was processed, or `null` if it has not been. */
  processedAt: number | null;
}
38
+
39
/** A single piece of knowledge produced by classifying a chunk of text. */
export interface ClassifiedItem {
  /** Category of the item. */
  type: KnowledgeType;
  /** Short title of the item. */
  title: string;
  /** Longer description of the item. */
  description: string;
  /** Tags attached to the item by the classifier. */
  tags: string[];
  /** Severity level of the item. */
  severity: 'critical' | 'warning' | 'suggestion';
  /** Source citation for the item; stored as the `context` of the resulting entry. */
  citation: string;
}
47
+
48
/** An intake job as read back from `intake_jobs`, with its JSON columns parsed. */
export interface IntakeJobRecord {
  /** ID of the job. */
  id: string;
  /** Current lifecycle state of the job. */
  status: IntakeJobStatus;
  /** Configuration the job was created with. */
  config: IntakeConfig;
  /** Metadata about the PDF, or `null` when none is stored. */
  pdfMeta: { totalPages: number; fileHash: string; fileSize: number } | null;
  /** Table-of-contents entries, or `null` when none is stored. */
  toc: { title: string; page: number }[] | null;
  /** Aggregated counters, or `null` when none are stored. */
  stats: {
    /** Sum of `items_extracted` over completed chunks. */
    itemsExtracted: number;
    /** Sum of `items_stored` over completed chunks. */
    itemsStored: number;
    /** Sum of `items_deduped` over completed chunks. */
    itemsDeduped: number;
    /** Number of chunks with status `failed` (a chunk count, not an item count). */
    itemsFailed: number;
  } | null;
  /** Creation timestamp — presumably unix epoch seconds like the other timestamps (TODO confirm). */
  createdAt: number;
  /** Unix epoch (seconds) of the last update. */
  updatedAt: number;
  /** Unix epoch (seconds) at which the job was finalized, or `null` if it has not been. */
  completedAt: number | null;
}
64
+
65
/** Result of a dry-run classification of a page range; nothing is stored. */
export interface IntakePreviewResult {
  /** Items the classifier produced for the previewed text. */
  items: ClassifiedItem[];
  /** The raw text that was sent to the classifier. */
  chunkText: string;
  /** Page range covered by the preview. */
  pageRange: { start: number; end: number };
}
@@ -31,7 +31,7 @@ function validateEntry(entry: IntelligenceEntry): boolean {
31
31
  return (
32
32
  typeof entry.id === 'string' &&
33
33
  entry.id.length > 0 &&
34
- ['pattern', 'anti-pattern', 'rule'].includes(entry.type) &&
34
+ ['pattern', 'anti-pattern', 'rule', 'playbook'].includes(entry.type) &&
35
35
  typeof entry.title === 'string' &&
36
36
  entry.title.length > 0 &&
37
37
  typeof entry.description === 'string' &&
@@ -1,6 +1,6 @@
1
1
  export interface IntelligenceEntry {
2
2
  id: string;
3
- type: 'pattern' | 'anti-pattern' | 'rule';
3
+ type: 'pattern' | 'anti-pattern' | 'rule' | 'playbook';
4
4
  domain: string;
5
5
  title: string;
6
6
  severity: 'critical' | 'warning' | 'suggestion';
@@ -11,6 +11,8 @@ export interface IntelligenceEntry {
11
11
  why?: string;
12
12
  tags: string[];
13
13
  appliesTo?: string[];
14
+ validFrom?: number; // unix epoch — when entry becomes active
15
+ validUntil?: number; // unix epoch — when entry expires (null = never)
14
16
  }
15
17
 
16
18
  export interface IntelligenceBundle {