@soleri/core 2.1.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/brain.d.ts +10 -1
- package/dist/brain/brain.d.ts.map +1 -1
- package/dist/brain/brain.js +116 -13
- package/dist/brain/brain.js.map +1 -1
- package/dist/brain/intelligence.d.ts +36 -1
- package/dist/brain/intelligence.d.ts.map +1 -1
- package/dist/brain/intelligence.js +119 -14
- package/dist/brain/intelligence.js.map +1 -1
- package/dist/brain/types.d.ts +34 -2
- package/dist/brain/types.d.ts.map +1 -1
- package/dist/cognee/client.d.ts +3 -0
- package/dist/cognee/client.d.ts.map +1 -1
- package/dist/cognee/client.js +17 -0
- package/dist/cognee/client.js.map +1 -1
- package/dist/cognee/sync-manager.d.ts +94 -0
- package/dist/cognee/sync-manager.d.ts.map +1 -0
- package/dist/cognee/sync-manager.js +293 -0
- package/dist/cognee/sync-manager.js.map +1 -0
- package/dist/control/identity-manager.d.ts +22 -0
- package/dist/control/identity-manager.d.ts.map +1 -0
- package/dist/control/identity-manager.js +233 -0
- package/dist/control/identity-manager.js.map +1 -0
- package/dist/control/intent-router.d.ts +32 -0
- package/dist/control/intent-router.d.ts.map +1 -0
- package/dist/control/intent-router.js +242 -0
- package/dist/control/intent-router.js.map +1 -0
- package/dist/control/types.d.ts +68 -0
- package/dist/control/types.d.ts.map +1 -0
- package/dist/control/types.js +9 -0
- package/dist/control/types.js.map +1 -0
- package/dist/curator/curator.d.ts +37 -1
- package/dist/curator/curator.d.ts.map +1 -1
- package/dist/curator/curator.js +199 -1
- package/dist/curator/curator.js.map +1 -1
- package/dist/errors/classify.d.ts +13 -0
- package/dist/errors/classify.d.ts.map +1 -0
- package/dist/errors/classify.js +97 -0
- package/dist/errors/classify.js.map +1 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +4 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/errors/retry.d.ts +40 -0
- package/dist/errors/retry.d.ts.map +1 -0
- package/dist/errors/retry.js +97 -0
- package/dist/errors/retry.js.map +1 -0
- package/dist/errors/types.d.ts +48 -0
- package/dist/errors/types.d.ts.map +1 -0
- package/dist/errors/types.js +59 -0
- package/dist/errors/types.js.map +1 -0
- package/dist/facades/types.d.ts +1 -1
- package/dist/governance/governance.d.ts +42 -0
- package/dist/governance/governance.d.ts.map +1 -0
- package/dist/governance/governance.js +488 -0
- package/dist/governance/governance.js.map +1 -0
- package/dist/governance/index.d.ts +3 -0
- package/dist/governance/index.d.ts.map +1 -0
- package/dist/governance/index.js +2 -0
- package/dist/governance/index.js.map +1 -0
- package/dist/governance/types.d.ts +102 -0
- package/dist/governance/types.d.ts.map +1 -0
- package/dist/governance/types.js +3 -0
- package/dist/governance/types.js.map +1 -0
- package/dist/index.d.ts +52 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +47 -1
- package/dist/index.js.map +1 -1
- package/dist/intake/content-classifier.d.ts +14 -0
- package/dist/intake/content-classifier.d.ts.map +1 -0
- package/dist/intake/content-classifier.js +125 -0
- package/dist/intake/content-classifier.js.map +1 -0
- package/dist/intake/dedup-gate.d.ts +17 -0
- package/dist/intake/dedup-gate.d.ts.map +1 -0
- package/dist/intake/dedup-gate.js +66 -0
- package/dist/intake/dedup-gate.js.map +1 -0
- package/dist/intake/intake-pipeline.d.ts +63 -0
- package/dist/intake/intake-pipeline.d.ts.map +1 -0
- package/dist/intake/intake-pipeline.js +373 -0
- package/dist/intake/intake-pipeline.js.map +1 -0
- package/dist/intake/types.d.ts +65 -0
- package/dist/intake/types.d.ts.map +1 -0
- package/dist/intake/types.js +3 -0
- package/dist/intake/types.js.map +1 -0
- package/dist/intelligence/loader.js +1 -1
- package/dist/intelligence/loader.js.map +1 -1
- package/dist/intelligence/types.d.ts +3 -1
- package/dist/intelligence/types.d.ts.map +1 -1
- package/dist/logging/logger.d.ts +37 -0
- package/dist/logging/logger.d.ts.map +1 -0
- package/dist/logging/logger.js +145 -0
- package/dist/logging/logger.js.map +1 -0
- package/dist/logging/types.d.ts +19 -0
- package/dist/logging/types.d.ts.map +1 -0
- package/dist/logging/types.js +2 -0
- package/dist/logging/types.js.map +1 -0
- package/dist/loop/loop-manager.d.ts +100 -0
- package/dist/loop/loop-manager.d.ts.map +1 -0
- package/dist/loop/loop-manager.js +379 -0
- package/dist/loop/loop-manager.js.map +1 -0
- package/dist/loop/types.d.ts +103 -0
- package/dist/loop/types.d.ts.map +1 -0
- package/dist/loop/types.js +11 -0
- package/dist/loop/types.js.map +1 -0
- package/dist/persistence/index.d.ts +3 -0
- package/dist/persistence/index.d.ts.map +1 -0
- package/dist/persistence/index.js +2 -0
- package/dist/persistence/index.js.map +1 -0
- package/dist/persistence/sqlite-provider.d.ts +25 -0
- package/dist/persistence/sqlite-provider.d.ts.map +1 -0
- package/dist/persistence/sqlite-provider.js +59 -0
- package/dist/persistence/sqlite-provider.js.map +1 -0
- package/dist/persistence/types.d.ts +36 -0
- package/dist/persistence/types.d.ts.map +1 -0
- package/dist/persistence/types.js +8 -0
- package/dist/persistence/types.js.map +1 -0
- package/dist/planning/gap-analysis.d.ts +72 -0
- package/dist/planning/gap-analysis.d.ts.map +1 -0
- package/dist/planning/gap-analysis.js +442 -0
- package/dist/planning/gap-analysis.js.map +1 -0
- package/dist/planning/gap-types.d.ts +29 -0
- package/dist/planning/gap-types.d.ts.map +1 -0
- package/dist/planning/gap-types.js +28 -0
- package/dist/planning/gap-types.js.map +1 -0
- package/dist/planning/planner.d.ts +421 -4
- package/dist/planning/planner.d.ts.map +1 -1
- package/dist/planning/planner.js +949 -21
- package/dist/planning/planner.js.map +1 -1
- package/dist/playbooks/generic/brainstorming.d.ts +9 -0
- package/dist/playbooks/generic/brainstorming.d.ts.map +1 -0
- package/dist/playbooks/generic/brainstorming.js +105 -0
- package/dist/playbooks/generic/brainstorming.js.map +1 -0
- package/dist/playbooks/generic/code-review.d.ts +11 -0
- package/dist/playbooks/generic/code-review.d.ts.map +1 -0
- package/dist/playbooks/generic/code-review.js +176 -0
- package/dist/playbooks/generic/code-review.js.map +1 -0
- package/dist/playbooks/generic/subagent-execution.d.ts +9 -0
- package/dist/playbooks/generic/subagent-execution.d.ts.map +1 -0
- package/dist/playbooks/generic/subagent-execution.js +68 -0
- package/dist/playbooks/generic/subagent-execution.js.map +1 -0
- package/dist/playbooks/generic/systematic-debugging.d.ts +9 -0
- package/dist/playbooks/generic/systematic-debugging.d.ts.map +1 -0
- package/dist/playbooks/generic/systematic-debugging.js +87 -0
- package/dist/playbooks/generic/systematic-debugging.js.map +1 -0
- package/dist/playbooks/generic/tdd.d.ts +9 -0
- package/dist/playbooks/generic/tdd.d.ts.map +1 -0
- package/dist/playbooks/generic/tdd.js +70 -0
- package/dist/playbooks/generic/tdd.js.map +1 -0
- package/dist/playbooks/generic/verification.d.ts +9 -0
- package/dist/playbooks/generic/verification.d.ts.map +1 -0
- package/dist/playbooks/generic/verification.js +74 -0
- package/dist/playbooks/generic/verification.js.map +1 -0
- package/dist/playbooks/index.d.ts +4 -0
- package/dist/playbooks/index.d.ts.map +1 -0
- package/dist/playbooks/index.js +5 -0
- package/dist/playbooks/index.js.map +1 -0
- package/dist/playbooks/playbook-registry.d.ts +42 -0
- package/dist/playbooks/playbook-registry.d.ts.map +1 -0
- package/dist/playbooks/playbook-registry.js +227 -0
- package/dist/playbooks/playbook-registry.js.map +1 -0
- package/dist/playbooks/playbook-seeder.d.ts +47 -0
- package/dist/playbooks/playbook-seeder.d.ts.map +1 -0
- package/dist/playbooks/playbook-seeder.js +104 -0
- package/dist/playbooks/playbook-seeder.js.map +1 -0
- package/dist/playbooks/playbook-types.d.ts +132 -0
- package/dist/playbooks/playbook-types.d.ts.map +1 -0
- package/dist/playbooks/playbook-types.js +12 -0
- package/dist/playbooks/playbook-types.js.map +1 -0
- package/dist/project/project-registry.d.ts +79 -0
- package/dist/project/project-registry.d.ts.map +1 -0
- package/dist/project/project-registry.js +274 -0
- package/dist/project/project-registry.js.map +1 -0
- package/dist/project/types.d.ts +28 -0
- package/dist/project/types.d.ts.map +1 -0
- package/dist/project/types.js +5 -0
- package/dist/project/types.js.map +1 -0
- package/dist/prompts/index.d.ts +4 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +3 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/prompts/parser.d.ts +17 -0
- package/dist/prompts/parser.d.ts.map +1 -0
- package/dist/prompts/parser.js +47 -0
- package/dist/prompts/parser.js.map +1 -0
- package/dist/prompts/template-manager.d.ts +25 -0
- package/dist/prompts/template-manager.d.ts.map +1 -0
- package/dist/prompts/template-manager.js +71 -0
- package/dist/prompts/template-manager.js.map +1 -0
- package/dist/prompts/types.d.ts +26 -0
- package/dist/prompts/types.d.ts.map +1 -0
- package/dist/prompts/types.js +5 -0
- package/dist/prompts/types.js.map +1 -0
- package/dist/runtime/admin-extra-ops.d.ts +15 -0
- package/dist/runtime/admin-extra-ops.d.ts.map +1 -0
- package/dist/runtime/admin-extra-ops.js +595 -0
- package/dist/runtime/admin-extra-ops.js.map +1 -0
- package/dist/runtime/admin-ops.d.ts +15 -0
- package/dist/runtime/admin-ops.d.ts.map +1 -0
- package/dist/runtime/admin-ops.js +329 -0
- package/dist/runtime/admin-ops.js.map +1 -0
- package/dist/runtime/capture-ops.d.ts +15 -0
- package/dist/runtime/capture-ops.d.ts.map +1 -0
- package/dist/runtime/capture-ops.js +363 -0
- package/dist/runtime/capture-ops.js.map +1 -0
- package/dist/runtime/cognee-sync-ops.d.ts +12 -0
- package/dist/runtime/cognee-sync-ops.d.ts.map +1 -0
- package/dist/runtime/cognee-sync-ops.js +55 -0
- package/dist/runtime/cognee-sync-ops.js.map +1 -0
- package/dist/runtime/core-ops.d.ts +9 -3
- package/dist/runtime/core-ops.d.ts.map +1 -1
- package/dist/runtime/core-ops.js +693 -10
- package/dist/runtime/core-ops.js.map +1 -1
- package/dist/runtime/curator-extra-ops.d.ts +9 -0
- package/dist/runtime/curator-extra-ops.d.ts.map +1 -0
- package/dist/runtime/curator-extra-ops.js +71 -0
- package/dist/runtime/curator-extra-ops.js.map +1 -0
- package/dist/runtime/domain-ops.d.ts.map +1 -1
- package/dist/runtime/domain-ops.js +61 -15
- package/dist/runtime/domain-ops.js.map +1 -1
- package/dist/runtime/grading-ops.d.ts +14 -0
- package/dist/runtime/grading-ops.d.ts.map +1 -0
- package/dist/runtime/grading-ops.js +105 -0
- package/dist/runtime/grading-ops.js.map +1 -0
- package/dist/runtime/intake-ops.d.ts +14 -0
- package/dist/runtime/intake-ops.d.ts.map +1 -0
- package/dist/runtime/intake-ops.js +110 -0
- package/dist/runtime/intake-ops.js.map +1 -0
- package/dist/runtime/loop-ops.d.ts +14 -0
- package/dist/runtime/loop-ops.d.ts.map +1 -0
- package/dist/runtime/loop-ops.js +251 -0
- package/dist/runtime/loop-ops.js.map +1 -0
- package/dist/runtime/memory-cross-project-ops.d.ts +12 -0
- package/dist/runtime/memory-cross-project-ops.d.ts.map +1 -0
- package/dist/runtime/memory-cross-project-ops.js +165 -0
- package/dist/runtime/memory-cross-project-ops.js.map +1 -0
- package/dist/runtime/memory-extra-ops.d.ts +13 -0
- package/dist/runtime/memory-extra-ops.d.ts.map +1 -0
- package/dist/runtime/memory-extra-ops.js +173 -0
- package/dist/runtime/memory-extra-ops.js.map +1 -0
- package/dist/runtime/orchestrate-ops.d.ts +17 -0
- package/dist/runtime/orchestrate-ops.d.ts.map +1 -0
- package/dist/runtime/orchestrate-ops.js +246 -0
- package/dist/runtime/orchestrate-ops.js.map +1 -0
- package/dist/runtime/planning-extra-ops.d.ts +25 -0
- package/dist/runtime/planning-extra-ops.d.ts.map +1 -0
- package/dist/runtime/planning-extra-ops.js +663 -0
- package/dist/runtime/planning-extra-ops.js.map +1 -0
- package/dist/runtime/playbook-ops.d.ts +14 -0
- package/dist/runtime/playbook-ops.d.ts.map +1 -0
- package/dist/runtime/playbook-ops.js +141 -0
- package/dist/runtime/playbook-ops.js.map +1 -0
- package/dist/runtime/project-ops.d.ts +15 -0
- package/dist/runtime/project-ops.d.ts.map +1 -0
- package/dist/runtime/project-ops.js +186 -0
- package/dist/runtime/project-ops.js.map +1 -0
- package/dist/runtime/runtime.d.ts.map +1 -1
- package/dist/runtime/runtime.js +65 -3
- package/dist/runtime/runtime.js.map +1 -1
- package/dist/runtime/types.d.ts +29 -0
- package/dist/runtime/types.d.ts.map +1 -1
- package/dist/runtime/vault-extra-ops.d.ts +10 -0
- package/dist/runtime/vault-extra-ops.d.ts.map +1 -0
- package/dist/runtime/vault-extra-ops.js +536 -0
- package/dist/runtime/vault-extra-ops.js.map +1 -0
- package/dist/telemetry/telemetry.d.ts +48 -0
- package/dist/telemetry/telemetry.d.ts.map +1 -0
- package/dist/telemetry/telemetry.js +87 -0
- package/dist/telemetry/telemetry.js.map +1 -0
- package/dist/vault/playbook.d.ts +34 -0
- package/dist/vault/playbook.d.ts.map +1 -0
- package/dist/vault/playbook.js +60 -0
- package/dist/vault/playbook.js.map +1 -0
- package/dist/vault/vault.d.ts +97 -4
- package/dist/vault/vault.d.ts.map +1 -1
- package/dist/vault/vault.js +424 -65
- package/dist/vault/vault.js.map +1 -1
- package/package.json +7 -3
- package/src/__tests__/admin-extra-ops.test.ts +467 -0
- package/src/__tests__/admin-ops.test.ts +271 -0
- package/src/__tests__/brain-intelligence.test.ts +205 -0
- package/src/__tests__/brain.test.ts +134 -3
- package/src/__tests__/capture-ops.test.ts +509 -0
- package/src/__tests__/cognee-integration.test.ts +80 -0
- package/src/__tests__/cognee-sync-manager.test.ts +103 -0
- package/src/__tests__/core-ops.test.ts +292 -2
- package/src/__tests__/curator-extra-ops.test.ts +381 -0
- package/src/__tests__/domain-ops.test.ts +66 -0
- package/src/__tests__/errors.test.ts +388 -0
- package/src/__tests__/governance.test.ts +522 -0
- package/src/__tests__/grading-ops.test.ts +361 -0
- package/src/__tests__/identity-manager.test.ts +243 -0
- package/src/__tests__/intake-pipeline.test.ts +162 -0
- package/src/__tests__/intent-router.test.ts +222 -0
- package/src/__tests__/logger.test.ts +200 -0
- package/src/__tests__/loop-ops.test.ts +469 -0
- package/src/__tests__/memory-cross-project-ops.test.ts +248 -0
- package/src/__tests__/memory-extra-ops.test.ts +352 -0
- package/src/__tests__/orchestrate-ops.test.ts +289 -0
- package/src/__tests__/persistence.test.ts +225 -0
- package/src/__tests__/planner.test.ts +416 -7
- package/src/__tests__/planning-extra-ops.test.ts +706 -0
- package/src/__tests__/playbook-registry.test.ts +326 -0
- package/src/__tests__/playbook-seeder.test.ts +163 -0
- package/src/__tests__/playbook.test.ts +389 -0
- package/src/__tests__/project-ops.test.ts +381 -0
- package/src/__tests__/template-manager.test.ts +222 -0
- package/src/__tests__/vault-extra-ops.test.ts +482 -0
- package/src/brain/brain.ts +185 -16
- package/src/brain/intelligence.ts +179 -10
- package/src/brain/types.ts +40 -2
- package/src/cognee/client.ts +18 -0
- package/src/cognee/sync-manager.ts +389 -0
- package/src/control/identity-manager.ts +354 -0
- package/src/control/intent-router.ts +326 -0
- package/src/control/types.ts +102 -0
- package/src/curator/curator.ts +295 -1
- package/src/errors/classify.ts +102 -0
- package/src/errors/index.ts +5 -0
- package/src/errors/retry.ts +132 -0
- package/src/errors/types.ts +81 -0
- package/src/governance/governance.ts +698 -0
- package/src/governance/index.ts +18 -0
- package/src/governance/types.ts +111 -0
- package/src/index.ts +213 -2
- package/src/intake/content-classifier.ts +146 -0
- package/src/intake/dedup-gate.ts +92 -0
- package/src/intake/intake-pipeline.ts +503 -0
- package/src/intake/types.ts +69 -0
- package/src/intelligence/loader.ts +1 -1
- package/src/intelligence/types.ts +3 -1
- package/src/logging/logger.ts +154 -0
- package/src/logging/types.ts +21 -0
- package/src/loop/loop-manager.ts +448 -0
- package/src/loop/types.ts +115 -0
- package/src/persistence/index.ts +7 -0
- package/src/persistence/sqlite-provider.ts +62 -0
- package/src/persistence/types.ts +44 -0
- package/src/planning/gap-analysis.ts +775 -0
- package/src/planning/gap-types.ts +61 -0
- package/src/planning/planner.ts +1273 -24
- package/src/playbooks/generic/brainstorming.ts +110 -0
- package/src/playbooks/generic/code-review.ts +181 -0
- package/src/playbooks/generic/subagent-execution.ts +74 -0
- package/src/playbooks/generic/systematic-debugging.ts +92 -0
- package/src/playbooks/generic/tdd.ts +75 -0
- package/src/playbooks/generic/verification.ts +79 -0
- package/src/playbooks/index.ts +27 -0
- package/src/playbooks/playbook-registry.ts +284 -0
- package/src/playbooks/playbook-seeder.ts +119 -0
- package/src/playbooks/playbook-types.ts +162 -0
- package/src/project/project-registry.ts +370 -0
- package/src/project/types.ts +31 -0
- package/src/prompts/index.ts +3 -0
- package/src/prompts/parser.ts +59 -0
- package/src/prompts/template-manager.ts +77 -0
- package/src/prompts/types.ts +28 -0
- package/src/runtime/admin-extra-ops.ts +652 -0
- package/src/runtime/admin-ops.ts +340 -0
- package/src/runtime/capture-ops.ts +404 -0
- package/src/runtime/cognee-sync-ops.ts +63 -0
- package/src/runtime/core-ops.ts +787 -9
- package/src/runtime/curator-extra-ops.ts +85 -0
- package/src/runtime/domain-ops.ts +67 -15
- package/src/runtime/grading-ops.ts +130 -0
- package/src/runtime/intake-ops.ts +126 -0
- package/src/runtime/loop-ops.ts +277 -0
- package/src/runtime/memory-cross-project-ops.ts +191 -0
- package/src/runtime/memory-extra-ops.ts +186 -0
- package/src/runtime/orchestrate-ops.ts +278 -0
- package/src/runtime/planning-extra-ops.ts +718 -0
- package/src/runtime/playbook-ops.ts +169 -0
- package/src/runtime/project-ops.ts +202 -0
- package/src/runtime/runtime.ts +77 -3
- package/src/runtime/types.ts +29 -0
- package/src/runtime/vault-extra-ops.ts +606 -0
- package/src/telemetry/telemetry.ts +118 -0
- package/src/vault/playbook.ts +87 -0
- package/src/vault/vault.ts +575 -98
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
// ─── Intake Pipeline ──────────────────────────────────────────────
|
|
2
|
+
//
|
|
3
|
+
// 6-stage pipeline for ingesting PDF books into the vault:
|
|
4
|
+
// 1. Parse PDF + compute hash + create chunks → job record
|
|
5
|
+
// 2. Extract page text for each chunk
|
|
6
|
+
// 3. Classify chunk text via LLM
|
|
7
|
+
// 4. Dedup classified items against vault
|
|
8
|
+
// 5. Store unique items in vault
|
|
9
|
+
// 6. Finalize job with aggregate stats
|
|
10
|
+
//
|
|
11
|
+
// SQLite-backed job tracking for resumable processing.
|
|
12
|
+
|
|
13
|
+
import { createHash, randomUUID } from 'node:crypto';
|
|
14
|
+
import { readFileSync, statSync } from 'node:fs';
|
|
15
|
+
import type { PersistenceProvider } from '../persistence/types.js';
|
|
16
|
+
import type { Vault } from '../vault/vault.js';
|
|
17
|
+
import type { LLMClient } from '../llm/llm-client.js';
|
|
18
|
+
import type { IntelligenceEntry } from '../intelligence/types.js';
|
|
19
|
+
import type {
|
|
20
|
+
IntakeConfig,
|
|
21
|
+
IntakeChunk,
|
|
22
|
+
IntakeJobRecord,
|
|
23
|
+
IntakePreviewResult,
|
|
24
|
+
ClassifiedItem,
|
|
25
|
+
KnowledgeType,
|
|
26
|
+
} from './types.js';
|
|
27
|
+
import { classifyChunk } from './content-classifier.js';
|
|
28
|
+
import { dedupItems } from './dedup-gate.js';
|
|
29
|
+
|
|
30
|
+
// =============================================================================
|
|
31
|
+
// CONSTANTS
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
const DEFAULT_CHUNK_SIZE = 10;
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Map KnowledgeType → IntelligenceEntry.type.
|
|
38
|
+
* Only 'pattern' and 'anti-pattern' map directly; everything else becomes 'rule'.
|
|
39
|
+
*/
|
|
40
|
+
function mapKnowledgeType(kt: KnowledgeType): IntelligenceEntry['type'] {
|
|
41
|
+
if (kt === 'pattern') return 'pattern';
|
|
42
|
+
if (kt === 'anti-pattern') return 'anti-pattern';
|
|
43
|
+
return 'rule';
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// =============================================================================
|
|
47
|
+
// HELPERS
|
|
48
|
+
// =============================================================================
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Split concatenated PDF text into per-page segments.
|
|
52
|
+
*
|
|
53
|
+
* Strategy: split on form-feed characters first (common in pdf-parse output).
|
|
54
|
+
* If that yields fewer segments than expected, fall back to equal-length splits.
|
|
55
|
+
*/
|
|
56
|
+
export function splitIntoPages(text: string, numPages: number): string[] {
|
|
57
|
+
if (numPages <= 0) return [text];
|
|
58
|
+
|
|
59
|
+
// Try form-feed split first
|
|
60
|
+
const ffPages = text.split('\f');
|
|
61
|
+
if (ffPages.length >= numPages) {
|
|
62
|
+
return ffPages.slice(0, numPages);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Fallback: equal-length chunks
|
|
66
|
+
const chunkSize = Math.ceil(text.length / numPages);
|
|
67
|
+
const pages: string[] = [];
|
|
68
|
+
for (let i = 0; i < text.length; i += chunkSize) {
|
|
69
|
+
pages.push(text.slice(i, i + chunkSize));
|
|
70
|
+
}
|
|
71
|
+
// Pad with empty strings if we somehow got fewer
|
|
72
|
+
while (pages.length < numPages) {
|
|
73
|
+
pages.push('');
|
|
74
|
+
}
|
|
75
|
+
return pages;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// =============================================================================
|
|
79
|
+
// PIPELINE
|
|
80
|
+
// =============================================================================
|
|
81
|
+
|
|
82
|
+
export class IntakePipeline {
|
|
83
|
+
private provider: PersistenceProvider;
|
|
84
|
+
private vault: Vault;
|
|
85
|
+
private llm: LLMClient;
|
|
86
|
+
|
|
87
|
+
constructor(provider: PersistenceProvider, vault: Vault, llm: LLMClient) {
|
|
88
|
+
this.provider = provider;
|
|
89
|
+
this.vault = vault;
|
|
90
|
+
this.llm = llm;
|
|
91
|
+
this.initSchema();
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// ─── Schema ──────────────────────────────────────────────────────
|
|
95
|
+
|
|
96
|
+
  /**
   * Create the SQLite job-tracking tables if they do not already exist.
   * Idempotent (CREATE TABLE IF NOT EXISTS); called once from the constructor.
   *
   * - intake_jobs: one row per ingestion job. `config`, `pdf_meta`, `toc`
   *   and `stats` are JSON-serialized TEXT columns; timestamps are Unix
   *   seconds stored as INTEGER.
   * - intake_chunks: one row per fixed-size page window of a job, tracking
   *   per-chunk status ('pending' by default; set to 'processing' /
   *   'completed' / 'failed' by processChunks) plus extraction counters
   *   and an error message for failed chunks.
   */
  private initSchema(): void {
    this.provider.execSql(`
      CREATE TABLE IF NOT EXISTS intake_jobs (
        id TEXT PRIMARY KEY,
        status TEXT NOT NULL,
        config TEXT NOT NULL,
        pdf_meta TEXT,
        toc TEXT,
        stats TEXT,
        created_at INTEGER,
        updated_at INTEGER,
        completed_at INTEGER
      );

      CREATE TABLE IF NOT EXISTS intake_chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        job_id TEXT NOT NULL REFERENCES intake_jobs(id),
        chunk_index INTEGER,
        title TEXT,
        page_start INTEGER,
        page_end INTEGER,
        status TEXT DEFAULT 'pending',
        items_extracted INTEGER DEFAULT 0,
        items_stored INTEGER DEFAULT 0,
        items_deduped INTEGER DEFAULT 0,
        error TEXT,
        processed_at INTEGER
      );
    `);
  }
|
|
126
|
+
|
|
127
|
+
// ─── Stage 1: Ingest Book ────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Parse a PDF, compute its file hash, create fixed-size page chunks,
|
|
131
|
+
* and persist the job + chunk records to the database.
|
|
132
|
+
*/
|
|
133
|
+
async ingestBook(config: IntakeConfig): Promise<IntakeJobRecord> {
|
|
134
|
+
const jobId = randomUUID();
|
|
135
|
+
const now = Math.floor(Date.now() / 1000);
|
|
136
|
+
const chunkPageSize = config.chunkPageSize ?? DEFAULT_CHUNK_SIZE;
|
|
137
|
+
|
|
138
|
+
// Read file
|
|
139
|
+
const fileBuffer = readFileSync(config.pdfPath);
|
|
140
|
+
const fileSize = statSync(config.pdfPath).size;
|
|
141
|
+
const fileHash = createHash('sha256').update(fileBuffer).digest('hex');
|
|
142
|
+
|
|
143
|
+
// Dynamic import of pdf-parse
|
|
144
|
+
const pdfParse = (await import('pdf-parse')).default;
|
|
145
|
+
const pdfData = await pdfParse(fileBuffer);
|
|
146
|
+
const totalPages = pdfData.numpages;
|
|
147
|
+
|
|
148
|
+
const pdfMeta = { totalPages, fileHash, fileSize };
|
|
149
|
+
|
|
150
|
+
// Create chunk definitions (fixed N-page windows)
|
|
151
|
+
const numChunks = Math.ceil(totalPages / chunkPageSize);
|
|
152
|
+
|
|
153
|
+
this.provider.transaction(() => {
|
|
154
|
+
// Insert job
|
|
155
|
+
this.provider.run(
|
|
156
|
+
`INSERT INTO intake_jobs (id, status, config, pdf_meta, toc, stats, created_at, updated_at, completed_at)
|
|
157
|
+
VALUES (@id, @status, @config, @pdfMeta, @toc, @stats, @createdAt, @updatedAt, @completedAt)`,
|
|
158
|
+
{
|
|
159
|
+
id: jobId,
|
|
160
|
+
status: 'initialized',
|
|
161
|
+
config: JSON.stringify(config),
|
|
162
|
+
pdfMeta: JSON.stringify(pdfMeta),
|
|
163
|
+
toc: null,
|
|
164
|
+
stats: null,
|
|
165
|
+
createdAt: now,
|
|
166
|
+
updatedAt: now,
|
|
167
|
+
completedAt: null,
|
|
168
|
+
},
|
|
169
|
+
);
|
|
170
|
+
|
|
171
|
+
// Insert chunk records
|
|
172
|
+
for (let i = 0; i < numChunks; i++) {
|
|
173
|
+
const pageStart = i * chunkPageSize + 1;
|
|
174
|
+
const pageEnd = Math.min((i + 1) * chunkPageSize, totalPages);
|
|
175
|
+
const chunkTitle = `${config.title} — pages ${pageStart}-${pageEnd}`;
|
|
176
|
+
|
|
177
|
+
this.provider.run(
|
|
178
|
+
`INSERT INTO intake_chunks (job_id, chunk_index, title, page_start, page_end, status)
|
|
179
|
+
VALUES (@jobId, @chunkIndex, @title, @pageStart, @pageEnd, @status)`,
|
|
180
|
+
{
|
|
181
|
+
jobId,
|
|
182
|
+
chunkIndex: i,
|
|
183
|
+
title: chunkTitle,
|
|
184
|
+
pageStart,
|
|
185
|
+
pageEnd,
|
|
186
|
+
status: 'pending',
|
|
187
|
+
},
|
|
188
|
+
);
|
|
189
|
+
}
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
return this.getJob(jobId)!;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// ─── Stages 2-5: Process Chunks ──────────────────────────────────
|
|
196
|
+
|
|
197
|
+
  /**
   * Process up to `count` pending chunks for a job.
   *
   * For each chunk:
   * 2. Extract page text from PDF
   * 3. Classify via LLM
   * 4. Dedup against vault
   * 5. Store unique items
   *
   * When all chunks are done, finalizes the job (stage 6).
   *
   * A chunk that throws is marked 'failed' (with its error message recorded)
   * and processing continues with the remaining chunks; failed chunks still
   * count toward `processed` for this batch.
   *
   * @param jobId - ID of a job previously created by ingestBook().
   * @param count - Maximum number of pending chunks to process this call.
   * @returns Aggregate counters for this batch plus how many chunks remain
   *          pending after it.
   */
  async processChunks(
    jobId: string,
    count: number = 5,
  ): Promise<{
    processed: number;
    itemsStored: number;
    itemsDeduped: number;
    remaining: number;
  }> {
    // Get pending chunks, lowest chunk_index first for deterministic order.
    const pendingChunks = this.provider.all<Record<string, unknown>>(
      `SELECT * FROM intake_chunks WHERE job_id = @jobId AND status = 'pending' ORDER BY chunk_index ASC LIMIT @limit`,
      { jobId, limit: count },
    );

    // Nothing pending: report current state without touching the job row.
    if (pendingChunks.length === 0) {
      const remaining = this.countPendingChunks(jobId);
      return { processed: 0, itemsStored: 0, itemsDeduped: 0, remaining };
    }

    // Mark job as processing
    this.provider.run(
      `UPDATE intake_jobs SET status = 'processing', updated_at = @now WHERE id = @id`,
      { id: jobId, now: Math.floor(Date.now() / 1000) },
    );

    // Re-read config and parse PDF. The PDF is re-parsed on every call so
    // processing stays resumable across process restarts (no in-memory state).
    const job = this.getJob(jobId);
    if (!job) {
      return { processed: 0, itemsStored: 0, itemsDeduped: 0, remaining: 0 };
    }

    const fileBuffer = readFileSync(job.config.pdfPath);
    const pdfParse = (await import('pdf-parse')).default;
    const pdfData = await pdfParse(fileBuffer);
    // Prefer the page count captured at ingest time; fall back to the parser's.
    const totalPages = job.pdfMeta?.totalPages ?? pdfData.numpages;
    const pages = splitIntoPages(pdfData.text, totalPages);

    // Batch-level accumulators returned to the caller.
    let totalStored = 0;
    let totalDeduped = 0;
    let processed = 0;

    for (const chunkRow of pendingChunks) {
      // Raw row fields from the provider; casts assume the schema in initSchema.
      const chunkId = chunkRow.id as number;
      const chunkIndex = chunkRow.chunk_index as number;
      const pageStart = chunkRow.page_start as number;
      const pageEnd = chunkRow.page_end as number;

      try {
        // Mark chunk processing
        this.provider.run(`UPDATE intake_chunks SET status = 'processing' WHERE id = @id`, {
          id: chunkId,
        });

        // Stage 2: Extract page text (1-indexed → 0-indexed)
        const chunkText = pages.slice(pageStart - 1, pageEnd).join('\n\n');
        const citation = `${job.config.title}, pages ${pageStart}-${pageEnd}`;

        // Stage 3: Classify
        const classifiedItems = await classifyChunk(this.llm, chunkText, citation);

        // Stage 4: Dedup
        const dedupResults = dedupItems(classifiedItems, this.vault);
        const uniqueItems = dedupResults.filter((r) => !r.isDuplicate);
        const dupCount = dedupResults.filter((r) => r.isDuplicate).length;

        // Stage 5: Store unique items in vault
        let storedCount = 0;
        for (let itemIdx = 0; itemIdx < uniqueItems.length; itemIdx++) {
          const result = uniqueItems[itemIdx];
          // itemIdx/chunkIndex feed into the entry's identity (see
          // classifiedItemToEntry) so re-runs produce stable entries.
          const entry = classifiedItemToEntry(
            result.item,
            job.config.domain,
            jobId,
            chunkIndex,
            itemIdx,
            job.config.tags,
          );
          this.vault.add(entry);
          storedCount++;
        }

        // Update chunk record with per-chunk counters and completion time.
        const now = Math.floor(Date.now() / 1000);
        this.provider.run(
          `UPDATE intake_chunks
           SET status = 'completed', items_extracted = @extracted, items_stored = @stored, items_deduped = @deduped, processed_at = @now
           WHERE id = @id`,
          {
            id: chunkId,
            extracted: classifiedItems.length,
            stored: storedCount,
            deduped: dupCount,
            now,
          },
        );

        totalStored += storedCount;
        totalDeduped += dupCount;
        processed++;
      } catch (err) {
        // Graceful degradation: mark chunk as failed, continue with others
        const errorMsg = err instanceof Error ? err.message : String(err);
        this.provider.run(
          `UPDATE intake_chunks SET status = 'failed', error = @error, processed_at = @now WHERE id = @id`,
          { id: chunkId, error: errorMsg, now: Math.floor(Date.now() / 1000) },
        );
        // Failed chunks still count toward this batch's `processed` total.
        processed++;
      }
    }

    // Update job timestamp
    this.provider.run(`UPDATE intake_jobs SET updated_at = @now WHERE id = @id`, {
      id: jobId,
      now: Math.floor(Date.now() / 1000),
    });

    // Check remaining; when none are left, run stage 6 (aggregate + close job).
    const remaining = this.countPendingChunks(jobId);
    if (remaining === 0) {
      this.finalizeJob(jobId);
    }

    return { processed, itemsStored: totalStored, itemsDeduped: totalDeduped, remaining };
  }
|
|
333
|
+
|
|
334
|
+
// ─── Preview ─────────────────────────────────────────────────────
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* Parse a page range from a PDF and classify it without storing.
|
|
338
|
+
* Useful for previewing what the pipeline would extract.
|
|
339
|
+
*/
|
|
340
|
+
async preview(
|
|
341
|
+
config: IntakeConfig,
|
|
342
|
+
pageStart: number,
|
|
343
|
+
pageEnd: number,
|
|
344
|
+
): Promise<IntakePreviewResult> {
|
|
345
|
+
const fileBuffer = readFileSync(config.pdfPath);
|
|
346
|
+
const pdfParse = (await import('pdf-parse')).default;
|
|
347
|
+
const pdfData = await pdfParse(fileBuffer);
|
|
348
|
+
const totalPages = pdfData.numpages;
|
|
349
|
+
|
|
350
|
+
const pages = splitIntoPages(pdfData.text, totalPages);
|
|
351
|
+
const chunkText = pages.slice(pageStart - 1, pageEnd).join('\n\n');
|
|
352
|
+
const citation = `${config.title}, pages ${pageStart}-${pageEnd}`;
|
|
353
|
+
|
|
354
|
+
const items = await classifyChunk(this.llm, chunkText, citation);
|
|
355
|
+
|
|
356
|
+
return {
|
|
357
|
+
items,
|
|
358
|
+
chunkText,
|
|
359
|
+
pageRange: { start: pageStart, end: pageEnd },
|
|
360
|
+
};
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// ─── Queries ─────────────────────────────────────────────────────
|
|
364
|
+
|
|
365
|
+
/**
|
|
366
|
+
* Get a job record by ID.
|
|
367
|
+
*/
|
|
368
|
+
getJob(jobId: string): IntakeJobRecord | null {
|
|
369
|
+
const row = this.provider.get<Record<string, unknown>>(
|
|
370
|
+
'SELECT * FROM intake_jobs WHERE id = @id',
|
|
371
|
+
{ id: jobId },
|
|
372
|
+
);
|
|
373
|
+
return row ? rowToJobRecord(row) : null;
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
/**
|
|
377
|
+
* List all intake jobs.
|
|
378
|
+
*/
|
|
379
|
+
listJobs(): IntakeJobRecord[] {
|
|
380
|
+
const rows = this.provider.all<Record<string, unknown>>(
|
|
381
|
+
'SELECT * FROM intake_jobs ORDER BY created_at DESC',
|
|
382
|
+
);
|
|
383
|
+
return rows.map(rowToJobRecord);
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
/**
|
|
387
|
+
* Get all chunks for a job.
|
|
388
|
+
*/
|
|
389
|
+
getChunks(jobId: string): IntakeChunk[] {
|
|
390
|
+
const rows = this.provider.all<Record<string, unknown>>(
|
|
391
|
+
'SELECT * FROM intake_chunks WHERE job_id = @jobId ORDER BY chunk_index ASC',
|
|
392
|
+
{ jobId },
|
|
393
|
+
);
|
|
394
|
+
return rows.map(rowToChunk);
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
// ─── Stage 6: Finalize ──────────────────────────────────────────
|
|
398
|
+
|
|
399
|
+
/**
|
|
400
|
+
* Sum stats from all chunks and mark the job as completed.
|
|
401
|
+
*/
|
|
402
|
+
private finalizeJob(jobId: string): void {
|
|
403
|
+
const chunks = this.provider.all<Record<string, unknown>>(
|
|
404
|
+
'SELECT * FROM intake_chunks WHERE job_id = @jobId',
|
|
405
|
+
{ jobId },
|
|
406
|
+
);
|
|
407
|
+
|
|
408
|
+
let itemsExtracted = 0;
|
|
409
|
+
let itemsStored = 0;
|
|
410
|
+
let itemsDeduped = 0;
|
|
411
|
+
let itemsFailed = 0;
|
|
412
|
+
|
|
413
|
+
for (const chunk of chunks) {
|
|
414
|
+
const status = chunk.status as string;
|
|
415
|
+
if (status === 'completed') {
|
|
416
|
+
itemsExtracted += (chunk.items_extracted as number) ?? 0;
|
|
417
|
+
itemsStored += (chunk.items_stored as number) ?? 0;
|
|
418
|
+
itemsDeduped += (chunk.items_deduped as number) ?? 0;
|
|
419
|
+
} else if (status === 'failed') {
|
|
420
|
+
itemsFailed++;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
const stats = { itemsExtracted, itemsStored, itemsDeduped, itemsFailed };
|
|
425
|
+
const now = Math.floor(Date.now() / 1000);
|
|
426
|
+
|
|
427
|
+
this.provider.run(
|
|
428
|
+
`UPDATE intake_jobs SET status = 'completed', stats = @stats, updated_at = @now, completed_at = @now WHERE id = @id`,
|
|
429
|
+
{ id: jobId, stats: JSON.stringify(stats), now },
|
|
430
|
+
);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// ─── Private helpers ─────────────────────────────────────────────
|
|
434
|
+
|
|
435
|
+
private countPendingChunks(jobId: string): number {
|
|
436
|
+
const result = this.provider.get<{ count: number }>(
|
|
437
|
+
`SELECT COUNT(*) as count FROM intake_chunks WHERE job_id = @jobId AND status = 'pending'`,
|
|
438
|
+
{ jobId },
|
|
439
|
+
);
|
|
440
|
+
return result?.count ?? 0;
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
// =============================================================================
|
|
445
|
+
// ROW MAPPERS
|
|
446
|
+
// =============================================================================
|
|
447
|
+
|
|
448
|
+
function rowToJobRecord(row: Record<string, unknown>): IntakeJobRecord {
|
|
449
|
+
return {
|
|
450
|
+
id: row.id as string,
|
|
451
|
+
status: row.status as IntakeJobRecord['status'],
|
|
452
|
+
config: JSON.parse(row.config as string) as IntakeConfig,
|
|
453
|
+
pdfMeta: row.pdf_meta ? JSON.parse(row.pdf_meta as string) : null,
|
|
454
|
+
toc: row.toc ? JSON.parse(row.toc as string) : null,
|
|
455
|
+
stats: row.stats ? JSON.parse(row.stats as string) : null,
|
|
456
|
+
createdAt: row.created_at as number,
|
|
457
|
+
updatedAt: row.updated_at as number,
|
|
458
|
+
completedAt: (row.completed_at as number) ?? null,
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
function rowToChunk(row: Record<string, unknown>): IntakeChunk {
|
|
463
|
+
return {
|
|
464
|
+
id: row.id as number,
|
|
465
|
+
jobId: row.job_id as string,
|
|
466
|
+
chunkIndex: row.chunk_index as number,
|
|
467
|
+
title: (row.title as string) ?? null,
|
|
468
|
+
pageStart: row.page_start as number,
|
|
469
|
+
pageEnd: row.page_end as number,
|
|
470
|
+
status: row.status as IntakeChunk['status'],
|
|
471
|
+
itemsExtracted: (row.items_extracted as number) ?? 0,
|
|
472
|
+
itemsStored: (row.items_stored as number) ?? 0,
|
|
473
|
+
itemsDeduped: (row.items_deduped as number) ?? 0,
|
|
474
|
+
error: (row.error as string) ?? null,
|
|
475
|
+
processedAt: (row.processed_at as number) ?? null,
|
|
476
|
+
};
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* Convert a ClassifiedItem to an IntelligenceEntry for vault storage.
|
|
481
|
+
*/
|
|
482
|
+
function classifiedItemToEntry(
|
|
483
|
+
item: ClassifiedItem,
|
|
484
|
+
domain: string,
|
|
485
|
+
jobId: string,
|
|
486
|
+
chunkIndex: number,
|
|
487
|
+
itemIndex: number,
|
|
488
|
+
extraTags?: string[],
|
|
489
|
+
): IntelligenceEntry {
|
|
490
|
+
const entryType = mapKnowledgeType(item.type);
|
|
491
|
+
const tags = [...item.tags, ...(extraTags ?? [])];
|
|
492
|
+
|
|
493
|
+
return {
|
|
494
|
+
id: `intake-${jobId}-${chunkIndex}-${itemIndex}`,
|
|
495
|
+
type: entryType,
|
|
496
|
+
domain,
|
|
497
|
+
title: item.title,
|
|
498
|
+
severity: item.severity,
|
|
499
|
+
description: item.description,
|
|
500
|
+
context: item.citation,
|
|
501
|
+
tags,
|
|
502
|
+
};
|
|
503
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// ─── Intake Pipeline Types ────────────────────────────────────────

/** Lifecycle states of a whole intake job. */
export type IntakeJobStatus = 'initialized' | 'processing' | 'completed' | 'failed' | 'cancelled';

/** Lifecycle states of a single page-range chunk within a job. */
export type IntakeChunkStatus = 'pending' | 'processing' | 'completed' | 'failed' | 'skipped';

/** Category assigned to an extracted knowledge item by classification. */
export type KnowledgeType =
  | 'pattern'
  | 'anti-pattern'
  | 'principle'
  | 'concept'
  | 'reference'
  | 'workflow'
  | 'idea'
  | 'roadmap';

/** User-supplied settings for one PDF intake run. */
export interface IntakeConfig {
  /** Filesystem path of the PDF to ingest. */
  pdfPath: string;
  /** Document title; also used to build per-chunk citations ("<title>, pages N-M"). */
  title: string;
  /** Document author, when known. */
  author?: string;
  /** Knowledge domain the extracted entries are filed under. */
  domain: string;
  /** Pages per chunk; presumably a default applies when omitted — confirm in the pipeline. */
  chunkPageSize?: number;
  /** Extra tags appended to each extracted item's own tags. */
  tags?: string[];
}

/** One page-range unit of work, persisted in the intake_chunks table. */
export interface IntakeChunk {
  /** Row id (intake_chunks primary key). */
  id: number;
  /** Owning job id. */
  jobId: string;
  /** Position of this chunk within the job (ascending order). */
  chunkIndex: number;
  /** Section title, when available; null otherwise. */
  title: string | null;
  /** First page of the chunk (1-based, inclusive). */
  pageStart: number;
  /** Last page of the chunk (1-based, inclusive). */
  pageEnd: number;
  /** Current processing state. */
  status: IntakeChunkStatus;
  /** Items the classifier produced for this chunk. */
  itemsExtracted: number;
  /** Items actually written to storage. */
  itemsStored: number;
  /** Items dropped as duplicates. */
  itemsDeduped: number;
  /** Failure message when status is 'failed'; null otherwise. */
  error: string | null;
  /** Unix-epoch seconds when processing finished, or null if not yet processed. */
  processedAt: number | null;
}

/** A single knowledge item extracted and labelled by the classifier. */
export interface ClassifiedItem {
  /** Assigned knowledge category. */
  type: KnowledgeType;
  title: string;
  description: string;
  tags: string[];
  /** Importance level assigned during classification. */
  severity: 'critical' | 'warning' | 'suggestion';
  /** Source reference, e.g. "<title>, pages N-M". */
  citation: string;
}

/** Persisted state of an intake job, as stored in the intake_jobs table. */
export interface IntakeJobRecord {
  id: string;
  status: IntakeJobStatus;
  /** Original run configuration (stored as JSON in the DB). */
  config: IntakeConfig;
  /** PDF file facts; null until the document has been parsed. */
  pdfMeta: { totalPages: number; fileHash: string; fileSize: number } | null;
  /** Extracted table of contents; null when unavailable. */
  toc: Array<{ title: string; page: number }> | null;
  /** Aggregate counters filled in at finalization; null while running. */
  stats: {
    itemsExtracted: number;
    itemsStored: number;
    itemsDeduped: number;
    /** Number of chunks that ended in the 'failed' state. */
    itemsFailed: number;
  } | null;
  /** Unix-epoch seconds. */
  createdAt: number;
  /** Unix-epoch seconds; bumped on each processing batch. */
  updatedAt: number;
  /** Unix-epoch seconds, or null while the job is still running. */
  completedAt: number | null;
}

/** Result of a dry-run classification of one page range (nothing is stored). */
export interface IntakePreviewResult {
  /** Items the classifier would extract. */
  items: ClassifiedItem[];
  /** Raw text of the previewed pages. */
  chunkText: string;
  /** The page range that was classified (1-based, inclusive). */
  pageRange: { start: number; end: number };
}
|
|
@@ -31,7 +31,7 @@ function validateEntry(entry: IntelligenceEntry): boolean {
|
|
|
31
31
|
return (
|
|
32
32
|
typeof entry.id === 'string' &&
|
|
33
33
|
entry.id.length > 0 &&
|
|
34
|
-
['pattern', 'anti-pattern', 'rule'].includes(entry.type) &&
|
|
34
|
+
['pattern', 'anti-pattern', 'rule', 'playbook'].includes(entry.type) &&
|
|
35
35
|
typeof entry.title === 'string' &&
|
|
36
36
|
entry.title.length > 0 &&
|
|
37
37
|
typeof entry.description === 'string' &&
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export interface IntelligenceEntry {
|
|
2
2
|
id: string;
|
|
3
|
-
type: 'pattern' | 'anti-pattern' | 'rule';
|
|
3
|
+
type: 'pattern' | 'anti-pattern' | 'rule' | 'playbook';
|
|
4
4
|
domain: string;
|
|
5
5
|
title: string;
|
|
6
6
|
severity: 'critical' | 'warning' | 'suggestion';
|
|
@@ -11,6 +11,8 @@ export interface IntelligenceEntry {
|
|
|
11
11
|
why?: string;
|
|
12
12
|
tags: string[];
|
|
13
13
|
appliesTo?: string[];
|
|
14
|
+
validFrom?: number; // unix epoch — when entry becomes active
|
|
15
|
+
validUntil?: number; // unix epoch — when entry expires (null = never)
|
|
14
16
|
}
|
|
15
17
|
|
|
16
18
|
export interface IntelligenceBundle {
|