@soleri/core 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/brain.d.ts +7 -0
- package/dist/brain/brain.d.ts.map +1 -1
- package/dist/brain/brain.js +56 -9
- package/dist/brain/brain.js.map +1 -1
- package/dist/brain/types.d.ts +2 -2
- package/dist/brain/types.d.ts.map +1 -1
- package/dist/cognee/client.d.ts +3 -0
- package/dist/cognee/client.d.ts.map +1 -1
- package/dist/cognee/client.js +17 -0
- package/dist/cognee/client.js.map +1 -1
- package/dist/cognee/sync-manager.d.ts +94 -0
- package/dist/cognee/sync-manager.d.ts.map +1 -0
- package/dist/cognee/sync-manager.js +293 -0
- package/dist/cognee/sync-manager.js.map +1 -0
- package/dist/curator/curator.d.ts +8 -1
- package/dist/curator/curator.d.ts.map +1 -1
- package/dist/curator/curator.js +64 -1
- package/dist/curator/curator.js.map +1 -1
- package/dist/errors/classify.d.ts +13 -0
- package/dist/errors/classify.d.ts.map +1 -0
- package/dist/errors/classify.js +97 -0
- package/dist/errors/classify.js.map +1 -0
- package/dist/errors/index.d.ts +6 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +4 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/errors/retry.d.ts +40 -0
- package/dist/errors/retry.d.ts.map +1 -0
- package/dist/errors/retry.js +97 -0
- package/dist/errors/retry.js.map +1 -0
- package/dist/errors/types.d.ts +48 -0
- package/dist/errors/types.d.ts.map +1 -0
- package/dist/errors/types.js +59 -0
- package/dist/errors/types.js.map +1 -0
- package/dist/index.d.ts +25 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +21 -3
- package/dist/index.js.map +1 -1
- package/dist/intake/content-classifier.d.ts +14 -0
- package/dist/intake/content-classifier.d.ts.map +1 -0
- package/dist/intake/content-classifier.js +125 -0
- package/dist/intake/content-classifier.js.map +1 -0
- package/dist/intake/dedup-gate.d.ts +17 -0
- package/dist/intake/dedup-gate.d.ts.map +1 -0
- package/dist/intake/dedup-gate.js +66 -0
- package/dist/intake/dedup-gate.js.map +1 -0
- package/dist/intake/intake-pipeline.d.ts +63 -0
- package/dist/intake/intake-pipeline.d.ts.map +1 -0
- package/dist/intake/intake-pipeline.js +373 -0
- package/dist/intake/intake-pipeline.js.map +1 -0
- package/dist/intake/types.d.ts +65 -0
- package/dist/intake/types.d.ts.map +1 -0
- package/dist/intake/types.js +3 -0
- package/dist/intake/types.js.map +1 -0
- package/dist/intelligence/loader.js +1 -1
- package/dist/intelligence/loader.js.map +1 -1
- package/dist/intelligence/types.d.ts +3 -1
- package/dist/intelligence/types.d.ts.map +1 -1
- package/dist/loop/loop-manager.d.ts +58 -7
- package/dist/loop/loop-manager.d.ts.map +1 -1
- package/dist/loop/loop-manager.js +280 -6
- package/dist/loop/loop-manager.js.map +1 -1
- package/dist/loop/types.d.ts +69 -1
- package/dist/loop/types.d.ts.map +1 -1
- package/dist/loop/types.js +4 -1
- package/dist/loop/types.js.map +1 -1
- package/dist/persistence/index.d.ts +3 -0
- package/dist/persistence/index.d.ts.map +1 -0
- package/dist/persistence/index.js +2 -0
- package/dist/persistence/index.js.map +1 -0
- package/dist/persistence/sqlite-provider.d.ts +25 -0
- package/dist/persistence/sqlite-provider.d.ts.map +1 -0
- package/dist/persistence/sqlite-provider.js +59 -0
- package/dist/persistence/sqlite-provider.js.map +1 -0
- package/dist/persistence/types.d.ts +36 -0
- package/dist/persistence/types.d.ts.map +1 -0
- package/dist/persistence/types.js +8 -0
- package/dist/persistence/types.js.map +1 -0
- package/dist/planning/gap-analysis.d.ts +47 -4
- package/dist/planning/gap-analysis.d.ts.map +1 -1
- package/dist/planning/gap-analysis.js +190 -13
- package/dist/planning/gap-analysis.js.map +1 -1
- package/dist/planning/gap-types.d.ts +1 -1
- package/dist/planning/gap-types.d.ts.map +1 -1
- package/dist/planning/gap-types.js.map +1 -1
- package/dist/planning/planner.d.ts +277 -9
- package/dist/planning/planner.d.ts.map +1 -1
- package/dist/planning/planner.js +611 -46
- package/dist/planning/planner.js.map +1 -1
- package/dist/playbooks/generic/brainstorming.d.ts +9 -0
- package/dist/playbooks/generic/brainstorming.d.ts.map +1 -0
- package/dist/playbooks/generic/brainstorming.js +105 -0
- package/dist/playbooks/generic/brainstorming.js.map +1 -0
- package/dist/playbooks/generic/code-review.d.ts +11 -0
- package/dist/playbooks/generic/code-review.d.ts.map +1 -0
- package/dist/playbooks/generic/code-review.js +176 -0
- package/dist/playbooks/generic/code-review.js.map +1 -0
- package/dist/playbooks/generic/subagent-execution.d.ts +9 -0
- package/dist/playbooks/generic/subagent-execution.d.ts.map +1 -0
- package/dist/playbooks/generic/subagent-execution.js +68 -0
- package/dist/playbooks/generic/subagent-execution.js.map +1 -0
- package/dist/playbooks/generic/systematic-debugging.d.ts +9 -0
- package/dist/playbooks/generic/systematic-debugging.d.ts.map +1 -0
- package/dist/playbooks/generic/systematic-debugging.js +87 -0
- package/dist/playbooks/generic/systematic-debugging.js.map +1 -0
- package/dist/playbooks/generic/tdd.d.ts +9 -0
- package/dist/playbooks/generic/tdd.d.ts.map +1 -0
- package/dist/playbooks/generic/tdd.js +70 -0
- package/dist/playbooks/generic/tdd.js.map +1 -0
- package/dist/playbooks/generic/verification.d.ts +9 -0
- package/dist/playbooks/generic/verification.d.ts.map +1 -0
- package/dist/playbooks/generic/verification.js +74 -0
- package/dist/playbooks/generic/verification.js.map +1 -0
- package/dist/playbooks/index.d.ts +4 -0
- package/dist/playbooks/index.d.ts.map +1 -0
- package/dist/playbooks/index.js +5 -0
- package/dist/playbooks/index.js.map +1 -0
- package/dist/playbooks/playbook-registry.d.ts +42 -0
- package/dist/playbooks/playbook-registry.d.ts.map +1 -0
- package/dist/playbooks/playbook-registry.js +227 -0
- package/dist/playbooks/playbook-registry.js.map +1 -0
- package/dist/playbooks/playbook-seeder.d.ts +47 -0
- package/dist/playbooks/playbook-seeder.d.ts.map +1 -0
- package/dist/playbooks/playbook-seeder.js +104 -0
- package/dist/playbooks/playbook-seeder.js.map +1 -0
- package/dist/playbooks/playbook-types.d.ts +132 -0
- package/dist/playbooks/playbook-types.d.ts.map +1 -0
- package/dist/playbooks/playbook-types.js +12 -0
- package/dist/playbooks/playbook-types.js.map +1 -0
- package/dist/project/project-registry.d.ts.map +1 -1
- package/dist/project/project-registry.js +9 -11
- package/dist/project/project-registry.js.map +1 -1
- package/dist/prompts/index.d.ts +4 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +3 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/prompts/parser.d.ts +17 -0
- package/dist/prompts/parser.d.ts.map +1 -0
- package/dist/prompts/parser.js +47 -0
- package/dist/prompts/parser.js.map +1 -0
- package/dist/prompts/template-manager.d.ts +25 -0
- package/dist/prompts/template-manager.d.ts.map +1 -0
- package/dist/prompts/template-manager.js +71 -0
- package/dist/prompts/template-manager.js.map +1 -0
- package/dist/prompts/types.d.ts +26 -0
- package/dist/prompts/types.d.ts.map +1 -0
- package/dist/prompts/types.js +5 -0
- package/dist/prompts/types.js.map +1 -0
- package/dist/runtime/admin-extra-ops.d.ts +5 -3
- package/dist/runtime/admin-extra-ops.d.ts.map +1 -1
- package/dist/runtime/admin-extra-ops.js +322 -11
- package/dist/runtime/admin-extra-ops.js.map +1 -1
- package/dist/runtime/admin-ops.d.ts.map +1 -1
- package/dist/runtime/admin-ops.js +10 -3
- package/dist/runtime/admin-ops.js.map +1 -1
- package/dist/runtime/capture-ops.d.ts.map +1 -1
- package/dist/runtime/capture-ops.js +20 -2
- package/dist/runtime/capture-ops.js.map +1 -1
- package/dist/runtime/cognee-sync-ops.d.ts +12 -0
- package/dist/runtime/cognee-sync-ops.d.ts.map +1 -0
- package/dist/runtime/cognee-sync-ops.js +55 -0
- package/dist/runtime/cognee-sync-ops.js.map +1 -0
- package/dist/runtime/core-ops.d.ts +8 -6
- package/dist/runtime/core-ops.d.ts.map +1 -1
- package/dist/runtime/core-ops.js +226 -9
- package/dist/runtime/core-ops.js.map +1 -1
- package/dist/runtime/curator-extra-ops.d.ts +2 -2
- package/dist/runtime/curator-extra-ops.d.ts.map +1 -1
- package/dist/runtime/curator-extra-ops.js +15 -3
- package/dist/runtime/curator-extra-ops.js.map +1 -1
- package/dist/runtime/domain-ops.js +2 -2
- package/dist/runtime/domain-ops.js.map +1 -1
- package/dist/runtime/grading-ops.d.ts.map +1 -1
- package/dist/runtime/grading-ops.js.map +1 -1
- package/dist/runtime/intake-ops.d.ts +14 -0
- package/dist/runtime/intake-ops.d.ts.map +1 -0
- package/dist/runtime/intake-ops.js +110 -0
- package/dist/runtime/intake-ops.js.map +1 -0
- package/dist/runtime/loop-ops.d.ts +5 -4
- package/dist/runtime/loop-ops.d.ts.map +1 -1
- package/dist/runtime/loop-ops.js +84 -12
- package/dist/runtime/loop-ops.js.map +1 -1
- package/dist/runtime/memory-cross-project-ops.d.ts.map +1 -1
- package/dist/runtime/memory-cross-project-ops.js.map +1 -1
- package/dist/runtime/memory-extra-ops.js +5 -5
- package/dist/runtime/memory-extra-ops.js.map +1 -1
- package/dist/runtime/orchestrate-ops.d.ts.map +1 -1
- package/dist/runtime/orchestrate-ops.js +8 -2
- package/dist/runtime/orchestrate-ops.js.map +1 -1
- package/dist/runtime/planning-extra-ops.d.ts +13 -5
- package/dist/runtime/planning-extra-ops.d.ts.map +1 -1
- package/dist/runtime/planning-extra-ops.js +381 -18
- package/dist/runtime/planning-extra-ops.js.map +1 -1
- package/dist/runtime/playbook-ops.d.ts +14 -0
- package/dist/runtime/playbook-ops.d.ts.map +1 -0
- package/dist/runtime/playbook-ops.js +141 -0
- package/dist/runtime/playbook-ops.js.map +1 -0
- package/dist/runtime/project-ops.d.ts.map +1 -1
- package/dist/runtime/project-ops.js +7 -2
- package/dist/runtime/project-ops.js.map +1 -1
- package/dist/runtime/runtime.d.ts.map +1 -1
- package/dist/runtime/runtime.js +27 -8
- package/dist/runtime/runtime.js.map +1 -1
- package/dist/runtime/types.d.ts +8 -0
- package/dist/runtime/types.d.ts.map +1 -1
- package/dist/runtime/vault-extra-ops.d.ts +3 -2
- package/dist/runtime/vault-extra-ops.d.ts.map +1 -1
- package/dist/runtime/vault-extra-ops.js +345 -4
- package/dist/runtime/vault-extra-ops.js.map +1 -1
- package/dist/vault/playbook.d.ts +34 -0
- package/dist/vault/playbook.d.ts.map +1 -0
- package/dist/vault/playbook.js +60 -0
- package/dist/vault/playbook.js.map +1 -0
- package/dist/vault/vault.d.ts +31 -32
- package/dist/vault/vault.d.ts.map +1 -1
- package/dist/vault/vault.js +201 -181
- package/dist/vault/vault.js.map +1 -1
- package/package.json +7 -3
- package/src/__tests__/admin-extra-ops.test.ts +62 -15
- package/src/__tests__/admin-ops.test.ts +2 -2
- package/src/__tests__/brain.test.ts +3 -3
- package/src/__tests__/cognee-integration.test.ts +80 -0
- package/src/__tests__/cognee-sync-manager.test.ts +103 -0
- package/src/__tests__/core-ops.test.ts +30 -4
- package/src/__tests__/curator-extra-ops.test.ts +24 -2
- package/src/__tests__/errors.test.ts +388 -0
- package/src/__tests__/grading-ops.test.ts +28 -7
- package/src/__tests__/intake-pipeline.test.ts +162 -0
- package/src/__tests__/loop-ops.test.ts +74 -3
- package/src/__tests__/memory-cross-project-ops.test.ts +3 -1
- package/src/__tests__/orchestrate-ops.test.ts +8 -3
- package/src/__tests__/persistence.test.ts +225 -0
- package/src/__tests__/planner.test.ts +99 -21
- package/src/__tests__/planning-extra-ops.test.ts +168 -10
- package/src/__tests__/playbook-registry.test.ts +326 -0
- package/src/__tests__/playbook-seeder.test.ts +163 -0
- package/src/__tests__/playbook.test.ts +389 -0
- package/src/__tests__/project-ops.test.ts +18 -4
- package/src/__tests__/template-manager.test.ts +222 -0
- package/src/__tests__/vault-extra-ops.test.ts +82 -7
- package/src/brain/brain.ts +71 -9
- package/src/brain/types.ts +2 -2
- package/src/cognee/client.ts +18 -0
- package/src/cognee/sync-manager.ts +389 -0
- package/src/curator/curator.ts +88 -7
- package/src/errors/classify.ts +102 -0
- package/src/errors/index.ts +5 -0
- package/src/errors/retry.ts +132 -0
- package/src/errors/types.ts +81 -0
- package/src/index.ts +114 -3
- package/src/intake/content-classifier.ts +146 -0
- package/src/intake/dedup-gate.ts +92 -0
- package/src/intake/intake-pipeline.ts +503 -0
- package/src/intake/types.ts +69 -0
- package/src/intelligence/loader.ts +1 -1
- package/src/intelligence/types.ts +3 -1
- package/src/loop/loop-manager.ts +325 -7
- package/src/loop/types.ts +72 -1
- package/src/persistence/index.ts +7 -0
- package/src/persistence/sqlite-provider.ts +62 -0
- package/src/persistence/types.ts +44 -0
- package/src/planning/gap-analysis.ts +286 -17
- package/src/planning/gap-types.ts +4 -1
- package/src/planning/planner.ts +828 -55
- package/src/playbooks/generic/brainstorming.ts +110 -0
- package/src/playbooks/generic/code-review.ts +181 -0
- package/src/playbooks/generic/subagent-execution.ts +74 -0
- package/src/playbooks/generic/systematic-debugging.ts +92 -0
- package/src/playbooks/generic/tdd.ts +75 -0
- package/src/playbooks/generic/verification.ts +79 -0
- package/src/playbooks/index.ts +27 -0
- package/src/playbooks/playbook-registry.ts +284 -0
- package/src/playbooks/playbook-seeder.ts +119 -0
- package/src/playbooks/playbook-types.ts +162 -0
- package/src/project/project-registry.ts +29 -17
- package/src/prompts/index.ts +3 -0
- package/src/prompts/parser.ts +59 -0
- package/src/prompts/template-manager.ts +77 -0
- package/src/prompts/types.ts +28 -0
- package/src/runtime/admin-extra-ops.ts +358 -13
- package/src/runtime/admin-ops.ts +17 -6
- package/src/runtime/capture-ops.ts +25 -6
- package/src/runtime/cognee-sync-ops.ts +63 -0
- package/src/runtime/core-ops.ts +258 -8
- package/src/runtime/curator-extra-ops.ts +17 -3
- package/src/runtime/domain-ops.ts +2 -2
- package/src/runtime/grading-ops.ts +11 -2
- package/src/runtime/intake-ops.ts +126 -0
- package/src/runtime/loop-ops.ts +96 -13
- package/src/runtime/memory-cross-project-ops.ts +1 -2
- package/src/runtime/memory-extra-ops.ts +5 -5
- package/src/runtime/orchestrate-ops.ts +8 -2
- package/src/runtime/planning-extra-ops.ts +414 -23
- package/src/runtime/playbook-ops.ts +169 -0
- package/src/runtime/project-ops.ts +9 -3
- package/src/runtime/runtime.ts +35 -9
- package/src/runtime/types.ts +8 -0
- package/src/runtime/vault-extra-ops.ts +385 -4
- package/src/vault/playbook.ts +87 -0
- package/src/vault/vault.ts +301 -235
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retry presets with exponential backoff + jitter.
|
|
3
|
+
*
|
|
4
|
+
* Three presets map to different execution contexts:
|
|
5
|
+
* - fast: quick API calls, 3 attempts, short waits
|
|
6
|
+
* - normal: standard operations, 10 attempts, moderate waits
|
|
7
|
+
* - patient: batch/pipeline work, 25 attempts, long waits
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { classifyError } from './classify.js';
|
|
11
|
+
import { SoleriError, type Result, ok, err } from './types.js';
|
|
12
|
+
|
|
13
|
+
// ─── Types ─────────────────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
export type RetryPreset = 'fast' | 'normal' | 'patient';
|
|
16
|
+
|
|
17
|
+
export interface RetryConfig {
|
|
18
|
+
initialIntervalMs: number;
|
|
19
|
+
maxIntervalMs: number;
|
|
20
|
+
maxAttempts: number;
|
|
21
|
+
backoffMultiplier: number;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export interface RetryOptions {
|
|
25
|
+
onRetry?: (error: SoleriError, attempt: number, delayMs: number) => void;
|
|
26
|
+
signal?: AbortSignal;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// ─── Presets ───────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
export const RETRY_PRESETS: Record<RetryPreset, RetryConfig> = {
|
|
32
|
+
fast: { initialIntervalMs: 1_000, maxIntervalMs: 10_000, maxAttempts: 3, backoffMultiplier: 2 },
|
|
33
|
+
normal: {
|
|
34
|
+
initialIntervalMs: 10_000,
|
|
35
|
+
maxIntervalMs: 120_000,
|
|
36
|
+
maxAttempts: 10,
|
|
37
|
+
backoffMultiplier: 2,
|
|
38
|
+
},
|
|
39
|
+
patient: {
|
|
40
|
+
initialIntervalMs: 60_000,
|
|
41
|
+
maxIntervalMs: 900_000,
|
|
42
|
+
maxAttempts: 25,
|
|
43
|
+
backoffMultiplier: 1.5,
|
|
44
|
+
},
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
// ─── Helpers ───────────────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Check if a classified error should be retried: it must be retryable and `attempt` must be below the preset's maxAttempts.
|
|
51
|
+
*/
|
|
52
|
+
export function shouldRetry(error: SoleriError, attempt: number, preset: RetryPreset): boolean {
|
|
53
|
+
if (!error.retryable) return false;
|
|
54
|
+
return attempt < RETRY_PRESETS[preset].maxAttempts;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Calculate retry delay with exponential backoff + jitter; `attempt` is 0-based, so attempt 0 starts from initialIntervalMs.
|
|
59
|
+
* Jitter adds ±25% to prevent thundering herd.
|
|
60
|
+
*/
|
|
61
|
+
export function getRetryDelay(attempt: number, preset: RetryPreset): number {
|
|
62
|
+
const config = RETRY_PRESETS[preset];
|
|
63
|
+
const base = config.initialIntervalMs * Math.pow(config.backoffMultiplier, attempt);
|
|
64
|
+
const capped = Math.min(base, config.maxIntervalMs);
|
|
65
|
+
// ±25% jitter
|
|
66
|
+
const jitter = capped * 0.25 * (Math.random() * 2 - 1);
|
|
67
|
+
return Math.max(0, Math.round(capped + jitter));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// ─── Retry Loop ────────────────────────────────────────────────────────
|
|
71
|
+
|
|
72
|
+
function sleep(ms: number, signal?: AbortSignal): Promise<void> {
|
|
73
|
+
return new Promise((resolve, reject) => {
|
|
74
|
+
if (signal?.aborted) {
|
|
75
|
+
reject(signal.reason ?? new Error('Aborted'));
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
const timer = setTimeout(resolve, ms);
|
|
79
|
+
signal?.addEventListener(
|
|
80
|
+
'abort',
|
|
81
|
+
() => {
|
|
82
|
+
clearTimeout(timer);
|
|
83
|
+
reject(signal.reason ?? new Error('Aborted'));
|
|
84
|
+
},
|
|
85
|
+
{ once: true },
|
|
86
|
+
);
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Retry an async operation with a named preset.
|
|
92
|
+
*
|
|
93
|
+
* - On success: returns ok(result)
|
|
94
|
+
* - On permanent/fixable error: returns err() immediately
|
|
95
|
+
* - On retryable error: retries up to maxAttempts with backoff
|
|
96
|
+
* - On exhaustion, or if the signal aborts during a backoff wait: returns err() with last error
|
|
97
|
+
*/
|
|
98
|
+
export async function retryWithPreset<T>(
|
|
99
|
+
fn: () => Promise<T>,
|
|
100
|
+
preset: RetryPreset,
|
|
101
|
+
options?: RetryOptions,
|
|
102
|
+
): Promise<Result<T>> {
|
|
103
|
+
let lastError: SoleriError | undefined;
|
|
104
|
+
|
|
105
|
+
for (let attempt = 0; attempt < RETRY_PRESETS[preset].maxAttempts; attempt++) {
|
|
106
|
+
try {
|
|
107
|
+
const value = await fn();
|
|
108
|
+
return ok(value);
|
|
109
|
+
} catch (thrown: unknown) {
|
|
110
|
+
lastError = classifyError(thrown);
|
|
111
|
+
|
|
112
|
+
if (!shouldRetry(lastError, attempt + 1, preset)) {
|
|
113
|
+
return err(lastError);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const delay = getRetryDelay(attempt, preset);
|
|
117
|
+
options?.onRetry?.(lastError, attempt + 1, delay);
|
|
118
|
+
|
|
119
|
+
try {
|
|
120
|
+
await sleep(delay, options?.signal);
|
|
121
|
+
} catch {
|
|
122
|
+
// Aborted during sleep
|
|
123
|
+
return err(lastError);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return err(lastError ?? new SoleriError('Max retries exceeded', SoleriErrorCode.INTERNAL));
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// SoleriErrorCode is needed only for the fallback error above; ES module imports are hoisted, so this trailing import still works.
|
|
132
|
+
import { SoleriErrorCode } from './types.js';
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Classified error types and the Result helpers built on them.
|
|
3
|
+
*
|
|
4
|
+
* Every error is classified as retryable, fixable, or permanent.
|
|
5
|
+
* This lets callers decide strategy without inspecting codes.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ─── Error Codes ───────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
export enum SoleriErrorCode {
|
|
11
|
+
NETWORK = 'NETWORK',
|
|
12
|
+
AUTH = 'AUTH',
|
|
13
|
+
VALIDATION = 'VALIDATION',
|
|
14
|
+
TIMEOUT = 'TIMEOUT',
|
|
15
|
+
RATE_LIMIT = 'RATE_LIMIT',
|
|
16
|
+
INTERNAL = 'INTERNAL',
|
|
17
|
+
LLM_OVERLOAD = 'LLM_OVERLOAD',
|
|
18
|
+
RESOURCE_NOT_FOUND = 'RESOURCE_NOT_FOUND',
|
|
19
|
+
CONFIG_ERROR = 'CONFIG_ERROR',
|
|
20
|
+
VAULT_UNREACHABLE = 'VAULT_UNREACHABLE',
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// ─── Classification ────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
export type ErrorClassification = 'retryable' | 'fixable' | 'permanent';
|
|
26
|
+
|
|
27
|
+
const CLASSIFICATION_MAP: Record<SoleriErrorCode, ErrorClassification> = {
|
|
28
|
+
[SoleriErrorCode.NETWORK]: 'retryable',
|
|
29
|
+
[SoleriErrorCode.TIMEOUT]: 'retryable',
|
|
30
|
+
[SoleriErrorCode.RATE_LIMIT]: 'retryable',
|
|
31
|
+
[SoleriErrorCode.LLM_OVERLOAD]: 'retryable',
|
|
32
|
+
[SoleriErrorCode.VAULT_UNREACHABLE]: 'retryable',
|
|
33
|
+
[SoleriErrorCode.INTERNAL]: 'retryable',
|
|
34
|
+
[SoleriErrorCode.VALIDATION]: 'fixable',
|
|
35
|
+
[SoleriErrorCode.AUTH]: 'permanent',
|
|
36
|
+
[SoleriErrorCode.RESOURCE_NOT_FOUND]: 'permanent',
|
|
37
|
+
[SoleriErrorCode.CONFIG_ERROR]: 'permanent',
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
// ─── SoleriError ───────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
export interface SoleriErrorOptions {
|
|
43
|
+
cause?: Error;
|
|
44
|
+
context?: Record<string, unknown>;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export class SoleriError extends Error {
|
|
48
|
+
readonly code: SoleriErrorCode;
|
|
49
|
+
readonly classification: ErrorClassification;
|
|
50
|
+
readonly retryable: boolean;
|
|
51
|
+
readonly context?: Record<string, unknown>;
|
|
52
|
+
|
|
53
|
+
constructor(message: string, code: SoleriErrorCode, options?: SoleriErrorOptions) {
|
|
54
|
+
super(message, options?.cause ? { cause: options.cause } : undefined);
|
|
55
|
+
this.name = 'SoleriError';
|
|
56
|
+
this.code = code;
|
|
57
|
+
this.classification = CLASSIFICATION_MAP[code];
|
|
58
|
+
this.retryable = this.classification === 'retryable';
|
|
59
|
+
this.context = options?.context;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// ─── Result Type ───────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
export type Result<T, E = SoleriError> = { ok: true; value: T } | { ok: false; error: E };
|
|
66
|
+
|
|
67
|
+
export function ok<T>(value: T): Result<T, never> {
|
|
68
|
+
return { ok: true, value };
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
export function err<T = never>(error: SoleriError): Result<T> {
|
|
72
|
+
return { ok: false, error };
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export function isOk<T, E>(result: Result<T, E>): result is { ok: true; value: T } {
|
|
76
|
+
return result.ok === true;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export function isErr<T, E>(result: Result<T, E>): result is { ok: false; error: E } {
|
|
80
|
+
return result.ok === false;
|
|
81
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -5,6 +5,31 @@ export { loadIntelligenceData } from './intelligence/loader.js';
|
|
|
5
5
|
// ─── Vault ───────────────────────────────────────────────────────────
|
|
6
6
|
export { Vault } from './vault/vault.js';
|
|
7
7
|
export type { SearchResult, VaultStats, ProjectInfo, Memory, MemoryStats } from './vault/vault.js';
|
|
8
|
+
export { validatePlaybook, parsePlaybookFromEntry } from './vault/playbook.js';
|
|
9
|
+
export type { Playbook, PlaybookStep, PlaybookValidationResult } from './vault/playbook.js';
|
|
10
|
+
|
|
11
|
+
// ─── Playbook System (registry, matching, seeding) ─────────────────
|
|
12
|
+
export {
|
|
13
|
+
getBuiltinPlaybook,
|
|
14
|
+
getAllBuiltinPlaybooks,
|
|
15
|
+
scorePlaybook,
|
|
16
|
+
mergePlaybooks,
|
|
17
|
+
matchPlaybooks,
|
|
18
|
+
seedDefaultPlaybooks,
|
|
19
|
+
playbookDefinitionToEntry,
|
|
20
|
+
entryToPlaybookDefinition,
|
|
21
|
+
} from './playbooks/index.js';
|
|
22
|
+
export type {
|
|
23
|
+
PlaybookTier,
|
|
24
|
+
PlaybookIntent,
|
|
25
|
+
BrainstormSection,
|
|
26
|
+
PlaybookGate,
|
|
27
|
+
PlaybookTaskTemplate,
|
|
28
|
+
PlaybookDefinition,
|
|
29
|
+
MergedPlaybook,
|
|
30
|
+
PlaybookMatchResult,
|
|
31
|
+
} from './playbooks/index.js';
|
|
32
|
+
export { createPlaybookOps } from './runtime/playbook-ops.js';
|
|
8
33
|
|
|
9
34
|
// ─── Text Utilities ─────────────────────────────────────────────────
|
|
10
35
|
export {
|
|
@@ -99,12 +124,47 @@ export type {
|
|
|
99
124
|
CogneeCognifyResult,
|
|
100
125
|
} from './cognee/types.js';
|
|
101
126
|
|
|
127
|
+
// ─── Cognee Sync ──────────────────────────────────────────────────────
|
|
128
|
+
export { CogneeSyncManager } from './cognee/sync-manager.js';
|
|
129
|
+
export type { SyncOp, SyncStatus, SyncQueueItem, SyncManagerStats } from './cognee/sync-manager.js';
|
|
130
|
+
|
|
131
|
+
// ─── Intake Pipeline ──────────────────────────────────────────────────
|
|
132
|
+
export { IntakePipeline } from './intake/intake-pipeline.js';
|
|
133
|
+
export { classifyChunk, VALID_TYPES, CLASSIFICATION_PROMPT } from './intake/content-classifier.js';
|
|
134
|
+
export { dedupItems, DEDUP_THRESHOLD } from './intake/dedup-gate.js';
|
|
135
|
+
export type {
|
|
136
|
+
IntakeConfig,
|
|
137
|
+
IntakeChunk,
|
|
138
|
+
IntakeJobRecord,
|
|
139
|
+
IntakeJobStatus,
|
|
140
|
+
IntakeChunkStatus,
|
|
141
|
+
KnowledgeType,
|
|
142
|
+
ClassifiedItem,
|
|
143
|
+
IntakePreviewResult,
|
|
144
|
+
} from './intake/types.js';
|
|
145
|
+
export type { DedupResult } from './intake/dedup-gate.js';
|
|
146
|
+
|
|
102
147
|
// ─── Planning ────────────────────────────────────────────────────────
|
|
103
|
-
export {
|
|
148
|
+
export {
|
|
149
|
+
Planner,
|
|
150
|
+
calculateScore,
|
|
151
|
+
calculateDriftScore,
|
|
152
|
+
isValidTransition,
|
|
153
|
+
getValidNextStatuses,
|
|
154
|
+
shouldExpire,
|
|
155
|
+
LIFECYCLE_TRANSITIONS,
|
|
156
|
+
NON_EXPIRING_STATUSES,
|
|
157
|
+
DRIFT_WEIGHTS,
|
|
158
|
+
} from './planning/planner.js';
|
|
104
159
|
export type {
|
|
105
160
|
PlanStatus,
|
|
106
161
|
TaskStatus,
|
|
162
|
+
TaskEvidence,
|
|
163
|
+
TaskMetrics,
|
|
164
|
+
TaskDeliverable,
|
|
165
|
+
ExecutionSummary,
|
|
107
166
|
PlanTask,
|
|
167
|
+
PlanDecision,
|
|
108
168
|
Plan,
|
|
109
169
|
PlanStore,
|
|
110
170
|
DriftItem,
|
|
@@ -115,7 +175,12 @@ export type {
|
|
|
115
175
|
} from './planning/planner.js';
|
|
116
176
|
|
|
117
177
|
// ─── Plan Gap Analysis ──────────────────────────────────────────────
|
|
118
|
-
export {
|
|
178
|
+
export {
|
|
179
|
+
runGapAnalysis,
|
|
180
|
+
createToolFeasibilityPass,
|
|
181
|
+
createFlowAlignmentPass,
|
|
182
|
+
createAntiPatternPass,
|
|
183
|
+
} from './planning/gap-analysis.js';
|
|
119
184
|
export type { GapAnalysisOptions, GapAnalysisPass } from './planning/gap-analysis.js';
|
|
120
185
|
export {
|
|
121
186
|
SEVERITY_WEIGHTS,
|
|
@@ -128,13 +193,21 @@ export {
|
|
|
128
193
|
export type { GapSeverity, GapCategory, PlanGap } from './planning/gap-types.js';
|
|
129
194
|
|
|
130
195
|
// ─── Loop ────────────────────────────────────────────────────────────
|
|
131
|
-
export {
|
|
196
|
+
export {
|
|
197
|
+
LoopManager,
|
|
198
|
+
extractPromise,
|
|
199
|
+
detectImplicitCompletion,
|
|
200
|
+
detectAnomaly,
|
|
201
|
+
} from './loop/loop-manager.js';
|
|
132
202
|
export type {
|
|
133
203
|
LoopMode,
|
|
134
204
|
LoopConfig,
|
|
135
205
|
LoopIteration,
|
|
136
206
|
LoopStatus,
|
|
137
207
|
LoopState,
|
|
208
|
+
LoopKnowledge,
|
|
209
|
+
LoopHistoryEntry,
|
|
210
|
+
LoopIterateDecision,
|
|
138
211
|
} from './loop/types.js';
|
|
139
212
|
|
|
140
213
|
// ─── LLM Types ───────────────────────────────────────────────────────
|
|
@@ -226,4 +299,42 @@ export { createCaptureOps } from './runtime/capture-ops.js';
|
|
|
226
299
|
export { createCuratorExtraOps } from './runtime/curator-extra-ops.js';
|
|
227
300
|
export { createProjectOps } from './runtime/project-ops.js';
|
|
228
301
|
export { createMemoryCrossProjectOps } from './runtime/memory-cross-project-ops.js';
|
|
302
|
+
export { createCogneeSyncOps } from './runtime/cognee-sync-ops.js';
|
|
303
|
+
export { createIntakeOps } from './runtime/intake-ops.js';
|
|
229
304
|
export type { AgentRuntimeConfig, AgentRuntime } from './runtime/types.js';
|
|
305
|
+
|
|
306
|
+
// ─── Errors ────────────────────────────────────────────────────────────
|
|
307
|
+
export {
|
|
308
|
+
SoleriErrorCode,
|
|
309
|
+
SoleriError,
|
|
310
|
+
ok,
|
|
311
|
+
err,
|
|
312
|
+
isOk,
|
|
313
|
+
isErr,
|
|
314
|
+
classifyError,
|
|
315
|
+
retryWithPreset,
|
|
316
|
+
shouldRetry,
|
|
317
|
+
getRetryDelay,
|
|
318
|
+
RETRY_PRESETS,
|
|
319
|
+
} from './errors/index.js';
|
|
320
|
+
export type {
|
|
321
|
+
ErrorClassification,
|
|
322
|
+
Result,
|
|
323
|
+
SoleriErrorOptions,
|
|
324
|
+
RetryPreset,
|
|
325
|
+
RetryConfig as SoleriRetryConfig,
|
|
326
|
+
RetryOptions,
|
|
327
|
+
} from './errors/index.js';
|
|
328
|
+
|
|
329
|
+
// ─── Persistence ───────────────────────────────────────────────────────
|
|
330
|
+
export { SQLitePersistenceProvider } from './persistence/index.js';
|
|
331
|
+
export type {
|
|
332
|
+
PersistenceProvider,
|
|
333
|
+
PersistenceParams,
|
|
334
|
+
RunResult,
|
|
335
|
+
PersistenceConfig,
|
|
336
|
+
} from './persistence/index.js';
|
|
337
|
+
|
|
338
|
+
// ─── Prompts ───────────────────────────────────────────────────────────
|
|
339
|
+
export { TemplateManager, parseVariables, resolveIncludes } from './prompts/index.js';
|
|
340
|
+
export type { PromptTemplate, TemplateVariable, RenderOptions } from './prompts/index.js';
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
// ─── Content Classifier — LLM-based knowledge extraction ────────────────────
|
|
2
|
+
//
|
|
3
|
+
// Takes a text chunk and uses an LLM to classify it into structured
|
|
4
|
+
// knowledge items. Graceful degradation: returns [] on any error.
|
|
5
|
+
|
|
6
|
+
import type { LLMClient } from '../llm/llm-client.js';
|
|
7
|
+
import type { ClassifiedItem, KnowledgeType } from './types.js';
|
|
8
|
+
|
|
9
|
+
// =============================================================================
|
|
10
|
+
// CONSTANTS
|
|
11
|
+
// =============================================================================
|
|
12
|
+
|
|
13
|
+
export const VALID_TYPES: KnowledgeType[] = [
|
|
14
|
+
'pattern',
|
|
15
|
+
'anti-pattern',
|
|
16
|
+
'principle',
|
|
17
|
+
'concept',
|
|
18
|
+
'reference',
|
|
19
|
+
'workflow',
|
|
20
|
+
'idea',
|
|
21
|
+
'roadmap',
|
|
22
|
+
];
|
|
23
|
+
|
|
24
|
+
const VALID_SEVERITIES = ['critical', 'warning', 'suggestion'] as const;
|
|
25
|
+
type Severity = (typeof VALID_SEVERITIES)[number];
|
|
26
|
+
|
|
27
|
+
export const CLASSIFICATION_PROMPT = `You are a knowledge extraction engine. Your job is to analyze a text chunk and extract structured knowledge items from it.
|
|
28
|
+
|
|
29
|
+
For each distinct piece of knowledge you identify, produce an object with these fields:
|
|
30
|
+
- type: one of ${JSON.stringify(VALID_TYPES)}
|
|
31
|
+
- title: concise title, max 80 characters
|
|
32
|
+
- description: 2-3 sentence summary of the knowledge
|
|
33
|
+
- tags: 3-5 lowercase single-word or hyphenated tags
|
|
34
|
+
- severity: one of "critical", "warning", "suggestion"
|
|
35
|
+
|
|
36
|
+
Rules:
|
|
37
|
+
- Extract ALL meaningful knowledge items from the text.
|
|
38
|
+
- Each item must be self-contained and independently useful.
|
|
39
|
+
- Use "critical" for must-know items, "warning" for important gotchas, "suggestion" for nice-to-know.
|
|
40
|
+
- Tags should be specific and useful for search.
|
|
41
|
+
- Respond with a pure JSON array of objects. No markdown fences, no explanation, no wrapping.
|
|
42
|
+
- If the text contains no extractable knowledge, respond with an empty array: []`;
|
|
43
|
+
|
|
44
|
+
// =============================================================================
|
|
45
|
+
// CLASSIFIER
|
|
46
|
+
// =============================================================================
|
|
47
|
+
|
|
48
|
+
/**
 * Classify a text chunk into structured knowledge items using an LLM.
 *
 * The chunk is sent as the user prompt together with CLASSIFICATION_PROMPT as
 * the system prompt. The reply is parsed as JSON and every array element is
 * passed through `sanitizeItem`; elements that cannot be salvaged are dropped.
 *
 * This function never throws. A blank chunk, a failed completion, an
 * unparseable reply, or a reply that is not a JSON array all yield `[]`.
 *
 * @param llm - LLMClient instance used to run the completion
 * @param chunkText - The text to classify
 * @param citation - Source citation (e.g. "book.pdf, pages 12-15") attached to every returned item
 * @returns Classified items, or [] on any error
 */
export async function classifyChunk(
  llm: LLMClient,
  chunkText: string,
  citation: string,
): Promise<ClassifiedItem[]> {
  try {
    // A blank chunk has nothing to extract, so skip the LLM round-trip entirely.
    // Kept inside the try so a non-string argument still degrades to [].
    if (chunkText.trim().length === 0) return [];

    const result = await llm.complete({
      provider: 'openai',
      model: 'gpt-4o-mini',
      systemPrompt: CLASSIFICATION_PROMPT,
      userPrompt: chunkText,
      maxTokens: 4096,
      temperature: 0.3,
      caller: 'intake',
      task: 'classify',
    });

    const parsed = parseJsonResponse(result.text);
    if (!Array.isArray(parsed)) return [];

    const classified: ClassifiedItem[] = [];
    for (const candidate of parsed as unknown[]) {
      const item = sanitizeItem(candidate, citation);
      if (item !== null) classified.push(item);
    }
    return classified;
  } catch {
    // Graceful degradation — never throw
    return [];
  }
}
|
|
84
|
+
|
|
85
|
+
// =============================================================================
|
|
86
|
+
// HELPERS
|
|
87
|
+
// =============================================================================
|
|
88
|
+
|
|
89
|
+
/**
 * Parse the JSON payload of an LLM response, tolerating the wrapping a model
 * may add despite instructions.
 *
 * Candidates are tried in order, and the first one that parses wins:
 *   1. the trimmed response as-is;
 *   2. the contents of the first Markdown code fence (with an optional,
 *      case-insensitive `json` tag), wherever it appears in the text;
 *   3. the span from the first `[` to the last `]`, which recovers an array
 *      surrounded by explanatory prose.
 *
 * @param text - Raw text returned by the model
 * @returns The parsed JSON value
 * @throws The error raised while parsing the trimmed text, when no candidate
 *   parses (a SyntaxError from JSON.parse)
 */
function parseJsonResponse(text: string): unknown {
  const trimmed = text.trim();
  const candidates: string[] = [trimmed];

  // Unanchored so that prose before or after the fence does not defeat the match.
  const fenceMatch = /```(?:json)?\s*([\s\S]*?)\s*```/i.exec(trimmed);
  const fenced = fenceMatch === null ? undefined : fenceMatch[1];
  if (fenced !== undefined) candidates.push(fenced);

  const arrayStart = trimmed.indexOf('[');
  const arrayEnd = trimmed.lastIndexOf(']');
  if (arrayStart !== -1 && arrayEnd > arrayStart) {
    candidates.push(trimmed.slice(arrayStart, arrayEnd + 1));
  }

  // Remember the failure on the unmodified text: it is the most useful error to surface.
  let firstError: unknown;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (err) {
      if (firstError === undefined) firstError = err;
    }
  }
  throw firstError;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Validate and sanitize a single classified item produced by the model.
 *
 * String fields are normalized before validation so that harmless formatting
 * differences (surrounding whitespace, capitalisation of `type`, `severity`
 * and tags) do not cause an otherwise valid item to be rejected or downgraded.
 *
 * @param raw - One element of the parsed model response
 * @param citation - Source citation copied onto the resulting item
 * @returns The cleaned item, or null when `raw` is not an object, its type is
 *   not one of VALID_TYPES, or its title or description is missing or blank
 */
function sanitizeItem(raw: unknown, citation: string): ClassifiedItem | null {
  if (!raw || typeof raw !== 'object') return null;

  const obj = raw as Record<string, unknown>;

  // Type — must be a valid KnowledgeType (compared case-insensitively)
  const typeText = typeof obj.type === 'string' ? obj.type.trim().toLowerCase() : '';
  const type = VALID_TYPES.find((candidate) => candidate === typeText);
  if (type === undefined) return null;

  // Title — required, at most 80 chars. Trim first so leading whitespace does
  // not consume the budget, then trim again in case the cut lands after a space.
  const title = typeof obj.title === 'string' ? obj.title.trim().slice(0, 80).trim() : '';
  if (!title) return null;

  // Description — required
  const description = typeof obj.description === 'string' ? obj.description.trim() : '';
  if (!description) return null;

  // Tags — strings only, lowercased, blanks and duplicates removed, capped at 5
  const tags = Array.isArray(obj.tags)
    ? Array.from(
        new Set(
          obj.tags
            .filter((t): t is string => typeof t === 'string')
            .map((t) => t.toLowerCase().trim())
            .filter((t) => t.length > 0),
        ),
      ).slice(0, 5)
    : [];

  // Severity — default to 'suggestion' if missing or unrecognised
  const severityText = typeof obj.severity === 'string' ? obj.severity.trim().toLowerCase() : '';
  const severity: Severity =
    VALID_SEVERITIES.find((candidate) => candidate === severityText) ?? 'suggestion';

  return {
    type,
    title,
    description,
    tags,
    severity,
    citation,
  };
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// ─── Dedup Gate ───────────────────────────────────────────────────
|
|
2
|
+
// TF-IDF cosine similarity check against existing vault entries.
|
|
3
|
+
// Pure function: no side effects, no I/O beyond reading vault.
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
tokenize,
|
|
7
|
+
calculateTfIdf,
|
|
8
|
+
cosineSimilarity,
|
|
9
|
+
type SparseVector,
|
|
10
|
+
} from '../text/similarity.js';
|
|
11
|
+
import type { Vault } from '../vault/vault.js';
|
|
12
|
+
import type { ClassifiedItem } from './types.js';
|
|
13
|
+
|
|
14
|
+
/** Cosine similarity at or above which a new item is treated as a duplicate. */
export const DEDUP_THRESHOLD = 0.85;

/** Outcome of checking one classified item against the vault. */
export interface DedupResult {
  /** The item that was checked, passed through unchanged. */
  item: ClassifiedItem;
  /** True when `similarity` reached DEDUP_THRESHOLD. */
  isDuplicate: boolean;
  /** Id of the most similar vault entry; only set when `isDuplicate` is true. */
  bestMatchId?: string;
  /** Highest cosine similarity found against any vault entry (0 when there was none). */
  similarity: number;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Check new items against existing vault entries for duplicates using TF-IDF cosine similarity.
 *
 * Each document is the concatenation of an entry's title and description. A
 * shared IDF vocabulary is built from all documents (existing + new) using the
 * smoothed form `ln((N + 1) / (df + 1)) + 1`, TF-IDF vectors are computed from
 * it, and an item is marked as a duplicate when its best cosine similarity
 * against any existing entry is >= DEDUP_THRESHOLD.
 *
 * New items are compared only with vault entries, not with each other.
 *
 * @param items - Newly classified items to check
 * @param vault - Vault whose exported entries form the comparison set
 * @returns One result per input item, in input order
 */
export function dedupItems(items: ClassifiedItem[], vault: Vault): DedupResult[] {
  // Fast path: nothing to check — do not export the vault at all
  if (items.length === 0) return [];

  const existing = vault.exportAll().entries;

  // Fast path: nothing in vault — everything is new
  if (existing.length === 0) {
    return items.map((item) => ({
      item,
      isDuplicate: false,
      similarity: 0,
    }));
  }

  // ── Tokenize every document once ────────────────────────────────
  // The same token lists feed both the document-frequency counts and the vectors.
  const existingDocs = existing.map((entry) => ({
    id: entry.id,
    tokens: tokenize(`${entry.title} ${entry.description}`),
  }));
  const newDocs = items.map((item) => ({
    item,
    tokens: tokenize(`${item.title} ${item.description}`),
  }));
  const totalDocs = existingDocs.length + newDocs.length;

  // ── Count document frequency per term ───────────────────────────
  const docFreq = new Map<string, number>();
  for (const { tokens } of [...existingDocs, ...newDocs]) {
    // Set: a term counts once per document no matter how often it repeats.
    for (const term of new Set(tokens)) {
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    }
  }

  // ── Build IDF vocabulary ────────────────────────────────────────
  const vocabulary = new Map<string, number>();
  for (const [term, df] of docFreq) {
    vocabulary.set(term, Math.log((totalDocs + 1) / (df + 1)) + 1);
  }

  // ── Compute TF-IDF vectors for existing entries ─────────────────
  const existingVectors: Array<{ id: string; vec: SparseVector }> = existingDocs.map(
    ({ id, tokens }) => ({
      id,
      vec: calculateTfIdf(tokens, vocabulary),
    }),
  );

  // ── Score each new item against all existing entries ────────────
  return newDocs.map(({ item, tokens }) => {
    const itemVec = calculateTfIdf(tokens, vocabulary);

    let bestSimilarity = 0;
    let bestMatchId: string | undefined;

    for (const { id, vec } of existingVectors) {
      const sim = cosineSimilarity(itemVec, vec);
      // Strict comparison: on a tie the earliest vault entry is kept.
      if (sim > bestSimilarity) {
        bestSimilarity = sim;
        bestMatchId = id;
      }
    }

    const isDuplicate = bestSimilarity >= DEDUP_THRESHOLD;

    return {
      item,
      isDuplicate,
      bestMatchId: isDuplicate ? bestMatchId : undefined,
      similarity: bestSimilarity,
    };
  });
}
|