cognitive-core 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/atlas.d.ts +10 -0
- package/dist/atlas.d.ts.map +1 -1
- package/dist/atlas.js +65 -0
- package/dist/atlas.js.map +1 -1
- package/dist/learning/pipeline.d.ts +4 -31
- package/dist/learning/pipeline.d.ts.map +1 -1
- package/dist/learning/pipeline.js +12 -64
- package/dist/learning/pipeline.js.map +1 -1
- package/dist/memory/curated-loader.d.ts +21 -4
- package/dist/memory/curated-loader.d.ts.map +1 -1
- package/dist/memory/curated-loader.js +53 -16
- package/dist/memory/curated-loader.js.map +1 -1
- package/dist/memory/index.d.ts +2 -1
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +3 -1
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/playbook.d.ts +6 -0
- package/dist/memory/playbook.d.ts.map +1 -1
- package/dist/memory/playbook.js +15 -0
- package/dist/memory/playbook.js.map +1 -1
- package/dist/memory/source-resolver.d.ts +120 -0
- package/dist/memory/source-resolver.d.ts.map +1 -0
- package/dist/memory/source-resolver.js +300 -0
- package/dist/memory/source-resolver.js.map +1 -0
- package/dist/types/config.d.ts +141 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/config.js +40 -0
- package/dist/types/config.js.map +1 -1
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -1
- package/dist/types/index.js.map +1 -1
- package/dist/workspace/types.d.ts +12 -54
- package/dist/workspace/types.d.ts.map +1 -1
- package/dist/workspace/types.js.map +1 -1
- package/package.json +2 -2
- package/playbooks/compound-engineering/adversarial-review.json +51 -0
- package/playbooks/compound-engineering/agent-native-architecture.json +59 -0
- package/playbooks/compound-engineering/agent-native-review.json +54 -0
- package/playbooks/compound-engineering/api-contract-review.json +52 -0
- package/playbooks/compound-engineering/brainstorm-requirements.json +55 -0
- package/playbooks/compound-engineering/bug-reproduction.json +62 -0
- package/playbooks/compound-engineering/confidence-calibration.json +49 -0
- package/playbooks/compound-engineering/correctness-review.json +49 -0
- package/playbooks/compound-engineering/data-migration-safety.json +59 -0
- package/playbooks/compound-engineering/deployment-verification.json +63 -0
- package/playbooks/compound-engineering/error-recovery-patterns.json +53 -0
- package/playbooks/compound-engineering/implementation-planning.json +64 -0
- package/playbooks/compound-engineering/issue-pattern-analysis.json +53 -0
- package/playbooks/compound-engineering/knowledge-compounding.json +63 -0
- package/playbooks/compound-engineering/learnings-research.json +54 -0
- package/playbooks/compound-engineering/maintainability-review.json +49 -0
- package/playbooks/compound-engineering/performance-review.json +54 -0
- package/playbooks/compound-engineering/plan-adversarial-review.json +56 -0
- package/playbooks/compound-engineering/plan-feasibility-review.json +56 -0
- package/playbooks/compound-engineering/project-standards-review.json +52 -0
- package/playbooks/compound-engineering/reliability-review.json +53 -0
- package/playbooks/compound-engineering/review-orchestration.json +64 -0
- package/playbooks/compound-engineering/security-review.json +54 -0
- package/playbooks/compound-engineering/systematic-execution.json +64 -0
- package/playbooks/compound-engineering/testing-review.json +50 -0
- package/src/atlas.ts +96 -0
- package/src/memory/curated-loader.ts +69 -16
- package/src/memory/index.ts +16 -0
- package/src/memory/playbook.ts +19 -0
- package/src/memory/source-resolver.ts +422 -0
- package/src/types/config.ts +46 -0
- package/src/types/index.ts +4 -0
- package/src/workspace/types.ts +22 -78
- package/tests/integration/curated-sources-e2e.test.ts +502 -0
- package/tests/memory/compound-engineering-seed.test.ts +338 -0
- package/tests/memory/curated-loader-extended.test.ts +225 -0
- package/tests/memory/playbook-quality-validation.test.ts +430 -0
- package/tests/memory/source-resolver.test.ts +700 -0
- package/.claude/settings.local.json +0 -11
- package/dist/learning/llm-extractor.d.ts +0 -88
- package/dist/learning/llm-extractor.d.ts.map +0 -1
- package/dist/learning/llm-extractor.js +0 -372
- package/dist/learning/llm-extractor.js.map +0 -1
- package/dist/learning/loop-coordinator.d.ts +0 -61
- package/dist/learning/loop-coordinator.d.ts.map +0 -1
- package/dist/learning/loop-coordinator.js +0 -96
- package/dist/learning/loop-coordinator.js.map +0 -1
- package/references/agent-workspace/CLAUDE.md +0 -74
- package/references/agent-workspace/README.md +0 -587
- package/references/agent-workspace/media/banner.png +0 -0
- package/references/agent-workspace/package-lock.json +0 -2061
- package/references/agent-workspace/package.json +0 -54
- package/references/agent-workspace/src/handle.ts +0 -122
- package/references/agent-workspace/src/index.ts +0 -32
- package/references/agent-workspace/src/manager.ts +0 -102
- package/references/agent-workspace/src/readers/json.ts +0 -71
- package/references/agent-workspace/src/readers/markdown.ts +0 -37
- package/references/agent-workspace/src/readers/raw.ts +0 -27
- package/references/agent-workspace/src/types.ts +0 -68
- package/references/agent-workspace/src/validation.ts +0 -93
- package/references/agent-workspace/src/writers/json.ts +0 -17
- package/references/agent-workspace/src/writers/markdown.ts +0 -27
- package/references/agent-workspace/src/writers/raw.ts +0 -22
- package/references/agent-workspace/tests/errors.test.ts +0 -652
- package/references/agent-workspace/tests/handle.test.ts +0 -144
- package/references/agent-workspace/tests/manager.test.ts +0 -124
- package/references/agent-workspace/tests/readers.test.ts +0 -205
- package/references/agent-workspace/tests/validation.test.ts +0 -196
- package/references/agent-workspace/tests/writers.test.ts +0 -108
- package/references/agent-workspace/tsconfig.json +0 -20
- package/references/agent-workspace/tsup.config.ts +0 -9
- package/references/minimem/.claude/settings.json +0 -7
- package/references/minimem/.sudocode/issues.jsonl +0 -18
- package/references/minimem/.sudocode/specs.jsonl +0 -1
- package/references/minimem/CLAUDE.md +0 -310
- package/references/minimem/README.md +0 -556
- package/references/minimem/claude-plugin/.claude-plugin/plugin.json +0 -10
- package/references/minimem/claude-plugin/.mcp.json +0 -7
- package/references/minimem/claude-plugin/README.md +0 -158
- package/references/minimem/claude-plugin/commands/recall.md +0 -47
- package/references/minimem/claude-plugin/commands/remember.md +0 -41
- package/references/minimem/claude-plugin/hooks/__tests__/hooks.test.ts +0 -272
- package/references/minimem/claude-plugin/hooks/hooks.json +0 -27
- package/references/minimem/claude-plugin/hooks/session-end.sh +0 -86
- package/references/minimem/claude-plugin/hooks/session-start.sh +0 -85
- package/references/minimem/claude-plugin/skills/memory/SKILL.md +0 -108
- package/references/minimem/package-lock.json +0 -5373
- package/references/minimem/package.json +0 -60
- package/references/minimem/scripts/postbuild.js +0 -35
- package/references/minimem/src/__tests__/edge-cases.test.ts +0 -371
- package/references/minimem/src/__tests__/errors.test.ts +0 -265
- package/references/minimem/src/__tests__/helpers.ts +0 -199
- package/references/minimem/src/__tests__/internal.test.ts +0 -407
- package/references/minimem/src/__tests__/knowledge.test.ts +0 -287
- package/references/minimem/src/__tests__/minimem.integration.test.ts +0 -1127
- package/references/minimem/src/__tests__/session.test.ts +0 -190
- package/references/minimem/src/cli/__tests__/commands.test.ts +0 -759
- package/references/minimem/src/cli/commands/__tests__/conflicts.test.ts +0 -141
- package/references/minimem/src/cli/commands/append.ts +0 -76
- package/references/minimem/src/cli/commands/config.ts +0 -262
- package/references/minimem/src/cli/commands/conflicts.ts +0 -413
- package/references/minimem/src/cli/commands/daemon.ts +0 -169
- package/references/minimem/src/cli/commands/index.ts +0 -12
- package/references/minimem/src/cli/commands/init.ts +0 -88
- package/references/minimem/src/cli/commands/mcp.ts +0 -177
- package/references/minimem/src/cli/commands/push-pull.ts +0 -213
- package/references/minimem/src/cli/commands/search.ts +0 -158
- package/references/minimem/src/cli/commands/status.ts +0 -84
- package/references/minimem/src/cli/commands/sync-init.ts +0 -290
- package/references/minimem/src/cli/commands/sync.ts +0 -70
- package/references/minimem/src/cli/commands/upsert.ts +0 -197
- package/references/minimem/src/cli/config.ts +0 -584
- package/references/minimem/src/cli/index.ts +0 -264
- package/references/minimem/src/cli/shared.ts +0 -161
- package/references/minimem/src/cli/sync/__tests__/central.test.ts +0 -152
- package/references/minimem/src/cli/sync/__tests__/conflicts.test.ts +0 -209
- package/references/minimem/src/cli/sync/__tests__/daemon.test.ts +0 -118
- package/references/minimem/src/cli/sync/__tests__/detection.test.ts +0 -207
- package/references/minimem/src/cli/sync/__tests__/integration.test.ts +0 -476
- package/references/minimem/src/cli/sync/__tests__/registry.test.ts +0 -363
- package/references/minimem/src/cli/sync/__tests__/state.test.ts +0 -255
- package/references/minimem/src/cli/sync/__tests__/validation.test.ts +0 -193
- package/references/minimem/src/cli/sync/__tests__/watcher.test.ts +0 -178
- package/references/minimem/src/cli/sync/central.ts +0 -292
- package/references/minimem/src/cli/sync/conflicts.ts +0 -204
- package/references/minimem/src/cli/sync/daemon.ts +0 -407
- package/references/minimem/src/cli/sync/detection.ts +0 -138
- package/references/minimem/src/cli/sync/index.ts +0 -107
- package/references/minimem/src/cli/sync/operations.ts +0 -373
- package/references/minimem/src/cli/sync/registry.ts +0 -279
- package/references/minimem/src/cli/sync/state.ts +0 -355
- package/references/minimem/src/cli/sync/validation.ts +0 -206
- package/references/minimem/src/cli/sync/watcher.ts +0 -234
- package/references/minimem/src/cli/version.ts +0 -34
- package/references/minimem/src/core/index.ts +0 -9
- package/references/minimem/src/core/indexer.ts +0 -628
- package/references/minimem/src/core/searcher.ts +0 -221
- package/references/minimem/src/db/schema.ts +0 -183
- package/references/minimem/src/db/sqlite-vec.ts +0 -24
- package/references/minimem/src/embeddings/__tests__/embeddings.test.ts +0 -431
- package/references/minimem/src/embeddings/batch-gemini.ts +0 -392
- package/references/minimem/src/embeddings/batch-openai.ts +0 -409
- package/references/minimem/src/embeddings/embeddings.ts +0 -434
- package/references/minimem/src/index.ts +0 -109
- package/references/minimem/src/internal.ts +0 -299
- package/references/minimem/src/minimem.ts +0 -1276
- package/references/minimem/src/search/__tests__/hybrid.test.ts +0 -247
- package/references/minimem/src/search/graph.ts +0 -234
- package/references/minimem/src/search/hybrid.ts +0 -151
- package/references/minimem/src/search/search.ts +0 -256
- package/references/minimem/src/server/__tests__/mcp.test.ts +0 -341
- package/references/minimem/src/server/__tests__/tools.test.ts +0 -364
- package/references/minimem/src/server/mcp.ts +0 -326
- package/references/minimem/src/server/tools.ts +0 -720
- package/references/minimem/src/session.ts +0 -460
- package/references/minimem/tsconfig.json +0 -19
- package/references/minimem/tsup.config.ts +0 -26
- package/references/minimem/vitest.config.ts +0 -24
- package/references/sessionlog/.husky/pre-commit +0 -1
- package/references/sessionlog/.lintstagedrc.json +0 -4
- package/references/sessionlog/.prettierignore +0 -4
- package/references/sessionlog/.prettierrc.json +0 -11
- package/references/sessionlog/LICENSE +0 -21
- package/references/sessionlog/README.md +0 -453
- package/references/sessionlog/eslint.config.js +0 -58
- package/references/sessionlog/package-lock.json +0 -3672
- package/references/sessionlog/package.json +0 -65
- package/references/sessionlog/src/__tests__/agent-hooks.test.ts +0 -570
- package/references/sessionlog/src/__tests__/agent-registry.test.ts +0 -127
- package/references/sessionlog/src/__tests__/claude-code-hooks.test.ts +0 -225
- package/references/sessionlog/src/__tests__/claude-generator.test.ts +0 -46
- package/references/sessionlog/src/__tests__/commit-msg.test.ts +0 -86
- package/references/sessionlog/src/__tests__/cursor-agent.test.ts +0 -224
- package/references/sessionlog/src/__tests__/e2e-live.test.ts +0 -890
- package/references/sessionlog/src/__tests__/event-log.test.ts +0 -183
- package/references/sessionlog/src/__tests__/flush-sentinel.test.ts +0 -105
- package/references/sessionlog/src/__tests__/gemini-agent.test.ts +0 -375
- package/references/sessionlog/src/__tests__/git-hooks.test.ts +0 -78
- package/references/sessionlog/src/__tests__/hook-managers.test.ts +0 -121
- package/references/sessionlog/src/__tests__/lifecycle-tasks.test.ts +0 -759
- package/references/sessionlog/src/__tests__/opencode-agent.test.ts +0 -338
- package/references/sessionlog/src/__tests__/redaction.test.ts +0 -136
- package/references/sessionlog/src/__tests__/session-repo.test.ts +0 -353
- package/references/sessionlog/src/__tests__/session-store.test.ts +0 -166
- package/references/sessionlog/src/__tests__/setup-ccweb.test.ts +0 -466
- package/references/sessionlog/src/__tests__/skill-live.test.ts +0 -461
- package/references/sessionlog/src/__tests__/summarize.test.ts +0 -348
- package/references/sessionlog/src/__tests__/task-plan-e2e.test.ts +0 -610
- package/references/sessionlog/src/__tests__/task-plan-live.test.ts +0 -632
- package/references/sessionlog/src/__tests__/transcript-timestamp.test.ts +0 -121
- package/references/sessionlog/src/__tests__/types.test.ts +0 -166
- package/references/sessionlog/src/__tests__/utils.test.ts +0 -333
- package/references/sessionlog/src/__tests__/validation.test.ts +0 -103
- package/references/sessionlog/src/__tests__/worktree.test.ts +0 -57
- package/references/sessionlog/src/agent/agents/claude-code.ts +0 -1089
- package/references/sessionlog/src/agent/agents/cursor.ts +0 -361
- package/references/sessionlog/src/agent/agents/gemini-cli.ts +0 -632
- package/references/sessionlog/src/agent/agents/opencode.ts +0 -540
- package/references/sessionlog/src/agent/registry.ts +0 -143
- package/references/sessionlog/src/agent/session-types.ts +0 -113
- package/references/sessionlog/src/agent/types.ts +0 -220
- package/references/sessionlog/src/cli.ts +0 -597
- package/references/sessionlog/src/commands/clean.ts +0 -133
- package/references/sessionlog/src/commands/disable.ts +0 -84
- package/references/sessionlog/src/commands/doctor.ts +0 -145
- package/references/sessionlog/src/commands/enable.ts +0 -202
- package/references/sessionlog/src/commands/explain.ts +0 -261
- package/references/sessionlog/src/commands/reset.ts +0 -105
- package/references/sessionlog/src/commands/resume.ts +0 -180
- package/references/sessionlog/src/commands/rewind.ts +0 -195
- package/references/sessionlog/src/commands/setup-ccweb.ts +0 -275
- package/references/sessionlog/src/commands/status.ts +0 -172
- package/references/sessionlog/src/config.ts +0 -165
- package/references/sessionlog/src/events/event-log.ts +0 -126
- package/references/sessionlog/src/git-operations.ts +0 -558
- package/references/sessionlog/src/hooks/git-hooks.ts +0 -165
- package/references/sessionlog/src/hooks/lifecycle.ts +0 -391
- package/references/sessionlog/src/index.ts +0 -650
- package/references/sessionlog/src/security/redaction.ts +0 -283
- package/references/sessionlog/src/session/state-machine.ts +0 -452
- package/references/sessionlog/src/store/checkpoint-store.ts +0 -509
- package/references/sessionlog/src/store/native-store.ts +0 -173
- package/references/sessionlog/src/store/provider-types.ts +0 -99
- package/references/sessionlog/src/store/session-store.ts +0 -266
- package/references/sessionlog/src/strategy/attribution.ts +0 -296
- package/references/sessionlog/src/strategy/common.ts +0 -207
- package/references/sessionlog/src/strategy/content-overlap.ts +0 -228
- package/references/sessionlog/src/strategy/manual-commit.ts +0 -988
- package/references/sessionlog/src/strategy/types.ts +0 -279
- package/references/sessionlog/src/summarize/claude-generator.ts +0 -115
- package/references/sessionlog/src/summarize/summarize.ts +0 -432
- package/references/sessionlog/src/types.ts +0 -508
- package/references/sessionlog/src/utils/chunk-files.ts +0 -49
- package/references/sessionlog/src/utils/commit-message.ts +0 -65
- package/references/sessionlog/src/utils/detect-agent.ts +0 -36
- package/references/sessionlog/src/utils/hook-managers.ts +0 -125
- package/references/sessionlog/src/utils/ide-tags.ts +0 -32
- package/references/sessionlog/src/utils/paths.ts +0 -79
- package/references/sessionlog/src/utils/preview-rewind.ts +0 -80
- package/references/sessionlog/src/utils/rewind-conflict.ts +0 -121
- package/references/sessionlog/src/utils/shadow-branch.ts +0 -109
- package/references/sessionlog/src/utils/string-utils.ts +0 -46
- package/references/sessionlog/src/utils/todo-extract.ts +0 -188
- package/references/sessionlog/src/utils/trailers.ts +0 -187
- package/references/sessionlog/src/utils/transcript-parse.ts +0 -177
- package/references/sessionlog/src/utils/transcript-timestamp.ts +0 -59
- package/references/sessionlog/src/utils/tree-ops.ts +0 -219
- package/references/sessionlog/src/utils/tty.ts +0 -72
- package/references/sessionlog/src/utils/validation.ts +0 -65
- package/references/sessionlog/src/utils/worktree.ts +0 -58
- package/references/sessionlog/src/wire-types.ts +0 -59
- package/references/sessionlog/templates/setup-env.sh +0 -153
- package/references/sessionlog/tsconfig.json +0 -18
- package/references/sessionlog/vitest.config.ts +0 -12
- package/references/skill-tree/.claude/settings.json +0 -6
- package/references/skill-tree/.sudocode/issues.jsonl +0 -19
- package/references/skill-tree/.sudocode/specs.jsonl +0 -3
- package/references/skill-tree/CLAUDE.md +0 -126
- package/references/skill-tree/README.md +0 -372
- package/references/skill-tree/docs/GAPS_v1.md +0 -221
- package/references/skill-tree/docs/INTEGRATION_PLAN.md +0 -467
- package/references/skill-tree/docs/TODOS.md +0 -91
- package/references/skill-tree/docs/anthropic_skill_guide.md +0 -1364
- package/references/skill-tree/docs/design/federated-skill-trees.md +0 -524
- package/references/skill-tree/docs/design/multi-agent-sync.md +0 -759
- package/references/skill-tree/docs/scraper/BRAINSTORM.md +0 -583
- package/references/skill-tree/docs/scraper/POC_PLAN.md +0 -420
- package/references/skill-tree/docs/scraper/README.md +0 -170
- package/references/skill-tree/examples/basic-usage.ts +0 -164
- package/references/skill-tree/package-lock.json +0 -1852
- package/references/skill-tree/package.json +0 -66
- package/references/skill-tree/scraper/README.md +0 -123
- package/references/skill-tree/scraper/docs/DESIGN.md +0 -683
- package/references/skill-tree/scraper/docs/PLAN.md +0 -336
- package/references/skill-tree/scraper/drizzle.config.ts +0 -10
- package/references/skill-tree/scraper/package-lock.json +0 -6329
- package/references/skill-tree/scraper/package.json +0 -68
- package/references/skill-tree/scraper/test/fixtures/invalid-skill/missing-description.md +0 -7
- package/references/skill-tree/scraper/test/fixtures/invalid-skill/missing-name.md +0 -7
- package/references/skill-tree/scraper/test/fixtures/minimal-skill/SKILL.md +0 -27
- package/references/skill-tree/scraper/test/fixtures/skill-json/SKILL.json +0 -21
- package/references/skill-tree/scraper/test/fixtures/skill-with-meta/SKILL.md +0 -54
- package/references/skill-tree/scraper/test/fixtures/skill-with-meta/_meta.json +0 -24
- package/references/skill-tree/scraper/test/fixtures/valid-skill/SKILL.md +0 -93
- package/references/skill-tree/scraper/test/fixtures/valid-skill/_meta.json +0 -22
- package/references/skill-tree/scraper/tsup.config.ts +0 -14
- package/references/skill-tree/scraper/vitest.config.ts +0 -17
- package/references/skill-tree/scripts/convert-to-vitest.ts +0 -166
- package/references/skill-tree/skills/skill-writer/SKILL.md +0 -339
- package/references/skill-tree/skills/skill-writer/references/examples.md +0 -326
- package/references/skill-tree/skills/skill-writer/references/patterns.md +0 -210
- package/references/skill-tree/skills/skill-writer/references/quality-checklist.md +0 -123
- package/references/skill-tree/test/run-all.ts +0 -106
- package/references/skill-tree/test/utils.ts +0 -128
- package/references/skill-tree/vitest.config.ts +0 -16
|
@@ -1,628 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* MemoryIndexer - Handles file indexing and embedding management
|
|
3
|
-
*
|
|
4
|
-
* Responsible for:
|
|
5
|
-
* - Processing memory files into chunks
|
|
6
|
-
* - Computing and caching embeddings
|
|
7
|
-
* - Managing file records in the database
|
|
8
|
-
* - Detecting stale content
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import { randomUUID } from "node:crypto";
|
|
12
|
-
import fs from "node:fs/promises";
|
|
13
|
-
import path from "node:path";
|
|
14
|
-
import type { DatabaseSync } from "node:sqlite";
|
|
15
|
-
|
|
16
|
-
import {
|
|
17
|
-
buildFileEntry,
|
|
18
|
-
chunkMarkdown,
|
|
19
|
-
hashText,
|
|
20
|
-
listMemoryFiles,
|
|
21
|
-
type MemoryChunk,
|
|
22
|
-
type MemoryFileEntry,
|
|
23
|
-
parseEmbedding,
|
|
24
|
-
vectorToBlob,
|
|
25
|
-
type DebugFn,
|
|
26
|
-
} from "../internal.js";
|
|
27
|
-
import type {
|
|
28
|
-
EmbeddingProvider,
|
|
29
|
-
OpenAiEmbeddingClient,
|
|
30
|
-
GeminiEmbeddingClient,
|
|
31
|
-
} from "../embeddings/embeddings.js";
|
|
32
|
-
import {
|
|
33
|
-
runOpenAiEmbeddingBatches,
|
|
34
|
-
type OpenAiBatchRequest,
|
|
35
|
-
OPENAI_BATCH_ENDPOINT,
|
|
36
|
-
} from "../embeddings/batch-openai.js";
|
|
37
|
-
import { runGeminiEmbeddingBatches, type GeminiBatchRequest } from "../embeddings/batch-gemini.js";
|
|
38
|
-
|
|
39
|
-
const META_KEY = "memory_index_meta_v1";
|
|
40
|
-
const EMBEDDING_CACHE_TABLE = "embedding_cache";
|
|
41
|
-
const VECTOR_TABLE = "chunks_vec";
|
|
42
|
-
const FTS_TABLE = "chunks_fts";
|
|
43
|
-
const EMBEDDING_RETRY_MAX_ATTEMPTS = 3;
|
|
44
|
-
const EMBEDDING_RETRY_BASE_DELAY_MS = 500;
|
|
45
|
-
const EMBEDDING_RETRY_MAX_DELAY_MS = 8000;
|
|
46
|
-
|
|
47
|
-
export type IndexerConfig = {
|
|
48
|
-
memoryDir: string;
|
|
49
|
-
chunking: { tokens: number; overlap: number };
|
|
50
|
-
cache: { enabled: boolean; maxEntries: number };
|
|
51
|
-
batch: {
|
|
52
|
-
enabled: boolean;
|
|
53
|
-
wait: boolean;
|
|
54
|
-
concurrency: number;
|
|
55
|
-
pollIntervalMs: number;
|
|
56
|
-
timeoutMs: number;
|
|
57
|
-
};
|
|
58
|
-
ftsEnabled: boolean;
|
|
59
|
-
debug?: DebugFn;
|
|
60
|
-
};
|
|
61
|
-
|
|
62
|
-
export type MemoryIndexMeta = {
|
|
63
|
-
model: string;
|
|
64
|
-
provider: string;
|
|
65
|
-
providerKey?: string;
|
|
66
|
-
chunkTokens: number;
|
|
67
|
-
chunkOverlap: number;
|
|
68
|
-
vectorDims?: number;
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
export type IndexStats = {
|
|
72
|
-
filesProcessed: number;
|
|
73
|
-
chunksCreated: number;
|
|
74
|
-
staleRemoved: number;
|
|
75
|
-
};
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
* MemoryIndexer handles file indexing, chunking, and embedding management
|
|
79
|
-
*/
|
|
80
|
-
export class MemoryIndexer {
|
|
81
|
-
private readonly config: IndexerConfig;
|
|
82
|
-
private readonly db: DatabaseSync;
|
|
83
|
-
private readonly provider: EmbeddingProvider;
|
|
84
|
-
private readonly providerKey: string;
|
|
85
|
-
private readonly openAi?: OpenAiEmbeddingClient;
|
|
86
|
-
private readonly gemini?: GeminiEmbeddingClient;
|
|
87
|
-
|
|
88
|
-
// Vector/FTS state (shared with parent)
|
|
89
|
-
private vectorState: {
|
|
90
|
-
available: boolean;
|
|
91
|
-
dims?: number;
|
|
92
|
-
};
|
|
93
|
-
private ftsAvailable: boolean;
|
|
94
|
-
|
|
95
|
-
constructor(
|
|
96
|
-
db: DatabaseSync,
|
|
97
|
-
provider: EmbeddingProvider,
|
|
98
|
-
config: IndexerConfig,
|
|
99
|
-
options?: {
|
|
100
|
-
openAi?: OpenAiEmbeddingClient;
|
|
101
|
-
gemini?: GeminiEmbeddingClient;
|
|
102
|
-
vectorState?: { available: boolean; dims?: number };
|
|
103
|
-
ftsAvailable?: boolean;
|
|
104
|
-
}
|
|
105
|
-
) {
|
|
106
|
-
this.db = db;
|
|
107
|
-
this.provider = provider;
|
|
108
|
-
this.config = config;
|
|
109
|
-
this.openAi = options?.openAi;
|
|
110
|
-
this.gemini = options?.gemini;
|
|
111
|
-
this.vectorState = options?.vectorState ?? { available: false };
|
|
112
|
-
this.ftsAvailable = options?.ftsAvailable ?? false;
|
|
113
|
-
this.providerKey = this.computeProviderKey();
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
/**
|
|
117
|
-
* Update vector/FTS availability (called by parent when extensions load)
|
|
118
|
-
*/
|
|
119
|
-
setVectorState(state: { available: boolean; dims?: number }): void {
|
|
120
|
-
this.vectorState = state;
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
setFtsAvailable(available: boolean): void {
|
|
124
|
-
this.ftsAvailable = available;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
getVectorDims(): number | undefined {
|
|
128
|
-
return this.vectorState.dims;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
/**
|
|
132
|
-
* Compute a unique key for the current provider configuration
|
|
133
|
-
*/
|
|
134
|
-
private computeProviderKey(): string {
|
|
135
|
-
const parts: string[] = [this.provider.id, this.provider.model];
|
|
136
|
-
if (this.openAi) {
|
|
137
|
-
parts.push(this.openAi.baseUrl);
|
|
138
|
-
}
|
|
139
|
-
if (this.gemini) {
|
|
140
|
-
parts.push(this.gemini.baseUrl);
|
|
141
|
-
}
|
|
142
|
-
return hashText(parts.join(":"));
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
/**
|
|
146
|
-
* Read index metadata from database
|
|
147
|
-
*/
|
|
148
|
-
readMeta(): MemoryIndexMeta | null {
|
|
149
|
-
try {
|
|
150
|
-
const row = this.db.prepare(`SELECT value FROM meta WHERE key = ?`).get(META_KEY) as
|
|
151
|
-
| { value: string }
|
|
152
|
-
| undefined;
|
|
153
|
-
if (!row?.value) return null;
|
|
154
|
-
return JSON.parse(row.value) as MemoryIndexMeta;
|
|
155
|
-
} catch {
|
|
156
|
-
return null;
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
/**
|
|
161
|
-
* Write index metadata to database
|
|
162
|
-
*/
|
|
163
|
-
writeMeta(meta: MemoryIndexMeta): void {
|
|
164
|
-
this.db
|
|
165
|
-
.prepare(`INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)`)
|
|
166
|
-
.run(META_KEY, JSON.stringify(meta));
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
/**
|
|
170
|
-
* Check if the index is stale by comparing file mtimes
|
|
171
|
-
*/
|
|
172
|
-
async isStale(): Promise<boolean> {
|
|
173
|
-
try {
|
|
174
|
-
const files = await listMemoryFiles(this.config.memoryDir);
|
|
175
|
-
|
|
176
|
-
const stored = this.db
|
|
177
|
-
.prepare(`SELECT path, mtime FROM files WHERE source = ?`)
|
|
178
|
-
.all("memory") as Array<{ path: string; mtime: number }>;
|
|
179
|
-
|
|
180
|
-
if (files.length !== stored.length) {
|
|
181
|
-
this.config.debug?.(`Stale: file count changed (${stored.length} -> ${files.length})`);
|
|
182
|
-
return true;
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
const storedMap = new Map(stored.map((f) => [f.path, f.mtime]));
|
|
186
|
-
|
|
187
|
-
for (const absPath of files) {
|
|
188
|
-
const relPath = path.relative(this.config.memoryDir, absPath).replace(/\\/g, "/");
|
|
189
|
-
const storedMtime = storedMap.get(relPath);
|
|
190
|
-
|
|
191
|
-
if (storedMtime === undefined) {
|
|
192
|
-
this.config.debug?.(`Stale: new file ${relPath}`);
|
|
193
|
-
return true;
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
const stat = await fs.stat(absPath);
|
|
197
|
-
const currentMtime = Math.floor(stat.mtimeMs);
|
|
198
|
-
if (currentMtime !== storedMtime) {
|
|
199
|
-
this.config.debug?.(`Stale: mtime changed for ${relPath}`);
|
|
200
|
-
return true;
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
return false;
|
|
205
|
-
} catch (err) {
|
|
206
|
-
this.config.debug?.(`Stale check failed: ${String(err)}`);
|
|
207
|
-
return true;
|
|
208
|
-
}
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
/**
|
|
212
|
-
* Check if a full reindex is needed based on configuration changes
|
|
213
|
-
*/
|
|
214
|
-
needsFullReindex(force?: boolean): boolean {
|
|
215
|
-
const meta = this.readMeta();
|
|
216
|
-
return (
|
|
217
|
-
force === true ||
|
|
218
|
-
!meta ||
|
|
219
|
-
meta.model !== this.provider.model ||
|
|
220
|
-
meta.provider !== this.provider.id ||
|
|
221
|
-
meta.providerKey !== this.providerKey ||
|
|
222
|
-
meta.chunkTokens !== this.config.chunking.tokens ||
|
|
223
|
-
meta.chunkOverlap !== this.config.chunking.overlap ||
|
|
224
|
-
(this.vectorState.available && !meta?.vectorDims)
|
|
225
|
-
);
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
/**
|
|
229
|
-
* Index all memory files, returns stats
|
|
230
|
-
*/
|
|
231
|
-
async indexAll(force?: boolean): Promise<IndexStats> {
|
|
232
|
-
const needsFullReindex = this.needsFullReindex(force);
|
|
233
|
-
const files = await listMemoryFiles(this.config.memoryDir);
|
|
234
|
-
const activePaths = new Set<string>();
|
|
235
|
-
let filesProcessed = 0;
|
|
236
|
-
let chunksCreated = 0;
|
|
237
|
-
|
|
238
|
-
for (const absPath of files) {
|
|
239
|
-
const entry = await buildFileEntry(absPath, this.config.memoryDir);
|
|
240
|
-
activePaths.add(entry.path);
|
|
241
|
-
|
|
242
|
-
const record = this.db
|
|
243
|
-
.prepare(`SELECT hash FROM files WHERE path = ? AND source = ?`)
|
|
244
|
-
.get(entry.path, "memory") as { hash: string } | undefined;
|
|
245
|
-
|
|
246
|
-
if (!needsFullReindex && record?.hash === entry.hash) {
|
|
247
|
-
continue;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
const chunkCount = await this.indexFile(entry);
|
|
251
|
-
filesProcessed++;
|
|
252
|
-
chunksCreated += chunkCount;
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
// Delete stale entries
|
|
256
|
-
const staleRemoved = this.removeStaleEntries(activePaths);
|
|
257
|
-
|
|
258
|
-
// Write meta
|
|
259
|
-
this.writeMeta({
|
|
260
|
-
model: this.provider.model,
|
|
261
|
-
provider: this.provider.id,
|
|
262
|
-
providerKey: this.providerKey,
|
|
263
|
-
chunkTokens: this.config.chunking.tokens,
|
|
264
|
-
chunkOverlap: this.config.chunking.overlap,
|
|
265
|
-
vectorDims: this.vectorState.dims,
|
|
266
|
-
});
|
|
267
|
-
|
|
268
|
-
// Prune embedding cache
|
|
269
|
-
this.pruneEmbeddingCacheIfNeeded();
|
|
270
|
-
|
|
271
|
-
return { filesProcessed, chunksCreated, staleRemoved };
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
/**
|
|
275
|
-
* Index a single file
|
|
276
|
-
*/
|
|
277
|
-
async indexFile(entry: MemoryFileEntry): Promise<number> {
|
|
278
|
-
const content = await fs.readFile(entry.absPath, "utf-8");
|
|
279
|
-
const chunks = chunkMarkdown(content, this.config.chunking);
|
|
280
|
-
|
|
281
|
-
const embeddings = await this.embedChunks(chunks);
|
|
282
|
-
|
|
283
|
-
// Update files table
|
|
284
|
-
this.db
|
|
285
|
-
.prepare(
|
|
286
|
-
`INSERT OR REPLACE INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)`
|
|
287
|
-
)
|
|
288
|
-
.run(entry.path, "memory", entry.hash, Math.floor(entry.mtimeMs), entry.size);
|
|
289
|
-
|
|
290
|
-
// Delete old chunks
|
|
291
|
-
this.deleteChunksForFile(entry.path);
|
|
292
|
-
|
|
293
|
-
// Insert new chunks
|
|
294
|
-
const now = Date.now();
|
|
295
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
296
|
-
const chunk = chunks[i];
|
|
297
|
-
const embedding = embeddings[i] ?? [];
|
|
298
|
-
this.insertChunk(entry.path, chunk, embedding, now);
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
return chunks.length;
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
/**
 * Remove every stored chunk belonging to `filePath` (source "memory") from
 * the vector table, the `chunks` table and, when enabled, the FTS table.
 *
 * Failures on the vector and FTS tables are ignored on purpose, because
 * those tables may not exist; the delete on `chunks` is not guarded.
 *
 * @param filePath - Path value the chunk rows were stored under.
 */
private deleteChunksForFile(filePath: string): void {
  const source = "memory";

  // Vector rows are located through the chunk ids, so this runs before the
  // chunk rows themselves are deleted.
  try {
    const dropVectors = this.db.prepare(
      `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`
    );
    dropVectors.run(filePath, source);
  } catch {
    // Best effort: the vector table may not exist.
  }

  const dropChunks = this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`);
  dropChunks.run(filePath, source);

  if (!this.config.ftsEnabled || !this.ftsAvailable) {
    return;
  }

  try {
    const dropFts = this.db.prepare(
      `DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`
    );
    dropFts.run(filePath, source, this.provider.model);
  } catch {
    // Best effort: the FTS table may not exist.
  }
}
|
|
328
|
-
|
|
329
|
-
/**
 * Persist one chunk under a freshly generated UUID.
 *
 * The chunk is always written to `chunks` (embedding serialized as JSON).
 * It is also written to the vector table when vector support is available
 * and the embedding is non-empty, and to the FTS table when full-text search
 * is enabled and available. The two secondary writes are best-effort: a
 * failure does not abort the insert, but it is reported through
 * `config.debug` so missing index rows can be diagnosed.
 *
 * @param filePath - Path the chunk belongs to.
 * @param chunk - Chunk text plus its line range and content hash.
 * @param embedding - Embedding vector; an empty array skips the vector table.
 * @param timestamp - Value stored in `updated_at`.
 */
private insertChunk(
  filePath: string,
  chunk: MemoryChunk,
  embedding: number[],
  timestamp: number
): void {
  const chunkId = randomUUID();

  this.db
    .prepare(
      `INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
    )
    .run(
      chunkId,
      filePath,
      "memory",
      chunk.startLine,
      chunk.endLine,
      chunk.hash,
      this.provider.model,
      chunk.text,
      JSON.stringify(embedding),
      timestamp
    );

  // Insert into vector table if available
  if (this.vectorState.available && embedding.length > 0) {
    // The first embedding seen fixes the dimensionality and creates the table.
    if (!this.vectorState.dims) {
      this.vectorState.dims = embedding.length;
      this.ensureVectorTable(embedding.length);
    }
    try {
      this.db
        .prepare(`INSERT INTO ${VECTOR_TABLE} (id, embedding) VALUES (?, ?)`)
        .run(chunkId, vectorToBlob(embedding));
    } catch (err) {
      // Still best-effort, but no longer silent.
      this.config.debug?.(
        `vector insert failed for ${filePath}:${chunk.startLine}-${chunk.endLine}: ${String(err)}`
      );
    }
  }

  // Insert into FTS table if available
  if (this.config.ftsEnabled && this.ftsAvailable) {
    try {
      this.db
        .prepare(
          `INSERT INTO ${FTS_TABLE} (text, id, path, source, model, start_line, end_line)
           VALUES (?, ?, ?, ?, ?, ?, ?)`
        )
        .run(
          chunk.text,
          chunkId,
          filePath,
          "memory",
          this.provider.model,
          chunk.startLine,
          chunk.endLine
        );
    } catch (err) {
      // Still best-effort, but no longer silent.
      this.config.debug?.(
        `fts insert failed for ${filePath}:${chunk.startLine}-${chunk.endLine}: ${String(err)}`
      );
    }
  }
}
|
|
395
|
-
|
|
396
|
-
/**
 * Drop index data for files that are recorded in the `files` table
 * (source "memory") but are not part of the current active set.
 *
 * @param activePaths - Paths that should be kept.
 * @returns How many file entries were removed.
 */
private removeStaleEntries(activePaths: Set<string>): number {
  const indexed = this.db
    .prepare(`SELECT path FROM files WHERE source = ?`)
    .all("memory") as Array<{ path: string }>;

  const orphaned = indexed.filter((row) => !activePaths.has(row.path));

  for (const { path: stalePath } of orphaned) {
    // Remove the file row first, then everything chunk-related for that path.
    this.db
      .prepare(`DELETE FROM files WHERE path = ? AND source = ?`)
      .run(stalePath, "memory");
    this.deleteChunksForFile(stalePath);
  }

  return orphaned.length;
}
|
|
417
|
-
|
|
418
|
-
/**
 * Create the vec0 virtual table for embeddings if it does not exist yet.
 *
 * Does nothing when vector support is unavailable. A failure to create the
 * table is reported through `config.debug` and otherwise ignored.
 *
 * @param dimensions - Length of the embedding vectors the table will hold.
 */
ensureVectorTable(dimensions: number): void {
  if (!this.vectorState.available) {
    return;
  }

  try {
    const createSql = `CREATE VIRTUAL TABLE IF NOT EXISTS ${VECTOR_TABLE} USING vec0(
        id TEXT PRIMARY KEY,
        embedding FLOAT[${dimensions}]
      )`;
    this.db.exec(createSql);
  } catch (err) {
    this.config.debug?.(`vector table creation failed: ${String(err)}`);
  }
}
|
|
434
|
-
|
|
435
|
-
/**
 * Resolve an embedding for every chunk, reusing cached vectors where possible.
 *
 * Cached vectors are looked up by chunk hash. Chunks with no cached vector,
 * or with an empty one, are embedded via `embedBatchWithRetry`. Each distinct
 * hash is embedded only once. Only non-empty results are written back to the
 * cache, so a missing provider result cannot leave a permanently empty entry
 * behind.
 *
 * @param chunks - Chunks to embed; order is preserved in the result.
 * @returns One vector per input chunk, in input order. A chunk whose
 *   embedding could not be obtained gets an empty array.
 * @throws Whatever `embedBatchWithRetry` throws when embedding fails.
 */
async embedChunks(chunks: MemoryChunk[]): Promise<number[][]> {
  if (chunks.length === 0) return [];

  const hashes = chunks.map((c) => c.hash);
  const cached = this.loadEmbeddingCache(hashes);

  // hash -> text for every distinct chunk lacking a usable cached vector.
  // Embeddings are keyed by hash throughout, so chunks sharing a hash share
  // one request (assumes equal hash means equal text; confirm against the chunker).
  const pending = new Map<string, string>();
  for (const chunk of chunks) {
    const hit = cached.get(chunk.hash);
    const usable = hit !== undefined && hit.length > 0;
    if (!usable && !pending.has(chunk.hash)) {
      pending.set(chunk.hash, chunk.text);
    }
  }

  if (pending.size > 0) {
    const pendingHashes = Array.from(pending.keys());
    const fresh = await this.embedBatchWithRetry(Array.from(pending.values()));

    pendingHashes.forEach((hash, i) => {
      const embedding = fresh[i] ?? [];
      cached.set(hash, embedding);
      // Never persist an empty vector: it would read back as a cache hit and
      // block re-embedding on later runs.
      if (embedding.length > 0) {
        this.upsertEmbeddingCache(hash, embedding);
      }
    });
  }

  return hashes.map((h) => cached.get(h) ?? []);
}
|
|
465
|
-
|
|
466
|
-
/**
 * Embed a list of texts, preferring the batch API and falling back to direct
 * provider calls with retries.
 *
 * When `config.batch.enabled` is set, the batch API is tried once; if it
 * throws, the error is reported through `config.debug` and the direct path
 * is used. The direct path calls `provider.embedBatch` up to
 * `EMBEDDING_RETRY_MAX_ATTEMPTS` times, waiting
 * `EMBEDDING_RETRY_BASE_DELAY_MS * 2^attempt` (capped at
 * `EMBEDDING_RETRY_MAX_DELAY_MS`) between attempts.
 *
 * @param texts - Texts to embed.
 * @returns The embeddings produced by the batch API or the provider.
 * @throws The last error from `provider.embedBatch` once all attempts fail;
 *   always an `Error` instance, never `null`.
 */
private async embedBatchWithRetry(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return [];

  // Try batch API first if enabled
  if (this.config.batch.enabled) {
    try {
      return await this.embedWithBatchApi(texts);
    } catch (err) {
      this.config.debug?.(`batch embedding failed, falling back to direct: ${String(err)}`);
    }
  }

  // Fall back to direct embedding
  let lastError: Error | null = null;
  for (let attempt = 0; attempt < EMBEDDING_RETRY_MAX_ATTEMPTS; attempt++) {
    try {
      return await this.provider.embedBatch(texts);
    } catch (err) {
      lastError = err instanceof Error ? err : new Error(String(err));
      // No delay after the final attempt; the error is about to be thrown.
      if (attempt < EMBEDDING_RETRY_MAX_ATTEMPTS - 1) {
        const delay = Math.min(
          EMBEDDING_RETRY_MAX_DELAY_MS,
          EMBEDDING_RETRY_BASE_DELAY_MS * Math.pow(2, attempt)
        );
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
  }

  // If the loop ran zero times there is no captured error; throw a real Error
  // rather than `null`.
  throw lastError ?? new Error("embedding failed: no attempts were made");
}
|
|
499
|
-
|
|
500
|
-
/**
 * Embed texts through a provider batch API.
 *
 * Uses the OpenAI batch runner when an OpenAI client is configured, otherwise
 * the Gemini batch runner when a Gemini client is configured. Each text is
 * submitted under the id `chunk-<index>`, and the results are mapped back to
 * input order by that id.
 *
 * @param texts - Texts to embed.
 * @returns One vector per text, in input order; a text with no result under
 *   its id gets an empty array.
 * @throws Error when neither an OpenAI nor a Gemini client is configured.
 */
private async embedWithBatchApi(texts: string[]): Promise<number[][]> {
  const idFor = (index: number): string => `chunk-${index}`;

  const openAi = this.openAi;
  if (openAi) {
    const requests: OpenAiBatchRequest[] = texts.map((text, index) => ({
      custom_id: idFor(index),
      method: "POST",
      url: OPENAI_BATCH_ENDPOINT,
      body: { model: openAi.model, input: text },
    }));

    const byId = await runOpenAiEmbeddingBatches({
      openAi,
      source: "minimem",
      requests,
      wait: this.config.batch.wait,
      pollIntervalMs: this.config.batch.pollIntervalMs,
      timeoutMs: this.config.batch.timeoutMs,
      concurrency: this.config.batch.concurrency,
      debug: this.config.debug,
    });

    return texts.map((_, index) => byId.get(idFor(index)) ?? []);
  }

  const gemini = this.gemini;
  if (gemini) {
    const requests: GeminiBatchRequest[] = texts.map((text, index) => ({
      custom_id: idFor(index),
      content: { parts: [{ text }] },
      taskType: "RETRIEVAL_DOCUMENT",
    }));

    const byId = await runGeminiEmbeddingBatches({
      gemini,
      source: "minimem",
      requests,
      wait: this.config.batch.wait,
      pollIntervalMs: this.config.batch.pollIntervalMs,
      timeoutMs: this.config.batch.timeoutMs,
      concurrency: this.config.batch.concurrency,
      debug: this.config.debug,
    });

    return texts.map((_, index) => byId.get(idFor(index)) ?? []);
  }

  throw new Error("Batch API not available for local embeddings");
}
|
|
549
|
-
|
|
550
|
-
/**
 * Look up cached embeddings for the given chunk hashes.
 *
 * Only rows matching the current provider id, model and provider key are
 * considered. Every row that is found has its `updated_at` refreshed, which
 * is the column the cache pruning orders by. Hashes are de-duplicated and
 * queried in fixed-size batches so the number of bound parameters per
 * statement stays bounded regardless of how many chunks a file produces.
 *
 * @param hashes - Chunk hashes to look up; duplicates are allowed.
 * @returns Map from hash to its cached embedding. Hashes without a cache row
 *   are absent. Empty when the cache is disabled or `hashes` is empty.
 */
private loadEmbeddingCache(hashes: string[]): Map<string, number[]> {
  const result = new Map<string, number[]>();
  if (!this.config.cache.enabled || hashes.length === 0) return result;

  // Kept well below SQLite's historical default limit of 999 bound
  // parameters per statement (three are used by the provider key columns).
  const batchSize = 400;
  const uniqueHashes = Array.from(new Set(hashes));

  // Prepared once and reused for every row instead of once per row.
  const touch = this.db.prepare(
    `UPDATE ${EMBEDDING_CACHE_TABLE} SET updated_at = ?
     WHERE provider = ? AND model = ? AND provider_key = ? AND hash = ?`
  );
  const now = Date.now();

  for (let start = 0; start < uniqueHashes.length; start += batchSize) {
    const batch = uniqueHashes.slice(start, start + batchSize);
    const placeholders = batch.map(() => "?").join(",");
    const rows = this.db
      .prepare(
        `SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE}
         WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`
      )
      .all(this.provider.id, this.provider.model, this.providerKey, ...batch) as Array<{
      hash: string;
      embedding: string;
    }>;

    for (const row of rows) {
      result.set(row.hash, parseEmbedding(row.embedding));
      // Touch for LRU
      touch.run(now, this.provider.id, this.provider.model, this.providerKey, row.hash);
    }
  }

  return result;
}
|
|
582
|
-
|
|
583
|
-
/**
 * Store (or overwrite) the cached embedding for a chunk hash under the
 * current provider id, model and provider key. No-op when the cache is
 * disabled.
 *
 * @param hash - Chunk hash used as the cache key.
 * @param embedding - Vector to store; serialized as JSON, with its length
 *   recorded in `dims`.
 */
private upsertEmbeddingCache(hash: string, embedding: number[]): void {
  if (!this.config.cache.enabled) {
    return;
  }

  const writtenAt = Date.now();
  const upsert = this.db.prepare(
    `INSERT OR REPLACE INTO ${EMBEDDING_CACHE_TABLE}
       (provider, model, provider_key, hash, embedding, dims, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?)`
  );

  upsert.run(
    this.provider.id,
    this.provider.model,
    this.providerKey,
    hash,
    JSON.stringify(embedding),
    embedding.length,
    writtenAt
  );
}
|
|
605
|
-
|
|
606
|
-
/**
 * Trim the embedding cache back to `config.cache.maxEntries` rows by deleting
 * the rows with the oldest `updated_at` values. No-op when the cache is
 * disabled or already within the limit.
 */
private pruneEmbeddingCacheIfNeeded(): void {
  if (!this.config.cache.enabled) {
    return;
  }

  const countStmt = this.db.prepare(`SELECT COUNT(*) as count FROM ${EMBEDDING_CACHE_TABLE}`);
  const total = (countStmt.get() as { count: number }).count;
  const limit = this.config.cache.maxEntries;
  if (total <= limit) {
    return;
  }

  // Delete exactly as many rows as the table is over the limit, oldest first.
  const overflow = total - limit;
  const evict = this.db.prepare(
    `DELETE FROM ${EMBEDDING_CACHE_TABLE}
       WHERE rowid IN (
         SELECT rowid FROM ${EMBEDDING_CACHE_TABLE}
         ORDER BY updated_at ASC
         LIMIT ?
       )`
  );
  evict.run(overflow);
}
|
|
628
|
-
}
|