@neurcode-ai/cli 0.9.63 → 0.9.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/dist/commands/brain.d.ts.map +1 -1
- package/dist/commands/brain.js +273 -0
- package/dist/commands/brain.js.map +1 -1
- package/dist/commands/control-plane.js +7 -7
- package/dist/commands/control-plane.js.map +1 -1
- package/dist/commands/fix.d.ts.map +1 -1
- package/dist/commands/fix.js +108 -1
- package/dist/commands/fix.js.map +1 -1
- package/dist/commands/patch-apply.d.ts +2 -0
- package/dist/commands/patch-apply.d.ts.map +1 -1
- package/dist/commands/patch-apply.js +331 -19
- package/dist/commands/patch-apply.js.map +1 -1
- package/dist/commands/pilot-report.d.ts +9 -0
- package/dist/commands/pilot-report.d.ts.map +1 -0
- package/dist/commands/pilot-report.js +176 -0
- package/dist/commands/pilot-report.js.map +1 -0
- package/dist/commands/remediate-governance.d.ts +54 -0
- package/dist/commands/remediate-governance.d.ts.map +1 -0
- package/dist/commands/remediate-governance.js +375 -0
- package/dist/commands/remediate-governance.js.map +1 -0
- package/dist/commands/remediate.d.ts.map +1 -1
- package/dist/commands/remediate.js.map +1 -1
- package/dist/commands/replay.d.ts.map +1 -1
- package/dist/commands/replay.js +35 -5
- package/dist/commands/replay.js.map +1 -1
- package/dist/commands/verify.d.ts.map +1 -1
- package/dist/commands/verify.js +336 -25
- package/dist/commands/verify.js.map +1 -1
- package/dist/commands/workspace.js +7 -7
- package/dist/commands/workspace.js.map +1 -1
- package/dist/daemon/server.d.ts +2 -2
- package/dist/daemon/server.d.ts.map +1 -1
- package/dist/daemon/server.js +2113 -32
- package/dist/daemon/server.js.map +1 -1
- package/dist/explainability/DeterminismClassifier.d.ts +34 -0
- package/dist/explainability/DeterminismClassifier.d.ts.map +1 -0
- package/dist/explainability/DeterminismClassifier.js +104 -0
- package/dist/explainability/DeterminismClassifier.js.map +1 -0
- package/dist/explainability/ViolationFormatter.d.ts +32 -0
- package/dist/explainability/ViolationFormatter.d.ts.map +1 -0
- package/dist/explainability/ViolationFormatter.js +252 -0
- package/dist/explainability/ViolationFormatter.js.map +1 -0
- package/dist/explainability/index.d.ts +15 -0
- package/dist/explainability/index.d.ts.map +1 -0
- package/dist/explainability/index.js +94 -0
- package/dist/explainability/index.js.map +1 -0
- package/dist/explainability/types.d.ts +37 -0
- package/dist/explainability/types.d.ts.map +1 -0
- package/dist/explainability/types.js +3 -0
- package/dist/explainability/types.js.map +1 -0
- package/dist/governance/canonical-pipeline.d.ts +38 -0
- package/dist/governance/canonical-pipeline.d.ts.map +1 -0
- package/dist/governance/canonical-pipeline.js +448 -0
- package/dist/governance/canonical-pipeline.js.map +1 -0
- package/dist/governance/structural-on-diff.d.ts +13 -0
- package/dist/governance/structural-on-diff.d.ts.map +1 -0
- package/dist/governance/structural-on-diff.js +35 -0
- package/dist/governance/structural-on-diff.js.map +1 -0
- package/dist/governance/structural-policy-merge.d.ts +14 -0
- package/dist/governance/structural-policy-merge.d.ts.map +1 -0
- package/dist/governance/structural-policy-merge.js +25 -0
- package/dist/governance/structural-policy-merge.js.map +1 -0
- package/dist/index.js +86 -4
- package/dist/index.js.map +1 -1
- package/dist/integrations/review-compression/index.d.ts +50 -0
- package/dist/integrations/review-compression/index.d.ts.map +1 -0
- package/dist/integrations/review-compression/index.js +158 -0
- package/dist/integrations/review-compression/index.js.map +1 -0
- package/dist/intent-engine/domain-taxonomy.d.ts +42 -0
- package/dist/intent-engine/domain-taxonomy.d.ts.map +1 -0
- package/dist/intent-engine/domain-taxonomy.js +534 -0
- package/dist/intent-engine/domain-taxonomy.js.map +1 -0
- package/dist/intent-engine/index.d.ts +1 -0
- package/dist/intent-engine/index.d.ts.map +1 -1
- package/dist/intent-engine/index.js +6 -1
- package/dist/intent-engine/index.js.map +1 -1
- package/dist/intent-engine/matcher.d.ts.map +1 -1
- package/dist/intent-engine/matcher.js +2 -0
- package/dist/intent-engine/matcher.js.map +1 -1
- package/dist/intent-engine/parser.d.ts.map +1 -1
- package/dist/intent-engine/parser.js +47 -0
- package/dist/intent-engine/parser.js.map +1 -1
- package/dist/intent-engine/semantic-expander.d.ts +104 -0
- package/dist/intent-engine/semantic-expander.d.ts.map +1 -0
- package/dist/intent-engine/semantic-expander.js +480 -0
- package/dist/intent-engine/semantic-expander.js.map +1 -0
- package/dist/patch-engine/diff.d.ts +1 -1
- package/dist/patch-engine/diff.js +1 -1
- package/dist/patch-engine/generator.d.ts +9 -0
- package/dist/patch-engine/generator.d.ts.map +1 -1
- package/dist/patch-engine/generator.js +375 -17
- package/dist/patch-engine/generator.js.map +1 -1
- package/dist/patch-engine/index.d.ts +25 -25
- package/dist/patch-engine/index.d.ts.map +1 -1
- package/dist/patch-engine/index.js +134 -87
- package/dist/patch-engine/index.js.map +1 -1
- package/dist/patch-engine/patterns.d.ts +1 -1
- package/dist/patch-engine/patterns.d.ts.map +1 -1
- package/dist/patch-engine/patterns.js +282 -41
- package/dist/patch-engine/patterns.js.map +1 -1
- package/dist/patch-engine/rollback.d.ts +31 -0
- package/dist/patch-engine/rollback.d.ts.map +1 -0
- package/dist/patch-engine/rollback.js +275 -0
- package/dist/patch-engine/rollback.js.map +1 -0
- package/dist/patch-engine/safety.d.ts +28 -0
- package/dist/patch-engine/safety.d.ts.map +1 -0
- package/dist/patch-engine/safety.js +122 -0
- package/dist/patch-engine/safety.js.map +1 -0
- package/dist/patch-engine/transaction.d.ts +52 -0
- package/dist/patch-engine/transaction.d.ts.map +1 -0
- package/dist/patch-engine/transaction.js +93 -0
- package/dist/patch-engine/transaction.js.map +1 -0
- package/dist/semantic/index.d.ts +14 -0
- package/dist/semantic/index.d.ts.map +1 -0
- package/dist/semantic/index.js +30 -0
- package/dist/semantic/index.js.map +1 -0
- package/dist/semantic/tfidf-engine.d.ts +81 -0
- package/dist/semantic/tfidf-engine.d.ts.map +1 -0
- package/dist/semantic/tfidf-engine.js +278 -0
- package/dist/semantic/tfidf-engine.js.map +1 -0
- package/dist/semantic/vector-store.d.ts +108 -0
- package/dist/semantic/vector-store.d.ts.map +1 -0
- package/dist/semantic/vector-store.js +321 -0
- package/dist/semantic/vector-store.js.map +1 -0
- package/dist/structural-rules/context-severity.d.ts +46 -0
- package/dist/structural-rules/context-severity.d.ts.map +1 -0
- package/dist/structural-rules/context-severity.js +115 -0
- package/dist/structural-rules/context-severity.js.map +1 -0
- package/dist/structural-rules/distributed/DS001-saga-rollback-absence.d.ts +11 -0
- package/dist/structural-rules/distributed/DS001-saga-rollback-absence.d.ts.map +1 -0
- package/dist/structural-rules/distributed/DS001-saga-rollback-absence.js +212 -0
- package/dist/structural-rules/distributed/DS001-saga-rollback-absence.js.map +1 -0
- package/dist/structural-rules/distributed/DS002-missing-correlation-id.d.ts +11 -0
- package/dist/structural-rules/distributed/DS002-missing-correlation-id.d.ts.map +1 -0
- package/dist/structural-rules/distributed/DS002-missing-correlation-id.js +213 -0
- package/dist/structural-rules/distributed/DS002-missing-correlation-id.js.map +1 -0
- package/dist/structural-rules/distributed/index.d.ts +3 -0
- package/dist/structural-rules/distributed/index.d.ts.map +1 -0
- package/dist/structural-rules/distributed/index.js +8 -0
- package/dist/structural-rules/distributed/index.js.map +1 -0
- package/dist/structural-rules/engine.d.ts +25 -0
- package/dist/structural-rules/engine.d.ts.map +1 -0
- package/dist/structural-rules/engine.js +90 -0
- package/dist/structural-rules/engine.js.map +1 -0
- package/dist/structural-rules/index.d.ts +41 -0
- package/dist/structural-rules/index.d.ts.map +1 -0
- package/dist/structural-rules/index.js +141 -0
- package/dist/structural-rules/index.js.map +1 -0
- package/dist/structural-rules/python/PY001-asyncio-task-without-cancel.d.ts +11 -0
- package/dist/structural-rules/python/PY001-asyncio-task-without-cancel.d.ts.map +1 -0
- package/dist/structural-rules/python/PY001-asyncio-task-without-cancel.js +66 -0
- package/dist/structural-rules/python/PY001-asyncio-task-without-cancel.js.map +1 -0
- package/dist/structural-rules/python/PY002-unbounded-dict-singleton.d.ts +11 -0
- package/dist/structural-rules/python/PY002-unbounded-dict-singleton.d.ts.map +1 -0
- package/dist/structural-rules/python/PY002-unbounded-dict-singleton.js +135 -0
- package/dist/structural-rules/python/PY002-unbounded-dict-singleton.js.map +1 -0
- package/dist/structural-rules/python/PY003-broad-except-clause.d.ts +11 -0
- package/dist/structural-rules/python/PY003-broad-except-clause.d.ts.map +1 -0
- package/dist/structural-rules/python/PY003-broad-except-clause.js +86 -0
- package/dist/structural-rules/python/PY003-broad-except-clause.js.map +1 -0
- package/dist/structural-rules/python/PY004-swallowed-async-exception.d.ts +11 -0
- package/dist/structural-rules/python/PY004-swallowed-async-exception.d.ts.map +1 -0
- package/dist/structural-rules/python/PY004-swallowed-async-exception.js +167 -0
- package/dist/structural-rules/python/PY004-swallowed-async-exception.js.map +1 -0
- package/dist/structural-rules/python/PY005-fastapi-without-pydantic.d.ts +11 -0
- package/dist/structural-rules/python/PY005-fastapi-without-pydantic.d.ts.map +1 -0
- package/dist/structural-rules/python/PY005-fastapi-without-pydantic.js +154 -0
- package/dist/structural-rules/python/PY005-fastapi-without-pydantic.js.map +1 -0
- package/dist/structural-rules/python/PY006-blocking-io-in-async.d.ts +11 -0
- package/dist/structural-rules/python/PY006-blocking-io-in-async.d.ts.map +1 -0
- package/dist/structural-rules/python/PY006-blocking-io-in-async.js +130 -0
- package/dist/structural-rules/python/PY006-blocking-io-in-async.js.map +1 -0
- package/dist/structural-rules/python/PY007-sqlalchemy-session-leak.d.ts +11 -0
- package/dist/structural-rules/python/PY007-sqlalchemy-session-leak.d.ts.map +1 -0
- package/dist/structural-rules/python/PY007-sqlalchemy-session-leak.js +93 -0
- package/dist/structural-rules/python/PY007-sqlalchemy-session-leak.js.map +1 -0
- package/dist/structural-rules/python/PY008-celery-task-without-retry.d.ts +11 -0
- package/dist/structural-rules/python/PY008-celery-task-without-retry.d.ts.map +1 -0
- package/dist/structural-rules/python/PY008-celery-task-without-retry.js +154 -0
- package/dist/structural-rules/python/PY008-celery-task-without-retry.js.map +1 -0
- package/dist/structural-rules/python/PY009-unsafe-pickle-deserialization.d.ts +11 -0
- package/dist/structural-rules/python/PY009-unsafe-pickle-deserialization.d.ts.map +1 -0
- package/dist/structural-rules/python/PY009-unsafe-pickle-deserialization.js +133 -0
- package/dist/structural-rules/python/PY009-unsafe-pickle-deserialization.js.map +1 -0
- package/dist/structural-rules/python/PY010-leaked-aiohttp-session.d.ts +11 -0
- package/dist/structural-rules/python/PY010-leaked-aiohttp-session.d.ts.map +1 -0
- package/dist/structural-rules/python/PY010-leaked-aiohttp-session.js +80 -0
- package/dist/structural-rules/python/PY010-leaked-aiohttp-session.js.map +1 -0
- package/dist/structural-rules/rules/SR001-swallowed-async-rejection.d.ts +11 -0
- package/dist/structural-rules/rules/SR001-swallowed-async-rejection.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR001-swallowed-async-rejection.js +145 -0
- package/dist/structural-rules/rules/SR001-swallowed-async-rejection.js.map +1 -0
- package/dist/structural-rules/rules/SR002-unbounded-collection.d.ts +11 -0
- package/dist/structural-rules/rules/SR002-unbounded-collection.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR002-unbounded-collection.js +196 -0
- package/dist/structural-rules/rules/SR002-unbounded-collection.js.map +1 -0
- package/dist/structural-rules/rules/SR003-timer-without-cleanup.d.ts +11 -0
- package/dist/structural-rules/rules/SR003-timer-without-cleanup.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR003-timer-without-cleanup.js +148 -0
- package/dist/structural-rules/rules/SR003-timer-without-cleanup.js.map +1 -0
- package/dist/structural-rules/rules/SR004-request-boundary-no-validation.d.ts +11 -0
- package/dist/structural-rules/rules/SR004-request-boundary-no-validation.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR004-request-boundary-no-validation.js +162 -0
- package/dist/structural-rules/rules/SR004-request-boundary-no-validation.js.map +1 -0
- package/dist/structural-rules/rules/SR005-halfopen-probe-gate.d.ts +11 -0
- package/dist/structural-rules/rules/SR005-halfopen-probe-gate.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR005-halfopen-probe-gate.js +150 -0
- package/dist/structural-rules/rules/SR005-halfopen-probe-gate.js.map +1 -0
- package/dist/structural-rules/rules/SR006-fanout-error-sanitization.d.ts +11 -0
- package/dist/structural-rules/rules/SR006-fanout-error-sanitization.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR006-fanout-error-sanitization.js +161 -0
- package/dist/structural-rules/rules/SR006-fanout-error-sanitization.js.map +1 -0
- package/dist/structural-rules/rules/SR007-cross-request-error.d.ts +11 -0
- package/dist/structural-rules/rules/SR007-cross-request-error.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR007-cross-request-error.js +175 -0
- package/dist/structural-rules/rules/SR007-cross-request-error.js.map +1 -0
- package/dist/structural-rules/rules/SR008-background-task-orphan.d.ts +11 -0
- package/dist/structural-rules/rules/SR008-background-task-orphan.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR008-background-task-orphan.js +176 -0
- package/dist/structural-rules/rules/SR008-background-task-orphan.js.map +1 -0
- package/dist/structural-rules/rules/SR009-missing-retry-backoff.d.ts +11 -0
- package/dist/structural-rules/rules/SR009-missing-retry-backoff.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR009-missing-retry-backoff.js +168 -0
- package/dist/structural-rules/rules/SR009-missing-retry-backoff.js.map +1 -0
- package/dist/structural-rules/rules/SR010-retry-storm.d.ts +11 -0
- package/dist/structural-rules/rules/SR010-retry-storm.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR010-retry-storm.js +181 -0
- package/dist/structural-rules/rules/SR010-retry-storm.js.map +1 -0
- package/dist/structural-rules/rules/SR011-event-listener-leak.d.ts +11 -0
- package/dist/structural-rules/rules/SR011-event-listener-leak.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR011-event-listener-leak.js +208 -0
- package/dist/structural-rules/rules/SR011-event-listener-leak.js.map +1 -0
- package/dist/structural-rules/rules/SR012-promise-race-leak.d.ts +11 -0
- package/dist/structural-rules/rules/SR012-promise-race-leak.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR012-promise-race-leak.js +191 -0
- package/dist/structural-rules/rules/SR012-promise-race-leak.js.map +1 -0
- package/dist/structural-rules/rules/SR013-missing-idempotency-key.d.ts +11 -0
- package/dist/structural-rules/rules/SR013-missing-idempotency-key.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR013-missing-idempotency-key.js +219 -0
- package/dist/structural-rules/rules/SR013-missing-idempotency-key.js.map +1 -0
- package/dist/structural-rules/rules/SR014-mutable-closure-async.d.ts +11 -0
- package/dist/structural-rules/rules/SR014-mutable-closure-async.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR014-mutable-closure-async.js +208 -0
- package/dist/structural-rules/rules/SR014-mutable-closure-async.js.map +1 -0
- package/dist/structural-rules/rules/SR015-dangling-abort-controller.d.ts +11 -0
- package/dist/structural-rules/rules/SR015-dangling-abort-controller.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR015-dangling-abort-controller.js +190 -0
- package/dist/structural-rules/rules/SR015-dangling-abort-controller.js.map +1 -0
- package/dist/structural-rules/rules/SR016-unsafe-json-parse.d.ts +11 -0
- package/dist/structural-rules/rules/SR016-unsafe-json-parse.d.ts.map +1 -0
- package/dist/structural-rules/rules/SR016-unsafe-json-parse.js +187 -0
- package/dist/structural-rules/rules/SR016-unsafe-json-parse.js.map +1 -0
- package/dist/structural-rules/suppressions.d.ts +43 -0
- package/dist/structural-rules/suppressions.d.ts.map +1 -0
- package/dist/structural-rules/suppressions.js +115 -0
- package/dist/structural-rules/suppressions.js.map +1 -0
- package/dist/structural-rules/types.d.ts +43 -0
- package/dist/structural-rules/types.d.ts.map +1 -0
- package/dist/structural-rules/types.js +3 -0
- package/dist/structural-rules/types.js.map +1 -0
- package/dist/utils/advisory-signals.d.ts +5 -0
- package/dist/utils/advisory-signals.d.ts.map +1 -1
- package/dist/utils/advisory-signals.js +50 -12
- package/dist/utils/advisory-signals.js.map +1 -1
- package/dist/utils/ai-debt-budget.d.ts.map +1 -1
- package/dist/utils/ai-debt-budget.js +5 -2
- package/dist/utils/ai-debt-budget.js.map +1 -1
- package/dist/utils/brain-cache.d.ts +100 -0
- package/dist/utils/brain-cache.d.ts.map +1 -0
- package/dist/utils/brain-cache.js +346 -0
- package/dist/utils/brain-cache.js.map +1 -0
- package/dist/utils/cli-json.d.ts.map +1 -1
- package/dist/utils/cli-json.js +80 -12
- package/dist/utils/cli-json.js.map +1 -1
- package/dist/utils/execution-bus.d.ts +10 -0
- package/dist/utils/execution-bus.d.ts.map +1 -1
- package/dist/utils/execution-bus.js +16 -0
- package/dist/utils/execution-bus.js.map +1 -1
- package/dist/utils/governance-provenance.d.ts +95 -0
- package/dist/utils/governance-provenance.d.ts.map +1 -0
- package/dist/utils/governance-provenance.js +187 -0
- package/dist/utils/governance-provenance.js.map +1 -0
- package/dist/utils/pilot-metrics.d.ts +46 -0
- package/dist/utils/pilot-metrics.d.ts.map +1 -0
- package/dist/utils/pilot-metrics.js +240 -0
- package/dist/utils/pilot-metrics.js.map +1 -0
- package/dist/utils/policy-compiler.d.ts +6 -0
- package/dist/utils/policy-compiler.d.ts.map +1 -1
- package/dist/utils/policy-compiler.js +20 -0
- package/dist/utils/policy-compiler.js.map +1 -1
- package/dist/utils/replay-runtime.d.ts +34 -0
- package/dist/utils/replay-runtime.d.ts.map +1 -1
- package/dist/utils/replay-runtime.js +207 -0
- package/dist/utils/replay-runtime.js.map +1 -1
- package/dist/workspace/cross-repo-graph.d.ts +111 -0
- package/dist/workspace/cross-repo-graph.d.ts.map +1 -0
- package/dist/workspace/cross-repo-graph.js +450 -0
- package/dist/workspace/cross-repo-graph.js.map +1 -0
- package/dist/workspace/federated-context.d.ts +144 -0
- package/dist/workspace/federated-context.d.ts.map +1 -0
- package/dist/workspace/federated-context.js +347 -0
- package/dist/workspace/federated-context.js.map +1 -0
- package/dist/workspace/index.d.ts +38 -0
- package/dist/workspace/index.d.ts.map +1 -0
- package/dist/workspace/index.js +48 -0
- package/dist/workspace/index.js.map +1 -0
- package/package.json +10 -10
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"use strict";
/**
 * Neurcode Semantic Layer — public entry point.
 *
 * Re-exports, as live getters, two groups of helpers:
 *   - TF-IDF engine primitives from ./tfidf-engine (for testing / advanced use)
 *   - the high-level vector store from ./vector-store
 *     (persists to .neurcode/semantic-index.json)
 *
 * Phase 1: TF-IDF semantic vector store (deterministic, zero external deps)
 * Phase 2: Optional embedding upgrade (drop-in, same API surface)
 *
 * Usage:
 *   import { buildSemanticIndex, semanticSearch } from '../semantic';
 */
Object.defineProperty(exports, "__esModule", { value: true });
// Kept as one literal assignment chain so every export name exists (as undefined)
// before the getters below replace it.
exports.explainQuery = exports.clearSemanticIndex = exports.getSemanticIndexStats = exports.removeFileFromIndex = exports.upsertFileInIndex = exports.semanticSearch = exports.buildSemanticIndex = exports.removeDocumentFromCorpus = exports.upsertDocumentInCorpus = exports.computeIDF = exports.searchCorpus = exports.buildCorpus = exports.tokenize = void 0;
/**
 * Defines an enumerable getter on `exports` for each name, forwarding the
 * read to the same-named property of `source`.
 */
function reExport(source, names) {
    for (const name of names) {
        Object.defineProperty(exports, name, {
            enumerable: true,
            get: function () { return source[name]; },
        });
    }
}
// TF-IDF engine primitives (for testing / advanced use)
const tfidfEngine = require("./tfidf-engine");
reExport(tfidfEngine, [
    "tokenize",
    "buildCorpus",
    "searchCorpus",
    "computeIDF",
    "upsertDocumentInCorpus",
    "removeDocumentFromCorpus",
]);
// High-level vector store (persists to .neurcode/semantic-index.json).
// Required only after the engine getters are in place, matching the original order.
const vectorStore = require("./vector-store");
reExport(vectorStore, [
    "buildSemanticIndex",
    "semanticSearch",
    "upsertFileInIndex",
    "removeFileFromIndex",
    "getSemanticIndexStats",
    "clearSemanticIndex",
    "explainQuery",
]);
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/semantic/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;;AAEH,+CAQwB;AAPtB,wDAAwD;AACxD,wGAAA,QAAQ,OAAA;AACR,2GAAA,WAAW,OAAA;AACX,4GAAA,YAAY,OAAA;AACZ,0GAAA,UAAU,OAAA;AACV,sHAAA,sBAAsB,OAAA;AACtB,wHAAA,wBAAwB,OAAA;AAY1B,+CASwB;AARtB,sEAAsE;AACtE,kHAAA,kBAAkB,OAAA;AAClB,8GAAA,cAAc,OAAA;AACd,iHAAA,iBAAiB,OAAA;AACjB,mHAAA,mBAAmB,OAAA;AACnB,qHAAA,qBAAqB,OAAA;AACrB,kHAAA,kBAAkB,OAAA;AAClB,4GAAA,YAAY,OAAA"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TF-IDF Semantic Engine — fully deterministic, zero external dependencies.
|
|
3
|
+
*
|
|
4
|
+
* Implements the classic TF-IDF (Term Frequency × Inverse Document Frequency)
|
|
5
|
+
* information-retrieval model with cosine similarity ranking.
|
|
6
|
+
*
|
|
7
|
+
* Why this over raw keyword matching:
|
|
8
|
+
* - Weights rare/distinctive terms higher (IDF) than common ones
|
|
9
|
+
* - File-length normalised (TF prevents long files dominating)
|
|
10
|
+
* - Cosine similarity handles partial matches gracefully
|
|
11
|
+
* - Identical results every run — fully deterministic, no LLM required
|
|
12
|
+
* - Upgradeable: the same SparseVector type works with real embeddings later
|
|
13
|
+
*
|
|
14
|
+
* Algorithm:
|
|
15
|
+
* TF(t, d) = log(1 + freq(t, d) / |d|)
|
|
16
|
+
* IDF(t, D) = log((|D| + 1) / (df(t) + 1)) + 1 [smoothed]
|
|
17
|
+
* weight = TF × IDF
|
|
18
|
+
* similarity = cosine(vec_query, vec_doc)
|
|
19
|
+
*/
|
|
20
|
+
/** Sparse vector: termId → TF-IDF weight. Keys are term strings, not integers. */
|
|
21
|
+
export type SparseVector = Map<string, number>;
|
|
22
|
+
export interface TfIdfDocument {
|
|
23
|
+
id: string;
|
|
24
|
+
text: string;
|
|
25
|
+
metadata?: Record<string, unknown>;
|
|
26
|
+
}
|
|
27
|
+
export interface TfIdfSearchResult {
|
|
28
|
+
id: string;
|
|
29
|
+
score: number;
|
|
30
|
+
metadata?: Record<string, unknown>;
|
|
31
|
+
}
|
|
32
|
+
/** Serialisable form of SparseVector (for JSON persistence). */
|
|
33
|
+
export type SparseVectorRecord = Record<string, number>;
|
|
34
|
+
export interface TfIdfIndexEntry {
|
|
35
|
+
id: string;
|
|
36
|
+
vector: SparseVectorRecord;
|
|
37
|
+
norm: number;
|
|
38
|
+
termCount: number;
|
|
39
|
+
metadata?: Record<string, unknown>;
|
|
40
|
+
}
|
|
41
|
+
export interface TfIdfCorpus {
|
|
42
|
+
schemaVersion: 1;
|
|
43
|
+
builtAt: string;
|
|
44
|
+
documentCount: number;
|
|
45
|
+
/** term → document-frequency (how many docs contain it) */
|
|
46
|
+
df: Record<string, number>;
|
|
47
|
+
entries: TfIdfIndexEntry[];
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Tokenises source text into normalised terms.
|
|
51
|
+
* Splits on: whitespace, punctuation, camelCase, PascalCase, snake_case.
|
|
52
|
+
*/
|
|
53
|
+
export declare function tokenize(text: string): string[];
|
|
54
|
+
/**
|
|
55
|
+
* Computes smoothed IDF for every term in the vocabulary.
|
|
56
|
+
* IDF(t) = log((N + 1) / (df(t) + 1)) + 1
|
|
57
|
+
*/
|
|
58
|
+
export declare function computeIDF(df: Record<string, number>, N: number): Map<string, number>;
|
|
59
|
+
export declare function buildCorpus(documents: TfIdfDocument[]): TfIdfCorpus;
|
|
60
|
+
/**
|
|
61
|
+
* Vectorises a query using the corpus IDF weights, then ranks documents
|
|
62
|
+
* by cosine similarity.
|
|
63
|
+
*
|
|
64
|
+
* Returns results sorted descending by score, filtered by minScore.
|
|
65
|
+
*/
|
|
66
|
+
export declare function searchCorpus(query: string, corpus: TfIdfCorpus, options?: {
|
|
67
|
+
limit?: number;
|
|
68
|
+
minScore?: number;
|
|
69
|
+
}): TfIdfSearchResult[];
|
|
70
|
+
/**
|
|
71
|
+
* Adds or replaces a single document in an existing corpus.
|
|
72
|
+
* DF is updated incrementally to avoid full rebuilds on every watch change.
|
|
73
|
+
* Note: IDF values become slightly approximate after incremental updates;
|
|
74
|
+
* a full rebuild is recommended periodically (e.g. on `neurcode brain build`).
|
|
75
|
+
*/
|
|
76
|
+
export declare function upsertDocumentInCorpus(corpus: TfIdfCorpus, doc: TfIdfDocument): TfIdfCorpus;
|
|
77
|
+
/**
|
|
78
|
+
* Removes a document from the corpus.
|
|
79
|
+
*/
|
|
80
|
+
export declare function removeDocumentFromCorpus(corpus: TfIdfCorpus, docId: string): TfIdfCorpus;
|
|
81
|
+
//# sourceMappingURL=tfidf-engine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf-engine.d.ts","sourceRoot":"","sources":["../../src/semantic/tfidf-engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,mFAAmF;AACnF,MAAM,MAAM,YAAY,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE/C,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,iBAAiB;IAChC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,gEAAgE;AAChE,MAAM,MAAM,kBAAkB,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAExD,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,kBAAkB,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,WAAW;IAC1B,aAAa,EAAE,CAAC,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,2DAA2D;IAC3D,EAAE,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC3B,OAAO,EAAE,eAAe,EAAE,CAAC;CAC5B;AA2BD;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAa/C;AAyBD;;;GAGG;AACH,wBAAgB,UAAU,CAAC,EAAE,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAMrF;AAID,wBAAgB,WAAW,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,WAAW,CAuDnE;AAID;;;;;GAKG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,WAAW,EACnB,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GAClD,iBAAiB,EAAE,CA8CrB;AAID;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACpC,MAAM,EAAE,WAAW,EACnB,GAAG,EAAE,aAAa,GACjB,WAAW,CAiEb;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,MAAM,GACZ,WAAW,CAoBb"}
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TF-IDF Semantic Engine — fully deterministic, zero external dependencies.
|
|
4
|
+
*
|
|
5
|
+
* Implements the classic TF-IDF (Term Frequency × Inverse Document Frequency)
|
|
6
|
+
* information-retrieval model with cosine similarity ranking.
|
|
7
|
+
*
|
|
8
|
+
* Why this over raw keyword matching:
|
|
9
|
+
* - Weights rare/distinctive terms higher (IDF) than common ones
|
|
10
|
+
* - File-length normalised (TF prevents long files dominating)
|
|
11
|
+
* - Cosine similarity handles partial matches gracefully
|
|
12
|
+
* - Identical results every run — fully deterministic, no LLM required
|
|
13
|
+
* - Upgradeable: the same SparseVector type works with real embeddings later
|
|
14
|
+
*
|
|
15
|
+
* Algorithm:
|
|
16
|
+
* TF(t, d) = log(1 + freq(t, d) / |d|)
|
|
17
|
+
* IDF(t, D) = log((|D| + 1) / (df(t) + 1)) + 1 [smoothed]
|
|
18
|
+
* weight = TF × IDF
|
|
19
|
+
* similarity = cosine(vec_query, vec_doc)
|
|
20
|
+
*/
|
|
21
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
|
+
exports.tokenize = tokenize;
|
|
23
|
+
exports.computeIDF = computeIDF;
|
|
24
|
+
exports.buildCorpus = buildCorpus;
|
|
25
|
+
exports.searchCorpus = searchCorpus;
|
|
26
|
+
exports.upsertDocumentInCorpus = upsertDocumentInCorpus;
|
|
27
|
+
exports.removeDocumentFromCorpus = removeDocumentFromCorpus;
|
|
28
|
+
// ── Constants ──────────────────────────────────────────────────────────────────
|
|
29
|
+
/** Terms shorter than this are discarded. */
const MIN_TERM_LEN = 2;
/** Max distinct terms stored per document (cap to control memory). */
const MAX_TERMS_PER_DOC = 400;
/**
 * Stop-words that carry no semantic signal for code search.
 * Keep this tight — over-pruning hurts recall.
 *
 * Each word is listed exactly once ('from' previously appeared twice).
 */
const STOP_WORDS = new Set([
    // Common English function words
    'the', 'and', 'for', 'are', 'was', 'were', 'has', 'have', 'had',
    'not', 'but', 'with', 'from', 'this', 'that', 'they', 'their',
    'its', 'also', 'will', 'can', 'may', 'would', 'could', 'should',
    'into', 'out', 'any', 'all', 'one', 'two', 'new', 'use', 'used',
    // Language keywords and ubiquitous type names
    'var', 'let', 'const', 'type', 'interface', 'class', 'function',
    'return', 'import', 'export', 'default', 'async', 'await',
    'true', 'false', 'null', 'undefined', 'void', 'string', 'number',
    'boolean', 'object', 'array', 'promise', 'error', 'extends', 'implements',
]);
// ── Tokenisation ───────────────────────────────────────────────────────────────
/**
 * Tokenises source text into normalised (lower-cased) terms.
 * Splits on: whitespace, punctuation, camelCase, PascalCase, snake_case.
 *
 * @param {string} text - Raw text to tokenise; any falsy value yields [].
 * @returns {string[]} Terms in order of appearance (duplicates kept), with
 *   terms shorter than MIN_TERM_LEN and stop-words removed.
 */
function tokenize(text) {
    if (!text) {
        return [];
    }
    // Insert a space at lower→Upper boundaries (getUserById → get User By Id)
    // and between an acronym and the next capitalised word (HTTPServer → HTTP Server).
    const spaced = text
        .replace(/([a-z])([A-Z])/g, '$1 $2')
        .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
    return spaced
        .toLowerCase()
        .split(/[\s\W_]+/) // whitespace, punctuation and underscores are separators
        .filter((term) => term.length >= MIN_TERM_LEN && !STOP_WORDS.has(term));
}
|
|
64
|
+
// ── TF computation ─────────────────────────────────────────────────────────────
|
|
65
|
+
/**
 * Builds the term-frequency map of one document.
 *
 * Each distinct token is weighted with log-normalised TF:
 *   log(1 + count / doc_length)
 * where doc_length is the number of tokens, floored at 1 so an empty
 * token list never divides by zero.
 *
 * @param {string[]} tokens - Tokens of a single document.
 * @returns {Map<string, number>} term → TF weight, in first-seen order.
 */
function computeTF(tokens) {
    // Raw occurrence count per term.
    const counts = new Map();
    for (const tok of tokens) {
        const seen = counts.has(tok) ? counts.get(tok) : 0;
        counts.set(tok, seen + 1);
    }
    const length = Math.max(1, tokens.length);
    const weights = new Map();
    counts.forEach((occurrences, term) => {
        weights.set(term, Math.log(1 + occurrences / length));
    });
    return weights;
}
|
|
81
|
+
// ── IDF computation ────────────────────────────────────────────────────────────
|
|
82
|
+
/**
 * Derives a smoothed inverse-document-frequency weight for each vocabulary term.
 *
 *   IDF(t) = log((N + 1) / (df(t) + 1)) + 1
 *
 * @param {Object<string, number>} df - Term → number of documents containing it.
 * @param {number} N - Number of documents in the corpus.
 * @returns {Map<string, number>} Term → IDF weight, in the key order of `df`.
 */
function computeIDF(df, N) {
    const weights = new Map();
    Object.keys(df).forEach((term) => {
        const smoothedRatio = (N + 1) / (df[term] + 1);
        weights.set(term, Math.log(smoothedRatio) + 1);
    });
    return weights;
}
|
|
93
|
+
// ── Corpus building ────────────────────────────────────────────────────────────
|
|
94
|
+
/**
 * Builds a TF-IDF corpus from scratch.
 *
 * Pass 1 tokenises every document and counts, per term, how many documents
 * contain it (DF). Pass 2 weights each document's terms by TF × IDF, keeps the
 * MAX_TERMS_PER_DOC heaviest terms as a sparse vector and records that
 * vector's Euclidean norm.
 *
 * @param {Array<{id: *, text: string, metadata: *}>} documents - Documents to index.
 * @returns {{schemaVersion: number, builtAt: string, documentCount: number,
 *            df: Object<string, number>, entries: Array<object>}}
 *          The corpus; each entry is `{ id, vector, norm, termCount, metadata }`.
 */
function buildCorpus(documents) {
    const N = documents.length;
    // Pass 1: collect per-document token lists and global DF
    const docTokens = [];
    // DF is counted in a Map, not a plain object: reading a missing key such
    // as "constructor" from `{}` yields the inherited Object function, which
    // would turn the count into a string and the term's IDF into NaN.
    const dfCounts = new Map();
    for (const doc of documents) {
        const tokens = tokenize(doc.text);
        docTokens.push({ id: doc.id, tokens, metadata: doc.metadata });
        // Each unique term contributes 1 to DF
        for (const term of new Set(tokens)) {
            dfCounts.set(term, (dfCounts.get(term) ?? 0) + 1);
        }
    }
    // The returned corpus keeps exposing DF as a plain object.
    const df = Object.fromEntries(dfCounts);
    const idf = computeIDF(df, N);
    // Pass 2: compute TF-IDF vectors and norms
    const entries = [];
    for (const { id, tokens, metadata } of docTokens) {
        const tf = computeTF(tokens);
        // Build sparse TF-IDF vector, capped at MAX_TERMS_PER_DOC by weight
        const weightedTerms = [];
        for (const [term, tfVal] of tf) {
            const idfVal = idf.get(term) ?? 1;
            weightedTerms.push([term, tfVal * idfVal]);
        }
        // Sort by weight descending and keep top MAX_TERMS_PER_DOC
        weightedTerms.sort((a, b) => b[1] - a[1]);
        const topTerms = weightedTerms.slice(0, MAX_TERMS_PER_DOC);
        const vector = {};
        let normSq = 0;
        for (const [term, weight] of topTerms) {
            vector[term] = weight;
            normSq += weight * weight;
        }
        const norm = Math.sqrt(normSq);
        entries.push({ id, vector, norm, termCount: topTerms.length, metadata });
    }
    return {
        schemaVersion: 1,
        builtAt: new Date().toISOString(),
        documentCount: N,
        df,
        entries,
    };
}
|
|
139
|
+
// ── Search ─────────────────────────────────────────────────────────────────────
|
|
140
|
+
/**
 * Ranks corpus documents against a free-text query by cosine similarity of
 * their TF-IDF vectors.
 *
 * The query is tokenised with `tokenize`, weighted with IDF values recomputed
 * from `corpus.df`, and compared with every entry's stored sparse vector.
 *
 * @param {string} query - Free-text query.
 * @param {object} corpus - Corpus object; `documentCount`, `df` and `entries` are read.
 * @param {object} [options]
 * @param {number} [options.limit=20] - Maximum number of results; values below 1 are raised to 1.
 * @param {number} [options.minScore=0.01] - Minimum cosine score; negative values are raised to 0.
 * @returns {Array<{id: *, score: number, metadata: *}>} Matches sorted by
 *          descending score. Empty when the query produces no usable terms.
 */
function searchCorpus(query, corpus, options = {}) {
    const limit = Math.max(1, options.limit ?? 20);
    const minScore = Math.max(0, options.minScore ?? 0.01);
    const N = Math.max(1, corpus.documentCount);
    const idf = computeIDF(corpus.df, N);
    const queryTokens = tokenize(query);
    if (queryTokens.length === 0) {
        return [];
    }
    // Build query TF-IDF vector
    const queryTF = computeTF(queryTokens);
    const queryVec = new Map();
    let queryNormSq = 0;
    for (const [term, tfVal] of queryTF) {
        const idfVal = idf.get(term) ?? (Math.log((N + 1) / 1) + 1); // IDF for unseen term
        const weight = tfVal * idfVal;
        queryVec.set(term, weight);
        queryNormSq += weight * weight;
    }
    const queryNorm = Math.sqrt(queryNormSq);
    if (queryNorm === 0) {
        return [];
    }
    // Score each document
    const hasOwn = Object.prototype.hasOwnProperty;
    const scored = [];
    for (const entry of corpus.entries) {
        // Skip entries that cannot be normalised: a zero norm (no terms) as
        // well as NaN / null norms that a damaged or reloaded index may hold.
        if (!(entry.norm > 0)) {
            continue;
        }
        // Dot product of query and document sparse vectors
        let dot = 0;
        for (const [term, qWeight] of queryVec) {
            // Own-property lookup only: a plain `entry.vector[term]` resolves
            // inherited members (e.g. "constructor") and would make the
            // score NaN for every document that does not contain the term.
            if (hasOwn.call(entry.vector, term)) {
                dot += qWeight * entry.vector[term];
            }
        }
        const cosine = dot / (queryNorm * entry.norm);
        if (cosine >= minScore) {
            scored.push({ id: entry.id, score: cosine, metadata: entry.metadata });
        }
    }
    return scored
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);
}
|
|
189
|
+
// ── Incremental update helpers ─────────────────────────────────────────────────
|
|
190
|
+
/**
 * Adds a document to a corpus, or replaces the entry that has the same id.
 *
 * DF is adjusted incrementally instead of rebuilding the whole corpus. Only
 * the upserted document's vector is computed with the adjusted DF; the other
 * entries keep their stored vectors, so weights become slightly approximate
 * until the next full rebuild (e.g. on `neurcode brain build`). When an entry
 * is replaced, its old DF contribution is removed using the terms of its
 * stored vector, which is capped at MAX_TERMS_PER_DOC.
 *
 * The input corpus is not modified; a new corpus object is returned.
 *
 * @param {object} corpus - Existing corpus (`documentCount`, `df`, `entries`).
 * @param {{id: *, text: string, metadata: *}} doc - Document to add or replace.
 * @returns {object} New corpus with the same shape as the input.
 */
function upsertDocumentInCorpus(corpus, doc) {
    // DF keys are arbitrary tokens, so membership must be tested on own
    // properties: `obj["constructor"]` on a plain object yields an inherited
    // function and would corrupt the counter.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const existingIdx = corpus.entries.findIndex((e) => e.id === doc.id);
    // If replacing, first remove old DF contributions
    const dfMutable = { ...corpus.df };
    let N = corpus.documentCount;
    if (existingIdx !== -1) {
        const old = corpus.entries[existingIdx];
        for (const term of Object.keys(old.vector)) {
            if (hasOwn(dfMutable, term)) {
                dfMutable[term] -= 1;
                if (dfMutable[term] <= 0) {
                    delete dfMutable[term];
                }
            }
        }
    }
    else {
        N += 1;
    }
    // Add new DF contributions
    const tokens = tokenize(doc.text);
    const unique = new Set(tokens);
    for (const term of unique) {
        const current = hasOwn(dfMutable, term) ? (dfMutable[term] ?? 0) : 0;
        dfMutable[term] = current + 1;
    }
    const idf = computeIDF(dfMutable, N);
    const tf = computeTF(tokens);
    const weightedTerms = [];
    for (const [term, tfVal] of tf) {
        const idfVal = idf.get(term) ?? 1;
        weightedTerms.push([term, tfVal * idfVal]);
    }
    // Keep only the heaviest MAX_TERMS_PER_DOC terms
    weightedTerms.sort((a, b) => b[1] - a[1]);
    const topTerms = weightedTerms.slice(0, MAX_TERMS_PER_DOC);
    const vector = {};
    let normSq = 0;
    for (const [term, weight] of topTerms) {
        vector[term] = weight;
        normSq += weight * weight;
    }
    const norm = Math.sqrt(normSq);
    const newEntry = {
        id: doc.id,
        vector,
        norm,
        termCount: topTerms.length,
        metadata: doc.metadata,
    };
    const newEntries = existingIdx !== -1
        ? corpus.entries.map((e, i) => (i === existingIdx ? newEntry : e))
        : [...corpus.entries, newEntry];
    return {
        schemaVersion: 1,
        builtAt: new Date().toISOString(),
        documentCount: N,
        df: dfMutable,
        entries: newEntries,
    };
}
|
|
254
|
+
/**
 * Removes a document from the corpus.
 *
 * DF is decremented for every term of the removed entry's stored vector, and
 * terms whose count drops to zero are deleted. The input corpus is not
 * modified.
 *
 * @param {object} corpus - Existing corpus (`documentCount`, `df`, `entries`).
 * @param {*} docId - Id of the entry to remove.
 * @returns {object} The same `corpus` object when no entry has that id,
 *          otherwise a new corpus without the entry.
 */
function removeDocumentFromCorpus(corpus, docId) {
    const existingIdx = corpus.entries.findIndex((e) => e.id === docId);
    if (existingIdx === -1) {
        return corpus;
    }
    const dfMutable = { ...corpus.df };
    const old = corpus.entries[existingIdx];
    for (const term of Object.keys(old.vector)) {
        // Own-property test: for a term such as "constructor" that is absent
        // from DF, `dfMutable[term]` would resolve to an inherited function
        // and the decrement would store NaN under that key.
        if (Object.prototype.hasOwnProperty.call(dfMutable, term)) {
            dfMutable[term] -= 1;
            if (dfMutable[term] <= 0) {
                delete dfMutable[term];
            }
        }
    }
    return {
        schemaVersion: 1,
        builtAt: new Date().toISOString(),
        // Never report a negative size, even if the stored count was already off.
        documentCount: Math.max(0, corpus.documentCount - 1),
        df: dfMutable,
        entries: corpus.entries.filter((_, i) => i !== existingIdx),
    };
}
|
|
278
|
+
//# sourceMappingURL=tfidf-engine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tfidf-engine.js","sourceRoot":"","sources":["../../src/semantic/tfidf-engine.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;GAkBG;;AAoEH,4BAaC;AA6BD,gCAMC;AAID,kCAuDC;AAUD,oCAkDC;AAUD,wDAoEC;AAKD,4DAuBC;AA9SD,kFAAkF;AAElF,6CAA6C;AAC7C,MAAM,YAAY,GAAG,CAAC,CAAC;AAEvB,sEAAsE;AACtE,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;GAGG;AACH,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACzB,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK;IAC/D,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IAC7D,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;IAC/D,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM;IAC/D,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,UAAU;IAC/D,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO;IACjE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;IAChE,SAAS,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY;CAC1E,CAAC,CAAC;AAEH,kFAAkF;AAElF;;;GAGG;AACH,SAAgB,QAAQ,CAAC,IAAY;IACnC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,4EAA4E;IAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC;SACnC,OAAO,CAAC,uBAAuB,EAAE,OAAO,CAAC,CAAC;IAElE,MAAM,MAAM,GAAG,UAAU;SACtB,WAAW,EAAE;SACb,KAAK,CAAC,UAAU,CAAC,CAAQ,gDAAgD;SACzE,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,YAAY,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,kFAAkF;AAElF;;;GAGG;AACH,SAAS,SAAS,CAAC,MAAgB;IACjC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC;QACjC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC;IAC7C,CAAC;IAE
D,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,kFAAkF;AAElF;;;GAGG;AACH,SAAgB,UAAU,CAAC,EAA0B,EAAE,CAAS;IAC9D,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC;QACjD,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,kFAAkF;AAElF,SAAgB,WAAW,CAAC,SAA0B;IACpD,MAAM,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;IAE3B,yDAAyD;IACzD,MAAM,SAAS,GAAgF,EAAE,CAAC;IAClG,MAAM,EAAE,GAA2B,EAAE,CAAC;IAEtC,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,SAAS,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;QAE/D,uCAAuC;QACvC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;YAC1B,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,UAAU,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAE9B,2CAA2C;IAC3C,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,KAAK,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,SAAS,EAAE,CAAC;QACjD,MAAM,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QAE7B,oEAAoE;QACpE,MAAM,aAAa,GAA4B,EAAE,CAAC;QAClD,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC;QAC7C,CAAC;QAED,2DAA2D;QAC3D,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC;QAE3D,MAAM,MAAM,GAAuB,EAAE,CAAC;QACtC,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YACtB,MAAM,IAAI,MAAM,GAAG,MAAM,CAAC;QAC5B,CAAC;QACD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAE/B,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,CAAC,MAAM,EAAE,QAAQ
,EAAE,CAAC,CAAC;IAC3E,CAAC;IAED,OAAO;QACL,aAAa,EAAE,CAAC;QAChB,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACjC,aAAa,EAAE,CAAC;QAChB,EAAE;QACF,OAAO;KACR,CAAC;AACJ,CAAC;AAED,kFAAkF;AAElF;;;;;GAKG;AACH,SAAgB,YAAY,CAC1B,KAAa,EACb,MAAmB,EACnB,UAAiD,EAAE;IAEnD,MAAM,KAAK,GAAM,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,IAAO,EAAE,CAAC,CAAC;IACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC;IAEvD,MAAM,CAAC,GAAI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;IAC7C,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAErC,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IACpC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAExC,4BAA4B;IAC5B,MAAM,OAAO,GAAG,SAAS,CAAC,WAAW,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,OAAO,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,sBAAsB;QACnF,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAC9B,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAC3B,WAAW,IAAI,MAAM,GAAG,MAAM,CAAC;IACjC,CAAC;IACD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE/B,sBAAsB;IACtB,MAAM,MAAM,GAAwB,EAAE,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE/B,mDAAmD;QACnD,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,QAAQ,EAAE,CAAC;YACvC,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACnC,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC1B,GAAG,IAAI,OAAO,GAAG,OAAO,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9C,IAAI,MAAM,IAAI,QAAQ,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QACzE,CAAC;IACH,CAAC;IAED,OAAO,MAAM;SACV,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;SA
CjC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;AACrB,CAAC;AAED,kFAAkF;AAElF;;;;;GAKG;AACH,SAAgB,sBAAsB,CACpC,MAAmB,EACnB,GAAkB;IAElB,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;IAErE,kDAAkD;IAClD,MAAM,SAAS,GAAG,EAAE,GAAG,MAAM,CAAC,EAAE,EAAE,CAAC;IACnC,IAAI,CAAC,GAAG,MAAM,CAAC,aAAa,CAAC;IAE7B,IAAI,WAAW,KAAK,CAAC,CAAC,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QACxC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,IAAI,SAAS,CAAC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;gBAClC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;YACnD,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,CAAC,IAAI,CAAC,CAAC;IACT,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAC/B,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,GAAG,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;IACrC,MAAM,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;IAE7B,MAAM,aAAa,GAA4B,EAAE,CAAC;IAClD,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC;IAC7C,CAAC;IACD,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1C,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAuB,EAAE,CAAC;IACtC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QACtB,MAAM,IAAI,MAAM,GAAG,MAAM,CAAC;IAC5B,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE/B,MAAM,QAAQ,GAAoB;QAChC,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,MAAM;QACN,IAAI;QACJ,SAAS,EAAE,QAAQ,CAAC,MAAM;QAC1B,QAAQ,EAAE,GAAG,CAAC,QAAQ;KACvB,CAAC;IAEF,MAAM,UAAU,GACd,WAAW,KAAK,CAAC,CA
AC;QAChB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IAEpC,OAAO;QACL,aAAa,EAAE,CAAC;QAChB,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACjC,aAAa,EAAE,CAAC;QAChB,EAAE,EAAE,SAAS;QACb,OAAO,EAAE,UAAU;KACpB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,wBAAwB,CACtC,MAAmB,EACnB,KAAa;IAEb,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,KAAK,CAAC,CAAC;IACpE,IAAI,WAAW,KAAK,CAAC,CAAC;QAAE,OAAO,MAAM,CAAC;IAEtC,MAAM,SAAS,GAAG,EAAE,GAAG,MAAM,CAAC,EAAE,EAAE,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IACxC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3C,IAAI,SAAS,CAAC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;YAClC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,OAAO;QACL,aAAa,EAAE,CAAC;QAChB,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACjC,aAAa,EAAE,MAAM,CAAC,aAAa,GAAG,CAAC;QACvC,EAAE,EAAE,SAAS;QACb,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,WAAW,CAAC;KAC5D,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persistent Semantic Vector Store
|
|
3
|
+
*
|
|
4
|
+
* Manages a TF-IDF index over the project's brain-context file entries.
|
|
5
|
+
* Stored at: .neurcode/semantic-index.json
|
|
6
|
+
*
|
|
7
|
+
* Design principles:
|
|
8
|
+
* - Built FROM the existing brain-context.json — no new scanning required
|
|
9
|
+
* - Incremental updates on file changes (watch events)
|
|
10
|
+
* - Atomic writes (tmp → rename) to prevent corruption
|
|
11
|
+
* - Schema versioned for safe migrations
|
|
12
|
+
* - Completely offline, zero API calls, zero latency cost
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* buildSemanticIndex(cwd, scope) — full rebuild from brain context
|
|
16
|
+
* semanticSearch(cwd, scope, query) — search the index
|
|
17
|
+
* upsertFileInIndex(cwd, scope, filePath, content) — incremental update
|
|
18
|
+
*/
|
|
19
|
+
import { TfIdfSearchResult } from './tfidf-engine';
|
|
20
|
+
/**
 * Scope argument accepted by the index functions declared in this file
 * (buildSemanticIndex, semanticSearch, upsertFileInIndex, removeFileFromIndex,
 * getSemanticIndexStats). Both members may be null.
 */
export interface SemanticIndexScope {
|
|
21
|
+
/** Organisation identifier. NOTE(review): presumably null when no organisation is bound — confirm against the implementation. */
orgId: string | null;
|
|
22
|
+
/** Project identifier. NOTE(review): presumably null when no project is bound — confirm against the implementation. */
projectId: string | null;
|
|
23
|
+
}
|
|
24
|
+
/**
 * Confidence information attached to each search hit
 * (see the `confidence` member of SemanticSearchResult).
 */
export interface SemanticRetrievalConfidence {
|
|
25
|
+
/** Normalized TF-IDF score for this hit (not cross-corpus calibrated). */
|
|
26
|
+
retrievalScore: number;
|
|
27
|
+
/** Estimated share of brain-context docs that entered the index (0–1). */
|
|
28
|
+
corpusCoverageRatio: number;
|
|
29
|
+
/** True when indexing stopped at MAX_INDEX_DOCS. */
|
|
30
|
+
indexTruncated: boolean;
|
|
31
|
+
/** NOTE(review): presumably the number of documents currently in the index — confirm. */
documentsIndexed: number;
|
|
32
|
+
/** NOTE(review): presumably the MAX_INDEX_DOCS limit mentioned above — confirm. */
documentsCap: number;
|
|
33
|
+
/** Deterministic TF-IDF retrieval — reproducible given same index + query. */
|
|
34
|
+
retrievalMethod: 'deterministic-tfidf';
|
|
35
|
+
}
|
|
36
|
+
/**
 * One hit returned by semanticSearch: the TfIdfSearchResult members plus
 * file-level details, confidence figures and an explanation of the match.
 */
export interface SemanticSearchResult extends TfIdfSearchResult {
|
|
37
|
+
/** Path of the matched file. NOTE(review): whether it is relative to `cwd` is not visible here — confirm. */
filePath: string;
|
|
38
|
+
/** Language recorded for the file. */
language: string;
|
|
39
|
+
/** Summary text recorded for the file. */
summary: string;
|
|
40
|
+
/** Symbol names recorded for the file. */
symbols: string[];
|
|
41
|
+
/** Hash of the indexed content. NOTE(review): hash algorithm is not visible here. */
contentHash: string;
|
|
42
|
+
/** Confidence figures for this hit. */
confidence: SemanticRetrievalConfidence;
|
|
43
|
+
/** Explanation of why and how this hit was retrieved. */
explainability: {
|
|
44
|
+
/** Fixed literal tag. */
retrievalCategory: 'deterministic-semantic';
|
|
45
|
+
/** Textual reason for including the hit. */
includedBecause: string;
|
|
46
|
+
/** NOTE(review): presumably the query terms found in this document — confirm. */
matchedTerms: string[];
|
|
47
|
+
/** NOTE(review): meaning of the individual steps is not visible here. */
retrievalPath: string[];
|
|
48
|
+
/** Fixed literal tag. */
federationSource: 'local-repo-index';
|
|
49
|
+
/** NOTE(review): 'index-truncated' presumably mirrors `confidence.indexTruncated` — confirm. */
truncationStatus: 'none' | 'index-truncated';
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
/**
 * Result type of getSemanticIndexStats.
 * NOTE(review): the nullable members are presumably null when no index exists — confirm.
 */
export interface SemanticIndexStats {
|
|
53
|
+
/** Whether an index is present. */
exists: boolean;
|
|
54
|
+
/** Number of documents in the index. */
documentCount: number;
|
|
55
|
+
/** Build timestamp as a string, or null. */
builtAt: string | null;
|
|
56
|
+
/** Schema version of the index, or null. */
schemaVersion: number | null;
|
|
57
|
+
/** Size in bytes, or null. NOTE(review): presumably the size of the index file on disk — confirm. */
sizeBytes: number | null;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Builds a full semantic index from the current brain-context entries.
|
|
61
|
+
* Should be called after `neurcode brain build` or equivalent full refresh.
|
|
62
|
+
*
|
|
63
|
+
* Returns: number of documents indexed.
|
|
64
|
+
*/
|
|
65
|
+
export declare function buildSemanticIndex(cwd: string, scope: SemanticIndexScope): number;
|
|
66
|
+
/**
|
|
67
|
+
* Searches the semantic index.
|
|
68
|
+
*
|
|
69
|
+
* Falls back gracefully to an empty result if no index exists (never throws).
|
|
70
|
+
*/
|
|
71
|
+
export declare function semanticSearch(cwd: string, scope: SemanticIndexScope, query: string, options?: {
|
|
72
|
+
limit?: number;
|
|
73
|
+
minScore?: number;
|
|
74
|
+
}): SemanticSearchResult[];
|
|
75
|
+
/**
|
|
76
|
+
* Incrementally adds or updates a single file in the semantic index.
|
|
77
|
+
* Called by the watch daemon on file changes.
|
|
78
|
+
*
|
|
79
|
+
* `content` is the raw file content to vectorize.
|
|
80
|
+
* If the content hash matches what's already indexed, this is a no-op.
|
|
81
|
+
*/
|
|
82
|
+
export declare function upsertFileInIndex(cwd: string, scope: SemanticIndexScope, filePath: string, content: string, metadata?: {
|
|
83
|
+
language?: string;
|
|
84
|
+
summary?: string;
|
|
85
|
+
symbols?: string[];
|
|
86
|
+
}): boolean;
|
|
87
|
+
/**
|
|
88
|
+
* Removes a file from the semantic index.
|
|
89
|
+
* Called by the watch daemon on file deletions.
|
|
90
|
+
*/
|
|
91
|
+
export declare function removeFileFromIndex(cwd: string, scope: SemanticIndexScope, filePath: string): boolean;
|
|
92
|
+
/**
|
|
93
|
+
* Returns metadata about the current index without loading the full corpus.
|
|
94
|
+
*/
|
|
95
|
+
export declare function getSemanticIndexStats(cwd: string, scope: SemanticIndexScope): SemanticIndexStats;
|
|
96
|
+
/**
|
|
97
|
+
* Deletes the semantic index file for this workspace.
|
|
98
|
+
* Called by `neurcode brain clear`.
|
|
99
|
+
*/
|
|
100
|
+
export declare function clearSemanticIndex(cwd: string): boolean;
|
|
101
|
+
/**
|
|
102
|
+
* Returns the query's tokenized representation for debugging/transparency.
|
|
103
|
+
*/
|
|
104
|
+
export declare function explainQuery(query: string): {
|
|
105
|
+
tokens: string[];
|
|
106
|
+
termCount: number;
|
|
107
|
+
};
|
|
108
|
+
//# sourceMappingURL=vector-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-store.d.ts","sourceRoot":"","sources":["../../src/semantic/vector-store.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAKH,OAAO,EAML,iBAAiB,EAGlB,MAAM,gBAAgB,CAAC;AAIxB,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AAED,MAAM,WAAW,2BAA2B;IAC1C,0EAA0E;IAC1E,cAAc,EAAE,MAAM,CAAC;IACvB,0EAA0E;IAC1E,mBAAmB,EAAE,MAAM,CAAC;IAC5B,oDAAoD;IACpD,cAAc,EAAE,OAAO,CAAC;IACxB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,8EAA8E;IAC9E,eAAe,EAAE,qBAAqB,CAAC;CACxC;AAED,MAAM,WAAW,oBAAqB,SAAQ,iBAAiB;IAC7D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,2BAA2B,CAAC;IACxC,cAAc,EAAE;QACd,iBAAiB,EAAE,wBAAwB,CAAC;QAC5C,eAAe,EAAE,MAAM,CAAC;QACxB,YAAY,EAAE,MAAM,EAAE,CAAC;QACvB,aAAa,EAAE,MAAM,EAAE,CAAC;QACxB,gBAAgB,EAAE,kBAAkB,CAAC;QACrC,gBAAgB,EAAE,MAAM,GAAG,iBAAiB,CAAC;KAC9C,CAAC;CACH;AAED,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,OAAO,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AA2JD;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAChC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,kBAAkB,GACxB,MAAM,CAoBR;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,kBAAkB,EACzB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GAClD,oBAAoB,EAAE,CA+CxB;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAC/B,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,kBAAkB,EACzB,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,QAAQ,CAAC,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAA;CAAE,GACrE,OAAO,CAqCT;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,kBAAkB,EACzB,QAAQ,EAAE,MAAM,GACf,OAAO,CAgBT;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,kBAAkB,GACxB,kBAAkB,CAmBpB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAUvD;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,
KAAK,EAAE,MAAM,GAAG;IAC3C,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB,CAGA"}
|