gitnexus 1.6.3-rc.8 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -5
- package/dist/_shared/graph/types.d.ts +16 -0
- package/dist/_shared/graph/types.d.ts.map +1 -1
- package/dist/_shared/index.d.ts +20 -2
- package/dist/_shared/index.d.ts.map +1 -1
- package/dist/_shared/index.js +11 -0
- package/dist/_shared/index.js.map +1 -1
- package/dist/_shared/scope-resolution/def-index.js +2 -2
- package/dist/_shared/scope-resolution/def-index.js.map +1 -1
- package/dist/_shared/scope-resolution/method-dispatch-index.d.ts +8 -0
- package/dist/_shared/scope-resolution/method-dispatch-index.d.ts.map +1 -1
- package/dist/_shared/scope-resolution/method-dispatch-index.js +2 -2
- package/dist/_shared/scope-resolution/method-dispatch-index.js.map +1 -1
- package/dist/_shared/scope-resolution/module-scope-index.d.ts +8 -0
- package/dist/_shared/scope-resolution/module-scope-index.d.ts.map +1 -1
- package/dist/_shared/scope-resolution/module-scope-index.js +10 -2
- package/dist/_shared/scope-resolution/module-scope-index.js.map +1 -1
- package/dist/_shared/scope-resolution/parsed-file.d.ts +76 -0
- package/dist/_shared/scope-resolution/parsed-file.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/parsed-file.js +54 -0
- package/dist/_shared/scope-resolution/parsed-file.js.map +1 -0
- package/dist/_shared/scope-resolution/position-index.d.ts +12 -0
- package/dist/_shared/scope-resolution/position-index.d.ts.map +1 -1
- package/dist/_shared/scope-resolution/position-index.js +2 -2
- package/dist/_shared/scope-resolution/position-index.js.map +1 -1
- package/dist/_shared/scope-resolution/qualified-name-index.js +2 -2
- package/dist/_shared/scope-resolution/qualified-name-index.js.map +1 -1
- package/dist/_shared/scope-resolution/reference-site.d.ts +75 -0
- package/dist/_shared/scope-resolution/reference-site.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/reference-site.js +24 -0
- package/dist/_shared/scope-resolution/reference-site.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/class-registry.d.ts +27 -0
- package/dist/_shared/scope-resolution/registries/class-registry.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/class-registry.js +30 -0
- package/dist/_shared/scope-resolution/registries/class-registry.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/context.d.ts +69 -0
- package/dist/_shared/scope-resolution/registries/context.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/context.js +44 -0
- package/dist/_shared/scope-resolution/registries/context.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/evidence.d.ts +56 -0
- package/dist/_shared/scope-resolution/registries/evidence.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/evidence.js +150 -0
- package/dist/_shared/scope-resolution/registries/evidence.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/field-registry.d.ts +26 -0
- package/dist/_shared/scope-resolution/registries/field-registry.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/field-registry.js +31 -0
- package/dist/_shared/scope-resolution/registries/field-registry.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/lookup-core.d.ts +81 -0
- package/dist/_shared/scope-resolution/registries/lookup-core.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/lookup-core.js +332 -0
- package/dist/_shared/scope-resolution/registries/lookup-core.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/lookup-qualified.d.ts +33 -0
- package/dist/_shared/scope-resolution/registries/lookup-qualified.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/lookup-qualified.js +56 -0
- package/dist/_shared/scope-resolution/registries/lookup-qualified.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/method-registry.d.ts +36 -0
- package/dist/_shared/scope-resolution/registries/method-registry.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/method-registry.js +32 -0
- package/dist/_shared/scope-resolution/registries/method-registry.js.map +1 -0
- package/dist/_shared/scope-resolution/registries/tie-breaks.d.ts +43 -0
- package/dist/_shared/scope-resolution/registries/tie-breaks.d.ts.map +1 -0
- package/dist/_shared/scope-resolution/registries/tie-breaks.js +60 -0
- package/dist/_shared/scope-resolution/registries/tie-breaks.js.map +1 -0
- package/dist/_shared/scope-resolution/resolve-type-ref.d.ts +1 -10
- package/dist/_shared/scope-resolution/resolve-type-ref.d.ts.map +1 -1
- package/dist/_shared/scope-resolution/resolve-type-ref.js +6 -0
- package/dist/_shared/scope-resolution/resolve-type-ref.js.map +1 -1
- package/dist/_shared/scope-resolution/scope-tree.d.ts +4 -4
- package/dist/_shared/scope-resolution/scope-tree.d.ts.map +1 -1
- package/dist/_shared/scope-resolution/scope-tree.js +3 -2
- package/dist/_shared/scope-resolution/scope-tree.js.map +1 -1
- package/dist/_shared/scope-resolution/shadow/aggregate.d.ts +6 -2
- package/dist/_shared/scope-resolution/shadow/aggregate.d.ts.map +1 -1
- package/dist/_shared/scope-resolution/shadow/aggregate.js +5 -0
- package/dist/_shared/scope-resolution/shadow/aggregate.js.map +1 -1
- package/dist/_shared/scope-resolution/types.d.ts +11 -0
- package/dist/_shared/scope-resolution/types.d.ts.map +1 -1
- package/dist/cli/ai-context.js +35 -4
- package/dist/cli/analyze.d.ts +27 -0
- package/dist/cli/analyze.js +31 -1
- package/dist/cli/clean.js +19 -1
- package/dist/cli/group.js +73 -0
- package/dist/cli/index-repo.js +8 -1
- package/dist/cli/index.js +26 -1
- package/dist/cli/list.js +11 -1
- package/dist/cli/remove.d.ts +30 -0
- package/dist/cli/remove.js +99 -0
- package/dist/cli/setup.js +185 -57
- package/dist/cli/tool.d.ts +5 -0
- package/dist/cli/tool.js +42 -0
- package/dist/config/ignore-service.d.ts +9 -0
- package/dist/config/ignore-service.js +80 -13
- package/dist/core/embedding-mode.d.ts +30 -0
- package/dist/core/embedding-mode.js +30 -0
- package/dist/core/embeddings/ast-utils.js +22 -22
- package/dist/core/embeddings/chunker.js +30 -25
- package/dist/core/embeddings/embedding-pipeline.d.ts +6 -0
- package/dist/core/embeddings/embedding-pipeline.js +15 -6
- package/dist/core/embeddings/text-generator.d.ts +1 -1
- package/dist/core/embeddings/text-generator.js +33 -24
- package/dist/core/embeddings/types.d.ts +43 -1
- package/dist/core/embeddings/types.js +101 -29
- package/dist/core/git-staleness.d.ts +18 -0
- package/dist/core/git-staleness.js +108 -0
- package/dist/core/graph/graph.js +115 -20
- package/dist/core/graph/types.d.ts +12 -1
- package/dist/core/group/config-parser.d.ts +4 -0
- package/dist/core/group/config-parser.js +18 -1
- package/dist/core/group/cross-impact.d.ts +41 -0
- package/dist/core/group/cross-impact.js +441 -0
- package/dist/core/group/extractors/http-patterns/php.js +126 -18
- package/dist/core/group/group-path-utils.d.ts +17 -0
- package/dist/core/group/group-path-utils.js +40 -0
- package/dist/core/group/resolve-at-member.d.ts +10 -0
- package/dist/core/group/resolve-at-member.js +31 -0
- package/dist/core/group/service.d.ts +9 -0
- package/dist/core/group/service.js +259 -25
- package/dist/core/group/types.d.ts +30 -0
- package/dist/core/ingestion/ast-cache.d.ts +16 -1
- package/dist/core/ingestion/ast-cache.js +14 -2
- package/dist/core/ingestion/call-processor.js +9 -0
- package/dist/core/ingestion/emit-references.d.ts +88 -0
- package/dist/core/ingestion/emit-references.js +229 -0
- package/dist/core/ingestion/filesystem-walker.js +6 -4
- package/dist/core/ingestion/finalize-orchestrator.d.ts +63 -0
- package/dist/core/ingestion/finalize-orchestrator.js +139 -0
- package/dist/core/ingestion/framework-detection.js +6 -2
- package/dist/core/ingestion/import-processor.js +4 -0
- package/dist/core/ingestion/import-resolvers/python.js +9 -6
- package/dist/core/ingestion/import-target-adapter.d.ts +73 -0
- package/dist/core/ingestion/import-target-adapter.js +95 -0
- package/dist/core/ingestion/language-provider.d.ts +36 -33
- package/dist/core/ingestion/languages/csharp/accessor-unwrap.d.ts +21 -0
- package/dist/core/ingestion/languages/csharp/accessor-unwrap.js +56 -0
- package/dist/core/ingestion/languages/csharp/arity-metadata.d.ts +26 -0
- package/dist/core/ingestion/languages/csharp/arity-metadata.js +46 -0
- package/dist/core/ingestion/languages/csharp/arity.d.ts +23 -0
- package/dist/core/ingestion/languages/csharp/arity.js +37 -0
- package/dist/core/ingestion/languages/csharp/cache-stats.d.ts +15 -0
- package/dist/core/ingestion/languages/csharp/cache-stats.js +26 -0
- package/dist/core/ingestion/languages/csharp/captures.d.ts +19 -0
- package/dist/core/ingestion/languages/csharp/captures.js +249 -0
- package/dist/core/ingestion/languages/csharp/import-decomposer.d.ts +19 -0
- package/dist/core/ingestion/languages/csharp/import-decomposer.js +93 -0
- package/dist/core/ingestion/languages/csharp/import-target.d.ts +25 -0
- package/dist/core/ingestion/languages/csharp/import-target.js +123 -0
- package/dist/core/ingestion/languages/csharp/index.d.ts +82 -0
- package/dist/core/ingestion/languages/csharp/index.js +82 -0
- package/dist/core/ingestion/languages/csharp/interpret.d.ts +15 -0
- package/dist/core/ingestion/languages/csharp/interpret.js +132 -0
- package/dist/core/ingestion/languages/csharp/merge-bindings.d.ts +27 -0
- package/dist/core/ingestion/languages/csharp/merge-bindings.js +55 -0
- package/dist/core/ingestion/languages/csharp/namespace-siblings.d.ts +50 -0
- package/dist/core/ingestion/languages/csharp/namespace-siblings.js +374 -0
- package/dist/core/ingestion/languages/csharp/query.d.ts +35 -0
- package/dist/core/ingestion/languages/csharp/query.js +515 -0
- package/dist/core/ingestion/languages/csharp/receiver-binding.d.ts +31 -0
- package/dist/core/ingestion/languages/csharp/receiver-binding.js +135 -0
- package/dist/core/ingestion/languages/csharp/scope-resolver.d.ts +10 -0
- package/dist/core/ingestion/languages/csharp/scope-resolver.js +63 -0
- package/dist/core/ingestion/languages/csharp/simple-hooks.d.ts +53 -0
- package/dist/core/ingestion/languages/csharp/simple-hooks.js +76 -0
- package/dist/core/ingestion/languages/csharp.js +14 -0
- package/dist/core/ingestion/languages/python/arity-metadata.d.ts +24 -0
- package/dist/core/ingestion/languages/python/arity-metadata.js +45 -0
- package/dist/core/ingestion/languages/python/arity.d.ts +22 -0
- package/dist/core/ingestion/languages/python/arity.js +38 -0
- package/dist/core/ingestion/languages/python/cache-stats.d.ts +17 -0
- package/dist/core/ingestion/languages/python/cache-stats.js +28 -0
- package/dist/core/ingestion/languages/python/captures.d.ts +19 -0
- package/dist/core/ingestion/languages/python/captures.js +106 -0
- package/dist/core/ingestion/languages/python/import-decomposer.d.ts +15 -0
- package/dist/core/ingestion/languages/python/import-decomposer.js +112 -0
- package/dist/core/ingestion/languages/python/import-target.d.ts +21 -0
- package/dist/core/ingestion/languages/python/import-target.js +99 -0
- package/dist/core/ingestion/languages/python/index.d.ts +80 -0
- package/dist/core/ingestion/languages/python/index.js +80 -0
- package/dist/core/ingestion/languages/python/interpret.d.ts +15 -0
- package/dist/core/ingestion/languages/python/interpret.js +191 -0
- package/dist/core/ingestion/languages/python/merge-bindings.d.ts +16 -0
- package/dist/core/ingestion/languages/python/merge-bindings.js +44 -0
- package/dist/core/ingestion/languages/python/query.d.ts +9 -0
- package/dist/core/ingestion/languages/python/query.js +267 -0
- package/dist/core/ingestion/languages/python/receiver-binding.d.ts +21 -0
- package/dist/core/ingestion/languages/python/receiver-binding.js +116 -0
- package/dist/core/ingestion/languages/python/scope-resolver.d.ts +16 -0
- package/dist/core/ingestion/languages/python/scope-resolver.js +53 -0
- package/dist/core/ingestion/languages/python/simple-hooks.d.ts +23 -0
- package/dist/core/ingestion/languages/python/simple-hooks.js +35 -0
- package/dist/core/ingestion/languages/python.js +14 -0
- package/dist/core/ingestion/model/method-registry.d.ts +9 -0
- package/dist/core/ingestion/model/method-registry.js +4 -0
- package/dist/core/ingestion/model/scope-resolution-indexes.d.ts +59 -0
- package/dist/core/ingestion/model/scope-resolution-indexes.js +42 -0
- package/dist/core/ingestion/model/semantic-model.d.ts +64 -0
- package/dist/core/ingestion/model/semantic-model.js +55 -0
- package/dist/core/ingestion/mro-processor.js +38 -22
- package/dist/core/ingestion/parsing-processor.d.ts +18 -1
- package/dist/core/ingestion/parsing-processor.js +45 -11
- package/dist/core/ingestion/pipeline-phases/index.d.ts +1 -0
- package/dist/core/ingestion/pipeline-phases/index.js +1 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +10 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +17 -2
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +18 -0
- package/dist/core/ingestion/pipeline.js +2 -1
- package/dist/core/ingestion/registry-primary-flag.d.ts +86 -0
- package/dist/core/ingestion/registry-primary-flag.js +111 -0
- package/dist/core/ingestion/resolve-references.d.ts +63 -0
- package/dist/core/ingestion/resolve-references.js +175 -0
- package/dist/core/ingestion/scope-extractor-bridge.d.ts +32 -0
- package/dist/core/ingestion/scope-extractor-bridge.js +44 -0
- package/dist/core/ingestion/scope-extractor.d.ts +86 -0
- package/dist/core/ingestion/scope-extractor.js +758 -0
- package/dist/core/ingestion/scope-resolution/contract/scope-resolver.d.ts +372 -0
- package/dist/core/ingestion/scope-resolution/contract/scope-resolver.js +212 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/edges.d.ts +43 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/edges.js +79 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/ids.d.ts +57 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/ids.js +112 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/imports-to-edges.d.ts +17 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/imports-to-edges.js +46 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/method-dispatch.d.ts +19 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/method-dispatch.js +30 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/node-lookup.d.ts +37 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/node-lookup.js +113 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/references-to-edges.d.ts +38 -0
- package/dist/core/ingestion/scope-resolution/graph-bridge/references-to-edges.js +73 -0
- package/dist/core/ingestion/scope-resolution/passes/compound-receiver.d.ts +42 -0
- package/dist/core/ingestion/scope-resolution/passes/compound-receiver.js +198 -0
- package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.d.ts +27 -0
- package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.js +131 -0
- package/dist/core/ingestion/scope-resolution/passes/imported-return-types.d.ts +48 -0
- package/dist/core/ingestion/scope-resolution/passes/imported-return-types.js +130 -0
- package/dist/core/ingestion/scope-resolution/passes/mro.d.ts +42 -0
- package/dist/core/ingestion/scope-resolution/passes/mro.js +99 -0
- package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.d.ts +26 -0
- package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.js +61 -0
- package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.d.ts +46 -0
- package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.js +327 -0
- package/dist/core/ingestion/scope-resolution/pipeline/phase.d.ts +47 -0
- package/dist/core/ingestion/scope-resolution/pipeline/phase.js +130 -0
- package/dist/core/ingestion/scope-resolution/pipeline/reconcile-ownership.d.ts +68 -0
- package/dist/core/ingestion/scope-resolution/pipeline/reconcile-ownership.js +125 -0
- package/dist/core/ingestion/scope-resolution/pipeline/registry.d.ts +17 -0
- package/dist/core/ingestion/scope-resolution/pipeline/registry.js +21 -0
- package/dist/core/ingestion/scope-resolution/pipeline/run.d.ts +66 -0
- package/dist/core/ingestion/scope-resolution/pipeline/run.js +157 -0
- package/dist/core/ingestion/scope-resolution/scope/namespace-targets.d.ts +36 -0
- package/dist/core/ingestion/scope-resolution/scope/namespace-targets.js +52 -0
- package/dist/core/ingestion/scope-resolution/scope/walkers.d.ts +127 -0
- package/dist/core/ingestion/scope-resolution/scope/walkers.js +349 -0
- package/dist/core/ingestion/scope-resolution/workspace-index.d.ts +52 -0
- package/dist/core/ingestion/scope-resolution/workspace-index.js +61 -0
- package/dist/core/ingestion/shadow-harness.d.ts +113 -0
- package/dist/core/ingestion/shadow-harness.js +148 -0
- package/dist/core/ingestion/utils/ast-helpers.d.ts +19 -1
- package/dist/core/ingestion/utils/ast-helpers.js +70 -0
- package/dist/core/ingestion/utils/max-file-size.d.ts +20 -0
- package/dist/core/ingestion/utils/max-file-size.js +52 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +9 -0
- package/dist/core/ingestion/workers/parse-worker.js +57 -21
- package/dist/core/lbug/lbug-adapter.d.ts +22 -2
- package/dist/core/lbug/lbug-adapter.js +58 -14
- package/dist/core/lbug/pool-adapter.d.ts +17 -0
- package/dist/core/lbug/pool-adapter.js +24 -14
- package/dist/core/run-analyze.d.ts +32 -0
- package/dist/core/run-analyze.js +74 -19
- package/dist/core/search/bm25-index.d.ts +18 -0
- package/dist/core/search/bm25-index.js +125 -12
- package/dist/core/tree-sitter/parser-loader.js +6 -1
- package/dist/mcp/local/local-backend.d.ts +67 -3
- package/dist/mcp/local/local-backend.js +296 -34
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +100 -17
- package/dist/mcp/tools.d.ts +4 -1
- package/dist/mcp/tools.js +75 -54
- package/dist/server/api.js +6 -2
- package/dist/storage/git.d.ts +49 -0
- package/dist/storage/git.js +111 -0
- package/dist/storage/repo-manager.d.ts +246 -1
- package/dist/storage/repo-manager.js +391 -9
- package/package.json +7 -6
- package/scripts/bench-scope-resolution.ts +134 -0
- package/scripts/ci-list-migrated-languages.ts +24 -0
- package/skills/gitnexus-cli.md +1 -0
|
@@ -245,16 +245,20 @@ const IGNORED_FILES = new Set([
|
|
|
245
245
|
'.env.test',
|
|
246
246
|
'.env.example',
|
|
247
247
|
]);
|
|
248
|
-
//
|
|
249
|
-
//
|
|
250
|
-
//
|
|
251
|
-
//
|
|
248
|
+
// The hardcoded DEFAULT_IGNORE_LIST is the "safety net" default: directories
|
|
249
|
+
// that are almost never source code (node_modules, .git, dist, __tests__,
|
|
250
|
+
// etc.). Users who legitimately need to index one of these can negate the
|
|
251
|
+
// hardcoded rule via a `!pattern` line in `.gitnexusignore` (#771) — same
|
|
252
|
+
// semantics as `.gitignore` negation. That override is applied in
|
|
253
|
+
// `createIgnoreFilter` below; `shouldIgnorePath` itself stays a pure
|
|
254
|
+
// hardcoded-list check so its callers (wiki generator, tests) get
|
|
255
|
+
// deterministic results independent of per-repo config.
|
|
252
256
|
export const shouldIgnorePath = (filePath) => {
|
|
253
257
|
const normalizedPath = filePath.replace(/\\/g, '/');
|
|
254
258
|
const parts = normalizedPath.split('/');
|
|
255
259
|
const fileName = parts[parts.length - 1];
|
|
256
260
|
const fileNameLower = fileName.toLowerCase();
|
|
257
|
-
// Check if any path segment is in ignore list
|
|
261
|
+
// Check if any path segment is in the hardcoded ignore list.
|
|
258
262
|
for (const part of parts) {
|
|
259
263
|
if (DEFAULT_IGNORE_LIST.has(part)) {
|
|
260
264
|
return true;
|
|
@@ -320,6 +324,44 @@ export const loadIgnoreRules = async (repoPath, options) => {
|
|
|
320
324
|
}
|
|
321
325
|
return hasRules ? ig : null;
|
|
322
326
|
};
|
|
327
|
+
/**
|
|
328
|
+
* Walk ancestor segments of `rel` and check whether `.gitnexusignore`
|
|
329
|
+
* (or `.gitignore`) contains an explicit `!pattern` negation that
|
|
330
|
+
* applies. Returns true as soon as any segment — or the path itself —
|
|
331
|
+
* is matched by a negation rule.
|
|
332
|
+
*
|
|
333
|
+
* Why this exists (#771): the hardcoded DEFAULT_IGNORE_LIST would
|
|
334
|
+
* otherwise block indexing of directories like `__tests__/` even when
|
|
335
|
+
* the user has an explicit `!__tests__/` line in `.gitnexusignore`.
|
|
336
|
+
* Mirroring `.gitignore` negation semantics: a user's explicit
|
|
337
|
+
* unignore of a parent directory implicitly unignores everything
|
|
338
|
+
* underneath, so we walk the ancestor chain rather than only testing
|
|
339
|
+
* the leaf.
|
|
340
|
+
*
|
|
341
|
+
* The `ignore` package's `test(path)` returns `{ignored, unignored}`;
|
|
342
|
+
* `unignored: true` is the "a negation rule matched this path"
|
|
343
|
+
* signal. Children of a negated directory return
|
|
344
|
+
* `{ignored: false, unignored: false}` on a direct test, which is why
|
|
345
|
+
* we also walk the ancestors here.
|
|
346
|
+
*/
|
|
347
|
+
const hasExplicitUnignore = (ig, rel) => {
|
|
348
|
+
// Direct match on the path (as a file).
|
|
349
|
+
if (ig.test(rel).unignored)
|
|
350
|
+
return true;
|
|
351
|
+
// Direct match on the path treated as a directory — `!dir/` matches
|
|
352
|
+
// here when rel is the directory itself.
|
|
353
|
+
if (ig.test(rel + '/').unignored)
|
|
354
|
+
return true;
|
|
355
|
+
// Walk ancestor segments. `!parent/` should propagate to every
|
|
356
|
+
// descendant the same way `.gitignore` negation propagates.
|
|
357
|
+
const parts = rel.split('/');
|
|
358
|
+
for (let i = parts.length - 1; i > 0; i--) {
|
|
359
|
+
const ancestor = parts.slice(0, i).join('/') + '/';
|
|
360
|
+
if (ig.test(ancestor).unignored)
|
|
361
|
+
return true;
|
|
362
|
+
}
|
|
363
|
+
return false;
|
|
364
|
+
};
|
|
323
365
|
/**
|
|
324
366
|
* Create a glob-compatible ignore filter combining:
|
|
325
367
|
* - .gitignore / .gitnexusignore patterns (via `ignore` package)
|
|
@@ -327,6 +369,15 @@ export const loadIgnoreRules = async (repoPath, options) => {
|
|
|
327
369
|
*
|
|
328
370
|
* Returns an IgnoreLike object for glob's `ignore` option,
|
|
329
371
|
* enabling directory-level pruning during traversal.
|
|
372
|
+
*
|
|
373
|
+
* Precedence (#771): user's `.gitnexusignore` negation patterns take
|
|
374
|
+
* priority over the hardcoded list, matching `.gitignore` semantics.
|
|
375
|
+
* An explicit `!pattern` rule unignores descendants even when they
|
|
376
|
+
* would otherwise be blocked by DEFAULT_IGNORE_LIST — UNLESS a more
|
|
377
|
+
* specific rule in the same file re-ignores a subset (e.g.
|
|
378
|
+
* `!__tests__/` paired with `__tests__/generated/` blocks the child
|
|
379
|
+
* while leaving the parent negated). Last-match-wins is enforced by
|
|
380
|
+
* consulting `ig.ignores(rel)` after `hasExplicitUnignore`.
|
|
330
381
|
*/
|
|
331
382
|
export const createIgnoreFilter = async (repoPath, options) => {
|
|
332
383
|
const ig = await loadIgnoreRules(repoPath, options);
|
|
@@ -337,6 +388,15 @@ export const createIgnoreFilter = async (repoPath, options) => {
|
|
|
337
388
|
const rel = p.relative();
|
|
338
389
|
if (!rel)
|
|
339
390
|
return false;
|
|
391
|
+
// User's .gitnexusignore negation takes precedence over hardcoded
|
|
392
|
+
// rules (#771). If any ancestor or the path itself was explicitly
|
|
393
|
+
// unignored AND no more-specific rule re-ignores this exact path,
|
|
394
|
+
// allow it through. The `!ig.ignores(rel)` guard matches
|
|
395
|
+
// .gitignore's last-match-wins semantics: `!__tests__/` followed
|
|
396
|
+
// by `__tests__/generated/` negates the parent but still blocks
|
|
397
|
+
// the re-ignored child.
|
|
398
|
+
if (ig && hasExplicitUnignore(ig, rel) && !ig.ignores(rel))
|
|
399
|
+
return false;
|
|
340
400
|
// Check .gitignore / .gitnexusignore patterns
|
|
341
401
|
if (ig && ig.ignores(rel))
|
|
342
402
|
return true;
|
|
@@ -344,10 +404,20 @@ export const createIgnoreFilter = async (repoPath, options) => {
|
|
|
344
404
|
return shouldIgnorePath(rel);
|
|
345
405
|
},
|
|
346
406
|
childrenIgnored(p) {
|
|
347
|
-
// Fast path: check directory name against hardcoded list.
|
|
348
407
|
// Note: dot-directories (.git, .vscode, etc.) are primarily excluded by
|
|
349
|
-
// glob's `dot: false` option in filesystem-walker.ts.
|
|
350
|
-
// defense-in-depth — do not remove `dot: false`
|
|
408
|
+
// glob's `dot: false` option in filesystem-walker.ts. The hardcoded
|
|
409
|
+
// list check below is defense-in-depth — do not remove `dot: false`
|
|
410
|
+
// assuming this covers it.
|
|
411
|
+
const rel = p.relative();
|
|
412
|
+
// User's .gitnexusignore negation takes precedence (#771) — if the
|
|
413
|
+
// user explicitly unignored this directory or any ancestor via a
|
|
414
|
+
// !pattern rule, allow descent even if the directory name is in
|
|
415
|
+
// DEFAULT_IGNORE_LIST. The `!ig.ignores(rel + '/')` guard keeps
|
|
416
|
+
// last-match-wins: `!__tests__/` + `__tests__/generated/` still
|
|
417
|
+
// blocks descent into `__tests__/generated/`.
|
|
418
|
+
if (ig && rel && hasExplicitUnignore(ig, rel) && !ig.ignores(rel + '/'))
|
|
419
|
+
return false;
|
|
420
|
+
// Hardcoded list: block descent into well-known noise directories.
|
|
351
421
|
if (DEFAULT_IGNORE_LIST.has(p.name))
|
|
352
422
|
return true;
|
|
353
423
|
// Check against .gitignore / .gitnexusignore patterns.
|
|
@@ -358,11 +428,8 @@ export const createIgnoreFilter = async (repoPath, options) => {
|
|
|
358
428
|
// Bare-name patterns (e.g. `local`) still match `local/` per gitignore spec:
|
|
359
429
|
// the `ignore` package normalizes `dir` and `dir/` to match directories.
|
|
360
430
|
// See: https://github.com/kaelzhang/node-ignore#2-filenames-and-dirnames
|
|
361
|
-
if (ig)
|
|
362
|
-
|
|
363
|
-
if (rel && ig.ignores(rel + '/'))
|
|
364
|
-
return true;
|
|
365
|
-
}
|
|
431
|
+
if (ig && rel && ig.ignores(rel + '/'))
|
|
432
|
+
return true;
|
|
366
433
|
return false;
|
|
367
434
|
},
|
|
368
435
|
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure derivation of the embedding-mode flags for `runFullAnalysis`.
|
|
3
|
+
*
|
|
4
|
+
* Lives in its own module (no native imports) so the branching contract can
|
|
5
|
+
* be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
|
|
6
|
+
* other side-effecting dependencies pulled in by `run-analyze.ts`.
|
|
7
|
+
*
|
|
8
|
+
* Semantics:
|
|
9
|
+
* --drop-embeddings -> wipe (skip cache load entirely)
|
|
10
|
+
* --embeddings -> load cache, restore, then generate
|
|
11
|
+
* --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
|
|
12
|
+
* (default) + existing>0 -> preserve only (load + restore, no generation)
|
|
13
|
+
* any path with existing=0 -> no cache work, no preservation
|
|
14
|
+
*/
|
|
15
|
+
export interface EmbeddingModeInput {
|
|
16
|
+
force?: boolean;
|
|
17
|
+
embeddings?: boolean;
|
|
18
|
+
dropEmbeddings?: boolean;
|
|
19
|
+
}
|
|
20
|
+
export interface EmbeddingMode {
|
|
21
|
+
/** True when phase 4 should run the embedding generation pipeline. */
|
|
22
|
+
shouldGenerateEmbeddings: boolean;
|
|
23
|
+
/** True when we should load the cache to re-insert vectors after rebuild without generating new ones. */
|
|
24
|
+
preserveExistingEmbeddings: boolean;
|
|
25
|
+
/** True when `--force` upgraded a default analyze into a regeneration because the repo was already embedded. */
|
|
26
|
+
forceRegenerateEmbeddings: boolean;
|
|
27
|
+
/** True when we need to load cached embeddings from the existing DB before the rebuild. */
|
|
28
|
+
shouldLoadCache: boolean;
|
|
29
|
+
}
|
|
30
|
+
export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure derivation of the embedding-mode flags for `runFullAnalysis`.
|
|
3
|
+
*
|
|
4
|
+
* Lives in its own module (no native imports) so the branching contract can
|
|
5
|
+
* be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
|
|
6
|
+
* other side-effecting dependencies pulled in by `run-analyze.ts`.
|
|
7
|
+
*
|
|
8
|
+
* Semantics:
|
|
9
|
+
* --drop-embeddings -> wipe (skip cache load entirely)
|
|
10
|
+
* --embeddings -> load cache, restore, then generate
|
|
11
|
+
* --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
|
|
12
|
+
* (default) + existing>0 -> preserve only (load + restore, no generation)
|
|
13
|
+
* any path with existing=0 -> no cache work, no preservation
|
|
14
|
+
*/
|
|
15
|
+
export function deriveEmbeddingMode(options, existingEmbeddingCount) {
|
|
16
|
+
const hasExisting = existingEmbeddingCount > 0;
|
|
17
|
+
const drop = !!options.dropEmbeddings;
|
|
18
|
+
const explicit = !!options.embeddings;
|
|
19
|
+
const force = !!options.force;
|
|
20
|
+
const forceRegenerateEmbeddings = force && !explicit && !drop && hasExisting;
|
|
21
|
+
const preserveExistingEmbeddings = !explicit && !drop && !forceRegenerateEmbeddings && hasExisting;
|
|
22
|
+
const shouldGenerateEmbeddings = explicit || forceRegenerateEmbeddings;
|
|
23
|
+
const shouldLoadCache = !drop && (shouldGenerateEmbeddings || preserveExistingEmbeddings);
|
|
24
|
+
return {
|
|
25
|
+
shouldGenerateEmbeddings,
|
|
26
|
+
preserveExistingEmbeddings,
|
|
27
|
+
forceRegenerateEmbeddings,
|
|
28
|
+
shouldLoadCache,
|
|
29
|
+
};
|
|
30
|
+
}
|
|
@@ -55,17 +55,17 @@ const FUNCTION_LIKE_TYPES = new Set([
|
|
|
55
55
|
* numbers don't apply.
|
|
56
56
|
*/
|
|
57
57
|
export const findFunctionNode = (root) => {
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
-
if (
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
58
|
+
// Iterative DFS — avoids stack overflow on deeply nested ASTs.
|
|
59
|
+
const stack = [root];
|
|
60
|
+
while (stack.length > 0) {
|
|
61
|
+
const node = stack.pop();
|
|
62
|
+
if (FUNCTION_LIKE_TYPES.has(node.type))
|
|
63
|
+
return node;
|
|
64
|
+
for (let i = node.namedChildCount - 1; i >= 0; i--) {
|
|
65
|
+
const child = node.namedChild(i);
|
|
66
|
+
if (child)
|
|
67
|
+
stack.push(child);
|
|
68
|
+
}
|
|
69
69
|
}
|
|
70
70
|
return null;
|
|
71
71
|
};
|
|
@@ -89,17 +89,17 @@ export const findDeclarationNode = (root) => {
|
|
|
89
89
|
'object_declaration', // Kotlin: object
|
|
90
90
|
'impl_item', // Rust: impl
|
|
91
91
|
]);
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
const
|
|
96
|
-
if (
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
92
|
+
// Iterative DFS — avoids stack overflow on deeply nested ASTs.
|
|
93
|
+
const stack = [root];
|
|
94
|
+
while (stack.length > 0) {
|
|
95
|
+
const node = stack.pop();
|
|
96
|
+
if (CLASS_LIKE_TYPES.has(node.type))
|
|
97
|
+
return node;
|
|
98
|
+
for (let i = node.namedChildCount - 1; i >= 0; i--) {
|
|
99
|
+
const child = node.namedChild(i);
|
|
100
|
+
if (child)
|
|
101
|
+
stack.push(child);
|
|
102
|
+
}
|
|
103
103
|
}
|
|
104
104
|
return null;
|
|
105
105
|
};
|
|
@@ -10,6 +10,7 @@ export { characterChunk } from './character-chunk.js';
|
|
|
10
10
|
import { characterChunk } from './character-chunk.js';
|
|
11
11
|
import { ensureAndParse, findDeclarationNode, findFunctionNode } from './ast-utils.js';
|
|
12
12
|
import { buildLineIndex, resolveChunkLines } from './line-index.js';
|
|
13
|
+
import { CHUNKING_RULES, CHUNK_MODE_AST_DECLARATION, CHUNK_MODE_AST_FUNCTION, } from './types.js';
|
|
13
14
|
/**
|
|
14
15
|
* Main chunkNode function: dispatches by label
|
|
15
16
|
*/
|
|
@@ -27,26 +28,24 @@ export const chunkNode = async (label, content, filePath, startLine, endLine, ch
|
|
|
27
28
|
},
|
|
28
29
|
];
|
|
29
30
|
}
|
|
30
|
-
|
|
31
|
-
if (
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
const rule = CHUNKING_RULES[label];
|
|
32
|
+
if (!rule) {
|
|
33
|
+
return characterChunk(content, startLine, endLine, chunkSize, overlap);
|
|
34
|
+
}
|
|
35
|
+
try {
|
|
36
|
+
if (rule.mode === CHUNK_MODE_AST_FUNCTION) {
|
|
37
|
+
const astChunks = await astChunk(content, filePath, startLine, endLine, chunkSize, overlap, rule);
|
|
34
38
|
if (astChunks.length > 0)
|
|
35
39
|
return astChunks;
|
|
36
40
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
if (label === 'Class' || label === 'Interface') {
|
|
42
|
-
try {
|
|
43
|
-
const declarationChunks = await declarationChunk(label, content, filePath, startLine, endLine, chunkSize, overlap);
|
|
41
|
+
if (rule.mode === CHUNK_MODE_AST_DECLARATION) {
|
|
42
|
+
const declarationChunks = await declarationChunk(content, filePath, startLine, endLine, chunkSize, overlap, rule);
|
|
44
43
|
if (declarationChunks.length > 0)
|
|
45
44
|
return declarationChunks;
|
|
46
45
|
}
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
46
|
+
}
|
|
47
|
+
catch {
|
|
48
|
+
// AST parsing failed — fall through to character fallback
|
|
50
49
|
}
|
|
51
50
|
// Character-based fallback for everything else
|
|
52
51
|
return characterChunk(content, startLine, endLine, chunkSize, overlap);
|
|
@@ -56,7 +55,7 @@ export const chunkNode = async (label, content, filePath, startLine, endLine, ch
|
|
|
56
55
|
* Parse snippet content, locate the function declaration node,
|
|
57
56
|
* split body by statement boundaries.
|
|
58
57
|
*/
|
|
59
|
-
const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap) => {
|
|
58
|
+
const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap, rule) => {
|
|
60
59
|
const tree = await ensureAndParse(content, filePath);
|
|
61
60
|
if (!tree)
|
|
62
61
|
return [];
|
|
@@ -84,7 +83,7 @@ const astChunk = async (content, filePath, startLine, endLine, chunkSize, overla
|
|
|
84
83
|
}
|
|
85
84
|
if (statements.length === 0)
|
|
86
85
|
return [];
|
|
87
|
-
return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex,
|
|
86
|
+
return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex, rule.includePrefix, rule.includeSuffix);
|
|
88
87
|
};
|
|
89
88
|
const DECLARATION_BODY_NODE_TYPES = new Set([
|
|
90
89
|
'class_body',
|
|
@@ -102,7 +101,7 @@ const FIELD_LIKE_MEMBER_TYPES = new Set([
|
|
|
102
101
|
'pair',
|
|
103
102
|
'enum_assignment',
|
|
104
103
|
]);
|
|
105
|
-
const declarationChunk = async (
|
|
104
|
+
const declarationChunk = async (content, filePath, startLine, endLine, chunkSize, overlap, rule) => {
|
|
106
105
|
const tree = await ensureAndParse(content, filePath);
|
|
107
106
|
if (!tree)
|
|
108
107
|
return [];
|
|
@@ -112,10 +111,10 @@ const declarationChunk = async (label, content, filePath, startLine, endLine, ch
|
|
|
112
111
|
const bodyNode = getDeclarationBodyNode(targetNode);
|
|
113
112
|
if (!bodyNode)
|
|
114
113
|
return [];
|
|
115
|
-
const members = collectDeclarationUnits(bodyNode,
|
|
114
|
+
const members = collectDeclarationUnits(bodyNode, rule.groupFields);
|
|
116
115
|
if (members.length === 0)
|
|
117
116
|
return [];
|
|
118
|
-
return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex,
|
|
117
|
+
return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex, rule.includePrefix, rule.includeSuffix);
|
|
119
118
|
};
|
|
120
119
|
const buildChunk = (content, lineOffsets, chunkIndex, startOffset, endOffset, baseStartLine) => {
|
|
121
120
|
const lineRange = resolveChunkLines(lineOffsets, startOffset, endOffset, baseStartLine);
|
|
@@ -150,12 +149,18 @@ const chunkByUnits = (content, lineOffsets, baseStartLine, chunkSize, overlap, u
|
|
|
150
149
|
}
|
|
151
150
|
if (candidateEndOffset - chunkStartOffset > chunkSize) {
|
|
152
151
|
const oversizedUnit = units[chunkStartUnitIdx];
|
|
153
|
-
const
|
|
154
|
-
|
|
152
|
+
const oversizedStartOffset = chunkStartUnitIdx === 0 && includeContainerPrefixOnFirstChunk
|
|
153
|
+
? containerStartOffset
|
|
154
|
+
: oversizedUnit.startIndex;
|
|
155
|
+
const oversizedEndOffset = chunkStartUnitIdx === units.length - 1 && includeContainerSuffixOnLastChunk
|
|
156
|
+
? containerEndOffset
|
|
157
|
+
: oversizedUnit.endIndex;
|
|
158
|
+
const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedStartOffset, oversizedEndOffset, baseStartLine);
|
|
159
|
+
const oversizedChunks = characterChunk(content.slice(oversizedStartOffset, oversizedEndOffset), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
|
|
155
160
|
...chunk,
|
|
156
161
|
chunkIndex: chunks.length + offsetIdx,
|
|
157
|
-
startOffset: chunk.startOffset +
|
|
158
|
-
endOffset: chunk.endOffset +
|
|
162
|
+
startOffset: chunk.startOffset + oversizedStartOffset,
|
|
163
|
+
endOffset: chunk.endOffset + oversizedStartOffset,
|
|
159
164
|
}));
|
|
160
165
|
chunks.push(...oversizedChunks);
|
|
161
166
|
chunkStartUnitIdx += 1;
|
|
@@ -200,7 +205,7 @@ const getDeclarationBodyNode = (node) => {
|
|
|
200
205
|
}
|
|
201
206
|
return null;
|
|
202
207
|
};
|
|
203
|
-
const collectDeclarationUnits = (bodyNode,
|
|
208
|
+
const collectDeclarationUnits = (bodyNode, groupFields) => {
|
|
204
209
|
const members = [];
|
|
205
210
|
for (let i = 0; i < bodyNode.namedChildCount; i++) {
|
|
206
211
|
const child = bodyNode.namedChild(i);
|
|
@@ -209,7 +214,7 @@ const collectDeclarationUnits = (bodyNode, label) => {
|
|
|
209
214
|
members.push({
|
|
210
215
|
startIndex: child.startIndex,
|
|
211
216
|
endIndex: child.endIndex,
|
|
212
|
-
groupable:
|
|
217
|
+
groupable: groupFields && FIELD_LIKE_MEMBER_TYPES.has(child.type),
|
|
213
218
|
});
|
|
214
219
|
}
|
|
215
220
|
if (members.length === 0)
|
|
@@ -9,6 +9,12 @@
|
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
11
|
import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Bump this when the embedding text template changes in a way that should
|
|
14
|
+
* invalidate existing vectors, such as metadata/header shape changes,
|
|
15
|
+
* structural container context changes, or preceding-context formatting rules.
|
|
16
|
+
*/
|
|
17
|
+
export declare const EMBEDDING_TEXT_VERSION = "v2";
|
|
12
18
|
/**
|
|
13
19
|
* Compute a stable content fingerprint for an embeddable node.
|
|
14
20
|
* Used to detect when the underlying text has changed so stale vectors
|
|
@@ -13,10 +13,16 @@ import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady,
|
|
|
13
13
|
import { generateEmbeddingText } from './text-generator.js';
|
|
14
14
|
import { chunkNode, characterChunk } from './chunker.js';
|
|
15
15
|
import { extractStructuralNames } from './structural-extractor.js';
|
|
16
|
-
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
|
|
16
|
+
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABEL_METHOD, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
|
|
17
17
|
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
|
|
18
18
|
import { loadVectorExtension } from '../lbug/lbug-adapter.js';
|
|
19
19
|
const isDev = process.env.NODE_ENV === 'development';
|
|
20
|
+
/**
|
|
21
|
+
* Bump this when the embedding text template changes in a way that should
|
|
22
|
+
* invalidate existing vectors, such as metadata/header shape changes,
|
|
23
|
+
* structural container context changes, or preceding-context formatting rules.
|
|
24
|
+
*/
|
|
25
|
+
export const EMBEDDING_TEXT_VERSION = 'v2';
|
|
20
26
|
/**
|
|
21
27
|
* Compute a stable content fingerprint for an embeddable node.
|
|
22
28
|
* Used to detect when the underlying text has changed so stale vectors
|
|
@@ -27,8 +33,9 @@ export const contentHashForNode = (node, config = {}) => {
|
|
|
27
33
|
// Hash must be deterministic across runs, so exclude methodNames/fieldNames
|
|
28
34
|
// which are populated during the batch loop via AST extraction.
|
|
29
35
|
// Using only node.content ensures the hash stays stable.
|
|
36
|
+
// NOTE: A change to extractStructuralNames behavior requires bumping EMBEDDING_TEXT_VERSION.
|
|
30
37
|
const text = generateEmbeddingText({ ...node, methodNames: undefined, fieldNames: undefined }, node.content, config);
|
|
31
|
-
return createHash('sha1').update(text).digest('hex');
|
|
38
|
+
return createHash('sha1').update(EMBEDDING_TEXT_VERSION).update('\n').update(text).digest('hex');
|
|
32
39
|
};
|
|
33
40
|
/**
|
|
34
41
|
* Query all embeddable nodes from LadybugDB
|
|
@@ -39,7 +46,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
39
46
|
for (const label of EMBEDDABLE_LABELS) {
|
|
40
47
|
try {
|
|
41
48
|
let query;
|
|
42
|
-
if (label ===
|
|
49
|
+
if (label === LABEL_METHOD) {
|
|
43
50
|
// Method has parameterCount and returnType
|
|
44
51
|
query = `
|
|
45
52
|
MATCH (n:Method)
|
|
@@ -72,7 +79,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
72
79
|
}
|
|
73
80
|
const rows = await executeQuery(query);
|
|
74
81
|
for (const row of rows) {
|
|
75
|
-
const hasExportedColumn = label ===
|
|
82
|
+
const hasExportedColumn = label === LABEL_METHOD || LABELS_WITH_EXPORTED.has(label);
|
|
76
83
|
allNodes.push({
|
|
77
84
|
id: row.id ?? row[0],
|
|
78
85
|
name: row.name ?? row[1],
|
|
@@ -83,7 +90,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
83
90
|
endLine: row.endLine ?? row[6],
|
|
84
91
|
isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
|
|
85
92
|
description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
|
|
86
|
-
...(label ===
|
|
93
|
+
...(label === LABEL_METHOD
|
|
87
94
|
? {
|
|
88
95
|
parameterCount: row.parameterCount ?? row[9],
|
|
89
96
|
returnType: row.returnType ?? row[10],
|
|
@@ -301,8 +308,9 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
301
308
|
chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap);
|
|
302
309
|
}
|
|
303
310
|
}
|
|
311
|
+
let prevTail = '';
|
|
304
312
|
for (const chunk of chunks) {
|
|
305
|
-
const text = generateEmbeddingText(node, chunk.text, finalConfig);
|
|
313
|
+
const text = generateEmbeddingText(node, chunk.text, finalConfig, chunk.chunkIndex, prevTail);
|
|
306
314
|
allTexts.push(text);
|
|
307
315
|
allUpdates.push({
|
|
308
316
|
nodeId: node.id,
|
|
@@ -311,6 +319,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
311
319
|
endLine: chunk.endLine,
|
|
312
320
|
contentHash: hash,
|
|
313
321
|
});
|
|
322
|
+
prevTail = overlap > 0 ? chunk.text.slice(-overlap) : '';
|
|
314
323
|
}
|
|
315
324
|
}
|
|
316
325
|
// Embed chunk texts in sub-batches to control memory
|
|
@@ -24,7 +24,7 @@ export declare const extractDeclarationOnly: (content: string) => string;
|
|
|
24
24
|
* Generate embedding text for any embeddable node
|
|
25
25
|
* Dispatches to the appropriate generator based on node label
|
|
26
26
|
*/
|
|
27
|
-
export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig
|
|
27
|
+
export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>, chunkIndex?: number, prevTail?: string) => string;
|
|
28
28
|
/**
|
|
29
29
|
* Export truncation helper for testing
|
|
30
30
|
*/
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* Method/field names for Class nodes are extracted by the ingestion
|
|
9
9
|
* pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
|
|
10
10
|
*/
|
|
11
|
-
import { DEFAULT_EMBEDDING_CONFIG, isShortLabel } from './types.js';
|
|
11
|
+
import { CHUNKING_RULES, DEFAULT_EMBEDDING_CONFIG, STRUCTURAL_TEXT_MODE_DECLARATION, isShortLabel, } from './types.js';
|
|
12
12
|
/**
|
|
13
13
|
* Truncate description to max length at sentence/word boundary
|
|
14
14
|
*/
|
|
@@ -71,34 +71,45 @@ const buildMetadataHeader = (node, config) => {
|
|
|
71
71
|
}
|
|
72
72
|
return parts.join('\n');
|
|
73
73
|
};
|
|
74
|
-
const generateCodeBodyText = (node, codeBody, config) => {
|
|
74
|
+
const generateCodeBodyText = (node, codeBody, config, prevTail) => {
|
|
75
75
|
const header = buildMetadataHeader(node, config);
|
|
76
|
-
const
|
|
77
|
-
|
|
76
|
+
const parts = [header];
|
|
77
|
+
if (prevTail) {
|
|
78
|
+
parts.push(`[preceding context]: ...${cleanContent(prevTail)}`);
|
|
79
|
+
}
|
|
80
|
+
parts.push('', cleanContent(codeBody));
|
|
81
|
+
return parts.join('\n');
|
|
78
82
|
};
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const generateClassText = (node, codeBody, config) => {
|
|
85
|
-
return generateStructuralTypeText(node, codeBody, config);
|
|
83
|
+
const getCompactContainerContext = (cleanedContent, declarationOnly) => {
|
|
84
|
+
const source = declarationOnly || cleanedContent;
|
|
85
|
+
const nlIdx = source.indexOf('\n');
|
|
86
|
+
const firstLine = (nlIdx === -1 ? source : source.substring(0, nlIdx)).trim();
|
|
87
|
+
return firstLine ? `Container: ${firstLine}` : undefined;
|
|
86
88
|
};
|
|
87
|
-
const generateStructuralTypeText = (node, codeBody, config) => {
|
|
89
|
+
const generateStructuralTypeText = (node, codeBody, config, chunkIndex, prevTail) => {
|
|
88
90
|
const header = buildMetadataHeader(node, config);
|
|
89
91
|
const parts = [header];
|
|
90
|
-
|
|
92
|
+
const isFirstChunk = chunkIndex === undefined || chunkIndex === 0;
|
|
93
|
+
const cleanedContent = cleanContent(node.content);
|
|
94
|
+
const declarationOnly = extractDeclarationOnly(cleanedContent);
|
|
95
|
+
const compactContainerContext = getCompactContainerContext(cleanedContent, declarationOnly);
|
|
96
|
+
if (compactContainerContext) {
|
|
97
|
+
parts.push(compactContainerContext);
|
|
98
|
+
}
|
|
99
|
+
if (prevTail) {
|
|
100
|
+
parts.push(`[preceding context]: ...${cleanContent(prevTail)}`);
|
|
101
|
+
}
|
|
102
|
+
if (isFirstChunk && node.methodNames?.length) {
|
|
91
103
|
parts.push(`Methods: ${node.methodNames.join(', ')}`);
|
|
92
104
|
}
|
|
93
|
-
if (node.fieldNames?.length) {
|
|
105
|
+
if (isFirstChunk && node.fieldNames?.length) {
|
|
94
106
|
parts.push(`Properties: ${node.fieldNames.join(', ')}`);
|
|
95
107
|
}
|
|
96
|
-
|
|
97
|
-
if (declarationOnly) {
|
|
108
|
+
if (isFirstChunk && declarationOnly) {
|
|
98
109
|
parts.push('', declarationOnly);
|
|
99
110
|
}
|
|
100
111
|
const cleanedChunk = cleanContent(codeBody);
|
|
101
|
-
if (cleanedChunk && cleanedChunk !==
|
|
112
|
+
if (cleanedChunk && cleanedChunk !== cleanedContent) {
|
|
102
113
|
parts.push('', cleanedChunk);
|
|
103
114
|
}
|
|
104
115
|
return parts.join('\n');
|
|
@@ -179,19 +190,17 @@ export const extractDeclarationOnly = (content) => {
|
|
|
179
190
|
* Generate embedding text for any embeddable node
|
|
180
191
|
* Dispatches to the appropriate generator based on node label
|
|
181
192
|
*/
|
|
182
|
-
export const generateEmbeddingText = (node, codeBody, config = {}) => {
|
|
193
|
+
export const generateEmbeddingText = (node, codeBody, config = {}, chunkIndex, prevTail) => {
|
|
183
194
|
if (isShortLabel(node.label)) {
|
|
184
195
|
const header = buildMetadataHeader(node, config);
|
|
185
196
|
const cleaned = cleanContent(node.content);
|
|
186
197
|
return `${header}\n\n${cleaned}`;
|
|
187
198
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
if (node.label === 'Interface') {
|
|
192
|
-
return generateStructuralTypeText(node, codeBody, config);
|
|
199
|
+
const chunkingRule = CHUNKING_RULES[node.label];
|
|
200
|
+
if (chunkingRule?.structuralTextMode === STRUCTURAL_TEXT_MODE_DECLARATION) {
|
|
201
|
+
return generateStructuralTypeText(node, codeBody, config, chunkIndex, prevTail);
|
|
193
202
|
}
|
|
194
|
-
return generateCodeBodyText(node, codeBody, config);
|
|
203
|
+
return generateCodeBodyText(node, codeBody, config, prevTail);
|
|
195
204
|
};
|
|
196
205
|
/**
|
|
197
206
|
* Export truncation helper for testing
|
|
@@ -3,6 +3,38 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Type definitions for the embedding generation and semantic search system.
|
|
5
5
|
*/
|
|
6
|
+
export declare const LABEL_FUNCTION: "Function";
|
|
7
|
+
export declare const LABEL_METHOD: "Method";
|
|
8
|
+
export declare const LABEL_CONSTRUCTOR: "Constructor";
|
|
9
|
+
export declare const LABEL_CLASS: "Class";
|
|
10
|
+
export declare const LABEL_INTERFACE: "Interface";
|
|
11
|
+
export declare const LABEL_STRUCT: "Struct";
|
|
12
|
+
export declare const LABEL_ENUM: "Enum";
|
|
13
|
+
export declare const LABEL_TRAIT: "Trait";
|
|
14
|
+
export declare const LABEL_IMPL: "Impl";
|
|
15
|
+
export declare const LABEL_MACRO: "Macro";
|
|
16
|
+
export declare const LABEL_NAMESPACE: "Namespace";
|
|
17
|
+
export declare const LABEL_TYPE_ALIAS: "TypeAlias";
|
|
18
|
+
export declare const LABEL_TYPEDEF: "Typedef";
|
|
19
|
+
export declare const LABEL_CONST: "Const";
|
|
20
|
+
export declare const LABEL_PROPERTY: "Property";
|
|
21
|
+
export declare const LABEL_RECORD: "Record";
|
|
22
|
+
export declare const LABEL_UNION: "Union";
|
|
23
|
+
export declare const LABEL_STATIC: "Static";
|
|
24
|
+
export declare const LABEL_VARIABLE: "Variable";
|
|
25
|
+
export declare const LABEL_CODE_ELEMENT: "CodeElement";
|
|
26
|
+
export declare const CHUNK_MODE_AST_FUNCTION: "ast-function";
|
|
27
|
+
export declare const CHUNK_MODE_AST_DECLARATION: "ast-declaration";
|
|
28
|
+
export declare const CHUNK_MODE_CHARACTER: "character";
|
|
29
|
+
export declare const STRUCTURAL_TEXT_MODE_NONE: "none";
|
|
30
|
+
export declare const STRUCTURAL_TEXT_MODE_DECLARATION: "declaration";
|
|
31
|
+
export interface ChunkingRule {
|
|
32
|
+
mode: typeof CHUNK_MODE_AST_FUNCTION | typeof CHUNK_MODE_AST_DECLARATION | typeof CHUNK_MODE_CHARACTER;
|
|
33
|
+
includePrefix: boolean;
|
|
34
|
+
includeSuffix: boolean;
|
|
35
|
+
groupFields: boolean;
|
|
36
|
+
structuralTextMode: typeof STRUCTURAL_TEXT_MODE_NONE | typeof STRUCTURAL_TEXT_MODE_DECLARATION;
|
|
37
|
+
}
|
|
6
38
|
/**
|
|
7
39
|
* Node labels that need chunking (have code body, potentially long)
|
|
8
40
|
*/
|
|
@@ -29,13 +61,22 @@ export declare const isChunkableLabel: (label: string) => boolean;
|
|
|
29
61
|
*/
|
|
30
62
|
export declare const isShortLabel: (label: string) => boolean;
|
|
31
63
|
/**
|
|
32
|
-
* Node labels that have structural names (methods/fields) extractable via AST
|
|
64
|
+
* Node labels that have structural names (methods/fields) extractable via AST.
|
|
65
|
+
* Only labels that consume methodNames/fieldNames in their embedding text should
|
|
66
|
+
* be listed here — extra entries trigger wasted AST parses with no effect on output.
|
|
33
67
|
*/
|
|
34
68
|
export declare const STRUCTURAL_LABELS: ReadonlySet<string>;
|
|
35
69
|
/**
|
|
36
70
|
* Node labels that have isExported column in their schema
|
|
37
71
|
*/
|
|
38
72
|
export declare const LABELS_WITH_EXPORTED: ReadonlySet<string>;
|
|
73
|
+
/**
|
|
74
|
+
* Labels that need special chunking and/or structural text semantics.
|
|
75
|
+
* Any chunkable label omitted here intentionally falls back to characterChunk
|
|
76
|
+
* plus generateCodeBodyText (for example Enum/Trait/Impl/Macro/Namespace).
|
|
77
|
+
*/
|
|
78
|
+
type ChunkableLabel = (typeof CHUNKABLE_LABELS)[number];
|
|
79
|
+
export declare const CHUNKING_RULES: Readonly<Partial<Record<ChunkableLabel, ChunkingRule>>>;
|
|
39
80
|
/**
|
|
40
81
|
* Embedding pipeline phases
|
|
41
82
|
*/
|
|
@@ -163,3 +204,4 @@ export declare const dedupBestChunks: (rows: ChunkSearchRow[], limit?: number) =
|
|
|
163
204
|
* or can tell the result set is exhausted.
|
|
164
205
|
*/
|
|
165
206
|
export declare const collectBestChunks: (limit: number, fetchRows: (fetchLimit: number) => Promise<ChunkSearchRow[]>, maxFetch?: number) => Promise<Map<string, BestChunkMatch>>;
|
|
207
|
+
export {};
|