gitnexus 1.6.3-rc.8 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/README.md +21 -5
  2. package/dist/_shared/graph/types.d.ts +16 -0
  3. package/dist/_shared/graph/types.d.ts.map +1 -1
  4. package/dist/_shared/index.d.ts +20 -2
  5. package/dist/_shared/index.d.ts.map +1 -1
  6. package/dist/_shared/index.js +11 -0
  7. package/dist/_shared/index.js.map +1 -1
  8. package/dist/_shared/scope-resolution/def-index.js +2 -2
  9. package/dist/_shared/scope-resolution/def-index.js.map +1 -1
  10. package/dist/_shared/scope-resolution/method-dispatch-index.d.ts +8 -0
  11. package/dist/_shared/scope-resolution/method-dispatch-index.d.ts.map +1 -1
  12. package/dist/_shared/scope-resolution/method-dispatch-index.js +2 -2
  13. package/dist/_shared/scope-resolution/method-dispatch-index.js.map +1 -1
  14. package/dist/_shared/scope-resolution/module-scope-index.d.ts +8 -0
  15. package/dist/_shared/scope-resolution/module-scope-index.d.ts.map +1 -1
  16. package/dist/_shared/scope-resolution/module-scope-index.js +10 -2
  17. package/dist/_shared/scope-resolution/module-scope-index.js.map +1 -1
  18. package/dist/_shared/scope-resolution/parsed-file.d.ts +76 -0
  19. package/dist/_shared/scope-resolution/parsed-file.d.ts.map +1 -0
  20. package/dist/_shared/scope-resolution/parsed-file.js +54 -0
  21. package/dist/_shared/scope-resolution/parsed-file.js.map +1 -0
  22. package/dist/_shared/scope-resolution/position-index.d.ts +12 -0
  23. package/dist/_shared/scope-resolution/position-index.d.ts.map +1 -1
  24. package/dist/_shared/scope-resolution/position-index.js +2 -2
  25. package/dist/_shared/scope-resolution/position-index.js.map +1 -1
  26. package/dist/_shared/scope-resolution/qualified-name-index.js +2 -2
  27. package/dist/_shared/scope-resolution/qualified-name-index.js.map +1 -1
  28. package/dist/_shared/scope-resolution/reference-site.d.ts +75 -0
  29. package/dist/_shared/scope-resolution/reference-site.d.ts.map +1 -0
  30. package/dist/_shared/scope-resolution/reference-site.js +24 -0
  31. package/dist/_shared/scope-resolution/reference-site.js.map +1 -0
  32. package/dist/_shared/scope-resolution/registries/class-registry.d.ts +27 -0
  33. package/dist/_shared/scope-resolution/registries/class-registry.d.ts.map +1 -0
  34. package/dist/_shared/scope-resolution/registries/class-registry.js +30 -0
  35. package/dist/_shared/scope-resolution/registries/class-registry.js.map +1 -0
  36. package/dist/_shared/scope-resolution/registries/context.d.ts +69 -0
  37. package/dist/_shared/scope-resolution/registries/context.d.ts.map +1 -0
  38. package/dist/_shared/scope-resolution/registries/context.js +44 -0
  39. package/dist/_shared/scope-resolution/registries/context.js.map +1 -0
  40. package/dist/_shared/scope-resolution/registries/evidence.d.ts +56 -0
  41. package/dist/_shared/scope-resolution/registries/evidence.d.ts.map +1 -0
  42. package/dist/_shared/scope-resolution/registries/evidence.js +150 -0
  43. package/dist/_shared/scope-resolution/registries/evidence.js.map +1 -0
  44. package/dist/_shared/scope-resolution/registries/field-registry.d.ts +26 -0
  45. package/dist/_shared/scope-resolution/registries/field-registry.d.ts.map +1 -0
  46. package/dist/_shared/scope-resolution/registries/field-registry.js +31 -0
  47. package/dist/_shared/scope-resolution/registries/field-registry.js.map +1 -0
  48. package/dist/_shared/scope-resolution/registries/lookup-core.d.ts +81 -0
  49. package/dist/_shared/scope-resolution/registries/lookup-core.d.ts.map +1 -0
  50. package/dist/_shared/scope-resolution/registries/lookup-core.js +332 -0
  51. package/dist/_shared/scope-resolution/registries/lookup-core.js.map +1 -0
  52. package/dist/_shared/scope-resolution/registries/lookup-qualified.d.ts +33 -0
  53. package/dist/_shared/scope-resolution/registries/lookup-qualified.d.ts.map +1 -0
  54. package/dist/_shared/scope-resolution/registries/lookup-qualified.js +56 -0
  55. package/dist/_shared/scope-resolution/registries/lookup-qualified.js.map +1 -0
  56. package/dist/_shared/scope-resolution/registries/method-registry.d.ts +36 -0
  57. package/dist/_shared/scope-resolution/registries/method-registry.d.ts.map +1 -0
  58. package/dist/_shared/scope-resolution/registries/method-registry.js +32 -0
  59. package/dist/_shared/scope-resolution/registries/method-registry.js.map +1 -0
  60. package/dist/_shared/scope-resolution/registries/tie-breaks.d.ts +43 -0
  61. package/dist/_shared/scope-resolution/registries/tie-breaks.d.ts.map +1 -0
  62. package/dist/_shared/scope-resolution/registries/tie-breaks.js +60 -0
  63. package/dist/_shared/scope-resolution/registries/tie-breaks.js.map +1 -0
  64. package/dist/_shared/scope-resolution/resolve-type-ref.d.ts +1 -10
  65. package/dist/_shared/scope-resolution/resolve-type-ref.d.ts.map +1 -1
  66. package/dist/_shared/scope-resolution/resolve-type-ref.js +6 -0
  67. package/dist/_shared/scope-resolution/resolve-type-ref.js.map +1 -1
  68. package/dist/_shared/scope-resolution/scope-tree.d.ts +4 -4
  69. package/dist/_shared/scope-resolution/scope-tree.d.ts.map +1 -1
  70. package/dist/_shared/scope-resolution/scope-tree.js +3 -2
  71. package/dist/_shared/scope-resolution/scope-tree.js.map +1 -1
  72. package/dist/_shared/scope-resolution/shadow/aggregate.d.ts +6 -2
  73. package/dist/_shared/scope-resolution/shadow/aggregate.d.ts.map +1 -1
  74. package/dist/_shared/scope-resolution/shadow/aggregate.js +5 -0
  75. package/dist/_shared/scope-resolution/shadow/aggregate.js.map +1 -1
  76. package/dist/_shared/scope-resolution/types.d.ts +11 -0
  77. package/dist/_shared/scope-resolution/types.d.ts.map +1 -1
  78. package/dist/cli/ai-context.js +35 -4
  79. package/dist/cli/analyze.d.ts +27 -0
  80. package/dist/cli/analyze.js +31 -1
  81. package/dist/cli/clean.js +19 -1
  82. package/dist/cli/group.js +73 -0
  83. package/dist/cli/index-repo.js +8 -1
  84. package/dist/cli/index.js +26 -1
  85. package/dist/cli/list.js +11 -1
  86. package/dist/cli/remove.d.ts +30 -0
  87. package/dist/cli/remove.js +99 -0
  88. package/dist/cli/setup.js +185 -57
  89. package/dist/cli/tool.d.ts +5 -0
  90. package/dist/cli/tool.js +42 -0
  91. package/dist/config/ignore-service.d.ts +9 -0
  92. package/dist/config/ignore-service.js +80 -13
  93. package/dist/core/embedding-mode.d.ts +30 -0
  94. package/dist/core/embedding-mode.js +30 -0
  95. package/dist/core/embeddings/ast-utils.js +22 -22
  96. package/dist/core/embeddings/chunker.js +30 -25
  97. package/dist/core/embeddings/embedding-pipeline.d.ts +6 -0
  98. package/dist/core/embeddings/embedding-pipeline.js +15 -6
  99. package/dist/core/embeddings/text-generator.d.ts +1 -1
  100. package/dist/core/embeddings/text-generator.js +33 -24
  101. package/dist/core/embeddings/types.d.ts +43 -1
  102. package/dist/core/embeddings/types.js +101 -29
  103. package/dist/core/git-staleness.d.ts +18 -0
  104. package/dist/core/git-staleness.js +108 -0
  105. package/dist/core/graph/graph.js +115 -20
  106. package/dist/core/graph/types.d.ts +12 -1
  107. package/dist/core/group/config-parser.d.ts +4 -0
  108. package/dist/core/group/config-parser.js +18 -1
  109. package/dist/core/group/cross-impact.d.ts +41 -0
  110. package/dist/core/group/cross-impact.js +441 -0
  111. package/dist/core/group/extractors/http-patterns/php.js +126 -18
  112. package/dist/core/group/group-path-utils.d.ts +17 -0
  113. package/dist/core/group/group-path-utils.js +40 -0
  114. package/dist/core/group/resolve-at-member.d.ts +10 -0
  115. package/dist/core/group/resolve-at-member.js +31 -0
  116. package/dist/core/group/service.d.ts +9 -0
  117. package/dist/core/group/service.js +259 -25
  118. package/dist/core/group/types.d.ts +30 -0
  119. package/dist/core/ingestion/ast-cache.d.ts +16 -1
  120. package/dist/core/ingestion/ast-cache.js +14 -2
  121. package/dist/core/ingestion/call-processor.js +9 -0
  122. package/dist/core/ingestion/emit-references.d.ts +88 -0
  123. package/dist/core/ingestion/emit-references.js +229 -0
  124. package/dist/core/ingestion/filesystem-walker.js +6 -4
  125. package/dist/core/ingestion/finalize-orchestrator.d.ts +63 -0
  126. package/dist/core/ingestion/finalize-orchestrator.js +139 -0
  127. package/dist/core/ingestion/framework-detection.js +6 -2
  128. package/dist/core/ingestion/import-processor.js +4 -0
  129. package/dist/core/ingestion/import-resolvers/python.js +9 -6
  130. package/dist/core/ingestion/import-target-adapter.d.ts +73 -0
  131. package/dist/core/ingestion/import-target-adapter.js +95 -0
  132. package/dist/core/ingestion/language-provider.d.ts +36 -33
  133. package/dist/core/ingestion/languages/csharp/accessor-unwrap.d.ts +21 -0
  134. package/dist/core/ingestion/languages/csharp/accessor-unwrap.js +56 -0
  135. package/dist/core/ingestion/languages/csharp/arity-metadata.d.ts +26 -0
  136. package/dist/core/ingestion/languages/csharp/arity-metadata.js +46 -0
  137. package/dist/core/ingestion/languages/csharp/arity.d.ts +23 -0
  138. package/dist/core/ingestion/languages/csharp/arity.js +37 -0
  139. package/dist/core/ingestion/languages/csharp/cache-stats.d.ts +15 -0
  140. package/dist/core/ingestion/languages/csharp/cache-stats.js +26 -0
  141. package/dist/core/ingestion/languages/csharp/captures.d.ts +19 -0
  142. package/dist/core/ingestion/languages/csharp/captures.js +249 -0
  143. package/dist/core/ingestion/languages/csharp/import-decomposer.d.ts +19 -0
  144. package/dist/core/ingestion/languages/csharp/import-decomposer.js +93 -0
  145. package/dist/core/ingestion/languages/csharp/import-target.d.ts +25 -0
  146. package/dist/core/ingestion/languages/csharp/import-target.js +123 -0
  147. package/dist/core/ingestion/languages/csharp/index.d.ts +82 -0
  148. package/dist/core/ingestion/languages/csharp/index.js +82 -0
  149. package/dist/core/ingestion/languages/csharp/interpret.d.ts +15 -0
  150. package/dist/core/ingestion/languages/csharp/interpret.js +132 -0
  151. package/dist/core/ingestion/languages/csharp/merge-bindings.d.ts +27 -0
  152. package/dist/core/ingestion/languages/csharp/merge-bindings.js +55 -0
  153. package/dist/core/ingestion/languages/csharp/namespace-siblings.d.ts +50 -0
  154. package/dist/core/ingestion/languages/csharp/namespace-siblings.js +374 -0
  155. package/dist/core/ingestion/languages/csharp/query.d.ts +35 -0
  156. package/dist/core/ingestion/languages/csharp/query.js +515 -0
  157. package/dist/core/ingestion/languages/csharp/receiver-binding.d.ts +31 -0
  158. package/dist/core/ingestion/languages/csharp/receiver-binding.js +135 -0
  159. package/dist/core/ingestion/languages/csharp/scope-resolver.d.ts +10 -0
  160. package/dist/core/ingestion/languages/csharp/scope-resolver.js +63 -0
  161. package/dist/core/ingestion/languages/csharp/simple-hooks.d.ts +53 -0
  162. package/dist/core/ingestion/languages/csharp/simple-hooks.js +76 -0
  163. package/dist/core/ingestion/languages/csharp.js +14 -0
  164. package/dist/core/ingestion/languages/python/arity-metadata.d.ts +24 -0
  165. package/dist/core/ingestion/languages/python/arity-metadata.js +45 -0
  166. package/dist/core/ingestion/languages/python/arity.d.ts +22 -0
  167. package/dist/core/ingestion/languages/python/arity.js +38 -0
  168. package/dist/core/ingestion/languages/python/cache-stats.d.ts +17 -0
  169. package/dist/core/ingestion/languages/python/cache-stats.js +28 -0
  170. package/dist/core/ingestion/languages/python/captures.d.ts +19 -0
  171. package/dist/core/ingestion/languages/python/captures.js +106 -0
  172. package/dist/core/ingestion/languages/python/import-decomposer.d.ts +15 -0
  173. package/dist/core/ingestion/languages/python/import-decomposer.js +112 -0
  174. package/dist/core/ingestion/languages/python/import-target.d.ts +21 -0
  175. package/dist/core/ingestion/languages/python/import-target.js +99 -0
  176. package/dist/core/ingestion/languages/python/index.d.ts +80 -0
  177. package/dist/core/ingestion/languages/python/index.js +80 -0
  178. package/dist/core/ingestion/languages/python/interpret.d.ts +15 -0
  179. package/dist/core/ingestion/languages/python/interpret.js +191 -0
  180. package/dist/core/ingestion/languages/python/merge-bindings.d.ts +16 -0
  181. package/dist/core/ingestion/languages/python/merge-bindings.js +44 -0
  182. package/dist/core/ingestion/languages/python/query.d.ts +9 -0
  183. package/dist/core/ingestion/languages/python/query.js +267 -0
  184. package/dist/core/ingestion/languages/python/receiver-binding.d.ts +21 -0
  185. package/dist/core/ingestion/languages/python/receiver-binding.js +116 -0
  186. package/dist/core/ingestion/languages/python/scope-resolver.d.ts +16 -0
  187. package/dist/core/ingestion/languages/python/scope-resolver.js +53 -0
  188. package/dist/core/ingestion/languages/python/simple-hooks.d.ts +23 -0
  189. package/dist/core/ingestion/languages/python/simple-hooks.js +35 -0
  190. package/dist/core/ingestion/languages/python.js +14 -0
  191. package/dist/core/ingestion/model/method-registry.d.ts +9 -0
  192. package/dist/core/ingestion/model/method-registry.js +4 -0
  193. package/dist/core/ingestion/model/scope-resolution-indexes.d.ts +59 -0
  194. package/dist/core/ingestion/model/scope-resolution-indexes.js +42 -0
  195. package/dist/core/ingestion/model/semantic-model.d.ts +64 -0
  196. package/dist/core/ingestion/model/semantic-model.js +55 -0
  197. package/dist/core/ingestion/mro-processor.js +38 -22
  198. package/dist/core/ingestion/parsing-processor.d.ts +18 -1
  199. package/dist/core/ingestion/parsing-processor.js +45 -11
  200. package/dist/core/ingestion/pipeline-phases/index.d.ts +1 -0
  201. package/dist/core/ingestion/pipeline-phases/index.js +1 -0
  202. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +10 -0
  203. package/dist/core/ingestion/pipeline-phases/parse-impl.js +17 -2
  204. package/dist/core/ingestion/pipeline-phases/parse.d.ts +18 -0
  205. package/dist/core/ingestion/pipeline.js +2 -1
  206. package/dist/core/ingestion/registry-primary-flag.d.ts +86 -0
  207. package/dist/core/ingestion/registry-primary-flag.js +111 -0
  208. package/dist/core/ingestion/resolve-references.d.ts +63 -0
  209. package/dist/core/ingestion/resolve-references.js +175 -0
  210. package/dist/core/ingestion/scope-extractor-bridge.d.ts +32 -0
  211. package/dist/core/ingestion/scope-extractor-bridge.js +44 -0
  212. package/dist/core/ingestion/scope-extractor.d.ts +86 -0
  213. package/dist/core/ingestion/scope-extractor.js +758 -0
  214. package/dist/core/ingestion/scope-resolution/contract/scope-resolver.d.ts +372 -0
  215. package/dist/core/ingestion/scope-resolution/contract/scope-resolver.js +212 -0
  216. package/dist/core/ingestion/scope-resolution/graph-bridge/edges.d.ts +43 -0
  217. package/dist/core/ingestion/scope-resolution/graph-bridge/edges.js +79 -0
  218. package/dist/core/ingestion/scope-resolution/graph-bridge/ids.d.ts +57 -0
  219. package/dist/core/ingestion/scope-resolution/graph-bridge/ids.js +112 -0
  220. package/dist/core/ingestion/scope-resolution/graph-bridge/imports-to-edges.d.ts +17 -0
  221. package/dist/core/ingestion/scope-resolution/graph-bridge/imports-to-edges.js +46 -0
  222. package/dist/core/ingestion/scope-resolution/graph-bridge/method-dispatch.d.ts +19 -0
  223. package/dist/core/ingestion/scope-resolution/graph-bridge/method-dispatch.js +30 -0
  224. package/dist/core/ingestion/scope-resolution/graph-bridge/node-lookup.d.ts +37 -0
  225. package/dist/core/ingestion/scope-resolution/graph-bridge/node-lookup.js +113 -0
  226. package/dist/core/ingestion/scope-resolution/graph-bridge/references-to-edges.d.ts +38 -0
  227. package/dist/core/ingestion/scope-resolution/graph-bridge/references-to-edges.js +73 -0
  228. package/dist/core/ingestion/scope-resolution/passes/compound-receiver.d.ts +42 -0
  229. package/dist/core/ingestion/scope-resolution/passes/compound-receiver.js +198 -0
  230. package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.d.ts +27 -0
  231. package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.js +131 -0
  232. package/dist/core/ingestion/scope-resolution/passes/imported-return-types.d.ts +48 -0
  233. package/dist/core/ingestion/scope-resolution/passes/imported-return-types.js +130 -0
  234. package/dist/core/ingestion/scope-resolution/passes/mro.d.ts +42 -0
  235. package/dist/core/ingestion/scope-resolution/passes/mro.js +99 -0
  236. package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.d.ts +26 -0
  237. package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.js +61 -0
  238. package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.d.ts +46 -0
  239. package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.js +327 -0
  240. package/dist/core/ingestion/scope-resolution/pipeline/phase.d.ts +47 -0
  241. package/dist/core/ingestion/scope-resolution/pipeline/phase.js +130 -0
  242. package/dist/core/ingestion/scope-resolution/pipeline/reconcile-ownership.d.ts +68 -0
  243. package/dist/core/ingestion/scope-resolution/pipeline/reconcile-ownership.js +125 -0
  244. package/dist/core/ingestion/scope-resolution/pipeline/registry.d.ts +17 -0
  245. package/dist/core/ingestion/scope-resolution/pipeline/registry.js +21 -0
  246. package/dist/core/ingestion/scope-resolution/pipeline/run.d.ts +66 -0
  247. package/dist/core/ingestion/scope-resolution/pipeline/run.js +157 -0
  248. package/dist/core/ingestion/scope-resolution/scope/namespace-targets.d.ts +36 -0
  249. package/dist/core/ingestion/scope-resolution/scope/namespace-targets.js +52 -0
  250. package/dist/core/ingestion/scope-resolution/scope/walkers.d.ts +127 -0
  251. package/dist/core/ingestion/scope-resolution/scope/walkers.js +349 -0
  252. package/dist/core/ingestion/scope-resolution/workspace-index.d.ts +52 -0
  253. package/dist/core/ingestion/scope-resolution/workspace-index.js +61 -0
  254. package/dist/core/ingestion/shadow-harness.d.ts +113 -0
  255. package/dist/core/ingestion/shadow-harness.js +148 -0
  256. package/dist/core/ingestion/utils/ast-helpers.d.ts +19 -1
  257. package/dist/core/ingestion/utils/ast-helpers.js +70 -0
  258. package/dist/core/ingestion/utils/max-file-size.d.ts +20 -0
  259. package/dist/core/ingestion/utils/max-file-size.js +52 -0
  260. package/dist/core/ingestion/workers/parse-worker.d.ts +9 -0
  261. package/dist/core/ingestion/workers/parse-worker.js +57 -21
  262. package/dist/core/lbug/lbug-adapter.d.ts +22 -2
  263. package/dist/core/lbug/lbug-adapter.js +58 -14
  264. package/dist/core/lbug/pool-adapter.d.ts +17 -0
  265. package/dist/core/lbug/pool-adapter.js +24 -14
  266. package/dist/core/run-analyze.d.ts +32 -0
  267. package/dist/core/run-analyze.js +74 -19
  268. package/dist/core/search/bm25-index.d.ts +18 -0
  269. package/dist/core/search/bm25-index.js +125 -12
  270. package/dist/core/tree-sitter/parser-loader.js +6 -1
  271. package/dist/mcp/local/local-backend.d.ts +67 -3
  272. package/dist/mcp/local/local-backend.js +296 -34
  273. package/dist/mcp/resources.d.ts +31 -0
  274. package/dist/mcp/resources.js +100 -17
  275. package/dist/mcp/tools.d.ts +4 -1
  276. package/dist/mcp/tools.js +75 -54
  277. package/dist/server/api.js +6 -2
  278. package/dist/storage/git.d.ts +49 -0
  279. package/dist/storage/git.js +111 -0
  280. package/dist/storage/repo-manager.d.ts +246 -1
  281. package/dist/storage/repo-manager.js +391 -9
  282. package/package.json +7 -6
  283. package/scripts/bench-scope-resolution.ts +134 -0
  284. package/scripts/ci-list-migrated-languages.ts +24 -0
  285. package/skills/gitnexus-cli.md +1 -0
@@ -245,16 +245,20 @@ const IGNORED_FILES = new Set([
245
245
  '.env.test',
246
246
  '.env.example',
247
247
  ]);
248
- // NOTE: Negation patterns in .gitnexusignore (e.g. `!vendor/`) cannot override
249
- // entries in DEFAULT_IGNORE_LIST — this is intentional. The hardcoded list protects
250
- // against indexing directories that are almost never source code (node_modules, .git, etc.).
251
- // Users who need to include such directories should remove them from the hardcoded list.
248
+ // The hardcoded DEFAULT_IGNORE_LIST is the "safety net" default: directories
249
+ // that are almost never source code (node_modules, .git, dist, __tests__,
250
+ // etc.). Users who legitimately need to index one of these can negate the
251
+ // hardcoded rule via a `!pattern` line in `.gitnexusignore` (#771) — same
252
+ // semantics as `.gitignore` negation. That override is applied in
253
+ // `createIgnoreFilter` below; `shouldIgnorePath` itself stays a pure
254
+ // hardcoded-list check so its callers (wiki generator, tests) get
255
+ // deterministic results independent of per-repo config.
252
256
  export const shouldIgnorePath = (filePath) => {
253
257
  const normalizedPath = filePath.replace(/\\/g, '/');
254
258
  const parts = normalizedPath.split('/');
255
259
  const fileName = parts[parts.length - 1];
256
260
  const fileNameLower = fileName.toLowerCase();
257
- // Check if any path segment is in ignore list
261
+ // Check if any path segment is in the hardcoded ignore list.
258
262
  for (const part of parts) {
259
263
  if (DEFAULT_IGNORE_LIST.has(part)) {
260
264
  return true;
@@ -320,6 +324,44 @@ export const loadIgnoreRules = async (repoPath, options) => {
320
324
  }
321
325
  return hasRules ? ig : null;
322
326
  };
327
+ /**
328
+ * Walk ancestor segments of `rel` and check whether `.gitnexusignore`
329
+ * (or `.gitignore`) contains an explicit `!pattern` negation that
330
+ * applies. Returns true as soon as any segment — or the path itself —
331
+ * is matched by a negation rule.
332
+ *
333
+ * Why this exists (#771): the hardcoded DEFAULT_IGNORE_LIST would
334
+ * otherwise block indexing of directories like `__tests__/` even when
335
+ * the user has an explicit `!__tests__/` line in `.gitnexusignore`.
336
+ * Mirroring `.gitignore` negation semantics: a user's explicit
337
+ * unignore of a parent directory implicitly unignores everything
338
+ * underneath, so we walk the ancestor chain rather than only testing
339
+ * the leaf.
340
+ *
341
+ * The `ignore` package's `test(path)` returns `{ignored, unignored}`;
342
+ * `unignored: true` is the "a negation rule matched this path"
343
+ * signal. Children of a negated directory return
344
+ * `{ignored: false, unignored: false}` on a direct test, which is why
345
+ * we also walk the ancestors here.
346
+ */
347
+ const hasExplicitUnignore = (ig, rel) => {
348
+ // Direct match on the path (as a file).
349
+ if (ig.test(rel).unignored)
350
+ return true;
351
+ // Direct match on the path treated as a directory — `!dir/` matches
352
+ // here when rel is the directory itself.
353
+ if (ig.test(rel + '/').unignored)
354
+ return true;
355
+ // Walk ancestor segments. `!parent/` should propagate to every
356
+ // descendant the same way `.gitignore` negation propagates.
357
+ const parts = rel.split('/');
358
+ for (let i = parts.length - 1; i > 0; i--) {
359
+ const ancestor = parts.slice(0, i).join('/') + '/';
360
+ if (ig.test(ancestor).unignored)
361
+ return true;
362
+ }
363
+ return false;
364
+ };
323
365
  /**
324
366
  * Create a glob-compatible ignore filter combining:
325
367
  * - .gitignore / .gitnexusignore patterns (via `ignore` package)
@@ -327,6 +369,15 @@ export const loadIgnoreRules = async (repoPath, options) => {
327
369
  *
328
370
  * Returns an IgnoreLike object for glob's `ignore` option,
329
371
  * enabling directory-level pruning during traversal.
372
+ *
373
+ * Precedence (#771): user's `.gitnexusignore` negation patterns take
374
+ * priority over the hardcoded list, matching `.gitignore` semantics.
375
+ * An explicit `!pattern` rule unignores descendants even when they
376
+ * would otherwise be blocked by DEFAULT_IGNORE_LIST — UNLESS a more
377
+ * specific rule in the same file re-ignores a subset (e.g.
378
+ * `!__tests__/` paired with `__tests__/generated/` blocks the child
379
+ * while leaving the parent negated). Last-match-wins is enforced by
380
+ * consulting `ig.ignores(rel)` after `hasExplicitUnignore`.
330
381
  */
331
382
  export const createIgnoreFilter = async (repoPath, options) => {
332
383
  const ig = await loadIgnoreRules(repoPath, options);
@@ -337,6 +388,15 @@ export const createIgnoreFilter = async (repoPath, options) => {
337
388
  const rel = p.relative();
338
389
  if (!rel)
339
390
  return false;
391
+ // User's .gitnexusignore negation takes precedence over hardcoded
392
+ // rules (#771). If any ancestor or the path itself was explicitly
393
+ // unignored AND no more-specific rule re-ignores this exact path,
394
+ // allow it through. The `!ig.ignores(rel)` guard matches
395
+ // .gitignore's last-match-wins semantics: `!__tests__/` followed
396
+ // by `__tests__/generated/` negates the parent but still blocks
397
+ // the re-ignored child.
398
+ if (ig && hasExplicitUnignore(ig, rel) && !ig.ignores(rel))
399
+ return false;
340
400
  // Check .gitignore / .gitnexusignore patterns
341
401
  if (ig && ig.ignores(rel))
342
402
  return true;
@@ -344,10 +404,20 @@ export const createIgnoreFilter = async (repoPath, options) => {
344
404
  return shouldIgnorePath(rel);
345
405
  },
346
406
  childrenIgnored(p) {
347
- // Fast path: check directory name against hardcoded list.
348
407
  // Note: dot-directories (.git, .vscode, etc.) are primarily excluded by
349
- // glob's `dot: false` option in filesystem-walker.ts. This check is
350
- // defense-in-depth — do not remove `dot: false` assuming this covers it.
408
+ // glob's `dot: false` option in filesystem-walker.ts. The hardcoded
409
+ // list check below is defense-in-depth — do not remove `dot: false`
410
+ // assuming this covers it.
411
+ const rel = p.relative();
412
+ // User's .gitnexusignore negation takes precedence (#771) — if the
413
+ // user explicitly unignored this directory or any ancestor via a
414
+ // !pattern rule, allow descent even if the directory name is in
415
+ // DEFAULT_IGNORE_LIST. The `!ig.ignores(rel + '/')` guard keeps
416
+ // last-match-wins: `!__tests__/` + `__tests__/generated/` still
417
+ // blocks descent into `__tests__/generated/`.
418
+ if (ig && rel && hasExplicitUnignore(ig, rel) && !ig.ignores(rel + '/'))
419
+ return false;
420
+ // Hardcoded list: block descent into well-known noise directories.
351
421
  if (DEFAULT_IGNORE_LIST.has(p.name))
352
422
  return true;
353
423
  // Check against .gitignore / .gitnexusignore patterns.
@@ -358,11 +428,8 @@ export const createIgnoreFilter = async (repoPath, options) => {
358
428
  // Bare-name patterns (e.g. `local`) still match `local/` per gitignore spec:
359
429
  // the `ignore` package normalizes `dir` and `dir/` to match directories.
360
430
  // See: https://github.com/kaelzhang/node-ignore#2-filenames-and-dirnames
361
- if (ig) {
362
- const rel = p.relative();
363
- if (rel && ig.ignores(rel + '/'))
364
- return true;
365
- }
431
+ if (ig && rel && ig.ignores(rel + '/'))
432
+ return true;
366
433
  return false;
367
434
  },
368
435
  };
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Pure derivation of the embedding-mode flags for `runFullAnalysis`.
3
+ *
4
+ * Lives in its own module (no native imports) so the branching contract can
5
+ * be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
6
+ * other side-effecting dependencies pulled in by `run-analyze.ts`.
7
+ *
8
+ * Semantics:
9
+ * --drop-embeddings -> wipe (skip cache load entirely)
10
+ * --embeddings -> load cache, restore, then generate
11
+ * --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
12
+ * (default) + existing>0 -> preserve only (load + restore, no generation)
13
+ * any path with existing=0 -> no cache work, no preservation
14
+ */
15
+ export interface EmbeddingModeInput {
16
+ force?: boolean;
17
+ embeddings?: boolean;
18
+ dropEmbeddings?: boolean;
19
+ }
20
+ export interface EmbeddingMode {
21
+ /** True when phase 4 should run the embedding generation pipeline. */
22
+ shouldGenerateEmbeddings: boolean;
23
+ /** True when we should load the cache to re-insert vectors after rebuild without generating new ones. */
24
+ preserveExistingEmbeddings: boolean;
25
+ /** True when `--force` upgraded a default analyze into a regeneration because the repo was already embedded. */
26
+ forceRegenerateEmbeddings: boolean;
27
+ /** True when we need to load cached embeddings from the existing DB before the rebuild. */
28
+ shouldLoadCache: boolean;
29
+ }
30
+ export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Pure derivation of the embedding-mode flags for `runFullAnalysis`.
3
+ *
4
+ * Lives in its own module (no native imports) so the branching contract can
5
+ * be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
6
+ * other side-effecting dependencies pulled in by `run-analyze.ts`.
7
+ *
8
+ * Semantics:
9
+ * --drop-embeddings -> wipe (skip cache load entirely)
10
+ * --embeddings -> load cache, restore, then generate
11
+ * --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
12
+ * (default) + existing>0 -> preserve only (load + restore, no generation)
13
+ * any path with existing=0 -> no cache work, no preservation
14
+ */
15
+ export function deriveEmbeddingMode(options, existingEmbeddingCount) {
16
+ const hasExisting = existingEmbeddingCount > 0;
17
+ const drop = !!options.dropEmbeddings;
18
+ const explicit = !!options.embeddings;
19
+ const force = !!options.force;
20
+ const forceRegenerateEmbeddings = force && !explicit && !drop && hasExisting;
21
+ const preserveExistingEmbeddings = !explicit && !drop && !forceRegenerateEmbeddings && hasExisting;
22
+ const shouldGenerateEmbeddings = explicit || forceRegenerateEmbeddings;
23
+ const shouldLoadCache = !drop && (shouldGenerateEmbeddings || preserveExistingEmbeddings);
24
+ return {
25
+ shouldGenerateEmbeddings,
26
+ preserveExistingEmbeddings,
27
+ forceRegenerateEmbeddings,
28
+ shouldLoadCache,
29
+ };
30
+ }
@@ -55,17 +55,17 @@ const FUNCTION_LIKE_TYPES = new Set([
55
55
  * numbers don't apply.
56
56
  */
57
57
  export const findFunctionNode = (root) => {
58
- if (FUNCTION_LIKE_TYPES.has(root.type))
59
- return root;
60
- for (let i = 0; i < root.namedChildCount; i++) {
61
- const child = root.namedChild(i);
62
- if (!child)
63
- continue;
64
- if (FUNCTION_LIKE_TYPES.has(child.type))
65
- return child;
66
- const found = findFunctionNode(child);
67
- if (found)
68
- return found;
58
+ // Iterative DFS — avoids stack overflow on deeply nested ASTs.
59
+ const stack = [root];
60
+ while (stack.length > 0) {
61
+ const node = stack.pop();
62
+ if (FUNCTION_LIKE_TYPES.has(node.type))
63
+ return node;
64
+ for (let i = node.namedChildCount - 1; i >= 0; i--) {
65
+ const child = node.namedChild(i);
66
+ if (child)
67
+ stack.push(child);
68
+ }
69
69
  }
70
70
  return null;
71
71
  };
@@ -89,17 +89,17 @@ export const findDeclarationNode = (root) => {
89
89
  'object_declaration', // Kotlin: object
90
90
  'impl_item', // Rust: impl
91
91
  ]);
92
- if (CLASS_LIKE_TYPES.has(root.type))
93
- return root;
94
- for (let i = 0; i < root.namedChildCount; i++) {
95
- const child = root.namedChild(i);
96
- if (!child)
97
- continue;
98
- if (CLASS_LIKE_TYPES.has(child.type))
99
- return child;
100
- const found = findDeclarationNode(child);
101
- if (found)
102
- return found;
92
+ // Iterative DFS — avoids stack overflow on deeply nested ASTs.
93
+ const stack = [root];
94
+ while (stack.length > 0) {
95
+ const node = stack.pop();
96
+ if (CLASS_LIKE_TYPES.has(node.type))
97
+ return node;
98
+ for (let i = node.namedChildCount - 1; i >= 0; i--) {
99
+ const child = node.namedChild(i);
100
+ if (child)
101
+ stack.push(child);
102
+ }
103
103
  }
104
104
  return null;
105
105
  };
@@ -10,6 +10,7 @@ export { characterChunk } from './character-chunk.js';
10
10
  import { characterChunk } from './character-chunk.js';
11
11
  import { ensureAndParse, findDeclarationNode, findFunctionNode } from './ast-utils.js';
12
12
  import { buildLineIndex, resolveChunkLines } from './line-index.js';
13
+ import { CHUNKING_RULES, CHUNK_MODE_AST_DECLARATION, CHUNK_MODE_AST_FUNCTION, } from './types.js';
13
14
  /**
14
15
  * Main chunkNode function: dispatches by label
15
16
  */
@@ -27,26 +28,24 @@ export const chunkNode = async (label, content, filePath, startLine, endLine, ch
27
28
  },
28
29
  ];
29
30
  }
30
- // Only function-like labels get AST chunking
31
- if (label === 'Function' || label === 'Method' || label === 'Constructor') {
32
- try {
33
- const astChunks = await astChunk(content, filePath, startLine, endLine, chunkSize, overlap);
31
+ const rule = CHUNKING_RULES[label];
32
+ if (!rule) {
33
+ return characterChunk(content, startLine, endLine, chunkSize, overlap);
34
+ }
35
+ try {
36
+ if (rule.mode === CHUNK_MODE_AST_FUNCTION) {
37
+ const astChunks = await astChunk(content, filePath, startLine, endLine, chunkSize, overlap, rule);
34
38
  if (astChunks.length > 0)
35
39
  return astChunks;
36
40
  }
37
- catch {
38
- // AST parsing failed fall through to character fallback
39
- }
40
- }
41
- if (label === 'Class' || label === 'Interface') {
42
- try {
43
- const declarationChunks = await declarationChunk(label, content, filePath, startLine, endLine, chunkSize, overlap);
41
+ if (rule.mode === CHUNK_MODE_AST_DECLARATION) {
42
+ const declarationChunks = await declarationChunk(content, filePath, startLine, endLine, chunkSize, overlap, rule);
44
43
  if (declarationChunks.length > 0)
45
44
  return declarationChunks;
46
45
  }
47
- catch {
48
- // AST parsing failed — fall through to character fallback
49
- }
46
+ }
47
+ catch {
48
+ // AST parsing failed — fall through to character fallback
50
49
  }
51
50
  // Character-based fallback for everything else
52
51
  return characterChunk(content, startLine, endLine, chunkSize, overlap);
@@ -56,7 +55,7 @@ export const chunkNode = async (label, content, filePath, startLine, endLine, ch
56
55
  * Parse snippet content, locate the function declaration node,
57
56
  * split body by statement boundaries.
58
57
  */
59
- const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap) => {
58
+ const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap, rule) => {
60
59
  const tree = await ensureAndParse(content, filePath);
61
60
  if (!tree)
62
61
  return [];
@@ -84,7 +83,7 @@ const astChunk = async (content, filePath, startLine, endLine, chunkSize, overla
84
83
  }
85
84
  if (statements.length === 0)
86
85
  return [];
87
- return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex, true, true);
86
+ return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex, rule.includePrefix, rule.includeSuffix);
88
87
  };
89
88
  const DECLARATION_BODY_NODE_TYPES = new Set([
90
89
  'class_body',
@@ -102,7 +101,7 @@ const FIELD_LIKE_MEMBER_TYPES = new Set([
102
101
  'pair',
103
102
  'enum_assignment',
104
103
  ]);
105
- const declarationChunk = async (label, content, filePath, startLine, endLine, chunkSize, overlap) => {
104
+ const declarationChunk = async (content, filePath, startLine, endLine, chunkSize, overlap, rule) => {
106
105
  const tree = await ensureAndParse(content, filePath);
107
106
  if (!tree)
108
107
  return [];
@@ -112,10 +111,10 @@ const declarationChunk = async (label, content, filePath, startLine, endLine, ch
112
111
  const bodyNode = getDeclarationBodyNode(targetNode);
113
112
  if (!bodyNode)
114
113
  return [];
115
- const members = collectDeclarationUnits(bodyNode, label);
114
+ const members = collectDeclarationUnits(bodyNode, rule.groupFields);
116
115
  if (members.length === 0)
117
116
  return [];
118
- return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex, false, false);
117
+ return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex, rule.includePrefix, rule.includeSuffix);
119
118
  };
120
119
  const buildChunk = (content, lineOffsets, chunkIndex, startOffset, endOffset, baseStartLine) => {
121
120
  const lineRange = resolveChunkLines(lineOffsets, startOffset, endOffset, baseStartLine);
@@ -150,12 +149,18 @@ const chunkByUnits = (content, lineOffsets, baseStartLine, chunkSize, overlap, u
150
149
  }
151
150
  if (candidateEndOffset - chunkStartOffset > chunkSize) {
152
151
  const oversizedUnit = units[chunkStartUnitIdx];
153
- const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedUnit.startIndex, oversizedUnit.endIndex, baseStartLine);
154
- const oversizedChunks = characterChunk(content.slice(oversizedUnit.startIndex, oversizedUnit.endIndex), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
152
+ const oversizedStartOffset = chunkStartUnitIdx === 0 && includeContainerPrefixOnFirstChunk
153
+ ? containerStartOffset
154
+ : oversizedUnit.startIndex;
155
+ const oversizedEndOffset = chunkStartUnitIdx === units.length - 1 && includeContainerSuffixOnLastChunk
156
+ ? containerEndOffset
157
+ : oversizedUnit.endIndex;
158
+ const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedStartOffset, oversizedEndOffset, baseStartLine);
159
+ const oversizedChunks = characterChunk(content.slice(oversizedStartOffset, oversizedEndOffset), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
155
160
  ...chunk,
156
161
  chunkIndex: chunks.length + offsetIdx,
157
- startOffset: chunk.startOffset + oversizedUnit.startIndex,
158
- endOffset: chunk.endOffset + oversizedUnit.startIndex,
162
+ startOffset: chunk.startOffset + oversizedStartOffset,
163
+ endOffset: chunk.endOffset + oversizedStartOffset,
159
164
  }));
160
165
  chunks.push(...oversizedChunks);
161
166
  chunkStartUnitIdx += 1;
@@ -200,7 +205,7 @@ const getDeclarationBodyNode = (node) => {
200
205
  }
201
206
  return null;
202
207
  };
203
- const collectDeclarationUnits = (bodyNode, label) => {
208
+ const collectDeclarationUnits = (bodyNode, groupFields) => {
204
209
  const members = [];
205
210
  for (let i = 0; i < bodyNode.namedChildCount; i++) {
206
211
  const child = bodyNode.namedChild(i);
@@ -209,7 +214,7 @@ const collectDeclarationUnits = (bodyNode, label) => {
209
214
  members.push({
210
215
  startIndex: child.startIndex,
211
216
  endIndex: child.endIndex,
212
- groupable: label === 'Class' && FIELD_LIKE_MEMBER_TYPES.has(child.type),
217
+ groupable: groupFields && FIELD_LIKE_MEMBER_TYPES.has(child.type),
213
218
  });
214
219
  }
215
220
  if (members.length === 0)
@@ -9,6 +9,12 @@
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
11
  import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
12
+ /**
13
+ * Bump this when the embedding text template changes in a way that should
14
+ * invalidate existing vectors, such as metadata/header shape changes,
15
+ * structural container context changes, or preceding-context formatting rules.
16
+ */
17
+ export declare const EMBEDDING_TEXT_VERSION = "v2";
12
18
  /**
13
19
  * Compute a stable content fingerprint for an embeddable node.
14
20
  * Used to detect when the underlying text has changed so stale vectors
@@ -13,10 +13,16 @@ import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady,
13
13
  import { generateEmbeddingText } from './text-generator.js';
14
14
  import { chunkNode, characterChunk } from './chunker.js';
15
15
  import { extractStructuralNames } from './structural-extractor.js';
16
- import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
16
+ import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABEL_METHOD, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
17
17
  import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
18
18
  import { loadVectorExtension } from '../lbug/lbug-adapter.js';
19
19
  const isDev = process.env.NODE_ENV === 'development';
20
+ /**
21
+ * Bump this when the embedding text template changes in a way that should
22
+ * invalidate existing vectors, such as metadata/header shape changes,
23
+ * structural container context changes, or preceding-context formatting rules.
24
+ */
25
+ export const EMBEDDING_TEXT_VERSION = 'v2';
20
26
  /**
21
27
  * Compute a stable content fingerprint for an embeddable node.
22
28
  * Used to detect when the underlying text has changed so stale vectors
@@ -27,8 +33,9 @@ export const contentHashForNode = (node, config = {}) => {
27
33
  // Hash must be deterministic across runs, so exclude methodNames/fieldNames
28
34
  // which are populated during the batch loop via AST extraction.
29
35
  // Using only node.content ensures the hash stays stable.
36
+ // NOTE: A change to extractStructuralNames behavior requires bumping EMBEDDING_TEXT_VERSION.
30
37
  const text = generateEmbeddingText({ ...node, methodNames: undefined, fieldNames: undefined }, node.content, config);
31
- return createHash('sha1').update(text).digest('hex');
38
+ return createHash('sha1').update(EMBEDDING_TEXT_VERSION).update('\n').update(text).digest('hex');
32
39
  };
33
40
  /**
34
41
  * Query all embeddable nodes from LadybugDB
@@ -39,7 +46,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
39
46
  for (const label of EMBEDDABLE_LABELS) {
40
47
  try {
41
48
  let query;
42
- if (label === 'Method') {
49
+ if (label === LABEL_METHOD) {
43
50
  // Method has parameterCount and returnType
44
51
  query = `
45
52
  MATCH (n:Method)
@@ -72,7 +79,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
72
79
  }
73
80
  const rows = await executeQuery(query);
74
81
  for (const row of rows) {
75
- const hasExportedColumn = label === 'Method' || LABELS_WITH_EXPORTED.has(label);
82
+ const hasExportedColumn = label === LABEL_METHOD || LABELS_WITH_EXPORTED.has(label);
76
83
  allNodes.push({
77
84
  id: row.id ?? row[0],
78
85
  name: row.name ?? row[1],
@@ -83,7 +90,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
83
90
  endLine: row.endLine ?? row[6],
84
91
  isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
85
92
  description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
86
- ...(label === 'Method'
93
+ ...(label === LABEL_METHOD
87
94
  ? {
88
95
  parameterCount: row.parameterCount ?? row[9],
89
96
  returnType: row.returnType ?? row[10],
@@ -301,8 +308,9 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
301
308
  chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap);
302
309
  }
303
310
  }
311
+ let prevTail = '';
304
312
  for (const chunk of chunks) {
305
- const text = generateEmbeddingText(node, chunk.text, finalConfig);
313
+ const text = generateEmbeddingText(node, chunk.text, finalConfig, chunk.chunkIndex, prevTail);
306
314
  allTexts.push(text);
307
315
  allUpdates.push({
308
316
  nodeId: node.id,
@@ -311,6 +319,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
311
319
  endLine: chunk.endLine,
312
320
  contentHash: hash,
313
321
  });
322
+ prevTail = overlap > 0 ? chunk.text.slice(-overlap) : '';
314
323
  }
315
324
  }
316
325
  // Embed chunk texts in sub-batches to control memory
@@ -24,7 +24,7 @@ export declare const extractDeclarationOnly: (content: string) => string;
24
24
  * Generate embedding text for any embeddable node
25
25
  * Dispatches to the appropriate generator based on node label
26
26
  */
27
- export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>) => string;
27
+ export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>, chunkIndex?: number, prevTail?: string) => string;
28
28
  /**
29
29
  * Export truncation helper for testing
30
30
  */
@@ -8,7 +8,7 @@
8
8
  * Method/field names for Class nodes are extracted by the ingestion
9
9
  * pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
10
10
  */
11
- import { DEFAULT_EMBEDDING_CONFIG, isShortLabel } from './types.js';
11
+ import { CHUNKING_RULES, DEFAULT_EMBEDDING_CONFIG, STRUCTURAL_TEXT_MODE_DECLARATION, isShortLabel, } from './types.js';
12
12
  /**
13
13
  * Truncate description to max length at sentence/word boundary
14
14
  */
@@ -71,34 +71,45 @@ const buildMetadataHeader = (node, config) => {
71
71
  }
72
72
  return parts.join('\n');
73
73
  };
74
- const generateCodeBodyText = (node, codeBody, config) => {
74
+ const generateCodeBodyText = (node, codeBody, config, prevTail) => {
75
75
  const header = buildMetadataHeader(node, config);
76
- const cleaned = cleanContent(codeBody);
77
- return `${header}\n\n${cleaned}`;
76
+ const parts = [header];
77
+ if (prevTail) {
78
+ parts.push(`[preceding context]: ...${cleanContent(prevTail)}`);
79
+ }
80
+ parts.push('', cleanContent(codeBody));
81
+ return parts.join('\n');
78
82
  };
79
- /**
80
- * Generate embedding text for Class nodes
81
- * Signature + properties + method name list only (no method bodies)
82
- * Method/field names come from AST extractors via node.methodNames/node.fieldNames.
83
- */
84
- const generateClassText = (node, codeBody, config) => {
85
- return generateStructuralTypeText(node, codeBody, config);
83
+ const getCompactContainerContext = (cleanedContent, declarationOnly) => {
84
+ const source = declarationOnly || cleanedContent;
85
+ const nlIdx = source.indexOf('\n');
86
+ const firstLine = (nlIdx === -1 ? source : source.substring(0, nlIdx)).trim();
87
+ return firstLine ? `Container: ${firstLine}` : undefined;
86
88
  };
87
- const generateStructuralTypeText = (node, codeBody, config) => {
89
+ const generateStructuralTypeText = (node, codeBody, config, chunkIndex, prevTail) => {
88
90
  const header = buildMetadataHeader(node, config);
89
91
  const parts = [header];
90
- if (node.methodNames?.length) {
92
+ const isFirstChunk = chunkIndex === undefined || chunkIndex === 0;
93
+ const cleanedContent = cleanContent(node.content);
94
+ const declarationOnly = extractDeclarationOnly(cleanedContent);
95
+ const compactContainerContext = getCompactContainerContext(cleanedContent, declarationOnly);
96
+ if (compactContainerContext) {
97
+ parts.push(compactContainerContext);
98
+ }
99
+ if (prevTail) {
100
+ parts.push(`[preceding context]: ...${cleanContent(prevTail)}`);
101
+ }
102
+ if (isFirstChunk && node.methodNames?.length) {
91
103
  parts.push(`Methods: ${node.methodNames.join(', ')}`);
92
104
  }
93
- if (node.fieldNames?.length) {
105
+ if (isFirstChunk && node.fieldNames?.length) {
94
106
  parts.push(`Properties: ${node.fieldNames.join(', ')}`);
95
107
  }
96
- const declarationOnly = extractDeclarationOnly(cleanContent(node.content));
97
- if (declarationOnly) {
108
+ if (isFirstChunk && declarationOnly) {
98
109
  parts.push('', declarationOnly);
99
110
  }
100
111
  const cleanedChunk = cleanContent(codeBody);
101
- if (cleanedChunk && cleanedChunk !== cleanContent(node.content)) {
112
+ if (cleanedChunk && cleanedChunk !== cleanedContent) {
102
113
  parts.push('', cleanedChunk);
103
114
  }
104
115
  return parts.join('\n');
@@ -179,19 +190,17 @@ export const extractDeclarationOnly = (content) => {
179
190
  * Generate embedding text for any embeddable node
180
191
  * Dispatches to the appropriate generator based on node label
181
192
  */
182
- export const generateEmbeddingText = (node, codeBody, config = {}) => {
193
+ export const generateEmbeddingText = (node, codeBody, config = {}, chunkIndex, prevTail) => {
183
194
  if (isShortLabel(node.label)) {
184
195
  const header = buildMetadataHeader(node, config);
185
196
  const cleaned = cleanContent(node.content);
186
197
  return `${header}\n\n${cleaned}`;
187
198
  }
188
- if (node.label === 'Class') {
189
- return generateClassText(node, codeBody, config);
190
- }
191
- if (node.label === 'Interface') {
192
- return generateStructuralTypeText(node, codeBody, config);
199
+ const chunkingRule = CHUNKING_RULES[node.label];
200
+ if (chunkingRule?.structuralTextMode === STRUCTURAL_TEXT_MODE_DECLARATION) {
201
+ return generateStructuralTypeText(node, codeBody, config, chunkIndex, prevTail);
193
202
  }
194
- return generateCodeBodyText(node, codeBody, config);
203
+ return generateCodeBodyText(node, codeBody, config, prevTail);
195
204
  };
196
205
  /**
197
206
  * Export truncation helper for testing
@@ -3,6 +3,38 @@
3
3
  *
4
4
  * Type definitions for the embedding generation and semantic search system.
5
5
  */
6
+ export declare const LABEL_FUNCTION: "Function";
7
+ export declare const LABEL_METHOD: "Method";
8
+ export declare const LABEL_CONSTRUCTOR: "Constructor";
9
+ export declare const LABEL_CLASS: "Class";
10
+ export declare const LABEL_INTERFACE: "Interface";
11
+ export declare const LABEL_STRUCT: "Struct";
12
+ export declare const LABEL_ENUM: "Enum";
13
+ export declare const LABEL_TRAIT: "Trait";
14
+ export declare const LABEL_IMPL: "Impl";
15
+ export declare const LABEL_MACRO: "Macro";
16
+ export declare const LABEL_NAMESPACE: "Namespace";
17
+ export declare const LABEL_TYPE_ALIAS: "TypeAlias";
18
+ export declare const LABEL_TYPEDEF: "Typedef";
19
+ export declare const LABEL_CONST: "Const";
20
+ export declare const LABEL_PROPERTY: "Property";
21
+ export declare const LABEL_RECORD: "Record";
22
+ export declare const LABEL_UNION: "Union";
23
+ export declare const LABEL_STATIC: "Static";
24
+ export declare const LABEL_VARIABLE: "Variable";
25
+ export declare const LABEL_CODE_ELEMENT: "CodeElement";
26
+ export declare const CHUNK_MODE_AST_FUNCTION: "ast-function";
27
+ export declare const CHUNK_MODE_AST_DECLARATION: "ast-declaration";
28
+ export declare const CHUNK_MODE_CHARACTER: "character";
29
+ export declare const STRUCTURAL_TEXT_MODE_NONE: "none";
30
+ export declare const STRUCTURAL_TEXT_MODE_DECLARATION: "declaration";
31
+ export interface ChunkingRule {
32
+ mode: typeof CHUNK_MODE_AST_FUNCTION | typeof CHUNK_MODE_AST_DECLARATION | typeof CHUNK_MODE_CHARACTER;
33
+ includePrefix: boolean;
34
+ includeSuffix: boolean;
35
+ groupFields: boolean;
36
+ structuralTextMode: typeof STRUCTURAL_TEXT_MODE_NONE | typeof STRUCTURAL_TEXT_MODE_DECLARATION;
37
+ }
6
38
  /**
7
39
  * Node labels that need chunking (have code body, potentially long)
8
40
  */
@@ -29,13 +61,22 @@ export declare const isChunkableLabel: (label: string) => boolean;
29
61
  */
30
62
  export declare const isShortLabel: (label: string) => boolean;
31
63
  /**
32
- * Node labels that have structural names (methods/fields) extractable via AST
64
+ * Node labels that have structural names (methods/fields) extractable via AST.
65
+ * Only labels that consume methodNames/fieldNames in their embedding text should
66
+ * be listed here — extra entries trigger wasted AST parses with no effect on output.
33
67
  */
34
68
  export declare const STRUCTURAL_LABELS: ReadonlySet<string>;
35
69
  /**
36
70
  * Node labels that have isExported column in their schema
37
71
  */
38
72
  export declare const LABELS_WITH_EXPORTED: ReadonlySet<string>;
73
+ /**
74
+ * Labels that need special chunking and/or structural text semantics.
75
+ * Any chunkable label omitted here intentionally falls back to characterChunk
76
+ * plus generateCodeBodyText (for example Enum/Trait/Impl/Macro/Namespace).
77
+ */
78
+ type ChunkableLabel = (typeof CHUNKABLE_LABELS)[number];
79
+ export declare const CHUNKING_RULES: Readonly<Partial<Record<ChunkableLabel, ChunkingRule>>>;
39
80
  /**
40
81
  * Embedding pipeline phases
41
82
  */
@@ -163,3 +204,4 @@ export declare const dedupBestChunks: (rows: ChunkSearchRow[], limit?: number) =
163
204
  * or can tell the result set is exhausted.
164
205
  */
165
206
  export declare const collectBestChunks: (limit: number, fetchRows: (fetchLimit: number) => Promise<ChunkSearchRow[]>, maxFetch?: number) => Promise<Map<string, BestChunkMatch>>;
207
+ export {};