gitnexus 1.6.3-rc.21 → 1.6.3-rc.22

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ export { characterChunk } from './character-chunk.js';
10
10
  import { characterChunk } from './character-chunk.js';
11
11
  import { ensureAndParse, findDeclarationNode, findFunctionNode } from './ast-utils.js';
12
12
  import { buildLineIndex, resolveChunkLines } from './line-index.js';
13
+ import { CHUNKING_RULES, CHUNK_MODE_AST_DECLARATION, CHUNK_MODE_AST_FUNCTION, } from './types.js';
13
14
  /**
14
15
  * Main chunkNode function: dispatches by label
15
16
  */
@@ -27,26 +28,24 @@ export const chunkNode = async (label, content, filePath, startLine, endLine, ch
27
28
  },
28
29
  ];
29
30
  }
30
- // Only function-like labels get AST chunking
31
- if (label === 'Function' || label === 'Method' || label === 'Constructor') {
32
- try {
33
- const astChunks = await astChunk(content, filePath, startLine, endLine, chunkSize, overlap);
31
+ const rule = CHUNKING_RULES[label];
32
+ if (!rule) {
33
+ return characterChunk(content, startLine, endLine, chunkSize, overlap);
34
+ }
35
+ try {
36
+ if (rule.mode === CHUNK_MODE_AST_FUNCTION) {
37
+ const astChunks = await astChunk(content, filePath, startLine, endLine, chunkSize, overlap, rule);
34
38
  if (astChunks.length > 0)
35
39
  return astChunks;
36
40
  }
37
- catch {
38
- // AST parsing failed fall through to character fallback
39
- }
40
- }
41
- if (label === 'Class' || label === 'Interface') {
42
- try {
43
- const declarationChunks = await declarationChunk(label, content, filePath, startLine, endLine, chunkSize, overlap);
41
+ if (rule.mode === CHUNK_MODE_AST_DECLARATION) {
42
+ const declarationChunks = await declarationChunk(content, filePath, startLine, endLine, chunkSize, overlap, rule);
44
43
  if (declarationChunks.length > 0)
45
44
  return declarationChunks;
46
45
  }
47
- catch {
48
- // AST parsing failed — fall through to character fallback
49
- }
46
+ }
47
+ catch {
48
+ // AST parsing failed — fall through to character fallback
50
49
  }
51
50
  // Character-based fallback for everything else
52
51
  return characterChunk(content, startLine, endLine, chunkSize, overlap);
@@ -56,7 +55,7 @@ export const chunkNode = async (label, content, filePath, startLine, endLine, ch
56
55
  * Parse snippet content, locate the function declaration node,
57
56
  * split body by statement boundaries.
58
57
  */
59
- const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap) => {
58
+ const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap, rule) => {
60
59
  const tree = await ensureAndParse(content, filePath);
61
60
  if (!tree)
62
61
  return [];
@@ -84,7 +83,7 @@ const astChunk = async (content, filePath, startLine, endLine, chunkSize, overla
84
83
  }
85
84
  if (statements.length === 0)
86
85
  return [];
87
- return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex, true, true);
86
+ return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex, rule.includePrefix, rule.includeSuffix);
88
87
  };
89
88
  const DECLARATION_BODY_NODE_TYPES = new Set([
90
89
  'class_body',
@@ -102,7 +101,7 @@ const FIELD_LIKE_MEMBER_TYPES = new Set([
102
101
  'pair',
103
102
  'enum_assignment',
104
103
  ]);
105
- const declarationChunk = async (label, content, filePath, startLine, endLine, chunkSize, overlap) => {
104
+ const declarationChunk = async (content, filePath, startLine, endLine, chunkSize, overlap, rule) => {
106
105
  const tree = await ensureAndParse(content, filePath);
107
106
  if (!tree)
108
107
  return [];
@@ -112,10 +111,10 @@ const declarationChunk = async (label, content, filePath, startLine, endLine, ch
112
111
  const bodyNode = getDeclarationBodyNode(targetNode);
113
112
  if (!bodyNode)
114
113
  return [];
115
- const members = collectDeclarationUnits(bodyNode, label);
114
+ const members = collectDeclarationUnits(bodyNode, rule.groupFields);
116
115
  if (members.length === 0)
117
116
  return [];
118
- return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex, false, false);
117
+ return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex, rule.includePrefix, rule.includeSuffix);
119
118
  };
120
119
  const buildChunk = (content, lineOffsets, chunkIndex, startOffset, endOffset, baseStartLine) => {
121
120
  const lineRange = resolveChunkLines(lineOffsets, startOffset, endOffset, baseStartLine);
@@ -150,12 +149,18 @@ const chunkByUnits = (content, lineOffsets, baseStartLine, chunkSize, overlap, u
150
149
  }
151
150
  if (candidateEndOffset - chunkStartOffset > chunkSize) {
152
151
  const oversizedUnit = units[chunkStartUnitIdx];
153
- const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedUnit.startIndex, oversizedUnit.endIndex, baseStartLine);
154
- const oversizedChunks = characterChunk(content.slice(oversizedUnit.startIndex, oversizedUnit.endIndex), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
152
+ const oversizedStartOffset = chunkStartUnitIdx === 0 && includeContainerPrefixOnFirstChunk
153
+ ? containerStartOffset
154
+ : oversizedUnit.startIndex;
155
+ const oversizedEndOffset = chunkStartUnitIdx === units.length - 1 && includeContainerSuffixOnLastChunk
156
+ ? containerEndOffset
157
+ : oversizedUnit.endIndex;
158
+ const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedStartOffset, oversizedEndOffset, baseStartLine);
159
+ const oversizedChunks = characterChunk(content.slice(oversizedStartOffset, oversizedEndOffset), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
155
160
  ...chunk,
156
161
  chunkIndex: chunks.length + offsetIdx,
157
- startOffset: chunk.startOffset + oversizedUnit.startIndex,
158
- endOffset: chunk.endOffset + oversizedUnit.startIndex,
162
+ startOffset: chunk.startOffset + oversizedStartOffset,
163
+ endOffset: chunk.endOffset + oversizedStartOffset,
159
164
  }));
160
165
  chunks.push(...oversizedChunks);
161
166
  chunkStartUnitIdx += 1;
@@ -200,7 +205,7 @@ const getDeclarationBodyNode = (node) => {
200
205
  }
201
206
  return null;
202
207
  };
203
- const collectDeclarationUnits = (bodyNode, label) => {
208
+ const collectDeclarationUnits = (bodyNode, groupFields) => {
204
209
  const members = [];
205
210
  for (let i = 0; i < bodyNode.namedChildCount; i++) {
206
211
  const child = bodyNode.namedChild(i);
@@ -209,7 +214,7 @@ const collectDeclarationUnits = (bodyNode, label) => {
209
214
  members.push({
210
215
  startIndex: child.startIndex,
211
216
  endIndex: child.endIndex,
212
- groupable: label === 'Class' && FIELD_LIKE_MEMBER_TYPES.has(child.type),
217
+ groupable: groupFields && FIELD_LIKE_MEMBER_TYPES.has(child.type),
213
218
  });
214
219
  }
215
220
  if (members.length === 0)
@@ -9,6 +9,12 @@
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
11
  import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
12
+ /**
13
+ * Bump this when the embedding text template changes in a way that should
14
+ * invalidate existing vectors, such as metadata/header shape changes,
15
+ * structural container context changes, or preceding-context formatting rules.
16
+ */
17
+ export declare const EMBEDDING_TEXT_VERSION = "v2";
12
18
  /**
13
19
  * Compute a stable content fingerprint for an embeddable node.
14
20
  * Used to detect when the underlying text has changed so stale vectors
@@ -13,10 +13,16 @@ import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady,
13
13
  import { generateEmbeddingText } from './text-generator.js';
14
14
  import { chunkNode, characterChunk } from './chunker.js';
15
15
  import { extractStructuralNames } from './structural-extractor.js';
16
- import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
16
+ import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABEL_METHOD, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
17
17
  import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
18
18
  import { loadVectorExtension } from '../lbug/lbug-adapter.js';
19
19
  const isDev = process.env.NODE_ENV === 'development';
20
+ /**
21
+ * Bump this when the embedding text template changes in a way that should
22
+ * invalidate existing vectors, such as metadata/header shape changes,
23
+ * structural container context changes, or preceding-context formatting rules.
24
+ */
25
+ export const EMBEDDING_TEXT_VERSION = 'v2';
20
26
  /**
21
27
  * Compute a stable content fingerprint for an embeddable node.
22
28
  * Used to detect when the underlying text has changed so stale vectors
@@ -27,8 +33,9 @@ export const contentHashForNode = (node, config = {}) => {
27
33
  // Hash must be deterministic across runs, so exclude methodNames/fieldNames
28
34
  // which are populated during the batch loop via AST extraction.
29
35
  // Using only node.content ensures the hash stays stable.
36
+ // NOTE: A change to extractStructuralNames behavior requires bumping EMBEDDING_TEXT_VERSION.
30
37
  const text = generateEmbeddingText({ ...node, methodNames: undefined, fieldNames: undefined }, node.content, config);
31
- return createHash('sha1').update(text).digest('hex');
38
+ return createHash('sha1').update(EMBEDDING_TEXT_VERSION).update('\n').update(text).digest('hex');
32
39
  };
33
40
  /**
34
41
  * Query all embeddable nodes from LadybugDB
@@ -39,7 +46,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
39
46
  for (const label of EMBEDDABLE_LABELS) {
40
47
  try {
41
48
  let query;
42
- if (label === 'Method') {
49
+ if (label === LABEL_METHOD) {
43
50
  // Method has parameterCount and returnType
44
51
  query = `
45
52
  MATCH (n:Method)
@@ -72,7 +79,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
72
79
  }
73
80
  const rows = await executeQuery(query);
74
81
  for (const row of rows) {
75
- const hasExportedColumn = label === 'Method' || LABELS_WITH_EXPORTED.has(label);
82
+ const hasExportedColumn = label === LABEL_METHOD || LABELS_WITH_EXPORTED.has(label);
76
83
  allNodes.push({
77
84
  id: row.id ?? row[0],
78
85
  name: row.name ?? row[1],
@@ -83,7 +90,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
83
90
  endLine: row.endLine ?? row[6],
84
91
  isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
85
92
  description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
86
- ...(label === 'Method'
93
+ ...(label === LABEL_METHOD
87
94
  ? {
88
95
  parameterCount: row.parameterCount ?? row[9],
89
96
  returnType: row.returnType ?? row[10],
@@ -301,8 +308,9 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
301
308
  chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap);
302
309
  }
303
310
  }
311
+ let prevTail = '';
304
312
  for (const chunk of chunks) {
305
- const text = generateEmbeddingText(node, chunk.text, finalConfig);
313
+ const text = generateEmbeddingText(node, chunk.text, finalConfig, chunk.chunkIndex, prevTail);
306
314
  allTexts.push(text);
307
315
  allUpdates.push({
308
316
  nodeId: node.id,
@@ -311,6 +319,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
311
319
  endLine: chunk.endLine,
312
320
  contentHash: hash,
313
321
  });
322
+ prevTail = overlap > 0 ? chunk.text.slice(-overlap) : '';
314
323
  }
315
324
  }
316
325
  // Embed chunk texts in sub-batches to control memory
@@ -24,7 +24,7 @@ export declare const extractDeclarationOnly: (content: string) => string;
24
24
  * Generate embedding text for any embeddable node
25
25
  * Dispatches to the appropriate generator based on node label
26
26
  */
27
- export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>) => string;
27
+ export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>, chunkIndex?: number, prevTail?: string) => string;
28
28
  /**
29
29
  * Export truncation helper for testing
30
30
  */
@@ -8,7 +8,7 @@
8
8
  * Method/field names for Class nodes are extracted by the ingestion
9
9
  * pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
10
10
  */
11
- import { DEFAULT_EMBEDDING_CONFIG, isShortLabel } from './types.js';
11
+ import { CHUNKING_RULES, DEFAULT_EMBEDDING_CONFIG, STRUCTURAL_TEXT_MODE_DECLARATION, isShortLabel, } from './types.js';
12
12
  /**
13
13
  * Truncate description to max length at sentence/word boundary
14
14
  */
@@ -71,34 +71,45 @@ const buildMetadataHeader = (node, config) => {
71
71
  }
72
72
  return parts.join('\n');
73
73
  };
74
- const generateCodeBodyText = (node, codeBody, config) => {
74
+ const generateCodeBodyText = (node, codeBody, config, prevTail) => {
75
75
  const header = buildMetadataHeader(node, config);
76
- const cleaned = cleanContent(codeBody);
77
- return `${header}\n\n${cleaned}`;
76
+ const parts = [header];
77
+ if (prevTail) {
78
+ parts.push(`[preceding context]: ...${cleanContent(prevTail)}`);
79
+ }
80
+ parts.push('', cleanContent(codeBody));
81
+ return parts.join('\n');
78
82
  };
79
- /**
80
- * Generate embedding text for Class nodes
81
- * Signature + properties + method name list only (no method bodies)
82
- * Method/field names come from AST extractors via node.methodNames/node.fieldNames.
83
- */
84
- const generateClassText = (node, codeBody, config) => {
85
- return generateStructuralTypeText(node, codeBody, config);
83
+ const getCompactContainerContext = (cleanedContent, declarationOnly) => {
84
+ const source = declarationOnly || cleanedContent;
85
+ const nlIdx = source.indexOf('\n');
86
+ const firstLine = (nlIdx === -1 ? source : source.substring(0, nlIdx)).trim();
87
+ return firstLine ? `Container: ${firstLine}` : undefined;
86
88
  };
87
- const generateStructuralTypeText = (node, codeBody, config) => {
89
+ const generateStructuralTypeText = (node, codeBody, config, chunkIndex, prevTail) => {
88
90
  const header = buildMetadataHeader(node, config);
89
91
  const parts = [header];
90
- if (node.methodNames?.length) {
92
+ const isFirstChunk = chunkIndex === undefined || chunkIndex === 0;
93
+ const cleanedContent = cleanContent(node.content);
94
+ const declarationOnly = extractDeclarationOnly(cleanedContent);
95
+ const compactContainerContext = getCompactContainerContext(cleanedContent, declarationOnly);
96
+ if (compactContainerContext) {
97
+ parts.push(compactContainerContext);
98
+ }
99
+ if (prevTail) {
100
+ parts.push(`[preceding context]: ...${cleanContent(prevTail)}`);
101
+ }
102
+ if (isFirstChunk && node.methodNames?.length) {
91
103
  parts.push(`Methods: ${node.methodNames.join(', ')}`);
92
104
  }
93
- if (node.fieldNames?.length) {
105
+ if (isFirstChunk && node.fieldNames?.length) {
94
106
  parts.push(`Properties: ${node.fieldNames.join(', ')}`);
95
107
  }
96
- const declarationOnly = extractDeclarationOnly(cleanContent(node.content));
97
- if (declarationOnly) {
108
+ if (isFirstChunk && declarationOnly) {
98
109
  parts.push('', declarationOnly);
99
110
  }
100
111
  const cleanedChunk = cleanContent(codeBody);
101
- if (cleanedChunk && cleanedChunk !== cleanContent(node.content)) {
112
+ if (cleanedChunk && cleanedChunk !== cleanedContent) {
102
113
  parts.push('', cleanedChunk);
103
114
  }
104
115
  return parts.join('\n');
@@ -179,19 +190,17 @@ export const extractDeclarationOnly = (content) => {
179
190
  * Generate embedding text for any embeddable node
180
191
  * Dispatches to the appropriate generator based on node label
181
192
  */
182
- export const generateEmbeddingText = (node, codeBody, config = {}) => {
193
+ export const generateEmbeddingText = (node, codeBody, config = {}, chunkIndex, prevTail) => {
183
194
  if (isShortLabel(node.label)) {
184
195
  const header = buildMetadataHeader(node, config);
185
196
  const cleaned = cleanContent(node.content);
186
197
  return `${header}\n\n${cleaned}`;
187
198
  }
188
- if (node.label === 'Class') {
189
- return generateClassText(node, codeBody, config);
190
- }
191
- if (node.label === 'Interface') {
192
- return generateStructuralTypeText(node, codeBody, config);
199
+ const chunkingRule = CHUNKING_RULES[node.label];
200
+ if (chunkingRule?.structuralTextMode === STRUCTURAL_TEXT_MODE_DECLARATION) {
201
+ return generateStructuralTypeText(node, codeBody, config, chunkIndex, prevTail);
193
202
  }
194
- return generateCodeBodyText(node, codeBody, config);
203
+ return generateCodeBodyText(node, codeBody, config, prevTail);
195
204
  };
196
205
  /**
197
206
  * Export truncation helper for testing
@@ -3,6 +3,38 @@
3
3
  *
4
4
  * Type definitions for the embedding generation and semantic search system.
5
5
  */
6
+ export declare const LABEL_FUNCTION: "Function";
7
+ export declare const LABEL_METHOD: "Method";
8
+ export declare const LABEL_CONSTRUCTOR: "Constructor";
9
+ export declare const LABEL_CLASS: "Class";
10
+ export declare const LABEL_INTERFACE: "Interface";
11
+ export declare const LABEL_STRUCT: "Struct";
12
+ export declare const LABEL_ENUM: "Enum";
13
+ export declare const LABEL_TRAIT: "Trait";
14
+ export declare const LABEL_IMPL: "Impl";
15
+ export declare const LABEL_MACRO: "Macro";
16
+ export declare const LABEL_NAMESPACE: "Namespace";
17
+ export declare const LABEL_TYPE_ALIAS: "TypeAlias";
18
+ export declare const LABEL_TYPEDEF: "Typedef";
19
+ export declare const LABEL_CONST: "Const";
20
+ export declare const LABEL_PROPERTY: "Property";
21
+ export declare const LABEL_RECORD: "Record";
22
+ export declare const LABEL_UNION: "Union";
23
+ export declare const LABEL_STATIC: "Static";
24
+ export declare const LABEL_VARIABLE: "Variable";
25
+ export declare const LABEL_CODE_ELEMENT: "CodeElement";
26
+ export declare const CHUNK_MODE_AST_FUNCTION: "ast-function";
27
+ export declare const CHUNK_MODE_AST_DECLARATION: "ast-declaration";
28
+ export declare const CHUNK_MODE_CHARACTER: "character";
29
+ export declare const STRUCTURAL_TEXT_MODE_NONE: "none";
30
+ export declare const STRUCTURAL_TEXT_MODE_DECLARATION: "declaration";
31
+ export interface ChunkingRule {
32
+ mode: typeof CHUNK_MODE_AST_FUNCTION | typeof CHUNK_MODE_AST_DECLARATION | typeof CHUNK_MODE_CHARACTER;
33
+ includePrefix: boolean;
34
+ includeSuffix: boolean;
35
+ groupFields: boolean;
36
+ structuralTextMode: typeof STRUCTURAL_TEXT_MODE_NONE | typeof STRUCTURAL_TEXT_MODE_DECLARATION;
37
+ }
6
38
  /**
7
39
  * Node labels that need chunking (have code body, potentially long)
8
40
  */
@@ -29,13 +61,22 @@ export declare const isChunkableLabel: (label: string) => boolean;
29
61
  */
30
62
  export declare const isShortLabel: (label: string) => boolean;
31
63
  /**
32
- * Node labels that have structural names (methods/fields) extractable via AST
64
+ * Node labels that have structural names (methods/fields) extractable via AST.
65
+ * Only labels that consume methodNames/fieldNames in their embedding text should
66
+ * be listed here — extra entries trigger wasted AST parses with no effect on output.
33
67
  */
34
68
  export declare const STRUCTURAL_LABELS: ReadonlySet<string>;
35
69
  /**
36
70
  * Node labels that have isExported column in their schema
37
71
  */
38
72
  export declare const LABELS_WITH_EXPORTED: ReadonlySet<string>;
73
+ /**
74
+ * Labels that need special chunking and/or structural text semantics.
75
+ * Any chunkable label omitted here intentionally falls back to characterChunk
76
+ * plus generateCodeBodyText (for example Enum/Trait/Impl/Macro/Namespace).
77
+ */
78
+ type ChunkableLabel = (typeof CHUNKABLE_LABELS)[number];
79
+ export declare const CHUNKING_RULES: Readonly<Partial<Record<ChunkableLabel, ChunkingRule>>>;
39
80
  /**
40
81
  * Embedding pipeline phases
41
82
  */
@@ -163,3 +204,4 @@ export declare const dedupBestChunks: (rows: ChunkSearchRow[], limit?: number) =
163
204
  * or can tell the result set is exhausted.
164
205
  */
165
206
  export declare const collectBestChunks: (limit: number, fetchRows: (fetchLimit: number) => Promise<ChunkSearchRow[]>, maxFetch?: number) => Promise<Map<string, BestChunkMatch>>;
207
+ export {};
@@ -3,34 +3,61 @@
3
3
  *
4
4
  * Type definitions for the embedding generation and semantic search system.
5
5
  */
6
+ export const LABEL_FUNCTION = 'Function';
7
+ export const LABEL_METHOD = 'Method';
8
+ export const LABEL_CONSTRUCTOR = 'Constructor';
9
+ export const LABEL_CLASS = 'Class';
10
+ export const LABEL_INTERFACE = 'Interface';
11
+ export const LABEL_STRUCT = 'Struct';
12
+ export const LABEL_ENUM = 'Enum';
13
+ export const LABEL_TRAIT = 'Trait';
14
+ export const LABEL_IMPL = 'Impl';
15
+ export const LABEL_MACRO = 'Macro';
16
+ export const LABEL_NAMESPACE = 'Namespace';
17
+ export const LABEL_TYPE_ALIAS = 'TypeAlias';
18
+ export const LABEL_TYPEDEF = 'Typedef';
19
+ export const LABEL_CONST = 'Const';
20
+ export const LABEL_PROPERTY = 'Property';
21
+ export const LABEL_RECORD = 'Record';
22
+ export const LABEL_UNION = 'Union';
23
+ export const LABEL_STATIC = 'Static';
24
+ export const LABEL_VARIABLE = 'Variable';
25
+ export const LABEL_CODE_ELEMENT = 'CodeElement';
26
+ export const CHUNK_MODE_AST_FUNCTION = 'ast-function';
27
+ export const CHUNK_MODE_AST_DECLARATION = 'ast-declaration';
28
+ // CHUNK_MODE_CHARACTER exists for type completeness but is a no-op in CHUNKING_RULES —
29
+ // omit the entry entirely to get character fallback via chunker.ts dispatch.
30
+ export const CHUNK_MODE_CHARACTER = 'character';
31
+ export const STRUCTURAL_TEXT_MODE_NONE = 'none';
32
+ export const STRUCTURAL_TEXT_MODE_DECLARATION = 'declaration';
6
33
  /**
7
34
  * Node labels that need chunking (have code body, potentially long)
8
35
  */
9
36
  export const CHUNKABLE_LABELS = [
10
- 'Function',
11
- 'Method',
12
- 'Constructor',
13
- 'Class',
14
- 'Interface',
15
- 'Struct',
16
- 'Enum',
17
- 'Trait',
18
- 'Impl',
19
- 'Macro',
20
- 'Namespace',
37
+ LABEL_FUNCTION,
38
+ LABEL_METHOD,
39
+ LABEL_CONSTRUCTOR,
40
+ LABEL_CLASS,
41
+ LABEL_INTERFACE,
42
+ LABEL_STRUCT,
43
+ LABEL_ENUM,
44
+ LABEL_TRAIT,
45
+ LABEL_IMPL,
46
+ LABEL_MACRO,
47
+ LABEL_NAMESPACE,
21
48
  ];
22
49
  /**
23
50
  * Node labels that are short (no chunking needed, embed directly)
24
51
  */
25
52
  export const SHORT_LABELS = [
26
- 'TypeAlias',
27
- 'Typedef',
28
- 'Const',
29
- 'Property',
30
- 'Record',
31
- 'Union',
32
- 'Static',
33
- 'Variable',
53
+ LABEL_TYPE_ALIAS,
54
+ LABEL_TYPEDEF,
55
+ LABEL_CONST,
56
+ LABEL_PROPERTY,
57
+ LABEL_RECORD,
58
+ LABEL_UNION,
59
+ LABEL_STATIC,
60
+ LABEL_VARIABLE,
34
61
  ];
35
62
  /**
36
63
  * All embeddable labels (union of CHUNKABLE + SHORT)
@@ -49,24 +76,69 @@ export const isChunkableLabel = (label) => CHUNKABLE_LABELS.includes(label);
49
76
  */
50
77
  export const isShortLabel = (label) => SHORT_LABELS.includes(label);
51
78
  /**
52
- * Node labels that have structural names (methods/fields) extractable via AST
79
+ * Node labels that have structural names (methods/fields) extractable via AST.
80
+ * Only labels that consume methodNames/fieldNames in their embedding text should
81
+ * be listed here — extra entries trigger wasted AST parses with no effect on output.
53
82
  */
54
83
  export const STRUCTURAL_LABELS = new Set([
55
- 'Class',
56
- 'Struct',
57
- 'Interface',
58
- 'Enum',
84
+ LABEL_CLASS,
85
+ LABEL_STRUCT,
86
+ LABEL_INTERFACE,
59
87
  ]);
60
88
  /**
61
89
  * Node labels that have isExported column in their schema
62
90
  */
63
91
  export const LABELS_WITH_EXPORTED = new Set([
64
- 'Function',
65
- 'Class',
66
- 'Interface',
67
- 'Method',
68
- 'CodeElement',
92
+ LABEL_FUNCTION,
93
+ LABEL_CLASS,
94
+ LABEL_INTERFACE,
95
+ LABEL_METHOD,
96
+ LABEL_CODE_ELEMENT,
69
97
  ]);
98
+ export const CHUNKING_RULES = {
99
+ [LABEL_FUNCTION]: {
100
+ mode: CHUNK_MODE_AST_FUNCTION,
101
+ includePrefix: true,
102
+ includeSuffix: true,
103
+ groupFields: false,
104
+ structuralTextMode: STRUCTURAL_TEXT_MODE_NONE,
105
+ },
106
+ [LABEL_METHOD]: {
107
+ mode: CHUNK_MODE_AST_FUNCTION,
108
+ includePrefix: true,
109
+ includeSuffix: true,
110
+ groupFields: false,
111
+ structuralTextMode: STRUCTURAL_TEXT_MODE_NONE,
112
+ },
113
+ [LABEL_CONSTRUCTOR]: {
114
+ mode: CHUNK_MODE_AST_FUNCTION,
115
+ includePrefix: true,
116
+ includeSuffix: true,
117
+ groupFields: false,
118
+ structuralTextMode: STRUCTURAL_TEXT_MODE_NONE,
119
+ },
120
+ [LABEL_CLASS]: {
121
+ mode: CHUNK_MODE_AST_DECLARATION,
122
+ includePrefix: true,
123
+ includeSuffix: false,
124
+ groupFields: true,
125
+ structuralTextMode: STRUCTURAL_TEXT_MODE_DECLARATION,
126
+ },
127
+ [LABEL_INTERFACE]: {
128
+ mode: CHUNK_MODE_AST_DECLARATION,
129
+ includePrefix: true,
130
+ includeSuffix: false,
131
+ groupFields: false,
132
+ structuralTextMode: STRUCTURAL_TEXT_MODE_DECLARATION,
133
+ },
134
+ [LABEL_STRUCT]: {
135
+ mode: CHUNK_MODE_AST_DECLARATION,
136
+ includePrefix: true,
137
+ includeSuffix: false,
138
+ groupFields: true,
139
+ structuralTextMode: STRUCTURAL_TEXT_MODE_DECLARATION,
140
+ },
141
+ };
70
142
  /**
71
143
  * Default embedding configuration
72
144
  * Uses snowflake-arctic-embed-xs for browser efficiency
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.3-rc.21",
3
+ "version": "1.6.3-rc.22",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",