gitnexus 1.4.7 → 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +22 -1
  2. package/dist/cli/ai-context.d.ts +1 -1
  3. package/dist/cli/ai-context.js +1 -1
  4. package/dist/cli/analyze.d.ts +2 -0
  5. package/dist/cli/analyze.js +54 -21
  6. package/dist/cli/index.js +2 -1
  7. package/dist/cli/setup.js +78 -1
  8. package/dist/config/supported-languages.d.ts +30 -0
  9. package/dist/config/supported-languages.js +30 -0
  10. package/dist/core/embeddings/embedder.d.ts +6 -1
  11. package/dist/core/embeddings/embedder.js +65 -5
  12. package/dist/core/embeddings/embedding-pipeline.js +11 -9
  13. package/dist/core/embeddings/http-client.d.ts +31 -0
  14. package/dist/core/embeddings/http-client.js +179 -0
  15. package/dist/core/embeddings/index.d.ts +1 -0
  16. package/dist/core/embeddings/index.js +1 -0
  17. package/dist/core/embeddings/types.d.ts +1 -1
  18. package/dist/core/graph/types.d.ts +2 -1
  19. package/dist/core/ingestion/ast-helpers.d.ts +80 -0
  20. package/dist/core/ingestion/ast-helpers.js +738 -0
  21. package/dist/core/ingestion/call-analysis.d.ts +73 -0
  22. package/dist/core/ingestion/call-analysis.js +490 -0
  23. package/dist/core/ingestion/call-processor.d.ts +48 -1
  24. package/dist/core/ingestion/call-processor.js +368 -7
  25. package/dist/core/ingestion/call-routing.d.ts +6 -0
  26. package/dist/core/ingestion/entry-point-scoring.js +36 -26
  27. package/dist/core/ingestion/framework-detection.d.ts +10 -2
  28. package/dist/core/ingestion/framework-detection.js +49 -12
  29. package/dist/core/ingestion/heritage-processor.js +47 -49
  30. package/dist/core/ingestion/import-processor.d.ts +1 -1
  31. package/dist/core/ingestion/import-processor.js +103 -194
  32. package/dist/core/ingestion/import-resolution.d.ts +101 -0
  33. package/dist/core/ingestion/import-resolution.js +251 -0
  34. package/dist/core/ingestion/language-config.d.ts +3 -0
  35. package/dist/core/ingestion/language-config.js +13 -0
  36. package/dist/core/ingestion/markdown-processor.d.ts +17 -0
  37. package/dist/core/ingestion/markdown-processor.js +124 -0
  38. package/dist/core/ingestion/mro-processor.js +8 -3
  39. package/dist/core/ingestion/named-binding-extraction.d.ts +9 -43
  40. package/dist/core/ingestion/named-binding-extraction.js +89 -79
  41. package/dist/core/ingestion/parsing-processor.d.ts +2 -2
  42. package/dist/core/ingestion/parsing-processor.js +14 -73
  43. package/dist/core/ingestion/pipeline.d.ts +10 -0
  44. package/dist/core/ingestion/pipeline.js +421 -4
  45. package/dist/core/ingestion/resolution-context.d.ts +5 -0
  46. package/dist/core/ingestion/resolution-context.js +7 -4
  47. package/dist/core/ingestion/resolvers/index.d.ts +1 -1
  48. package/dist/core/ingestion/resolvers/index.js +1 -1
  49. package/dist/core/ingestion/resolvers/jvm.d.ts +2 -1
  50. package/dist/core/ingestion/resolvers/jvm.js +25 -9
  51. package/dist/core/ingestion/resolvers/php.d.ts +14 -0
  52. package/dist/core/ingestion/resolvers/php.js +43 -3
  53. package/dist/core/ingestion/resolvers/utils.d.ts +5 -0
  54. package/dist/core/ingestion/resolvers/utils.js +16 -0
  55. package/dist/core/ingestion/symbol-table.d.ts +16 -0
  56. package/dist/core/ingestion/symbol-table.js +20 -6
  57. package/dist/core/ingestion/tree-sitter-queries.d.ts +4 -4
  58. package/dist/core/ingestion/tree-sitter-queries.js +43 -2
  59. package/dist/core/ingestion/type-env.d.ts +28 -1
  60. package/dist/core/ingestion/type-env.js +419 -96
  61. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +5 -0
  62. package/dist/core/ingestion/type-extractors/c-cpp.js +119 -0
  63. package/dist/core/ingestion/type-extractors/csharp.js +149 -16
  64. package/dist/core/ingestion/type-extractors/index.d.ts +1 -1
  65. package/dist/core/ingestion/type-extractors/index.js +1 -1
  66. package/dist/core/ingestion/type-extractors/jvm.js +169 -66
  67. package/dist/core/ingestion/type-extractors/rust.js +35 -1
  68. package/dist/core/ingestion/type-extractors/shared.d.ts +0 -2
  69. package/dist/core/ingestion/type-extractors/shared.js +5 -10
  70. package/dist/core/ingestion/type-extractors/swift.js +7 -6
  71. package/dist/core/ingestion/type-extractors/types.d.ts +37 -7
  72. package/dist/core/ingestion/type-extractors/typescript.js +141 -9
  73. package/dist/core/ingestion/utils.d.ts +2 -120
  74. package/dist/core/ingestion/utils.js +3 -1051
  75. package/dist/core/ingestion/workers/parse-worker.d.ts +13 -4
  76. package/dist/core/ingestion/workers/parse-worker.js +66 -87
  77. package/dist/core/lbug/csv-generator.js +18 -1
  78. package/dist/core/lbug/lbug-adapter.d.ts +10 -0
  79. package/dist/core/lbug/lbug-adapter.js +69 -4
  80. package/dist/core/lbug/schema.d.ts +5 -3
  81. package/dist/core/lbug/schema.js +26 -2
  82. package/dist/mcp/core/embedder.js +11 -3
  83. package/dist/mcp/core/lbug-adapter.js +12 -1
  84. package/dist/mcp/local/local-backend.d.ts +22 -0
  85. package/dist/mcp/local/local-backend.js +133 -29
  86. package/dist/mcp/resources.js +2 -0
  87. package/dist/mcp/tools.js +2 -2
  88. package/dist/server/api.d.ts +19 -1
  89. package/dist/server/api.js +66 -6
  90. package/dist/storage/git.d.ts +12 -0
  91. package/dist/storage/git.js +21 -0
  92. package/package.json +10 -2
@@ -0,0 +1,179 @@
1
+ /**
2
+ * HTTP Embedding Client
3
+ *
4
+ * Shared fetch+retry logic for OpenAI-compatible /v1/embeddings endpoints.
5
+ * Imported by both the core embedder (batch) and MCP embedder (query).
6
+ */
7
+ const HTTP_TIMEOUT_MS = 30_000; // per-request timeout in ms, passed to AbortSignal.timeout()
8
+ const HTTP_MAX_RETRIES = 2; // retries after the first attempt, i.e. at most 3 requests per batch
9
+ const HTTP_RETRY_BACKOFF_MS = 1_000; // base retry delay in ms; multiplied by (attempt + 1) before each retry
10
+ const HTTP_BATCH_SIZE = 64; // maximum number of texts httpEmbed sends in one request
11
+ const DEFAULT_DIMS = 384; // vector length expected when GITNEXUS_EMBEDDING_DIMS is not set
12
/**
 * Build the HTTP embedding config from the current process.env snapshot.
 *
 * Not cached — env vars are read on every call so late configuration takes effect.
 *
 * @returns `{ baseUrl, model, apiKey, dimensions }`, or null when either
 *          GITNEXUS_EMBEDDING_URL or GITNEXUS_EMBEDDING_MODEL is unset or empty.
 *          `baseUrl` has trailing slashes removed, `apiKey` falls back to
 *          'unused' when GITNEXUS_EMBEDDING_API_KEY is unset, and `dimensions`
 *          is undefined when GITNEXUS_EMBEDDING_DIMS is unset.
 * @throws Error when GITNEXUS_EMBEDDING_DIMS is set but is not a positive integer.
 */
const readConfig = () => {
    const baseUrl = process.env.GITNEXUS_EMBEDDING_URL;
    const model = process.env.GITNEXUS_EMBEDDING_MODEL;
    if (!baseUrl || !model)
        return null;
    const rawDims = process.env.GITNEXUS_EMBEDDING_DIMS;
    let dimensions;
    if (rawDims !== undefined) {
        // parseInt() stops at the first non-digit, so it would silently accept
        // "768px" (-> 768) or "12.5" (-> 12). Require the whole value to be
        // decimal digits so a typo fails loudly instead of picking a wrong size.
        const trimmed = rawDims.trim();
        const parsed = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : NaN;
        if (!Number.isSafeInteger(parsed) || parsed <= 0) {
            throw new Error(`GITNEXUS_EMBEDDING_DIMS must be a positive integer, got "${rawDims}"`);
        }
        dimensions = parsed;
    }
    return {
        baseUrl: baseUrl.replace(/\/+$/, ''),
        model,
        apiKey: process.env.GITNEXUS_EMBEDDING_API_KEY ?? 'unused',
        dimensions,
    };
};
38
/**
 * Report whether HTTP embedding mode is active, i.e. whether readConfig()
 * produces a config object rather than null.
 */
export const isHttpMode = () => {
    const config = readConfig();
    return config !== null;
};
42
/**
 * Look up the explicitly configured embedding dimensions for HTTP mode.
 *
 * @returns The `dimensions` value from readConfig(), or undefined when HTTP
 *          mode is not active or no explicit dimensions are set.
 */
export const getHttpDimensions = () => {
    const config = readConfig();
    return config == null ? undefined : config.dimensions;
};
47
/**
 * Produce a representation of a URL that is safe to embed in error messages.
 * Only protocol, host and path are kept, so the query string (which may carry
 * tokens), the fragment and any userinfo are dropped.
 *
 * @param url - URL string to sanitize
 * @returns The sanitized URL, or '<invalid-url>' when the input cannot be parsed
 */
const safeUrl = (url) => {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return '<invalid-url>';
    }
    return [parsed.protocol, '//', parsed.host, parsed.pathname].join('');
};
60
/**
 * Send a single batch of texts to the embedding endpoint, retrying transient failures.
 *
 * Retry policy: connection-level errors and HTTP 429 / 5xx responses are retried
 * up to HTTP_MAX_RETRIES times with a linearly growing delay
 * (HTTP_RETRY_BACKOFF_MS * (attempt + 1)). Timeouts and other HTTP errors are
 * not retried.
 *
 * @param url - Full endpoint URL (e.g. https://host/v1/embeddings)
 * @param batch - Texts to embed
 * @param model - Model name for the request body
 * @param apiKey - Bearer token (only used in the Authorization header)
 * @param batchIndex - Logical batch number (for error context)
 * @param attempt - Current retry attempt (internal)
 * @returns The `data` array of the response; every item is guaranteed to
 *          carry an `embedding` array
 * @throws Error on timeout, exhausted retries, a non-retryable HTTP status,
 *         an unreadable JSON body, or a body without a well-formed `data` array
 */
const httpEmbedBatch = async (url, batch, model, apiKey, batchIndex = 0, attempt = 0) => {
    const where = `${safeUrl(url)}, batch ${batchIndex}`;
    // Wait for the backoff delay, then re-issue the same batch as the next attempt.
    const retryLater = async () => {
        const delay = HTTP_RETRY_BACKOFF_MS * (attempt + 1);
        await new Promise(r => setTimeout(r, delay));
        return httpEmbedBatch(url, batch, model, apiKey, batchIndex, attempt + 1);
    };
    let resp;
    try {
        resp = await fetch(url, {
            method: 'POST',
            signal: AbortSignal.timeout(HTTP_TIMEOUT_MS),
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${apiKey}`,
            },
            body: JSON.stringify({ input: batch, model }),
        });
    }
    catch (err) {
        // Timeouts should not be retried — the server is unresponsive.
        // AbortSignal.timeout() throws DOMException with name 'TimeoutError'.
        if (err instanceof DOMException && err.name === 'TimeoutError') {
            throw new Error(`Embedding request timed out after ${HTTP_TIMEOUT_MS}ms (${where})`);
        }
        // DNS, connection errors — retry with backoff
        if (attempt < HTTP_MAX_RETRIES) {
            return retryLater();
        }
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Embedding request failed (${where}): ${reason}`);
    }
    if (!resp.ok) {
        const status = resp.status;
        // The error body is never read; cancel it so the underlying connection
        // is released before we retry or give up. Best effort only.
        try {
            await resp.body?.cancel();
        }
        catch {
            // ignore — the status code is what matters here
        }
        if ((status === 429 || status >= 500) && attempt < HTTP_MAX_RETRIES) {
            return retryLater();
        }
        throw new Error(`Embedding endpoint returned ${status} (${where})`);
    }
    let payload;
    try {
        payload = await resp.json();
    }
    catch (err) {
        // The timeout signal is still attached while the body streams in.
        if (err instanceof DOMException && err.name === 'TimeoutError') {
            throw new Error(`Embedding request timed out after ${HTTP_TIMEOUT_MS}ms (${where})`);
        }
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Embedding endpoint returned an unreadable JSON body (${where}): ${reason}`);
    }
    // Validate the shape here so callers get a descriptive error instead of a
    // TypeError on `.length` or a misleading "0d vector" dimension mismatch.
    const items = payload?.data;
    if (!Array.isArray(items)) {
        throw new Error(`Embedding endpoint returned a response without a "data" array (${where})`);
    }
    for (const item of items) {
        if (!Array.isArray(item?.embedding)) {
            throw new Error(`Embedding endpoint returned an item without an "embedding" array (${where})`);
        }
    }
    return items;
};
111
/**
 * Embed texts via the HTTP backend, HTTP_BATCH_SIZE texts per request.
 * Batches are sent one after another; config is read from env vars on every call.
 *
 * @param texts - Array of texts to embed
 * @returns Array of Float32Array embedding vectors, in input order
 *          (an empty array when `texts` is empty)
 * @throws Error when HTTP embedding is not configured, when a batch returns a
 *         different number of vectors than texts, or when a vector's length
 *         differs from the expected dimensions
 */
export const httpEmbed = async (texts) => {
    if (texts.length === 0)
        return [];
    const config = readConfig();
    if (!config)
        throw new Error('HTTP embedding not configured');
    const endpoint = `${config.baseUrl}/embeddings`;
    // The expected length is the same for every vector, so resolve it once.
    const expected = config.dimensions ?? DEFAULT_DIMS;
    const allVectors = [];
    let batchIndex = 0;
    for (let offset = 0; offset < texts.length; offset += HTTP_BATCH_SIZE) {
        const batch = texts.slice(offset, offset + HTTP_BATCH_SIZE);
        const items = await httpEmbedBatch(endpoint, batch, config.model, config.apiKey, batchIndex);
        if (items.length !== batch.length) {
            throw new Error(`Embedding endpoint returned ${items.length} vectors for ${batch.length} texts (${safeUrl(endpoint)}, batch ${batchIndex})`);
        }
        for (let k = 0; k < items.length; k += 1) {
            const vec = new Float32Array(items[k].embedding);
            if (vec.length === expected) {
                allVectors.push(vec);
                continue;
            }
            // Fail fast on dimension mismatch rather than inserting bad vectors
            // into the FLOAT[N] column which would cause a cryptic Kuzu error.
            let hint;
            if (config.dimensions) {
                hint = 'Update GITNEXUS_EMBEDDING_DIMS to match your model output.';
            }
            else {
                hint = `Set GITNEXUS_EMBEDDING_DIMS=${vec.length} to match your model output.`;
            }
            throw new Error(`Embedding dimension mismatch: endpoint returned ${vec.length}d vector, but expected ${expected}d. ${hint}`);
        }
        batchIndex += 1;
    }
    return allVectors;
};
151
/**
 * Embed one query text via the HTTP backend.
 * Convenience for MCP search, where only a single vector is needed.
 *
 * @param text - Query text to embed
 * @returns The embedding of `text` exactly as returned by the endpoint (a number array)
 * @throws Error when HTTP embedding is not configured, when the endpoint
 *         returns no items, or when the vector length differs from the
 *         expected dimensions
 */
export const httpEmbedQuery = async (text) => {
    const config = readConfig();
    if (!config)
        throw new Error('HTTP embedding not configured');
    const endpoint = `${config.baseUrl}/embeddings`;
    const items = await httpEmbedBatch(endpoint, [text], config.model, config.apiKey);
    if (!items.length) {
        throw new Error(`Embedding endpoint returned empty response (${safeUrl(endpoint)})`);
    }
    const embedding = items[0].embedding;
    // Same dimension check as httpEmbed — catch mismatches before they reach
    // the Kuzu FLOAT[N] cast in search queries.
    const expected = config.dimensions ?? DEFAULT_DIMS;
    if (embedding.length === expected) {
        return embedding;
    }
    let hint;
    if (config.dimensions) {
        hint = 'Update GITNEXUS_EMBEDDING_DIMS to match your model output.';
    }
    else {
        hint = `Set GITNEXUS_EMBEDDING_DIMS=${embedding.length} to match your model output.`;
    }
    throw new Error(`Embedding dimension mismatch: endpoint returned ${embedding.length}d vector, but expected ${expected}d. ${hint}`);
};
@@ -4,6 +4,7 @@
4
4
  * Re-exports for the embedding pipeline system.
5
5
  */
6
6
  export * from './types.js';
7
+ export * from './http-client.js';
7
8
  export * from './embedder.js';
8
9
  export * from './text-generator.js';
9
10
  export * from './embedding-pipeline.js';
@@ -4,6 +4,7 @@
4
4
  * Re-exports for the embedding pipeline system.
5
5
  */
6
6
  export * from './types.js';
7
+ export * from './http-client.js';
7
8
  export * from './embedder.js';
8
9
  export * from './text-generator.js';
9
10
  export * from './embedding-pipeline.js';
@@ -34,7 +34,7 @@ export interface EmbeddingProgress {
34
34
  * Configuration for the embedding pipeline
35
35
  */
36
36
  export interface EmbeddingConfig {
37
- /** Model identifier for transformers.js */
37
+ /** Model identifier for transformers.js (local) or the HTTP endpoint model name */
38
38
  modelId: string;
39
39
  /** Number of nodes to embed in each batch */
40
40
  batchSize: number;
@@ -1,4 +1,4 @@
1
- export type NodeLabel = 'Project' | 'Package' | 'Module' | 'Folder' | 'File' | 'Class' | 'Function' | 'Method' | 'Variable' | 'Interface' | 'Enum' | 'Decorator' | 'Import' | 'Type' | 'CodeElement' | 'Community' | 'Process' | 'Struct' | 'Macro' | 'Typedef' | 'Union' | 'Namespace' | 'Trait' | 'Impl' | 'TypeAlias' | 'Const' | 'Static' | 'Property' | 'Record' | 'Delegate' | 'Annotation' | 'Constructor' | 'Template';
1
+ export type NodeLabel = 'Project' | 'Package' | 'Module' | 'Folder' | 'File' | 'Class' | 'Function' | 'Method' | 'Variable' | 'Interface' | 'Enum' | 'Decorator' | 'Import' | 'Type' | 'CodeElement' | 'Community' | 'Process' | 'Struct' | 'Macro' | 'Typedef' | 'Union' | 'Namespace' | 'Trait' | 'Impl' | 'TypeAlias' | 'Const' | 'Static' | 'Property' | 'Record' | 'Delegate' | 'Annotation' | 'Constructor' | 'Template' | 'Section';
2
2
  import { SupportedLanguages } from '../../config/supported-languages.js';
3
3
  export type NodeProperties = {
4
4
  name: string;
@@ -23,6 +23,7 @@ export type NodeProperties = {
23
23
  entryPointScore?: number;
24
24
  entryPointReason?: string;
25
25
  parameterCount?: number;
26
+ level?: number;
26
27
  returnType?: string;
27
28
  };
28
29
  export type RelationshipType = 'CONTAINS' | 'CALLS' | 'INHERITS' | 'OVERRIDES' | 'IMPORTS' | 'USES' | 'DEFINES' | 'DECORATES' | 'IMPLEMENTS' | 'EXTENDS' | 'HAS_METHOD' | 'HAS_PROPERTY' | 'ACCESSES' | 'MEMBER_OF' | 'STEP_IN_PROCESS';
@@ -0,0 +1,80 @@
1
+ import type Parser from 'tree-sitter';
2
+ import { SupportedLanguages } from '../../config/supported-languages.js';
3
+ import type { NodeLabel } from '../graph/types.js';
4
+ /** Tree-sitter AST node. Re-exported for use across ingestion modules. */
5
+ export type SyntaxNode = Parser.SyntaxNode;
6
+ /**
7
+ * Ordered list of definition capture keys for tree-sitter query matches.
8
+ * Used to extract the definition node from a capture map.
9
+ */
10
+ export declare const DEFINITION_CAPTURE_KEYS: readonly ["definition.function", "definition.class", "definition.interface", "definition.method", "definition.struct", "definition.enum", "definition.namespace", "definition.module", "definition.trait", "definition.impl", "definition.type", "definition.const", "definition.static", "definition.typedef", "definition.macro", "definition.union", "definition.property", "definition.record", "definition.delegate", "definition.annotation", "definition.constructor", "definition.template"];
11
+ /** Extract the definition node from a tree-sitter query capture map. */
12
+ export declare const getDefinitionNodeFromCaptures: (captureMap: Record<string, any>) => SyntaxNode | null;
13
+ /**
14
+ * Node types that represent function/method definitions across languages.
15
+ * Used to find the enclosing function for a call site.
16
+ */
17
+ export declare const FUNCTION_NODE_TYPES: Set<string>;
18
+ /**
19
+ * Node types for standard function declarations that need C/C++ declarator handling.
20
+ * Used by extractFunctionName to determine how to extract the function name.
21
+ */
22
+ export declare const FUNCTION_DECLARATION_TYPES: Set<string>;
23
+ /** AST node types that represent a class-like container (for HAS_METHOD edge extraction) */
24
+ export declare const CLASS_CONTAINER_TYPES: Set<string>;
25
+ export declare const CONTAINER_TYPE_TO_LABEL: Record<string, string>;
26
+ /** Check if a Kotlin function_declaration capture is inside a class_body (i.e., a method).
27
+ * Kotlin grammar uses function_declaration for both top-level functions and class methods.
28
+ * Returns true when the captured definition node has a class_body ancestor. */
29
+ export declare function isKotlinClassMethod(captureNode: {
30
+ parent?: any;
31
+ } | null | undefined): boolean;
32
+ /**
33
+ * C/C++: check if a Function capture is inside a class/struct body.
34
+ * If true, the function is already captured by @definition.method and should be skipped
35
+ * to prevent double-indexing in globalIndex.
36
+ */
37
+ export declare function isCppDuplicateClassFunction(functionNode: {
38
+ parent?: any;
39
+ } | null | undefined, nodeLabel: string, language: SupportedLanguages): boolean;
40
+ /**
41
+ * Determine the graph node label from a tree-sitter capture map.
42
+ * Handles language-specific reclassification (C/C++ duplicate skipping, Kotlin Method promotion).
43
+ * Returns null if the capture should be skipped (import, call, C/C++ duplicate, missing name).
44
+ */
45
+ export declare function getLabelFromCaptures(captureMap: Record<string, any>, language: SupportedLanguages): NodeLabel | null;
46
+ /** Walk up AST to find enclosing class/struct/interface/impl, return its generateId or null.
47
+ * For Go method_declaration nodes, extracts receiver type (e.g. `func (u *User) Save()` → User struct). */
48
+ export declare const findEnclosingClassId: (node: any, filePath: string) => string | null;
49
+ /**
50
+ * Find a child of `childType` within a sibling node of `siblingType`.
51
+ * Used for Kotlin AST traversal where visibility_modifier lives inside a modifiers sibling.
52
+ */
53
+ export declare const findSiblingChild: (parent: any, siblingType: string, childType: string) => any | null;
54
+ /**
55
+ * Extract function name and label from a function_definition or similar AST node.
56
+ * Handles C/C++ qualified_identifier (ClassName::MethodName) and other language patterns.
57
+ */
58
+ export declare const extractFunctionName: (node: SyntaxNode) => {
59
+ funcName: string | null;
60
+ label: string;
61
+ };
62
+ export interface MethodSignature {
63
+ parameterCount: number | undefined; // parameter count extracted from the AST node; NOTE(review): undefined presumably means it could not be determined — confirm in ast-helpers.js
64
+ /** Number of required (non-optional, non-default) parameters.
65
+ * Only set when fewer than parameterCount — enables range-based arity filtering.
66
+ * undefined means all parameters are required (or metadata unavailable). */
67
+ requiredParameterCount: number | undefined;
68
+ /** Per-parameter type names extracted via extractSimpleTypeName.
69
+ * Only populated for languages with method overloading (Java, Kotlin, C#, C++).
70
+ * undefined (not []) when no types are extractable — avoids empty array allocations. */
71
+ parameterTypes: string[] | undefined;
72
+ returnType: string | undefined; // return type text extracted from the AST node; NOTE(review): undefined presumably when no return type is present — confirm
73
+ }
74
+ /** Argument list node types shared between extractMethodSignature and countCallArguments. */
75
+ export declare const CALL_ARGUMENT_LIST_TYPES: Set<string>;
76
+ /**
77
+ * Extract parameter count and return type text from an AST method/function node.
78
+ * Works across languages by looking for common AST patterns.
79
+ */
80
+ export declare const extractMethodSignature: (node: SyntaxNode | null | undefined) => MethodSignature;