@veewo/gitnexus 1.5.0-rc.4 → 1.5.0

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (54)
  1. package/dist/benchmark/analyze-runner.d.ts +1 -1
  2. package/dist/benchmark/analyze-runner.js +4 -3
  3. package/dist/benchmark/analyze-runner.test.js +7 -0
  4. package/dist/cli/ai-context.d.ts +0 -1
  5. package/dist/cli/ai-context.js +15 -6
  6. package/dist/cli/analyze-options.js +58 -34
  7. package/dist/cli/analyze-options.test.js +57 -0
  8. package/dist/cli/analyze-runtime-summary.js +1 -0
  9. package/dist/cli/analyze-runtime-summary.test.js +10 -0
  10. package/dist/cli/analyze-summary.d.ts +2 -0
  11. package/dist/cli/analyze-summary.js +19 -0
  12. package/dist/cli/analyze.d.ts +11 -0
  13. package/dist/cli/analyze.js +30 -5
  14. package/dist/cli/analyze.test.d.ts +1 -0
  15. package/dist/cli/analyze.test.js +25 -0
  16. package/dist/cli/benchmark-agent-context.js +1 -1
  17. package/dist/cli/benchmark-unity.js +1 -1
  18. package/dist/cli/benchmark-unity.test.js +5 -1
  19. package/dist/cli/index.js +4 -2
  20. package/dist/cli/scope-manifest-config.d.ts +9 -0
  21. package/dist/cli/scope-manifest-config.js +37 -0
  22. package/dist/cli/setup.js +40 -41
  23. package/dist/cli/setup.test.js +14 -14
  24. package/dist/cli/sync-manifest.d.ts +27 -0
  25. package/dist/cli/sync-manifest.js +200 -0
  26. package/dist/cli/sync-manifest.test.d.ts +1 -0
  27. package/dist/cli/sync-manifest.test.js +88 -0
  28. package/dist/core/config/unity-config.d.ts +1 -0
  29. package/dist/core/config/unity-config.js +1 -0
  30. package/dist/core/ingestion/call-processor.d.ts +2 -1
  31. package/dist/core/ingestion/call-processor.js +28 -6
  32. package/dist/core/ingestion/heritage-processor.d.ts +2 -1
  33. package/dist/core/ingestion/heritage-processor.js +30 -7
  34. package/dist/core/ingestion/import-processor.d.ts +2 -1
  35. package/dist/core/ingestion/import-processor.js +28 -6
  36. package/dist/core/ingestion/parsing-processor.d.ts +5 -3
  37. package/dist/core/ingestion/parsing-processor.js +46 -13
  38. package/dist/core/ingestion/pipeline.js +65 -13
  39. package/dist/core/ingestion/unity-runtime-binding-rules.d.ts +1 -1
  40. package/dist/core/ingestion/unity-runtime-binding-rules.js +21 -18
  41. package/dist/core/ingestion/workers/parse-worker.d.ts +2 -0
  42. package/dist/core/ingestion/workers/parse-worker.js +50 -6
  43. package/dist/core/tree-sitter/csharp-define-profile.d.ts +6 -0
  44. package/dist/core/tree-sitter/csharp-define-profile.js +43 -0
  45. package/dist/core/tree-sitter/csharp-preproc-normalizer.d.ts +14 -0
  46. package/dist/core/tree-sitter/csharp-preproc-normalizer.js +261 -0
  47. package/dist/core/tree-sitter/parser-loader.d.ts +10 -0
  48. package/dist/core/tree-sitter/parser-loader.js +19 -0
  49. package/dist/types/pipeline.d.ts +13 -0
  50. package/package.json +12 -12
  51. package/scripts/check-sync-manifest-traceability.mjs +203 -0
  52. package/scripts/tree-sitter-audit-classify.mjs +172 -0
  53. package/skills/gitnexus-cli.md +36 -4
  54. package/skills/gitnexus-unity-rule-gen.md +2 -2
package/dist/cli/sync-manifest.test.js
@@ -0,0 +1,88 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import fs from 'node:fs/promises';
4
+ import os from 'node:os';
5
+ import path from 'node:path';
6
+ import { enforceSyncManifestConsistency } from './sync-manifest.js';
7
+ async function writeManifest(filePath, content) {
8
+ await fs.mkdir(path.dirname(filePath), { recursive: true });
9
+ await fs.writeFile(filePath, content, 'utf-8');
10
+ }
11
+ test('when explicit CLI values differ from manifest, TTY mode asks whether to update manifest', async () => {
12
+ const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-sync-manifest-'));
13
+ const manifestPath = path.join(tmpDir, '.gitnexus', 'sync-manifest.txt');
14
+ await writeManifest(manifestPath, ['Assets/', '@extensions=.cs,.meta', '@repoAlias=demo-repo', '@embeddings=false'].join('\n'));
15
+ let promptMessage = '';
16
+ const result = await enforceSyncManifestConsistency({
17
+ manifestPath,
18
+ extensions: '.ts',
19
+ policy: 'ask',
20
+ stdinIsTTY: true,
21
+ prompt: async (message) => {
22
+ promptMessage = message;
23
+ return 'keep';
24
+ },
25
+ });
26
+ assert.equal(result.decision, 'keep');
27
+ assert.match(promptMessage, /@extensions/i);
28
+ assert.match(promptMessage, /sync-manifest\.txt/i);
29
+ assert.match(promptMessage, /update/i);
30
+ assert.match(promptMessage, /keep/i);
31
+ });
32
+ test('non-TTY without explicit policy exits with actionable error', async () => {
33
+ const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-sync-manifest-'));
34
+ const manifestPath = path.join(tmpDir, '.gitnexus', 'sync-manifest.txt');
35
+ await writeManifest(manifestPath, ['Assets/', '@extensions=.cs,.meta', '@repoAlias=demo-repo', '@embeddings=false'].join('\n'));
36
+ await assert.rejects(enforceSyncManifestConsistency({
37
+ manifestPath,
38
+ extensions: '.ts',
39
+ stdinIsTTY: false,
40
+ }), /--sync-manifest-policy/i);
41
+ });
42
+ test('policy=update rewrites manifest with normalized directives', async () => {
43
+ const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-sync-manifest-'));
44
+ const manifestPath = path.join(tmpDir, '.gitnexus', 'sync-manifest.txt');
45
+ await writeManifest(manifestPath, ['Assets/', 'Packages/', '@extensions=.cs,.meta', '@repoAlias=demo-repo', '@embeddings=false'].join('\n'));
46
+ const result = await enforceSyncManifestConsistency({
47
+ manifestPath,
48
+ extensions: '.ts,.tsx',
49
+ embeddings: true,
50
+ policy: 'update',
51
+ });
52
+ const rewritten = await fs.readFile(manifestPath, 'utf-8');
53
+ assert.equal(result.decision, 'update');
54
+ assert.equal(rewritten, ['Assets', 'Packages', '@extensions=.ts,.tsx', '@repoAlias=demo-repo', '@embeddings=true', ''].join('\n'));
55
+ });
56
+ test('rejects placeholder manifest path values', async () => {
57
+ const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-sync-manifest-placeholder-'));
58
+ const manifestPath = path.join(tmpDir, '.gitnexus', 'sync-manifest-placeholder.txt');
59
+ await writeManifest(manifestPath, ['Assets/', '@extensions=.cs,.meta', '@repoAlias=demo-repo', '@embeddings=false'].join('\n'));
60
+ await assert.rejects(enforceSyncManifestConsistency({
61
+ manifestPath,
62
+ extensions: '.ts',
63
+ policy: 'keep',
64
+ }), /placeholder manifest path/i);
65
+ });
66
+ test('TTY prompt branch requires concrete stdin.isTTY evidence', async () => {
67
+ const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-sync-manifest-tty-evidence-'));
68
+ const manifestPath = path.join(tmpDir, '.gitnexus', 'sync-manifest.txt');
69
+ await writeManifest(manifestPath, ['Assets/', '@extensions=.cs,.meta', '@repoAlias=demo-repo', '@embeddings=false'].join('\n'));
70
+ await assert.rejects(enforceSyncManifestConsistency({
71
+ manifestPath,
72
+ extensions: '.ts',
73
+ policy: 'ask',
74
+ prompt: async () => 'keep',
75
+ }), /stdin\.isTTY evidence/i);
76
+ });
77
+ test('manifest rewrite requires non-empty diff entries', async () => {
78
+ const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-sync-manifest-noop-'));
79
+ const manifestPath = path.join(tmpDir, '.gitnexus', 'sync-manifest.txt');
80
+ await writeManifest(manifestPath, ['Assets/', '@extensions=.cs,.meta', '@repoAlias=demo-repo', '@embeddings=false'].join('\n'));
81
+ await assert.rejects(enforceSyncManifestConsistency({
82
+ manifestPath,
83
+ extensions: '.cs,.meta',
84
+ repoAlias: 'demo-repo',
85
+ embeddings: false,
86
+ policy: 'update',
87
+ }), /non-empty diff/i);
88
+ });
package/dist/core/config/unity-config.d.ts
@@ -1,6 +1,7 @@
1
1
  export interface UnityConfig {
2
2
  maxSyntheticEdgesPerClass: number;
3
3
  maxSyntheticEdgesTotal: number;
4
+ enableContainerNodes: boolean;
4
5
  lazyMaxPaths: number;
5
6
  lazyBatchSize: number;
6
7
  lazyMaxMs: number;
package/dist/core/config/unity-config.js
@@ -3,6 +3,7 @@ import { join } from 'node:path';
3
3
  const DEFAULTS = {
4
4
  maxSyntheticEdgesPerClass: 12,
5
5
  maxSyntheticEdgesTotal: 256,
6
+ enableContainerNodes: false,
6
7
  lazyMaxPaths: 120,
7
8
  lazyBatchSize: 30,
8
9
  lazyMaxMs: 5000,
package/dist/core/ingestion/call-processor.d.ts
@@ -5,7 +5,8 @@ import type { ExtractedCall, ExtractedHeritage, ExtractedRoute, FileConstructorB
5
5
  export declare const processCalls: (graph: KnowledgeGraph, files: {
6
6
  path: string;
7
7
  content: string;
8
- }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void) => Promise<ExtractedHeritage[]>;
8
+ rawContent?: string;
9
+ }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, onRawFallbackParse?: (count: number) => void) => Promise<ExtractedHeritage[]>;
9
10
  /**
10
11
  * Fast path: resolve pre-extracted call sites from workers.
11
12
  * No AST parsing — workers already extracted calledName + sourceId.
package/dist/core/ingestion/call-processor.js
@@ -1,11 +1,10 @@
1
1
  import Parser from 'tree-sitter';
2
2
  import { TIER_CONFIDENCE } from './resolution-context.js';
3
- import { isLanguageAvailable, loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
3
+ import { isLanguageAvailable, loadParser, loadLanguage, parseContent } from '../tree-sitter/parser-loader.js';
4
4
  import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
5
5
  import { generateId } from '../../lib/utils.js';
6
6
  import { getLanguageFromFilename, isVerboseIngestionEnabled, yieldToEventLoop, FUNCTION_NODE_TYPES, extractFunctionName, isBuiltInOrNoise, countCallArguments, inferCallForm, extractReceiverName, extractReceiverNode, findEnclosingClassId, CALL_EXPRESSION_TYPES, extractCallChain, } from './utils.js';
7
7
  import { buildTypeEnv } from './type-env.js';
8
- import { getTreeSitterBufferSize } from './constants.js';
9
8
  import { callRouters } from './call-routing.js';
10
9
  import { extractReturnTypeName } from './type-extractors/shared.js';
11
10
  /**
@@ -86,7 +85,7 @@ const verifyConstructorBindings = (bindings, filePath, ctx, graph) => {
86
85
  }
87
86
  return verified;
88
87
  };
89
- export const processCalls = async (graph, files, astCache, ctx, onProgress) => {
88
+ export const processCalls = async (graph, files, astCache, ctx, onProgress, onRawFallbackParse) => {
90
89
  const parser = await loadParser();
91
90
  const collectedHeritage = [];
92
91
  const logSkipped = isVerboseIngestionEnabled();
@@ -112,10 +111,33 @@ export const processCalls = async (graph, files, astCache, ctx, onProgress) => {
112
111
  let tree = astCache.get(file.path);
113
112
  if (!tree) {
114
113
  try {
115
- tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) });
114
+ tree = parseContent(file.content);
116
115
  }
117
- catch (parseError) {
118
- continue;
116
+ catch {
117
+ if (file.rawContent && file.rawContent !== file.content) {
118
+ try {
119
+ tree = parseContent(file.rawContent);
120
+ onRawFallbackParse?.(1);
121
+ }
122
+ catch {
123
+ continue;
124
+ }
125
+ }
126
+ else {
127
+ continue;
128
+ }
129
+ }
130
+ if (file.rawContent && file.rawContent !== file.content && tree.rootNode?.hasError) {
131
+ try {
132
+ const rawTree = parseContent(file.rawContent);
133
+ if (!rawTree.rootNode?.hasError) {
134
+ tree = rawTree;
135
+ onRawFallbackParse?.(1);
136
+ }
137
+ }
138
+ catch {
139
+ // Keep normalized parse result when raw fallback fails
140
+ }
119
141
  }
120
142
  astCache.set(file.path, tree);
121
143
  }
package/dist/core/ingestion/heritage-processor.d.ts
@@ -20,7 +20,8 @@ import type { ResolutionContext } from './resolution-context.js';
20
20
  export declare const processHeritage: (graph: KnowledgeGraph, files: {
21
21
  path: string;
22
22
  content: string;
23
- }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void) => Promise<void>;
23
+ rawContent?: string;
24
+ }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, onRawFallbackParse?: (count: number) => void) => Promise<void>;
24
25
  /**
25
26
  * Fast path: resolve pre-extracted heritage from workers.
26
27
  * No AST parsing — workers already extracted className + parentName + kind.
package/dist/core/ingestion/heritage-processor.js
@@ -14,12 +14,11 @@
14
14
  * - All other languages: default to EXTENDS
15
15
  */
16
16
  import Parser from 'tree-sitter';
17
- import { isLanguageAvailable, loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
17
+ import { isLanguageAvailable, loadParser, loadLanguage, parseContent } from '../tree-sitter/parser-loader.js';
18
18
  import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
19
19
  import { generateId } from '../../lib/utils.js';
20
20
  import { getLanguageFromFilename, isVerboseIngestionEnabled, yieldToEventLoop } from './utils.js';
21
21
  import { SupportedLanguages } from '../../config/supported-languages.js';
22
- import { getTreeSitterBufferSize } from './constants.js';
23
22
  /** C#/Java convention: interfaces start with I followed by an uppercase letter */
24
23
  const INTERFACE_NAME_RE = /^I[A-Z]/;
25
24
  /**
@@ -65,7 +64,7 @@ const resolveHeritageId = (name, filePath, ctx, fallbackLabel, fallbackKey) => {
65
64
  }
66
65
  return generateId(fallbackLabel, fallbackKey ?? name);
67
66
  };
68
- export const processHeritage = async (graph, files, astCache, ctx, onProgress) => {
67
+ export const processHeritage = async (graph, files, astCache, ctx, onProgress, onRawFallbackParse) => {
69
68
  const parser = await loadParser();
70
69
  const logSkipped = isVerboseIngestionEnabled();
71
70
  const skippedByLang = logSkipped ? new Map() : null;
@@ -94,11 +93,35 @@ export const processHeritage = async (graph, files, astCache, ctx, onProgress) =
94
93
  if (!tree) {
95
94
  // Use larger bufferSize for files > 32KB
96
95
  try {
97
- tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) });
96
+ tree = parseContent(file.content);
98
97
  }
99
- catch (parseError) {
100
- // Skip files that can't be parsed
101
- continue;
98
+ catch {
99
+ if (file.rawContent && file.rawContent !== file.content) {
100
+ try {
101
+ tree = parseContent(file.rawContent);
102
+ onRawFallbackParse?.(1);
103
+ }
104
+ catch {
105
+ // Skip files that can't be parsed
106
+ continue;
107
+ }
108
+ }
109
+ else {
110
+ // Skip files that can't be parsed
111
+ continue;
112
+ }
113
+ }
114
+ if (file.rawContent && file.rawContent !== file.content && tree.rootNode?.hasError) {
115
+ try {
116
+ const rawTree = parseContent(file.rawContent);
117
+ if (!rawTree.rootNode?.hasError) {
118
+ tree = rawTree;
119
+ onRawFallbackParse?.(1);
120
+ }
121
+ }
122
+ catch {
123
+ // Keep normalized parse result when raw fallback fails
124
+ }
102
125
  }
103
126
  // Cache re-parsed tree for potential future use
104
127
  astCache.set(file.path, tree);
package/dist/core/ingestion/import-processor.d.ts
@@ -28,7 +28,8 @@ export declare function buildImportResolutionContext(allPaths: string[]): Import
28
28
  export declare const processImports: (graph: KnowledgeGraph, files: {
29
29
  path: string;
30
30
  content: string;
31
- }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, repoRoot?: string, allPaths?: string[]) => Promise<void>;
31
+ rawContent?: string;
32
+ }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, repoRoot?: string, allPaths?: string[], onRawFallbackParse?: (count: number) => void) => Promise<void>;
32
33
  export declare const processImportsFromExtracted: (graph: KnowledgeGraph, files: {
33
34
  path: string;
34
35
  }[], extractedImports: ExtractedImport[], ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, repoRoot?: string, prebuiltCtx?: ImportResolutionContext) => Promise<void>;
package/dist/core/ingestion/import-processor.js
@@ -1,11 +1,10 @@
1
1
  import Parser from 'tree-sitter';
2
- import { isLanguageAvailable, loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
2
+ import { isLanguageAvailable, loadParser, loadLanguage, parseContent } from '../tree-sitter/parser-loader.js';
3
3
  import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
4
4
  import { generateId } from '../../lib/utils.js';
5
5
  import { getLanguageFromFilename, isVerboseIngestionEnabled, yieldToEventLoop } from './utils.js';
6
6
  import { SupportedLanguages } from '../../config/supported-languages.js';
7
7
  import { extractNamedBindings } from './named-binding-extraction.js';
8
- import { getTreeSitterBufferSize } from './constants.js';
9
8
  import { loadTsconfigPaths, loadGoModulePath, loadComposerConfig, loadCSharpProjectConfig, loadSwiftPackageConfig, } from './language-config.js';
10
9
  import { buildSuffixIndex, resolveImportPath, appendKotlinWildcard, KOTLIN_EXTENSIONS, resolveJvmWildcard, resolveJvmMemberImport, resolveGoPackageDir, resolveGoPackage, resolveCSharpImport, resolveCSharpNamespaceDir, resolvePhpImport, resolveRustImport, resolveRubyImport, resolvePythonImport, } from './resolvers/index.js';
11
10
  import { callRouters } from './call-routing.js';
@@ -173,7 +172,7 @@ function applyImportResult(result, filePath, importMap, packageMap, addImportEdg
173
172
  // ============================================================================
174
173
  // MAIN IMPORT PROCESSOR
175
174
  // ============================================================================
176
- export const processImports = async (graph, files, astCache, ctx, onProgress, repoRoot, allPaths) => {
175
+ export const processImports = async (graph, files, astCache, ctx, onProgress, repoRoot, allPaths, onRawFallbackParse) => {
177
176
  const importMap = ctx.importMap;
178
177
  const packageMap = ctx.packageMap;
179
178
  const namedImportMap = ctx.namedImportMap;
@@ -249,10 +248,33 @@ export const processImports = async (graph, files, astCache, ctx, onProgress, re
249
248
  let wasReparsed = false;
250
249
  if (!tree) {
251
250
  try {
252
- tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) });
251
+ tree = parseContent(file.content);
253
252
  }
254
- catch (parseError) {
255
- continue;
253
+ catch {
254
+ if (file.rawContent && file.rawContent !== file.content) {
255
+ try {
256
+ tree = parseContent(file.rawContent);
257
+ onRawFallbackParse?.(1);
258
+ }
259
+ catch {
260
+ continue;
261
+ }
262
+ }
263
+ else {
264
+ continue;
265
+ }
266
+ }
267
+ if (file.rawContent && file.rawContent !== file.content && tree.rootNode?.hasError) {
268
+ try {
269
+ const rawTree = parseContent(file.rawContent);
270
+ if (!rawTree.rootNode?.hasError) {
271
+ tree = rawTree;
272
+ onRawFallbackParse?.(1);
273
+ }
274
+ }
275
+ catch {
276
+ // Keep normalized parse result when raw fallback fails
277
+ }
256
278
  }
257
279
  wasReparsed = true;
258
280
  // Cache re-parsed tree so call/heritage phases get hits
package/dist/core/ingestion/parsing-processor.d.ts
@@ -11,8 +11,10 @@ export interface WorkerExtractedData {
11
11
  routes: ExtractedRoute[];
12
12
  constructorBindings: FileConstructorBindings[];
13
13
  }
14
- export { isNodeExported } from './export-detection.js';
15
- export declare const processParsing: (graph: KnowledgeGraph, files: {
14
+ export interface ParsingFileInput {
16
15
  path: string;
17
16
  content: string;
18
- }[], symbolTable: SymbolTable, astCache: ASTCache, onFileProgress?: FileProgressCallback, workerPool?: WorkerPool) => Promise<WorkerExtractedData | null>;
17
+ rawContent?: string;
18
+ }
19
+ export { isNodeExported } from './export-detection.js';
20
+ export declare const processParsing: (graph: KnowledgeGraph, files: ParsingFileInput[], symbolTable: SymbolTable, astCache: ASTCache, onFileProgress?: FileProgressCallback, workerPool?: WorkerPool, onRawFallbackParse?: (count: number) => void) => Promise<WorkerExtractedData | null>;
package/dist/core/ingestion/parsing-processor.js
@@ -1,25 +1,30 @@
1
1
  import Parser from 'tree-sitter';
2
- import { loadParser, loadLanguage, isLanguageAvailable } from '../tree-sitter/parser-loader.js';
2
+ import { loadParser, loadLanguage, isLanguageAvailable, parseContent } from '../tree-sitter/parser-loader.js';
3
3
  import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
4
4
  import { generateId } from '../../lib/utils.js';
5
5
  import { getLanguageFromFilename, yieldToEventLoop, getDefinitionNodeFromCaptures, findEnclosingClassId, extractMethodSignature } from './utils.js';
6
6
  import { isNodeExported } from './export-detection.js';
7
7
  import { detectFrameworkFromAST } from './framework-detection.js';
8
8
  import { typeConfigs } from './type-extractors/index.js';
9
- import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from './constants.js';
9
+ import { TREE_SITTER_MAX_BUFFER } from './constants.js';
10
10
  // isNodeExported imported from ./export-detection.js (shared module)
11
11
  // Re-export for backward compatibility with any external consumers
12
12
  export { isNodeExported } from './export-detection.js';
13
13
  // ============================================================================
14
14
  // Worker-based parallel parsing
15
15
  // ============================================================================
16
- const processParsingWithWorkers = async (graph, files, symbolTable, astCache, workerPool, onFileProgress) => {
16
+ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, workerPool, onFileProgress, onRawFallbackParse) => {
17
17
  // Filter to parseable files only
18
18
  const parseableFiles = [];
19
19
  for (const file of files) {
20
20
  const lang = getLanguageFromFilename(file.path);
21
- if (lang)
22
- parseableFiles.push({ path: file.path, content: file.content });
21
+ if (lang) {
22
+ parseableFiles.push({
23
+ path: file.path,
24
+ content: file.content,
25
+ ...(file.rawContent ? { rawContent: file.rawContent } : {}),
26
+ });
27
+ }
23
28
  }
24
29
  if (parseableFiles.length === 0)
25
30
  return { imports: [], calls: [], heritage: [], routes: [], constructorBindings: [] };
@@ -34,6 +39,7 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
34
39
  const allHeritage = [];
35
40
  const allRoutes = [];
36
41
  const allConstructorBindings = [];
42
+ let rawFallbackCount = 0;
37
43
  for (const result of chunkResults) {
38
44
  for (const node of result.nodes) {
39
45
  graph.addNode({
@@ -57,7 +63,10 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
57
63
  allHeritage.push(...result.heritage);
58
64
  allRoutes.push(...result.routes);
59
65
  allConstructorBindings.push(...result.constructorBindings);
66
+ rawFallbackCount += result.csharpPreprocFallbackFiles;
60
67
  }
68
+ if (rawFallbackCount > 0)
69
+ onRawFallbackParse?.(rawFallbackCount);
61
70
  // Merge and log skipped languages from workers
62
71
  const skippedLanguages = new Map();
63
72
  for (const result of chunkResults) {
@@ -78,7 +87,7 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
78
87
  // ============================================================================
79
88
  // Sequential fallback (original implementation)
80
89
  // ============================================================================
81
- const processParsingSequential = async (graph, files, symbolTable, astCache, onFileProgress) => {
90
+ const processParsingSequential = async (graph, files, symbolTable, astCache, onFileProgress, onRawFallbackParse) => {
82
91
  const parser = await loadParser();
83
92
  const total = files.length;
84
93
  const skippedLanguages = new Map();
@@ -106,11 +115,35 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, onF
106
115
  }
107
116
  let tree;
108
117
  try {
109
- tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) });
118
+ tree = parseContent(file.content);
110
119
  }
111
- catch (parseError) {
112
- console.warn(`Skipping unparseable file: ${file.path}`);
113
- continue;
120
+ catch {
121
+ if (file.rawContent && file.rawContent !== file.content) {
122
+ try {
123
+ tree = parseContent(file.rawContent);
124
+ onRawFallbackParse?.(1);
125
+ }
126
+ catch {
127
+ console.warn(`Skipping unparseable file: ${file.path}`);
128
+ continue;
129
+ }
130
+ }
131
+ else {
132
+ console.warn(`Skipping unparseable file: ${file.path}`);
133
+ continue;
134
+ }
135
+ }
136
+ if (file.rawContent && file.rawContent !== file.content && tree.rootNode?.hasError) {
137
+ try {
138
+ const rawTree = parseContent(file.rawContent);
139
+ if (!rawTree.rootNode?.hasError) {
140
+ tree = rawTree;
141
+ onRawFallbackParse?.(1);
142
+ }
143
+ }
144
+ catch {
145
+ // Keep normalized parse result when raw fallback fails
146
+ }
114
147
  }
115
148
  astCache.set(file.path, tree);
116
149
  const queryString = LANGUAGE_QUERIES[language];
@@ -274,16 +307,16 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, onF
274
307
  // ============================================================================
275
308
  // Public API
276
309
  // ============================================================================
277
- export const processParsing = async (graph, files, symbolTable, astCache, onFileProgress, workerPool) => {
310
+ export const processParsing = async (graph, files, symbolTable, astCache, onFileProgress, workerPool, onRawFallbackParse) => {
278
311
  if (workerPool) {
279
312
  try {
280
- return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
313
+ return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress, onRawFallbackParse);
281
314
  }
282
315
  catch (err) {
283
316
  console.warn('Worker pool parsing failed, falling back to sequential:', err instanceof Error ? err.message : err);
284
317
  }
285
318
  }
286
319
  // Fallback: sequential parsing (no pre-extracted data)
287
- await processParsingSequential(graph, files, symbolTable, astCache, onFileProgress);
320
+ await processParsingSequential(graph, files, symbolTable, astCache, onFileProgress, onRawFallbackParse);
288
321
  return null;
289
322
  };
package/dist/core/ingestion/pipeline.js
@@ -11,11 +11,14 @@ import { processUnityResources } from './unity-resource-processor.js';
11
11
  import { applyUnityLifecycleSyntheticCalls } from './unity-lifecycle-synthetic-calls.js';
12
12
  import { applyUnityRuntimeBindingRules } from './unity-runtime-binding-rules.js';
13
13
  import { resolveUnityConfig } from '../config/unity-config.js';
14
+ import { loadCSharpDefineProfileFromCsproj } from '../tree-sitter/csharp-define-profile.js';
15
+ import { normalizeCSharpPreprocessorBranches } from '../tree-sitter/csharp-preproc-normalizer.js';
14
16
  import { loadAnalyzeRules } from '../../mcp/local/runtime-claim-rule-registry.js';
15
17
  import { createResolutionContext } from './resolution-context.js';
16
18
  import { createASTCache } from './ast-cache.js';
17
19
  import { walkRepositoryPaths, readFileContents, walkUnityResourcePaths } from './filesystem-walker.js';
18
20
  import { getLanguageFromFilename } from './utils.js';
21
+ import { SupportedLanguages } from '../../config/supported-languages.js';
19
22
  import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
20
23
  import { createWorkerPool } from './workers/worker-pool.js';
21
24
  import { selectEntriesByScopeRules } from './scope-filter.js';
@@ -39,7 +42,24 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
39
42
  astCache.clear();
40
43
  ctx.clear();
41
44
  };
45
+ let csharpDefineSymbols;
46
+ const csharpUndefinedSymbols = new Set();
47
+ let csharpPreprocDiagnostics;
42
48
  try {
49
+ if (options?.csharpDefineCsproj) {
50
+ const defineProfile = await loadCSharpDefineProfileFromCsproj(options.csharpDefineCsproj);
51
+ csharpDefineSymbols = defineProfile.symbols;
52
+ csharpPreprocDiagnostics = {
53
+ enabled: true,
54
+ sourcePath: defineProfile.sourcePath,
55
+ defineSymbolCount: defineProfile.symbols.size,
56
+ normalizedFiles: 0,
57
+ fallbackFiles: 0,
58
+ skippedFiles: 0,
59
+ expressionErrors: 0,
60
+ undefinedSymbols: [],
61
+ };
62
+ }
43
63
  // ── Phase 1: Scan paths only (no content read) ─────────────────────
44
64
  onProgress({
45
65
  phase: 'extracting',
@@ -184,7 +204,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
184
204
  // Calls/heritage use the symbol table built so far (symbols from earlier chunks
185
205
  // are already registered). This trades ~5% cross-chunk resolution accuracy for
186
206
  // 200-400MB less memory — critical for Linux-kernel-scale repos.
187
- const sequentialChunkPaths = [];
207
+ const sequentialChunkFiles = [];
188
208
  try {
189
209
  for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
190
210
  const chunkPaths = chunks[chunkIdx];
@@ -192,7 +212,27 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
192
212
  const chunkContents = await readFileContents(repoPath, chunkPaths);
193
213
  const chunkFiles = chunkPaths
194
214
  .filter(p => chunkContents.has(p))
195
- .map(p => ({ path: p, content: chunkContents.get(p) }));
215
+ .map((p) => {
216
+ const originalContent = chunkContents.get(p);
217
+ if (!csharpDefineSymbols || getLanguageFromFilename(p) !== SupportedLanguages.CSharp) {
218
+ return { path: p, content: originalContent };
219
+ }
220
+ const normalized = normalizeCSharpPreprocessorBranches(originalContent, csharpDefineSymbols);
221
+ csharpPreprocDiagnostics.expressionErrors += normalized.diagnostics.expressionErrors;
222
+ for (const symbol of normalized.diagnostics.undefinedSymbols) {
223
+ csharpUndefinedSymbols.add(symbol);
224
+ }
225
+ if (!normalized.changed) {
226
+ csharpPreprocDiagnostics.skippedFiles += 1;
227
+ return { path: p, content: originalContent };
228
+ }
229
+ csharpPreprocDiagnostics.normalizedFiles += 1;
230
+ return {
231
+ path: p,
232
+ content: normalized.normalizedText,
233
+ rawContent: originalContent,
234
+ };
235
+ });
196
236
  // Parse this chunk (workers or sequential fallback)
197
237
  const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
198
238
  const globalCurrent = filesParsedSoFar + current;
@@ -204,7 +244,10 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
204
244
  detail: filePath,
205
245
  stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
206
246
  });
207
- }, workerPool);
247
+ }, workerPool, (count) => {
248
+ if (csharpPreprocDiagnostics)
249
+ csharpPreprocDiagnostics.fallbackFiles += count;
250
+ });
208
251
  const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
209
252
  if (chunkWorkerData) {
210
253
  // Imports
@@ -251,8 +294,11 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
251
294
  ]);
252
295
  }
253
296
  else {
254
- await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
255
- sequentialChunkPaths.push(chunkPaths);
297
+ await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths, (count) => {
298
+ if (csharpPreprocDiagnostics)
299
+ csharpPreprocDiagnostics.fallbackFiles += count;
300
+ });
301
+ sequentialChunkFiles.push(chunkFiles);
256
302
  }
257
303
  filesParsedSoFar += chunkFiles.length;
258
304
  // Clear AST cache between chunks to free memory
@@ -263,15 +309,17 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
263
309
  finally {
264
310
  await workerPool?.terminate();
265
311
  }
266
- // Sequential fallback chunks: re-read source for call/heritage resolution
267
- for (const chunkPaths of sequentialChunkPaths) {
268
- const chunkContents = await readFileContents(repoPath, chunkPaths);
269
- const chunkFiles = chunkPaths
270
- .filter(p => chunkContents.has(p))
271
- .map(p => ({ path: p, content: chunkContents.get(p) }));
312
+ // Sequential fallback chunks: use the same normalized-or-raw source for call/heritage resolution
313
+ for (const chunkFiles of sequentialChunkFiles) {
272
314
  astCache = createASTCache(chunkFiles.length);
273
- const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx);
274
- await processHeritage(graph, chunkFiles, astCache, ctx);
315
+ const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, (count) => {
316
+ if (csharpPreprocDiagnostics)
317
+ csharpPreprocDiagnostics.fallbackFiles += count;
318
+ });
319
+ await processHeritage(graph, chunkFiles, astCache, ctx, undefined, (count) => {
320
+ if (csharpPreprocDiagnostics)
321
+ csharpPreprocDiagnostics.fallbackFiles += count;
322
+ });
275
323
  if (rubyHeritage.length > 0) {
276
324
  await processHeritageFromExtracted(graph, rubyHeritage, ctx);
277
325
  }
@@ -459,6 +507,9 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
459
507
  },
460
508
  });
461
509
  astCache.clear();
510
+ if (csharpPreprocDiagnostics) {
511
+ csharpPreprocDiagnostics.undefinedSymbols = [...csharpUndefinedSymbols].sort();
512
+ }
462
513
  return {
463
514
  graph,
464
515
  repoPath,
@@ -467,6 +518,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
467
518
  processResult,
468
519
  unityResult,
469
520
  scopeDiagnostics: scopeSelection.diagnostics,
521
+ csharpPreprocDiagnostics,
470
522
  };
471
523
  }
472
524
  catch (error) {
package/dist/core/ingestion/unity-runtime-binding-rules.d.ts
@@ -8,4 +8,4 @@ export interface UnityRuntimeBindingResult {
8
8
  edgesInjected: number;
9
9
  }>;
10
10
  }
11
- export declare function applyUnityRuntimeBindingRules(graph: KnowledgeGraph, rules: RuntimeClaimRule[], _config: UnityConfig): UnityRuntimeBindingResult;
11
+ export declare function applyUnityRuntimeBindingRules(graph: KnowledgeGraph, rules: RuntimeClaimRule[], config: UnityConfig): UnityRuntimeBindingResult;