@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ai-context.js +1 -1
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +73 -82
- package/dist/cli/augment.js +0 -2
- package/dist/cli/eval-server.d.ts +2 -2
- package/dist/cli/eval-server.js +6 -6
- package/dist/cli/index.js +6 -10
- package/dist/cli/mcp.d.ts +1 -3
- package/dist/cli/mcp.js +3 -3
- package/dist/cli/refresh.d.ts +2 -2
- package/dist/cli/refresh.js +24 -29
- package/dist/cli/status.js +4 -13
- package/dist/cli/tool.d.ts +5 -4
- package/dist/cli/tool.js +8 -10
- package/dist/config/ignore-service.js +14 -34
- package/dist/core/augmentation/engine.js +53 -83
- package/dist/core/db/adapter.d.ts +99 -0
- package/dist/core/db/adapter.js +402 -0
- package/dist/core/db/graph-loader.d.ts +27 -0
- package/dist/core/db/graph-loader.js +148 -0
- package/dist/core/db/queries.d.ts +160 -0
- package/dist/core/db/queries.js +441 -0
- package/dist/core/db/schema.d.ts +108 -0
- package/dist/core/db/schema.js +136 -0
- package/dist/core/embeddings/embedder.d.ts +21 -12
- package/dist/core/embeddings/embedder.js +104 -50
- package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
- package/dist/core/embeddings/embedding-pipeline.js +220 -262
- package/dist/core/embeddings/text-generator.js +4 -19
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/graph/graph.d.ts +1 -1
- package/dist/core/graph/graph.js +1 -0
- package/dist/core/graph/types.d.ts +11 -9
- package/dist/core/graph/types.js +4 -1
- package/dist/core/incremental/refresh.d.ts +46 -0
- package/dist/core/incremental/refresh.js +464 -0
- package/dist/core/incremental/types.d.ts +2 -1
- package/dist/core/incremental/types.js +42 -44
- package/dist/core/ingestion/ast-cache.js +1 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -3
- package/dist/core/ingestion/call-processor.js +448 -60
- package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
- package/dist/core/ingestion/cluster-enricher.js +2 -0
- package/dist/core/ingestion/community-processor.d.ts +1 -1
- package/dist/core/ingestion/community-processor.js +8 -3
- package/dist/core/ingestion/export-detection.d.ts +1 -1
- package/dist/core/ingestion/export-detection.js +1 -1
- package/dist/core/ingestion/filesystem-walker.js +1 -1
- package/dist/core/ingestion/heritage-processor.d.ts +2 -2
- package/dist/core/ingestion/heritage-processor.js +22 -11
- package/dist/core/ingestion/import-processor.d.ts +2 -2
- package/dist/core/ingestion/import-processor.js +24 -9
- package/dist/core/ingestion/language-config.js +7 -4
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +23 -11
- package/dist/core/ingestion/named-binding-extraction.js +5 -5
- package/dist/core/ingestion/parsing-processor.d.ts +4 -4
- package/dist/core/ingestion/parsing-processor.js +26 -18
- package/dist/core/ingestion/pipeline.d.ts +4 -2
- package/dist/core/ingestion/pipeline.js +50 -20
- package/dist/core/ingestion/process-processor.d.ts +2 -2
- package/dist/core/ingestion/process-processor.js +28 -14
- package/dist/core/ingestion/resolution-context.d.ts +1 -1
- package/dist/core/ingestion/resolution-context.js +14 -4
- package/dist/core/ingestion/resolvers/csharp.js +4 -3
- package/dist/core/ingestion/resolvers/go.js +3 -1
- package/dist/core/ingestion/resolvers/jvm.js +13 -4
- package/dist/core/ingestion/resolvers/standard.js +2 -2
- package/dist/core/ingestion/resolvers/utils.js +6 -2
- package/dist/core/ingestion/route-stitcher.d.ts +15 -0
- package/dist/core/ingestion/route-stitcher.js +92 -0
- package/dist/core/ingestion/structure-processor.d.ts +1 -1
- package/dist/core/ingestion/structure-processor.js +3 -2
- package/dist/core/ingestion/symbol-table.d.ts +2 -0
- package/dist/core/ingestion/symbol-table.js +5 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +177 -0
- package/dist/core/ingestion/type-env.js +20 -0
- package/dist/core/ingestion/type-extractors/csharp.js +4 -3
- package/dist/core/ingestion/type-extractors/go.js +23 -12
- package/dist/core/ingestion/type-extractors/php.js +18 -10
- package/dist/core/ingestion/type-extractors/ruby.js +15 -3
- package/dist/core/ingestion/type-extractors/rust.js +3 -2
- package/dist/core/ingestion/type-extractors/shared.js +3 -2
- package/dist/core/ingestion/type-extractors/typescript.js +11 -5
- package/dist/core/ingestion/utils.d.ts +27 -4
- package/dist/core/ingestion/utils.js +145 -100
- package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
- package/dist/core/ingestion/workers/parse-worker.js +97 -29
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/search/bm25-index.d.ts +15 -8
- package/dist/core/search/bm25-index.js +48 -98
- package/dist/core/search/hybrid-search.d.ts +9 -3
- package/dist/core/search/hybrid-search.js +30 -25
- package/dist/core/search/reranker.js +9 -7
- package/dist/core/search/types.d.ts +0 -4
- package/dist/core/semantic/tsgo-service.d.ts +5 -1
- package/dist/core/semantic/tsgo-service.js +161 -66
- package/dist/lib/tsgo-test.d.ts +2 -0
- package/dist/lib/tsgo-test.js +6 -0
- package/dist/lib/type-utils.d.ts +25 -0
- package/dist/lib/type-utils.js +22 -0
- package/dist/lib/utils.d.ts +3 -2
- package/dist/lib/utils.js +3 -2
- package/dist/mcp/compatible-stdio-transport.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +29 -56
- package/dist/mcp/local/local-backend.js +808 -1118
- package/dist/mcp/resources.js +35 -25
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +5 -5
- package/dist/mcp/tools.js +24 -25
- package/dist/storage/repo-manager.d.ts +2 -12
- package/dist/storage/repo-manager.js +1 -47
- package/dist/types/pipeline.d.ts +8 -5
- package/dist/types/pipeline.js +5 -0
- package/package.json +18 -11
- package/dist/cli/serve.d.ts +0 -5
- package/dist/cli/serve.js +0 -8
- package/dist/core/incremental/child-process.d.ts +0 -8
- package/dist/core/incremental/child-process.js +0 -649
- package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
- package/dist/core/incremental/refresh-coordinator.js +0 -147
- package/dist/core/lbug/csv-generator.d.ts +0 -28
- package/dist/core/lbug/csv-generator.js +0 -355
- package/dist/core/lbug/lbug-adapter.d.ts +0 -96
- package/dist/core/lbug/lbug-adapter.js +0 -753
- package/dist/core/lbug/schema.d.ts +0 -46
- package/dist/core/lbug/schema.js +0 -402
- package/dist/mcp/core/embedder.d.ts +0 -24
- package/dist/mcp/core/embedder.js +0 -168
- package/dist/mcp/core/lbug-adapter.d.ts +0 -29
- package/dist/mcp/core/lbug-adapter.js +0 -330
- package/dist/server/api.d.ts +0 -5
- package/dist/server/api.js +0 -340
- package/dist/server/mcp-http.d.ts +0 -7
- package/dist/server/mcp-http.js +0 -95
- package/models/mlx-embedder.py +0 -185
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
// code-mapper/src/core/incremental/refresh-coordinator.ts
|
|
2
|
-
/**
|
|
3
|
-
* @file refresh-coordinator.ts
|
|
4
|
-
* @description Parent-side orchestrator that manages child process lifecycle for
|
|
5
|
-
* incremental DB refreshes — forks the worker, sends dirty files over IPC,
|
|
6
|
-
* and resolves/rejects the caller's promise based on the child's response
|
|
7
|
-
*/
|
|
8
|
-
import { fork } from 'child_process';
|
|
9
|
-
import path from 'path';
|
|
10
|
-
import { fileURLToPath } from 'url';
|
|
11
|
-
import { parseChildMessage, assertNever, } from './types.js';
|
|
12
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
13
|
-
const __dirname = path.dirname(__filename);
|
|
14
|
-
const CHILD_SCRIPT = path.join(__dirname, 'child-process.js');
|
|
15
|
-
/** Default timeout before the child is forcefully terminated */
|
|
16
|
-
const DEFAULT_REFRESH_TIMEOUT_MS = 30_000;
|
|
17
|
-
/** Grace period between SIGTERM and SIGKILL */
|
|
18
|
-
const KILL_GRACE_MS = 5_000;
|
|
19
|
-
// ---------------------------------------------------------------------------
|
|
20
|
-
// Coordinator
|
|
21
|
-
// ---------------------------------------------------------------------------
|
|
22
|
-
export class RefreshCoordinator {
    /** In-flight refreshes keyed by repo ID — ensures one refresh per repo at a time */
    inFlight = new Map();
    /**
     * Trigger an incremental refresh for the given repo
     *
     * If a refresh for `repo.id` is already in progress, the existing promise is
     * returned and the arguments of this call are not used (deduplication).
     * Otherwise a child process is forked, the dirty file list is sent over IPC,
     * and the returned promise settles once the child reports success or failure.
     *
     * @param repo - Object providing `id`, `repoRoot`, `dbPath` and `storagePath`
     * @param dirtyFiles - Forwarded unchanged to the child in the refresh payload
     * @param existingPaths - JSON-serialised into the child's environment
     * @param timeoutMs - Optional override for DEFAULT_REFRESH_TIMEOUT_MS
     * @returns Promise resolving with the payload of the child's `success` message
     */
    async refresh(repo, dirtyFiles, existingPaths, timeoutMs) {
        const key = repo.id;
        // Deduplicate — return the existing promise if already refreshing this repo
        const existing = this.inFlight.get(key);
        if (existing)
            return existing;
        const promise = this.doRefresh(repo, dirtyFiles, existingPaths, timeoutMs).finally(() => {
            this.inFlight.delete(key);
        });
        this.inFlight.set(key, promise);
        return promise;
    }
    // -------------------------------------------------------------------------
    // Internal
    // -------------------------------------------------------------------------
    /**
     * Core refresh logic — forks the child process, sends it the refresh command,
     * and wires up IPC + timeout handling
     *
     * Settlement rules:
     * - `success` message → resolve with its payload
     * - `error` message, unparsable message, spawn/IPC error, non-zero exit,
     *   or timeout → reject with an Error
     * - a clean exit (code 0) without any result message does not settle the
     *   promise by itself; the timeout eventually rejects it
     */
    async doRefresh(repo, dirtyFiles, existingPaths, timeoutMs) {
        return new Promise((resolve, reject) => {
            // Fork the worker child
            const child = fork(CHILD_SCRIPT, [], {
                stdio: ['ignore', 'pipe', 'pipe', 'ipc'],
                env: {
                    ...process.env,
                    // Pass existing file paths via env so the child has full context
                    CODE_MAPPER_EXISTING_PATHS: JSON.stringify(existingPaths),
                },
            });
            // Nothing reads the child's output. Put both pipes into flowing mode so
            // the data is discarded instead of filling the pipe buffer and blocking
            // the child on its next write.
            child.stdout?.resume();
            child.stderr?.resume();
            let settled = false;
            let timeoutHandle;
            // -- Cleanup helper ---------------------------------------------------
            const cleanup = () => {
                child.removeAllListeners();
                child.stdout?.removeAllListeners();
                child.stderr?.removeAllListeners();
                // Keep a no-op 'error' listener: an 'error' emitted after settlement
                // (e.g. a signal that cannot be delivered) must not become an
                // uncaught exception in the parent.
                child.on('error', () => { });
                clearTimeout(timeoutHandle);
            };
            const settle = (action, value) => {
                if (settled)
                    return;
                settled = true;
                cleanup();
                if (action === 'resolve') {
                    resolve(value);
                }
                else {
                    reject(value);
                }
            };
            // -- Termination helper -----------------------------------------------
            // Must be called AFTER settle(): cleanup() removes every listener on the
            // child, which would otherwise drop the 'exit' hook registered here.
            const terminate = () => {
                if (child.exitCode !== null || child.signalCode !== null)
                    return;
                // Graceful shutdown first, SIGKILL if the child is still alive after
                // the grace period. The escalation timer is owned by this helper so
                // settling the promise does not cancel it.
                child.kill('SIGTERM');
                const killHandle = setTimeout(() => {
                    child.kill('SIGKILL');
                }, KILL_GRACE_MS);
                child.once('exit', () => clearTimeout(killHandle));
            };
            // -- Timeout handling -------------------------------------------------
            const effectiveTimeout = timeoutMs ?? DEFAULT_REFRESH_TIMEOUT_MS;
            timeoutHandle = setTimeout(() => {
                settle('reject', new Error(`Incremental refresh timed out after ${effectiveTimeout}ms for repo ${repo.id}`));
                terminate();
            }, effectiveTimeout);
            // -- IPC message handling ---------------------------------------------
            child.on('message', (raw) => {
                let msg;
                try {
                    msg = parseChildMessage(raw);
                }
                catch (err) {
                    settle('reject', new Error(`Invalid IPC message from child: ${err instanceof Error ? err.message : String(err)}`));
                    terminate();
                    return;
                }
                switch (msg.kind) {
                    case 'progress':
                        // Progress messages are informational — no action required
                        break;
                    case 'success':
                        settle('resolve', msg.payload);
                        break;
                    case 'error':
                        settle('reject', Object.assign(new Error(msg.message), msg.stack ? { childStack: msg.stack } : {}));
                        break;
                    default:
                        assertNever(msg, `Unhandled child message kind: ${JSON.stringify(msg)}`);
                }
            });
            // -- Child exit handling ----------------------------------------------
            child.on('error', (err) => {
                settle('reject', new Error(`Child process error: ${err.message}`));
            });
            child.on('exit', (code, signal) => {
                // If already settled (success/error message arrived first), nothing to do
                if (settled)
                    return;
                if (code !== 0) {
                    settle('reject', new Error(`Child process exited unexpectedly (code=${code ?? 'null'}, signal=${signal ?? 'none'})`));
                }
            });
            // -- Send the refresh command to the child ----------------------------
            const message = {
                kind: 'refresh',
                payload: {
                    repoRoot: repo.repoRoot,
                    dbPath: repo.dbPath,
                    storagePath: repo.storagePath,
                    dirtyFiles,
                },
            };
            try {
                child.send(message);
            }
            catch (err) {
                // A synchronous send failure would otherwise reject the promise while
                // leaving the timer armed and the child running until the timeout.
                settle('reject', new Error(`Child process error: ${err instanceof Error ? err.message : String(err)}`));
                terminate();
            }
        });
    }
}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file csv-generator.ts
|
|
3
|
-
* @description Streams RFC 4180-compliant CSV rows directly to disk in a single
|
|
4
|
-
* pass over graph nodes for LadybugDB bulk import
|
|
5
|
-
*
|
|
6
|
-
* File contents are lazy-read from disk with an LRU cache to avoid holding the
|
|
7
|
-
* entire repo in RAM. Rows are buffered (FLUSH_EVERY) to minimize Promise overhead
|
|
8
|
-
*/
|
|
9
|
-
import { KnowledgeGraph } from '../graph/types.js';
|
|
10
|
-
import { NodeTableName } from './schema.js';
|
|
11
|
-
export declare const sanitizeUTF8: (str: string) => string;
|
|
12
|
-
export declare const escapeCSVField: (value: string | number | undefined | null) => string;
|
|
13
|
-
export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
|
|
14
|
-
export declare const isBinaryContent: (content: string) => boolean;
|
|
15
|
-
export interface StreamedCSVResult {
|
|
16
|
-
nodeFiles: Map<NodeTableName, {
|
|
17
|
-
csvPath: string;
|
|
18
|
-
rows: number;
|
|
19
|
-
}>;
|
|
20
|
-
relCsvPath: string;
|
|
21
|
-
relRows: number;
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Stream all CSV data directly to disk files
|
|
25
|
-
*
|
|
26
|
-
* Iterates graph nodes exactly once — routes each node to the right writer
|
|
27
|
-
*/
|
|
28
|
-
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
|
|
@@ -1,355 +0,0 @@
|
|
|
1
|
-
// code-mapper/src/core/lbug/csv-generator.ts
|
|
2
|
-
/**
|
|
3
|
-
* @file csv-generator.ts
|
|
4
|
-
* @description Streams RFC 4180-compliant CSV rows directly to disk in a single
|
|
5
|
-
* pass over graph nodes for LadybugDB bulk import
|
|
6
|
-
*
|
|
7
|
-
* File contents are lazy-read from disk with an LRU cache to avoid holding the
|
|
8
|
-
* entire repo in RAM. Rows are buffered (FLUSH_EVERY) to minimize Promise overhead
|
|
9
|
-
*/
|
|
10
|
-
import fs from 'fs/promises';
|
|
11
|
-
import { createWriteStream } from 'fs';
|
|
12
|
-
import path from 'path';
|
|
13
|
-
/** Flush buffered rows to disk every N rows */
|
|
14
|
-
const FLUSH_EVERY = 500;
|
|
15
|
-
// CSV escape utilities
|
|
16
|
-
/**
 * Normalise a string so it can be written as UTF-8 CSV content
 *
 * - CRLF and bare CR line endings become LF
 * - C0 control characters (except TAB, LF, CR) and DEL are removed
 * - unpaired surrogate code units are removed; valid surrogate pairs
 *   (emoji and other astral characters) are kept intact
 * - the non-characters U+FFFE and U+FFFF are removed
 *
 * @param str - Text to clean
 * @returns The cleaned text
 */
export const sanitizeUTF8 = (str) => {
    return str
        .replace(/\r\n?/g, '\n')
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
        // The `u` flag makes the regex operate on code points, so a well-formed
        // surrogate pair is one astral code point and does not match; only lone
        // surrogates (which cannot be encoded as UTF-8) are stripped.
        .replace(/[\uD800-\uDFFF]/gu, '')
        .replace(/[\uFFFE\uFFFF]/g, '');
};
|
|
24
|
-
/**
 * Render a value as a double-quoted CSV field
 *
 * `null` and `undefined` become an empty quoted field. Any other value is
 * stringified, passed through sanitizeUTF8, and has embedded double quotes
 * doubled before being wrapped in quotes.
 *
 * @param value - Value to render
 * @returns The quoted field text
 */
export const escapeCSVField = (value) => {
    if (value == null) {
        return '""';
    }
    const cleaned = sanitizeUTF8(String(value));
    const doubled = cleaned.replace(/"/g, '""');
    return '"' + doubled + '"';
};
|
|
31
|
-
/**
 * Render a numeric value as an unquoted CSV field
 *
 * @param value - Number to render; `null`/`undefined` select the fallback
 * @param defaultValue - Fallback written when `value` is missing (default -1)
 * @returns The stringified number
 */
export const escapeCSVNumber = (value, defaultValue = -1) => String(value ?? defaultValue);
|
|
36
|
-
// Content extraction (lazy — reads from disk on demand)
|
|
37
|
-
/**
 * Heuristic binary detector
 *
 * Looks at the first 1000 UTF-16 code units and reports `true` when more than
 * 10% of them are control characters other than TAB, LF, VT, FF and CR
 * (i.e. codes 0-8, 14-31 and 127).
 *
 * @param content - Decoded file text
 * @returns `true` when the sample looks binary; `false` for empty input
 */
export const isBinaryContent = (content) => {
    if (!content || content.length === 0) {
        return false;
    }
    const head = content.slice(0, 1000);
    const controlChars = head.match(/[\x00-\x08\x0E-\x1F\x7F]/g);
    const controlCount = controlChars === null ? 0 : controlChars.length;
    return controlCount / head.length > 0.1;
};
|
|
49
|
-
/**
 * LRU content cache — avoids re-reading the same source file for every
 * symbol defined in it
 *
 * Recency is tracked through the Map's insertion order: the first key is the
 * least recently used entry, and touching an entry re-inserts it at the end.
 * Files that cannot be read are cached as the empty string.
 */
class FileContentCache {
    /** relativePath → file text, ordered from least to most recently used */
    cache = new Map();
    maxSize;
    repoPath;
    /**
     * @param repoPath - Directory that relative paths are resolved against
     * @param maxSize - Maximum number of cached files (default 3000)
     */
    constructor(repoPath, maxSize = 3000) {
        this.repoPath = repoPath;
        this.maxSize = maxSize;
    }
    /**
     * Return the text of a file, reading it from disk on a cache miss
     *
     * @param relativePath - Path relative to `repoPath`; falsy values yield ''
     * @returns The file text, or '' when the file cannot be read
     */
    async get(relativePath) {
        if (!relativePath)
            return '';
        const cached = this.cache.get(relativePath);
        if (cached !== undefined) {
            // LRU promotion: delete + set moves the key to the end of the Map
            this.cache.delete(relativePath);
            this.cache.set(relativePath, cached);
            return cached;
        }
        let content;
        try {
            content = await fs.readFile(path.join(this.repoPath, relativePath), 'utf-8');
        }
        catch {
            // Unreadable or missing file — remember the miss so it is not retried
            content = '';
        }
        this.set(relativePath, content);
        return content;
    }
    /**
     * Store an entry as most recently used, evicting the oldest one when a new
     * key would exceed `maxSize`. Overwriting an existing key never evicts.
     */
    set(key, value) {
        if (this.cache.has(key)) {
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.maxSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest !== undefined)
                this.cache.delete(oldest);
        }
        this.cache.set(key, value);
    }
}
|
|
96
|
-
/**
 * Produce the `content` column for a graph node
 *
 * - unreadable/empty files and Folder nodes → ''
 * - files detected as binary → a fixed placeholder
 * - File nodes → the whole file, capped at 100 000 characters
 * - any other node → the lines from `startLine - 2` to `endLine + 2`
 *   (clamped to the file), capped at 50 000 characters; '' when either
 *   line number is undefined
 *
 * @param node - Graph node with `label` and `properties`
 * @param contentCache - Cache used to obtain the file text
 * @returns The text to store for the node
 */
const extractContent = async (node, contentCache) => {
    const truncate = (text, limit) => {
        if (text.length > limit) {
            return text.slice(0, limit) + '\n... [truncated]';
        }
        return text;
    };
    const source = await contentCache.get(node.properties.filePath);
    if (!source || node.label === 'Folder') {
        return '';
    }
    if (isBinaryContent(source)) {
        return '[Binary file - content not stored]';
    }
    if (node.label === 'File') {
        return truncate(source, 100_000);
    }
    const { startLine, endLine } = node.properties;
    if (startLine === undefined || endLine === undefined) {
        return '';
    }
    const allLines = source.split('\n');
    const firstIdx = Math.max(0, startLine - 2);
    const lastIdx = Math.min(allLines.length - 1, endLine + 2);
    return truncate(allLines.slice(firstIdx, lastIdx + 1).join('\n'), 50_000);
};
|
|
124
|
-
// Buffered CSV writer
|
|
125
|
-
/**
 * Line-buffered writer for one CSV file
 *
 * Rows are collected in memory and written to the underlying stream in
 * chunks. A single persistent 'error' listener records the first stream
 * failure so that an asynchronous write error is reported by the next
 * flush()/finish() instead of being raised as an unhandled 'error' event.
 */
class BufferedCSVWriter {
    ws;
    buffer = [];
    /** Number of data rows added (the header is not counted) */
    rows = 0;
    /** First error emitted by the stream, or null */
    error = null;
    /**
     * @param filePath - Destination file, created/truncated immediately
     * @param header - Header line, written with the first flush
     */
    constructor(filePath, header) {
        this.ws = createWriteStream(filePath, 'utf-8');
        this.ws.on('error', (err) => {
            this.error ??= err;
        });
        this.buffer.push(header);
    }
    /**
     * Queue one already-formatted CSV line
     *
     * @returns A promise that is already resolved unless the buffer reached
     *          FLUSH_EVERY lines, in which case it is the flush promise
     */
    addRow(row) {
        this.buffer.push(row);
        this.rows++;
        if (this.buffer.length >= FLUSH_EVERY) {
            return this.flush();
        }
        return Promise.resolve();
    }
    /**
     * Write all buffered lines to the stream
     *
     * Resolves immediately when the stream accepted the chunk, otherwise once
     * the stream emits 'drain'. Rejects when the stream has failed.
     */
    flush() {
        if (this.error)
            return Promise.reject(this.error);
        if (this.buffer.length === 0)
            return Promise.resolve();
        const chunk = this.buffer.join('\n') + '\n';
        this.buffer.length = 0;
        if (this.ws.write(chunk))
            return Promise.resolve();
        // Backpressure: wait for 'drain', but give up if the stream fails first.
        // Whichever listener fires removes the other, so nothing accumulates.
        return new Promise((resolve, reject) => {
            const onDrain = () => {
                this.ws.removeListener('error', onError);
                resolve();
            };
            const onError = (err) => {
                this.ws.removeListener('drain', onDrain);
                reject(err);
            };
            this.ws.once('drain', onDrain);
            this.ws.once('error', onError);
        });
    }
    /** Flush remaining lines and close the file */
    async finish() {
        await this.flush();
        return new Promise((resolve, reject) => {
            const onError = (err) => reject(err);
            this.ws.once('error', onError);
            this.ws.end(() => {
                this.ws.removeListener('error', onError);
                if (this.error) {
                    reject(this.error);
                }
                else {
                    resolve();
                }
            });
        });
    }
}
|
|
171
|
-
/**
 * Stream all CSV data directly to disk files
 *
 * Iterates graph nodes exactly once — routes each node to the right writer —
 * then writes every relationship to `relations.csv`. Nodes whose label has no
 * writer are skipped. File nodes are de-duplicated by id.
 *
 * The process listener limit is raised for the duration of the export and is
 * restored even when the export fails; on failure all opened write streams
 * are destroyed before the error is rethrown.
 *
 * @param graph - Graph exposing `iterNodes()` and `iterRelationships()`
 * @param repoPath - Repository root used to read file contents
 * @param csvDir - Output directory; removed and recreated on every call
 * @returns `nodeFiles` (tables that received at least one row, with path and
 *          row count), `relCsvPath` and `relRows`
 */
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
    // Remove stale CSVs from previous runs, then recreate. A failed removal is
    // deliberately ignored: mkdir below surfaces any real filesystem problem.
    try {
        await fs.rm(csvDir, { recursive: true, force: true });
    }
    catch { }
    await fs.mkdir(csvDir, { recursive: true });
    // Raise listener limit for ~30 concurrent write-streams (restored at end)
    const prevMax = process.getMaxListeners();
    process.setMaxListeners(prevMax + 40);
    // Every writer created below is tracked so it can be torn down on failure
    const opened = [];
    const openWriter = (fileName, header) => {
        const writer = new BufferedCSVWriter(path.join(csvDir, fileName), header);
        opened.push(writer);
        return writer;
    };
    try {
        const contentCache = new FileContentCache(repoPath);
        // Create writers for every node type
        const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,description';
        const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,description,parameterCount,returnType';
        // Multi-language node types share the same CSV shape (no isExported column)
        const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description';
        const MULTI_LANG_TYPES = ['Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
            'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module'];
        const fileWriter = openWriter('file.csv', 'id,name,filePath,content');
        const folderWriter = openWriter('folder.csv', 'id,name,filePath');
        const functionWriter = openWriter('function.csv', codeElementHeader);
        const classWriter = openWriter('class.csv', codeElementHeader);
        const interfaceWriter = openWriter('interface.csv', codeElementHeader);
        const methodWriter = openWriter('method.csv', methodHeader);
        const codeElemWriter = openWriter('codeelement.csv', codeElementHeader);
        const communityWriter = openWriter('community.csv', 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
        const processWriter = openWriter('process.csv', 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
        const multiLangWriters = new Map();
        for (const t of MULTI_LANG_TYPES) {
            multiLangWriters.set(t, openWriter(`${t.toLowerCase()}.csv`, multiLangHeader));
        }
        // A Map (not a plain object) so labels can never match prototype keys
        const codeWriters = new Map([
            ['Function', functionWriter],
            ['Class', classWriter],
            ['Interface', interfaceWriter],
            ['CodeElement', codeElemWriter],
        ]);
        // Column groups shared by several row shapes
        const idCols = (node) => [
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField(node.properties.filePath || ''),
        ];
        const rangeCols = (node) => [
            escapeCSVNumber(node.properties.startLine, -1),
            escapeCSVNumber(node.properties.endLine, -1),
        ];
        const exportedCol = (node) => (node.properties.isExported ? 'true' : 'false');
        const seenFileIds = new Set();
        // Single pass over all nodes
        for (const node of graph.iterNodes()) {
            switch (node.label) {
                case 'File': {
                    if (seenFileIds.has(node.id))
                        break;
                    seenFileIds.add(node.id);
                    const content = await extractContent(node, contentCache);
                    await fileWriter.addRow([...idCols(node), escapeCSVField(content)].join(','));
                    break;
                }
                case 'Folder':
                    await folderWriter.addRow(idCols(node).join(','));
                    break;
                case 'Community': {
                    const keywords = node.properties.keywords || [];
                    // List literal is written unquoted, with its own escaping of
                    // backslashes, single quotes and commas
                    const keywordsStr = `[${keywords.map((k) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
                    await communityWriter.addRow([
                        escapeCSVField(node.id),
                        escapeCSVField(node.properties.name || ''),
                        escapeCSVField(node.properties.heuristicLabel || ''),
                        keywordsStr,
                        escapeCSVField(node.properties.description || ''),
                        escapeCSVField(node.properties.enrichedBy || 'heuristic'),
                        escapeCSVNumber(node.properties.cohesion, 0),
                        escapeCSVNumber(node.properties.symbolCount, 0),
                    ].join(','));
                    break;
                }
                case 'Process': {
                    const communities = node.properties.communities || [];
                    const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
                    await processWriter.addRow([
                        escapeCSVField(node.id),
                        escapeCSVField(node.properties.name || ''),
                        escapeCSVField(node.properties.heuristicLabel || ''),
                        escapeCSVField(node.properties.processType || ''),
                        escapeCSVNumber(node.properties.stepCount, 0),
                        escapeCSVField(communitiesStr),
                        escapeCSVField(node.properties.entryPointId || ''),
                        escapeCSVField(node.properties.terminalId || ''),
                    ].join(','));
                    break;
                }
                case 'Method': {
                    const content = await extractContent(node, contentCache);
                    await methodWriter.addRow([
                        ...idCols(node),
                        ...rangeCols(node),
                        exportedCol(node),
                        escapeCSVField(content),
                        escapeCSVField(node.properties.description || ''),
                        escapeCSVNumber(node.properties.parameterCount, 0),
                        escapeCSVField(node.properties.returnType || ''),
                    ].join(','));
                    break;
                }
                default: {
                    // Code element nodes
                    const writer = codeWriters.get(node.label);
                    if (writer) {
                        const content = await extractContent(node, contentCache);
                        await writer.addRow([
                            ...idCols(node),
                            ...rangeCols(node),
                            exportedCol(node),
                            escapeCSVField(content),
                            escapeCSVField(node.properties.description || ''),
                        ].join(','));
                        break;
                    }
                    // Multi-language node types (Struct, Impl, Trait, Macro, etc)
                    const mlWriter = multiLangWriters.get(node.label);
                    if (mlWriter) {
                        const content = await extractContent(node, contentCache);
                        await mlWriter.addRow([
                            ...idCols(node),
                            ...rangeCols(node),
                            escapeCSVField(content),
                            escapeCSVField(node.properties.description || ''),
                        ].join(','));
                    }
                    break;
                }
            }
        }
        // Table name → writer, in the order tables are reported to the caller
        const tableMap = [
            ['File', fileWriter], ['Folder', folderWriter],
            ['Function', functionWriter], ['Class', classWriter],
            ['Interface', interfaceWriter], ['Method', methodWriter],
            ['CodeElement', codeElemWriter],
            ['Community', communityWriter], ['Process', processWriter],
            ...multiLangWriters,
        ];
        // Flush and close all node writers
        await Promise.all(tableMap.map(([, writer]) => writer.finish()));
        // Stream relationship CSV
        const relCsvPath = path.join(csvDir, 'relations.csv');
        const relWriter = openWriter('relations.csv', 'from,to,type,confidence,reason,step,callLine');
        for (const rel of graph.iterRelationships()) {
            await relWriter.addRow([
                escapeCSVField(rel.sourceId),
                escapeCSVField(rel.targetId),
                escapeCSVField(rel.type),
                escapeCSVNumber(rel.confidence, 1.0),
                escapeCSVField(rel.reason),
                escapeCSVNumber(rel.step, 0),
                escapeCSVNumber(rel.callLine, 0),
            ].join(','));
        }
        await relWriter.finish();
        // Build result map (only tables with rows)
        const nodeFiles = new Map();
        for (const [name, writer] of tableMap) {
            if (writer.rows > 0) {
                nodeFiles.set(name, { csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`), rows: writer.rows });
            }
        }
        return { nodeFiles, relCsvPath, relRows: relWriter.rows };
    }
    catch (err) {
        // Release file descriptors of every stream opened so far
        for (const writer of opened) {
            writer.ws.destroy();
        }
        throw err;
    }
    finally {
        // Restore listener limit
        process.setMaxListeners(prevMax);
    }
};
|
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file lbug-adapter.ts
|
|
3
|
-
* @description Core LadybugDB adapter — manages singleton DB connection, schema
|
|
4
|
-
* creation, bulk CSV import, FTS indexing, and Cypher query execution
|
|
5
|
-
*/
|
|
6
|
-
import lbug from '@ladybugdb/core';
|
|
7
|
-
import { KnowledgeGraph } from '../graph/types.js';
|
|
8
|
-
export declare const initLbug: (dbPath: string) => Promise<{
|
|
9
|
-
db: lbug.Database;
|
|
10
|
-
conn: lbug.Connection;
|
|
11
|
-
}>;
|
|
12
|
-
/** Execute multiple queries against one repo DB atomically (holds session lock) */
|
|
13
|
-
export declare const withLbugDb: <T>(dbPath: string, operation: () => Promise<T>) => Promise<T>;
|
|
14
|
-
export type LbugProgressCallback = (message: string) => void;
|
|
15
|
-
export declare const loadGraphToLbug: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: LbugProgressCallback) => Promise<{
|
|
16
|
-
success: boolean;
|
|
17
|
-
insertedRels: number;
|
|
18
|
-
skippedRels: number;
|
|
19
|
-
warnings: string[];
|
|
20
|
-
}>;
|
|
21
|
-
/**
|
|
22
|
-
* Insert a single node to LadybugDB
|
|
23
|
-
* @param label - Node type (File, Function, Class, etc)
|
|
24
|
-
* @param properties - Node properties
|
|
25
|
-
* @param dbPath - Path to LadybugDB database (optional if already initialized)
|
|
26
|
-
*/
|
|
27
|
-
export declare const insertNodeToLbug: (label: string, properties: Record<string, any>, dbPath?: string) => Promise<boolean>;
|
|
28
|
-
/**
|
|
29
|
-
* Batch insert multiple nodes using a single connection
|
|
30
|
-
* @param nodes - Array of {label, properties} to insert
|
|
31
|
-
* @param dbPath - Path to LadybugDB database
|
|
32
|
-
*/
|
|
33
|
-
export declare const batchInsertNodesToLbug: (nodes: Array<{
|
|
34
|
-
label: string;
|
|
35
|
-
properties: Record<string, any>;
|
|
36
|
-
}>, dbPath: string) => Promise<{
|
|
37
|
-
inserted: number;
|
|
38
|
-
failed: number;
|
|
39
|
-
}>;
|
|
40
|
-
export declare const executeQuery: (cypher: string) => Promise<any[]>;
|
|
41
|
-
export declare const executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>;
|
|
42
|
-
export declare const getLbugStats: () => Promise<{
|
|
43
|
-
nodes: number;
|
|
44
|
-
edges: number;
|
|
45
|
-
}>;
|
|
46
|
-
/**
|
|
47
|
-
* Load cached embeddings before a rebuild
|
|
48
|
-
*
|
|
49
|
-
* Returns all vectors so they can be re-inserted after the graph reloads,
|
|
50
|
-
* avoiding expensive re-embedding of unchanged nodes
|
|
51
|
-
*/
|
|
52
|
-
export declare const loadCachedEmbeddings: () => Promise<{
|
|
53
|
-
embeddingNodeIds: Set<string>;
|
|
54
|
-
embeddings: Array<{
|
|
55
|
-
nodeId: string;
|
|
56
|
-
embedding: number[];
|
|
57
|
-
}>;
|
|
58
|
-
}>;
|
|
59
|
-
export declare const closeLbug: () => Promise<void>;
|
|
60
|
-
export declare const isLbugReady: () => boolean;
|
|
61
|
-
/**
|
|
62
|
-
* Delete all nodes (and relationships) for a specific file
|
|
63
|
-
* @param filePath - File path to delete nodes for
|
|
64
|
-
* @param dbPath - Optional path for per-query connection
|
|
65
|
-
*/
|
|
66
|
-
export declare const deleteNodesForFile: (filePath: string, dbPath?: string) => Promise<{
|
|
67
|
-
deletedNodes: number;
|
|
68
|
-
}>;
|
|
69
|
-
export declare const getEmbeddingTableName: () => string;
|
|
70
|
-
/** Load the FTS extension (idempotent — tracks loaded state) */
|
|
71
|
-
export declare const loadFTSExtension: () => Promise<void>;
|
|
72
|
-
/**
|
|
73
|
-
* Create a full-text search index on a table
|
|
74
|
-
* @param tableName - Node table name (e.g. 'File', 'Function')
|
|
75
|
-
* @param indexName - FTS index name
|
|
76
|
-
* @param properties - Properties to index (e.g. ['name', 'content'])
|
|
77
|
-
* @param stemmer - Stemming algorithm (default: 'porter')
|
|
78
|
-
*/
|
|
79
|
-
export declare const createFTSIndex: (tableName: string, indexName: string, properties: string[], stemmer?: string) => Promise<void>;
|
|
80
|
-
/**
|
|
81
|
-
* Query a full-text search index
|
|
82
|
-
* @param tableName - Node table name
|
|
83
|
-
* @param indexName - FTS index name
|
|
84
|
-
* @param query - Search query string
|
|
85
|
-
* @param limit - Max results
|
|
86
|
-
* @param conjunctive - If true, all terms must match (AND); false = any term (OR)
|
|
87
|
-
*/
|
|
88
|
-
export declare const queryFTS: (tableName: string, indexName: string, query: string, limit?: number, conjunctive?: boolean) => Promise<Array<{
|
|
89
|
-
nodeId: string;
|
|
90
|
-
name: string;
|
|
91
|
-
filePath: string;
|
|
92
|
-
score: number;
|
|
93
|
-
[key: string]: any;
|
|
94
|
-
}>>;
|
|
95
|
-
/** Drop an FTS index */
|
|
96
|
-
export declare const dropFTSIndex: (tableName: string, indexName: string) => Promise<void>;
|