@codragraph/cli 1.6.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
- package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
- package/dist/_shared/cgdb/schema-constants.js +67 -0
- package/dist/_shared/cgdb/schema-constants.js.map +1 -0
- package/dist/_shared/index.d.ts +2 -2
- package/dist/_shared/index.js +1 -1
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +109 -6
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +45 -23
- package/dist/cli/index-repo.js +3 -3
- package/dist/cli/index.js +16 -2
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +22 -11
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +52 -19
- package/dist/cli/tool.js +4 -0
- package/dist/cli/wiki.js +3 -3
- package/dist/core/augmentation/engine.js +7 -7
- package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
- package/dist/core/cgdb/cgdb-adapter.js +1320 -0
- package/dist/core/cgdb/content-read.d.ts +46 -0
- package/dist/core/cgdb/content-read.js +64 -0
- package/dist/core/cgdb/csv-generator.d.ts +29 -0
- package/dist/core/cgdb/csv-generator.js +492 -0
- package/dist/core/cgdb/pool-adapter.d.ts +93 -0
- package/dist/core/cgdb/pool-adapter.js +550 -0
- package/dist/core/cgdb/schema.d.ts +62 -0
- package/dist/core/cgdb/schema.js +502 -0
- package/dist/core/embeddings/embedding-pipeline.js +27 -10
- package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
- package/dist/core/graphstore/cgdb-row-source.js +141 -0
- package/dist/core/graphstore/index.d.ts +1 -1
- package/dist/core/graphstore/index.js +3 -3
- package/dist/core/group/bridge-db.d.ts +2 -2
- package/dist/core/group/bridge-db.js +123 -36
- package/dist/core/group/bridge-schema.d.ts +4 -4
- package/dist/core/group/bridge-schema.js +4 -4
- package/dist/core/group/cross-impact.js +3 -3
- package/dist/core/group/sync.js +4 -4
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +153 -21
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +114 -27
- package/dist/core/search/bm25-index.d.ts +3 -3
- package/dist/core/search/bm25-index.js +75 -23
- package/dist/core/search/hybrid-search.js +2 -2
- package/dist/core/wiki/generator.d.ts +2 -2
- package/dist/core/wiki/generator.js +4 -4
- package/dist/core/wiki/graph-queries.d.ts +2 -2
- package/dist/core/wiki/graph-queries.js +5 -5
- package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
- package/dist/mcp/core/cgdb-adapter.js +5 -0
- package/dist/mcp/core/embedder.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +2 -2
- package/dist/mcp/local/local-backend.js +36 -19
- package/dist/mcp/server.js +3 -3
- package/dist/mcp/tools.js +1 -1
- package/dist/server/analyze-worker.js +2 -2
- package/dist/server/api.js +34 -33
- package/dist/storage/repo-manager.d.ts +42 -3
- package/dist/storage/repo-manager.js +23 -4
- package/hooks/claude/codragraph-hook.cjs +98 -5
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/build.js +8 -9
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
- package/vendor/tree-sitter-proto/src/node-types.json +1 -1
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in cgdb node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
/**
|
|
22
|
+
* Decode a single (content, contentEncoding) pair from a Cypher row.
|
|
23
|
+
*
|
|
24
|
+
* Returns the input content unchanged when:
|
|
25
|
+
* - the encoding is missing / empty / `'none'` (the common case for
|
|
26
|
+
* 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
|
|
27
|
+
* `--compress`);
|
|
28
|
+
* - content is undefined (a null content is normalized to undefined; the caller decides whether that's an error);
|
|
29
|
+
* - content is not a string (pre-Phase-2 indexes never wrote non-string
|
|
30
|
+
* content, but defensive: don't crash a read path on a malformed row).
|
|
31
|
+
*
|
|
32
|
+
* Throws (via `decodeContent`) only when the row claims an encoding this
|
|
33
|
+
* CLI build can't decode — that's a forward-compat error and the right
|
|
34
|
+
* behavior is to fail loudly rather than return wrong content.
|
|
35
|
+
*/
|
|
36
|
+
// NOTE(review): the runtime implementation returns non-string content as-is, so the
// declared `string | undefined` return type is narrower than the actual behavior.
export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
|
|
37
|
+
/**
|
|
38
|
+
* Apply `decodeContentField` to a row that carries `content` and
|
|
39
|
+
* `contentEncoding` keys (or their numeric column-index aliases).
|
|
40
|
+
*
|
|
41
|
+
* The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
|
|
42
|
+
* driver versions vary on whether named keys are populated, so existing
|
|
43
|
+
* read sites do `r.content ?? r[N]`. This helper accepts the same
|
|
44
|
+
* pattern. Does not mutate the input: returns a shallow copy when content is decoded, otherwise the original row object.
|
|
45
|
+
*/
|
|
46
|
+
// Runtime defaults: contentKey = 'content', encodingKey = 'contentEncoding'.
export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in cgdb node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
import { decodeContent } from '@codragraph/graphstore';
|
|
22
|
+
/**
 * Decode one (content, contentEncoding) pair read from a Cypher row.
 *
 * Passthrough cases (no decoding is attempted):
 *   - content is null or undefined -> returns `undefined`;
 *   - content is not a string -> returned as-is (defensive: a malformed
 *     row must not crash the read path);
 *   - encoding is not a string, is empty, or is `'none'` -> content is
 *     returned unchanged (the case for indexes written without
 *     `--compress`).
 *
 * Otherwise the pair is handed to `decodeContent`, which is expected to
 * throw when the row claims an encoding this build cannot decode; that
 * error is deliberately not caught here so the failure is loud.
 *
 * @param {unknown} content  Raw value of the `content` column.
 * @param {unknown} encoding Raw value of the `contentEncoding` column.
 * @returns {string | undefined} Decoded (or passed-through) content.
 */
export function decodeContentField(content, encoding) {
    // `== null` matches both null and undefined.
    if (content == null) {
        return undefined;
    }
    const hasRealEncoding = typeof encoding === 'string' && encoding !== '' && encoding !== 'none';
    if (typeof content !== 'string' || !hasRealEncoding) {
        return content;
    }
    return decodeContent(content, encoding);
}
|
|
47
|
+
/**
 * Run `decodeContentField` over a whole row.
 *
 * The key parameters let callers point at either the named columns or
 * their numeric column-index aliases (`r[N]`), mirroring the
 * `r.content ?? r[N]` pattern used by existing read sites.
 *
 * The input row is never mutated. When there is nothing to decode
 * (content is null/undefined, or the encoding is missing / empty /
 * `'none'`) the SAME row object is returned; otherwise a shallow copy
 * with the decoded content is returned.
 *
 * @param {Record<string, unknown>} row Row as returned by the driver.
 * @param {PropertyKey} [contentKey='content'] Key holding the content.
 * @param {PropertyKey} [encodingKey='contentEncoding'] Key holding the encoding tag.
 * @returns {Record<string, unknown>} The original row or a decoded shallow copy.
 */
export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
    const rawContent = row[contentKey];
    if (rawContent == null) {
        return row;
    }
    const tag = row[encodingKey];
    const isEncoded = typeof tag === 'string' && tag !== '' && tag !== 'none';
    if (!isEncoded) {
        return row;
    }
    const decoded = decodeContentField(rawContent, tag);
    return { ...row, [contentKey]: decoded };
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Generator for LadybugDB Hybrid Schema
|
|
3
|
+
*
|
|
4
|
+
* Streams CSV rows directly to disk files in a single pass over graph nodes.
|
|
5
|
+
* File contents are lazy-read from disk per-node to avoid holding the entire
|
|
6
|
+
* repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
|
|
7
|
+
* per-row Promise overhead.
|
|
8
|
+
*
|
|
9
|
+
* RFC 4180 Compliant:
|
|
10
|
+
* - Fields containing commas, double quotes, or newlines are enclosed in double quotes
|
|
11
|
+
* - Double quotes within fields are escaped by doubling them ("")
|
|
12
|
+
* - All fields are consistently quoted for safety with code content
|
|
13
|
+
*/
|
|
14
|
+
import { KnowledgeGraph } from '../graph/types.js';
|
|
15
|
+
import { NodeTableName } from './schema.js';
|
|
16
|
+
import { type ContentEncoding } from '@codragraph/graphstore';
|
|
17
|
+
/** Normalizes CRLF/CR line endings to LF and removes control characters (other than tab/LF), DEL, surrogates and U+FFFE/U+FFFF; see the implementation for the exact surrogate handling. */
export declare const sanitizeUTF8: (str: string) => string;
|
|
18
|
+
/** Stringifies and sanitizes the value, then wraps it in double quotes with embedded quotes doubled; null/undefined become an empty quoted field (`""`). */
export declare const escapeCSVField: (value: string | number | undefined | null) => string;
|
|
19
|
+
/** Stringifies a number without quoting; null/undefined fall back to `defaultValue` (-1 when omitted in the implementation). */
export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
|
|
20
|
+
/** Heuristic: true when more than 10% of the first 1000 UTF-16 code units are control characters (char codes 9-13, i.e. tab/LF/VT/FF/CR, are not counted). */
export declare const isBinaryContent: (content: string) => boolean;
|
|
21
|
+
/**
 * Result returned by `streamAllCSVsToDisk`.
 * NOTE(review): field meanings below are inferred from names — presumably
 * `nodeFiles` maps each node table to its CSV path and row count, and
 * `relCsvPath` / `relRows` describe the relationship CSV; confirm against
 * the implementation.
 */
export interface StreamedCSVResult {
|
|
22
|
+
nodeFiles: Map<NodeTableName, {
|
|
23
|
+
csvPath: string;
|
|
24
|
+
rows: number;
|
|
25
|
+
}>;
|
|
26
|
+
relCsvPath: string;
|
|
27
|
+
relRows: number;
|
|
28
|
+
}
|
|
29
|
+
/** Streams the graph's node CSVs into `csvDir` (the directory is removed and recreated first). `compress` selects the content encoding; undefined or 'none' leaves content unencoded and tags rows with 'none'. */
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;
|
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Generator for LadybugDB Hybrid Schema
|
|
3
|
+
*
|
|
4
|
+
* Streams CSV rows directly to disk files in a single pass over graph nodes.
|
|
5
|
+
* File contents are lazy-read from disk per-node to avoid holding the entire
|
|
6
|
+
* repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
|
|
7
|
+
* per-row Promise overhead.
|
|
8
|
+
*
|
|
9
|
+
* RFC 4180 Compliant:
|
|
10
|
+
* - Fields containing commas, double quotes, or newlines are enclosed in double quotes
|
|
11
|
+
* - Double quotes within fields are escaped by doubling them ("")
|
|
12
|
+
* - All fields are consistently quoted for safety with code content
|
|
13
|
+
*/
|
|
14
|
+
import fs from 'fs/promises';
|
|
15
|
+
import { createWriteStream } from 'fs';
|
|
16
|
+
import path from 'path';
|
|
17
|
+
import { encodeContent } from '@codragraph/graphstore';
|
|
18
|
+
/** Flush buffered rows to disk every N rows */
|
|
19
|
+
const FLUSH_EVERY = 500;
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// CSV ESCAPE UTILITIES
|
|
22
|
+
// ============================================================================
|
|
23
|
+
/**
 * Normalize a string so it can be written safely into a CSV field.
 *
 *  - CRLF and bare CR line endings become LF.
 *  - C0 control characters other than tab and LF, plus DEL, are removed.
 *  - UNPAIRED surrogates are removed. The `u` flag makes the character
 *    class operate on code points, so valid surrogate pairs (emoji and
 *    other astral characters) are preserved; without it every surrogate
 *    code unit matched and such characters were silently deleted.
 *  - The non-characters U+FFFE and U+FFFF are removed.
 *
 * @param {string} str Text to sanitize.
 * @returns {string} Sanitized text.
 */
export const sanitizeUTF8 = (str) => {
    return str
        .replace(/\r\n/g, '\n')
        .replace(/\r/g, '\n')
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
        .replace(/[\uD800-\uDFFF]/gu, '')
        .replace(/[\uFFFE\uFFFF]/g, '');
};
|
|
31
|
+
/**
 * Render a value as an always-quoted CSV field.
 *
 * null/undefined become an empty quoted field. Anything else is
 * stringified, passed through `sanitizeUTF8`, and has embedded double
 * quotes doubled before being wrapped in quotes.
 *
 * @param {string | number | null | undefined} value Field value.
 * @returns {string} Quoted CSV field.
 */
export const escapeCSVField = (value) => {
    if (value == null) {
        return '""';
    }
    const cleaned = sanitizeUTF8(String(value));
    const quoteDoubled = cleaned.replace(/"/g, '""');
    return `"${quoteDoubled}"`;
};
|
|
38
|
+
/**
 * Render a numeric value as an unquoted CSV field.
 *
 * @param {number | null | undefined} value Number to write.
 * @param {number} [defaultValue=-1] Used when `value` is null/undefined.
 * @returns {string} String form of the value or of the default.
 */
export const escapeCSVNumber = (value, defaultValue = -1) => String(value ?? defaultValue);
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// CONTENT EXTRACTION (lazy — reads from disk on demand)
|
|
45
|
+
// ============================================================================
|
|
46
|
+
/**
 * Heuristic binary detector.
 *
 * Looks at the first 1000 UTF-16 code units and counts control
 * characters (codes below 9, codes 14-31, and 127; tab/LF/VT/FF/CR are
 * not counted). Content is considered binary when strictly more than
 * 10% of the sample is made of such characters.
 *
 * @param {string} content Text to inspect.
 * @returns {boolean} True when the sample looks binary; false for empty input.
 */
export const isBinaryContent = (content) => {
    if (!content || content.length === 0) {
        return false;
    }
    const head = content.slice(0, 1000);
    const isControlCode = (unit) => unit < 9 || (unit > 13 && unit < 32) || unit === 127;
    let controlCount = 0;
    for (let pos = 0; pos < head.length; pos += 1) {
        if (isControlCode(head.charCodeAt(pos))) {
            controlCount += 1;
        }
    }
    return controlCount / head.length > 0.1;
};
|
|
58
|
+
/**
 * LRU content cache — avoids re-reading the same source file for every
 * symbol defined in it.
 *
 * Recency is tracked through the Map's insertion order (oldest entry
 * first): a hit deletes and re-inserts the key, and eviction removes the
 * first key. This keeps hits and evictions O(1) instead of scanning a
 * separate access-order array on every lookup.
 */
class FileContentCache {
    /** relative path -> file content; iteration order is least- to most-recently used. */
    cache = new Map();
    maxSize;
    repoPath;
    /**
     * @param {string} repoPath Directory that relative paths are resolved against.
     * @param {number} [maxSize=3000] Maximum number of cached files.
     */
    constructor(repoPath, maxSize = 3000) {
        this.repoPath = repoPath;
        this.maxSize = maxSize;
    }
    /**
     * Return the UTF-8 content of a file, reading it from disk on a miss.
     *
     * @param {string} relativePath Path relative to `repoPath`.
     * @returns {Promise<string>} File content; '' for an empty path or an unreadable file.
     */
    async get(relativePath) {
        if (!relativePath)
            return '';
        const cached = this.cache.get(relativePath);
        if (cached !== undefined) {
            // Re-insert so the key moves to the most-recently-used end.
            this.cache.delete(relativePath);
            this.cache.set(relativePath, cached);
            return cached;
        }
        let content;
        try {
            content = await fs.readFile(path.join(this.repoPath, relativePath), 'utf-8');
        }
        catch {
            // Deliberate best-effort: an unreadable file is cached as empty
            // so it is not retried for every symbol that references it.
            content = '';
        }
        this.set(relativePath, content);
        return content;
    }
    /**
     * Insert or refresh an entry, evicting the least recently used one
     * when a new key would exceed `maxSize`.
     *
     * @param {string} key Relative path.
     * @param {string} value File content.
     */
    set(key, value) {
        if (this.cache.has(key)) {
            // Refreshing an existing key must not evict another entry.
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.maxSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest !== undefined)
                this.cache.delete(oldest);
        }
        this.cache.set(key, value);
    }
}
|
|
106
|
+
/**
 * Resolve the text to store in a node's `content` column.
 *
 *  - Folder nodes: always '' (checked first so no disk read is attempted
 *    on a directory path, which would only fail and leave an empty entry
 *    in the cache).
 *  - Missing/empty file content: ''.
 *  - Content flagged by `isBinaryContent`: a fixed placeholder string.
 *  - File nodes: the file content, truncated to 10000 characters.
 *  - Any other node: the lines at array indices `startLine - 2` through
 *    `endLine + 2` (clamped to the file), truncated to 5000 characters;
 *    '' when either line number is null/undefined.
 *
 * @param {{label: string, properties: object}} node Graph node.
 * @param {{get: (relativePath: string) => Promise<string>}} contentCache File content provider.
 * @returns {Promise<string>} Content to store for the node.
 */
const extractContent = async (node, contentCache) => {
    if (node.label === 'Folder')
        return '';
    const content = await contentCache.get(node.properties.filePath);
    if (!content)
        return '';
    if (isBinaryContent(content))
        return '[Binary file - content not stored]';
    if (node.label === 'File') {
        const MAX_FILE_CONTENT = 10000;
        return content.length > MAX_FILE_CONTENT
            ? content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]'
            : content;
    }
    const { startLine, endLine } = node.properties;
    // `== null` also rejects null, which would otherwise coerce to 0 in
    // the arithmetic below and yield an unrelated snippet.
    if (startLine == null || endLine == null)
        return '';
    const lines = content.split('\n');
    const start = Math.max(0, startLine - 2);
    const end = Math.min(lines.length - 1, endLine + 2);
    const snippet = lines.slice(start, end + 1).join('\n');
    const MAX_SNIPPET = 5000;
    return snippet.length > MAX_SNIPPET
        ? snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]'
        : snippet;
};
|
|
134
|
+
// ============================================================================
|
|
135
|
+
// BUFFERED CSV WRITER
|
|
136
|
+
// ============================================================================
|
|
137
|
+
/**
 * Buffers CSV rows in memory and writes them to a file stream in chunks.
 *
 * Error handling: a persistent 'error' listener records the first stream
 * error. Without it, an error raised between flushes (failed open, disk
 * full, ...) has no listener and is thrown by Node as an unhandled
 * 'error' event. Once an error is recorded, `flush` and `finish` reject
 * with it instead of silently dropping rows.
 */
class BufferedCSVWriter {
    ws;
    buffer = [];
    rows = 0;
    /** First error emitted by the stream, or null while healthy. */
    streamError = null;
    /**
     * @param {string} filePath Destination CSV file.
     * @param {string} header Header line, written ahead of the first row.
     */
    constructor(filePath, header) {
        this.ws = createWriteStream(filePath, 'utf-8');
        // Large repos flush many times — raise listener cap to avoid MaxListenersExceededWarning
        this.ws.setMaxListeners(50);
        this.ws.on('error', (err) => {
            this.streamError ??= err;
        });
        this.buffer.push(header);
    }
    /**
     * Queue one already-formatted CSV line; flushes once the buffer
     * reaches FLUSH_EVERY entries.
     *
     * @param {string} row CSV line without a trailing newline.
     * @returns {Promise<void>} Resolves when the row is buffered or flushed.
     */
    addRow(row) {
        this.buffer.push(row);
        this.rows++;
        if (this.buffer.length >= FLUSH_EVERY) {
            return this.flush();
        }
        return Promise.resolve();
    }
    /**
     * Write all buffered lines to the stream, waiting for 'drain' when
     * the stream signals backpressure.
     *
     * @returns {Promise<void>} Rejects with the stream error, if any.
     */
    flush() {
        if (this.streamError)
            return Promise.reject(this.streamError);
        if (this.buffer.length === 0)
            return Promise.resolve();
        const chunk = this.buffer.join('\n') + '\n';
        this.buffer.length = 0;
        return new Promise((resolve, reject) => {
            const onError = (err) => {
                // Drop the pending drain listener so it does not leak.
                this.ws.removeListener('drain', onDrain);
                reject(err);
            };
            const onDrain = () => {
                this.ws.removeListener('error', onError);
                resolve();
            };
            this.ws.once('error', onError);
            if (this.ws.write(chunk)) {
                this.ws.removeListener('error', onError);
                resolve();
            }
            else {
                this.ws.once('drain', onDrain);
            }
        });
    }
    /**
     * Flush remaining rows and close the stream.
     *
     * @returns {Promise<void>} Resolves once the stream has finished; rejects on stream error.
     */
    async finish() {
        await this.flush();
        return new Promise((resolve, reject) => {
            this.ws.once('error', reject);
            this.ws.end((err) => {
                // The end callback may receive the error directly; also
                // honour one recorded by the persistent listener.
                const failure = err ?? this.streamError;
                if (failure)
                    reject(failure);
                else
                    resolve();
            });
        });
    }
}
|
|
183
|
+
/**
|
|
184
|
+
* Stream all CSV data directly to disk files.
|
|
185
|
+
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
186
|
+
* File contents are lazy-read from disk with a generous LRU cache.
|
|
187
|
+
*/
|
|
188
|
+
/**
 * Apply the RFC 0001 Phase 2 content encoding to one content value.
 *
 * With `compress` unset (or `'none'`) the content passes through
 * untouched and the tag is `'none'`. Otherwise the content is run
 * through `encodeContent` and the tag is the requested encoding.
 *
 * The tag is always produced — even as `'none'` — so every
 * content-bearing CSV row has the same column layout regardless of the
 * compression mode.
 *
 * @param {string} content Plain content for the row.
 * @param {string | undefined} compress Requested encoding.
 * @returns {{wireContent: string, tag: string}} Value for the `content`
 *   column and the matching `contentEncoding` tag.
 */
const applyEncoding = (content, compress) => {
    const passthrough = !compress || compress === 'none';
    return passthrough
        ? { wireContent: content, tag: 'none' }
        : { wireContent: encodeContent(content, compress), tag: compress };
};
|
|
207
|
+
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, compress) => {
|
|
208
|
+
// Remove stale CSVs from previous crashed runs, then recreate
|
|
209
|
+
try {
|
|
210
|
+
await fs.rm(csvDir, { recursive: true, force: true });
|
|
211
|
+
}
|
|
212
|
+
catch { }
|
|
213
|
+
await fs.mkdir(csvDir, { recursive: true });
|
|
214
|
+
// We open ~30 concurrent write-streams; raise process limit to suppress
|
|
215
|
+
// MaxListenersExceededWarning (restored after all streams finish).
|
|
216
|
+
const prevMax = process.getMaxListeners();
|
|
217
|
+
process.setMaxListeners(prevMax + 40);
|
|
218
|
+
const contentCache = new FileContentCache(repoPath);
|
|
219
|
+
// Create writers for every node type up-front. Content-bearing tables
|
|
220
|
+
// carry an extra `contentEncoding` column right after `content` to
|
|
221
|
+
// match the Phase 2 schema layout. Tables without `content` (Folder,
|
|
222
|
+
// Community, Process, Route, Tool) are unchanged.
|
|
223
|
+
const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content,contentEncoding');
|
|
224
|
+
const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
|
|
225
|
+
const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description';
|
|
226
|
+
const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
|
|
227
|
+
const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
|
|
228
|
+
const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
|
|
229
|
+
const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description,parameterCount,returnType';
|
|
230
|
+
const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
|
|
231
|
+
const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
|
|
232
|
+
const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
|
|
233
|
+
const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
|
|
234
|
+
// Section nodes have an extra 'level' column
|
|
235
|
+
const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,contentEncoding,description');
|
|
236
|
+
// Route nodes for API endpoint mapping
|
|
237
|
+
const routeWriter = new BufferedCSVWriter(path.join(csvDir, 'route.csv'), 'id,name,filePath,responseKeys,errorKeys,middleware');
|
|
238
|
+
// Tool nodes for MCP tool definitions
|
|
239
|
+
const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description');
|
|
240
|
+
// Multi-language node types share the same CSV shape (no isExported column)
|
|
241
|
+
const multiLangHeader = 'id,name,filePath,startLine,endLine,content,contentEncoding,description';
|
|
242
|
+
const MULTI_LANG_TYPES = [
|
|
243
|
+
'Struct',
|
|
244
|
+
'Enum',
|
|
245
|
+
'Macro',
|
|
246
|
+
'Typedef',
|
|
247
|
+
'Union',
|
|
248
|
+
'Namespace',
|
|
249
|
+
'Trait',
|
|
250
|
+
'Impl',
|
|
251
|
+
'TypeAlias',
|
|
252
|
+
'Const',
|
|
253
|
+
'Static',
|
|
254
|
+
'Variable',
|
|
255
|
+
'Property',
|
|
256
|
+
'Record',
|
|
257
|
+
'Delegate',
|
|
258
|
+
'Annotation',
|
|
259
|
+
'Constructor',
|
|
260
|
+
'Template',
|
|
261
|
+
'Module',
|
|
262
|
+
];
|
|
263
|
+
const multiLangWriters = new Map();
|
|
264
|
+
for (const t of MULTI_LANG_TYPES) {
|
|
265
|
+
multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), multiLangHeader));
|
|
266
|
+
}
|
|
267
|
+
const codeWriterMap = {
|
|
268
|
+
Function: functionWriter,
|
|
269
|
+
Class: classWriter,
|
|
270
|
+
Interface: interfaceWriter,
|
|
271
|
+
CodeElement: codeElemWriter,
|
|
272
|
+
};
|
|
273
|
+
// Deduplicate all node types — the pipeline can produce duplicate IDs across
|
|
274
|
+
// all symbol types (Class, Method, Function, etc.), not just File nodes.
|
|
275
|
+
// A single Set covering every label prevents PK violations on COPY.
|
|
276
|
+
const seenNodeIds = new Set();
|
|
277
|
+
// --- SINGLE PASS over all nodes ---
|
|
278
|
+
for (const node of graph.iterNodes()) {
|
|
279
|
+
if (seenNodeIds.has(node.id))
|
|
280
|
+
continue;
|
|
281
|
+
seenNodeIds.add(node.id);
|
|
282
|
+
switch (node.label) {
|
|
283
|
+
case 'File': {
|
|
284
|
+
const content = await extractContent(node, contentCache);
|
|
285
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
286
|
+
await fileWriter.addRow([
|
|
287
|
+
escapeCSVField(node.id),
|
|
288
|
+
escapeCSVField(node.properties.name || ''),
|
|
289
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
290
|
+
escapeCSVField(wireContent),
|
|
291
|
+
escapeCSVField(tag),
|
|
292
|
+
].join(','));
|
|
293
|
+
break;
|
|
294
|
+
}
|
|
295
|
+
case 'Folder':
|
|
296
|
+
await folderWriter.addRow([
|
|
297
|
+
escapeCSVField(node.id),
|
|
298
|
+
escapeCSVField(node.properties.name || ''),
|
|
299
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
300
|
+
].join(','));
|
|
301
|
+
break;
|
|
302
|
+
case 'Community': {
|
|
303
|
+
const keywords = node.properties.keywords || [];
|
|
304
|
+
const keywordsStr = `[${keywords.map((k) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
|
|
305
|
+
await communityWriter.addRow([
|
|
306
|
+
escapeCSVField(node.id),
|
|
307
|
+
escapeCSVField(node.properties.name || ''),
|
|
308
|
+
escapeCSVField(node.properties.heuristicLabel || ''),
|
|
309
|
+
keywordsStr,
|
|
310
|
+
escapeCSVField(node.properties.description || ''),
|
|
311
|
+
escapeCSVField(node.properties.enrichedBy || 'heuristic'),
|
|
312
|
+
escapeCSVNumber(node.properties.cohesion, 0),
|
|
313
|
+
escapeCSVNumber(node.properties.symbolCount, 0),
|
|
314
|
+
].join(','));
|
|
315
|
+
break;
|
|
316
|
+
}
|
|
317
|
+
case 'Process': {
|
|
318
|
+
const communities = node.properties.communities || [];
|
|
319
|
+
const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
|
|
320
|
+
await processWriter.addRow([
|
|
321
|
+
escapeCSVField(node.id),
|
|
322
|
+
escapeCSVField(node.properties.name || ''),
|
|
323
|
+
escapeCSVField(node.properties.heuristicLabel || ''),
|
|
324
|
+
escapeCSVField(node.properties.processType || ''),
|
|
325
|
+
escapeCSVNumber(node.properties.stepCount, 0),
|
|
326
|
+
escapeCSVField(communitiesStr),
|
|
327
|
+
escapeCSVField(node.properties.entryPointId || ''),
|
|
328
|
+
escapeCSVField(node.properties.terminalId || ''),
|
|
329
|
+
].join(','));
|
|
330
|
+
break;
|
|
331
|
+
}
|
|
332
|
+
case 'Method': {
|
|
333
|
+
const content = await extractContent(node, contentCache);
|
|
334
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
335
|
+
await methodWriter.addRow([
|
|
336
|
+
escapeCSVField(node.id),
|
|
337
|
+
escapeCSVField(node.properties.name || ''),
|
|
338
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
339
|
+
escapeCSVNumber(node.properties.startLine, -1),
|
|
340
|
+
escapeCSVNumber(node.properties.endLine, -1),
|
|
341
|
+
node.properties.isExported ? 'true' : 'false',
|
|
342
|
+
escapeCSVField(wireContent),
|
|
343
|
+
escapeCSVField(tag),
|
|
344
|
+
escapeCSVField(node.properties.description || ''),
|
|
345
|
+
escapeCSVNumber(node.properties.parameterCount, 0),
|
|
346
|
+
escapeCSVField(node.properties.returnType || ''),
|
|
347
|
+
].join(','));
|
|
348
|
+
break;
|
|
349
|
+
}
|
|
350
|
+
case 'Section': {
|
|
351
|
+
const content = await extractContent(node, contentCache);
|
|
352
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
353
|
+
await sectionWriter.addRow([
|
|
354
|
+
escapeCSVField(node.id),
|
|
355
|
+
escapeCSVField(node.properties.name || ''),
|
|
356
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
357
|
+
escapeCSVNumber(node.properties.startLine, -1),
|
|
358
|
+
escapeCSVNumber(node.properties.endLine, -1),
|
|
359
|
+
escapeCSVNumber(node.properties.level, 1),
|
|
360
|
+
escapeCSVField(wireContent),
|
|
361
|
+
escapeCSVField(tag),
|
|
362
|
+
escapeCSVField(node.properties.description || ''),
|
|
363
|
+
].join(','));
|
|
364
|
+
break;
|
|
365
|
+
}
|
|
366
|
+
case 'Route': {
|
|
367
|
+
const responseKeys = node.properties.responseKeys || [];
|
|
368
|
+
// LadybugDB array literal inside a quoted CSV field: escapeCSVField wraps in "..."
|
|
369
|
+
// and the array uses single-quoted elements
|
|
370
|
+
const keysStr = `[${responseKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
|
|
371
|
+
const errorKeys = node.properties.errorKeys || [];
|
|
372
|
+
const errorKeysStr = `[${errorKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
|
|
373
|
+
const middleware = node.properties.middleware || [];
|
|
374
|
+
const middlewareStr = `[${middleware.map((m) => `'${m.replace(/'/g, "''")}'`).join(',')}]`;
|
|
375
|
+
await routeWriter.addRow([
|
|
376
|
+
escapeCSVField(node.id),
|
|
377
|
+
escapeCSVField(node.properties.name || ''),
|
|
378
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
379
|
+
escapeCSVField(keysStr),
|
|
380
|
+
escapeCSVField(errorKeysStr),
|
|
381
|
+
escapeCSVField(middlewareStr),
|
|
382
|
+
].join(','));
|
|
383
|
+
break;
|
|
384
|
+
}
|
|
385
|
+
case 'Tool':
|
|
386
|
+
await toolWriter.addRow([
|
|
387
|
+
escapeCSVField(node.id),
|
|
388
|
+
escapeCSVField(node.properties.name || ''),
|
|
389
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
390
|
+
escapeCSVField(node.properties.description || ''),
|
|
391
|
+
].join(','));
|
|
392
|
+
break;
|
|
393
|
+
default: {
|
|
394
|
+
// Code element nodes (Function, Class, Interface, CodeElement)
|
|
395
|
+
const writer = codeWriterMap[node.label];
|
|
396
|
+
if (writer) {
|
|
397
|
+
const content = await extractContent(node, contentCache);
|
|
398
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
399
|
+
await writer.addRow([
|
|
400
|
+
escapeCSVField(node.id),
|
|
401
|
+
escapeCSVField(node.properties.name || ''),
|
|
402
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
403
|
+
escapeCSVNumber(node.properties.startLine, -1),
|
|
404
|
+
escapeCSVNumber(node.properties.endLine, -1),
|
|
405
|
+
node.properties.isExported ? 'true' : 'false',
|
|
406
|
+
escapeCSVField(wireContent),
|
|
407
|
+
escapeCSVField(tag),
|
|
408
|
+
escapeCSVField(node.properties.description || ''),
|
|
409
|
+
].join(','));
|
|
410
|
+
}
|
|
411
|
+
else {
|
|
412
|
+
// Multi-language node types (Struct, Impl, Trait, Macro, etc.)
|
|
413
|
+
const mlWriter = multiLangWriters.get(node.label);
|
|
414
|
+
if (mlWriter) {
|
|
415
|
+
const content = await extractContent(node, contentCache);
|
|
416
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
417
|
+
await mlWriter.addRow([
|
|
418
|
+
escapeCSVField(node.id),
|
|
419
|
+
escapeCSVField(node.properties.name || ''),
|
|
420
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
421
|
+
escapeCSVNumber(node.properties.startLine, -1),
|
|
422
|
+
escapeCSVNumber(node.properties.endLine, -1),
|
|
423
|
+
escapeCSVField(wireContent),
|
|
424
|
+
escapeCSVField(tag),
|
|
425
|
+
escapeCSVField(node.properties.description || ''),
|
|
426
|
+
].join(','));
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
break;
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
// Finish all node writers
|
|
434
|
+
const allWriters = [
|
|
435
|
+
fileWriter,
|
|
436
|
+
folderWriter,
|
|
437
|
+
functionWriter,
|
|
438
|
+
classWriter,
|
|
439
|
+
interfaceWriter,
|
|
440
|
+
methodWriter,
|
|
441
|
+
codeElemWriter,
|
|
442
|
+
communityWriter,
|
|
443
|
+
processWriter,
|
|
444
|
+
sectionWriter,
|
|
445
|
+
routeWriter,
|
|
446
|
+
toolWriter,
|
|
447
|
+
...multiLangWriters.values(),
|
|
448
|
+
];
|
|
449
|
+
await Promise.all(allWriters.map((w) => w.finish()));
|
|
450
|
+
// --- Stream relationship CSV ---
|
|
451
|
+
const relCsvPath = path.join(csvDir, 'relations.csv');
|
|
452
|
+
const relWriter = new BufferedCSVWriter(relCsvPath, 'from,to,type,confidence,reason,step');
|
|
453
|
+
for (const rel of graph.iterRelationships()) {
|
|
454
|
+
await relWriter.addRow([
|
|
455
|
+
escapeCSVField(rel.sourceId),
|
|
456
|
+
escapeCSVField(rel.targetId),
|
|
457
|
+
escapeCSVField(rel.type),
|
|
458
|
+
escapeCSVNumber(rel.confidence, 1.0),
|
|
459
|
+
escapeCSVField(rel.reason),
|
|
460
|
+
escapeCSVNumber(rel.step, 0),
|
|
461
|
+
].join(','));
|
|
462
|
+
}
|
|
463
|
+
await relWriter.finish();
|
|
464
|
+
// Build result map — only include tables that have rows
|
|
465
|
+
const nodeFiles = new Map();
|
|
466
|
+
const tableMap = [
|
|
467
|
+
['File', fileWriter],
|
|
468
|
+
['Folder', folderWriter],
|
|
469
|
+
['Function', functionWriter],
|
|
470
|
+
['Class', classWriter],
|
|
471
|
+
['Interface', interfaceWriter],
|
|
472
|
+
['Method', methodWriter],
|
|
473
|
+
['CodeElement', codeElemWriter],
|
|
474
|
+
['Community', communityWriter],
|
|
475
|
+
['Process', processWriter],
|
|
476
|
+
['Section', sectionWriter],
|
|
477
|
+
['Route', routeWriter],
|
|
478
|
+
['Tool', toolWriter],
|
|
479
|
+
...Array.from(multiLangWriters.entries()).map(([name, w]) => [name, w]),
|
|
480
|
+
];
|
|
481
|
+
for (const [name, writer] of tableMap) {
|
|
482
|
+
if (writer.rows > 0) {
|
|
483
|
+
nodeFiles.set(name, {
|
|
484
|
+
csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`),
|
|
485
|
+
rows: writer.rows,
|
|
486
|
+
});
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
// Restore original process listener limit
|
|
490
|
+
process.setMaxListeners(prevMax);
|
|
491
|
+
return { nodeFiles, relCsvPath, relRows: relWriter.rows };
|
|
492
|
+
};
|