graphifyy 0.3.17 → 0.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja-JP.md +60 -17
- package/README.md +41 -13
- package/README.zh-CN.md +54 -17
- package/dist/cli.js +862 -369
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1070 -598
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +36 -6
- package/dist/index.d.ts +36 -6
- package/dist/index.js +1092 -614
- package/dist/index.js.map +1 -1
- package/dist/skill-runtime.js +1182 -669
- package/dist/skill-runtime.js.map +1 -1
- package/package.json +14 -4
- package/src/skills/skill-claw.md +1 -0
- package/src/skills/skill-codex.md +69 -11
- package/src/skills/skill-droid.md +73 -6
- package/src/skills/skill-gemini.toml +207 -0
- package/src/skills/skill-opencode.md +73 -6
- package/src/skills/skill-trae.md +1 -0
- package/src/skills/skill-windows.md +76 -5
- package/src/skills/skill.md +82 -8
package/dist/index.js
CHANGED
|
@@ -14,6 +14,21 @@ var __export = (target, all) => {
|
|
|
14
14
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
15
15
|
};
|
|
16
16
|
|
|
17
|
+
// src/types.ts
|
|
18
|
+
var FileType;
|
|
19
|
+
var init_types = __esm({
|
|
20
|
+
"src/types.ts"() {
|
|
21
|
+
FileType = /* @__PURE__ */ ((FileType2) => {
|
|
22
|
+
FileType2["CODE"] = "code";
|
|
23
|
+
FileType2["DOCUMENT"] = "document";
|
|
24
|
+
FileType2["PAPER"] = "paper";
|
|
25
|
+
FileType2["IMAGE"] = "image";
|
|
26
|
+
FileType2["VIDEO"] = "video";
|
|
27
|
+
return FileType2;
|
|
28
|
+
})(FileType || {});
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
|
|
17
32
|
// src/validate.ts
|
|
18
33
|
function validateExtraction(data) {
|
|
19
34
|
if (typeof data !== "object" || data === null || Array.isArray(data)) {
|
|
@@ -103,14 +118,80 @@ var init_validate = __esm({
|
|
|
103
118
|
}
|
|
104
119
|
});
|
|
105
120
|
|
|
121
|
+
// src/graph.ts
|
|
122
|
+
import Graph from "graphology";
|
|
123
|
+
function createGraph(directed = false) {
|
|
124
|
+
return new Graph({ type: directed ? "directed" : "undirected", multi: false });
|
|
125
|
+
}
|
|
126
|
+
function isDirectedGraph(G) {
|
|
127
|
+
return G.type === "directed";
|
|
128
|
+
}
|
|
129
|
+
function loadGraphFromData(raw) {
|
|
130
|
+
const G = createGraph(raw.directed === true);
|
|
131
|
+
for (const [key, value] of Object.entries(raw.graph ?? {})) {
|
|
132
|
+
G.setAttribute(key, value);
|
|
133
|
+
}
|
|
134
|
+
for (const node of raw.nodes ?? []) {
|
|
135
|
+
const { id, ...attrs } = node;
|
|
136
|
+
G.mergeNode(id, attrs);
|
|
137
|
+
}
|
|
138
|
+
for (const link of raw.links ?? raw.edges ?? []) {
|
|
139
|
+
const { source, target, ...attrs } = link;
|
|
140
|
+
if (!G.hasNode(source) || !G.hasNode(target)) continue;
|
|
141
|
+
try {
|
|
142
|
+
G.mergeEdge(source, target, attrs);
|
|
143
|
+
} catch {
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
if (raw.hyperedges && raw.hyperedges.length > 0) {
|
|
147
|
+
G.setAttribute("hyperedges", raw.hyperedges);
|
|
148
|
+
}
|
|
149
|
+
return G;
|
|
150
|
+
}
|
|
151
|
+
function toUndirectedGraph(G) {
|
|
152
|
+
if (!isDirectedGraph(G)) return G.copy();
|
|
153
|
+
const copy = createGraph(false);
|
|
154
|
+
for (const [key, value] of Object.entries(G.getAttributes())) {
|
|
155
|
+
copy.setAttribute(key, value);
|
|
156
|
+
}
|
|
157
|
+
G.forEachNode((nodeId, attrs) => {
|
|
158
|
+
copy.mergeNode(nodeId, attrs);
|
|
159
|
+
});
|
|
160
|
+
G.forEachEdge((_edge, attrs, source, target) => {
|
|
161
|
+
if (!copy.hasNode(source) || !copy.hasNode(target)) return;
|
|
162
|
+
try {
|
|
163
|
+
copy.mergeEdge(source, target, attrs);
|
|
164
|
+
} catch {
|
|
165
|
+
}
|
|
166
|
+
});
|
|
167
|
+
return copy;
|
|
168
|
+
}
|
|
169
|
+
function forEachTraversalNeighbor(G, node, callback) {
|
|
170
|
+
if (isDirectedGraph(G)) {
|
|
171
|
+
G.forEachOutboundNeighbor(node, callback);
|
|
172
|
+
return;
|
|
173
|
+
}
|
|
174
|
+
G.forEachNeighbor(node, callback);
|
|
175
|
+
}
|
|
176
|
+
function traversalNeighbors(G, node) {
|
|
177
|
+
const neighbors = [];
|
|
178
|
+
forEachTraversalNeighbor(G, node, (neighbor) => {
|
|
179
|
+
neighbors.push(neighbor);
|
|
180
|
+
});
|
|
181
|
+
return neighbors;
|
|
182
|
+
}
|
|
183
|
+
var init_graph = __esm({
|
|
184
|
+
"src/graph.ts"() {
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
|
|
106
188
|
// src/build.ts
|
|
107
189
|
var build_exports = {};
|
|
108
190
|
__export(build_exports, {
|
|
109
191
|
build: () => build,
|
|
110
192
|
buildFromJson: () => buildFromJson
|
|
111
193
|
});
|
|
112
|
-
|
|
113
|
-
function buildFromJson(extraction) {
|
|
194
|
+
function buildFromJson(extraction, options) {
|
|
114
195
|
const errors = validateExtraction(extraction);
|
|
115
196
|
const realErrors = errors.filter((e) => !e.includes("does not match any node id"));
|
|
116
197
|
if (realErrors.length > 0) {
|
|
@@ -118,7 +199,7 @@ function buildFromJson(extraction) {
|
|
|
118
199
|
`[graphify] Extraction warning (${realErrors.length} issues): ${realErrors[0]}`
|
|
119
200
|
);
|
|
120
201
|
}
|
|
121
|
-
const G =
|
|
202
|
+
const G = createGraph(options?.directed === true);
|
|
122
203
|
for (const node of extraction.nodes ?? []) {
|
|
123
204
|
const { id, ...attrs } = node;
|
|
124
205
|
G.mergeNode(id, attrs);
|
|
@@ -140,7 +221,7 @@ function buildFromJson(extraction) {
|
|
|
140
221
|
}
|
|
141
222
|
return G;
|
|
142
223
|
}
|
|
143
|
-
function build(extractions) {
|
|
224
|
+
function build(extractions, options) {
|
|
144
225
|
const combined = {
|
|
145
226
|
nodes: [],
|
|
146
227
|
edges: [],
|
|
@@ -155,10 +236,11 @@ function build(extractions) {
|
|
|
155
236
|
combined.input_tokens += ext.input_tokens ?? 0;
|
|
156
237
|
combined.output_tokens += ext.output_tokens ?? 0;
|
|
157
238
|
}
|
|
158
|
-
return buildFromJson(combined);
|
|
239
|
+
return buildFromJson(combined, options);
|
|
159
240
|
}
|
|
160
241
|
var init_build = __esm({
|
|
161
242
|
"src/build.ts"() {
|
|
243
|
+
init_graph();
|
|
162
244
|
init_validate();
|
|
163
245
|
}
|
|
164
246
|
});
|
|
@@ -199,7 +281,7 @@ __export(cluster_exports, {
|
|
|
199
281
|
});
|
|
200
282
|
import louvain from "graphology-communities-louvain";
|
|
201
283
|
function partition(G) {
|
|
202
|
-
const result = louvain(G);
|
|
284
|
+
const result = louvain(G.type === "directed" ? toUndirectedGraph(G) : G);
|
|
203
285
|
const map = /* @__PURE__ */ new Map();
|
|
204
286
|
for (const [node, cid] of Object.entries(result)) {
|
|
205
287
|
map.set(node, cid);
|
|
@@ -305,11 +387,378 @@ var MAX_COMMUNITY_FRACTION, MIN_SPLIT_SIZE;
|
|
|
305
387
|
var init_cluster = __esm({
|
|
306
388
|
"src/cluster.ts"() {
|
|
307
389
|
init_collections();
|
|
390
|
+
init_graph();
|
|
308
391
|
MAX_COMMUNITY_FRACTION = 0.25;
|
|
309
392
|
MIN_SPLIT_SIZE = 10;
|
|
310
393
|
}
|
|
311
394
|
});
|
|
312
395
|
|
|
396
|
+
// src/detect.ts
|
|
397
|
+
import {
|
|
398
|
+
readdirSync,
|
|
399
|
+
readFileSync,
|
|
400
|
+
writeFileSync,
|
|
401
|
+
statSync,
|
|
402
|
+
existsSync,
|
|
403
|
+
mkdirSync,
|
|
404
|
+
lstatSync
|
|
405
|
+
} from "fs";
|
|
406
|
+
import { join, resolve, extname, basename, relative, sep, dirname } from "path";
|
|
407
|
+
import { createHash } from "crypto";
|
|
408
|
+
function isSensitive(filePath) {
|
|
409
|
+
const name = basename(filePath);
|
|
410
|
+
return SENSITIVE_PATTERNS.some((p) => p.test(name) || p.test(filePath));
|
|
411
|
+
}
|
|
412
|
+
function looksLikePaper(filePath) {
|
|
413
|
+
try {
|
|
414
|
+
const text = readFileSync(filePath, "utf-8").slice(0, 3e3);
|
|
415
|
+
const hits = PAPER_SIGNALS.filter((p) => p.test(text)).length;
|
|
416
|
+
return hits >= PAPER_SIGNAL_THRESHOLD;
|
|
417
|
+
} catch {
|
|
418
|
+
return false;
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
function classifyFile(filePath) {
|
|
422
|
+
const ext = extname(filePath).toLowerCase();
|
|
423
|
+
if (CODE_EXTENSIONS.has(ext)) return "code" /* CODE */;
|
|
424
|
+
if (PAPER_EXTENSIONS.has(ext)) {
|
|
425
|
+
const parts = filePath.split(sep);
|
|
426
|
+
if (parts.some((p) => [...ASSET_DIR_MARKERS].some((m) => p.endsWith(m)))) return null;
|
|
427
|
+
return "paper" /* PAPER */;
|
|
428
|
+
}
|
|
429
|
+
if (IMAGE_EXTENSIONS.has(ext)) return "image" /* IMAGE */;
|
|
430
|
+
if (VIDEO_EXTENSIONS.has(ext)) return "video" /* VIDEO */;
|
|
431
|
+
if (DOC_EXTENSIONS.has(ext)) {
|
|
432
|
+
if (looksLikePaper(filePath)) return "paper" /* PAPER */;
|
|
433
|
+
return "document" /* DOCUMENT */;
|
|
434
|
+
}
|
|
435
|
+
if (OFFICE_EXTENSIONS.has(ext)) return "document" /* DOCUMENT */;
|
|
436
|
+
return null;
|
|
437
|
+
}
|
|
438
|
+
function countWords(filePath) {
|
|
439
|
+
try {
|
|
440
|
+
const text = readFileSync(filePath, "utf-8");
|
|
441
|
+
return text.split(/\s+/).filter(Boolean).length;
|
|
442
|
+
} catch {
|
|
443
|
+
return 0;
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
function isNoiseDir(part) {
|
|
447
|
+
if (SKIP_DIRS.has(part)) return true;
|
|
448
|
+
if (part.endsWith("_venv") || part.endsWith("_env")) return true;
|
|
449
|
+
if (part.endsWith(".egg-info")) return true;
|
|
450
|
+
return false;
|
|
451
|
+
}
|
|
452
|
+
function loadGraphifyignore(root) {
|
|
453
|
+
const patterns = [];
|
|
454
|
+
let current = resolve(root);
|
|
455
|
+
while (true) {
|
|
456
|
+
const ignoreFile = join(current, ".graphifyignore");
|
|
457
|
+
if (existsSync(ignoreFile)) {
|
|
458
|
+
for (let line of readFileSync(ignoreFile, "utf-8").split("\n")) {
|
|
459
|
+
line = line.trim();
|
|
460
|
+
if (line && !line.startsWith("#")) {
|
|
461
|
+
patterns.push(line);
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
if (existsSync(join(current, ".git"))) {
|
|
466
|
+
break;
|
|
467
|
+
}
|
|
468
|
+
const parent = dirname(current);
|
|
469
|
+
if (parent === current) {
|
|
470
|
+
break;
|
|
471
|
+
}
|
|
472
|
+
current = parent;
|
|
473
|
+
}
|
|
474
|
+
return patterns;
|
|
475
|
+
}
|
|
476
|
+
function matchGlob(text, pattern) {
|
|
477
|
+
const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
|
|
478
|
+
return new RegExp(`^${regex}$`).test(text);
|
|
479
|
+
}
|
|
480
|
+
function isIgnored(filePath, root, patterns) {
|
|
481
|
+
if (patterns.length === 0) return false;
|
|
482
|
+
let rel;
|
|
483
|
+
try {
|
|
484
|
+
rel = relative(root, filePath).replace(/\\/g, "/");
|
|
485
|
+
} catch {
|
|
486
|
+
return false;
|
|
487
|
+
}
|
|
488
|
+
const parts = rel.split("/");
|
|
489
|
+
for (const pattern of patterns) {
|
|
490
|
+
const p = pattern.replace(/^\/+|\/+$/g, "");
|
|
491
|
+
if (!p) continue;
|
|
492
|
+
if (matchGlob(rel, p)) return true;
|
|
493
|
+
if (matchGlob(basename(filePath), p)) return true;
|
|
494
|
+
for (let i = 0; i < parts.length; i++) {
|
|
495
|
+
if (matchGlob(parts[i], p)) return true;
|
|
496
|
+
if (matchGlob(parts.slice(0, i + 1).join("/"), p)) return true;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
return false;
|
|
500
|
+
}
|
|
501
|
+
function walkDir(dir, root, ignorePatterns, followSymlinks, skipPrune) {
|
|
502
|
+
const result = [];
|
|
503
|
+
let entries;
|
|
504
|
+
try {
|
|
505
|
+
entries = readdirSync(dir);
|
|
506
|
+
} catch {
|
|
507
|
+
return result;
|
|
508
|
+
}
|
|
509
|
+
for (const entry of entries) {
|
|
510
|
+
const full = join(dir, entry);
|
|
511
|
+
let stat;
|
|
512
|
+
try {
|
|
513
|
+
stat = followSymlinks ? statSync(full) : lstatSync(full);
|
|
514
|
+
} catch {
|
|
515
|
+
continue;
|
|
516
|
+
}
|
|
517
|
+
if (stat.isDirectory()) {
|
|
518
|
+
if (!skipPrune) {
|
|
519
|
+
if (entry.startsWith(".")) continue;
|
|
520
|
+
if (isNoiseDir(entry)) continue;
|
|
521
|
+
if (isIgnored(full, root, ignorePatterns)) continue;
|
|
522
|
+
}
|
|
523
|
+
result.push(...walkDir(full, root, ignorePatterns, followSymlinks, skipPrune));
|
|
524
|
+
} else if (stat.isFile()) {
|
|
525
|
+
result.push(full);
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
return result;
|
|
529
|
+
}
|
|
530
|
+
function detect(root, options) {
|
|
531
|
+
const followSymlinks = options?.followSymlinks ?? false;
|
|
532
|
+
const rootResolved = resolve(root);
|
|
533
|
+
const ignorePatterns = loadGraphifyignore(rootResolved);
|
|
534
|
+
const convertedDir = join(rootResolved, "graphify-out", "converted");
|
|
535
|
+
const memoryDir = join(rootResolved, "graphify-out", "memory");
|
|
536
|
+
const files = {
|
|
537
|
+
code: [],
|
|
538
|
+
document: [],
|
|
539
|
+
paper: [],
|
|
540
|
+
image: [],
|
|
541
|
+
video: []
|
|
542
|
+
};
|
|
543
|
+
let totalWords = 0;
|
|
544
|
+
const skippedSensitive = [];
|
|
545
|
+
const allFiles = walkDir(rootResolved, rootResolved, ignorePatterns, followSymlinks, false);
|
|
546
|
+
if (existsSync(memoryDir)) {
|
|
547
|
+
allFiles.push(...walkDir(memoryDir, rootResolved, ignorePatterns, followSymlinks, true));
|
|
548
|
+
}
|
|
549
|
+
const seen = /* @__PURE__ */ new Set();
|
|
550
|
+
for (const p of allFiles) {
|
|
551
|
+
if (seen.has(p)) continue;
|
|
552
|
+
seen.add(p);
|
|
553
|
+
const inMemory = existsSync(memoryDir) && p.startsWith(memoryDir);
|
|
554
|
+
if (!inMemory) {
|
|
555
|
+
if (basename(p).startsWith(".")) continue;
|
|
556
|
+
if (p.startsWith(convertedDir)) continue;
|
|
557
|
+
}
|
|
558
|
+
if (isIgnored(p, rootResolved, ignorePatterns)) continue;
|
|
559
|
+
if (isSensitive(p)) {
|
|
560
|
+
skippedSensitive.push(p);
|
|
561
|
+
continue;
|
|
562
|
+
}
|
|
563
|
+
const ftype = classifyFile(p);
|
|
564
|
+
if (!ftype) continue;
|
|
565
|
+
if (OFFICE_EXTENSIONS.has(extname(p).toLowerCase())) {
|
|
566
|
+
skippedSensitive.push(p + " [office conversion requires async - use pipeline]");
|
|
567
|
+
continue;
|
|
568
|
+
}
|
|
569
|
+
files[ftype].push(p);
|
|
570
|
+
if (ftype !== "video" /* VIDEO */) {
|
|
571
|
+
totalWords += countWords(p);
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
const totalFiles = Object.values(files).reduce((s, v) => s + v.length, 0);
|
|
575
|
+
const needsGraph = totalWords >= CORPUS_WARN_THRESHOLD;
|
|
576
|
+
let warning = null;
|
|
577
|
+
if (!needsGraph) {
|
|
578
|
+
warning = `Corpus is ~${totalWords.toLocaleString()} words - fits in a single context window. You may not need a graph.`;
|
|
579
|
+
} else if (totalWords >= CORPUS_UPPER_THRESHOLD || totalFiles >= FILE_COUNT_UPPER) {
|
|
580
|
+
warning = `Large corpus: ${totalFiles} files \xB7 ~${totalWords.toLocaleString()} words. Semantic extraction will be expensive (many Claude tokens). Consider running on a subfolder, or use --no-semantic to run AST-only.`;
|
|
581
|
+
}
|
|
582
|
+
return {
|
|
583
|
+
files,
|
|
584
|
+
total_files: totalFiles,
|
|
585
|
+
total_words: totalWords,
|
|
586
|
+
needs_graph: needsGraph,
|
|
587
|
+
warning,
|
|
588
|
+
skipped_sensitive: skippedSensitive,
|
|
589
|
+
graphifyignore_patterns: ignorePatterns.length
|
|
590
|
+
};
|
|
591
|
+
}
|
|
592
|
+
function loadManifest(manifestPath = MANIFEST_PATH) {
|
|
593
|
+
try {
|
|
594
|
+
return JSON.parse(readFileSync(manifestPath, "utf-8"));
|
|
595
|
+
} catch {
|
|
596
|
+
return {};
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
function saveManifest(files, manifestPath = MANIFEST_PATH) {
|
|
600
|
+
const manifest = {};
|
|
601
|
+
for (const fileList of Object.values(files)) {
|
|
602
|
+
for (const f of fileList) {
|
|
603
|
+
try {
|
|
604
|
+
manifest[f] = statSync(f).mtimeMs;
|
|
605
|
+
} catch {
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
const dir = join(manifestPath, "..");
|
|
610
|
+
mkdirSync(dir, { recursive: true });
|
|
611
|
+
writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
|
|
612
|
+
}
|
|
613
|
+
function detectIncremental(root, manifestPath = MANIFEST_PATH) {
|
|
614
|
+
const full = detect(root);
|
|
615
|
+
const manifest = loadManifest(manifestPath);
|
|
616
|
+
if (Object.keys(manifest).length === 0) {
|
|
617
|
+
return {
|
|
618
|
+
...full,
|
|
619
|
+
incremental: true,
|
|
620
|
+
new_files: full.files,
|
|
621
|
+
unchanged_files: Object.fromEntries(Object.keys(full.files).map((k) => [k, []])),
|
|
622
|
+
new_total: full.total_files
|
|
623
|
+
};
|
|
624
|
+
}
|
|
625
|
+
const newFiles = {};
|
|
626
|
+
const unchangedFiles = {};
|
|
627
|
+
for (const k of Object.keys(full.files)) {
|
|
628
|
+
newFiles[k] = [];
|
|
629
|
+
unchangedFiles[k] = [];
|
|
630
|
+
}
|
|
631
|
+
for (const [ftype, fileList] of Object.entries(full.files)) {
|
|
632
|
+
for (const f of fileList) {
|
|
633
|
+
const storedMtime = manifest[f];
|
|
634
|
+
let currentMtime = 0;
|
|
635
|
+
try {
|
|
636
|
+
currentMtime = statSync(f).mtimeMs;
|
|
637
|
+
} catch {
|
|
638
|
+
}
|
|
639
|
+
if (storedMtime === void 0 || currentMtime > storedMtime) {
|
|
640
|
+
newFiles[ftype].push(f);
|
|
641
|
+
} else {
|
|
642
|
+
unchangedFiles[ftype].push(f);
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
const currentFiles = new Set(Object.values(full.files).flat());
|
|
647
|
+
const deletedFiles = Object.keys(manifest).filter((f) => !currentFiles.has(f));
|
|
648
|
+
const newTotal = Object.values(newFiles).reduce((s, v) => s + v.length, 0);
|
|
649
|
+
return {
|
|
650
|
+
...full,
|
|
651
|
+
incremental: true,
|
|
652
|
+
new_files: newFiles,
|
|
653
|
+
unchanged_files: unchangedFiles,
|
|
654
|
+
new_total: newTotal,
|
|
655
|
+
deleted_files: deletedFiles
|
|
656
|
+
};
|
|
657
|
+
}
|
|
658
|
+
var MANIFEST_PATH, CODE_EXTENSIONS, DOC_EXTENSIONS, PAPER_EXTENSIONS, IMAGE_EXTENSIONS, OFFICE_EXTENSIONS, VIDEO_EXTENSIONS, CORPUS_WARN_THRESHOLD, CORPUS_UPPER_THRESHOLD, FILE_COUNT_UPPER, SENSITIVE_PATTERNS, PAPER_SIGNALS, PAPER_SIGNAL_THRESHOLD, ASSET_DIR_MARKERS, SKIP_DIRS;
|
|
659
|
+
var init_detect = __esm({
|
|
660
|
+
"src/detect.ts"() {
|
|
661
|
+
init_types();
|
|
662
|
+
MANIFEST_PATH = "graphify-out/manifest.json";
|
|
663
|
+
CODE_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
664
|
+
".py",
|
|
665
|
+
".ts",
|
|
666
|
+
".js",
|
|
667
|
+
".jsx",
|
|
668
|
+
".tsx",
|
|
669
|
+
".go",
|
|
670
|
+
".rs",
|
|
671
|
+
".java",
|
|
672
|
+
".cpp",
|
|
673
|
+
".cc",
|
|
674
|
+
".cxx",
|
|
675
|
+
".c",
|
|
676
|
+
".h",
|
|
677
|
+
".hpp",
|
|
678
|
+
".rb",
|
|
679
|
+
".swift",
|
|
680
|
+
".kt",
|
|
681
|
+
".kts",
|
|
682
|
+
".cs",
|
|
683
|
+
".scala",
|
|
684
|
+
".php",
|
|
685
|
+
".lua",
|
|
686
|
+
".toc",
|
|
687
|
+
".zig",
|
|
688
|
+
".ps1",
|
|
689
|
+
".ex",
|
|
690
|
+
".exs",
|
|
691
|
+
".m",
|
|
692
|
+
".mm",
|
|
693
|
+
".jl"
|
|
694
|
+
]);
|
|
695
|
+
DOC_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".rst"]);
|
|
696
|
+
PAPER_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
|
|
697
|
+
IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"]);
|
|
698
|
+
OFFICE_EXTENSIONS = /* @__PURE__ */ new Set([".docx", ".xlsx"]);
|
|
699
|
+
VIDEO_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
700
|
+
".mp4",
|
|
701
|
+
".mov",
|
|
702
|
+
".webm",
|
|
703
|
+
".mkv",
|
|
704
|
+
".avi",
|
|
705
|
+
".m4v",
|
|
706
|
+
".mp3",
|
|
707
|
+
".wav",
|
|
708
|
+
".m4a",
|
|
709
|
+
".ogg"
|
|
710
|
+
]);
|
|
711
|
+
CORPUS_WARN_THRESHOLD = 5e4;
|
|
712
|
+
CORPUS_UPPER_THRESHOLD = 5e5;
|
|
713
|
+
FILE_COUNT_UPPER = 200;
|
|
714
|
+
SENSITIVE_PATTERNS = [
|
|
715
|
+
/(^|[\\/])\.(env|envrc)(\.|$)/i,
|
|
716
|
+
/\.(pem|key|p12|pfx|cert|crt|der|p8)$/i,
|
|
717
|
+
/(credential|secret|passwd|password|token|private_key)/i,
|
|
718
|
+
/(id_rsa|id_dsa|id_ecdsa|id_ed25519)(\.pub)?$/,
|
|
719
|
+
/(\.netrc|\.pgpass|\.htpasswd)$/i,
|
|
720
|
+
/(aws_credentials|gcloud_credentials|service.account)/i
|
|
721
|
+
];
|
|
722
|
+
PAPER_SIGNALS = [
|
|
723
|
+
/\barxiv\b/i,
|
|
724
|
+
/\bdoi\s*:/i,
|
|
725
|
+
/\babstract\b/i,
|
|
726
|
+
/\bproceedings\b/i,
|
|
727
|
+
/\bjournal\b/i,
|
|
728
|
+
/\bpreprint\b/i,
|
|
729
|
+
/\\cite\{/,
|
|
730
|
+
/\[\d+\]/,
|
|
731
|
+
/\[\n\d+\n\]/,
|
|
732
|
+
/eq\.\s*\d+|equation\s+\d+/i,
|
|
733
|
+
/\d{4}\.\d{4,5}/,
|
|
734
|
+
/\bwe propose\b/i,
|
|
735
|
+
/\bliterature\b/i
|
|
736
|
+
];
|
|
737
|
+
PAPER_SIGNAL_THRESHOLD = 3;
|
|
738
|
+
ASSET_DIR_MARKERS = /* @__PURE__ */ new Set([".imageset", ".xcassets", ".appiconset", ".colorset", ".launchimage"]);
|
|
739
|
+
SKIP_DIRS = /* @__PURE__ */ new Set([
|
|
740
|
+
"venv",
|
|
741
|
+
".venv",
|
|
742
|
+
"env",
|
|
743
|
+
".env",
|
|
744
|
+
"node_modules",
|
|
745
|
+
"__pycache__",
|
|
746
|
+
".git",
|
|
747
|
+
"dist",
|
|
748
|
+
"build",
|
|
749
|
+
"target",
|
|
750
|
+
"out",
|
|
751
|
+
"site-packages",
|
|
752
|
+
"lib64",
|
|
753
|
+
".pytest_cache",
|
|
754
|
+
".mypy_cache",
|
|
755
|
+
".ruff_cache",
|
|
756
|
+
".tox",
|
|
757
|
+
".eggs"
|
|
758
|
+
]);
|
|
759
|
+
}
|
|
760
|
+
});
|
|
761
|
+
|
|
313
762
|
// src/analyze.ts
|
|
314
763
|
var analyze_exports = {};
|
|
315
764
|
__export(analyze_exports, {
|
|
@@ -351,10 +800,11 @@ function isConceptNode(G, nodeId) {
|
|
|
351
800
|
return false;
|
|
352
801
|
}
|
|
353
802
|
function fileCategory(path) {
|
|
354
|
-
const ext = path.includes(".") ? path.split(".").pop()?.toLowerCase() ?? "" : "";
|
|
803
|
+
const ext = path.includes(".") ? `.${path.split(".").pop()?.toLowerCase() ?? ""}` : "";
|
|
355
804
|
if (CODE_EXTENSIONS.has(ext)) return "code";
|
|
356
805
|
if (PAPER_EXTENSIONS.has(ext)) return "paper";
|
|
357
806
|
if (IMAGE_EXTENSIONS.has(ext)) return "image";
|
|
807
|
+
if (DOC_EXTENSIONS.has(ext)) return "doc";
|
|
358
808
|
return "doc";
|
|
359
809
|
}
|
|
360
810
|
function topLevelDir(path) {
|
|
@@ -548,10 +998,10 @@ function suggestQuestions(G, communities, communityLabels, topN = 7) {
|
|
|
548
998
|
const cid = nodeCommunity.get(nodeId);
|
|
549
999
|
const commLabel = cid !== void 0 ? labelMap.get(cid) ?? `Community ${cid}` : "unknown";
|
|
550
1000
|
const neighborComms = /* @__PURE__ */ new Set();
|
|
551
|
-
G
|
|
1001
|
+
for (const n of traversalNeighbors(G, nodeId)) {
|
|
552
1002
|
const nc = nodeCommunity.get(n);
|
|
553
1003
|
if (nc !== void 0 && nc !== cid) neighborComms.add(nc);
|
|
554
|
-
}
|
|
1004
|
+
}
|
|
555
1005
|
if (neighborComms.size > 0) {
|
|
556
1006
|
const otherLabels = [...neighborComms].map((c) => labelMap.get(c) ?? `Community ${c}`);
|
|
557
1007
|
questions.push({
|
|
@@ -680,30 +1130,12 @@ function graphDiff(GOld, GNew) {
|
|
|
680
1130
|
summary: parts.length > 0 ? parts.join(", ") : "no changes"
|
|
681
1131
|
};
|
|
682
1132
|
}
|
|
683
|
-
var CODE_EXTENSIONS, PAPER_EXTENSIONS, IMAGE_EXTENSIONS;
|
|
684
1133
|
var init_analyze = __esm({
|
|
685
1134
|
"src/analyze.ts"() {
|
|
686
1135
|
init_collections();
|
|
687
1136
|
init_cluster();
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
"ts",
|
|
691
|
-
"tsx",
|
|
692
|
-
"js",
|
|
693
|
-
"go",
|
|
694
|
-
"rs",
|
|
695
|
-
"java",
|
|
696
|
-
"rb",
|
|
697
|
-
"cpp",
|
|
698
|
-
"c",
|
|
699
|
-
"h",
|
|
700
|
-
"cs",
|
|
701
|
-
"kt",
|
|
702
|
-
"scala",
|
|
703
|
-
"php"
|
|
704
|
-
]);
|
|
705
|
-
PAPER_EXTENSIONS = /* @__PURE__ */ new Set(["pdf"]);
|
|
706
|
-
IMAGE_EXTENSIONS = /* @__PURE__ */ new Set(["png", "jpg", "jpeg", "webp", "gif", "svg"]);
|
|
1137
|
+
init_graph();
|
|
1138
|
+
init_detect();
|
|
707
1139
|
}
|
|
708
1140
|
});
|
|
709
1141
|
|
|
@@ -873,7 +1305,7 @@ var init_report = __esm({
|
|
|
873
1305
|
|
|
874
1306
|
// src/security.ts
|
|
875
1307
|
import { resolve as pathResolve } from "path";
|
|
876
|
-
import { existsSync } from "fs";
|
|
1308
|
+
import { existsSync as existsSync2 } from "fs";
|
|
877
1309
|
import { URL as URL2 } from "url";
|
|
878
1310
|
import * as dns from "dns/promises";
|
|
879
1311
|
import * as net from "net";
|
|
@@ -973,7 +1405,7 @@ async function safeFetchText(url, maxBytes = MAX_TEXT_BYTES, timeout = 15e3) {
|
|
|
973
1405
|
}
|
|
974
1406
|
function validateGraphPath(filePath, base) {
|
|
975
1407
|
const resolvedBase = pathResolve(base ?? "graphify-out");
|
|
976
|
-
if (!
|
|
1408
|
+
if (!existsSync2(resolvedBase)) {
|
|
977
1409
|
throw new Error(
|
|
978
1410
|
`Graph base directory does not exist: ${resolvedBase}. Run the graphify skill first to build the graph (for Codex: $graphify .).`
|
|
979
1411
|
);
|
|
@@ -984,7 +1416,7 @@ function validateGraphPath(filePath, base) {
|
|
|
984
1416
|
`Path '${filePath}' escapes the allowed directory ${resolvedBase}. Only paths inside graphify-out/ are permitted.`
|
|
985
1417
|
);
|
|
986
1418
|
}
|
|
987
|
-
if (!
|
|
1419
|
+
if (!existsSync2(resolved)) {
|
|
988
1420
|
throw new Error(`Graph file not found: ${resolved}`);
|
|
989
1421
|
}
|
|
990
1422
|
return resolved;
|
|
@@ -1022,7 +1454,7 @@ __export(export_exports, {
|
|
|
1022
1454
|
toJson: () => toJson,
|
|
1023
1455
|
toSvg: () => toSvg
|
|
1024
1456
|
});
|
|
1025
|
-
import { writeFileSync } from "fs";
|
|
1457
|
+
import { writeFileSync as writeFileSync2 } from "fs";
|
|
1026
1458
|
function nodeCommunityMap2(communities) {
|
|
1027
1459
|
const communityMap = toNumericMap(communities);
|
|
1028
1460
|
const result = /* @__PURE__ */ new Map();
|
|
@@ -1050,14 +1482,17 @@ function normalizeCommunityLabels(labelsOrOptions) {
|
|
|
1050
1482
|
}
|
|
1051
1483
|
return toNumericMap(labelsOrOptions.communityLabels);
|
|
1052
1484
|
}
|
|
1053
|
-
function toJson(G, communities, outputPath) {
|
|
1485
|
+
function toJson(G, communities, outputPath, communityLabelsOrOptions) {
|
|
1054
1486
|
const nodeComm = nodeCommunityMap2(communities);
|
|
1487
|
+
const communityLabels = normalizeCommunityLabels(communityLabelsOrOptions);
|
|
1055
1488
|
const nodes = [];
|
|
1056
1489
|
G.forEachNode((nodeId, attrs) => {
|
|
1490
|
+
const communityId = nodeComm.get(nodeId) ?? null;
|
|
1057
1491
|
nodes.push({
|
|
1058
1492
|
id: nodeId,
|
|
1059
1493
|
...attrs,
|
|
1060
|
-
community:
|
|
1494
|
+
community: communityId,
|
|
1495
|
+
community_name: communityId !== null ? sanitizeLabel(communityLabels?.get(communityId) ?? `Community ${communityId}`) : null
|
|
1061
1496
|
});
|
|
1062
1497
|
});
|
|
1063
1498
|
const links = [];
|
|
@@ -1074,15 +1509,20 @@ function toJson(G, communities, outputPath) {
|
|
|
1074
1509
|
links.push(link);
|
|
1075
1510
|
});
|
|
1076
1511
|
const hyperedges = G.getAttribute("hyperedges") ?? [];
|
|
1512
|
+
const communityLabelsObject = communityLabels ? Object.fromEntries(
|
|
1513
|
+
[...communityLabels.entries()].sort((a, b) => a[0] - b[0]).map(([cid, label]) => [String(cid), sanitizeLabel(label)])
|
|
1514
|
+
) : {};
|
|
1077
1515
|
const output = {
|
|
1078
|
-
directed:
|
|
1516
|
+
directed: isDirectedGraph(G),
|
|
1079
1517
|
multigraph: false,
|
|
1080
|
-
graph: {
|
|
1518
|
+
graph: {
|
|
1519
|
+
community_labels: communityLabelsObject
|
|
1520
|
+
},
|
|
1081
1521
|
nodes,
|
|
1082
1522
|
links,
|
|
1083
1523
|
hyperedges
|
|
1084
1524
|
};
|
|
1085
|
-
|
|
1525
|
+
writeFileSync2(outputPath, JSON.stringify(output, null, 2), "utf-8");
|
|
1086
1526
|
}
|
|
1087
1527
|
function toCypher(G, outputPath) {
|
|
1088
1528
|
const lines = ["// Neo4j Cypher import - generated by the graphify skill", ""];
|
|
@@ -1104,7 +1544,7 @@ function toCypher(G, outputPath) {
|
|
|
1104
1544
|
`MATCH (a {id: '${uEsc}'}), (b {id: '${vEsc}'}) MERGE (a)-[:${rel} {confidence: '${conf}'}]->(b);`
|
|
1105
1545
|
);
|
|
1106
1546
|
});
|
|
1107
|
-
|
|
1547
|
+
writeFileSync2(outputPath, lines.join("\n"), "utf-8");
|
|
1108
1548
|
}
|
|
1109
1549
|
function neo4jLabel(label) {
|
|
1110
1550
|
const sanitized = label.replace(/[^A-Za-z0-9_]/g, "");
|
|
@@ -1343,9 +1783,24 @@ function focusNode(nodeId) {
|
|
|
1343
1783
|
showInfo(nodeId);
|
|
1344
1784
|
}
|
|
1345
1785
|
|
|
1786
|
+
let hoveredNodeId = null;
|
|
1787
|
+
network.on('hoverNode', params => {
|
|
1788
|
+
hoveredNodeId = params.node;
|
|
1789
|
+
container.style.cursor = 'pointer';
|
|
1790
|
+
});
|
|
1791
|
+
network.on('blurNode', () => {
|
|
1792
|
+
hoveredNodeId = null;
|
|
1793
|
+
container.style.cursor = 'default';
|
|
1794
|
+
});
|
|
1795
|
+
container.addEventListener('click', () => {
|
|
1796
|
+
if (hoveredNodeId !== null) {
|
|
1797
|
+
showInfo(hoveredNodeId);
|
|
1798
|
+
network.selectNodes([hoveredNodeId]);
|
|
1799
|
+
}
|
|
1800
|
+
});
|
|
1346
1801
|
network.on('click', params => {
|
|
1347
1802
|
if (params.nodes.length > 0) showInfo(params.nodes[0]);
|
|
1348
|
-
else document.getElementById('info-content').innerHTML = '<span class="empty">Click a node to inspect it</span>';
|
|
1803
|
+
else if (hoveredNodeId === null) document.getElementById('info-content').innerHTML = '<span class="empty">Click a node to inspect it</span>';
|
|
1349
1804
|
});
|
|
1350
1805
|
|
|
1351
1806
|
const searchInput = document.getElementById('search');
|
|
@@ -1500,7 +1955,7 @@ ${htmlScript(nodesJson, edgesJson, legendJson)}
|
|
|
1500
1955
|
${hyperedgeScript(hyperedgesJson)}
|
|
1501
1956
|
</body>
|
|
1502
1957
|
</html>`;
|
|
1503
|
-
|
|
1958
|
+
writeFileSync2(outputPath, html, "utf-8");
|
|
1504
1959
|
}
|
|
1505
1960
|
function toGraphml(G, communities, outputPath) {
|
|
1506
1961
|
const nodeComm = nodeCommunityMap2(communities);
|
|
@@ -1516,7 +1971,7 @@ function toGraphml(G, communities, outputPath) {
|
|
|
1516
1971
|
lines.push(' <key id="community" for="node" attr.name="community" attr.type="int"/>');
|
|
1517
1972
|
lines.push(' <key id="relation" for="edge" attr.name="relation" attr.type="string"/>');
|
|
1518
1973
|
lines.push(' <key id="confidence" for="edge" attr.name="confidence" attr.type="string"/>');
|
|
1519
|
-
lines.push(
|
|
1974
|
+
lines.push(` <graph id="G" edgedefault="${isDirectedGraph(G) ? "directed" : "undirected"}">`);
|
|
1520
1975
|
G.forEachNode((nodeId, data) => {
|
|
1521
1976
|
lines.push(` <node id="${xmlEsc(nodeId)}">`);
|
|
1522
1977
|
lines.push(` <data key="label">${xmlEsc(data.label ?? nodeId)}</data>`);
|
|
@@ -1533,7 +1988,7 @@ function toGraphml(G, communities, outputPath) {
|
|
|
1533
1988
|
});
|
|
1534
1989
|
lines.push(" </graph>");
|
|
1535
1990
|
lines.push("</graphml>");
|
|
1536
|
-
|
|
1991
|
+
writeFileSync2(outputPath, lines.join("\n"), "utf-8");
|
|
1537
1992
|
}
|
|
1538
1993
|
function toSvg(G, communities, outputPath, communityLabelsOrOptions, figsize = [20, 14]) {
|
|
1539
1994
|
const communityMap = toNumericMap(communities);
|
|
@@ -1606,7 +2061,7 @@ function toSvg(G, communities, outputPath, communityLabelsOrOptions, figsize = [
|
|
|
1606
2061
|
}
|
|
1607
2062
|
}
|
|
1608
2063
|
svgParts.push("</svg>");
|
|
1609
|
-
|
|
2064
|
+
writeFileSync2(outputPath, svgParts.join("\n"), "utf-8");
|
|
1610
2065
|
}
|
|
1611
2066
|
function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFilenames) {
|
|
1612
2067
|
const communityMap = toNumericMap(communities);
|
|
@@ -1615,7 +2070,7 @@ function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFile
|
|
|
1615
2070
|
const providedNodeFilenames = options?.nodeFilenames ?? nodeFilenames;
|
|
1616
2071
|
const CANVAS_COLORS = ["1", "2", "3", "4", "5", "6"];
|
|
1617
2072
|
function safeName(label) {
|
|
1618
|
-
return label.replace(/[\\/*?:"<>|#^[\]]/g, "").trim() || "unnamed";
|
|
2073
|
+
return label.replace(/\r\n/g, " ").replace(/\r/g, " ").replace(/\n/g, " ").replace(/[\\/*?:"<>|#^[\]]/g, "").trim() || "unnamed";
|
|
1619
2074
|
}
|
|
1620
2075
|
let filenameMap;
|
|
1621
2076
|
if (!providedNodeFilenames) {
|
|
@@ -1694,13 +2149,13 @@ function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFile
|
|
|
1694
2149
|
for (let idx = 0; idx < sortedCids.length; idx++) {
|
|
1695
2150
|
const cid = sortedCids[idx];
|
|
1696
2151
|
const members = communityMap.get(cid) ?? [];
|
|
1697
|
-
const
|
|
2152
|
+
const communityName2 = communityLabels?.get(cid) ?? `Community ${cid}`;
|
|
1698
2153
|
const [gx, gy, gw, gh] = groupLayout.get(cid) ?? [0, 0, 600, 400];
|
|
1699
2154
|
const canvasColor = CANVAS_COLORS[idx % CANVAS_COLORS.length];
|
|
1700
2155
|
canvasNodes.push({
|
|
1701
2156
|
id: `g${cid}`,
|
|
1702
2157
|
type: "group",
|
|
1703
|
-
label:
|
|
2158
|
+
label: communityName2,
|
|
1704
2159
|
x: gx,
|
|
1705
2160
|
y: gy,
|
|
1706
2161
|
width: gw,
|
|
@@ -1750,12 +2205,13 @@ function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFile
|
|
|
1750
2205
|
});
|
|
1751
2206
|
}
|
|
1752
2207
|
const canvasData = { nodes: canvasNodes, edges: canvasEdges };
|
|
1753
|
-
|
|
2208
|
+
writeFileSync2(outputPath, JSON.stringify(canvasData, null, 2), "utf-8");
|
|
1754
2209
|
}
|
|
1755
2210
|
var COMMUNITY_COLORS, MAX_NODES_FOR_VIZ, CONFIDENCE_SCORE_DEFAULTS;
|
|
1756
2211
|
var init_export = __esm({
|
|
1757
2212
|
"src/export.ts"() {
|
|
1758
2213
|
init_security();
|
|
2214
|
+
init_graph();
|
|
1759
2215
|
init_collections();
|
|
1760
2216
|
COMMUNITY_COLORS = [
|
|
1761
2217
|
"#4E79A7",
|
|
@@ -1781,9 +2237,21 @@ var init_export = __esm({
|
|
|
1781
2237
|
// src/cache.ts
|
|
1782
2238
|
import { createHash as createHash2 } from "crypto";
|
|
1783
2239
|
import { readFileSync as readFileSync2, writeFileSync as writeFileSync4, mkdirSync as mkdirSync3, readdirSync as readdirSync2, unlinkSync, renameSync, existsSync as existsSync3 } from "fs";
|
|
1784
|
-
import { join as join3, resolve as resolve2 } from "path";
|
|
2240
|
+
import { extname as extname2, join as join3, resolve as resolve2 } from "path";
|
|
2241
|
+
function bodyContent(content) {
|
|
2242
|
+
const text = content.toString("utf-8");
|
|
2243
|
+
if (!text.startsWith("---")) {
|
|
2244
|
+
return content;
|
|
2245
|
+
}
|
|
2246
|
+
const end = text.indexOf("\n---", 3);
|
|
2247
|
+
if (end === -1) {
|
|
2248
|
+
return content;
|
|
2249
|
+
}
|
|
2250
|
+
return Buffer.from(text.slice(end + 4), "utf-8");
|
|
2251
|
+
}
|
|
1785
2252
|
function fileHash(filePath) {
|
|
1786
|
-
const
|
|
2253
|
+
const raw = readFileSync2(filePath);
|
|
2254
|
+
const content = extname2(filePath).toLowerCase() === ".md" ? bodyContent(raw) : raw;
|
|
1787
2255
|
const resolved = resolve2(filePath);
|
|
1788
2256
|
const h = createHash2("sha256");
|
|
1789
2257
|
h.update(content);
|
|
@@ -1906,7 +2374,7 @@ __export(extract_exports, {
|
|
|
1906
2374
|
extractZig: () => extractZig
|
|
1907
2375
|
});
|
|
1908
2376
|
import { readFileSync as readFileSync3, readdirSync as readdirSync3, lstatSync as lstatSync2, realpathSync, existsSync as existsSync4 } from "fs";
|
|
1909
|
-
import { resolve as resolve3, basename as basename2, extname as
|
|
2377
|
+
import { resolve as resolve3, basename as basename2, extname as extname3, dirname as dirname2, join as join4, sep as sep2 } from "path";
|
|
1910
2378
|
import { createRequire } from "module";
|
|
1911
2379
|
import * as TreeSitter from "web-tree-sitter";
|
|
1912
2380
|
function getModuleRequire() {
|
|
@@ -2362,7 +2830,7 @@ async function _extractGeneric(filePath, config) {
|
|
|
2362
2830
|
return { nodes: [], edges: [], error: String(e) };
|
|
2363
2831
|
}
|
|
2364
2832
|
const root = tree.rootNode;
|
|
2365
|
-
const stem = basename2(filePath,
|
|
2833
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
2366
2834
|
const strPath = filePath;
|
|
2367
2835
|
const nodes = [];
|
|
2368
2836
|
const edges = [];
|
|
@@ -2731,10 +3199,10 @@ async function _extractGeneric(filePath, config) {
|
|
|
2731
3199
|
source: callerNid,
|
|
2732
3200
|
target: tgtNid,
|
|
2733
3201
|
relation: "calls",
|
|
2734
|
-
confidence: "
|
|
3202
|
+
confidence: "EXTRACTED",
|
|
2735
3203
|
source_file: strPath,
|
|
2736
3204
|
source_location: `L${line}`,
|
|
2737
|
-
weight:
|
|
3205
|
+
weight: 1
|
|
2738
3206
|
});
|
|
2739
3207
|
}
|
|
2740
3208
|
}
|
|
@@ -2770,7 +3238,7 @@ async function _extractPythonRationale(filePath, result) {
|
|
|
2770
3238
|
} catch {
|
|
2771
3239
|
return;
|
|
2772
3240
|
}
|
|
2773
|
-
const stem = basename2(filePath,
|
|
3241
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
2774
3242
|
const strPath = filePath;
|
|
2775
3243
|
const { nodes, edges } = result;
|
|
2776
3244
|
const seenIds = new Set(nodes.map((n) => n.id));
|
|
@@ -2866,7 +3334,7 @@ async function extractPython(filePath) {
|
|
|
2866
3334
|
return result;
|
|
2867
3335
|
}
|
|
2868
3336
|
async function extractJs(filePath) {
|
|
2869
|
-
const ext =
|
|
3337
|
+
const ext = extname3(filePath);
|
|
2870
3338
|
const config = ext === ".ts" || ext === ".tsx" ? _TS_CONFIG : _JS_CONFIG;
|
|
2871
3339
|
return _extractGeneric(filePath, config);
|
|
2872
3340
|
}
|
|
@@ -2917,7 +3385,7 @@ async function extractJulia(filePath) {
|
|
|
2917
3385
|
return { nodes: [], edges: [], error: String(e) };
|
|
2918
3386
|
}
|
|
2919
3387
|
const root = tree.rootNode;
|
|
2920
|
-
const stem = basename2(filePath,
|
|
3388
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
2921
3389
|
const strPath = filePath;
|
|
2922
3390
|
const nodes = [];
|
|
2923
3391
|
const edges = [];
|
|
@@ -3113,8 +3581,8 @@ async function extractGo(filePath) {
|
|
|
3113
3581
|
return { nodes: [], edges: [], error: String(e) };
|
|
3114
3582
|
}
|
|
3115
3583
|
const root = tree.rootNode;
|
|
3116
|
-
const stem = basename2(filePath,
|
|
3117
|
-
const pkgScope =
|
|
3584
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
3585
|
+
const pkgScope = dirname2(filePath).split(sep2).pop() || stem;
|
|
3118
3586
|
const strPath = filePath;
|
|
3119
3587
|
const nodes = [];
|
|
3120
3588
|
const edges = [];
|
|
@@ -3257,10 +3725,10 @@ async function extractGo(filePath) {
|
|
|
3257
3725
|
source: callerNid,
|
|
3258
3726
|
target: tgtNid,
|
|
3259
3727
|
relation: "calls",
|
|
3260
|
-
confidence: "
|
|
3728
|
+
confidence: "EXTRACTED",
|
|
3261
3729
|
source_file: strPath,
|
|
3262
3730
|
source_location: `L${line}`,
|
|
3263
|
-
weight:
|
|
3731
|
+
weight: 1
|
|
3264
3732
|
});
|
|
3265
3733
|
}
|
|
3266
3734
|
}
|
|
@@ -3295,7 +3763,7 @@ async function extractRust(filePath) {
|
|
|
3295
3763
|
return { nodes: [], edges: [], error: String(e) };
|
|
3296
3764
|
}
|
|
3297
3765
|
const root = tree.rootNode;
|
|
3298
|
-
const stem = basename2(filePath,
|
|
3766
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
3299
3767
|
const strPath = filePath;
|
|
3300
3768
|
const nodes = [];
|
|
3301
3769
|
const edges = [];
|
|
@@ -3412,10 +3880,10 @@ async function extractRust(filePath) {
|
|
|
3412
3880
|
source: callerNid,
|
|
3413
3881
|
target: tgtNid,
|
|
3414
3882
|
relation: "calls",
|
|
3415
|
-
confidence: "
|
|
3883
|
+
confidence: "EXTRACTED",
|
|
3416
3884
|
source_file: strPath,
|
|
3417
3885
|
source_location: `L${line}`,
|
|
3418
|
-
weight:
|
|
3886
|
+
weight: 1
|
|
3419
3887
|
});
|
|
3420
3888
|
}
|
|
3421
3889
|
}
|
|
@@ -3450,7 +3918,7 @@ async function extractZig(filePath) {
|
|
|
3450
3918
|
return { nodes: [], edges: [], error: String(e) };
|
|
3451
3919
|
}
|
|
3452
3920
|
const root = tree.rootNode;
|
|
3453
|
-
const stem = basename2(filePath,
|
|
3921
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
3454
3922
|
const strPath = filePath;
|
|
3455
3923
|
const nodes = [];
|
|
3456
3924
|
const edges = [];
|
|
@@ -3574,7 +4042,7 @@ async function extractZig(filePath) {
|
|
|
3574
4042
|
const pair = `${callerNid}|${tgtNid}`;
|
|
3575
4043
|
if (!seenCallPairs.has(pair)) {
|
|
3576
4044
|
seenCallPairs.add(pair);
|
|
3577
|
-
addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "
|
|
4045
|
+
addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
|
|
3578
4046
|
}
|
|
3579
4047
|
}
|
|
3580
4048
|
}
|
|
@@ -3608,7 +4076,7 @@ async function extractPowershell(filePath) {
|
|
|
3608
4076
|
return { nodes: [], edges: [], error: String(e) };
|
|
3609
4077
|
}
|
|
3610
4078
|
const root = tree.rootNode;
|
|
3611
|
-
const stem = basename2(filePath,
|
|
4079
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
3612
4080
|
const strPath = filePath;
|
|
3613
4081
|
const nodes = [];
|
|
3614
4082
|
const edges = [];
|
|
@@ -3757,7 +4225,7 @@ async function extractPowershell(filePath) {
|
|
|
3757
4225
|
const pair = `${callerNid}|${tgtNid}`;
|
|
3758
4226
|
if (!seenCallPairs.has(pair)) {
|
|
3759
4227
|
seenCallPairs.add(pair);
|
|
3760
|
-
addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "
|
|
4228
|
+
addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
|
|
3761
4229
|
}
|
|
3762
4230
|
}
|
|
3763
4231
|
}
|
|
@@ -3792,7 +4260,7 @@ async function extractObjc(filePath) {
|
|
|
3792
4260
|
return { nodes: [], edges: [], error: String(e) };
|
|
3793
4261
|
}
|
|
3794
4262
|
const root = tree.rootNode;
|
|
3795
|
-
const stem = basename2(filePath,
|
|
4263
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
3796
4264
|
const strPath = filePath;
|
|
3797
4265
|
const nodes = [];
|
|
3798
4266
|
const edges = [];
|
|
@@ -3963,7 +4431,7 @@ async function extractObjc(filePath) {
|
|
|
3963
4431
|
const pair = `${callerNid}|${candidate}`;
|
|
3964
4432
|
if (!seenCalls.has(pair) && callerNid !== candidate) {
|
|
3965
4433
|
seenCalls.add(pair);
|
|
3966
|
-
addEdge(callerNid, candidate, "calls", bodyNode.startPosition.row + 1, "
|
|
4434
|
+
addEdge(callerNid, candidate, "calls", bodyNode.startPosition.row + 1, "EXTRACTED", 1);
|
|
3967
4435
|
}
|
|
3968
4436
|
}
|
|
3969
4437
|
}
|
|
@@ -3995,7 +4463,7 @@ async function extractElixir(filePath) {
|
|
|
3995
4463
|
return { nodes: [], edges: [], error: String(e) };
|
|
3996
4464
|
}
|
|
3997
4465
|
const root = tree.rootNode;
|
|
3998
|
-
const stem = basename2(filePath,
|
|
4466
|
+
const stem = basename2(filePath, extname3(filePath));
|
|
3999
4467
|
const strPath = filePath;
|
|
4000
4468
|
const nodes = [];
|
|
4001
4469
|
const edges = [];
|
|
@@ -4154,7 +4622,7 @@ async function extractElixir(filePath) {
|
|
|
4154
4622
|
const pair = `${callerNid}|${tgtNid}`;
|
|
4155
4623
|
if (!seenCallPairs.has(pair)) {
|
|
4156
4624
|
seenCallPairs.add(pair);
|
|
4157
|
-
addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "
|
|
4625
|
+
addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
|
|
4158
4626
|
}
|
|
4159
4627
|
}
|
|
4160
4628
|
}
|
|
@@ -4181,7 +4649,7 @@ async function _resolveCrossFileImports(perFile, paths) {
|
|
|
4181
4649
|
for (const node of fileResult.nodes ?? []) {
|
|
4182
4650
|
const src = node.source_file ?? "";
|
|
4183
4651
|
if (!src) continue;
|
|
4184
|
-
const fileStem = basename2(src,
|
|
4652
|
+
const fileStem = basename2(src, extname3(src));
|
|
4185
4653
|
const label = node.label ?? "";
|
|
4186
4654
|
const nid = node.id ?? "";
|
|
4187
4655
|
if (label && !label.endsWith(")") && !label.endsWith(".py") && !label.startsWith("_")) {
|
|
@@ -4193,7 +4661,7 @@ async function _resolveCrossFileImports(perFile, paths) {
|
|
|
4193
4661
|
const newEdges = [];
|
|
4194
4662
|
const stemToPath = /* @__PURE__ */ new Map();
|
|
4195
4663
|
for (const p of paths) {
|
|
4196
|
-
stemToPath.set(basename2(p,
|
|
4664
|
+
stemToPath.set(basename2(p, extname3(p)), p);
|
|
4197
4665
|
}
|
|
4198
4666
|
for (let idx = 0; idx < perFile.length; idx++) {
|
|
4199
4667
|
let walkImports = function(node) {
|
|
@@ -4257,7 +4725,7 @@ async function _resolveCrossFileImports(perFile, paths) {
|
|
|
4257
4725
|
};
|
|
4258
4726
|
const fileResult = perFile[idx];
|
|
4259
4727
|
const filePath = paths[idx];
|
|
4260
|
-
const fileStem = basename2(filePath,
|
|
4728
|
+
const fileStem = basename2(filePath, extname3(filePath));
|
|
4261
4729
|
const strPath = filePath;
|
|
4262
4730
|
const localClasses = fileResult.nodes.filter(
|
|
4263
4731
|
(n) => n.source_file === strPath && !n.label.endsWith(")") && !n.label.endsWith(".py") && n.id !== _makeId(fileStem)
|
|
@@ -4283,7 +4751,7 @@ async function extractWithDiagnostics(paths) {
|
|
|
4283
4751
|
if (paths.length === 0) {
|
|
4284
4752
|
root = ".";
|
|
4285
4753
|
} else if (paths.length === 1) {
|
|
4286
|
-
root =
|
|
4754
|
+
root = dirname2(paths[0]);
|
|
4287
4755
|
} else {
|
|
4288
4756
|
const parts = paths.map((p) => p.split(sep2));
|
|
4289
4757
|
const minLen = Math.min(...parts.map((p) => p.length));
|
|
@@ -4306,7 +4774,7 @@ async function extractWithDiagnostics(paths) {
|
|
|
4306
4774
|
`);
|
|
4307
4775
|
}
|
|
4308
4776
|
const filePath = paths[i];
|
|
4309
|
-
const ext =
|
|
4777
|
+
const ext = extname3(filePath);
|
|
4310
4778
|
const extractor = _DISPATCH[ext];
|
|
4311
4779
|
if (!extractor) continue;
|
|
4312
4780
|
const cached = loadCached(filePath, root);
|
|
@@ -4332,9 +4800,9 @@ async function extractWithDiagnostics(paths) {
|
|
|
4332
4800
|
allNodes.push(...result.nodes ?? []);
|
|
4333
4801
|
allEdges.push(...result.edges ?? []);
|
|
4334
4802
|
}
|
|
4335
|
-
const pyPaths = paths.filter((p) =>
|
|
4803
|
+
const pyPaths = paths.filter((p) => extname3(p) === ".py");
|
|
4336
4804
|
if (pyPaths.length > 0) {
|
|
4337
|
-
const pyResults = perFile.filter((_r, i) =>
|
|
4805
|
+
const pyResults = perFile.filter((_r, i) => extname3(paths[i]) === ".py");
|
|
4338
4806
|
try {
|
|
4339
4807
|
const crossFileEdges = await _resolveCrossFileImports(pyResults, pyPaths);
|
|
4340
4808
|
allEdges.push(...crossFileEdges);
|
|
@@ -4388,7 +4856,7 @@ function collectFiles(target, options) {
|
|
|
4388
4856
|
const real = realpathSync(fullPath);
|
|
4389
4857
|
if (visited.has(real)) continue;
|
|
4390
4858
|
visited.add(real);
|
|
4391
|
-
const parentReal = realpathSync(
|
|
4859
|
+
const parentReal = realpathSync(dirname2(fullPath));
|
|
4392
4860
|
if (parentReal === real || parentReal.startsWith(real + sep2)) continue;
|
|
4393
4861
|
} catch {
|
|
4394
4862
|
continue;
|
|
@@ -4398,7 +4866,7 @@ function collectFiles(target, options) {
|
|
|
4398
4866
|
if (pathParts.some((part) => part.startsWith("."))) continue;
|
|
4399
4867
|
walkDir2(fullPath, visited);
|
|
4400
4868
|
} else if (stat.isFile()) {
|
|
4401
|
-
const ext =
|
|
4869
|
+
const ext = extname3(entry);
|
|
4402
4870
|
if (_EXTENSIONS.has(ext)) {
|
|
4403
4871
|
results.push(fullPath);
|
|
4404
4872
|
}
|
|
@@ -4673,16 +5141,8 @@ var init_extract = __esm({
|
|
|
4673
5141
|
}
|
|
4674
5142
|
});
|
|
4675
5143
|
|
|
4676
|
-
// src/types.ts
|
|
4677
|
-
var FileType = /* @__PURE__ */ ((FileType2) => {
|
|
4678
|
-
FileType2["CODE"] = "code";
|
|
4679
|
-
FileType2["DOCUMENT"] = "document";
|
|
4680
|
-
FileType2["PAPER"] = "paper";
|
|
4681
|
-
FileType2["IMAGE"] = "image";
|
|
4682
|
-
return FileType2;
|
|
4683
|
-
})(FileType || {});
|
|
4684
|
-
|
|
4685
5144
|
// src/index.ts
|
|
5145
|
+
init_types();
|
|
4686
5146
|
init_validate();
|
|
4687
5147
|
init_build();
|
|
4688
5148
|
init_cluster();
|
|
@@ -4692,22 +5152,23 @@ init_export();
|
|
|
4692
5152
|
|
|
4693
5153
|
// src/wiki.ts
|
|
4694
5154
|
init_collections();
|
|
4695
|
-
|
|
4696
|
-
import {
|
|
5155
|
+
init_graph();
|
|
5156
|
+
import { mkdirSync as mkdirSync2, writeFileSync as writeFileSync3 } from "fs";
|
|
5157
|
+
import { join as join2 } from "path";
|
|
4697
5158
|
function safeFilename(name) {
|
|
4698
|
-
return name.replace(/\//g, "-").replace(/ /g, "_").replace(/:/g, "-");
|
|
5159
|
+
return name.replace(/\r\n/g, " ").replace(/\r/g, " ").replace(/\n/g, " ").replace(/\//g, "-").replace(/ /g, "_").replace(/:/g, "-");
|
|
4699
5160
|
}
|
|
4700
5161
|
function crossCommunityLinks(G, nodes, ownCid, labels) {
|
|
4701
5162
|
const labelMap = toNumericMap(labels);
|
|
4702
5163
|
const counts = /* @__PURE__ */ new Map();
|
|
4703
5164
|
for (const nid of nodes) {
|
|
4704
|
-
G
|
|
5165
|
+
for (const neighbor of traversalNeighbors(G, nid)) {
|
|
4705
5166
|
const ncid = G.getNodeAttribute(neighbor, "community");
|
|
4706
5167
|
if (ncid !== void 0 && ncid !== ownCid) {
|
|
4707
5168
|
const label = labelMap.get(ncid) ?? `Community ${ncid}`;
|
|
4708
5169
|
counts.set(label, (counts.get(label) ?? 0) + 1);
|
|
4709
5170
|
}
|
|
4710
|
-
}
|
|
5171
|
+
}
|
|
4711
5172
|
}
|
|
4712
5173
|
return [...counts.entries()].sort((a, b) => b[1] - a[1]);
|
|
4713
5174
|
}
|
|
@@ -4771,15 +5232,15 @@ function godNodeArticle(G, nid, labels) {
|
|
|
4771
5232
|
const nodeLabel = d.label ?? nid;
|
|
4772
5233
|
const src = d.source_file ?? "";
|
|
4773
5234
|
const cid = d.community;
|
|
4774
|
-
const
|
|
5235
|
+
const communityName2 = cid !== void 0 ? labels.get(cid) ?? `Community ${cid}` : void 0;
|
|
4775
5236
|
const lines = [];
|
|
4776
5237
|
lines.push(`# ${nodeLabel}`, "");
|
|
4777
5238
|
lines.push(`> God node \xB7 ${G.degree(nid)} connections \xB7 \`${src}\``, "");
|
|
4778
|
-
if (
|
|
4779
|
-
lines.push(`**Community:** [[${
|
|
5239
|
+
if (communityName2) {
|
|
5240
|
+
lines.push(`**Community:** [[${communityName2}]]`, "");
|
|
4780
5241
|
}
|
|
4781
5242
|
const byRelation = /* @__PURE__ */ new Map();
|
|
4782
|
-
const neighbors =
|
|
5243
|
+
const neighbors = traversalNeighbors(G, nid).sort((a, b) => G.degree(b) - G.degree(a));
|
|
4783
5244
|
for (const neighbor of neighbors) {
|
|
4784
5245
|
const ed = G.getEdgeAttributes(G.edge(nid, neighbor));
|
|
4785
5246
|
const rel = ed.relation ?? "related";
|
|
@@ -4787,421 +5248,90 @@ function godNodeArticle(G, nid, labels) {
|
|
|
4787
5248
|
const conf = ed.confidence ?? "";
|
|
4788
5249
|
const confStr = conf ? ` \`${conf}\`` : "";
|
|
4789
5250
|
if (!byRelation.has(rel)) byRelation.set(rel, []);
|
|
4790
|
-
byRelation.get(rel).push(`[[${neighborLabel}]]${confStr}`);
|
|
4791
|
-
}
|
|
4792
|
-
lines.push("## Connections by Relation", "");
|
|
4793
|
-
for (const [rel, targets] of [...byRelation.entries()].sort()) {
|
|
4794
|
-
lines.push(`### ${rel}`);
|
|
4795
|
-
for (const t of targets.slice(0, 20)) {
|
|
4796
|
-
lines.push(`- ${t}`);
|
|
4797
|
-
}
|
|
4798
|
-
lines.push("");
|
|
4799
|
-
}
|
|
4800
|
-
lines.push("---", "", "*Part of the graphify knowledge wiki. See [[index]] to navigate.*");
|
|
4801
|
-
return lines.join("\n");
|
|
4802
|
-
}
|
|
4803
|
-
function indexMd(communities, labels, godNodesData, totalNodes, totalEdges) {
|
|
4804
|
-
const lines = [
|
|
4805
|
-
"# Knowledge Graph Index",
|
|
4806
|
-
"",
|
|
4807
|
-
"> Auto-generated by graphify. Start here \u2014 read community articles for context, then drill into god nodes for detail.",
|
|
4808
|
-
"",
|
|
4809
|
-
`**${totalNodes} nodes \xB7 ${totalEdges} edges \xB7 ${communities.size} communities**`,
|
|
4810
|
-
"",
|
|
4811
|
-
"---",
|
|
4812
|
-
"",
|
|
4813
|
-
"## Communities",
|
|
4814
|
-
"(sorted by size, largest first)",
|
|
4815
|
-
""
|
|
4816
|
-
];
|
|
4817
|
-
const sorted = [...communities.entries()].sort((a, b) => b[1].length - a[1].length);
|
|
4818
|
-
for (const [cid, nodes] of sorted) {
|
|
4819
|
-
const label = labels.get(cid) ?? `Community ${cid}`;
|
|
4820
|
-
lines.push(`- [[${label}]] \u2014 ${nodes.length} nodes`);
|
|
4821
|
-
}
|
|
4822
|
-
lines.push("");
|
|
4823
|
-
if (godNodesData.length > 0) {
|
|
4824
|
-
lines.push("## God Nodes", "(most connected concepts \u2014 the load-bearing abstractions)", "");
|
|
4825
|
-
for (const node of godNodesData) {
|
|
4826
|
-
lines.push(`- [[${node.label}]] \u2014 ${node.edges} connections`);
|
|
4827
|
-
}
|
|
4828
|
-
lines.push("");
|
|
4829
|
-
}
|
|
4830
|
-
lines.push(
|
|
4831
|
-
"---",
|
|
4832
|
-
"",
|
|
4833
|
-
"*Generated by [graphify](https://github.com/safishamsi/graphify)*"
|
|
4834
|
-
);
|
|
4835
|
-
return lines.join("\n");
|
|
4836
|
-
}
|
|
4837
|
-
function toWiki(G, communities, outputDir, options) {
|
|
4838
|
-
const communityMap = toNumericMap(communities);
|
|
4839
|
-
mkdirSync(outputDir, { recursive: true });
|
|
4840
|
-
const labels = options?.communityLabels ? toNumericMap(options.communityLabels) : new Map([...communityMap.keys()].map((cid) => [cid, `Community ${cid}`]));
|
|
4841
|
-
const cohesion = toNumericMap(options?.cohesion);
|
|
4842
|
-
const godNodesData = options?.godNodesData ?? [];
|
|
4843
|
-
let count = 0;
|
|
4844
|
-
for (const [cid, nodes] of communityMap) {
|
|
4845
|
-
const label = labels.get(cid) ?? `Community ${cid}`;
|
|
4846
|
-
const article = communityArticle(G, cid, nodes, label, labels, cohesion.get(cid));
|
|
4847
|
-
writeFileSync2(join(outputDir, `${safeFilename(label)}.md`), article);
|
|
4848
|
-
count++;
|
|
4849
|
-
}
|
|
4850
|
-
for (const nodeData of godNodesData) {
|
|
4851
|
-
const nid = nodeData.id;
|
|
4852
|
-
if (nid && G.hasNode(nid)) {
|
|
4853
|
-
const article = godNodeArticle(G, nid, labels);
|
|
4854
|
-
writeFileSync2(join(outputDir, `${safeFilename(nodeData.label)}.md`), article);
|
|
4855
|
-
count++;
|
|
4856
|
-
}
|
|
4857
|
-
}
|
|
4858
|
-
writeFileSync2(
|
|
4859
|
-
join(outputDir, "index.md"),
|
|
4860
|
-
indexMd(communityMap, labels, godNodesData, G.order, G.size)
|
|
4861
|
-
);
|
|
4862
|
-
return count;
|
|
4863
|
-
}
|
|
4864
|
-
|
|
4865
|
-
// src/detect.ts
|
|
4866
|
-
import {
|
|
4867
|
-
readdirSync,
|
|
4868
|
-
readFileSync,
|
|
4869
|
-
writeFileSync as writeFileSync3,
|
|
4870
|
-
statSync,
|
|
4871
|
-
existsSync as existsSync2,
|
|
4872
|
-
mkdirSync as mkdirSync2,
|
|
4873
|
-
lstatSync
|
|
4874
|
-
} from "fs";
|
|
4875
|
-
import { join as join2, resolve, extname, basename, relative, sep } from "path";
|
|
4876
|
-
import { createHash } from "crypto";
|
|
4877
|
-
var MANIFEST_PATH = "graphify-out/manifest.json";
|
|
4878
|
-
var CODE_EXTENSIONS2 = /* @__PURE__ */ new Set([
|
|
4879
|
-
".py",
|
|
4880
|
-
".ts",
|
|
4881
|
-
".js",
|
|
4882
|
-
".jsx",
|
|
4883
|
-
".tsx",
|
|
4884
|
-
".go",
|
|
4885
|
-
".rs",
|
|
4886
|
-
".java",
|
|
4887
|
-
".cpp",
|
|
4888
|
-
".cc",
|
|
4889
|
-
".cxx",
|
|
4890
|
-
".c",
|
|
4891
|
-
".h",
|
|
4892
|
-
".hpp",
|
|
4893
|
-
".rb",
|
|
4894
|
-
".swift",
|
|
4895
|
-
".kt",
|
|
4896
|
-
".kts",
|
|
4897
|
-
".cs",
|
|
4898
|
-
".scala",
|
|
4899
|
-
".php",
|
|
4900
|
-
".lua",
|
|
4901
|
-
".toc",
|
|
4902
|
-
".zig",
|
|
4903
|
-
".ps1",
|
|
4904
|
-
".ex",
|
|
4905
|
-
".exs",
|
|
4906
|
-
".m",
|
|
4907
|
-
".mm",
|
|
4908
|
-
".jl"
|
|
4909
|
-
]);
|
|
4910
|
-
var DOC_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".rst"]);
|
|
4911
|
-
var PAPER_EXTENSIONS2 = /* @__PURE__ */ new Set([".pdf"]);
|
|
4912
|
-
var IMAGE_EXTENSIONS2 = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"]);
|
|
4913
|
-
var OFFICE_EXTENSIONS = /* @__PURE__ */ new Set([".docx", ".xlsx"]);
|
|
4914
|
-
var CORPUS_WARN_THRESHOLD = 5e4;
|
|
4915
|
-
var CORPUS_UPPER_THRESHOLD = 5e5;
|
|
4916
|
-
var FILE_COUNT_UPPER = 200;
|
|
4917
|
-
var SENSITIVE_PATTERNS = [
|
|
4918
|
-
/(^|[\\/])\.(env|envrc)(\.|$)/i,
|
|
4919
|
-
/\.(pem|key|p12|pfx|cert|crt|der|p8)$/i,
|
|
4920
|
-
/(credential|secret|passwd|password|token|private_key)/i,
|
|
4921
|
-
/(id_rsa|id_dsa|id_ecdsa|id_ed25519)(\.pub)?$/,
|
|
4922
|
-
/(\.netrc|\.pgpass|\.htpasswd)$/i,
|
|
4923
|
-
/(aws_credentials|gcloud_credentials|service.account)/i
|
|
4924
|
-
];
|
|
4925
|
-
var PAPER_SIGNALS = [
|
|
4926
|
-
/\barxiv\b/i,
|
|
4927
|
-
/\bdoi\s*:/i,
|
|
4928
|
-
/\babstract\b/i,
|
|
4929
|
-
/\bproceedings\b/i,
|
|
4930
|
-
/\bjournal\b/i,
|
|
4931
|
-
/\bpreprint\b/i,
|
|
4932
|
-
/\\cite\{/,
|
|
4933
|
-
/\[\d+\]/,
|
|
4934
|
-
/\[\n\d+\n\]/,
|
|
4935
|
-
/eq\.\s*\d+|equation\s+\d+/i,
|
|
4936
|
-
/\d{4}\.\d{4,5}/,
|
|
4937
|
-
/\bwe propose\b/i,
|
|
4938
|
-
/\bliterature\b/i
|
|
4939
|
-
];
|
|
4940
|
-
var PAPER_SIGNAL_THRESHOLD = 3;
|
|
4941
|
-
function isSensitive(filePath) {
|
|
4942
|
-
const name = basename(filePath);
|
|
4943
|
-
return SENSITIVE_PATTERNS.some((p) => p.test(name) || p.test(filePath));
|
|
4944
|
-
}
|
|
4945
|
-
function looksLikePaper(filePath) {
|
|
4946
|
-
try {
|
|
4947
|
-
const text = readFileSync(filePath, "utf-8").slice(0, 3e3);
|
|
4948
|
-
const hits = PAPER_SIGNALS.filter((p) => p.test(text)).length;
|
|
4949
|
-
return hits >= PAPER_SIGNAL_THRESHOLD;
|
|
4950
|
-
} catch {
|
|
4951
|
-
return false;
|
|
4952
|
-
}
|
|
4953
|
-
}
|
|
4954
|
-
var ASSET_DIR_MARKERS = /* @__PURE__ */ new Set([".imageset", ".xcassets", ".appiconset", ".colorset", ".launchimage"]);
|
|
4955
|
-
function classifyFile(filePath) {
|
|
4956
|
-
const ext = extname(filePath).toLowerCase();
|
|
4957
|
-
if (CODE_EXTENSIONS2.has(ext)) return "code" /* CODE */;
|
|
4958
|
-
if (PAPER_EXTENSIONS2.has(ext)) {
|
|
4959
|
-
const parts = filePath.split(sep);
|
|
4960
|
-
if (parts.some((p) => [...ASSET_DIR_MARKERS].some((m) => p.endsWith(m)))) return null;
|
|
4961
|
-
return "paper" /* PAPER */;
|
|
4962
|
-
}
|
|
4963
|
-
if (IMAGE_EXTENSIONS2.has(ext)) return "image" /* IMAGE */;
|
|
4964
|
-
if (DOC_EXTENSIONS.has(ext)) {
|
|
4965
|
-
if (looksLikePaper(filePath)) return "paper" /* PAPER */;
|
|
4966
|
-
return "document" /* DOCUMENT */;
|
|
4967
|
-
}
|
|
4968
|
-
if (OFFICE_EXTENSIONS.has(ext)) return "document" /* DOCUMENT */;
|
|
4969
|
-
return null;
|
|
4970
|
-
}
|
|
4971
|
-
function countWords(filePath) {
|
|
4972
|
-
try {
|
|
4973
|
-
const text = readFileSync(filePath, "utf-8");
|
|
4974
|
-
return text.split(/\s+/).filter(Boolean).length;
|
|
4975
|
-
} catch {
|
|
4976
|
-
return 0;
|
|
4977
|
-
}
|
|
4978
|
-
}
|
|
4979
|
-
var SKIP_DIRS = /* @__PURE__ */ new Set([
|
|
4980
|
-
"venv",
|
|
4981
|
-
".venv",
|
|
4982
|
-
"env",
|
|
4983
|
-
".env",
|
|
4984
|
-
"node_modules",
|
|
4985
|
-
"__pycache__",
|
|
4986
|
-
".git",
|
|
4987
|
-
"dist",
|
|
4988
|
-
"build",
|
|
4989
|
-
"target",
|
|
4990
|
-
"out",
|
|
4991
|
-
"site-packages",
|
|
4992
|
-
"lib64",
|
|
4993
|
-
".pytest_cache",
|
|
4994
|
-
".mypy_cache",
|
|
4995
|
-
".ruff_cache",
|
|
4996
|
-
".tox",
|
|
4997
|
-
".eggs"
|
|
4998
|
-
]);
|
|
4999
|
-
function isNoiseDir(part) {
|
|
5000
|
-
if (SKIP_DIRS.has(part)) return true;
|
|
5001
|
-
if (part.endsWith("_venv") || part.endsWith("_env")) return true;
|
|
5002
|
-
if (part.endsWith(".egg-info")) return true;
|
|
5003
|
-
return false;
|
|
5004
|
-
}
|
|
5005
|
-
function loadGraphifyignore(root) {
|
|
5006
|
-
const ignoreFile = join2(root, ".graphifyignore");
|
|
5007
|
-
if (!existsSync2(ignoreFile)) return [];
|
|
5008
|
-
const patterns = [];
|
|
5009
|
-
for (let line of readFileSync(ignoreFile, "utf-8").split("\n")) {
|
|
5010
|
-
line = line.trim();
|
|
5011
|
-
if (line && !line.startsWith("#")) {
|
|
5012
|
-
patterns.push(line);
|
|
5013
|
-
}
|
|
5014
|
-
}
|
|
5015
|
-
return patterns;
|
|
5016
|
-
}
|
|
5017
|
-
function matchGlob(text, pattern) {
|
|
5018
|
-
const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
|
|
5019
|
-
return new RegExp(`^${regex}$`).test(text);
|
|
5020
|
-
}
|
|
5021
|
-
function isIgnored(filePath, root, patterns) {
|
|
5022
|
-
if (patterns.length === 0) return false;
|
|
5023
|
-
let rel;
|
|
5024
|
-
try {
|
|
5025
|
-
rel = relative(root, filePath).replace(/\\/g, "/");
|
|
5026
|
-
} catch {
|
|
5027
|
-
return false;
|
|
5028
|
-
}
|
|
5029
|
-
const parts = rel.split("/");
|
|
5030
|
-
for (const pattern of patterns) {
|
|
5031
|
-
const p = pattern.replace(/^\/+|\/+$/g, "");
|
|
5032
|
-
if (!p) continue;
|
|
5033
|
-
if (matchGlob(rel, p)) return true;
|
|
5034
|
-
if (matchGlob(basename(filePath), p)) return true;
|
|
5035
|
-
for (let i = 0; i < parts.length; i++) {
|
|
5036
|
-
if (matchGlob(parts[i], p)) return true;
|
|
5037
|
-
if (matchGlob(parts.slice(0, i + 1).join("/"), p)) return true;
|
|
5038
|
-
}
|
|
5039
|
-
}
|
|
5040
|
-
return false;
|
|
5041
|
-
}
|
|
5042
|
-
function walkDir(dir, root, ignorePatterns, followSymlinks, skipPrune) {
|
|
5043
|
-
const result = [];
|
|
5044
|
-
let entries;
|
|
5045
|
-
try {
|
|
5046
|
-
entries = readdirSync(dir);
|
|
5047
|
-
} catch {
|
|
5048
|
-
return result;
|
|
5049
|
-
}
|
|
5050
|
-
for (const entry of entries) {
|
|
5051
|
-
const full = join2(dir, entry);
|
|
5052
|
-
let stat;
|
|
5053
|
-
try {
|
|
5054
|
-
stat = followSymlinks ? statSync(full) : lstatSync(full);
|
|
5055
|
-
} catch {
|
|
5056
|
-
continue;
|
|
5057
|
-
}
|
|
5058
|
-
if (stat.isDirectory()) {
|
|
5059
|
-
if (!skipPrune) {
|
|
5060
|
-
if (entry.startsWith(".")) continue;
|
|
5061
|
-
if (isNoiseDir(entry)) continue;
|
|
5062
|
-
if (isIgnored(full, root, ignorePatterns)) continue;
|
|
5063
|
-
}
|
|
5064
|
-
result.push(...walkDir(full, root, ignorePatterns, followSymlinks, skipPrune));
|
|
5065
|
-
} else if (stat.isFile()) {
|
|
5066
|
-
result.push(full);
|
|
5067
|
-
}
|
|
5068
|
-
}
|
|
5069
|
-
return result;
|
|
5070
|
-
}
|
|
5071
|
-
function detect(root, options) {
|
|
5072
|
-
const followSymlinks = options?.followSymlinks ?? false;
|
|
5073
|
-
const rootResolved = resolve(root);
|
|
5074
|
-
const ignorePatterns = loadGraphifyignore(rootResolved);
|
|
5075
|
-
const convertedDir = join2(rootResolved, "graphify-out", "converted");
|
|
5076
|
-
const memoryDir = join2(rootResolved, "graphify-out", "memory");
|
|
5077
|
-
const files = {
|
|
5078
|
-
code: [],
|
|
5079
|
-
document: [],
|
|
5080
|
-
paper: [],
|
|
5081
|
-
image: []
|
|
5082
|
-
};
|
|
5083
|
-
let totalWords = 0;
|
|
5084
|
-
const skippedSensitive = [];
|
|
5085
|
-
const allFiles = walkDir(rootResolved, rootResolved, ignorePatterns, followSymlinks, false);
|
|
5086
|
-
if (existsSync2(memoryDir)) {
|
|
5087
|
-
allFiles.push(...walkDir(memoryDir, rootResolved, ignorePatterns, followSymlinks, true));
|
|
5088
|
-
}
|
|
5089
|
-
const seen = /* @__PURE__ */ new Set();
|
|
5090
|
-
for (const p of allFiles) {
|
|
5091
|
-
if (seen.has(p)) continue;
|
|
5092
|
-
seen.add(p);
|
|
5093
|
-
const inMemory = existsSync2(memoryDir) && p.startsWith(memoryDir);
|
|
5094
|
-
if (!inMemory) {
|
|
5095
|
-
if (basename(p).startsWith(".")) continue;
|
|
5096
|
-
if (p.startsWith(convertedDir)) continue;
|
|
5097
|
-
}
|
|
5098
|
-
if (isIgnored(p, rootResolved, ignorePatterns)) continue;
|
|
5099
|
-
if (isSensitive(p)) {
|
|
5100
|
-
skippedSensitive.push(p);
|
|
5101
|
-
continue;
|
|
5102
|
-
}
|
|
5103
|
-
const ftype = classifyFile(p);
|
|
5104
|
-
if (!ftype) continue;
|
|
5105
|
-
if (OFFICE_EXTENSIONS.has(extname(p).toLowerCase())) {
|
|
5106
|
-
skippedSensitive.push(p + " [office conversion requires async - use pipeline]");
|
|
5107
|
-
continue;
|
|
5108
|
-
}
|
|
5109
|
-
files[ftype].push(p);
|
|
5110
|
-
totalWords += countWords(p);
|
|
5111
|
-
}
|
|
5112
|
-
const totalFiles = Object.values(files).reduce((s, v) => s + v.length, 0);
|
|
5113
|
-
const needsGraph = totalWords >= CORPUS_WARN_THRESHOLD;
|
|
5114
|
-
let warning = null;
|
|
5115
|
-
if (!needsGraph) {
|
|
5116
|
-
warning = `Corpus is ~${totalWords.toLocaleString()} words - fits in a single context window. You may not need a graph.`;
|
|
5117
|
-
} else if (totalWords >= CORPUS_UPPER_THRESHOLD || totalFiles >= FILE_COUNT_UPPER) {
|
|
5118
|
-
warning = `Large corpus: ${totalFiles} files \xB7 ~${totalWords.toLocaleString()} words. Semantic extraction will be expensive (many Claude tokens). Consider running on a subfolder, or use --no-semantic to run AST-only.`;
|
|
5251
|
+
byRelation.get(rel).push(`[[${neighborLabel}]]${confStr}`);
|
|
5119
5252
|
}
|
|
5120
|
-
|
|
5121
|
-
|
|
5122
|
-
|
|
5123
|
-
|
|
5124
|
-
|
|
5125
|
-
|
|
5126
|
-
|
|
5127
|
-
graphifyignore_patterns: ignorePatterns.length
|
|
5128
|
-
};
|
|
5129
|
-
}
|
|
5130
|
-
function loadManifest(manifestPath = MANIFEST_PATH) {
|
|
5131
|
-
try {
|
|
5132
|
-
return JSON.parse(readFileSync(manifestPath, "utf-8"));
|
|
5133
|
-
} catch {
|
|
5134
|
-
return {};
|
|
5253
|
+
lines.push("## Connections by Relation", "");
|
|
5254
|
+
for (const [rel, targets] of [...byRelation.entries()].sort()) {
|
|
5255
|
+
lines.push(`### ${rel}`);
|
|
5256
|
+
for (const t of targets.slice(0, 20)) {
|
|
5257
|
+
lines.push(`- ${t}`);
|
|
5258
|
+
}
|
|
5259
|
+
lines.push("");
|
|
5135
5260
|
}
|
|
5261
|
+
lines.push("---", "", "*Part of the graphify knowledge wiki. See [[index]] to navigate.*");
|
|
5262
|
+
return lines.join("\n");
|
|
5136
5263
|
}
|
|
5137
|
-
function
|
|
5138
|
-
const
|
|
5139
|
-
|
|
5140
|
-
|
|
5141
|
-
|
|
5142
|
-
|
|
5143
|
-
|
|
5144
|
-
|
|
5264
|
+
function indexMd(communities, labels, godNodesData, totalNodes, totalEdges) {
|
|
5265
|
+
const lines = [
|
|
5266
|
+
"# Knowledge Graph Index",
|
|
5267
|
+
"",
|
|
5268
|
+
"> Auto-generated by graphify. Start here \u2014 read community articles for context, then drill into god nodes for detail.",
|
|
5269
|
+
"",
|
|
5270
|
+
`**${totalNodes} nodes \xB7 ${totalEdges} edges \xB7 ${communities.size} communities**`,
|
|
5271
|
+
"",
|
|
5272
|
+
"---",
|
|
5273
|
+
"",
|
|
5274
|
+
"## Communities",
|
|
5275
|
+
"(sorted by size, largest first)",
|
|
5276
|
+
""
|
|
5277
|
+
];
|
|
5278
|
+
const sorted = [...communities.entries()].sort((a, b) => b[1].length - a[1].length);
|
|
5279
|
+
for (const [cid, nodes] of sorted) {
|
|
5280
|
+
const label = labels.get(cid) ?? `Community ${cid}`;
|
|
5281
|
+
lines.push(`- [[${label}]] \u2014 ${nodes.length} nodes`);
|
|
5282
|
+
}
|
|
5283
|
+
lines.push("");
|
|
5284
|
+
if (godNodesData.length > 0) {
|
|
5285
|
+
lines.push("## God Nodes", "(most connected concepts \u2014 the load-bearing abstractions)", "");
|
|
5286
|
+
for (const node of godNodesData) {
|
|
5287
|
+
lines.push(`- [[${node.label}]] \u2014 ${node.edges} connections`);
|
|
5145
5288
|
}
|
|
5289
|
+
lines.push("");
|
|
5146
5290
|
}
|
|
5147
|
-
|
|
5148
|
-
|
|
5149
|
-
|
|
5291
|
+
lines.push(
|
|
5292
|
+
"---",
|
|
5293
|
+
"",
|
|
5294
|
+
"*Generated by [graphify](https://github.com/safishamsi/graphify)*"
|
|
5295
|
+
);
|
|
5296
|
+
return lines.join("\n");
|
|
5150
5297
|
}
|
|
5151
|
-
function
|
|
5152
|
-
const
|
|
5153
|
-
|
|
5154
|
-
|
|
5155
|
-
|
|
5156
|
-
|
|
5157
|
-
|
|
5158
|
-
|
|
5159
|
-
|
|
5160
|
-
|
|
5161
|
-
};
|
|
5162
|
-
|
|
5163
|
-
const newFiles = {};
|
|
5164
|
-
const unchangedFiles = {};
|
|
5165
|
-
for (const k of Object.keys(full.files)) {
|
|
5166
|
-
newFiles[k] = [];
|
|
5167
|
-
unchangedFiles[k] = [];
|
|
5298
|
+
function toWiki(G, communities, outputDir, options) {
|
|
5299
|
+
const communityMap = toNumericMap(communities);
|
|
5300
|
+
mkdirSync2(outputDir, { recursive: true });
|
|
5301
|
+
const labels = options?.communityLabels ? toNumericMap(options.communityLabels) : new Map([...communityMap.keys()].map((cid) => [cid, `Community ${cid}`]));
|
|
5302
|
+
const cohesion = toNumericMap(options?.cohesion);
|
|
5303
|
+
const godNodesData = options?.godNodesData ?? [];
|
|
5304
|
+
let count = 0;
|
|
5305
|
+
for (const [cid, nodes] of communityMap) {
|
|
5306
|
+
const label = labels.get(cid) ?? `Community ${cid}`;
|
|
5307
|
+
const article = communityArticle(G, cid, nodes, label, labels, cohesion.get(cid));
|
|
5308
|
+
writeFileSync3(join2(outputDir, `${safeFilename(label)}.md`), article);
|
|
5309
|
+
count++;
|
|
5168
5310
|
}
|
|
5169
|
-
for (const
|
|
5170
|
-
|
|
5171
|
-
|
|
5172
|
-
|
|
5173
|
-
|
|
5174
|
-
|
|
5175
|
-
} catch {
|
|
5176
|
-
}
|
|
5177
|
-
if (storedMtime === void 0 || currentMtime > storedMtime) {
|
|
5178
|
-
newFiles[ftype].push(f);
|
|
5179
|
-
} else {
|
|
5180
|
-
unchangedFiles[ftype].push(f);
|
|
5181
|
-
}
|
|
5311
|
+
for (const nodeData of godNodesData) {
|
|
5312
|
+
const nid = nodeData.id;
|
|
5313
|
+
if (nid && G.hasNode(nid)) {
|
|
5314
|
+
const article = godNodeArticle(G, nid, labels);
|
|
5315
|
+
writeFileSync3(join2(outputDir, `${safeFilename(nodeData.label)}.md`), article);
|
|
5316
|
+
count++;
|
|
5182
5317
|
}
|
|
5183
5318
|
}
|
|
5184
|
-
|
|
5185
|
-
|
|
5186
|
-
|
|
5187
|
-
|
|
5188
|
-
|
|
5189
|
-
incremental: true,
|
|
5190
|
-
new_files: newFiles,
|
|
5191
|
-
unchanged_files: unchangedFiles,
|
|
5192
|
-
new_total: newTotal,
|
|
5193
|
-
deleted_files: deletedFiles
|
|
5194
|
-
};
|
|
5319
|
+
writeFileSync3(
|
|
5320
|
+
join2(outputDir, "index.md"),
|
|
5321
|
+
indexMd(communityMap, labels, godNodesData, G.order, G.size)
|
|
5322
|
+
);
|
|
5323
|
+
return count;
|
|
5195
5324
|
}
|
|
5196
5325
|
|
|
5197
5326
|
// src/index.ts
|
|
5327
|
+
init_detect();
|
|
5198
5328
|
init_extract();
|
|
5199
5329
|
init_cache();
|
|
5200
5330
|
init_security();
|
|
5201
5331
|
|
|
5202
5332
|
// src/benchmark.ts
|
|
5333
|
+
init_graph();
|
|
5203
5334
|
import { readFileSync as readFileSync4, existsSync as existsSync5 } from "fs";
|
|
5204
|
-
import Graph2 from "graphology";
|
|
5205
5335
|
var CHARS_PER_TOKEN = 4;
|
|
5206
5336
|
function estimateTokens(text) {
|
|
5207
5337
|
return Math.max(1, Math.floor(text.length / CHARS_PER_TOKEN));
|
|
@@ -5223,7 +5353,7 @@ function querySubgraphTokens(G, question, depth = 3) {
|
|
|
5223
5353
|
for (let d = 0; d < depth; d++) {
|
|
5224
5354
|
const nextFrontier = /* @__PURE__ */ new Set();
|
|
5225
5355
|
for (const n of frontier) {
|
|
5226
|
-
G
|
|
5356
|
+
forEachTraversalNeighbor(G, n, (neighbor) => {
|
|
5227
5357
|
if (!visited.has(neighbor)) {
|
|
5228
5358
|
nextFrontier.add(neighbor);
|
|
5229
5359
|
edgesSeen.push([n, neighbor]);
|
|
@@ -5258,21 +5388,7 @@ var SAMPLE_QUESTIONS = [
|
|
|
5258
5388
|
];
|
|
5259
5389
|
function loadGraph(graphPath) {
|
|
5260
5390
|
const raw = JSON.parse(readFileSync4(graphPath, "utf-8"));
|
|
5261
|
-
|
|
5262
|
-
for (const node of raw.nodes ?? []) {
|
|
5263
|
-
const { id, ...attrs } = node;
|
|
5264
|
-
G.mergeNode(id, attrs);
|
|
5265
|
-
}
|
|
5266
|
-
for (const link of raw.links ?? []) {
|
|
5267
|
-
const { source, target, ...attrs } = link;
|
|
5268
|
-
if (G.hasNode(source) && G.hasNode(target)) {
|
|
5269
|
-
try {
|
|
5270
|
-
G.mergeEdge(source, target, attrs);
|
|
5271
|
-
} catch {
|
|
5272
|
-
}
|
|
5273
|
-
}
|
|
5274
|
-
}
|
|
5275
|
-
return G;
|
|
5391
|
+
return loadGraphFromData(raw);
|
|
5276
5392
|
}
|
|
5277
5393
|
function runBenchmark(graphPath = "graphify-out/graph.json", corpusWordsOrOptions, questions) {
|
|
5278
5394
|
const options = typeof corpusWordsOrOptions === "number" ? { corpusWords: corpusWordsOrOptions, questions } : corpusWordsOrOptions ?? {};
|
|
@@ -5336,8 +5452,404 @@ graphify token reduction benchmark`);
|
|
|
5336
5452
|
|
|
5337
5453
|
// src/ingest.ts
|
|
5338
5454
|
init_security();
|
|
5339
|
-
import { existsSync as
|
|
5340
|
-
import { resolve as pathResolve2, basename as
|
|
5455
|
+
import { existsSync as existsSync7, mkdirSync as mkdirSync5, writeFileSync as writeFileSync6 } from "fs";
|
|
5456
|
+
import { resolve as pathResolve2, basename as basename4, extname as extname5 } from "path";
|
|
5457
|
+
|
|
5458
|
+
// src/transcribe.ts
|
|
5459
|
+
import * as childProcess from "child_process";
|
|
5460
|
+
import { createHash as createHash3 } from "crypto";
|
|
5461
|
+
import {
|
|
5462
|
+
cpSync,
|
|
5463
|
+
createWriteStream,
|
|
5464
|
+
existsSync as existsSync6,
|
|
5465
|
+
mkdirSync as mkdirSync4,
|
|
5466
|
+
mkdtempSync,
|
|
5467
|
+
readdirSync as readdirSync4,
|
|
5468
|
+
renameSync as renameSync2,
|
|
5469
|
+
rmSync,
|
|
5470
|
+
writeFileSync as writeFileSync5
|
|
5471
|
+
} from "fs";
|
|
5472
|
+
import { homedir, platform, tmpdir } from "os";
|
|
5473
|
+
import { basename as basename3, dirname as dirname3, extname as extname4, join as join5, resolve as resolve5 } from "path";
|
|
5474
|
+
import { Readable } from "stream";
|
|
5475
|
+
import { pipeline } from "stream/promises";
|
|
5476
|
+
var URL_PREFIXES = ["http://", "https://", "www."];
|
|
5477
|
+
var CACHED_AUDIO_EXTENSIONS = [".m4a", ".opus", ".mp3", ".ogg", ".wav", ".webm"];
|
|
5478
|
+
var DEFAULT_MODEL = "base";
|
|
5479
|
+
var TRANSCRIPTS_DIR = "graphify-out/transcripts";
|
|
5480
|
+
var FALLBACK_PROMPT = "Use proper punctuation and paragraph breaks.";
|
|
5481
|
+
var SHERPA_RELEASE_BASE = "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models";
|
|
5482
|
+
var AUDIO_SAMPLE_RATE = 16e3;
|
|
5483
|
+
var SUPPORTED_MODELS = /* @__PURE__ */ new Set([
|
|
5484
|
+
"tiny",
|
|
5485
|
+
"tiny.en",
|
|
5486
|
+
"base",
|
|
5487
|
+
"base.en",
|
|
5488
|
+
"small",
|
|
5489
|
+
"small.en",
|
|
5490
|
+
"medium",
|
|
5491
|
+
"medium.en",
|
|
5492
|
+
"large-v1",
|
|
5493
|
+
"large-v2",
|
|
5494
|
+
"large-v3",
|
|
5495
|
+
"turbo",
|
|
5496
|
+
"distil-small.en",
|
|
5497
|
+
"distil-medium.en",
|
|
5498
|
+
"distil-large-v2",
|
|
5499
|
+
"distil-large-v3",
|
|
5500
|
+
"distil-large-v3.5"
|
|
5501
|
+
]);
|
|
5502
|
+
var MODEL_ALIASES = {
|
|
5503
|
+
large: "large-v3"
|
|
5504
|
+
};
|
|
5505
|
+
var recognizerCache = /* @__PURE__ */ new Map();
|
|
5506
|
+
var sherpaModulePromise = null;
|
|
5507
|
+
function runCommand(command, args, options) {
|
|
5508
|
+
const result = childProcess.spawnSync(command, args, {
|
|
5509
|
+
encoding: "utf-8",
|
|
5510
|
+
...options
|
|
5511
|
+
});
|
|
5512
|
+
if (result.error) {
|
|
5513
|
+
throw result.error;
|
|
5514
|
+
}
|
|
5515
|
+
if (result.status !== 0) {
|
|
5516
|
+
throw new Error(result.stderr?.trim() || result.stdout?.trim() || `${command} failed`);
|
|
5517
|
+
}
|
|
5518
|
+
return result;
|
|
5519
|
+
}
|
|
5520
|
+
function defaultWhisperCacheDir() {
|
|
5521
|
+
if (process.env.GRAPHIFY_WHISPER_CACHE_DIR) {
|
|
5522
|
+
return resolve5(process.env.GRAPHIFY_WHISPER_CACHE_DIR);
|
|
5523
|
+
}
|
|
5524
|
+
if (platform() === "win32") {
|
|
5525
|
+
return join5(
|
|
5526
|
+
process.env.LOCALAPPDATA ?? join5(homedir(), "AppData", "Local"),
|
|
5527
|
+
"graphify",
|
|
5528
|
+
"whisper"
|
|
5529
|
+
);
|
|
5530
|
+
}
|
|
5531
|
+
return join5(process.env.XDG_CACHE_HOME ?? join5(homedir(), ".cache"), "graphify", "whisper");
|
|
5532
|
+
}
|
|
5533
|
+
function ffmpegBinary() {
|
|
5534
|
+
return process.env.GRAPHIFY_FFMPEG_BIN ?? "ffmpeg";
|
|
5535
|
+
}
|
|
5536
|
+
function tarBinary() {
|
|
5537
|
+
return process.env.GRAPHIFY_TAR_BIN ?? "tar";
|
|
5538
|
+
}
|
|
5539
|
+
function resolveRequestedModel(modelName) {
|
|
5540
|
+
const requested = modelName ?? process.env.GRAPHIFY_WHISPER_MODEL ?? DEFAULT_MODEL;
|
|
5541
|
+
const resolved = MODEL_ALIASES[requested] ?? requested;
|
|
5542
|
+
if (!SUPPORTED_MODELS.has(resolved)) {
|
|
5543
|
+
throw new Error(
|
|
5544
|
+
`Unsupported GRAPHIFY_WHISPER_MODEL "${requested}". Supported local TS models: ${[...SUPPORTED_MODELS].sort().join(", ")}`
|
|
5545
|
+
);
|
|
5546
|
+
}
|
|
5547
|
+
return { requested, resolved };
|
|
5548
|
+
}
|
|
5549
|
+
function walkFiles(dir) {
|
|
5550
|
+
if (!existsSync6(dir)) return [];
|
|
5551
|
+
const files = [];
|
|
5552
|
+
for (const entry of readdirSync4(dir, { withFileTypes: true })) {
|
|
5553
|
+
const fullPath = join5(dir, entry.name);
|
|
5554
|
+
if (entry.isDirectory()) {
|
|
5555
|
+
files.push(...walkFiles(fullPath));
|
|
5556
|
+
} else {
|
|
5557
|
+
files.push(fullPath);
|
|
5558
|
+
}
|
|
5559
|
+
}
|
|
5560
|
+
return files;
|
|
5561
|
+
}
|
|
5562
|
+
function findArtifactsIn(dir) {
|
|
5563
|
+
const files = walkFiles(dir);
|
|
5564
|
+
const encoderPath = files.find((path) => path.endsWith("-encoder.int8.onnx")) ?? files.find((path) => path.endsWith("-encoder.onnx"));
|
|
5565
|
+
const decoderPath = files.find((path) => path.endsWith("-decoder.int8.onnx")) ?? files.find((path) => path.endsWith("-decoder.onnx"));
|
|
5566
|
+
const tokensPath = files.find((path) => path.endsWith("-tokens.txt"));
|
|
5567
|
+
if (!encoderPath || !decoderPath || !tokensPath) {
|
|
5568
|
+
return null;
|
|
5569
|
+
}
|
|
5570
|
+
return {
|
|
5571
|
+
modelDir: dir,
|
|
5572
|
+
encoderPath,
|
|
5573
|
+
decoderPath,
|
|
5574
|
+
tokensPath
|
|
5575
|
+
};
|
|
5576
|
+
}
|
|
5577
|
+
function normalizeModelError(detail) {
|
|
5578
|
+
if (detail.includes("404")) {
|
|
5579
|
+
return `${detail}. The local sherpa-onnx release asset was not found for this Whisper model name.`;
|
|
5580
|
+
}
|
|
5581
|
+
return detail;
|
|
5582
|
+
}
|
|
5583
|
+
async function writeResponseToFile(response, destination) {
|
|
5584
|
+
if (!response.ok || !response.body) {
|
|
5585
|
+
throw new Error(`HTTP ${response.status} while downloading ${response.url}`);
|
|
5586
|
+
}
|
|
5587
|
+
await pipeline(Readable.fromWeb(response.body), createWriteStream(destination));
|
|
5588
|
+
}
|
|
5589
|
+
async function ensureWhisperArtifacts(modelName) {
|
|
5590
|
+
const { requested, resolved } = resolveRequestedModel(modelName);
|
|
5591
|
+
const cacheRoot = defaultWhisperCacheDir();
|
|
5592
|
+
mkdirSync4(cacheRoot, { recursive: true });
|
|
5593
|
+
const modelDir = join5(cacheRoot, `sherpa-onnx-whisper-${resolved}`);
|
|
5594
|
+
const cached = findArtifactsIn(modelDir);
|
|
5595
|
+
if (cached) {
|
|
5596
|
+
return { requestedModel: requested, resolvedModel: resolved, ...cached };
|
|
5597
|
+
}
|
|
5598
|
+
const tempDir = mkdtempSync(join5(tmpdir(), "graphify-whisper-model-"));
|
|
5599
|
+
const extractDir = join5(tempDir, "extract");
|
|
5600
|
+
const archiveName = `sherpa-onnx-whisper-${resolved}.tar.bz2`;
|
|
5601
|
+
const archivePath = join5(tempDir, archiveName);
|
|
5602
|
+
mkdirSync4(extractDir, { recursive: true });
|
|
5603
|
+
try {
|
|
5604
|
+
const url = `${SHERPA_RELEASE_BASE}/${archiveName}`;
|
|
5605
|
+
console.log(` downloading whisper model: ${resolved}`);
|
|
5606
|
+
const response = await fetch(url);
|
|
5607
|
+
await writeResponseToFile(response, archivePath);
|
|
5608
|
+
runCommand(tarBinary(), ["-xjf", archivePath, "-C", extractDir]);
|
|
5609
|
+
const extractedRoot = walkFiles(extractDir).map((path) => dirname3(path)).find((path) => findArtifactsIn(path) !== null);
|
|
5610
|
+
const sourceDir = extractedRoot ?? readdirSync4(extractDir, { withFileTypes: true }).filter((entry) => entry.isDirectory()).map((entry) => join5(extractDir, entry.name)).find((path) => findArtifactsIn(path) !== null);
|
|
5611
|
+
if (!sourceDir) {
|
|
5612
|
+
throw new Error(`Downloaded archive for ${resolved} but could not locate Whisper model files`);
|
|
5613
|
+
}
|
|
5614
|
+
if (existsSync6(modelDir)) {
|
|
5615
|
+
rmSync(modelDir, { recursive: true, force: true });
|
|
5616
|
+
}
|
|
5617
|
+
try {
|
|
5618
|
+
renameSync2(sourceDir, modelDir);
|
|
5619
|
+
} catch {
|
|
5620
|
+
cpSync(sourceDir, modelDir, { recursive: true });
|
|
5621
|
+
}
|
|
5622
|
+
const artifacts = findArtifactsIn(modelDir);
|
|
5623
|
+
if (!artifacts) {
|
|
5624
|
+
throw new Error(`Model cache for ${resolved} is incomplete after extraction`);
|
|
5625
|
+
}
|
|
5626
|
+
return { requestedModel: requested, resolvedModel: resolved, ...artifacts };
|
|
5627
|
+
} catch (error) {
|
|
5628
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
5629
|
+
throw new Error(normalizeModelError(detail));
|
|
5630
|
+
} finally {
|
|
5631
|
+
rmSync(tempDir, { recursive: true, force: true });
|
|
5632
|
+
}
|
|
5633
|
+
}
|
|
5634
|
+
async function loadSherpaModule() {
|
|
5635
|
+
if (!sherpaModulePromise) {
|
|
5636
|
+
sherpaModulePromise = import("sherpa-onnx-node").then((imported) => Reflect.has(imported, "default") ? Reflect.get(imported, "default") : imported).catch((error) => {
|
|
5637
|
+
sherpaModulePromise = null;
|
|
5638
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
5639
|
+
throw new Error(
|
|
5640
|
+
`Video transcription requires the optional dependency sherpa-onnx-node. Install it locally, then retry. ${detail}`
|
|
5641
|
+
);
|
|
5642
|
+
});
|
|
5643
|
+
}
|
|
5644
|
+
return sherpaModulePromise;
|
|
5645
|
+
}
|
|
5646
|
+
async function getRecognizer(modelName, sherpa) {
|
|
5647
|
+
const artifacts = await ensureWhisperArtifacts(modelName);
|
|
5648
|
+
const cacheKey = artifacts.modelDir;
|
|
5649
|
+
const existing = recognizerCache.get(cacheKey);
|
|
5650
|
+
if (existing) {
|
|
5651
|
+
return { recognizer: await existing, artifacts };
|
|
5652
|
+
}
|
|
5653
|
+
const createRecognizer = (async () => {
|
|
5654
|
+
const runtime = sherpa ?? await loadSherpaModule();
|
|
5655
|
+
return runtime.OfflineRecognizer.createAsync({
|
|
5656
|
+
featConfig: {
|
|
5657
|
+
sampleRate: AUDIO_SAMPLE_RATE,
|
|
5658
|
+
featureDim: 80
|
|
5659
|
+
},
|
|
5660
|
+
modelConfig: {
|
|
5661
|
+
whisper: {
|
|
5662
|
+
encoder: artifacts.encoderPath,
|
|
5663
|
+
decoder: artifacts.decoderPath,
|
|
5664
|
+
task: "transcribe"
|
|
5665
|
+
},
|
|
5666
|
+
tokens: artifacts.tokensPath,
|
|
5667
|
+
numThreads: 1,
|
|
5668
|
+
provider: "cpu",
|
|
5669
|
+
debug: 0
|
|
5670
|
+
}
|
|
5671
|
+
});
|
|
5672
|
+
})();
|
|
5673
|
+
recognizerCache.set(
|
|
5674
|
+
cacheKey,
|
|
5675
|
+
createRecognizer.catch((error) => {
|
|
5676
|
+
recognizerCache.delete(cacheKey);
|
|
5677
|
+
throw error;
|
|
5678
|
+
})
|
|
5679
|
+
);
|
|
5680
|
+
return { recognizer: await recognizerCache.get(cacheKey), artifacts };
|
|
5681
|
+
}
|
|
5682
|
+
function normalizeToWave(audioPath, workingDir) {
|
|
5683
|
+
const wavPath = join5(workingDir, `${basename3(audioPath, extname4(audioPath))}.wav`);
|
|
5684
|
+
try {
|
|
5685
|
+
runCommand(ffmpegBinary(), [
|
|
5686
|
+
"-y",
|
|
5687
|
+
"-i",
|
|
5688
|
+
audioPath,
|
|
5689
|
+
"-vn",
|
|
5690
|
+
"-ac",
|
|
5691
|
+
"1",
|
|
5692
|
+
"-ar",
|
|
5693
|
+
String(AUDIO_SAMPLE_RATE),
|
|
5694
|
+
"-c:a",
|
|
5695
|
+
"pcm_s16le",
|
|
5696
|
+
wavPath
|
|
5697
|
+
]);
|
|
5698
|
+
} catch (error) {
|
|
5699
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
5700
|
+
throw new Error(
|
|
5701
|
+
`Video transcription requires ffmpeg in PATH. Install ffmpeg locally, then retry. ${detail}`
|
|
5702
|
+
);
|
|
5703
|
+
}
|
|
5704
|
+
return wavPath;
|
|
5705
|
+
}
|
|
5706
|
+
function extractTranscriptText(result) {
|
|
5707
|
+
return String(result.text ?? "").trim();
|
|
5708
|
+
}
|
|
5709
|
+
function isUrl(pathLike) {
|
|
5710
|
+
return URL_PREFIXES.some((prefix) => pathLike.startsWith(prefix));
|
|
5711
|
+
}
|
|
5712
|
+
function downloadAudio(url, outputDir) {
|
|
5713
|
+
mkdirSync4(outputDir, { recursive: true });
|
|
5714
|
+
const urlHash = createHash3("sha1").update(url).digest("hex").slice(0, 12);
|
|
5715
|
+
for (const ext of CACHED_AUDIO_EXTENSIONS) {
|
|
5716
|
+
const candidate = join5(outputDir, `yt_${urlHash}${ext}`);
|
|
5717
|
+
if (existsSync6(candidate)) {
|
|
5718
|
+
console.log(` cached audio: ${basename3(candidate)}`);
|
|
5719
|
+
return candidate;
|
|
5720
|
+
}
|
|
5721
|
+
}
|
|
5722
|
+
const outTemplate = join5(outputDir, `yt_${urlHash}.%(ext)s`);
|
|
5723
|
+
try {
|
|
5724
|
+
console.log(` downloading audio: ${url.slice(0, 80)} ...`);
|
|
5725
|
+
runCommand("yt-dlp", [
|
|
5726
|
+
"-f",
|
|
5727
|
+
"bestaudio[ext=m4a]/bestaudio/best",
|
|
5728
|
+
"-o",
|
|
5729
|
+
outTemplate,
|
|
5730
|
+
"--quiet",
|
|
5731
|
+
"--no-warnings",
|
|
5732
|
+
"--no-playlist",
|
|
5733
|
+
url
|
|
5734
|
+
]);
|
|
5735
|
+
} catch (error) {
|
|
5736
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
5737
|
+
throw new Error(
|
|
5738
|
+
`YouTube/URL download requires yt-dlp. Install yt-dlp to enable video ingestion. ${detail}`
|
|
5739
|
+
);
|
|
5740
|
+
}
|
|
5741
|
+
for (const entry of readdirSync4(outputDir)) {
|
|
5742
|
+
if (entry.startsWith(`yt_${urlHash}.`)) {
|
|
5743
|
+
return join5(outputDir, entry);
|
|
5744
|
+
}
|
|
5745
|
+
}
|
|
5746
|
+
throw new Error(`yt-dlp finished without producing an audio file for ${url}`);
|
|
5747
|
+
}
|
|
5748
|
+
function buildWhisperPrompt(godNodes2) {
|
|
5749
|
+
const override = process.env.GRAPHIFY_WHISPER_PROMPT;
|
|
5750
|
+
if (override) return override;
|
|
5751
|
+
const labels = godNodes2.map((node) => node.label ?? "").filter((label) => Boolean(label)).slice(0, 5);
|
|
5752
|
+
if (labels.length === 0) {
|
|
5753
|
+
return FALLBACK_PROMPT;
|
|
5754
|
+
}
|
|
5755
|
+
return `Technical discussion about ${labels.join(", ")}. ${FALLBACK_PROMPT}`;
|
|
5756
|
+
}
|
|
5757
|
+
async function transcribe(videoPath, outputDir = TRANSCRIPTS_DIR, initialPrompt, force = false) {
|
|
5758
|
+
const outDir = resolve5(outputDir);
|
|
5759
|
+
mkdirSync4(outDir, { recursive: true });
|
|
5760
|
+
const audioPath = isUrl(videoPath) ? downloadAudio(videoPath, join5(outDir, "downloads")) : resolve5(videoPath);
|
|
5761
|
+
const transcriptPath = join5(outDir, `${basename3(audioPath, extname4(audioPath))}.txt`);
|
|
5762
|
+
if (existsSync6(transcriptPath) && !force) {
|
|
5763
|
+
return transcriptPath;
|
|
5764
|
+
}
|
|
5765
|
+
const prompt = initialPrompt ?? process.env.GRAPHIFY_WHISPER_PROMPT ?? FALLBACK_PROMPT;
|
|
5766
|
+
const requestedModel = process.env.GRAPHIFY_WHISPER_MODEL ?? DEFAULT_MODEL;
|
|
5767
|
+
const tempDir = mkdtempSync(join5(tmpdir(), "graphify-transcribe-"));
|
|
5768
|
+
try {
|
|
5769
|
+
console.log(` transcribing ${basename3(audioPath)} (model=${requestedModel}) ...`);
|
|
5770
|
+
const wavPath = normalizeToWave(audioPath, tempDir);
|
|
5771
|
+
const sherpa = await loadSherpaModule();
|
|
5772
|
+
const { recognizer, artifacts } = await getRecognizer(requestedModel, sherpa);
|
|
5773
|
+
const wave = sherpa.readWave(wavPath);
|
|
5774
|
+
const stream = recognizer.createStream();
|
|
5775
|
+
if (prompt && typeof stream.setOption === "function") {
|
|
5776
|
+
try {
|
|
5777
|
+
stream.setOption("prompt", prompt);
|
|
5778
|
+
} catch {
|
|
5779
|
+
}
|
|
5780
|
+
}
|
|
5781
|
+
stream.acceptWaveform({ samples: wave.samples, sampleRate: wave.sampleRate });
|
|
5782
|
+
const result = await recognizer.decodeAsync(stream);
|
|
5783
|
+
const transcript = extractTranscriptText(result);
|
|
5784
|
+
writeFileSync5(transcriptPath, transcript, "utf-8");
|
|
5785
|
+
if (artifacts.requestedModel !== artifacts.resolvedModel) {
|
|
5786
|
+
console.log(` model alias: ${artifacts.requestedModel} -> ${artifacts.resolvedModel}`);
|
|
5787
|
+
}
|
|
5788
|
+
} catch (error) {
|
|
5789
|
+
if (error instanceof Error && error.message.startsWith("Unsupported GRAPHIFY_WHISPER_MODEL")) {
|
|
5790
|
+
throw error;
|
|
5791
|
+
}
|
|
5792
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
5793
|
+
throw new Error(
|
|
5794
|
+
`Video transcription requires the local TypeScript toolchain: sherpa-onnx-node + ffmpeg. Retry after installing them. ${detail}`
|
|
5795
|
+
);
|
|
5796
|
+
} finally {
|
|
5797
|
+
rmSync(tempDir, { recursive: true, force: true });
|
|
5798
|
+
}
|
|
5799
|
+
return transcriptPath;
|
|
5800
|
+
}
|
|
5801
|
+
async function transcribeAll(videoFiles, outputDir, initialPrompt, force = false) {
|
|
5802
|
+
if (videoFiles.length === 0) {
|
|
5803
|
+
return [];
|
|
5804
|
+
}
|
|
5805
|
+
const transcriptPaths = [];
|
|
5806
|
+
for (const videoFile of videoFiles) {
|
|
5807
|
+
try {
|
|
5808
|
+
transcriptPaths.push(await transcribe(videoFile, outputDir, initialPrompt, force));
|
|
5809
|
+
} catch (error) {
|
|
5810
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
5811
|
+
console.log(` warning: could not transcribe ${videoFile}: ${detail}`);
|
|
5812
|
+
}
|
|
5813
|
+
}
|
|
5814
|
+
return transcriptPaths;
|
|
5815
|
+
}
|
|
5816
|
+
function cloneDetection(detection) {
|
|
5817
|
+
return JSON.parse(JSON.stringify(detection));
|
|
5818
|
+
}
|
|
5819
|
+
async function augmentDetectionWithTranscripts(detection, options) {
|
|
5820
|
+
const nextDetection = cloneDetection(detection);
|
|
5821
|
+
const source = options?.incremental && nextDetection.new_files ? nextDetection.new_files : nextDetection.files;
|
|
5822
|
+
const videoFiles = [...source.video ?? []];
|
|
5823
|
+
const prompt = options?.initialPrompt ?? buildWhisperPrompt(options?.godNodes ?? []);
|
|
5824
|
+
if (videoFiles.length === 0) {
|
|
5825
|
+
return { detection: nextDetection, transcriptPaths: [], prompt };
|
|
5826
|
+
}
|
|
5827
|
+
const previousModel = process.env.GRAPHIFY_WHISPER_MODEL;
|
|
5828
|
+
if (options?.whisperModel) {
|
|
5829
|
+
process.env.GRAPHIFY_WHISPER_MODEL = options.whisperModel;
|
|
5830
|
+
}
|
|
5831
|
+
try {
|
|
5832
|
+
const transcriptPaths = await transcribeAll(
|
|
5833
|
+
videoFiles,
|
|
5834
|
+
options?.outputDir,
|
|
5835
|
+
prompt,
|
|
5836
|
+
options?.incremental === true
|
|
5837
|
+
);
|
|
5838
|
+
const existingDocuments = source.document ?? [];
|
|
5839
|
+
source.document = [...existingDocuments, ...transcriptPaths];
|
|
5840
|
+
return { detection: nextDetection, transcriptPaths, prompt };
|
|
5841
|
+
} finally {
|
|
5842
|
+
if (options?.whisperModel) {
|
|
5843
|
+
if (previousModel === void 0) {
|
|
5844
|
+
delete process.env.GRAPHIFY_WHISPER_MODEL;
|
|
5845
|
+
} else {
|
|
5846
|
+
process.env.GRAPHIFY_WHISPER_MODEL = previousModel;
|
|
5847
|
+
}
|
|
5848
|
+
}
|
|
5849
|
+
}
|
|
5850
|
+
}
|
|
5851
|
+
|
|
5852
|
+
// src/ingest.ts
|
|
5341
5853
|
function yamlStr(s) {
|
|
5342
5854
|
return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, " ").replace(/\r/g, " ");
|
|
5343
5855
|
}
|
|
@@ -5505,7 +6017,7 @@ async function downloadBinary(url, suffix, targetDir) {
|
|
|
5505
6017
|
const filename = safeFilename2(url, suffix);
|
|
5506
6018
|
const outPath = pathResolve2(targetDir, filename);
|
|
5507
6019
|
const data = await safeFetch(url);
|
|
5508
|
-
|
|
6020
|
+
writeFileSync6(outPath, data);
|
|
5509
6021
|
return outPath;
|
|
5510
6022
|
}
|
|
5511
6023
|
function normalizeIngestOptions(authorOrOptions, contributor) {
|
|
@@ -5521,7 +6033,7 @@ function normalizeIngestOptions(authorOrOptions, contributor) {
|
|
|
5521
6033
|
};
|
|
5522
6034
|
}
|
|
5523
6035
|
async function ingest(url, targetDir, authorOrOptions = null, contributor = null) {
|
|
5524
|
-
|
|
6036
|
+
mkdirSync5(targetDir, { recursive: true });
|
|
5525
6037
|
const urlType = detectUrlType(url);
|
|
5526
6038
|
const { author, contributor: normalizedContributor } = normalizeIngestOptions(
|
|
5527
6039
|
authorOrOptions,
|
|
@@ -5532,7 +6044,7 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
|
|
|
5532
6044
|
let filename;
|
|
5533
6045
|
if (urlType === "pdf") {
|
|
5534
6046
|
const out = await downloadBinary(url, ".pdf", targetDir);
|
|
5535
|
-
console.log(`Downloaded PDF: ${
|
|
6047
|
+
console.log(`Downloaded PDF: ${basename4(out)}`);
|
|
5536
6048
|
return out;
|
|
5537
6049
|
}
|
|
5538
6050
|
if (urlType === "image") {
|
|
@@ -5542,9 +6054,14 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
|
|
|
5542
6054
|
} catch {
|
|
5543
6055
|
throw new Error(`Invalid URL: ${url}`);
|
|
5544
6056
|
}
|
|
5545
|
-
const suffix =
|
|
6057
|
+
const suffix = extname5(parsed.pathname) || ".jpg";
|
|
5546
6058
|
const out = await downloadBinary(url, suffix, targetDir);
|
|
5547
|
-
console.log(`Downloaded image: ${
|
|
6059
|
+
console.log(`Downloaded image: ${basename4(out)}`);
|
|
6060
|
+
return out;
|
|
6061
|
+
}
|
|
6062
|
+
if (urlType === "youtube") {
|
|
6063
|
+
const out = downloadAudio(url, targetDir);
|
|
6064
|
+
console.log(`Downloaded audio: ${basename4(out)}`);
|
|
5548
6065
|
return out;
|
|
5549
6066
|
}
|
|
5550
6067
|
if (urlType === "tweet") {
|
|
@@ -5556,13 +6073,13 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
|
|
|
5556
6073
|
}
|
|
5557
6074
|
let outPath = pathResolve2(targetDir, filename);
|
|
5558
6075
|
let counter = 1;
|
|
5559
|
-
while (
|
|
6076
|
+
while (existsSync7(outPath)) {
|
|
5560
6077
|
const stem = filename.replace(/\.md$/, "");
|
|
5561
6078
|
outPath = pathResolve2(targetDir, `${stem}_${counter}.md`);
|
|
5562
6079
|
counter++;
|
|
5563
6080
|
}
|
|
5564
|
-
|
|
5565
|
-
console.log(`Saved ${urlType}: ${
|
|
6081
|
+
writeFileSync6(outPath, content, "utf-8");
|
|
6082
|
+
console.log(`Saved ${urlType}: ${basename4(outPath)}`);
|
|
5566
6083
|
return outPath;
|
|
5567
6084
|
}
|
|
5568
6085
|
function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "query", sourceNodes = null) {
|
|
@@ -5582,7 +6099,7 @@ function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "quer
|
|
|
5582
6099
|
if (!payload.question) throw new Error("saveQueryResult requires a question");
|
|
5583
6100
|
if (!payload.memoryDir) throw new Error("saveQueryResult requires a memoryDir");
|
|
5584
6101
|
const effectiveAnswer = payload.answer ?? "";
|
|
5585
|
-
|
|
6102
|
+
mkdirSync5(payload.memoryDir, { recursive: true });
|
|
5586
6103
|
const now = /* @__PURE__ */ new Date();
|
|
5587
6104
|
const slug = payload.question.toLowerCase().replace(/[^\w]/g, "_").slice(0, 50).replace(/_+$/, "");
|
|
5588
6105
|
const ts = now.toISOString().replace(/[-:]/g, "").replace("T", "_").slice(0, 15);
|
|
@@ -5615,10 +6132,10 @@ function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "quer
|
|
|
5615
6132
|
}
|
|
5616
6133
|
const content = [...frontmatterLines, ...bodyLines].join("\n");
|
|
5617
6134
|
const outPath = pathResolve2(payload.memoryDir, filename);
|
|
5618
|
-
|
|
6135
|
+
writeFileSync6(outPath, content, "utf-8");
|
|
5619
6136
|
return outPath;
|
|
5620
6137
|
}
|
|
5621
|
-
var isDirectExecution = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^ingest\.(?:js|mjs|cjs|ts)$/.test(
|
|
6138
|
+
var isDirectExecution = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^ingest\.(?:js|mjs|cjs|ts)$/.test(basename4(process.argv[1]));
|
|
5622
6139
|
if (isDirectExecution) {
|
|
5623
6140
|
const url = process.argv[2];
|
|
5624
6141
|
const targetDir = process.argv[3] ?? "./raw";
|
|
@@ -5634,16 +6151,16 @@ if (isDirectExecution) {
|
|
|
5634
6151
|
}
|
|
5635
6152
|
|
|
5636
6153
|
// src/serve.ts
|
|
6154
|
+
init_graph();
|
|
5637
6155
|
init_security();
|
|
5638
6156
|
init_analyze();
|
|
5639
6157
|
import { readFileSync as readFileSync5 } from "fs";
|
|
5640
|
-
import Graph3 from "graphology";
|
|
5641
6158
|
import { bidirectional } from "graphology-shortest-path/unweighted.js";
|
|
5642
|
-
import { basename as
|
|
6159
|
+
import { basename as basename5, dirname as dirname4, resolve as resolve7 } from "path";
|
|
5643
6160
|
function loadGraph2(graphPath) {
|
|
5644
6161
|
let safePath;
|
|
5645
6162
|
try {
|
|
5646
|
-
safePath = validateGraphPath(graphPath);
|
|
6163
|
+
safePath = validateGraphPath(graphPath, dirname4(resolve7(graphPath)));
|
|
5647
6164
|
} catch (err) {
|
|
5648
6165
|
console.error(`error: ${err instanceof Error ? err.message : err}`);
|
|
5649
6166
|
process.exit(1);
|
|
@@ -5657,21 +6174,7 @@ function loadGraph2(graphPath) {
|
|
|
5657
6174
|
);
|
|
5658
6175
|
process.exit(1);
|
|
5659
6176
|
}
|
|
5660
|
-
|
|
5661
|
-
const nodes = data.nodes ?? [];
|
|
5662
|
-
for (const node of nodes) {
|
|
5663
|
-
const { id, ...attrs } = node;
|
|
5664
|
-
G.mergeNode(id, attrs);
|
|
5665
|
-
}
|
|
5666
|
-
const links = data.links ?? data.edges ?? [];
|
|
5667
|
-
for (const link of links) {
|
|
5668
|
-
const { source, target, ...attrs } = link;
|
|
5669
|
-
try {
|
|
5670
|
-
G.mergeEdge(source, target, attrs);
|
|
5671
|
-
} catch {
|
|
5672
|
-
}
|
|
5673
|
-
}
|
|
5674
|
-
return G;
|
|
6177
|
+
return loadGraphFromData(data);
|
|
5675
6178
|
}
|
|
5676
6179
|
function communitiesFromGraph(G) {
|
|
5677
6180
|
const communities = /* @__PURE__ */ new Map();
|
|
@@ -5684,6 +6187,15 @@ function communitiesFromGraph(G) {
|
|
|
5684
6187
|
});
|
|
5685
6188
|
return communities;
|
|
5686
6189
|
}
|
|
6190
|
+
function communityName(G, cid) {
|
|
6191
|
+
if (cid === void 0 || cid === null) return null;
|
|
6192
|
+
const labels = G.getAttribute("community_labels");
|
|
6193
|
+
const fromGraph = labels?.[String(cid)];
|
|
6194
|
+
if (typeof fromGraph === "string" && fromGraph.length > 0) {
|
|
6195
|
+
return sanitizeLabel(fromGraph);
|
|
6196
|
+
}
|
|
6197
|
+
return null;
|
|
6198
|
+
}
|
|
5687
6199
|
function scoreNodes(G, terms) {
|
|
5688
6200
|
const scored = [];
|
|
5689
6201
|
G.forEachNode((nid, data) => {
|
|
@@ -5702,7 +6214,7 @@ function bfs(G, startNodes, depth) {
|
|
|
5702
6214
|
for (let i = 0; i < depth; i++) {
|
|
5703
6215
|
const nextFrontier = /* @__PURE__ */ new Set();
|
|
5704
6216
|
for (const n of frontier) {
|
|
5705
|
-
G
|
|
6217
|
+
forEachTraversalNeighbor(G, n, (neighbor) => {
|
|
5706
6218
|
if (!visited.has(neighbor)) {
|
|
5707
6219
|
nextFrontier.add(neighbor);
|
|
5708
6220
|
edges.push([n, neighbor]);
|
|
@@ -5722,7 +6234,7 @@ function dfs(G, startNodes, depth) {
|
|
|
5722
6234
|
const [node, d] = stack.pop();
|
|
5723
6235
|
if (visited.has(node) || d > depth) continue;
|
|
5724
6236
|
visited.add(node);
|
|
5725
|
-
G
|
|
6237
|
+
forEachTraversalNeighbor(G, node, (neighbor) => {
|
|
5726
6238
|
if (!visited.has(neighbor)) {
|
|
5727
6239
|
stack.push([neighbor, d + 1]);
|
|
5728
6240
|
edges.push([node, neighbor]);
|
|
@@ -5801,7 +6313,7 @@ function toolGetNode(G, args) {
|
|
|
5801
6313
|
` ID: ${nid}`,
|
|
5802
6314
|
` Source: ${d.source_file ?? ""} ${d.source_location ?? ""}`,
|
|
5803
6315
|
` Type: ${d.file_type ?? ""}`,
|
|
5804
|
-
` Community: ${d.community ?? ""}`,
|
|
6316
|
+
` Community: ${d.community_name ? `${d.community ?? ""} (${d.community_name})` : communityName(G, d.community) ?? String(d.community ?? "")}`,
|
|
5805
6317
|
` Degree: ${G.degree(nid)}`
|
|
5806
6318
|
].join("\n");
|
|
5807
6319
|
}
|
|
@@ -5812,7 +6324,7 @@ function toolGetNeighbors(G, args) {
|
|
|
5812
6324
|
if (matches.length === 0) return `No node matching '${label}' found.`;
|
|
5813
6325
|
const nid = matches[0];
|
|
5814
6326
|
const lines = [`Neighbors of ${G.getNodeAttribute(nid, "label") ?? nid}:`];
|
|
5815
|
-
G
|
|
6327
|
+
forEachTraversalNeighbor(G, nid, (neighbor) => {
|
|
5816
6328
|
const edgeKey = G.edge(nid, neighbor);
|
|
5817
6329
|
if (!edgeKey) return;
|
|
5818
6330
|
const d = G.getEdgeAttributes(edgeKey);
|
|
@@ -5828,7 +6340,8 @@ function toolGetCommunity(communities, G, args) {
|
|
|
5828
6340
|
const cid = Number(args.community_id);
|
|
5829
6341
|
const nodes = communities.get(cid);
|
|
5830
6342
|
if (!nodes || nodes.length === 0) return `Community ${cid} not found.`;
|
|
5831
|
-
const
|
|
6343
|
+
const label = communityName(G, cid);
|
|
6344
|
+
const lines = [label ? `Community ${cid} - ${label} (${nodes.length} nodes):` : `Community ${cid} (${nodes.length} nodes):`];
|
|
5832
6345
|
for (const n of nodes) {
|
|
5833
6346
|
const d = G.getNodeAttributes(n);
|
|
5834
6347
|
lines.push(` ${d.label ?? n} [${d.source_file ?? ""}]`);
|
|
@@ -6050,8 +6563,13 @@ async function serve(graphPath = "graphify-out/graph.json", transport) {
|
|
|
6050
6563
|
if (!handler) {
|
|
6051
6564
|
return { content: [{ type: "text", text: `Unknown tool: ${name}` }] };
|
|
6052
6565
|
}
|
|
6053
|
-
|
|
6054
|
-
|
|
6566
|
+
try {
|
|
6567
|
+
const text = handler(args ?? {});
|
|
6568
|
+
return { content: [{ type: "text", text }] };
|
|
6569
|
+
} catch (err) {
|
|
6570
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
6571
|
+
return { content: [{ type: "text", text: `Error executing ${name}: ${message}` }] };
|
|
6572
|
+
}
|
|
6055
6573
|
});
|
|
6056
6574
|
const serverTransport = transport ?? new StdioServerTransport();
|
|
6057
6575
|
let keepAlive;
|
|
@@ -6059,14 +6577,14 @@ async function serve(graphPath = "graphify-out/graph.json", transport) {
|
|
|
6059
6577
|
keepAlive = setInterval(() => void 0, 6e4);
|
|
6060
6578
|
process.stdin?.resume();
|
|
6061
6579
|
}
|
|
6062
|
-
const closed = new Promise((
|
|
6580
|
+
const closed = new Promise((resolve8) => {
|
|
6063
6581
|
const previousOnClose = server.onclose;
|
|
6064
6582
|
server.onclose = () => {
|
|
6065
6583
|
if (keepAlive) {
|
|
6066
6584
|
clearInterval(keepAlive);
|
|
6067
6585
|
}
|
|
6068
6586
|
previousOnClose?.();
|
|
6069
|
-
|
|
6587
|
+
resolve8();
|
|
6070
6588
|
};
|
|
6071
6589
|
});
|
|
6072
6590
|
await server.connect(serverTransport);
|
|
@@ -6074,7 +6592,7 @@ async function serve(graphPath = "graphify-out/graph.json", transport) {
|
|
|
6074
6592
|
await closed;
|
|
6075
6593
|
}
|
|
6076
6594
|
}
|
|
6077
|
-
var isDirectExecution2 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^serve\.(?:js|mjs|cjs|ts)$/.test(
|
|
6595
|
+
var isDirectExecution2 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^serve\.(?:js|mjs|cjs|ts)$/.test(basename5(process.argv[1]));
|
|
6078
6596
|
if (isDirectExecution2) {
|
|
6079
6597
|
const graphPath = process.argv[2] ?? "graphify-out/graph.json";
|
|
6080
6598
|
serve(graphPath).catch((err) => {
|
|
@@ -6084,59 +6602,14 @@ if (isDirectExecution2) {
|
|
|
6084
6602
|
}
|
|
6085
6603
|
|
|
6086
6604
|
// src/watch.ts
|
|
6087
|
-
|
|
6088
|
-
import {
|
|
6605
|
+
init_detect();
|
|
6606
|
+
import { existsSync as existsSync8, mkdirSync as mkdirSync6, writeFileSync as writeFileSync7, unlinkSync as unlinkSync2 } from "fs";
|
|
6607
|
+
import { resolve as pathResolve3, extname as extname6, basename as basename6 } from "path";
|
|
6089
6608
|
var WATCHED_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
6090
|
-
|
|
6091
|
-
|
|
6092
|
-
|
|
6093
|
-
|
|
6094
|
-
".rs",
|
|
6095
|
-
".java",
|
|
6096
|
-
".cpp",
|
|
6097
|
-
".c",
|
|
6098
|
-
".rb",
|
|
6099
|
-
".swift",
|
|
6100
|
-
".kt",
|
|
6101
|
-
".cs",
|
|
6102
|
-
".scala",
|
|
6103
|
-
".php",
|
|
6104
|
-
".cc",
|
|
6105
|
-
".cxx",
|
|
6106
|
-
".hpp",
|
|
6107
|
-
".h",
|
|
6108
|
-
".kts",
|
|
6109
|
-
".md",
|
|
6110
|
-
".txt",
|
|
6111
|
-
".rst",
|
|
6112
|
-
".pdf",
|
|
6113
|
-
".png",
|
|
6114
|
-
".jpg",
|
|
6115
|
-
".jpeg",
|
|
6116
|
-
".webp",
|
|
6117
|
-
".gif",
|
|
6118
|
-
".svg"
|
|
6119
|
-
]);
|
|
6120
|
-
var CODE_EXTENSIONS3 = /* @__PURE__ */ new Set([
|
|
6121
|
-
".py",
|
|
6122
|
-
".ts",
|
|
6123
|
-
".js",
|
|
6124
|
-
".go",
|
|
6125
|
-
".rs",
|
|
6126
|
-
".java",
|
|
6127
|
-
".cpp",
|
|
6128
|
-
".c",
|
|
6129
|
-
".rb",
|
|
6130
|
-
".swift",
|
|
6131
|
-
".kt",
|
|
6132
|
-
".cs",
|
|
6133
|
-
".scala",
|
|
6134
|
-
".php",
|
|
6135
|
-
".cc",
|
|
6136
|
-
".cxx",
|
|
6137
|
-
".hpp",
|
|
6138
|
-
".h",
|
|
6139
|
-
".kts"
|
|
6609
|
+
...CODE_EXTENSIONS,
|
|
6610
|
+
...DOC_EXTENSIONS,
|
|
6611
|
+
...PAPER_EXTENSIONS,
|
|
6612
|
+
...IMAGE_EXTENSIONS
|
|
6140
6613
|
]);
|
|
6141
6614
|
async function rebuildCode(watchPath, followSymlinks = false) {
|
|
6142
6615
|
try {
|
|
@@ -6189,7 +6662,7 @@ async function rebuildCode(watchPath, followSymlinks = false) {
|
|
|
6189
6662
|
}
|
|
6190
6663
|
const questions = suggestQuestions2(G, communities, labels);
|
|
6191
6664
|
const outDir = pathResolve3(watchPath, "graphify-out");
|
|
6192
|
-
|
|
6665
|
+
mkdirSync6(outDir, { recursive: true });
|
|
6193
6666
|
const report = generate2(
|
|
6194
6667
|
G,
|
|
6195
6668
|
communities,
|
|
@@ -6202,10 +6675,10 @@ async function rebuildCode(watchPath, followSymlinks = false) {
|
|
|
6202
6675
|
watchPath,
|
|
6203
6676
|
questions
|
|
6204
6677
|
);
|
|
6205
|
-
|
|
6206
|
-
toJson2(G, communities, pathResolve3(outDir, "graph.json"));
|
|
6678
|
+
writeFileSync7(pathResolve3(outDir, "GRAPH_REPORT.md"), report, "utf-8");
|
|
6679
|
+
toJson2(G, communities, pathResolve3(outDir, "graph.json"), { communityLabels: labels });
|
|
6207
6680
|
const flagPath = pathResolve3(outDir, "needs_update");
|
|
6208
|
-
if (
|
|
6681
|
+
if (existsSync8(flagPath)) {
|
|
6209
6682
|
unlinkSync2(flagPath);
|
|
6210
6683
|
}
|
|
6211
6684
|
console.log(
|
|
@@ -6224,9 +6697,9 @@ async function rebuildCode(watchPath, followSymlinks = false) {
|
|
|
6224
6697
|
}
|
|
6225
6698
|
function notifyOnly(watchPath) {
|
|
6226
6699
|
const outDir = pathResolve3(watchPath, "graphify-out");
|
|
6227
|
-
|
|
6700
|
+
mkdirSync6(outDir, { recursive: true });
|
|
6228
6701
|
const flagPath = pathResolve3(outDir, "needs_update");
|
|
6229
|
-
|
|
6702
|
+
writeFileSync7(flagPath, "1", "utf-8");
|
|
6230
6703
|
console.log(`
|
|
6231
6704
|
[graphify watch] New or changed files detected in ${watchPath}`);
|
|
6232
6705
|
console.log(
|
|
@@ -6238,7 +6711,7 @@ function notifyOnly(watchPath) {
|
|
|
6238
6711
|
console.log(`[graphify watch] Flag written to ${flagPath}`);
|
|
6239
6712
|
}
|
|
6240
6713
|
function hasNonCode(changedPaths) {
|
|
6241
|
-
return changedPaths.some((p) => !
|
|
6714
|
+
return changedPaths.some((p) => !CODE_EXTENSIONS.has(extname6(p).toLowerCase()));
|
|
6242
6715
|
}
|
|
6243
6716
|
async function watch(watchPath, debounce = 3) {
|
|
6244
6717
|
let chokidar;
|
|
@@ -6262,7 +6735,7 @@ async function watch(watchPath, debounce = 3) {
|
|
|
6262
6735
|
]
|
|
6263
6736
|
});
|
|
6264
6737
|
watcher.on("all", (_event, filePath) => {
|
|
6265
|
-
const ext =
|
|
6738
|
+
const ext = extname6(filePath).toLowerCase();
|
|
6266
6739
|
if (!WATCHED_EXTENSIONS.has(ext)) return;
|
|
6267
6740
|
const parts = filePath.split("/");
|
|
6268
6741
|
if (parts.some((part) => part.startsWith(".") && part !== ".")) return;
|
|
@@ -6301,7 +6774,7 @@ async function watch(watchPath, debounce = 3) {
|
|
|
6301
6774
|
process.on("SIGINT", cleanup);
|
|
6302
6775
|
process.on("SIGTERM", cleanup);
|
|
6303
6776
|
}
|
|
6304
|
-
var isDirectExecution3 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^watch\.(?:js|mjs|cjs|ts)$/.test(
|
|
6777
|
+
var isDirectExecution3 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^watch\.(?:js|mjs|cjs|ts)$/.test(basename6(process.argv[1]));
|
|
6305
6778
|
if (isDirectExecution3) {
|
|
6306
6779
|
const watchPath = process.argv[2] ?? ".";
|
|
6307
6780
|
const debounce = process.argv[3] ? parseFloat(process.argv[3]) : 3;
|
|
@@ -6313,8 +6786,10 @@ if (isDirectExecution3) {
|
|
|
6313
6786
|
export {
|
|
6314
6787
|
FileType,
|
|
6315
6788
|
assertValid,
|
|
6789
|
+
augmentDetectionWithTranscripts,
|
|
6316
6790
|
build,
|
|
6317
6791
|
buildFromJson,
|
|
6792
|
+
buildWhisperPrompt,
|
|
6318
6793
|
checkSemanticCache,
|
|
6319
6794
|
classifyFile,
|
|
6320
6795
|
cluster,
|
|
@@ -6322,6 +6797,7 @@ export {
|
|
|
6322
6797
|
collectFiles,
|
|
6323
6798
|
detect,
|
|
6324
6799
|
detectIncremental,
|
|
6800
|
+
downloadAudio,
|
|
6325
6801
|
extract,
|
|
6326
6802
|
fileHash,
|
|
6327
6803
|
generate as generateReport,
|
|
@@ -6351,6 +6827,8 @@ export {
|
|
|
6351
6827
|
toJson,
|
|
6352
6828
|
toSvg,
|
|
6353
6829
|
toWiki,
|
|
6830
|
+
transcribe,
|
|
6831
|
+
transcribeAll,
|
|
6354
6832
|
validateExtraction,
|
|
6355
6833
|
validateGraphPath,
|
|
6356
6834
|
validateUrl,
|