graphifyy 0.3.17 → 0.3.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,14 +7,13 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
7
7
 
8
8
  // src/skill-runtime.ts
9
9
  import { Command } from "commander";
10
- import Graph3 from "graphology";
11
10
  import {
12
- existsSync as existsSync7,
13
- mkdirSync as mkdirSync4,
11
+ existsSync as existsSync8,
12
+ mkdirSync as mkdirSync5,
14
13
  readFileSync as readFileSync5,
15
- writeFileSync as writeFileSync5
14
+ writeFileSync as writeFileSync6
16
15
  } from "fs";
17
- import { dirname as dirname2, join as join4, resolve as resolve5 } from "path";
16
+ import { dirname as dirname4, join as join5, resolve as resolve7 } from "path";
18
17
  import { fileURLToPath } from "url";
19
18
 
20
19
  // src/analyze.ts
@@ -36,10 +35,75 @@ function toNumericMap(value) {
36
35
 
37
36
  // src/cluster.ts
38
37
  import louvain from "graphology-communities-louvain";
38
+
39
+ // src/graph.ts
40
+ import Graph from "graphology";
41
+ function createGraph(directed = false) {
42
+ return new Graph({ type: directed ? "directed" : "undirected", multi: false });
43
+ }
44
+ function isDirectedGraph(G) {
45
+ return G.type === "directed";
46
+ }
47
+ function loadGraphFromData(raw) {
48
+ const G = createGraph(raw.directed === true);
49
+ for (const [key, value] of Object.entries(raw.graph ?? {})) {
50
+ G.setAttribute(key, value);
51
+ }
52
+ for (const node of raw.nodes ?? []) {
53
+ const { id, ...attrs } = node;
54
+ G.mergeNode(id, attrs);
55
+ }
56
+ for (const link of raw.links ?? raw.edges ?? []) {
57
+ const { source, target, ...attrs } = link;
58
+ if (!G.hasNode(source) || !G.hasNode(target)) continue;
59
+ try {
60
+ G.mergeEdge(source, target, attrs);
61
+ } catch {
62
+ }
63
+ }
64
+ if (raw.hyperedges && raw.hyperedges.length > 0) {
65
+ G.setAttribute("hyperedges", raw.hyperedges);
66
+ }
67
+ return G;
68
+ }
69
+ function toUndirectedGraph(G) {
70
+ if (!isDirectedGraph(G)) return G.copy();
71
+ const copy = createGraph(false);
72
+ for (const [key, value] of Object.entries(G.getAttributes())) {
73
+ copy.setAttribute(key, value);
74
+ }
75
+ G.forEachNode((nodeId, attrs) => {
76
+ copy.mergeNode(nodeId, attrs);
77
+ });
78
+ G.forEachEdge((_edge, attrs, source, target) => {
79
+ if (!copy.hasNode(source) || !copy.hasNode(target)) return;
80
+ try {
81
+ copy.mergeEdge(source, target, attrs);
82
+ } catch {
83
+ }
84
+ });
85
+ return copy;
86
+ }
87
+ function forEachTraversalNeighbor(G, node, callback) {
88
+ if (isDirectedGraph(G)) {
89
+ G.forEachOutboundNeighbor(node, callback);
90
+ return;
91
+ }
92
+ G.forEachNeighbor(node, callback);
93
+ }
94
+ function traversalNeighbors(G, node) {
95
+ const neighbors = [];
96
+ forEachTraversalNeighbor(G, node, (neighbor) => {
97
+ neighbors.push(neighbor);
98
+ });
99
+ return neighbors;
100
+ }
101
+
102
+ // src/cluster.ts
39
103
  var MAX_COMMUNITY_FRACTION = 0.25;
40
104
  var MIN_SPLIT_SIZE = 10;
41
105
  function partition(G) {
42
- const result = louvain(G);
106
+ const result = louvain(G.type === "directed" ? toUndirectedGraph(G) : G);
43
107
  const map = /* @__PURE__ */ new Map();
44
108
  for (const [node, cid] of Object.entries(result)) {
45
109
  map.set(node, cid);
@@ -142,93 +206,435 @@ function scoreAll(G, communities) {
142
206
  return result;
143
207
  }
144
208
 
145
- // src/analyze.ts
146
- function nodeCommunityMap(communities) {
147
- const communityMap = toNumericMap(communities);
148
- const result = /* @__PURE__ */ new Map();
149
- for (const [cid, nodes] of communityMap) {
150
- for (const n of nodes) result.set(n, cid);
209
+ // src/detect.ts
210
+ import {
211
+ readdirSync,
212
+ readFileSync,
213
+ writeFileSync,
214
+ statSync,
215
+ existsSync,
216
+ mkdirSync,
217
+ lstatSync
218
+ } from "fs";
219
+ import { join, resolve, extname, basename, relative, sep, dirname } from "path";
220
+ import { createHash } from "crypto";
221
+ var MANIFEST_PATH = "graphify-out/manifest.json";
222
+ var CODE_EXTENSIONS = /* @__PURE__ */ new Set([
223
+ ".py",
224
+ ".ts",
225
+ ".js",
226
+ ".jsx",
227
+ ".tsx",
228
+ ".go",
229
+ ".rs",
230
+ ".java",
231
+ ".cpp",
232
+ ".cc",
233
+ ".cxx",
234
+ ".c",
235
+ ".h",
236
+ ".hpp",
237
+ ".rb",
238
+ ".swift",
239
+ ".kt",
240
+ ".kts",
241
+ ".cs",
242
+ ".scala",
243
+ ".php",
244
+ ".lua",
245
+ ".toc",
246
+ ".zig",
247
+ ".ps1",
248
+ ".ex",
249
+ ".exs",
250
+ ".m",
251
+ ".mm",
252
+ ".jl"
253
+ ]);
254
+ var DOC_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".rst"]);
255
+ var PAPER_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
256
+ var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"]);
257
+ var OFFICE_EXTENSIONS = /* @__PURE__ */ new Set([".docx", ".xlsx"]);
258
+ var VIDEO_EXTENSIONS = /* @__PURE__ */ new Set([
259
+ ".mp4",
260
+ ".mov",
261
+ ".webm",
262
+ ".mkv",
263
+ ".avi",
264
+ ".m4v",
265
+ ".mp3",
266
+ ".wav",
267
+ ".m4a",
268
+ ".ogg"
269
+ ]);
270
+ var CORPUS_WARN_THRESHOLD = 5e4;
271
+ var CORPUS_UPPER_THRESHOLD = 5e5;
272
+ var FILE_COUNT_UPPER = 200;
273
+ var SENSITIVE_PATTERNS = [
274
+ /(^|[\\/])\.(env|envrc)(\.|$)/i,
275
+ /\.(pem|key|p12|pfx|cert|crt|der|p8)$/i,
276
+ /(credential|secret|passwd|password|token|private_key)/i,
277
+ /(id_rsa|id_dsa|id_ecdsa|id_ed25519)(\.pub)?$/,
278
+ /(\.netrc|\.pgpass|\.htpasswd)$/i,
279
+ /(aws_credentials|gcloud_credentials|service.account)/i
280
+ ];
281
+ var PAPER_SIGNALS = [
282
+ /\barxiv\b/i,
283
+ /\bdoi\s*:/i,
284
+ /\babstract\b/i,
285
+ /\bproceedings\b/i,
286
+ /\bjournal\b/i,
287
+ /\bpreprint\b/i,
288
+ /\\cite\{/,
289
+ /\[\d+\]/,
290
+ /\[\n\d+\n\]/,
291
+ /eq\.\s*\d+|equation\s+\d+/i,
292
+ /\d{4}\.\d{4,5}/,
293
+ /\bwe propose\b/i,
294
+ /\bliterature\b/i
295
+ ];
296
+ var PAPER_SIGNAL_THRESHOLD = 3;
297
+ function isSensitive(filePath) {
298
+ const name = basename(filePath);
299
+ return SENSITIVE_PATTERNS.some((p) => p.test(name) || p.test(filePath));
300
+ }
301
+ function looksLikePaper(filePath) {
302
+ try {
303
+ const text = readFileSync(filePath, "utf-8").slice(0, 3e3);
304
+ const hits = PAPER_SIGNALS.filter((p) => p.test(text)).length;
305
+ return hits >= PAPER_SIGNAL_THRESHOLD;
306
+ } catch {
307
+ return false;
151
308
  }
152
- return result;
153
309
  }
154
- function isFileNode(G, nodeId) {
155
- const attrs = G.getNodeAttributes(nodeId);
156
- const label = attrs.label ?? "";
157
- if (!label) return false;
158
- const sourceFile = attrs.source_file ?? "";
159
- if (sourceFile) {
160
- const fileName = sourceFile.split("/").pop() ?? "";
161
- if (label === fileName) return true;
310
+ var ASSET_DIR_MARKERS = /* @__PURE__ */ new Set([".imageset", ".xcassets", ".appiconset", ".colorset", ".launchimage"]);
311
+ function classifyFile(filePath) {
312
+ const ext = extname(filePath).toLowerCase();
313
+ if (CODE_EXTENSIONS.has(ext)) return "code" /* CODE */;
314
+ if (PAPER_EXTENSIONS.has(ext)) {
315
+ const parts = filePath.split(sep);
316
+ if (parts.some((p) => [...ASSET_DIR_MARKERS].some((m) => p.endsWith(m)))) return null;
317
+ return "paper" /* PAPER */;
162
318
  }
163
- if (label.startsWith(".") && label.endsWith("()")) return true;
164
- if (label.endsWith("()") && G.degree(nodeId) <= 1) return true;
165
- return false;
319
+ if (IMAGE_EXTENSIONS.has(ext)) return "image" /* IMAGE */;
320
+ if (VIDEO_EXTENSIONS.has(ext)) return "video" /* VIDEO */;
321
+ if (DOC_EXTENSIONS.has(ext)) {
322
+ if (looksLikePaper(filePath)) return "paper" /* PAPER */;
323
+ return "document" /* DOCUMENT */;
324
+ }
325
+ if (OFFICE_EXTENSIONS.has(ext)) return "document" /* DOCUMENT */;
326
+ return null;
166
327
  }
167
- function isConceptNode(G, nodeId) {
168
- const data = G.getNodeAttributes(nodeId);
169
- const source = data.source_file ?? "";
170
- if (!source) return true;
171
- const lastPart = source.split("/").pop() ?? "";
172
- if (!lastPart.includes(".")) return true;
173
- return false;
328
+ function countWords(filePath) {
329
+ try {
330
+ const text = readFileSync(filePath, "utf-8");
331
+ return text.split(/\s+/).filter(Boolean).length;
332
+ } catch {
333
+ return 0;
334
+ }
174
335
  }
175
- var CODE_EXTENSIONS = /* @__PURE__ */ new Set([
176
- "py",
177
- "ts",
178
- "tsx",
179
- "js",
180
- "go",
181
- "rs",
182
- "java",
183
- "rb",
184
- "cpp",
185
- "c",
186
- "h",
187
- "cs",
188
- "kt",
189
- "scala",
190
- "php"
336
+ var SKIP_DIRS = /* @__PURE__ */ new Set([
337
+ "venv",
338
+ ".venv",
339
+ "env",
340
+ ".env",
341
+ "node_modules",
342
+ "__pycache__",
343
+ ".git",
344
+ "dist",
345
+ "build",
346
+ "target",
347
+ "out",
348
+ "site-packages",
349
+ "lib64",
350
+ ".pytest_cache",
351
+ ".mypy_cache",
352
+ ".ruff_cache",
353
+ ".tox",
354
+ ".eggs"
191
355
  ]);
192
- var PAPER_EXTENSIONS = /* @__PURE__ */ new Set(["pdf"]);
193
- var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set(["png", "jpg", "jpeg", "webp", "gif", "svg"]);
194
- function fileCategory(path) {
195
- const ext = path.includes(".") ? path.split(".").pop()?.toLowerCase() ?? "" : "";
196
- if (CODE_EXTENSIONS.has(ext)) return "code";
197
- if (PAPER_EXTENSIONS.has(ext)) return "paper";
198
- if (IMAGE_EXTENSIONS.has(ext)) return "image";
199
- return "doc";
200
- }
201
- function topLevelDir(path) {
202
- return path.includes("/") ? path.split("/")[0] : path;
356
+ function isNoiseDir(part) {
357
+ if (SKIP_DIRS.has(part)) return true;
358
+ if (part.endsWith("_venv") || part.endsWith("_env")) return true;
359
+ if (part.endsWith(".egg-info")) return true;
360
+ return false;
203
361
  }
204
- function surpriseScore(G, u, v, data, nodeCommunity, uSource, vSource) {
205
- let score = 0;
206
- const reasons = [];
207
- const conf = data.confidence ?? "EXTRACTED";
208
- const confBonus = { AMBIGUOUS: 3, INFERRED: 2, EXTRACTED: 1 };
209
- score += confBonus[conf] ?? 1;
210
- if (conf === "AMBIGUOUS" || conf === "INFERRED") {
211
- reasons.push(`${conf.toLowerCase()} connection - not explicitly stated in source`);
212
- }
213
- const catU = fileCategory(uSource);
214
- const catV = fileCategory(vSource);
215
- if (catU !== catV) {
216
- score += 2;
217
- reasons.push(`crosses file types (${catU} \u2194 ${catV})`);
362
+ function loadGraphifyignore(root) {
363
+ const patterns = [];
364
+ let current = resolve(root);
365
+ while (true) {
366
+ const ignoreFile = join(current, ".graphifyignore");
367
+ if (existsSync(ignoreFile)) {
368
+ for (let line of readFileSync(ignoreFile, "utf-8").split("\n")) {
369
+ line = line.trim();
370
+ if (line && !line.startsWith("#")) {
371
+ patterns.push(line);
372
+ }
373
+ }
374
+ }
375
+ if (existsSync(join(current, ".git"))) {
376
+ break;
377
+ }
378
+ const parent = dirname(current);
379
+ if (parent === current) {
380
+ break;
381
+ }
382
+ current = parent;
218
383
  }
219
- if (topLevelDir(uSource) !== topLevelDir(vSource)) {
220
- score += 2;
221
- reasons.push("connects across different repos/directories");
384
+ return patterns;
385
+ }
386
+ function matchGlob(text, pattern) {
387
+ const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
388
+ return new RegExp(`^${regex}$`).test(text);
389
+ }
390
+ function isIgnored(filePath, root, patterns) {
391
+ if (patterns.length === 0) return false;
392
+ let rel;
393
+ try {
394
+ rel = relative(root, filePath).replace(/\\/g, "/");
395
+ } catch {
396
+ return false;
222
397
  }
223
- const cidU = nodeCommunity.get(u);
224
- const cidV = nodeCommunity.get(v);
225
- if (cidU !== void 0 && cidV !== void 0 && cidU !== cidV) {
226
- score += 1;
227
- reasons.push("bridges separate communities");
398
+ const parts = rel.split("/");
399
+ for (const pattern of patterns) {
400
+ const p = pattern.replace(/^\/+|\/+$/g, "");
401
+ if (!p) continue;
402
+ if (matchGlob(rel, p)) return true;
403
+ if (matchGlob(basename(filePath), p)) return true;
404
+ for (let i = 0; i < parts.length; i++) {
405
+ if (matchGlob(parts[i], p)) return true;
406
+ if (matchGlob(parts.slice(0, i + 1).join("/"), p)) return true;
407
+ }
228
408
  }
229
- if (data.relation === "semantically_similar_to") {
230
- score = Math.floor(score * 1.5);
231
- reasons.push("semantically similar concepts with no structural link");
409
+ return false;
410
+ }
411
+ function walkDir(dir, root, ignorePatterns, followSymlinks, skipPrune) {
412
+ const result = [];
413
+ let entries;
414
+ try {
415
+ entries = readdirSync(dir);
416
+ } catch {
417
+ return result;
418
+ }
419
+ for (const entry of entries) {
420
+ const full = join(dir, entry);
421
+ let stat;
422
+ try {
423
+ stat = followSymlinks ? statSync(full) : lstatSync(full);
424
+ } catch {
425
+ continue;
426
+ }
427
+ if (stat.isDirectory()) {
428
+ if (!skipPrune) {
429
+ if (entry.startsWith(".")) continue;
430
+ if (isNoiseDir(entry)) continue;
431
+ if (isIgnored(full, root, ignorePatterns)) continue;
432
+ }
433
+ result.push(...walkDir(full, root, ignorePatterns, followSymlinks, skipPrune));
434
+ } else if (stat.isFile()) {
435
+ result.push(full);
436
+ }
437
+ }
438
+ return result;
439
+ }
440
+ function detect(root, options) {
441
+ const followSymlinks = options?.followSymlinks ?? false;
442
+ const rootResolved = resolve(root);
443
+ const ignorePatterns = loadGraphifyignore(rootResolved);
444
+ const convertedDir = join(rootResolved, "graphify-out", "converted");
445
+ const memoryDir = join(rootResolved, "graphify-out", "memory");
446
+ const files = {
447
+ code: [],
448
+ document: [],
449
+ paper: [],
450
+ image: [],
451
+ video: []
452
+ };
453
+ let totalWords = 0;
454
+ const skippedSensitive = [];
455
+ const allFiles = walkDir(rootResolved, rootResolved, ignorePatterns, followSymlinks, false);
456
+ if (existsSync(memoryDir)) {
457
+ allFiles.push(...walkDir(memoryDir, rootResolved, ignorePatterns, followSymlinks, true));
458
+ }
459
+ const seen = /* @__PURE__ */ new Set();
460
+ for (const p of allFiles) {
461
+ if (seen.has(p)) continue;
462
+ seen.add(p);
463
+ const inMemory = existsSync(memoryDir) && p.startsWith(memoryDir);
464
+ if (!inMemory) {
465
+ if (basename(p).startsWith(".")) continue;
466
+ if (p.startsWith(convertedDir)) continue;
467
+ }
468
+ if (isIgnored(p, rootResolved, ignorePatterns)) continue;
469
+ if (isSensitive(p)) {
470
+ skippedSensitive.push(p);
471
+ continue;
472
+ }
473
+ const ftype = classifyFile(p);
474
+ if (!ftype) continue;
475
+ if (OFFICE_EXTENSIONS.has(extname(p).toLowerCase())) {
476
+ skippedSensitive.push(p + " [office conversion requires async - use pipeline]");
477
+ continue;
478
+ }
479
+ files[ftype].push(p);
480
+ if (ftype !== "video" /* VIDEO */) {
481
+ totalWords += countWords(p);
482
+ }
483
+ }
484
+ const totalFiles = Object.values(files).reduce((s, v) => s + v.length, 0);
485
+ const needsGraph = totalWords >= CORPUS_WARN_THRESHOLD;
486
+ let warning = null;
487
+ if (!needsGraph) {
488
+ warning = `Corpus is ~${totalWords.toLocaleString()} words - fits in a single context window. You may not need a graph.`;
489
+ } else if (totalWords >= CORPUS_UPPER_THRESHOLD || totalFiles >= FILE_COUNT_UPPER) {
490
+ warning = `Large corpus: ${totalFiles} files \xB7 ~${totalWords.toLocaleString()} words. Semantic extraction will be expensive (many Claude tokens). Consider running on a subfolder, or use --no-semantic to run AST-only.`;
491
+ }
492
+ return {
493
+ files,
494
+ total_files: totalFiles,
495
+ total_words: totalWords,
496
+ needs_graph: needsGraph,
497
+ warning,
498
+ skipped_sensitive: skippedSensitive,
499
+ graphifyignore_patterns: ignorePatterns.length
500
+ };
501
+ }
502
+ function loadManifest(manifestPath = MANIFEST_PATH) {
503
+ try {
504
+ return JSON.parse(readFileSync(manifestPath, "utf-8"));
505
+ } catch {
506
+ return {};
507
+ }
508
+ }
509
+ function saveManifest(files, manifestPath = MANIFEST_PATH) {
510
+ const manifest = {};
511
+ for (const fileList of Object.values(files)) {
512
+ for (const f of fileList) {
513
+ try {
514
+ manifest[f] = statSync(f).mtimeMs;
515
+ } catch {
516
+ }
517
+ }
518
+ }
519
+ const dir = join(manifestPath, "..");
520
+ mkdirSync(dir, { recursive: true });
521
+ writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
522
+ }
523
+ function detectIncremental(root, manifestPath = MANIFEST_PATH) {
524
+ const full = detect(root);
525
+ const manifest = loadManifest(manifestPath);
526
+ if (Object.keys(manifest).length === 0) {
527
+ return {
528
+ ...full,
529
+ incremental: true,
530
+ new_files: full.files,
531
+ unchanged_files: Object.fromEntries(Object.keys(full.files).map((k) => [k, []])),
532
+ new_total: full.total_files
533
+ };
534
+ }
535
+ const newFiles = {};
536
+ const unchangedFiles = {};
537
+ for (const k of Object.keys(full.files)) {
538
+ newFiles[k] = [];
539
+ unchangedFiles[k] = [];
540
+ }
541
+ for (const [ftype, fileList] of Object.entries(full.files)) {
542
+ for (const f of fileList) {
543
+ const storedMtime = manifest[f];
544
+ let currentMtime = 0;
545
+ try {
546
+ currentMtime = statSync(f).mtimeMs;
547
+ } catch {
548
+ }
549
+ if (storedMtime === void 0 || currentMtime > storedMtime) {
550
+ newFiles[ftype].push(f);
551
+ } else {
552
+ unchangedFiles[ftype].push(f);
553
+ }
554
+ }
555
+ }
556
+ const currentFiles = new Set(Object.values(full.files).flat());
557
+ const deletedFiles = Object.keys(manifest).filter((f) => !currentFiles.has(f));
558
+ const newTotal = Object.values(newFiles).reduce((s, v) => s + v.length, 0);
559
+ return {
560
+ ...full,
561
+ incremental: true,
562
+ new_files: newFiles,
563
+ unchanged_files: unchangedFiles,
564
+ new_total: newTotal,
565
+ deleted_files: deletedFiles
566
+ };
567
+ }
568
+
569
+ // src/analyze.ts
570
+ function nodeCommunityMap(communities) {
571
+ const communityMap = toNumericMap(communities);
572
+ const result = /* @__PURE__ */ new Map();
573
+ for (const [cid, nodes] of communityMap) {
574
+ for (const n of nodes) result.set(n, cid);
575
+ }
576
+ return result;
577
+ }
578
+ function isFileNode(G, nodeId) {
579
+ const attrs = G.getNodeAttributes(nodeId);
580
+ const label = attrs.label ?? "";
581
+ if (!label) return false;
582
+ const sourceFile = attrs.source_file ?? "";
583
+ if (sourceFile) {
584
+ const fileName = sourceFile.split("/").pop() ?? "";
585
+ if (label === fileName) return true;
586
+ }
587
+ if (label.startsWith(".") && label.endsWith("()")) return true;
588
+ if (label.endsWith("()") && G.degree(nodeId) <= 1) return true;
589
+ return false;
590
+ }
591
+ function isConceptNode(G, nodeId) {
592
+ const data = G.getNodeAttributes(nodeId);
593
+ const source = data.source_file ?? "";
594
+ if (!source) return true;
595
+ const lastPart = source.split("/").pop() ?? "";
596
+ if (!lastPart.includes(".")) return true;
597
+ return false;
598
+ }
599
+ function fileCategory(path) {
600
+ const ext = path.includes(".") ? `.${path.split(".").pop()?.toLowerCase() ?? ""}` : "";
601
+ if (CODE_EXTENSIONS.has(ext)) return "code";
602
+ if (PAPER_EXTENSIONS.has(ext)) return "paper";
603
+ if (IMAGE_EXTENSIONS.has(ext)) return "image";
604
+ if (DOC_EXTENSIONS.has(ext)) return "doc";
605
+ return "doc";
606
+ }
607
+ function topLevelDir(path) {
608
+ return path.includes("/") ? path.split("/")[0] : path;
609
+ }
610
+ function surpriseScore(G, u, v, data, nodeCommunity, uSource, vSource) {
611
+ let score = 0;
612
+ const reasons = [];
613
+ const conf = data.confidence ?? "EXTRACTED";
614
+ const confBonus = { AMBIGUOUS: 3, INFERRED: 2, EXTRACTED: 1 };
615
+ score += confBonus[conf] ?? 1;
616
+ if (conf === "AMBIGUOUS" || conf === "INFERRED") {
617
+ reasons.push(`${conf.toLowerCase()} connection - not explicitly stated in source`);
618
+ }
619
+ const catU = fileCategory(uSource);
620
+ const catV = fileCategory(vSource);
621
+ if (catU !== catV) {
622
+ score += 2;
623
+ reasons.push(`crosses file types (${catU} \u2194 ${catV})`);
624
+ }
625
+ if (topLevelDir(uSource) !== topLevelDir(vSource)) {
626
+ score += 2;
627
+ reasons.push("connects across different repos/directories");
628
+ }
629
+ const cidU = nodeCommunity.get(u);
630
+ const cidV = nodeCommunity.get(v);
631
+ if (cidU !== void 0 && cidV !== void 0 && cidU !== cidV) {
632
+ score += 1;
633
+ reasons.push("bridges separate communities");
634
+ }
635
+ if (data.relation === "semantically_similar_to") {
636
+ score = Math.floor(score * 1.5);
637
+ reasons.push("semantically similar concepts with no structural link");
232
638
  }
233
639
  const degU = G.degree(u);
234
640
  const degV = G.degree(v);
@@ -389,10 +795,10 @@ function suggestQuestions(G, communities, communityLabels, topN = 7) {
389
795
  const cid = nodeCommunity.get(nodeId);
390
796
  const commLabel = cid !== void 0 ? labelMap.get(cid) ?? `Community ${cid}` : "unknown";
391
797
  const neighborComms = /* @__PURE__ */ new Set();
392
- G.forEachNeighbor(nodeId, (n) => {
798
+ for (const n of traversalNeighbors(G, nodeId)) {
393
799
  const nc = nodeCommunity.get(n);
394
800
  if (nc !== void 0 && nc !== cid) neighborComms.add(nc);
395
- });
801
+ }
396
802
  if (neighborComms.size > 0) {
397
803
  const otherLabels = [...neighborComms].map((c) => labelMap.get(c) ?? `Community ${c}`);
398
804
  questions.push({
@@ -523,8 +929,7 @@ function graphDiff(GOld, GNew) {
523
929
  }
524
930
 
525
931
  // src/benchmark.ts
526
- import { readFileSync, existsSync } from "fs";
527
- import Graph from "graphology";
932
+ import { readFileSync as readFileSync2, existsSync as existsSync2 } from "fs";
528
933
  var CHARS_PER_TOKEN = 4;
529
934
  function estimateTokens(text) {
530
935
  return Math.max(1, Math.floor(text.length / CHARS_PER_TOKEN));
@@ -546,7 +951,7 @@ function querySubgraphTokens(G, question, depth = 3) {
546
951
  for (let d = 0; d < depth; d++) {
547
952
  const nextFrontier = /* @__PURE__ */ new Set();
548
953
  for (const n of frontier) {
549
- G.forEachNeighbor(n, (neighbor) => {
954
+ forEachTraversalNeighbor(G, n, (neighbor) => {
550
955
  if (!visited.has(neighbor)) {
551
956
  nextFrontier.add(neighbor);
552
957
  edgesSeen.push([n, neighbor]);
@@ -580,26 +985,12 @@ var SAMPLE_QUESTIONS = [
580
985
  "what are the core abstractions"
581
986
  ];
582
987
  function loadGraph(graphPath) {
583
- const raw = JSON.parse(readFileSync(graphPath, "utf-8"));
584
- const G = new Graph({ type: "undirected" });
585
- for (const node of raw.nodes ?? []) {
586
- const { id, ...attrs } = node;
587
- G.mergeNode(id, attrs);
588
- }
589
- for (const link of raw.links ?? []) {
590
- const { source, target, ...attrs } = link;
591
- if (G.hasNode(source) && G.hasNode(target)) {
592
- try {
593
- G.mergeEdge(source, target, attrs);
594
- } catch {
595
- }
596
- }
597
- }
598
- return G;
988
+ const raw = JSON.parse(readFileSync2(graphPath, "utf-8"));
989
+ return loadGraphFromData(raw);
599
990
  }
600
991
  function runBenchmark(graphPath = "graphify-out/graph.json", corpusWordsOrOptions, questions) {
601
992
  const options = typeof corpusWordsOrOptions === "number" ? { corpusWords: corpusWordsOrOptions, questions } : corpusWordsOrOptions ?? {};
602
- if (!existsSync(graphPath)) {
993
+ if (!existsSync2(graphPath)) {
603
994
  return { error: `Graph file not found: ${graphPath}. Build the graph first.` };
604
995
  }
605
996
  const G = loadGraph(graphPath);
@@ -658,21 +1049,33 @@ graphify token reduction benchmark`);
658
1049
  }
659
1050
 
660
1051
  // src/cache.ts
661
- import { createHash } from "crypto";
662
- import { readFileSync as readFileSync2, writeFileSync, mkdirSync, readdirSync, unlinkSync, renameSync, existsSync as existsSync2 } from "fs";
663
- import { join, resolve } from "path";
1052
+ import { createHash as createHash2 } from "crypto";
1053
+ import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, readdirSync as readdirSync2, unlinkSync, renameSync, existsSync as existsSync3 } from "fs";
1054
+ import { extname as extname2, join as join2, resolve as resolve2 } from "path";
1055
+ function bodyContent(content) {
1056
+ const text = content.toString("utf-8");
1057
+ if (!text.startsWith("---")) {
1058
+ return content;
1059
+ }
1060
+ const end = text.indexOf("\n---", 3);
1061
+ if (end === -1) {
1062
+ return content;
1063
+ }
1064
+ return Buffer.from(text.slice(end + 4), "utf-8");
1065
+ }
664
1066
  function fileHash(filePath) {
665
- const content = readFileSync2(filePath);
666
- const resolved = resolve(filePath);
667
- const h = createHash("sha256");
1067
+ const raw = readFileSync3(filePath);
1068
+ const content = extname2(filePath).toLowerCase() === ".md" ? bodyContent(raw) : raw;
1069
+ const resolved = resolve2(filePath);
1070
+ const h = createHash2("sha256");
668
1071
  h.update(content);
669
1072
  h.update("\0");
670
1073
  h.update(resolved);
671
1074
  return h.digest("hex");
672
1075
  }
673
1076
  function cacheDir(root = ".") {
674
- const d = join(root, "graphify-out", "cache");
675
- mkdirSync(d, { recursive: true });
1077
+ const d = join2(root, "graphify-out", "cache");
1078
+ mkdirSync2(d, { recursive: true });
676
1079
  return d;
677
1080
  }
678
1081
  function loadCached(filePath, root = ".") {
@@ -682,20 +1085,20 @@ function loadCached(filePath, root = ".") {
682
1085
  } catch {
683
1086
  return null;
684
1087
  }
685
- const entry = join(cacheDir(root), `${h}.json`);
686
- if (!existsSync2(entry)) return null;
1088
+ const entry = join2(cacheDir(root), `${h}.json`);
1089
+ if (!existsSync3(entry)) return null;
687
1090
  try {
688
- return JSON.parse(readFileSync2(entry, "utf-8"));
1091
+ return JSON.parse(readFileSync3(entry, "utf-8"));
689
1092
  } catch {
690
1093
  return null;
691
1094
  }
692
1095
  }
693
1096
  function saveCached(filePath, result, root = ".") {
694
1097
  const h = fileHash(filePath);
695
- const entry = join(cacheDir(root), `${h}.json`);
1098
+ const entry = join2(cacheDir(root), `${h}.json`);
696
1099
  const tmp = entry + ".tmp";
697
1100
  try {
698
- writeFileSync(tmp, JSON.stringify(result));
1101
+ writeFileSync2(tmp, JSON.stringify(result));
699
1102
  renameSync(tmp, entry);
700
1103
  } catch {
701
1104
  try {
@@ -745,8 +1148,8 @@ function saveSemanticCache(nodes, edges, hyperedges = null, root = ".") {
745
1148
  }
746
1149
  let saved = 0;
747
1150
  for (const [fpath, result] of byFile) {
748
- const p = resolve(root, fpath);
749
- if (existsSync2(p)) {
1151
+ const p = resolve2(root, fpath);
1152
+ if (existsSync3(p)) {
750
1153
  saveCached(p, result, root);
751
1154
  saved++;
752
1155
  }
@@ -754,9 +1157,6 @@ function saveSemanticCache(nodes, edges, hyperedges = null, root = ".") {
754
1157
  return saved;
755
1158
  }
756
1159
 
757
- // src/build.ts
758
- import Graph2 from "graphology";
759
-
760
1160
  // src/validate.ts
761
1161
  var VALID_FILE_TYPES = /* @__PURE__ */ new Set(["code", "document", "paper", "image", "rationale"]);
762
1162
  var VALID_CONFIDENCES = /* @__PURE__ */ new Set(["EXTRACTED", "INFERRED", "AMBIGUOUS"]);
@@ -784,417 +1184,85 @@ function validateExtraction(data) {
784
1184
  errors.push(
785
1185
  `Node ${i} (id=${JSON.stringify(node.id ?? "?")}) missing required field '${field}'`
786
1186
  );
787
- }
788
- }
789
- if ("file_type" in node && !VALID_FILE_TYPES.has(node.file_type)) {
790
- errors.push(
791
- `Node ${i} (id=${JSON.stringify(node.id ?? "?")}) has invalid file_type '${node.file_type}' - must be one of ${JSON.stringify([...VALID_FILE_TYPES].sort())}`
792
- );
793
- }
794
- }
795
- }
796
- if (!("edges" in d)) {
797
- errors.push("Missing required key 'edges'");
798
- } else if (!Array.isArray(d.edges)) {
799
- errors.push("'edges' must be a list");
800
- } else {
801
- const nodeIds = /* @__PURE__ */ new Set();
802
- if (Array.isArray(d.nodes)) {
803
- for (const n of d.nodes) {
804
- if (typeof n === "object" && n !== null && "id" in n) {
805
- nodeIds.add(n.id);
806
- }
807
- }
808
- }
809
- for (let i = 0; i < d.edges.length; i++) {
810
- const edge = d.edges[i];
811
- if (typeof edge !== "object" || edge === null || Array.isArray(edge)) {
812
- errors.push(`Edge ${i} must be an object`);
813
- continue;
814
- }
815
- for (const field of REQUIRED_EDGE_FIELDS) {
816
- if (!(field in edge)) {
817
- errors.push(`Edge ${i} missing required field '${field}'`);
818
- }
819
- }
820
- if ("confidence" in edge && !VALID_CONFIDENCES.has(edge.confidence)) {
821
- errors.push(
822
- `Edge ${i} has invalid confidence '${edge.confidence}' - must be one of ${JSON.stringify([...VALID_CONFIDENCES].sort())}`
823
- );
824
- }
825
- if ("source" in edge && nodeIds.size > 0 && !nodeIds.has(edge.source)) {
826
- errors.push(`Edge ${i} source '${edge.source}' does not match any node id`);
827
- }
828
- if ("target" in edge && nodeIds.size > 0 && !nodeIds.has(edge.target)) {
829
- errors.push(`Edge ${i} target '${edge.target}' does not match any node id`);
830
- }
831
- }
832
- }
833
- return errors;
834
- }
835
-
836
- // src/build.ts
837
- function buildFromJson(extraction) {
838
- const errors = validateExtraction(extraction);
839
- const realErrors = errors.filter((e) => !e.includes("does not match any node id"));
840
- if (realErrors.length > 0) {
841
- console.error(
842
- `[graphify] Extraction warning (${realErrors.length} issues): ${realErrors[0]}`
843
- );
844
- }
845
- const G = new Graph2({ type: "undirected", multi: false });
846
- for (const node of extraction.nodes ?? []) {
847
- const { id, ...attrs } = node;
848
- G.mergeNode(id, attrs);
849
- }
850
- const nodeSet = new Set(G.nodes());
851
- for (const edge of extraction.edges ?? []) {
852
- const { source, target, ...attrs } = edge;
853
- if (!nodeSet.has(source) || !nodeSet.has(target)) continue;
854
- attrs._src = source;
855
- attrs._tgt = target;
856
- try {
857
- G.mergeEdge(source, target, attrs);
858
- } catch {
859
- }
860
- }
861
- const hyperedges = extraction.hyperedges ?? [];
862
- if (hyperedges.length > 0) {
863
- G.setAttribute("hyperedges", hyperedges);
864
- }
865
- return G;
866
- }
867
-
868
- // src/detect.ts
869
- import {
870
- readdirSync as readdirSync2,
871
- readFileSync as readFileSync3,
872
- writeFileSync as writeFileSync2,
873
- statSync,
874
- existsSync as existsSync3,
875
- mkdirSync as mkdirSync2,
876
- lstatSync
877
- } from "fs";
878
- import { join as join2, resolve as resolve2, extname, basename, relative, sep } from "path";
879
- import { createHash as createHash2 } from "crypto";
880
- var MANIFEST_PATH = "graphify-out/manifest.json";
881
- var CODE_EXTENSIONS2 = /* @__PURE__ */ new Set([
882
- ".py",
883
- ".ts",
884
- ".js",
885
- ".jsx",
886
- ".tsx",
887
- ".go",
888
- ".rs",
889
- ".java",
890
- ".cpp",
891
- ".cc",
892
- ".cxx",
893
- ".c",
894
- ".h",
895
- ".hpp",
896
- ".rb",
897
- ".swift",
898
- ".kt",
899
- ".kts",
900
- ".cs",
901
- ".scala",
902
- ".php",
903
- ".lua",
904
- ".toc",
905
- ".zig",
906
- ".ps1",
907
- ".ex",
908
- ".exs",
909
- ".m",
910
- ".mm",
911
- ".jl"
912
- ]);
913
- var DOC_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".rst"]);
914
- var PAPER_EXTENSIONS2 = /* @__PURE__ */ new Set([".pdf"]);
915
- var IMAGE_EXTENSIONS2 = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"]);
916
- var OFFICE_EXTENSIONS = /* @__PURE__ */ new Set([".docx", ".xlsx"]);
917
- var CORPUS_WARN_THRESHOLD = 5e4;
918
- var CORPUS_UPPER_THRESHOLD = 5e5;
919
- var FILE_COUNT_UPPER = 200;
920
- var SENSITIVE_PATTERNS = [
921
- /(^|[\\/])\.(env|envrc)(\.|$)/i,
922
- /\.(pem|key|p12|pfx|cert|crt|der|p8)$/i,
923
- /(credential|secret|passwd|password|token|private_key)/i,
924
- /(id_rsa|id_dsa|id_ecdsa|id_ed25519)(\.pub)?$/,
925
- /(\.netrc|\.pgpass|\.htpasswd)$/i,
926
- /(aws_credentials|gcloud_credentials|service.account)/i
927
- ];
928
- var PAPER_SIGNALS = [
929
- /\barxiv\b/i,
930
- /\bdoi\s*:/i,
931
- /\babstract\b/i,
932
- /\bproceedings\b/i,
933
- /\bjournal\b/i,
934
- /\bpreprint\b/i,
935
- /\\cite\{/,
936
- /\[\d+\]/,
937
- /\[\n\d+\n\]/,
938
- /eq\.\s*\d+|equation\s+\d+/i,
939
- /\d{4}\.\d{4,5}/,
940
- /\bwe propose\b/i,
941
- /\bliterature\b/i
942
- ];
943
- var PAPER_SIGNAL_THRESHOLD = 3;
944
- function isSensitive(filePath) {
945
- const name = basename(filePath);
946
- return SENSITIVE_PATTERNS.some((p) => p.test(name) || p.test(filePath));
947
- }
948
- function looksLikePaper(filePath) {
949
- try {
950
- const text = readFileSync3(filePath, "utf-8").slice(0, 3e3);
951
- const hits = PAPER_SIGNALS.filter((p) => p.test(text)).length;
952
- return hits >= PAPER_SIGNAL_THRESHOLD;
953
- } catch {
954
- return false;
955
- }
956
- }
957
- var ASSET_DIR_MARKERS = /* @__PURE__ */ new Set([".imageset", ".xcassets", ".appiconset", ".colorset", ".launchimage"]);
958
- function classifyFile(filePath) {
959
- const ext = extname(filePath).toLowerCase();
960
- if (CODE_EXTENSIONS2.has(ext)) return "code" /* CODE */;
961
- if (PAPER_EXTENSIONS2.has(ext)) {
962
- const parts = filePath.split(sep);
963
- if (parts.some((p) => [...ASSET_DIR_MARKERS].some((m) => p.endsWith(m)))) return null;
964
- return "paper" /* PAPER */;
965
- }
966
- if (IMAGE_EXTENSIONS2.has(ext)) return "image" /* IMAGE */;
967
- if (DOC_EXTENSIONS.has(ext)) {
968
- if (looksLikePaper(filePath)) return "paper" /* PAPER */;
969
- return "document" /* DOCUMENT */;
970
- }
971
- if (OFFICE_EXTENSIONS.has(ext)) return "document" /* DOCUMENT */;
972
- return null;
973
- }
974
- function countWords(filePath) {
975
- try {
976
- const text = readFileSync3(filePath, "utf-8");
977
- return text.split(/\s+/).filter(Boolean).length;
978
- } catch {
979
- return 0;
980
- }
981
- }
982
- var SKIP_DIRS = /* @__PURE__ */ new Set([
983
- "venv",
984
- ".venv",
985
- "env",
986
- ".env",
987
- "node_modules",
988
- "__pycache__",
989
- ".git",
990
- "dist",
991
- "build",
992
- "target",
993
- "out",
994
- "site-packages",
995
- "lib64",
996
- ".pytest_cache",
997
- ".mypy_cache",
998
- ".ruff_cache",
999
- ".tox",
1000
- ".eggs"
1001
- ]);
1002
- function isNoiseDir(part) {
1003
- if (SKIP_DIRS.has(part)) return true;
1004
- if (part.endsWith("_venv") || part.endsWith("_env")) return true;
1005
- if (part.endsWith(".egg-info")) return true;
1006
- return false;
1007
- }
1008
- function loadGraphifyignore(root) {
1009
- const ignoreFile = join2(root, ".graphifyignore");
1010
- if (!existsSync3(ignoreFile)) return [];
1011
- const patterns = [];
1012
- for (let line of readFileSync3(ignoreFile, "utf-8").split("\n")) {
1013
- line = line.trim();
1014
- if (line && !line.startsWith("#")) {
1015
- patterns.push(line);
1016
- }
1017
- }
1018
- return patterns;
1019
- }
1020
- function matchGlob(text, pattern) {
1021
- const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
1022
- return new RegExp(`^${regex}$`).test(text);
1023
- }
1024
- function isIgnored(filePath, root, patterns) {
1025
- if (patterns.length === 0) return false;
1026
- let rel;
1027
- try {
1028
- rel = relative(root, filePath).replace(/\\/g, "/");
1029
- } catch {
1030
- return false;
1031
- }
1032
- const parts = rel.split("/");
1033
- for (const pattern of patterns) {
1034
- const p = pattern.replace(/^\/+|\/+$/g, "");
1035
- if (!p) continue;
1036
- if (matchGlob(rel, p)) return true;
1037
- if (matchGlob(basename(filePath), p)) return true;
1038
- for (let i = 0; i < parts.length; i++) {
1039
- if (matchGlob(parts[i], p)) return true;
1040
- if (matchGlob(parts.slice(0, i + 1).join("/"), p)) return true;
1041
- }
1042
- }
1043
- return false;
1044
- }
1045
- function walkDir(dir, root, ignorePatterns, followSymlinks, skipPrune) {
1046
- const result = [];
1047
- let entries;
1048
- try {
1049
- entries = readdirSync2(dir);
1050
- } catch {
1051
- return result;
1052
- }
1053
- for (const entry of entries) {
1054
- const full = join2(dir, entry);
1055
- let stat;
1056
- try {
1057
- stat = followSymlinks ? statSync(full) : lstatSync(full);
1058
- } catch {
1059
- continue;
1060
- }
1061
- if (stat.isDirectory()) {
1062
- if (!skipPrune) {
1063
- if (entry.startsWith(".")) continue;
1064
- if (isNoiseDir(entry)) continue;
1065
- if (isIgnored(full, root, ignorePatterns)) continue;
1066
- }
1067
- result.push(...walkDir(full, root, ignorePatterns, followSymlinks, skipPrune));
1068
- } else if (stat.isFile()) {
1069
- result.push(full);
1070
- }
1071
- }
1072
- return result;
1073
- }
1074
- function detect(root, options) {
1075
- const followSymlinks = options?.followSymlinks ?? false;
1076
- const rootResolved = resolve2(root);
1077
- const ignorePatterns = loadGraphifyignore(rootResolved);
1078
- const convertedDir = join2(rootResolved, "graphify-out", "converted");
1079
- const memoryDir = join2(rootResolved, "graphify-out", "memory");
1080
- const files = {
1081
- code: [],
1082
- document: [],
1083
- paper: [],
1084
- image: []
1085
- };
1086
- let totalWords = 0;
1087
- const skippedSensitive = [];
1088
- const allFiles = walkDir(rootResolved, rootResolved, ignorePatterns, followSymlinks, false);
1089
- if (existsSync3(memoryDir)) {
1090
- allFiles.push(...walkDir(memoryDir, rootResolved, ignorePatterns, followSymlinks, true));
1091
- }
1092
- const seen = /* @__PURE__ */ new Set();
1093
- for (const p of allFiles) {
1094
- if (seen.has(p)) continue;
1095
- seen.add(p);
1096
- const inMemory = existsSync3(memoryDir) && p.startsWith(memoryDir);
1097
- if (!inMemory) {
1098
- if (basename(p).startsWith(".")) continue;
1099
- if (p.startsWith(convertedDir)) continue;
1100
- }
1101
- if (isIgnored(p, rootResolved, ignorePatterns)) continue;
1102
- if (isSensitive(p)) {
1103
- skippedSensitive.push(p);
1104
- continue;
1105
- }
1106
- const ftype = classifyFile(p);
1107
- if (!ftype) continue;
1108
- if (OFFICE_EXTENSIONS.has(extname(p).toLowerCase())) {
1109
- skippedSensitive.push(p + " [office conversion requires async - use pipeline]");
1110
- continue;
1111
- }
1112
- files[ftype].push(p);
1113
- totalWords += countWords(p);
1114
- }
1115
- const totalFiles = Object.values(files).reduce((s, v) => s + v.length, 0);
1116
- const needsGraph = totalWords >= CORPUS_WARN_THRESHOLD;
1117
- let warning = null;
1118
- if (!needsGraph) {
1119
- warning = `Corpus is ~${totalWords.toLocaleString()} words - fits in a single context window. You may not need a graph.`;
1120
- } else if (totalWords >= CORPUS_UPPER_THRESHOLD || totalFiles >= FILE_COUNT_UPPER) {
1121
- warning = `Large corpus: ${totalFiles} files \xB7 ~${totalWords.toLocaleString()} words. Semantic extraction will be expensive (many Claude tokens). Consider running on a subfolder, or use --no-semantic to run AST-only.`;
1122
- }
1123
- return {
1124
- files,
1125
- total_files: totalFiles,
1126
- total_words: totalWords,
1127
- needs_graph: needsGraph,
1128
- warning,
1129
- skipped_sensitive: skippedSensitive,
1130
- graphifyignore_patterns: ignorePatterns.length
1131
- };
1132
- }
1133
- function loadManifest(manifestPath = MANIFEST_PATH) {
1134
- try {
1135
- return JSON.parse(readFileSync3(manifestPath, "utf-8"));
1136
- } catch {
1137
- return {};
1187
+ }
1188
+ }
1189
+ if ("file_type" in node && !VALID_FILE_TYPES.has(node.file_type)) {
1190
+ errors.push(
1191
+ `Node ${i} (id=${JSON.stringify(node.id ?? "?")}) has invalid file_type '${node.file_type}' - must be one of ${JSON.stringify([...VALID_FILE_TYPES].sort())}`
1192
+ );
1193
+ }
1194
+ }
1138
1195
  }
1139
- }
1140
- function saveManifest(files, manifestPath = MANIFEST_PATH) {
1141
- const manifest = {};
1142
- for (const fileList of Object.values(files)) {
1143
- for (const f of fileList) {
1144
- try {
1145
- manifest[f] = statSync(f).mtimeMs;
1146
- } catch {
1196
+ if (!("edges" in d)) {
1197
+ errors.push("Missing required key 'edges'");
1198
+ } else if (!Array.isArray(d.edges)) {
1199
+ errors.push("'edges' must be a list");
1200
+ } else {
1201
+ const nodeIds = /* @__PURE__ */ new Set();
1202
+ if (Array.isArray(d.nodes)) {
1203
+ for (const n of d.nodes) {
1204
+ if (typeof n === "object" && n !== null && "id" in n) {
1205
+ nodeIds.add(n.id);
1206
+ }
1207
+ }
1208
+ }
1209
+ for (let i = 0; i < d.edges.length; i++) {
1210
+ const edge = d.edges[i];
1211
+ if (typeof edge !== "object" || edge === null || Array.isArray(edge)) {
1212
+ errors.push(`Edge ${i} must be an object`);
1213
+ continue;
1214
+ }
1215
+ for (const field of REQUIRED_EDGE_FIELDS) {
1216
+ if (!(field in edge)) {
1217
+ errors.push(`Edge ${i} missing required field '${field}'`);
1218
+ }
1219
+ }
1220
+ if ("confidence" in edge && !VALID_CONFIDENCES.has(edge.confidence)) {
1221
+ errors.push(
1222
+ `Edge ${i} has invalid confidence '${edge.confidence}' - must be one of ${JSON.stringify([...VALID_CONFIDENCES].sort())}`
1223
+ );
1224
+ }
1225
+ if ("source" in edge && nodeIds.size > 0 && !nodeIds.has(edge.source)) {
1226
+ errors.push(`Edge ${i} source '${edge.source}' does not match any node id`);
1227
+ }
1228
+ if ("target" in edge && nodeIds.size > 0 && !nodeIds.has(edge.target)) {
1229
+ errors.push(`Edge ${i} target '${edge.target}' does not match any node id`);
1147
1230
  }
1148
1231
  }
1149
1232
  }
1150
- const dir = join2(manifestPath, "..");
1151
- mkdirSync2(dir, { recursive: true });
1152
- writeFileSync2(manifestPath, JSON.stringify(manifest, null, 2));
1233
+ return errors;
1153
1234
  }
1154
- function detectIncremental(root, manifestPath = MANIFEST_PATH) {
1155
- const full = detect(root);
1156
- const manifest = loadManifest(manifestPath);
1157
- if (Object.keys(manifest).length === 0) {
1158
- return {
1159
- ...full,
1160
- incremental: true,
1161
- new_files: full.files,
1162
- unchanged_files: Object.fromEntries(Object.keys(full.files).map((k) => [k, []])),
1163
- new_total: full.total_files
1164
- };
1235
+
1236
+ // src/build.ts
1237
+ function buildFromJson(extraction, options) {
1238
+ const errors = validateExtraction(extraction);
1239
+ const realErrors = errors.filter((e) => !e.includes("does not match any node id"));
1240
+ if (realErrors.length > 0) {
1241
+ console.error(
1242
+ `[graphify] Extraction warning (${realErrors.length} issues): ${realErrors[0]}`
1243
+ );
1165
1244
  }
1166
- const newFiles = {};
1167
- const unchangedFiles = {};
1168
- for (const k of Object.keys(full.files)) {
1169
- newFiles[k] = [];
1170
- unchangedFiles[k] = [];
1245
+ const G = createGraph(options?.directed === true);
1246
+ for (const node of extraction.nodes ?? []) {
1247
+ const { id, ...attrs } = node;
1248
+ G.mergeNode(id, attrs);
1171
1249
  }
1172
- for (const [ftype, fileList] of Object.entries(full.files)) {
1173
- for (const f of fileList) {
1174
- const storedMtime = manifest[f];
1175
- let currentMtime = 0;
1176
- try {
1177
- currentMtime = statSync(f).mtimeMs;
1178
- } catch {
1179
- }
1180
- if (storedMtime === void 0 || currentMtime > storedMtime) {
1181
- newFiles[ftype].push(f);
1182
- } else {
1183
- unchangedFiles[ftype].push(f);
1184
- }
1250
+ const nodeSet = new Set(G.nodes());
1251
+ for (const edge of extraction.edges ?? []) {
1252
+ const { source, target, ...attrs } = edge;
1253
+ if (!nodeSet.has(source) || !nodeSet.has(target)) continue;
1254
+ attrs._src = source;
1255
+ attrs._tgt = target;
1256
+ try {
1257
+ G.mergeEdge(source, target, attrs);
1258
+ } catch {
1185
1259
  }
1186
1260
  }
1187
- const currentFiles = new Set(Object.values(full.files).flat());
1188
- const deletedFiles = Object.keys(manifest).filter((f) => !currentFiles.has(f));
1189
- const newTotal = Object.values(newFiles).reduce((s, v) => s + v.length, 0);
1190
- return {
1191
- ...full,
1192
- incremental: true,
1193
- new_files: newFiles,
1194
- unchanged_files: unchangedFiles,
1195
- new_total: newTotal,
1196
- deleted_files: deletedFiles
1197
- };
1261
+ const hyperedges = extraction.hyperedges ?? [];
1262
+ if (hyperedges.length > 0) {
1263
+ G.setAttribute("hyperedges", hyperedges);
1264
+ }
1265
+ return G;
1198
1266
  }
1199
1267
 
1200
1268
  // src/export.ts
@@ -1360,14 +1428,17 @@ function normalizeCommunityLabels(labelsOrOptions) {
1360
1428
  }
1361
1429
  return toNumericMap(labelsOrOptions.communityLabels);
1362
1430
  }
1363
- function toJson(G, communities, outputPath) {
1431
+ function toJson(G, communities, outputPath, communityLabelsOrOptions) {
1364
1432
  const nodeComm = nodeCommunityMap2(communities);
1433
+ const communityLabels = normalizeCommunityLabels(communityLabelsOrOptions);
1365
1434
  const nodes = [];
1366
1435
  G.forEachNode((nodeId, attrs) => {
1436
+ const communityId = nodeComm.get(nodeId) ?? null;
1367
1437
  nodes.push({
1368
1438
  id: nodeId,
1369
1439
  ...attrs,
1370
- community: nodeComm.get(nodeId) ?? null
1440
+ community: communityId,
1441
+ community_name: communityId !== null ? sanitizeLabel(communityLabels?.get(communityId) ?? `Community ${communityId}`) : null
1371
1442
  });
1372
1443
  });
1373
1444
  const links = [];
@@ -1384,10 +1455,15 @@ function toJson(G, communities, outputPath) {
1384
1455
  links.push(link);
1385
1456
  });
1386
1457
  const hyperedges = G.getAttribute("hyperedges") ?? [];
1458
+ const communityLabelsObject = communityLabels ? Object.fromEntries(
1459
+ [...communityLabels.entries()].sort((a, b) => a[0] - b[0]).map(([cid, label]) => [String(cid), sanitizeLabel(label)])
1460
+ ) : {};
1387
1461
  const output = {
1388
- directed: false,
1462
+ directed: isDirectedGraph(G),
1389
1463
  multigraph: false,
1390
- graph: {},
1464
+ graph: {
1465
+ community_labels: communityLabelsObject
1466
+ },
1391
1467
  nodes,
1392
1468
  links,
1393
1469
  hyperedges
@@ -1653,9 +1729,24 @@ function focusNode(nodeId) {
1653
1729
  showInfo(nodeId);
1654
1730
  }
1655
1731
 
1732
+ let hoveredNodeId = null;
1733
+ network.on('hoverNode', params => {
1734
+ hoveredNodeId = params.node;
1735
+ container.style.cursor = 'pointer';
1736
+ });
1737
+ network.on('blurNode', () => {
1738
+ hoveredNodeId = null;
1739
+ container.style.cursor = 'default';
1740
+ });
1741
+ container.addEventListener('click', () => {
1742
+ if (hoveredNodeId !== null) {
1743
+ showInfo(hoveredNodeId);
1744
+ network.selectNodes([hoveredNodeId]);
1745
+ }
1746
+ });
1656
1747
  network.on('click', params => {
1657
1748
  if (params.nodes.length > 0) showInfo(params.nodes[0]);
1658
- else document.getElementById('info-content').innerHTML = '<span class="empty">Click a node to inspect it</span>';
1749
+ else if (hoveredNodeId === null) document.getElementById('info-content').innerHTML = '<span class="empty">Click a node to inspect it</span>';
1659
1750
  });
1660
1751
 
1661
1752
  const searchInput = document.getElementById('search');
@@ -1826,7 +1917,7 @@ function toGraphml(G, communities, outputPath) {
1826
1917
  lines.push(' <key id="community" for="node" attr.name="community" attr.type="int"/>');
1827
1918
  lines.push(' <key id="relation" for="edge" attr.name="relation" attr.type="string"/>');
1828
1919
  lines.push(' <key id="confidence" for="edge" attr.name="confidence" attr.type="string"/>');
1829
- lines.push(' <graph id="G" edgedefault="undirected">');
1920
+ lines.push(` <graph id="G" edgedefault="${isDirectedGraph(G) ? "directed" : "undirected"}">`);
1830
1921
  G.forEachNode((nodeId, data) => {
1831
1922
  lines.push(` <node id="${xmlEsc(nodeId)}">`);
1832
1923
  lines.push(` <data key="label">${xmlEsc(data.label ?? nodeId)}</data>`);
@@ -1921,7 +2012,7 @@ function toSvg(G, communities, outputPath, communityLabelsOrOptions, figsize = [
1921
2012
 
1922
2013
  // src/extract.ts
1923
2014
  import { readFileSync as readFileSync4, readdirSync as readdirSync3, lstatSync as lstatSync2, realpathSync, existsSync as existsSync5 } from "fs";
1924
- import { resolve as resolve3, basename as basename2, extname as extname2, dirname, join as join3, sep as sep2 } from "path";
2015
+ import { resolve as resolve3, basename as basename2, extname as extname3, dirname as dirname2, join as join3, sep as sep2 } from "path";
1925
2016
  import { createRequire } from "module";
1926
2017
  import * as TreeSitter from "web-tree-sitter";
1927
2018
  var Parser2 = TreeSitter.Parser ?? TreeSitter.default;
@@ -2562,7 +2653,7 @@ async function _extractGeneric(filePath, config) {
2562
2653
  return { nodes: [], edges: [], error: String(e) };
2563
2654
  }
2564
2655
  const root = tree.rootNode;
2565
- const stem = basename2(filePath, extname2(filePath));
2656
+ const stem = basename2(filePath, extname3(filePath));
2566
2657
  const strPath = filePath;
2567
2658
  const nodes = [];
2568
2659
  const edges = [];
@@ -2931,10 +3022,10 @@ async function _extractGeneric(filePath, config) {
2931
3022
  source: callerNid,
2932
3023
  target: tgtNid,
2933
3024
  relation: "calls",
2934
- confidence: "INFERRED",
3025
+ confidence: "EXTRACTED",
2935
3026
  source_file: strPath,
2936
3027
  source_location: `L${line}`,
2937
- weight: 0.8
3028
+ weight: 1
2938
3029
  });
2939
3030
  }
2940
3031
  }
@@ -2979,7 +3070,7 @@ async function _extractPythonRationale(filePath, result) {
2979
3070
  } catch {
2980
3071
  return;
2981
3072
  }
2982
- const stem = basename2(filePath, extname2(filePath));
3073
+ const stem = basename2(filePath, extname3(filePath));
2983
3074
  const strPath = filePath;
2984
3075
  const { nodes, edges } = result;
2985
3076
  const seenIds = new Set(nodes.map((n) => n.id));
@@ -3075,7 +3166,7 @@ async function extractPython(filePath) {
3075
3166
  return result;
3076
3167
  }
3077
3168
  async function extractJs(filePath) {
3078
- const ext = extname2(filePath);
3169
+ const ext = extname3(filePath);
3079
3170
  const config = ext === ".ts" || ext === ".tsx" ? _TS_CONFIG : _JS_CONFIG;
3080
3171
  return _extractGeneric(filePath, config);
3081
3172
  }
@@ -3126,7 +3217,7 @@ async function extractJulia(filePath) {
3126
3217
  return { nodes: [], edges: [], error: String(e) };
3127
3218
  }
3128
3219
  const root = tree.rootNode;
3129
- const stem = basename2(filePath, extname2(filePath));
3220
+ const stem = basename2(filePath, extname3(filePath));
3130
3221
  const strPath = filePath;
3131
3222
  const nodes = [];
3132
3223
  const edges = [];
@@ -3322,8 +3413,8 @@ async function extractGo(filePath) {
3322
3413
  return { nodes: [], edges: [], error: String(e) };
3323
3414
  }
3324
3415
  const root = tree.rootNode;
3325
- const stem = basename2(filePath, extname2(filePath));
3326
- const pkgScope = dirname(filePath).split(sep2).pop() || stem;
3416
+ const stem = basename2(filePath, extname3(filePath));
3417
+ const pkgScope = dirname2(filePath).split(sep2).pop() || stem;
3327
3418
  const strPath = filePath;
3328
3419
  const nodes = [];
3329
3420
  const edges = [];
@@ -3466,10 +3557,10 @@ async function extractGo(filePath) {
3466
3557
  source: callerNid,
3467
3558
  target: tgtNid,
3468
3559
  relation: "calls",
3469
- confidence: "INFERRED",
3560
+ confidence: "EXTRACTED",
3470
3561
  source_file: strPath,
3471
3562
  source_location: `L${line}`,
3472
- weight: 0.8
3563
+ weight: 1
3473
3564
  });
3474
3565
  }
3475
3566
  }
@@ -3504,7 +3595,7 @@ async function extractRust(filePath) {
3504
3595
  return { nodes: [], edges: [], error: String(e) };
3505
3596
  }
3506
3597
  const root = tree.rootNode;
3507
- const stem = basename2(filePath, extname2(filePath));
3598
+ const stem = basename2(filePath, extname3(filePath));
3508
3599
  const strPath = filePath;
3509
3600
  const nodes = [];
3510
3601
  const edges = [];
@@ -3621,10 +3712,10 @@ async function extractRust(filePath) {
3621
3712
  source: callerNid,
3622
3713
  target: tgtNid,
3623
3714
  relation: "calls",
3624
- confidence: "INFERRED",
3715
+ confidence: "EXTRACTED",
3625
3716
  source_file: strPath,
3626
3717
  source_location: `L${line}`,
3627
- weight: 0.8
3718
+ weight: 1
3628
3719
  });
3629
3720
  }
3630
3721
  }
@@ -3659,7 +3750,7 @@ async function extractZig(filePath) {
3659
3750
  return { nodes: [], edges: [], error: String(e) };
3660
3751
  }
3661
3752
  const root = tree.rootNode;
3662
- const stem = basename2(filePath, extname2(filePath));
3753
+ const stem = basename2(filePath, extname3(filePath));
3663
3754
  const strPath = filePath;
3664
3755
  const nodes = [];
3665
3756
  const edges = [];
@@ -3783,7 +3874,7 @@ async function extractZig(filePath) {
3783
3874
  const pair = `${callerNid}|${tgtNid}`;
3784
3875
  if (!seenCallPairs.has(pair)) {
3785
3876
  seenCallPairs.add(pair);
3786
- addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "INFERRED", 0.8);
3877
+ addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
3787
3878
  }
3788
3879
  }
3789
3880
  }
@@ -3817,7 +3908,7 @@ async function extractPowershell(filePath) {
3817
3908
  return { nodes: [], edges: [], error: String(e) };
3818
3909
  }
3819
3910
  const root = tree.rootNode;
3820
- const stem = basename2(filePath, extname2(filePath));
3911
+ const stem = basename2(filePath, extname3(filePath));
3821
3912
  const strPath = filePath;
3822
3913
  const nodes = [];
3823
3914
  const edges = [];
@@ -3966,7 +4057,7 @@ async function extractPowershell(filePath) {
3966
4057
  const pair = `${callerNid}|${tgtNid}`;
3967
4058
  if (!seenCallPairs.has(pair)) {
3968
4059
  seenCallPairs.add(pair);
3969
- addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "INFERRED", 0.8);
4060
+ addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
3970
4061
  }
3971
4062
  }
3972
4063
  }
@@ -4001,7 +4092,7 @@ async function extractObjc(filePath) {
4001
4092
  return { nodes: [], edges: [], error: String(e) };
4002
4093
  }
4003
4094
  const root = tree.rootNode;
4004
- const stem = basename2(filePath, extname2(filePath));
4095
+ const stem = basename2(filePath, extname3(filePath));
4005
4096
  const strPath = filePath;
4006
4097
  const nodes = [];
4007
4098
  const edges = [];
@@ -4172,7 +4263,7 @@ async function extractObjc(filePath) {
4172
4263
  const pair = `${callerNid}|${candidate}`;
4173
4264
  if (!seenCalls.has(pair) && callerNid !== candidate) {
4174
4265
  seenCalls.add(pair);
4175
- addEdge(callerNid, candidate, "calls", bodyNode.startPosition.row + 1, "INFERRED", 0.8);
4266
+ addEdge(callerNid, candidate, "calls", bodyNode.startPosition.row + 1, "EXTRACTED", 1);
4176
4267
  }
4177
4268
  }
4178
4269
  }
@@ -4204,7 +4295,7 @@ async function extractElixir(filePath) {
4204
4295
  return { nodes: [], edges: [], error: String(e) };
4205
4296
  }
4206
4297
  const root = tree.rootNode;
4207
- const stem = basename2(filePath, extname2(filePath));
4298
+ const stem = basename2(filePath, extname3(filePath));
4208
4299
  const strPath = filePath;
4209
4300
  const nodes = [];
4210
4301
  const edges = [];
@@ -4363,7 +4454,7 @@ async function extractElixir(filePath) {
4363
4454
  const pair = `${callerNid}|${tgtNid}`;
4364
4455
  if (!seenCallPairs.has(pair)) {
4365
4456
  seenCallPairs.add(pair);
4366
- addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "INFERRED", 0.8);
4457
+ addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
4367
4458
  }
4368
4459
  }
4369
4460
  }
@@ -4390,7 +4481,7 @@ async function _resolveCrossFileImports(perFile, paths) {
4390
4481
  for (const node of fileResult.nodes ?? []) {
4391
4482
  const src = node.source_file ?? "";
4392
4483
  if (!src) continue;
4393
- const fileStem = basename2(src, extname2(src));
4484
+ const fileStem = basename2(src, extname3(src));
4394
4485
  const label = node.label ?? "";
4395
4486
  const nid = node.id ?? "";
4396
4487
  if (label && !label.endsWith(")") && !label.endsWith(".py") && !label.startsWith("_")) {
@@ -4402,7 +4493,7 @@ async function _resolveCrossFileImports(perFile, paths) {
4402
4493
  const newEdges = [];
4403
4494
  const stemToPath = /* @__PURE__ */ new Map();
4404
4495
  for (const p of paths) {
4405
- stemToPath.set(basename2(p, extname2(p)), p);
4496
+ stemToPath.set(basename2(p, extname3(p)), p);
4406
4497
  }
4407
4498
  for (let idx = 0; idx < perFile.length; idx++) {
4408
4499
  let walkImports = function(node) {
@@ -4466,7 +4557,7 @@ async function _resolveCrossFileImports(perFile, paths) {
4466
4557
  };
4467
4558
  const fileResult = perFile[idx];
4468
4559
  const filePath = paths[idx];
4469
- const fileStem = basename2(filePath, extname2(filePath));
4560
+ const fileStem = basename2(filePath, extname3(filePath));
4470
4561
  const strPath = filePath;
4471
4562
  const localClasses = fileResult.nodes.filter(
4472
4563
  (n) => n.source_file === strPath && !n.label.endsWith(")") && !n.label.endsWith(".py") && n.id !== _makeId(fileStem)
@@ -4524,7 +4615,7 @@ async function extractWithDiagnostics(paths) {
4524
4615
  if (paths.length === 0) {
4525
4616
  root = ".";
4526
4617
  } else if (paths.length === 1) {
4527
- root = dirname(paths[0]);
4618
+ root = dirname2(paths[0]);
4528
4619
  } else {
4529
4620
  const parts = paths.map((p) => p.split(sep2));
4530
4621
  const minLen = Math.min(...parts.map((p) => p.length));
@@ -4547,7 +4638,7 @@ async function extractWithDiagnostics(paths) {
4547
4638
  `);
4548
4639
  }
4549
4640
  const filePath = paths[i];
4550
- const ext = extname2(filePath);
4641
+ const ext = extname3(filePath);
4551
4642
  const extractor = _DISPATCH[ext];
4552
4643
  if (!extractor) continue;
4553
4644
  const cached = loadCached(filePath, root);
@@ -4573,9 +4664,9 @@ async function extractWithDiagnostics(paths) {
4573
4664
  allNodes.push(...result.nodes ?? []);
4574
4665
  allEdges.push(...result.edges ?? []);
4575
4666
  }
4576
- const pyPaths = paths.filter((p) => extname2(p) === ".py");
4667
+ const pyPaths = paths.filter((p) => extname3(p) === ".py");
4577
4668
  if (pyPaths.length > 0) {
4578
- const pyResults = perFile.filter((_r, i) => extname2(paths[i]) === ".py");
4669
+ const pyResults = perFile.filter((_r, i) => extname3(paths[i]) === ".py");
4579
4670
  try {
4580
4671
  const crossFileEdges = await _resolveCrossFileImports(pyResults, pyPaths);
4581
4672
  allEdges.push(...crossFileEdges);
@@ -4594,8 +4685,404 @@ async function extractWithDiagnostics(paths) {
4594
4685
  }
4595
4686
 
4596
4687
  // src/ingest.ts
4597
- import { existsSync as existsSync6, mkdirSync as mkdirSync3, writeFileSync as writeFileSync4 } from "fs";
4598
- import { resolve as pathResolve2, basename as basename3, extname as extname3 } from "path";
4688
+ import { existsSync as existsSync7, mkdirSync as mkdirSync4, writeFileSync as writeFileSync5 } from "fs";
4689
+ import { resolve as pathResolve2, basename as basename4, extname as extname5 } from "path";
4690
+
4691
+ // src/transcribe.ts
4692
+ import * as childProcess from "child_process";
4693
+ import { createHash as createHash3 } from "crypto";
4694
+ import {
4695
+ cpSync,
4696
+ createWriteStream,
4697
+ existsSync as existsSync6,
4698
+ mkdirSync as mkdirSync3,
4699
+ mkdtempSync,
4700
+ readdirSync as readdirSync4,
4701
+ renameSync as renameSync2,
4702
+ rmSync,
4703
+ writeFileSync as writeFileSync4
4704
+ } from "fs";
4705
+ import { homedir, platform, tmpdir } from "os";
4706
+ import { basename as basename3, dirname as dirname3, extname as extname4, join as join4, resolve as resolve5 } from "path";
4707
+ import { Readable } from "stream";
4708
+ import { pipeline } from "stream/promises";
4709
/** Prefixes that mark an input as a remote URL instead of a local path. */
const URL_PREFIXES = ["http://", "https://", "www."];

/** Extensions probed, in this order, when looking for an already-downloaded audio file. */
const CACHED_AUDIO_EXTENSIONS = [".m4a", ".opus", ".mp3", ".ogg", ".wav", ".webm"];

/** Model used when neither the caller nor GRAPHIFY_WHISPER_MODEL names one. */
const DEFAULT_MODEL = "base";

/** Default transcript output directory (resolved against the working directory by callers). */
const TRANSCRIPTS_DIR = "graphify-out/transcripts";

/** Prompt used when no override and no labelled god nodes are available. */
const FALLBACK_PROMPT = "Use proper punctuation and paragraph breaks.";

/** Base URL that model archive names are appended to. */
const SHERPA_RELEASE_BASE = "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models";

/** Sample rate (Hz) passed to ffmpeg and to the recognizer feature config. */
const AUDIO_SAMPLE_RATE = 16000;

/** Model names accepted after alias resolution. */
const SUPPORTED_MODELS = /* @__PURE__ */ new Set([
  "tiny",
  "tiny.en",
  "base",
  "base.en",
  "small",
  "small.en",
  "medium",
  "medium.en",
  "large-v1",
  "large-v2",
  "large-v3",
  "turbo",
  "distil-small.en",
  "distil-medium.en",
  "distil-large-v2",
  "distil-large-v3",
  "distil-large-v3.5"
]);

/** Shorthand model names mapped onto entries of SUPPORTED_MODELS. */
const MODEL_ALIASES = {
  large: "large-v3"
};

/** In-flight or resolved recognizer promises, keyed by model directory. */
const recognizerCache = /* @__PURE__ */ new Map();

/** Memoized dynamic import of sherpa-onnx-node; reset to null when the import fails. */
let sherpaModulePromise = null;
4740
/**
 * Runs an external program synchronously and returns the spawn result.
 *
 * Output is decoded as UTF-8 unless `options` overrides the encoding.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `spawnSync` options, merged over the defaults.
 * @returns {object} The `spawnSync` result when the exit status is 0.
 * @throws {Error} The spawn error itself, or an Error carrying trimmed stderr,
 *   trimmed stdout, or "<command> failed" when the exit status is not 0.
 */
function runCommand(command, args, options) {
  const spawnOptions = { encoding: "utf-8", ...options };
  const outcome = childProcess.spawnSync(command, args, spawnOptions);
  if (outcome.error) {
    throw outcome.error;
  }
  if (outcome.status === 0) {
    return outcome;
  }
  // Prefer whatever the process reported; fall back to a generic message.
  const message = outcome.stderr?.trim() || outcome.stdout?.trim() || `${command} failed`;
  throw new Error(message);
}
4753
/**
 * Picks the directory where downloaded Whisper model files are cached.
 *
 * Resolution order:
 *   1. GRAPHIFY_WHISPER_CACHE_DIR (resolved to an absolute path),
 *   2. on win32: %LOCALAPPDATA%\graphify\whisper, defaulting to
 *      <home>\AppData\Local when LOCALAPPDATA is unset or empty,
 *   3. otherwise: $XDG_CACHE_HOME/graphify/whisper, defaulting to
 *      <home>/.cache when XDG_CACHE_HOME is unset or empty.
 *
 * An empty environment value is treated as unset. Using it as a base would
 * otherwise produce a path relative to the current working directory.
 *
 * @returns {string} Cache directory path (not created by this function).
 */
function defaultWhisperCacheDir() {
  const override = process.env.GRAPHIFY_WHISPER_CACHE_DIR;
  if (override) {
    return resolve5(override);
  }
  const leaf = ["graphify", "whisper"];
  if (platform() === "win32") {
    // `||` on purpose: an empty LOCALAPPDATA must fall back like an unset one.
    const localAppData = process.env.LOCALAPPDATA || join4(homedir(), "AppData", "Local");
    return join4(localAppData, ...leaf);
  }
  // `||` on purpose: the XDG spec treats an empty XDG_CACHE_HOME as unset.
  const cacheHome = process.env.XDG_CACHE_HOME || join4(homedir(), ".cache");
  return join4(cacheHome, ...leaf);
}
4766
/**
 * Returns the ffmpeg executable to invoke.
 *
 * GRAPHIFY_FFMPEG_BIN overrides the default "ffmpeg". An empty value is
 * treated as unset, so it never yields an empty executable name.
 *
 * @returns {string} Executable name or path.
 */
function ffmpegBinary() {
  return process.env.GRAPHIFY_FFMPEG_BIN || "ffmpeg";
}
4769
/**
 * Returns the tar executable to invoke.
 *
 * GRAPHIFY_TAR_BIN overrides the default "tar". An empty value is treated as
 * unset, so it never yields an empty executable name.
 *
 * @returns {string} Executable name or path.
 */
function tarBinary() {
  return process.env.GRAPHIFY_TAR_BIN || "tar";
}
4772
/**
 * Determines which Whisper model to use and validates it.
 *
 * The requested name comes from `modelName`, then GRAPHIFY_WHISPER_MODEL,
 * then DEFAULT_MODEL. Aliases in MODEL_ALIASES are expanded before validation.
 *
 * @param {string} [modelName] - Explicit model name, if any.
 * @returns {{ requested: string, resolved: string }} The name as requested and
 *   the name after alias expansion.
 * @throws {Error} When the resolved name is not in SUPPORTED_MODELS.
 */
function resolveRequestedModel(modelName) {
  const requested = modelName ?? process.env.GRAPHIFY_WHISPER_MODEL ?? DEFAULT_MODEL;
  const alias = MODEL_ALIASES[requested];
  const resolved = alias ?? requested;
  if (SUPPORTED_MODELS.has(resolved)) {
    return { requested, resolved };
  }
  const supportedList = [...SUPPORTED_MODELS].sort().join(", ");
  throw new Error(
    `Unsupported GRAPHIFY_WHISPER_MODEL "${requested}". Supported local TS models: ${supportedList}`
  );
}
4782
/**
 * Recursively lists every non-directory entry beneath `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths built by joining `dir` with each entry name, in
 *   directory-listing order with subdirectories expanded in place. Empty when
 *   `dir` does not exist.
 */
function walkFiles(dir) {
  if (!existsSync6(dir)) return [];
  return readdirSync4(dir, { withFileTypes: true }).flatMap((entry) => {
    const entryPath = join4(dir, entry.name);
    return entry.isDirectory() ? walkFiles(entryPath) : [entryPath];
  });
}
4795
/**
 * Looks beneath `dir` for the three files a Whisper model needs.
 *
 * For the encoder and decoder, a file ending in `.int8.onnx` is preferred over
 * one ending in plain `.onnx`.
 *
 * @param {string} dir - Directory to search recursively.
 * @returns {{ modelDir: string, encoderPath: string, decoderPath: string, tokensPath: string } | null}
 *   The located paths, or null when any of the three is missing.
 */
function findArtifactsIn(dir) {
  const candidates = walkFiles(dir);
  // First file matching the earliest suffix in the list that has any match.
  const firstWithSuffix = (...suffixes) => {
    for (const suffix of suffixes) {
      const match = candidates.find((file) => file.endsWith(suffix));
      if (match !== undefined) return match;
    }
    return undefined;
  };
  const encoderPath = firstWithSuffix("-encoder.int8.onnx", "-encoder.onnx");
  const decoderPath = firstWithSuffix("-decoder.int8.onnx", "-decoder.onnx");
  const tokensPath = firstWithSuffix("-tokens.txt");
  if (!(encoderPath && decoderPath && tokensPath)) {
    return null;
  }
  return { modelDir: dir, encoderPath, decoderPath, tokensPath };
}
4810
/**
 * Adds a hint to a model-download failure message when the release asset was
 * not found.
 *
 * Only the status text "HTTP 404" triggers the hint; that is the form the
 * download helper in this file uses for failed responses. Matching a bare
 * "404" would also fire on unrelated text such as a temp path or byte count
 * that happens to contain those digits.
 *
 * @param {string} detail - Original error message.
 * @returns {string} The message, with the hint appended for HTTP 404 failures.
 */
function normalizeModelError(detail) {
  if (/\bHTTP 404\b/.test(detail)) {
    return `${detail}. The local sherpa-onnx release asset was not found for this Whisper model name.`;
  }
  return detail;
}
4816
/**
 * Streams a fetch response body into a file.
 *
 * @param {Response} response - Fetch response to save.
 * @param {string} destination - Path of the file to write.
 * @returns {Promise<void>} Resolves once the stream pipeline has finished.
 * @throws {Error} "HTTP <status> while downloading <url>" when the response
 *   is not ok or has no body.
 */
async function writeResponseToFile(response, destination) {
  const downloadable = response.ok && Boolean(response.body);
  if (!downloadable) {
    throw new Error(`HTTP ${response.status} while downloading ${response.url}`);
  }
  const source = Readable.fromWeb(response.body);
  const sink = createWriteStream(destination);
  await pipeline(source, sink);
}
4822
/**
 * Makes sure the files for a Whisper model are present in the local cache,
 * downloading and unpacking the release archive when they are not.
 *
 * Steps on a cache miss: download `<SHERPA_RELEASE_BASE>/<archive>` into a
 * temp dir, extract it with tar, locate the directory holding the model
 * files, move (or copy, if the rename fails) it into the cache, and verify it.
 * The temp dir is always removed afterwards.
 *
 * @param {string} [modelName] - Model name; see resolveRequestedModel.
 * @returns {Promise<object>} `requestedModel`, `resolvedModel`, plus the
 *   fields returned by findArtifactsIn.
 * @throws {Error} Unsupported-model errors propagate unchanged. Any failure
 *   during download or extraction is rethrown with its message passed through
 *   normalizeModelError and the original error attached as `cause`.
 */
async function ensureWhisperArtifacts(modelName) {
  const { requested, resolved } = resolveRequestedModel(modelName);
  const cacheRoot = defaultWhisperCacheDir();
  mkdirSync3(cacheRoot, { recursive: true });
  const modelDir = join4(cacheRoot, `sherpa-onnx-whisper-${resolved}`);
  const cached = findArtifactsIn(modelDir);
  if (cached) {
    return { requestedModel: requested, resolvedModel: resolved, ...cached };
  }
  const tempDir = mkdtempSync(join4(tmpdir(), "graphify-whisper-model-"));
  const extractDir = join4(tempDir, "extract");
  const archiveName = `sherpa-onnx-whisper-${resolved}.tar.bz2`;
  const archivePath = join4(tempDir, archiveName);
  try {
    // Created inside the try so a failure here still cleans up tempDir.
    mkdirSync3(extractDir, { recursive: true });
    const url = `${SHERPA_RELEASE_BASE}/${archiveName}`;
    console.log(` downloading whisper model: ${resolved}`);
    const response = await fetch(url);
    await writeResponseToFile(response, archivePath);
    runCommand(tarBinary(), ["-xjf", archivePath, "-C", extractDir]);
    // Prefer the first directory that directly contains an extracted file
    // and holds a full artifact set; otherwise try top-level directories.
    const extractedRoot = walkFiles(extractDir)
      .map((path) => dirname3(path))
      .find((path) => findArtifactsIn(path) !== null);
    const sourceDir = extractedRoot ?? readdirSync4(extractDir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => join4(extractDir, entry.name))
      .find((path) => findArtifactsIn(path) !== null);
    if (!sourceDir) {
      throw new Error(`Downloaded archive for ${resolved} but could not locate Whisper model files`);
    }
    if (existsSync6(modelDir)) {
      rmSync(modelDir, { recursive: true, force: true });
    }
    try {
      renameSync2(sourceDir, modelDir);
    } catch {
      // Deliberate fallback: if the rename fails for any reason, copy instead.
      cpSync(sourceDir, modelDir, { recursive: true });
    }
    const artifacts = findArtifactsIn(modelDir);
    if (!artifacts) {
      throw new Error(`Model cache for ${resolved} is incomplete after extraction`);
    }
    return { requestedModel: requested, resolvedModel: resolved, ...artifacts };
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable for debugging.
    throw new Error(normalizeModelError(detail), { cause: error });
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
}
4867
/**
 * Lazily imports the optional `sherpa-onnx-node` package, memoizing the import.
 *
 * The module's `default` export is returned when it has one, otherwise the
 * module namespace itself. A failed import clears the memo so a later call
 * retries.
 *
 * @returns {Promise<object>} The loaded sherpa module.
 * @throws {Error} With installation guidance when the import fails.
 */
async function loadSherpaModule() {
  if (sherpaModulePromise) {
    return sherpaModulePromise;
  }
  sherpaModulePromise = (async () => {
    try {
      const imported = await import("sherpa-onnx-node");
      return Reflect.has(imported, "default") ? Reflect.get(imported, "default") : imported;
    } catch (error) {
      // Forget the failed attempt so the next call tries again.
      sherpaModulePromise = null;
      const detail = error instanceof Error ? error.message : String(error);
      throw new Error(
        `Video transcription requires the optional dependency sherpa-onnx-node. Install it locally, then retry. ${detail}`
      );
    }
  })();
  return sherpaModulePromise;
}
4879
/**
 * Returns an offline recognizer for the given model, creating it on first use.
 *
 * Recognizer promises are cached by model directory, so concurrent and repeated
 * calls for one model share a single instance. A failed creation is evicted
 * from the cache and the error is rethrown.
 *
 * @param {string} [modelName] - Model name passed to ensureWhisperArtifacts.
 * @param {object} [sherpa] - Already-loaded sherpa module; loaded on demand if omitted.
 * @returns {Promise<{ recognizer: object, artifacts: object }>} The recognizer
 *   and the artifact description it was built from.
 */
async function getRecognizer(modelName, sherpa) {
  const artifacts = await ensureWhisperArtifacts(modelName);
  const cacheKey = artifacts.modelDir;
  const pending = recognizerCache.get(cacheKey);
  if (pending) {
    return { recognizer: await pending, artifacts };
  }
  const buildRecognizer = async () => {
    const runtime = sherpa ?? await loadSherpaModule();
    const featConfig = { sampleRate: AUDIO_SAMPLE_RATE, featureDim: 80 };
    const whisper = {
      encoder: artifacts.encoderPath,
      decoder: artifacts.decoderPath,
      task: "transcribe"
    };
    const modelConfig = {
      whisper,
      tokens: artifacts.tokensPath,
      numThreads: 1,
      provider: "cpu",
      debug: 0
    };
    return runtime.OfflineRecognizer.createAsync({ featConfig, modelConfig });
  };
  // Cache the promise, not the value, so overlapping callers share one build.
  const tracked = buildRecognizer().catch((error) => {
    recognizerCache.delete(cacheKey);
    throw error;
  });
  recognizerCache.set(cacheKey, tracked);
  return { recognizer: await recognizerCache.get(cacheKey), artifacts };
}
4915
/**
 * Converts a media file to a mono 16-bit PCM WAV at AUDIO_SAMPLE_RATE via ffmpeg.
 *
 * @param {string} audioPath - Input media file.
 * @param {string} workingDir - Directory that receives `<input stem>.wav`.
 * @returns {string} Path of the WAV file.
 * @throws {Error} With ffmpeg installation guidance when the command fails.
 */
function normalizeToWave(audioPath, workingDir) {
  const stem = basename3(audioPath, extname4(audioPath));
  const wavPath = join4(workingDir, `${stem}.wav`);
  const ffmpegArgs = [
    "-y",
    "-i",
    audioPath,
    "-vn",
    "-ac",
    "1",
    "-ar",
    String(AUDIO_SAMPLE_RATE),
    "-c:a",
    "pcm_s16le",
    wavPath
  ];
  try {
    runCommand(ffmpegBinary(), ffmpegArgs);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Video transcription requires ffmpeg in PATH. Install ffmpeg locally, then retry. ${detail}`
    );
  }
  return wavPath;
}
4939
/**
 * Pulls the transcript text out of a recognizer result.
 *
 * @param {{ text?: unknown }} result - Decode result.
 * @returns {string} `result.text` as a trimmed string, or "" when it is null or undefined.
 */
function extractTranscriptText(result) {
  const rawText = result.text ?? "";
  return String(rawText).trim();
}
4942
/**
 * Reports whether a string starts with one of URL_PREFIXES.
 *
 * @param {string} pathLike - Candidate path or URL.
 * @returns {boolean} True when a known URL prefix matches.
 */
function isUrl(pathLike) {
  for (const prefix of URL_PREFIXES) {
    if (pathLike.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
4945
/**
 * Downloads the audio track of a URL with yt-dlp, reusing an earlier download
 * when one exists.
 *
 * Files are named `yt_<first 12 hex chars of sha1(url)>.<ext>` inside `outputDir`.
 *
 * @param {string} url - Media URL.
 * @param {string} outputDir - Directory for downloads (created if missing).
 * @returns {string} Path to the cached or newly downloaded file.
 * @throws {Error} With yt-dlp installation guidance when the command fails, or
 *   when yt-dlp completes but no matching file is found.
 */
function downloadAudio(url, outputDir) {
  mkdirSync3(outputDir, { recursive: true });
  const urlHash = createHash3("sha1").update(url).digest("hex").slice(0, 12);
  const stem = `yt_${urlHash}`;
  const cachedPath = CACHED_AUDIO_EXTENSIONS
    .map((ext) => join4(outputDir, `${stem}${ext}`))
    .find((candidate) => existsSync6(candidate));
  if (cachedPath !== undefined) {
    console.log(` cached audio: ${basename3(cachedPath)}`);
    return cachedPath;
  }
  // yt-dlp substitutes the real extension for %(ext)s.
  const outTemplate = join4(outputDir, `${stem}.%(ext)s`);
  const ytDlpArgs = [
    "-f",
    "bestaudio[ext=m4a]/bestaudio/best",
    "-o",
    outTemplate,
    "--quiet",
    "--no-warnings",
    "--no-playlist",
    url
  ];
  try {
    console.log(` downloading audio: ${url.slice(0, 80)} ...`);
    runCommand("yt-dlp", ytDlpArgs);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      `YouTube/URL download requires yt-dlp. Install yt-dlp to enable video ingestion. ${detail}`
    );
  }
  const produced = readdirSync4(outputDir).find((entry) => entry.startsWith(`${stem}.`));
  if (produced !== undefined) {
    return join4(outputDir, produced);
  }
  throw new Error(`yt-dlp finished without producing an audio file for ${url}`);
}
4981
/**
 * Builds the initial prompt handed to the transcriber.
 *
 * A non-empty GRAPHIFY_WHISPER_PROMPT wins outright. Otherwise up to five
 * non-empty node labels are woven into a topic sentence followed by
 * FALLBACK_PROMPT; with no labels, FALLBACK_PROMPT is returned alone.
 *
 * @param {Array<{ label?: string }>} godNodes2 - Nodes whose labels seed the prompt.
 * @returns {string} Prompt text.
 */
function buildWhisperPrompt(godNodes2) {
  const override = process.env.GRAPHIFY_WHISPER_PROMPT;
  if (override) {
    return override;
  }
  const nonEmptyLabels = [];
  for (const node of godNodes2) {
    const label = node.label ?? "";
    if (label) {
      nonEmptyLabels.push(label);
    }
  }
  const labels = nonEmptyLabels.slice(0, 5);
  return labels.length === 0
    ? FALLBACK_PROMPT
    : `Technical discussion about ${labels.join(", ")}. ${FALLBACK_PROMPT}`;
}
4990
/**
 * Transcribes one video or audio source to a text file.
 *
 * A URL is first downloaded into `<outputDir>/downloads`. The transcript is
 * written to `<outputDir>/<media stem>.txt`. An existing transcript is reused
 * unless `force` is true.
 *
 * @param {string} videoPath - Local media path or URL.
 * @param {string} [outputDir=TRANSCRIPTS_DIR] - Directory for transcripts.
 * @param {string} [initialPrompt] - Prompt hint; falls back to
 *   GRAPHIFY_WHISPER_PROMPT, then FALLBACK_PROMPT.
 * @param {boolean} [force=false] - Re-transcribe even when a transcript exists.
 * @returns {Promise<string>} Path of the transcript file.
 * @throws {Error} Unsupported-model errors propagate unchanged. Any other
 *   failure during transcription is wrapped in a toolchain-guidance message
 *   with the original error attached as `cause`.
 */
async function transcribe(videoPath, outputDir = TRANSCRIPTS_DIR, initialPrompt, force = false) {
  const outDir = resolve5(outputDir);
  mkdirSync3(outDir, { recursive: true });
  const audioPath = isUrl(videoPath)
    ? downloadAudio(videoPath, join4(outDir, "downloads"))
    : resolve5(videoPath);
  const transcriptPath = join4(outDir, `${basename3(audioPath, extname4(audioPath))}.txt`);
  if (existsSync6(transcriptPath) && !force) {
    return transcriptPath;
  }
  const prompt = initialPrompt ?? process.env.GRAPHIFY_WHISPER_PROMPT ?? FALLBACK_PROMPT;
  const requestedModel = process.env.GRAPHIFY_WHISPER_MODEL ?? DEFAULT_MODEL;
  const tempDir = mkdtempSync(join4(tmpdir(), "graphify-transcribe-"));
  try {
    console.log(` transcribing ${basename3(audioPath)} (model=${requestedModel}) ...`);
    const wavPath = normalizeToWave(audioPath, tempDir);
    const sherpa = await loadSherpaModule();
    const { recognizer, artifacts } = await getRecognizer(requestedModel, sherpa);
    const wave = sherpa.readWave(wavPath);
    const stream = recognizer.createStream();
    if (prompt && typeof stream.setOption === "function") {
      try {
        stream.setOption("prompt", prompt);
      } catch {
        // Best effort: the prompt is only a hint, so a stream that rejects
        // the option must not abort the transcription.
      }
    }
    stream.acceptWaveform({ samples: wave.samples, sampleRate: wave.sampleRate });
    const result = await recognizer.decodeAsync(stream);
    const transcript = extractTranscriptText(result);
    writeFileSync4(transcriptPath, transcript, "utf-8");
    if (artifacts.requestedModel !== artifacts.resolvedModel) {
      console.log(` model alias: ${artifacts.requestedModel} -> ${artifacts.resolvedModel}`);
    }
  } catch (error) {
    // Configuration mistakes are surfaced verbatim rather than as a toolchain problem.
    if (error instanceof Error && error.message.startsWith("Unsupported GRAPHIFY_WHISPER_MODEL")) {
      throw error;
    }
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Video transcription requires the local TypeScript toolchain: sherpa-onnx-node + ffmpeg. Retry after installing them. ${detail}`,
      { cause: error }
    );
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
  return transcriptPath;
}
5034
/**
 * Transcribes a list of media files one after another.
 *
 * A file that fails is reported with a warning on the console and skipped; it
 * does not stop the remaining files.
 *
 * @param {string[]} videoFiles - Media paths or URLs.
 * @param {string} [outputDir] - Passed through to transcribe.
 * @param {string} [initialPrompt] - Passed through to transcribe.
 * @param {boolean} [force=false] - Passed through to transcribe.
 * @returns {Promise<string[]>} Transcript paths for the files that succeeded, in input order.
 */
async function transcribeAll(videoFiles, outputDir, initialPrompt, force = false) {
  const transcriptPaths = [];
  for (const videoFile of videoFiles) {
    let transcriptPath;
    try {
      transcriptPath = await transcribe(videoFile, outputDir, initialPrompt, force);
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      console.log(` warning: could not transcribe ${videoFile}: ${detail}`);
      continue;
    }
    transcriptPaths.push(transcriptPath);
  }
  return transcriptPaths;
}
5049
/**
 * Deep-copies a detection object through a JSON round trip.
 *
 * @param {object} detection - JSON-serializable detection result.
 * @returns {object} An independent copy.
 */
function cloneDetection(detection) {
  const serialized = JSON.stringify(detection);
  return JSON.parse(serialized);
}
5052
/**
 * Transcribes the videos listed in a detection result and appends the
 * transcript paths to its document list.
 *
 * Works on a copy; the input detection is not modified. With
 * `options.incremental` and a `new_files` section present, `new_files` is read
 * and updated instead of `files`, and transcription is forced.
 * `options.whisperModel` is applied by temporarily setting
 * GRAPHIFY_WHISPER_MODEL, which is restored afterwards.
 *
 * @param {object} detection - Detection result with `files` (and optionally `new_files`).
 * @param {object} [options] - `outputDir`, `godNodes`, `initialPrompt`,
 *   `incremental`, `whisperModel`.
 * @returns {Promise<{ detection: object, transcriptPaths: string[], prompt: string }>}
 *   The updated copy, the transcripts produced, and the prompt that was used.
 */
async function augmentDetectionWithTranscripts(detection, options) {
  const nextDetection = cloneDetection(detection);
  const useNewFiles = options?.incremental && nextDetection.new_files;
  const source = useNewFiles ? nextDetection.new_files : nextDetection.files;
  const videoFiles = [...source.video ?? []];
  const prompt = options?.initialPrompt ?? buildWhisperPrompt(options?.godNodes ?? []);
  if (videoFiles.length === 0) {
    return { detection: nextDetection, transcriptPaths: [], prompt };
  }
  const modelOverride = options?.whisperModel;
  const previousModel = process.env.GRAPHIFY_WHISPER_MODEL;
  if (modelOverride) {
    process.env.GRAPHIFY_WHISPER_MODEL = modelOverride;
  }
  try {
    const forceTranscription = options?.incremental === true;
    const transcriptPaths = await transcribeAll(videoFiles, options?.outputDir, prompt, forceTranscription);
    source.document = [...source.document ?? [], ...transcriptPaths];
    return { detection: nextDetection, transcriptPaths, prompt };
  } finally {
    // Put the environment back exactly as it was found.
    if (modelOverride) {
      if (previousModel === void 0) {
        delete process.env.GRAPHIFY_WHISPER_MODEL;
      } else {
        process.env.GRAPHIFY_WHISPER_MODEL = previousModel;
      }
    }
  }
}
5084
+
5085
+ // src/ingest.ts
4599
5086
  function yamlStr(s) {
4600
5087
  return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, " ").replace(/\r/g, " ");
4601
5088
  }
@@ -4763,7 +5250,7 @@ async function downloadBinary(url, suffix, targetDir) {
4763
5250
  const filename = safeFilename(url, suffix);
4764
5251
  const outPath = pathResolve2(targetDir, filename);
4765
5252
  const data = await safeFetch(url);
4766
- writeFileSync4(outPath, data);
5253
+ writeFileSync5(outPath, data);
4767
5254
  return outPath;
4768
5255
  }
4769
5256
  function normalizeIngestOptions(authorOrOptions, contributor) {
@@ -4779,7 +5266,7 @@ function normalizeIngestOptions(authorOrOptions, contributor) {
4779
5266
  };
4780
5267
  }
4781
5268
  async function ingest(url, targetDir, authorOrOptions = null, contributor = null) {
4782
- mkdirSync3(targetDir, { recursive: true });
5269
+ mkdirSync4(targetDir, { recursive: true });
4783
5270
  const urlType = detectUrlType(url);
4784
5271
  const { author, contributor: normalizedContributor } = normalizeIngestOptions(
4785
5272
  authorOrOptions,
@@ -4790,7 +5277,7 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
4790
5277
  let filename;
4791
5278
  if (urlType === "pdf") {
4792
5279
  const out = await downloadBinary(url, ".pdf", targetDir);
4793
- console.log(`Downloaded PDF: ${basename3(out)}`);
5280
+ console.log(`Downloaded PDF: ${basename4(out)}`);
4794
5281
  return out;
4795
5282
  }
4796
5283
  if (urlType === "image") {
@@ -4800,9 +5287,14 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
4800
5287
  } catch {
4801
5288
  throw new Error(`Invalid URL: ${url}`);
4802
5289
  }
4803
- const suffix = extname3(parsed.pathname) || ".jpg";
5290
+ const suffix = extname5(parsed.pathname) || ".jpg";
4804
5291
  const out = await downloadBinary(url, suffix, targetDir);
4805
- console.log(`Downloaded image: ${basename3(out)}`);
5292
+ console.log(`Downloaded image: ${basename4(out)}`);
5293
+ return out;
5294
+ }
5295
+ if (urlType === "youtube") {
5296
+ const out = downloadAudio(url, targetDir);
5297
+ console.log(`Downloaded audio: ${basename4(out)}`);
4806
5298
  return out;
4807
5299
  }
4808
5300
  if (urlType === "tweet") {
@@ -4814,13 +5306,13 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
4814
5306
  }
4815
5307
  let outPath = pathResolve2(targetDir, filename);
4816
5308
  let counter = 1;
4817
- while (existsSync6(outPath)) {
5309
+ while (existsSync7(outPath)) {
4818
5310
  const stem = filename.replace(/\.md$/, "");
4819
5311
  outPath = pathResolve2(targetDir, `${stem}_${counter}.md`);
4820
5312
  counter++;
4821
5313
  }
4822
- writeFileSync4(outPath, content, "utf-8");
4823
- console.log(`Saved ${urlType}: ${basename3(outPath)}`);
5314
+ writeFileSync5(outPath, content, "utf-8");
5315
+ console.log(`Saved ${urlType}: ${basename4(outPath)}`);
4824
5316
  return outPath;
4825
5317
  }
4826
5318
  function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "query", sourceNodes = null) {
@@ -4840,7 +5332,7 @@ function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "quer
4840
5332
  if (!payload.question) throw new Error("saveQueryResult requires a question");
4841
5333
  if (!payload.memoryDir) throw new Error("saveQueryResult requires a memoryDir");
4842
5334
  const effectiveAnswer = payload.answer ?? "";
4843
- mkdirSync3(payload.memoryDir, { recursive: true });
5335
+ mkdirSync4(payload.memoryDir, { recursive: true });
4844
5336
  const now = /* @__PURE__ */ new Date();
4845
5337
  const slug = payload.question.toLowerCase().replace(/[^\w]/g, "_").slice(0, 50).replace(/_+$/, "");
4846
5338
  const ts = now.toISOString().replace(/[-:]/g, "").replace("T", "_").slice(0, 15);
@@ -4873,10 +5365,10 @@ function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "quer
4873
5365
  }
4874
5366
  const content = [...frontmatterLines, ...bodyLines].join("\n");
4875
5367
  const outPath = pathResolve2(payload.memoryDir, filename);
4876
- writeFileSync4(outPath, content, "utf-8");
5368
+ writeFileSync5(outPath, content, "utf-8");
4877
5369
  return outPath;
4878
5370
  }
4879
- var isDirectExecution = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^ingest\.(?:js|mjs|cjs|ts)$/.test(basename3(process.argv[1]));
5371
+ var isDirectExecution = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^ingest\.(?:js|mjs|cjs|ts)$/.test(basename4(process.argv[1]));
4880
5372
  if (isDirectExecution) {
4881
5373
  const url = process.argv[2];
4882
5374
  const targetDir = process.argv[3] ?? "./raw";
@@ -5047,18 +5539,18 @@ function generate(G, communities, cohesionScores, communityLabels, godNodeList,
5047
5539
 
5048
5540
  // src/skill-runtime.ts
5049
5541
  var __filename = fileURLToPath(import.meta.url);
5050
- var __dirname = dirname2(__filename);
5542
+ var __dirname = dirname4(__filename);
5051
5543
  function readJson(path) {
5052
- return JSON.parse(readFileSync5(resolve5(path), "utf-8"));
5544
+ return JSON.parse(readFileSync5(resolve7(path), "utf-8"));
5053
5545
  }
5054
5546
  function writeJson(path, value) {
5055
- const resolved = resolve5(path);
5056
- mkdirSync4(dirname2(resolved), { recursive: true });
5057
- writeFileSync5(resolved, JSON.stringify(value, null, 2), "utf-8");
5547
+ const resolved = resolve7(path);
5548
+ mkdirSync5(dirname4(resolved), { recursive: true });
5549
+ writeFileSync6(resolved, JSON.stringify(value, null, 2), "utf-8");
5058
5550
  }
5059
5551
  function getVersion() {
5060
5552
  try {
5061
- const pkg = JSON.parse(readFileSync5(join4(__dirname, "..", "package.json"), "utf-8"));
5553
+ const pkg = JSON.parse(readFileSync5(join5(__dirname, "..", "package.json"), "utf-8"));
5062
5554
  return pkg.version ?? "unknown";
5063
5555
  } catch {
5064
5556
  return "unknown";
@@ -5094,23 +5586,10 @@ function ensureExtractionShape(value) {
5094
5586
  }
5095
5587
  function loadGraph2(graphPath) {
5096
5588
  const raw = readJson(graphPath);
5097
- const G = new Graph3({ type: "undirected" });
5098
- for (const node of raw.nodes ?? []) {
5099
- const { id, ...attrs } = node;
5100
- G.mergeNode(id, attrs);
5101
- }
5102
- for (const link of raw.links ?? []) {
5103
- const { source, target, ...attrs } = link;
5104
- if (!G.hasNode(source) || !G.hasNode(target)) continue;
5105
- try {
5106
- G.mergeEdge(source, target, attrs);
5107
- } catch {
5108
- }
5109
- }
5110
- if (raw.hyperedges && raw.hyperedges.length > 0) {
5111
- G.setAttribute("hyperedges", raw.hyperedges);
5112
- }
5113
- return G;
5589
+ return loadGraphFromData(raw);
5590
+ }
5591
+ function shouldBuildDirected(opts, existingGraph) {
5592
+ return opts.directed === true || (existingGraph ? isDirectedGraph(existingGraph) : false);
5114
5593
  }
5115
5594
  function mergeHyperedges(existing = [], incoming = []) {
5116
5595
  const seen = /* @__PURE__ */ new Set();
@@ -5182,11 +5661,11 @@ function analyzeGraph(G, detection, root, tokenCost, labelsOverride) {
5182
5661
  }
5183
5662
  function placeholderDetection(root = ".") {
5184
5663
  return {
5185
- files: { code: [], document: [], paper: [], image: [] },
5664
+ files: { code: [], document: [], paper: [], image: [], video: [] },
5186
5665
  total_files: 0,
5187
5666
  total_words: 0,
5188
5667
  needs_graph: true,
5189
- warning: `Reused existing graph at ${resolve5(root)} without re-running corpus detection.`,
5668
+ warning: `Reused existing graph at ${resolve7(root)} without re-running corpus detection.`,
5190
5669
  skipped_sensitive: [],
5191
5670
  graphifyignore_patterns: 0
5192
5671
  };
@@ -5234,8 +5713,8 @@ function updateCostFile(extractionInput, detection, outPath) {
5234
5713
  total_input_tokens: 0,
5235
5714
  total_output_tokens: 0
5236
5715
  };
5237
- const resolved = resolve5(outPath);
5238
- if (existsSync7(resolved)) {
5716
+ const resolved = resolve7(outPath);
5717
+ if (existsSync8(resolved)) {
5239
5718
  cost = readJson(resolved);
5240
5719
  }
5241
5720
  const input = extraction.input_tokens ?? 0;
@@ -5270,8 +5749,8 @@ function runtimeInfo() {
5270
5749
  version: getVersion(),
5271
5750
  node: process.execPath,
5272
5751
  script: __filename,
5273
- module: join4(__dirname, "index.js"),
5274
- cli: join4(__dirname, "cli.js")
5752
+ module: join5(__dirname, "index.js"),
5753
+ cli: join5(__dirname, "cli.js")
5275
5754
  };
5276
5755
  }
5277
5756
  async function main() {
@@ -5281,23 +5760,37 @@ async function main() {
5281
5760
  console.log(JSON.stringify(runtimeInfo(), null, 2));
5282
5761
  });
5283
5762
  program.command("detect").argument("<inputPath>").option("--out <path>").action((inputPath, opts) => {
5284
- const result = detect(resolve5(inputPath));
5763
+ const result = detect(resolve7(inputPath));
5285
5764
  if (opts.out) {
5286
5765
  writeJson(opts.out, result);
5287
- console.log(`Detected ${result.total_files} files in ${resolve5(inputPath)}`);
5766
+ console.log(`Detected ${result.total_files} files in ${resolve7(inputPath)}`);
5288
5767
  } else {
5289
5768
  console.log(JSON.stringify(result, null, 2));
5290
5769
  }
5291
5770
  });
5292
5771
  program.command("detect-incremental").argument("<inputPath>").option("--manifest <path>", "Path to manifest.json", "graphify-out/manifest.json").option("--out <path>").action((inputPath, opts) => {
5293
- const result = detectIncremental(resolve5(inputPath), resolve5(opts.manifest));
5772
+ const result = detectIncremental(resolve7(inputPath), resolve7(opts.manifest));
5294
5773
  if (opts.out) {
5295
5774
  writeJson(opts.out, result);
5296
- console.log(`${result.new_total ?? 0} new/changed file(s) under ${resolve5(inputPath)}`);
5775
+ console.log(`${result.new_total ?? 0} new/changed file(s) under ${resolve7(inputPath)}`);
5297
5776
  } else {
5298
5777
  console.log(JSON.stringify(result, null, 2));
5299
5778
  }
5300
5779
  });
5780
+ program.command("prepare-semantic-detect").requiredOption("--detect <path>", "Path to the base detection JSON").requiredOption("--out <path>", "Path to the augmented semantic detection JSON").requiredOption("--transcripts-out <path>", "Path to the transcript path list JSON").option("--analysis <path>", "Optional analysis JSON from a previous run").option("--incremental", "Use detection.new_files.video and force retranscription").option("--whisper-model <name>", "Whisper model override for local transcription").action(async (opts) => {
5781
+ const detection = readJson(opts.detect);
5782
+ const analysis = opts.analysis && existsSync8(resolve7(opts.analysis)) ? readJson(opts.analysis) : null;
5783
+ const transcriptsDir = join5(dirname4(resolve7(opts.out)), "transcripts");
5784
+ const { detection: semanticDetection, transcriptPaths } = await augmentDetectionWithTranscripts(detection, {
5785
+ outputDir: transcriptsDir,
5786
+ godNodes: analysis?.gods,
5787
+ incremental: opts.incremental,
5788
+ whisperModel: opts.whisperModel
5789
+ });
5790
+ writeJson(opts.out, semanticDetection);
5791
+ writeJson(opts.transcriptsOut, transcriptPaths);
5792
+ console.log(`Transcribed ${transcriptPaths.length} video file(s) -> treating as docs`);
5793
+ });
5301
5794
  program.command("extract-ast").requiredOption("--detect <path>", "Path to detection JSON").requiredOption("--out <path>", "Path to AST extraction JSON").option("--incremental", "Use detection.new_files.code instead of detection.files.code").action(async (opts) => {
5302
5795
  const detection = readJson(opts.detect);
5303
5796
  const codeFiles = opts.incremental ? detection.new_files?.code ?? [] : detection.files.code ?? [];
@@ -5325,15 +5818,15 @@ async function main() {
5325
5818
  ];
5326
5819
  const [cachedNodes, cachedEdges, cachedHyperedges, uncached] = checkSemanticCache(
5327
5820
  allFiles,
5328
- resolve5(opts.root)
5821
+ resolve7(opts.root)
5329
5822
  );
5330
5823
  writeJson(opts.cachedOut, {
5331
5824
  nodes: cachedNodes,
5332
5825
  edges: cachedEdges,
5333
5826
  hyperedges: cachedHyperedges
5334
5827
  });
5335
- mkdirSync4(dirname2(resolve5(opts.uncachedOut)), { recursive: true });
5336
- writeFileSync5(resolve5(opts.uncachedOut), uncached.join("\n"), "utf-8");
5828
+ mkdirSync5(dirname4(resolve7(opts.uncachedOut)), { recursive: true });
5829
+ writeFileSync6(resolve7(opts.uncachedOut), uncached.join("\n"), "utf-8");
5337
5830
  console.log(`Cache: ${allFiles.length - uncached.length} files hit, ${uncached.length} files need extraction`);
5338
5831
  });
5339
5832
  program.command("save-semantic-cache").requiredOption("--input <path>", "Path to semantic extraction JSON").option("--root <path>", "Graph root for cache resolution", ".").action((opts) => {
@@ -5342,7 +5835,7 @@ async function main() {
5342
5835
  extraction.nodes,
5343
5836
  extraction.edges,
5344
5837
  extraction.hyperedges ?? [],
5345
- resolve5(opts.root)
5838
+ resolve7(opts.root)
5346
5839
  );
5347
5840
  console.log(`Cached ${saved} files`);
5348
5841
  });
@@ -5390,11 +5883,11 @@ async function main() {
5390
5883
  `Merged: ${merged.nodes.length} nodes, ${merged.edges.length} edges (${ast.nodes.length} AST + ${semantic.nodes.length} semantic)`
5391
5884
  );
5392
5885
  });
5393
- program.command("finalize-build").requiredOption("--detect <path>").requiredOption("--ast <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").requiredOption("--cost-out <path>").option("--cached <path>", "Optional cached semantic JSON").option("--semantic-new <path>", "Optional fresh semantic JSON").option("--html-out <path>", "Optional graph.html output path").action((opts) => {
5886
+ program.command("finalize-build").requiredOption("--detect <path>").requiredOption("--ast <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").requiredOption("--cost-out <path>").option("--directed", "Build a directed graph (preserves source->target)").option("--cached <path>", "Optional cached semantic JSON").option("--semantic-new <path>", "Optional fresh semantic JSON").option("--html-out <path>", "Optional graph.html output path").action((opts) => {
5394
5887
  const detection = readJson(opts.detect);
5395
5888
  const ast = ensureExtractionShape(readJson(opts.ast));
5396
- const cached = opts.cached && existsSync7(resolve5(opts.cached)) ? readJson(opts.cached) : null;
5397
- const semanticNew = opts.semanticNew && existsSync7(resolve5(opts.semanticNew)) ? readJson(opts.semanticNew) : null;
5889
+ const cached = opts.cached && existsSync8(resolve7(opts.cached)) ? readJson(opts.cached) : null;
5890
+ const semanticNew = opts.semanticNew && existsSync8(resolve7(opts.semanticNew)) ? readJson(opts.semanticNew) : null;
5398
5891
  if (semanticNew) {
5399
5892
  saveSemanticCache(
5400
5893
  semanticNew.nodes,
@@ -5405,35 +5898,37 @@ async function main() {
5405
5898
  }
5406
5899
  const semantic = mergeSemanticArtifacts(cached, semanticNew);
5407
5900
  const extraction = mergeAstAndSemantic(ast, semantic);
5408
- const G = buildFromJson(extraction);
5901
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
5409
5902
  if (G.order === 0) {
5410
5903
  throw new Error("Graph is empty - extraction produced no nodes.");
5411
5904
  }
5412
5905
  const analyzed = analyzeGraph(
5413
5906
  G,
5414
5907
  detection,
5415
- resolve5(opts.root),
5908
+ resolve7(opts.root),
5416
5909
  { input: extraction.input_tokens ?? 0, output: extraction.output_tokens ?? 0 }
5417
5910
  );
5418
- toJson(G, analyzed.communities, resolve5(opts.graphOut));
5419
- writeFileSync5(resolve5(opts.reportOut), analyzed.report, "utf-8");
5911
+ toJson(G, analyzed.communities, resolve7(opts.graphOut), {
5912
+ communityLabels: analyzed.labels
5913
+ });
5914
+ writeFileSync6(resolve7(opts.reportOut), analyzed.report, "utf-8");
5420
5915
  writeJson(opts.analysisOut, analyzed.analysis);
5421
5916
  if (opts.htmlOut) {
5422
- toHtml(G, analyzed.communities, resolve5(opts.htmlOut), {
5917
+ toHtml(G, analyzed.communities, resolve7(opts.htmlOut), {
5423
5918
  communityLabels: analyzed.labels
5424
5919
  });
5425
5920
  }
5426
- saveManifest(detection.files, join4(dirname2(resolve5(opts.graphOut)), "manifest.json"));
5921
+ saveManifest(detection.files, join5(dirname4(resolve7(opts.graphOut)), "manifest.json"));
5427
5922
  const cost = updateCostFile(extraction, detection, opts.costOut);
5428
5923
  console.log(`Graph: ${G.order} nodes, ${G.size} edges, ${analyzed.communities.size} communities`);
5429
5924
  console.log(`This run: ${(extraction.input_tokens ?? 0).toLocaleString()} input tokens, ${(extraction.output_tokens ?? 0).toLocaleString()} output tokens`);
5430
5925
  console.log(`All time: ${cost.total_input_tokens.toLocaleString()} input, ${cost.total_output_tokens.toLocaleString()} output (${cost.runs.length} runs)`);
5431
5926
  });
5432
- program.command("finalize-update").requiredOption("--detect <path>").requiredOption("--ast <path>").requiredOption("--existing-graph <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").requiredOption("--cost-out <path>").option("--cached <path>", "Optional cached semantic JSON").option("--semantic-new <path>", "Optional fresh semantic JSON").option("--html-out <path>", "Optional graph.html output path").action((opts) => {
5927
+ program.command("finalize-update").requiredOption("--detect <path>").requiredOption("--ast <path>").requiredOption("--existing-graph <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").requiredOption("--cost-out <path>").option("--directed", "Build a directed graph (preserves source->target)").option("--cached <path>", "Optional cached semantic JSON").option("--semantic-new <path>", "Optional fresh semantic JSON").option("--html-out <path>", "Optional graph.html output path").action((opts) => {
5433
5928
  const detection = readJson(opts.detect);
5434
5929
  const ast = ensureExtractionShape(readJson(opts.ast));
5435
- const cached = opts.cached && existsSync7(resolve5(opts.cached)) ? readJson(opts.cached) : null;
5436
- const semanticNew = opts.semanticNew && existsSync7(resolve5(opts.semanticNew)) ? readJson(opts.semanticNew) : null;
5930
+ const cached = opts.cached && existsSync8(resolve7(opts.cached)) ? readJson(opts.cached) : null;
5931
+ const semanticNew = opts.semanticNew && existsSync8(resolve7(opts.semanticNew)) ? readJson(opts.semanticNew) : null;
5437
5932
  if (semanticNew) {
5438
5933
  saveSemanticCache(
5439
5934
  semanticNew.nodes,
@@ -5446,35 +5941,39 @@ async function main() {
5446
5941
  const extraction = mergeAstAndSemantic(ast, semantic);
5447
5942
  const oldGraph = loadGraph2(opts.existingGraph);
5448
5943
  const mergedGraph = loadGraph2(opts.existingGraph);
5449
- const newGraph = buildFromJson(extraction);
5944
+ const newGraph = buildFromJson(extraction, {
5945
+ directed: shouldBuildDirected(opts, oldGraph)
5946
+ });
5450
5947
  mergeGraphs(mergedGraph, newGraph);
5451
5948
  const analyzed = analyzeGraph(
5452
5949
  mergedGraph,
5453
5950
  detection,
5454
- resolve5(opts.root),
5951
+ resolve7(opts.root),
5455
5952
  { input: extraction.input_tokens ?? 0, output: extraction.output_tokens ?? 0 }
5456
5953
  );
5457
5954
  analyzed.analysis.diff = graphDiff(oldGraph, mergedGraph);
5458
- toJson(mergedGraph, analyzed.communities, resolve5(opts.graphOut));
5459
- writeFileSync5(resolve5(opts.reportOut), analyzed.report, "utf-8");
5955
+ toJson(mergedGraph, analyzed.communities, resolve7(opts.graphOut), {
5956
+ communityLabels: analyzed.labels
5957
+ });
5958
+ writeFileSync6(resolve7(opts.reportOut), analyzed.report, "utf-8");
5460
5959
  writeJson(opts.analysisOut, analyzed.analysis);
5461
5960
  if (opts.htmlOut) {
5462
- toHtml(mergedGraph, analyzed.communities, resolve5(opts.htmlOut), {
5961
+ toHtml(mergedGraph, analyzed.communities, resolve7(opts.htmlOut), {
5463
5962
  communityLabels: analyzed.labels
5464
5963
  });
5465
5964
  }
5466
- saveManifest(detection.files, join4(dirname2(resolve5(opts.graphOut)), "manifest.json"));
5965
+ saveManifest(detection.files, join5(dirname4(resolve7(opts.graphOut)), "manifest.json"));
5467
5966
  const cost = updateCostFile(extraction, detection, opts.costOut);
5468
5967
  console.log(`Merged: ${mergedGraph.order} nodes, ${mergedGraph.size} edges`);
5469
5968
  console.log(analyzed.analysis.diff.summary);
5470
5969
  console.log(`This run: ${(extraction.input_tokens ?? 0).toLocaleString()} input tokens, ${(extraction.output_tokens ?? 0).toLocaleString()} output tokens`);
5471
5970
  console.log(`All time: ${cost.total_input_tokens.toLocaleString()} input, ${cost.total_output_tokens.toLocaleString()} output (${cost.runs.length} runs)`);
5472
5971
  });
5473
- program.command("analyze-build").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").action((opts) => {
5972
+ program.command("analyze-build").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").option("--directed", "Build a directed graph (preserves source->target)").action((opts) => {
5474
5973
  const extraction = ensureExtractionShape(readJson(opts.extract));
5475
5974
  const detection = readJson(opts.detect);
5476
- const root = resolve5(opts.root);
5477
- const G = buildFromJson(extraction);
5975
+ const root = resolve7(opts.root);
5976
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
5478
5977
  if (G.order === 0) {
5479
5978
  throw new Error("Graph is empty - extraction produced no nodes.");
5480
5979
  }
@@ -5484,20 +5983,22 @@ async function main() {
5484
5983
  root,
5485
5984
  { input: extraction.input_tokens ?? 0, output: extraction.output_tokens ?? 0 }
5486
5985
  );
5487
- mkdirSync4(dirname2(resolve5(opts.graphOut)), { recursive: true });
5488
- toJson(G, analyzed.communities, resolve5(opts.graphOut));
5489
- writeFileSync5(resolve5(opts.reportOut), analyzed.report, "utf-8");
5986
+ mkdirSync5(dirname4(resolve7(opts.graphOut)), { recursive: true });
5987
+ toJson(G, analyzed.communities, resolve7(opts.graphOut), {
5988
+ communityLabels: analyzed.labels
5989
+ });
5990
+ writeFileSync6(resolve7(opts.reportOut), analyzed.report, "utf-8");
5490
5991
  writeJson(opts.analysisOut, analyzed.analysis);
5491
- saveManifest(detection.files, join4(dirname2(resolve5(opts.graphOut)), "manifest.json"));
5992
+ saveManifest(detection.files, join5(dirname4(resolve7(opts.graphOut)), "manifest.json"));
5492
5993
  console.log(`Graph: ${G.order} nodes, ${G.size} edges, ${analyzed.communities.size} communities`);
5493
5994
  });
5494
- program.command("write-labeled-report").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--analysis <path>").requiredOption("--labels <path>").requiredOption("--root <path>").requiredOption("--report-out <path>").action((opts) => {
5995
+ program.command("write-labeled-report").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--analysis <path>").requiredOption("--labels <path>").requiredOption("--root <path>").requiredOption("--report-out <path>").option("--directed", "Build a directed graph (preserves source->target)").option("--graph-out <path>").option("--html-out <path>").action((opts) => {
5495
5996
  const extraction = ensureExtractionShape(readJson(opts.extract));
5496
5997
  const detection = readJson(opts.detect);
5497
5998
  const analysis = readJson(opts.analysis);
5498
5999
  const labelObject = readJson(opts.labels);
5499
6000
  const labels = objectToStringMap(labelObject);
5500
- const G = buildFromJson(extraction);
6001
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
5501
6002
  const communities = new Map(
5502
6003
  Object.entries(analysis.communities).map(([key, value]) => [Number.parseInt(key, 10), value])
5503
6004
  );
@@ -5514,57 +6015,63 @@ async function main() {
5514
6015
  analysis.surprises,
5515
6016
  detection,
5516
6017
  { input: extraction.input_tokens ?? 0, output: extraction.output_tokens ?? 0 },
5517
- resolve5(opts.root),
6018
+ resolve7(opts.root),
5518
6019
  questions
5519
6020
  );
5520
6021
  analysis.questions = questions;
5521
6022
  analysis.labels = mapToObject(labels);
5522
- writeFileSync5(resolve5(opts.reportOut), report, "utf-8");
6023
+ writeFileSync6(resolve7(opts.reportOut), report, "utf-8");
6024
+ if (opts.graphOut) {
6025
+ toJson(G, communities, resolve7(opts.graphOut), { communityLabels: labels });
6026
+ }
6027
+ if (opts.htmlOut) {
6028
+ toHtml(G, communities, resolve7(opts.htmlOut), { communityLabels: labels });
6029
+ }
5523
6030
  writeJson(opts.analysis, analysis);
5524
- console.log("Report updated with community labels");
6031
+ console.log("Labeled artifacts updated");
5525
6032
  });
5526
- program.command("export-html").requiredOption("--extract <path>").requiredOption("--analysis <path>").option("--labels <path>").requiredOption("--out <path>").action((opts) => {
6033
+ program.command("export-html").requiredOption("--extract <path>").requiredOption("--analysis <path>").option("--labels <path>").requiredOption("--out <path>").option("--directed", "Build a directed graph (preserves source->target)").action((opts) => {
5527
6034
  const extraction = ensureExtractionShape(readJson(opts.extract));
5528
6035
  const analysis = readJson(opts.analysis);
5529
6036
  const labels = opts.labels ? objectToStringMap(readJson(opts.labels)) : objectToStringMap(analysis.labels);
5530
6037
  const communities = new Map(
5531
6038
  Object.entries(analysis.communities).map(([key, value]) => [Number.parseInt(key, 10), value])
5532
6039
  );
5533
- const G = buildFromJson(extraction);
5534
- toHtml(G, communities, resolve5(opts.out), { communityLabels: labels });
6040
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
6041
+ toHtml(G, communities, resolve7(opts.out), { communityLabels: labels });
5535
6042
  console.log("graph.html written - open in any browser, no server needed");
5536
6043
  });
5537
- program.command("export-svg").requiredOption("--extract <path>").requiredOption("--analysis <path>").option("--labels <path>").requiredOption("--out <path>").action((opts) => {
6044
+ program.command("export-svg").requiredOption("--extract <path>").requiredOption("--analysis <path>").option("--labels <path>").requiredOption("--out <path>").option("--directed", "Build a directed graph (preserves source->target)").action((opts) => {
5538
6045
  const extraction = ensureExtractionShape(readJson(opts.extract));
5539
6046
  const analysis = readJson(opts.analysis);
5540
6047
  const labels = opts.labels ? objectToStringMap(readJson(opts.labels)) : objectToStringMap(analysis.labels);
5541
6048
  const communities = new Map(
5542
6049
  Object.entries(analysis.communities).map(([key, value]) => [Number.parseInt(key, 10), value])
5543
6050
  );
5544
- const G = buildFromJson(extraction);
5545
- toSvg(G, communities, resolve5(opts.out), labels);
6051
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
6052
+ toSvg(G, communities, resolve7(opts.out), labels);
5546
6053
  console.log("graph.svg written - embeds in Obsidian, Notion, GitHub READMEs");
5547
6054
  });
5548
- program.command("export-graphml").requiredOption("--extract <path>").requiredOption("--analysis <path>").requiredOption("--out <path>").action((opts) => {
6055
+ program.command("export-graphml").requiredOption("--extract <path>").requiredOption("--analysis <path>").requiredOption("--out <path>").option("--directed", "Build a directed graph (preserves source->target)").action((opts) => {
5549
6056
  const extraction = ensureExtractionShape(readJson(opts.extract));
5550
6057
  const analysis = readJson(opts.analysis);
5551
6058
  const communities = new Map(
5552
6059
  Object.entries(analysis.communities).map(([key, value]) => [Number.parseInt(key, 10), value])
5553
6060
  );
5554
- const G = buildFromJson(extraction);
5555
- toGraphml(G, communities, resolve5(opts.out));
6061
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
6062
+ toGraphml(G, communities, resolve7(opts.out));
5556
6063
  console.log("graph.graphml written - open in Gephi, yEd, or any GraphML tool");
5557
6064
  });
5558
- program.command("export-cypher").requiredOption("--extract <path>").requiredOption("--out <path>").action((opts) => {
6065
+ program.command("export-cypher").requiredOption("--extract <path>").requiredOption("--out <path>").option("--directed", "Build a directed graph (preserves source->target)").action((opts) => {
5559
6066
  const extraction = ensureExtractionShape(readJson(opts.extract));
5560
- const G = buildFromJson(extraction);
5561
- toCypher(G, resolve5(opts.out));
6067
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
6068
+ toCypher(G, resolve7(opts.out));
5562
6069
  console.log("cypher.txt written - import with: cypher-shell < graphify-out/cypher.txt");
5563
6070
  });
5564
- program.command("push-neo4j").requiredOption("--extract <path>").requiredOption("--analysis <path>").requiredOption("--uri <uri>").requiredOption("--user <user>").requiredOption("--password <password>").action(async (opts) => {
6071
+ program.command("push-neo4j").requiredOption("--extract <path>").requiredOption("--analysis <path>").requiredOption("--uri <uri>").requiredOption("--user <user>").requiredOption("--password <password>").option("--directed", "Build a directed graph (preserves source->target)").action(async (opts) => {
5565
6072
  const extraction = ensureExtractionShape(readJson(opts.extract));
5566
6073
  const analysis = readJson(opts.analysis);
5567
- const G = buildFromJson(extraction);
6074
+ const G = buildFromJson(extraction, { directed: shouldBuildDirected(opts) });
5568
6075
  const communities = new Map(
5569
6076
  Object.entries(analysis.communities).map(([key, value]) => [Number.parseInt(key, 10), value])
5570
6077
  );
@@ -5578,19 +6085,19 @@ async function main() {
5578
6085
  });
5579
6086
  program.command("benchmark").requiredOption("--graph <path>").option("--corpus-words <n>").action((opts) => {
5580
6087
  const corpusWords = opts.corpusWords ? Number.parseInt(opts.corpusWords, 10) : void 0;
5581
- const result = runBenchmark(resolve5(opts.graph), corpusWords);
6088
+ const result = runBenchmark(resolve7(opts.graph), corpusWords);
5582
6089
  printBenchmark(result);
5583
6090
  });
5584
6091
  program.command("update-cost").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--out <path>").action((opts) => {
5585
6092
  const extraction = ensureExtractionShape(readJson(opts.extract));
5586
6093
  const detection = readJson(opts.detect);
5587
- const outPath = resolve5(opts.out);
6094
+ const outPath = resolve7(opts.out);
5588
6095
  let cost = {
5589
6096
  runs: [],
5590
6097
  total_input_tokens: 0,
5591
6098
  total_output_tokens: 0
5592
6099
  };
5593
- if (existsSync7(outPath)) {
6100
+ if (existsSync8(outPath)) {
5594
6101
  cost = readJson(outPath);
5595
6102
  }
5596
6103
  const input = extraction.input_tokens ?? 0;
@@ -5609,24 +6116,28 @@ async function main() {
5609
6116
  `All time: ${cost.total_input_tokens.toLocaleString()} input, ${cost.total_output_tokens.toLocaleString()} output (${cost.runs.length} runs)`
5610
6117
  );
5611
6118
  });
5612
- program.command("merge-update").requiredOption("--existing-graph <path>").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").action((opts) => {
6119
+ program.command("merge-update").requiredOption("--existing-graph <path>").requiredOption("--extract <path>").requiredOption("--detect <path>").requiredOption("--root <path>").requiredOption("--graph-out <path>").requiredOption("--report-out <path>").requiredOption("--analysis-out <path>").option("--directed", "Build a directed graph (preserves source->target)").action((opts) => {
5613
6120
  const oldGraph = loadGraph2(opts.existingGraph);
5614
6121
  const mergedGraph = loadGraph2(opts.existingGraph);
5615
6122
  const extraction = ensureExtractionShape(readJson(opts.extract));
5616
6123
  const detection = readJson(opts.detect);
5617
- const newGraph = buildFromJson(extraction);
6124
+ const newGraph = buildFromJson(extraction, {
6125
+ directed: shouldBuildDirected(opts, oldGraph)
6126
+ });
5618
6127
  mergeGraphs(mergedGraph, newGraph);
5619
6128
  const analyzed = analyzeGraph(
5620
6129
  mergedGraph,
5621
6130
  detection,
5622
- resolve5(opts.root),
6131
+ resolve7(opts.root),
5623
6132
  { input: extraction.input_tokens ?? 0, output: extraction.output_tokens ?? 0 }
5624
6133
  );
5625
6134
  analyzed.analysis.diff = graphDiff(oldGraph, mergedGraph);
5626
- toJson(mergedGraph, analyzed.communities, resolve5(opts.graphOut));
5627
- writeFileSync5(resolve5(opts.reportOut), analyzed.report, "utf-8");
6135
+ toJson(mergedGraph, analyzed.communities, resolve7(opts.graphOut), {
6136
+ communityLabels: analyzed.labels
6137
+ });
6138
+ writeFileSync6(resolve7(opts.reportOut), analyzed.report, "utf-8");
5628
6139
  writeJson(opts.analysisOut, analyzed.analysis);
5629
- saveManifest(detection.files, join4(dirname2(resolve5(opts.graphOut)), "manifest.json"));
6140
+ saveManifest(detection.files, join5(dirname4(resolve7(opts.graphOut)), "manifest.json"));
5630
6141
  console.log(`Merged: ${mergedGraph.order} nodes, ${mergedGraph.size} edges`);
5631
6142
  console.log(analyzed.analysis.diff.summary);
5632
6143
  });
@@ -5635,11 +6146,13 @@ async function main() {
5635
6146
  const analyzed = analyzeGraph(
5636
6147
  G,
5637
6148
  placeholderDetection(opts.root),
5638
- resolve5(opts.root),
6149
+ resolve7(opts.root),
5639
6150
  { input: 0, output: 0 }
5640
6151
  );
5641
- toJson(G, analyzed.communities, resolve5(opts.graphOut));
5642
- writeFileSync5(resolve5(opts.reportOut), analyzed.report, "utf-8");
6152
+ toJson(G, analyzed.communities, resolve7(opts.graphOut), {
6153
+ communityLabels: analyzed.labels
6154
+ });
6155
+ writeFileSync6(resolve7(opts.reportOut), analyzed.report, "utf-8");
5643
6156
  writeJson(opts.analysisOut, analyzed.analysis);
5644
6157
  console.log(`Re-clustered: ${analyzed.communities.size} communities`);
5645
6158
  });
@@ -5690,7 +6203,7 @@ async function main() {
5690
6203
  console.log(` degree: ${G.degree(nodeId)}`);
5691
6204
  console.log("");
5692
6205
  console.log("CONNECTIONS:");
5693
- G.forEachNeighbor(nodeId, (neighbor) => {
6206
+ forEachTraversalNeighbor(G, nodeId, (neighbor) => {
5694
6207
  const edgeId = G.edge(nodeId, neighbor);
5695
6208
  const edge = edgeId ? G.getEdgeAttributes(edgeId) : {};
5696
6209
  const label = G.getNodeAttribute(neighbor, "label") ?? neighbor;
@@ -5699,7 +6212,7 @@ async function main() {
5699
6212
  });
5700
6213
  });
5701
6214
  program.command("ingest").argument("<url>").option("--target-dir <path>", "Directory to save fetched content", "./raw").option("--author <name>").option("--contributor <name>").action(async (url, opts) => {
5702
- const outPath = await ingest(url, resolve5(opts.targetDir), {
6215
+ const outPath = await ingest(url, resolve7(opts.targetDir), {
5703
6216
  author: opts.author ?? null,
5704
6217
  contributor: opts.contributor ?? null
5705
6218
  });
@@ -5709,7 +6222,7 @@ async function main() {
5709
6222
  const outPath = saveQueryResult({
5710
6223
  question: opts.question,
5711
6224
  answer: opts.answer,
5712
- memoryDir: resolve5(opts.memoryDir),
6225
+ memoryDir: resolve7(opts.memoryDir),
5713
6226
  queryType: opts.queryType,
5714
6227
  sourceNodes: JSON.parse(opts.sourceNodesJson)
5715
6228
  });