graphifyy 0.3.17 → 0.3.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -14,6 +14,21 @@ var __export = (target, all) => {
14
14
  __defProp(target, name, { get: all[name], enumerable: true });
15
15
  };
16
16
 
17
+ // src/types.ts
18
+ var FileType;
19
+ var init_types = __esm({
20
+ "src/types.ts"() {
21
+ FileType = /* @__PURE__ */ ((FileType2) => {
22
+ FileType2["CODE"] = "code";
23
+ FileType2["DOCUMENT"] = "document";
24
+ FileType2["PAPER"] = "paper";
25
+ FileType2["IMAGE"] = "image";
26
+ FileType2["VIDEO"] = "video";
27
+ return FileType2;
28
+ })(FileType || {});
29
+ }
30
+ });
31
+
17
32
  // src/validate.ts
18
33
  function validateExtraction(data) {
19
34
  if (typeof data !== "object" || data === null || Array.isArray(data)) {
@@ -103,14 +118,80 @@ var init_validate = __esm({
103
118
  }
104
119
  });
105
120
 
121
+ // src/graph.ts
122
+ import Graph from "graphology";
123
+ function createGraph(directed = false) {
124
+ return new Graph({ type: directed ? "directed" : "undirected", multi: false });
125
+ }
126
+ function isDirectedGraph(G) {
127
+ return G.type === "directed";
128
+ }
129
+ function loadGraphFromData(raw) {
130
+ const G = createGraph(raw.directed === true);
131
+ for (const [key, value] of Object.entries(raw.graph ?? {})) {
132
+ G.setAttribute(key, value);
133
+ }
134
+ for (const node of raw.nodes ?? []) {
135
+ const { id, ...attrs } = node;
136
+ G.mergeNode(id, attrs);
137
+ }
138
+ for (const link of raw.links ?? raw.edges ?? []) {
139
+ const { source, target, ...attrs } = link;
140
+ if (!G.hasNode(source) || !G.hasNode(target)) continue;
141
+ try {
142
+ G.mergeEdge(source, target, attrs);
143
+ } catch {
144
+ }
145
+ }
146
+ if (raw.hyperedges && raw.hyperedges.length > 0) {
147
+ G.setAttribute("hyperedges", raw.hyperedges);
148
+ }
149
+ return G;
150
+ }
151
+ function toUndirectedGraph(G) {
152
+ if (!isDirectedGraph(G)) return G.copy();
153
+ const copy = createGraph(false);
154
+ for (const [key, value] of Object.entries(G.getAttributes())) {
155
+ copy.setAttribute(key, value);
156
+ }
157
+ G.forEachNode((nodeId, attrs) => {
158
+ copy.mergeNode(nodeId, attrs);
159
+ });
160
+ G.forEachEdge((_edge, attrs, source, target) => {
161
+ if (!copy.hasNode(source) || !copy.hasNode(target)) return;
162
+ try {
163
+ copy.mergeEdge(source, target, attrs);
164
+ } catch {
165
+ }
166
+ });
167
+ return copy;
168
+ }
169
+ function forEachTraversalNeighbor(G, node, callback) {
170
+ if (isDirectedGraph(G)) {
171
+ G.forEachOutboundNeighbor(node, callback);
172
+ return;
173
+ }
174
+ G.forEachNeighbor(node, callback);
175
+ }
176
+ function traversalNeighbors(G, node) {
177
+ const neighbors = [];
178
+ forEachTraversalNeighbor(G, node, (neighbor) => {
179
+ neighbors.push(neighbor);
180
+ });
181
+ return neighbors;
182
+ }
183
+ var init_graph = __esm({
184
+ "src/graph.ts"() {
185
+ }
186
+ });
187
+
106
188
  // src/build.ts
107
189
  var build_exports = {};
108
190
  __export(build_exports, {
109
191
  build: () => build,
110
192
  buildFromJson: () => buildFromJson
111
193
  });
112
- import Graph from "graphology";
113
- function buildFromJson(extraction) {
194
+ function buildFromJson(extraction, options) {
114
195
  const errors = validateExtraction(extraction);
115
196
  const realErrors = errors.filter((e) => !e.includes("does not match any node id"));
116
197
  if (realErrors.length > 0) {
@@ -118,7 +199,7 @@ function buildFromJson(extraction) {
118
199
  `[graphify] Extraction warning (${realErrors.length} issues): ${realErrors[0]}`
119
200
  );
120
201
  }
121
- const G = new Graph({ type: "undirected", multi: false });
202
+ const G = createGraph(options?.directed === true);
122
203
  for (const node of extraction.nodes ?? []) {
123
204
  const { id, ...attrs } = node;
124
205
  G.mergeNode(id, attrs);
@@ -140,7 +221,7 @@ function buildFromJson(extraction) {
140
221
  }
141
222
  return G;
142
223
  }
143
- function build(extractions) {
224
+ function build(extractions, options) {
144
225
  const combined = {
145
226
  nodes: [],
146
227
  edges: [],
@@ -155,10 +236,11 @@ function build(extractions) {
155
236
  combined.input_tokens += ext.input_tokens ?? 0;
156
237
  combined.output_tokens += ext.output_tokens ?? 0;
157
238
  }
158
- return buildFromJson(combined);
239
+ return buildFromJson(combined, options);
159
240
  }
160
241
  var init_build = __esm({
161
242
  "src/build.ts"() {
243
+ init_graph();
162
244
  init_validate();
163
245
  }
164
246
  });
@@ -199,7 +281,7 @@ __export(cluster_exports, {
199
281
  });
200
282
  import louvain from "graphology-communities-louvain";
201
283
  function partition(G) {
202
- const result = louvain(G);
284
+ const result = louvain(G.type === "directed" ? toUndirectedGraph(G) : G);
203
285
  const map = /* @__PURE__ */ new Map();
204
286
  for (const [node, cid] of Object.entries(result)) {
205
287
  map.set(node, cid);
@@ -305,11 +387,378 @@ var MAX_COMMUNITY_FRACTION, MIN_SPLIT_SIZE;
305
387
  var init_cluster = __esm({
306
388
  "src/cluster.ts"() {
307
389
  init_collections();
390
+ init_graph();
308
391
  MAX_COMMUNITY_FRACTION = 0.25;
309
392
  MIN_SPLIT_SIZE = 10;
310
393
  }
311
394
  });
312
395
 
396
+ // src/detect.ts
397
+ import {
398
+ readdirSync,
399
+ readFileSync,
400
+ writeFileSync,
401
+ statSync,
402
+ existsSync,
403
+ mkdirSync,
404
+ lstatSync
405
+ } from "fs";
406
+ import { join, resolve, extname, basename, relative, sep, dirname } from "path";
407
+ import { createHash } from "crypto";
408
+ function isSensitive(filePath) {
409
+ const name = basename(filePath);
410
+ return SENSITIVE_PATTERNS.some((p) => p.test(name) || p.test(filePath));
411
+ }
412
+ function looksLikePaper(filePath) {
413
+ try {
414
+ const text = readFileSync(filePath, "utf-8").slice(0, 3e3);
415
+ const hits = PAPER_SIGNALS.filter((p) => p.test(text)).length;
416
+ return hits >= PAPER_SIGNAL_THRESHOLD;
417
+ } catch {
418
+ return false;
419
+ }
420
+ }
421
+ function classifyFile(filePath) {
422
+ const ext = extname(filePath).toLowerCase();
423
+ if (CODE_EXTENSIONS.has(ext)) return "code" /* CODE */;
424
+ if (PAPER_EXTENSIONS.has(ext)) {
425
+ const parts = filePath.split(sep);
426
+ if (parts.some((p) => [...ASSET_DIR_MARKERS].some((m) => p.endsWith(m)))) return null;
427
+ return "paper" /* PAPER */;
428
+ }
429
+ if (IMAGE_EXTENSIONS.has(ext)) return "image" /* IMAGE */;
430
+ if (VIDEO_EXTENSIONS.has(ext)) return "video" /* VIDEO */;
431
+ if (DOC_EXTENSIONS.has(ext)) {
432
+ if (looksLikePaper(filePath)) return "paper" /* PAPER */;
433
+ return "document" /* DOCUMENT */;
434
+ }
435
+ if (OFFICE_EXTENSIONS.has(ext)) return "document" /* DOCUMENT */;
436
+ return null;
437
+ }
438
+ function countWords(filePath) {
439
+ try {
440
+ const text = readFileSync(filePath, "utf-8");
441
+ return text.split(/\s+/).filter(Boolean).length;
442
+ } catch {
443
+ return 0;
444
+ }
445
+ }
446
+ function isNoiseDir(part) {
447
+ if (SKIP_DIRS.has(part)) return true;
448
+ if (part.endsWith("_venv") || part.endsWith("_env")) return true;
449
+ if (part.endsWith(".egg-info")) return true;
450
+ return false;
451
+ }
452
+ function loadGraphifyignore(root) {
453
+ const patterns = [];
454
+ let current = resolve(root);
455
+ while (true) {
456
+ const ignoreFile = join(current, ".graphifyignore");
457
+ if (existsSync(ignoreFile)) {
458
+ for (let line of readFileSync(ignoreFile, "utf-8").split("\n")) {
459
+ line = line.trim();
460
+ if (line && !line.startsWith("#")) {
461
+ patterns.push(line);
462
+ }
463
+ }
464
+ }
465
+ if (existsSync(join(current, ".git"))) {
466
+ break;
467
+ }
468
+ const parent = dirname(current);
469
+ if (parent === current) {
470
+ break;
471
+ }
472
+ current = parent;
473
+ }
474
+ return patterns;
475
+ }
476
+ function matchGlob(text, pattern) {
477
+ const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
478
+ return new RegExp(`^${regex}$`).test(text);
479
+ }
480
+ function isIgnored(filePath, root, patterns) {
481
+ if (patterns.length === 0) return false;
482
+ let rel;
483
+ try {
484
+ rel = relative(root, filePath).replace(/\\/g, "/");
485
+ } catch {
486
+ return false;
487
+ }
488
+ const parts = rel.split("/");
489
+ for (const pattern of patterns) {
490
+ const p = pattern.replace(/^\/+|\/+$/g, "");
491
+ if (!p) continue;
492
+ if (matchGlob(rel, p)) return true;
493
+ if (matchGlob(basename(filePath), p)) return true;
494
+ for (let i = 0; i < parts.length; i++) {
495
+ if (matchGlob(parts[i], p)) return true;
496
+ if (matchGlob(parts.slice(0, i + 1).join("/"), p)) return true;
497
+ }
498
+ }
499
+ return false;
500
+ }
501
+ function walkDir(dir, root, ignorePatterns, followSymlinks, skipPrune) {
502
+ const result = [];
503
+ let entries;
504
+ try {
505
+ entries = readdirSync(dir);
506
+ } catch {
507
+ return result;
508
+ }
509
+ for (const entry of entries) {
510
+ const full = join(dir, entry);
511
+ let stat;
512
+ try {
513
+ stat = followSymlinks ? statSync(full) : lstatSync(full);
514
+ } catch {
515
+ continue;
516
+ }
517
+ if (stat.isDirectory()) {
518
+ if (!skipPrune) {
519
+ if (entry.startsWith(".")) continue;
520
+ if (isNoiseDir(entry)) continue;
521
+ if (isIgnored(full, root, ignorePatterns)) continue;
522
+ }
523
+ result.push(...walkDir(full, root, ignorePatterns, followSymlinks, skipPrune));
524
+ } else if (stat.isFile()) {
525
+ result.push(full);
526
+ }
527
+ }
528
+ return result;
529
+ }
530
+ function detect(root, options) {
531
+ const followSymlinks = options?.followSymlinks ?? false;
532
+ const rootResolved = resolve(root);
533
+ const ignorePatterns = loadGraphifyignore(rootResolved);
534
+ const convertedDir = join(rootResolved, "graphify-out", "converted");
535
+ const memoryDir = join(rootResolved, "graphify-out", "memory");
536
+ const files = {
537
+ code: [],
538
+ document: [],
539
+ paper: [],
540
+ image: [],
541
+ video: []
542
+ };
543
+ let totalWords = 0;
544
+ const skippedSensitive = [];
545
+ const allFiles = walkDir(rootResolved, rootResolved, ignorePatterns, followSymlinks, false);
546
+ if (existsSync(memoryDir)) {
547
+ allFiles.push(...walkDir(memoryDir, rootResolved, ignorePatterns, followSymlinks, true));
548
+ }
549
+ const seen = /* @__PURE__ */ new Set();
550
+ for (const p of allFiles) {
551
+ if (seen.has(p)) continue;
552
+ seen.add(p);
553
+ const inMemory = existsSync(memoryDir) && p.startsWith(memoryDir);
554
+ if (!inMemory) {
555
+ if (basename(p).startsWith(".")) continue;
556
+ if (p.startsWith(convertedDir)) continue;
557
+ }
558
+ if (isIgnored(p, rootResolved, ignorePatterns)) continue;
559
+ if (isSensitive(p)) {
560
+ skippedSensitive.push(p);
561
+ continue;
562
+ }
563
+ const ftype = classifyFile(p);
564
+ if (!ftype) continue;
565
+ if (OFFICE_EXTENSIONS.has(extname(p).toLowerCase())) {
566
+ skippedSensitive.push(p + " [office conversion requires async - use pipeline]");
567
+ continue;
568
+ }
569
+ files[ftype].push(p);
570
+ if (ftype !== "video" /* VIDEO */) {
571
+ totalWords += countWords(p);
572
+ }
573
+ }
574
+ const totalFiles = Object.values(files).reduce((s, v) => s + v.length, 0);
575
+ const needsGraph = totalWords >= CORPUS_WARN_THRESHOLD;
576
+ let warning = null;
577
+ if (!needsGraph) {
578
+ warning = `Corpus is ~${totalWords.toLocaleString()} words - fits in a single context window. You may not need a graph.`;
579
+ } else if (totalWords >= CORPUS_UPPER_THRESHOLD || totalFiles >= FILE_COUNT_UPPER) {
580
+ warning = `Large corpus: ${totalFiles} files \xB7 ~${totalWords.toLocaleString()} words. Semantic extraction will be expensive (many Claude tokens). Consider running on a subfolder, or use --no-semantic to run AST-only.`;
581
+ }
582
+ return {
583
+ files,
584
+ total_files: totalFiles,
585
+ total_words: totalWords,
586
+ needs_graph: needsGraph,
587
+ warning,
588
+ skipped_sensitive: skippedSensitive,
589
+ graphifyignore_patterns: ignorePatterns.length
590
+ };
591
+ }
592
+ function loadManifest(manifestPath = MANIFEST_PATH) {
593
+ try {
594
+ return JSON.parse(readFileSync(manifestPath, "utf-8"));
595
+ } catch {
596
+ return {};
597
+ }
598
+ }
599
+ function saveManifest(files, manifestPath = MANIFEST_PATH) {
600
+ const manifest = {};
601
+ for (const fileList of Object.values(files)) {
602
+ for (const f of fileList) {
603
+ try {
604
+ manifest[f] = statSync(f).mtimeMs;
605
+ } catch {
606
+ }
607
+ }
608
+ }
609
+ const dir = join(manifestPath, "..");
610
+ mkdirSync(dir, { recursive: true });
611
+ writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
612
+ }
613
+ function detectIncremental(root, manifestPath = MANIFEST_PATH) {
614
+ const full = detect(root);
615
+ const manifest = loadManifest(manifestPath);
616
+ if (Object.keys(manifest).length === 0) {
617
+ return {
618
+ ...full,
619
+ incremental: true,
620
+ new_files: full.files,
621
+ unchanged_files: Object.fromEntries(Object.keys(full.files).map((k) => [k, []])),
622
+ new_total: full.total_files
623
+ };
624
+ }
625
+ const newFiles = {};
626
+ const unchangedFiles = {};
627
+ for (const k of Object.keys(full.files)) {
628
+ newFiles[k] = [];
629
+ unchangedFiles[k] = [];
630
+ }
631
+ for (const [ftype, fileList] of Object.entries(full.files)) {
632
+ for (const f of fileList) {
633
+ const storedMtime = manifest[f];
634
+ let currentMtime = 0;
635
+ try {
636
+ currentMtime = statSync(f).mtimeMs;
637
+ } catch {
638
+ }
639
+ if (storedMtime === void 0 || currentMtime > storedMtime) {
640
+ newFiles[ftype].push(f);
641
+ } else {
642
+ unchangedFiles[ftype].push(f);
643
+ }
644
+ }
645
+ }
646
+ const currentFiles = new Set(Object.values(full.files).flat());
647
+ const deletedFiles = Object.keys(manifest).filter((f) => !currentFiles.has(f));
648
+ const newTotal = Object.values(newFiles).reduce((s, v) => s + v.length, 0);
649
+ return {
650
+ ...full,
651
+ incremental: true,
652
+ new_files: newFiles,
653
+ unchanged_files: unchangedFiles,
654
+ new_total: newTotal,
655
+ deleted_files: deletedFiles
656
+ };
657
+ }
658
+ var MANIFEST_PATH, CODE_EXTENSIONS, DOC_EXTENSIONS, PAPER_EXTENSIONS, IMAGE_EXTENSIONS, OFFICE_EXTENSIONS, VIDEO_EXTENSIONS, CORPUS_WARN_THRESHOLD, CORPUS_UPPER_THRESHOLD, FILE_COUNT_UPPER, SENSITIVE_PATTERNS, PAPER_SIGNALS, PAPER_SIGNAL_THRESHOLD, ASSET_DIR_MARKERS, SKIP_DIRS;
659
+ var init_detect = __esm({
660
+ "src/detect.ts"() {
661
+ init_types();
662
+ MANIFEST_PATH = "graphify-out/manifest.json";
663
+ CODE_EXTENSIONS = /* @__PURE__ */ new Set([
664
+ ".py",
665
+ ".ts",
666
+ ".js",
667
+ ".jsx",
668
+ ".tsx",
669
+ ".go",
670
+ ".rs",
671
+ ".java",
672
+ ".cpp",
673
+ ".cc",
674
+ ".cxx",
675
+ ".c",
676
+ ".h",
677
+ ".hpp",
678
+ ".rb",
679
+ ".swift",
680
+ ".kt",
681
+ ".kts",
682
+ ".cs",
683
+ ".scala",
684
+ ".php",
685
+ ".lua",
686
+ ".toc",
687
+ ".zig",
688
+ ".ps1",
689
+ ".ex",
690
+ ".exs",
691
+ ".m",
692
+ ".mm",
693
+ ".jl"
694
+ ]);
695
+ DOC_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".rst"]);
696
+ PAPER_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
697
+ IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"]);
698
+ OFFICE_EXTENSIONS = /* @__PURE__ */ new Set([".docx", ".xlsx"]);
699
+ VIDEO_EXTENSIONS = /* @__PURE__ */ new Set([
700
+ ".mp4",
701
+ ".mov",
702
+ ".webm",
703
+ ".mkv",
704
+ ".avi",
705
+ ".m4v",
706
+ ".mp3",
707
+ ".wav",
708
+ ".m4a",
709
+ ".ogg"
710
+ ]);
711
+ CORPUS_WARN_THRESHOLD = 5e4;
712
+ CORPUS_UPPER_THRESHOLD = 5e5;
713
+ FILE_COUNT_UPPER = 200;
714
+ SENSITIVE_PATTERNS = [
715
+ /(^|[\\/])\.(env|envrc)(\.|$)/i,
716
+ /\.(pem|key|p12|pfx|cert|crt|der|p8)$/i,
717
+ /(credential|secret|passwd|password|token|private_key)/i,
718
+ /(id_rsa|id_dsa|id_ecdsa|id_ed25519)(\.pub)?$/,
719
+ /(\.netrc|\.pgpass|\.htpasswd)$/i,
720
+ /(aws_credentials|gcloud_credentials|service.account)/i
721
+ ];
722
+ PAPER_SIGNALS = [
723
+ /\barxiv\b/i,
724
+ /\bdoi\s*:/i,
725
+ /\babstract\b/i,
726
+ /\bproceedings\b/i,
727
+ /\bjournal\b/i,
728
+ /\bpreprint\b/i,
729
+ /\\cite\{/,
730
+ /\[\d+\]/,
731
+ /\[\n\d+\n\]/,
732
+ /eq\.\s*\d+|equation\s+\d+/i,
733
+ /\d{4}\.\d{4,5}/,
734
+ /\bwe propose\b/i,
735
+ /\bliterature\b/i
736
+ ];
737
+ PAPER_SIGNAL_THRESHOLD = 3;
738
+ ASSET_DIR_MARKERS = /* @__PURE__ */ new Set([".imageset", ".xcassets", ".appiconset", ".colorset", ".launchimage"]);
739
+ SKIP_DIRS = /* @__PURE__ */ new Set([
740
+ "venv",
741
+ ".venv",
742
+ "env",
743
+ ".env",
744
+ "node_modules",
745
+ "__pycache__",
746
+ ".git",
747
+ "dist",
748
+ "build",
749
+ "target",
750
+ "out",
751
+ "site-packages",
752
+ "lib64",
753
+ ".pytest_cache",
754
+ ".mypy_cache",
755
+ ".ruff_cache",
756
+ ".tox",
757
+ ".eggs"
758
+ ]);
759
+ }
760
+ });
761
+
313
762
  // src/analyze.ts
314
763
  var analyze_exports = {};
315
764
  __export(analyze_exports, {
@@ -351,10 +800,11 @@ function isConceptNode(G, nodeId) {
351
800
  return false;
352
801
  }
353
802
  function fileCategory(path) {
354
- const ext = path.includes(".") ? path.split(".").pop()?.toLowerCase() ?? "" : "";
803
+ const ext = path.includes(".") ? `.${path.split(".").pop()?.toLowerCase() ?? ""}` : "";
355
804
  if (CODE_EXTENSIONS.has(ext)) return "code";
356
805
  if (PAPER_EXTENSIONS.has(ext)) return "paper";
357
806
  if (IMAGE_EXTENSIONS.has(ext)) return "image";
807
+ if (DOC_EXTENSIONS.has(ext)) return "doc";
358
808
  return "doc";
359
809
  }
360
810
  function topLevelDir(path) {
@@ -548,10 +998,10 @@ function suggestQuestions(G, communities, communityLabels, topN = 7) {
548
998
  const cid = nodeCommunity.get(nodeId);
549
999
  const commLabel = cid !== void 0 ? labelMap.get(cid) ?? `Community ${cid}` : "unknown";
550
1000
  const neighborComms = /* @__PURE__ */ new Set();
551
- G.forEachNeighbor(nodeId, (n) => {
1001
+ for (const n of traversalNeighbors(G, nodeId)) {
552
1002
  const nc = nodeCommunity.get(n);
553
1003
  if (nc !== void 0 && nc !== cid) neighborComms.add(nc);
554
- });
1004
+ }
555
1005
  if (neighborComms.size > 0) {
556
1006
  const otherLabels = [...neighborComms].map((c) => labelMap.get(c) ?? `Community ${c}`);
557
1007
  questions.push({
@@ -680,30 +1130,12 @@ function graphDiff(GOld, GNew) {
680
1130
  summary: parts.length > 0 ? parts.join(", ") : "no changes"
681
1131
  };
682
1132
  }
683
- var CODE_EXTENSIONS, PAPER_EXTENSIONS, IMAGE_EXTENSIONS;
684
1133
  var init_analyze = __esm({
685
1134
  "src/analyze.ts"() {
686
1135
  init_collections();
687
1136
  init_cluster();
688
- CODE_EXTENSIONS = /* @__PURE__ */ new Set([
689
- "py",
690
- "ts",
691
- "tsx",
692
- "js",
693
- "go",
694
- "rs",
695
- "java",
696
- "rb",
697
- "cpp",
698
- "c",
699
- "h",
700
- "cs",
701
- "kt",
702
- "scala",
703
- "php"
704
- ]);
705
- PAPER_EXTENSIONS = /* @__PURE__ */ new Set(["pdf"]);
706
- IMAGE_EXTENSIONS = /* @__PURE__ */ new Set(["png", "jpg", "jpeg", "webp", "gif", "svg"]);
1137
+ init_graph();
1138
+ init_detect();
707
1139
  }
708
1140
  });
709
1141
 
@@ -873,7 +1305,7 @@ var init_report = __esm({
873
1305
 
874
1306
  // src/security.ts
875
1307
  import { resolve as pathResolve } from "path";
876
- import { existsSync } from "fs";
1308
+ import { existsSync as existsSync2 } from "fs";
877
1309
  import { URL as URL2 } from "url";
878
1310
  import * as dns from "dns/promises";
879
1311
  import * as net from "net";
@@ -973,7 +1405,7 @@ async function safeFetchText(url, maxBytes = MAX_TEXT_BYTES, timeout = 15e3) {
973
1405
  }
974
1406
  function validateGraphPath(filePath, base) {
975
1407
  const resolvedBase = pathResolve(base ?? "graphify-out");
976
- if (!existsSync(resolvedBase)) {
1408
+ if (!existsSync2(resolvedBase)) {
977
1409
  throw new Error(
978
1410
  `Graph base directory does not exist: ${resolvedBase}. Run the graphify skill first to build the graph (for Codex: $graphify .).`
979
1411
  );
@@ -984,7 +1416,7 @@ function validateGraphPath(filePath, base) {
984
1416
  `Path '${filePath}' escapes the allowed directory ${resolvedBase}. Only paths inside graphify-out/ are permitted.`
985
1417
  );
986
1418
  }
987
- if (!existsSync(resolved)) {
1419
+ if (!existsSync2(resolved)) {
988
1420
  throw new Error(`Graph file not found: ${resolved}`);
989
1421
  }
990
1422
  return resolved;
@@ -1022,7 +1454,7 @@ __export(export_exports, {
1022
1454
  toJson: () => toJson,
1023
1455
  toSvg: () => toSvg
1024
1456
  });
1025
- import { writeFileSync } from "fs";
1457
+ import { writeFileSync as writeFileSync2 } from "fs";
1026
1458
  function nodeCommunityMap2(communities) {
1027
1459
  const communityMap = toNumericMap(communities);
1028
1460
  const result = /* @__PURE__ */ new Map();
@@ -1050,14 +1482,17 @@ function normalizeCommunityLabels(labelsOrOptions) {
1050
1482
  }
1051
1483
  return toNumericMap(labelsOrOptions.communityLabels);
1052
1484
  }
1053
- function toJson(G, communities, outputPath) {
1485
+ function toJson(G, communities, outputPath, communityLabelsOrOptions) {
1054
1486
  const nodeComm = nodeCommunityMap2(communities);
1487
+ const communityLabels = normalizeCommunityLabels(communityLabelsOrOptions);
1055
1488
  const nodes = [];
1056
1489
  G.forEachNode((nodeId, attrs) => {
1490
+ const communityId = nodeComm.get(nodeId) ?? null;
1057
1491
  nodes.push({
1058
1492
  id: nodeId,
1059
1493
  ...attrs,
1060
- community: nodeComm.get(nodeId) ?? null
1494
+ community: communityId,
1495
+ community_name: communityId !== null ? sanitizeLabel(communityLabels?.get(communityId) ?? `Community ${communityId}`) : null
1061
1496
  });
1062
1497
  });
1063
1498
  const links = [];
@@ -1074,15 +1509,20 @@ function toJson(G, communities, outputPath) {
1074
1509
  links.push(link);
1075
1510
  });
1076
1511
  const hyperedges = G.getAttribute("hyperedges") ?? [];
1512
+ const communityLabelsObject = communityLabels ? Object.fromEntries(
1513
+ [...communityLabels.entries()].sort((a, b) => a[0] - b[0]).map(([cid, label]) => [String(cid), sanitizeLabel(label)])
1514
+ ) : {};
1077
1515
  const output = {
1078
- directed: false,
1516
+ directed: isDirectedGraph(G),
1079
1517
  multigraph: false,
1080
- graph: {},
1518
+ graph: {
1519
+ community_labels: communityLabelsObject
1520
+ },
1081
1521
  nodes,
1082
1522
  links,
1083
1523
  hyperedges
1084
1524
  };
1085
- writeFileSync(outputPath, JSON.stringify(output, null, 2), "utf-8");
1525
+ writeFileSync2(outputPath, JSON.stringify(output, null, 2), "utf-8");
1086
1526
  }
1087
1527
  function toCypher(G, outputPath) {
1088
1528
  const lines = ["// Neo4j Cypher import - generated by the graphify skill", ""];
@@ -1104,7 +1544,7 @@ function toCypher(G, outputPath) {
1104
1544
  `MATCH (a {id: '${uEsc}'}), (b {id: '${vEsc}'}) MERGE (a)-[:${rel} {confidence: '${conf}'}]->(b);`
1105
1545
  );
1106
1546
  });
1107
- writeFileSync(outputPath, lines.join("\n"), "utf-8");
1547
+ writeFileSync2(outputPath, lines.join("\n"), "utf-8");
1108
1548
  }
1109
1549
  function neo4jLabel(label) {
1110
1550
  const sanitized = label.replace(/[^A-Za-z0-9_]/g, "");
@@ -1343,9 +1783,24 @@ function focusNode(nodeId) {
1343
1783
  showInfo(nodeId);
1344
1784
  }
1345
1785
 
1786
+ let hoveredNodeId = null;
1787
+ network.on('hoverNode', params => {
1788
+ hoveredNodeId = params.node;
1789
+ container.style.cursor = 'pointer';
1790
+ });
1791
+ network.on('blurNode', () => {
1792
+ hoveredNodeId = null;
1793
+ container.style.cursor = 'default';
1794
+ });
1795
+ container.addEventListener('click', () => {
1796
+ if (hoveredNodeId !== null) {
1797
+ showInfo(hoveredNodeId);
1798
+ network.selectNodes([hoveredNodeId]);
1799
+ }
1800
+ });
1346
1801
  network.on('click', params => {
1347
1802
  if (params.nodes.length > 0) showInfo(params.nodes[0]);
1348
- else document.getElementById('info-content').innerHTML = '<span class="empty">Click a node to inspect it</span>';
1803
+ else if (hoveredNodeId === null) document.getElementById('info-content').innerHTML = '<span class="empty">Click a node to inspect it</span>';
1349
1804
  });
1350
1805
 
1351
1806
  const searchInput = document.getElementById('search');
@@ -1500,7 +1955,7 @@ ${htmlScript(nodesJson, edgesJson, legendJson)}
1500
1955
  ${hyperedgeScript(hyperedgesJson)}
1501
1956
  </body>
1502
1957
  </html>`;
1503
- writeFileSync(outputPath, html, "utf-8");
1958
+ writeFileSync2(outputPath, html, "utf-8");
1504
1959
  }
1505
1960
  function toGraphml(G, communities, outputPath) {
1506
1961
  const nodeComm = nodeCommunityMap2(communities);
@@ -1516,7 +1971,7 @@ function toGraphml(G, communities, outputPath) {
1516
1971
  lines.push(' <key id="community" for="node" attr.name="community" attr.type="int"/>');
1517
1972
  lines.push(' <key id="relation" for="edge" attr.name="relation" attr.type="string"/>');
1518
1973
  lines.push(' <key id="confidence" for="edge" attr.name="confidence" attr.type="string"/>');
1519
- lines.push(' <graph id="G" edgedefault="undirected">');
1974
+ lines.push(` <graph id="G" edgedefault="${isDirectedGraph(G) ? "directed" : "undirected"}">`);
1520
1975
  G.forEachNode((nodeId, data) => {
1521
1976
  lines.push(` <node id="${xmlEsc(nodeId)}">`);
1522
1977
  lines.push(` <data key="label">${xmlEsc(data.label ?? nodeId)}</data>`);
@@ -1533,7 +1988,7 @@ function toGraphml(G, communities, outputPath) {
1533
1988
  });
1534
1989
  lines.push(" </graph>");
1535
1990
  lines.push("</graphml>");
1536
- writeFileSync(outputPath, lines.join("\n"), "utf-8");
1991
+ writeFileSync2(outputPath, lines.join("\n"), "utf-8");
1537
1992
  }
1538
1993
  function toSvg(G, communities, outputPath, communityLabelsOrOptions, figsize = [20, 14]) {
1539
1994
  const communityMap = toNumericMap(communities);
@@ -1606,7 +2061,7 @@ function toSvg(G, communities, outputPath, communityLabelsOrOptions, figsize = [
1606
2061
  }
1607
2062
  }
1608
2063
  svgParts.push("</svg>");
1609
- writeFileSync(outputPath, svgParts.join("\n"), "utf-8");
2064
+ writeFileSync2(outputPath, svgParts.join("\n"), "utf-8");
1610
2065
  }
1611
2066
  function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFilenames) {
1612
2067
  const communityMap = toNumericMap(communities);
@@ -1615,7 +2070,7 @@ function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFile
1615
2070
  const providedNodeFilenames = options?.nodeFilenames ?? nodeFilenames;
1616
2071
  const CANVAS_COLORS = ["1", "2", "3", "4", "5", "6"];
1617
2072
  function safeName(label) {
1618
- return label.replace(/[\\/*?:"<>|#^[\]]/g, "").trim() || "unnamed";
2073
+ return label.replace(/\r\n/g, " ").replace(/\r/g, " ").replace(/\n/g, " ").replace(/[\\/*?:"<>|#^[\]]/g, "").trim() || "unnamed";
1619
2074
  }
1620
2075
  let filenameMap;
1621
2076
  if (!providedNodeFilenames) {
@@ -1694,13 +2149,13 @@ function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFile
1694
2149
  for (let idx = 0; idx < sortedCids.length; idx++) {
1695
2150
  const cid = sortedCids[idx];
1696
2151
  const members = communityMap.get(cid) ?? [];
1697
- const communityName = communityLabels?.get(cid) ?? `Community ${cid}`;
2152
+ const communityName2 = communityLabels?.get(cid) ?? `Community ${cid}`;
1698
2153
  const [gx, gy, gw, gh] = groupLayout.get(cid) ?? [0, 0, 600, 400];
1699
2154
  const canvasColor = CANVAS_COLORS[idx % CANVAS_COLORS.length];
1700
2155
  canvasNodes.push({
1701
2156
  id: `g${cid}`,
1702
2157
  type: "group",
1703
- label: communityName,
2158
+ label: communityName2,
1704
2159
  x: gx,
1705
2160
  y: gy,
1706
2161
  width: gw,
@@ -1750,12 +2205,13 @@ function toCanvas(G, communities, outputPath, communityLabelsOrOptions, nodeFile
1750
2205
  });
1751
2206
  }
1752
2207
  const canvasData = { nodes: canvasNodes, edges: canvasEdges };
1753
- writeFileSync(outputPath, JSON.stringify(canvasData, null, 2), "utf-8");
2208
+ writeFileSync2(outputPath, JSON.stringify(canvasData, null, 2), "utf-8");
1754
2209
  }
1755
2210
  var COMMUNITY_COLORS, MAX_NODES_FOR_VIZ, CONFIDENCE_SCORE_DEFAULTS;
1756
2211
  var init_export = __esm({
1757
2212
  "src/export.ts"() {
1758
2213
  init_security();
2214
+ init_graph();
1759
2215
  init_collections();
1760
2216
  COMMUNITY_COLORS = [
1761
2217
  "#4E79A7",
@@ -1781,9 +2237,21 @@ var init_export = __esm({
1781
2237
  // src/cache.ts
1782
2238
  import { createHash as createHash2 } from "crypto";
1783
2239
  import { readFileSync as readFileSync2, writeFileSync as writeFileSync4, mkdirSync as mkdirSync3, readdirSync as readdirSync2, unlinkSync, renameSync, existsSync as existsSync3 } from "fs";
1784
- import { join as join3, resolve as resolve2 } from "path";
2240
+ import { extname as extname2, join as join3, resolve as resolve2 } from "path";
2241
+ function bodyContent(content) {
2242
+ const text = content.toString("utf-8");
2243
+ if (!text.startsWith("---")) {
2244
+ return content;
2245
+ }
2246
+ const end = text.indexOf("\n---", 3);
2247
+ if (end === -1) {
2248
+ return content;
2249
+ }
2250
+ return Buffer.from(text.slice(end + 4), "utf-8");
2251
+ }
1785
2252
  function fileHash(filePath) {
1786
- const content = readFileSync2(filePath);
2253
+ const raw = readFileSync2(filePath);
2254
+ const content = extname2(filePath).toLowerCase() === ".md" ? bodyContent(raw) : raw;
1787
2255
  const resolved = resolve2(filePath);
1788
2256
  const h = createHash2("sha256");
1789
2257
  h.update(content);
@@ -1906,7 +2374,7 @@ __export(extract_exports, {
1906
2374
  extractZig: () => extractZig
1907
2375
  });
1908
2376
  import { readFileSync as readFileSync3, readdirSync as readdirSync3, lstatSync as lstatSync2, realpathSync, existsSync as existsSync4 } from "fs";
1909
- import { resolve as resolve3, basename as basename2, extname as extname2, dirname, join as join4, sep as sep2 } from "path";
2377
+ import { resolve as resolve3, basename as basename2, extname as extname3, dirname as dirname2, join as join4, sep as sep2 } from "path";
1910
2378
  import { createRequire } from "module";
1911
2379
  import * as TreeSitter from "web-tree-sitter";
1912
2380
  function getModuleRequire() {
@@ -2362,7 +2830,7 @@ async function _extractGeneric(filePath, config) {
2362
2830
  return { nodes: [], edges: [], error: String(e) };
2363
2831
  }
2364
2832
  const root = tree.rootNode;
2365
- const stem = basename2(filePath, extname2(filePath));
2833
+ const stem = basename2(filePath, extname3(filePath));
2366
2834
  const strPath = filePath;
2367
2835
  const nodes = [];
2368
2836
  const edges = [];
@@ -2731,10 +3199,10 @@ async function _extractGeneric(filePath, config) {
2731
3199
  source: callerNid,
2732
3200
  target: tgtNid,
2733
3201
  relation: "calls",
2734
- confidence: "INFERRED",
3202
+ confidence: "EXTRACTED",
2735
3203
  source_file: strPath,
2736
3204
  source_location: `L${line}`,
2737
- weight: 0.8
3205
+ weight: 1
2738
3206
  });
2739
3207
  }
2740
3208
  }
@@ -2770,7 +3238,7 @@ async function _extractPythonRationale(filePath, result) {
2770
3238
  } catch {
2771
3239
  return;
2772
3240
  }
2773
- const stem = basename2(filePath, extname2(filePath));
3241
+ const stem = basename2(filePath, extname3(filePath));
2774
3242
  const strPath = filePath;
2775
3243
  const { nodes, edges } = result;
2776
3244
  const seenIds = new Set(nodes.map((n) => n.id));
@@ -2866,7 +3334,7 @@ async function extractPython(filePath) {
2866
3334
  return result;
2867
3335
  }
2868
3336
  async function extractJs(filePath) {
2869
- const ext = extname2(filePath);
3337
+ const ext = extname3(filePath);
2870
3338
  const config = ext === ".ts" || ext === ".tsx" ? _TS_CONFIG : _JS_CONFIG;
2871
3339
  return _extractGeneric(filePath, config);
2872
3340
  }
@@ -2917,7 +3385,7 @@ async function extractJulia(filePath) {
2917
3385
  return { nodes: [], edges: [], error: String(e) };
2918
3386
  }
2919
3387
  const root = tree.rootNode;
2920
- const stem = basename2(filePath, extname2(filePath));
3388
+ const stem = basename2(filePath, extname3(filePath));
2921
3389
  const strPath = filePath;
2922
3390
  const nodes = [];
2923
3391
  const edges = [];
@@ -3113,8 +3581,8 @@ async function extractGo(filePath) {
3113
3581
  return { nodes: [], edges: [], error: String(e) };
3114
3582
  }
3115
3583
  const root = tree.rootNode;
3116
- const stem = basename2(filePath, extname2(filePath));
3117
- const pkgScope = dirname(filePath).split(sep2).pop() || stem;
3584
+ const stem = basename2(filePath, extname3(filePath));
3585
+ const pkgScope = dirname2(filePath).split(sep2).pop() || stem;
3118
3586
  const strPath = filePath;
3119
3587
  const nodes = [];
3120
3588
  const edges = [];
@@ -3257,10 +3725,10 @@ async function extractGo(filePath) {
3257
3725
  source: callerNid,
3258
3726
  target: tgtNid,
3259
3727
  relation: "calls",
3260
- confidence: "INFERRED",
3728
+ confidence: "EXTRACTED",
3261
3729
  source_file: strPath,
3262
3730
  source_location: `L${line}`,
3263
- weight: 0.8
3731
+ weight: 1
3264
3732
  });
3265
3733
  }
3266
3734
  }
@@ -3295,7 +3763,7 @@ async function extractRust(filePath) {
3295
3763
  return { nodes: [], edges: [], error: String(e) };
3296
3764
  }
3297
3765
  const root = tree.rootNode;
3298
- const stem = basename2(filePath, extname2(filePath));
3766
+ const stem = basename2(filePath, extname3(filePath));
3299
3767
  const strPath = filePath;
3300
3768
  const nodes = [];
3301
3769
  const edges = [];
@@ -3412,10 +3880,10 @@ async function extractRust(filePath) {
3412
3880
  source: callerNid,
3413
3881
  target: tgtNid,
3414
3882
  relation: "calls",
3415
- confidence: "INFERRED",
3883
+ confidence: "EXTRACTED",
3416
3884
  source_file: strPath,
3417
3885
  source_location: `L${line}`,
3418
- weight: 0.8
3886
+ weight: 1
3419
3887
  });
3420
3888
  }
3421
3889
  }
@@ -3450,7 +3918,7 @@ async function extractZig(filePath) {
3450
3918
  return { nodes: [], edges: [], error: String(e) };
3451
3919
  }
3452
3920
  const root = tree.rootNode;
3453
- const stem = basename2(filePath, extname2(filePath));
3921
+ const stem = basename2(filePath, extname3(filePath));
3454
3922
  const strPath = filePath;
3455
3923
  const nodes = [];
3456
3924
  const edges = [];
@@ -3574,7 +4042,7 @@ async function extractZig(filePath) {
3574
4042
  const pair = `${callerNid}|${tgtNid}`;
3575
4043
  if (!seenCallPairs.has(pair)) {
3576
4044
  seenCallPairs.add(pair);
3577
- addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "INFERRED", 0.8);
4045
+ addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
3578
4046
  }
3579
4047
  }
3580
4048
  }
@@ -3608,7 +4076,7 @@ async function extractPowershell(filePath) {
3608
4076
  return { nodes: [], edges: [], error: String(e) };
3609
4077
  }
3610
4078
  const root = tree.rootNode;
3611
- const stem = basename2(filePath, extname2(filePath));
4079
+ const stem = basename2(filePath, extname3(filePath));
3612
4080
  const strPath = filePath;
3613
4081
  const nodes = [];
3614
4082
  const edges = [];
@@ -3757,7 +4225,7 @@ async function extractPowershell(filePath) {
3757
4225
  const pair = `${callerNid}|${tgtNid}`;
3758
4226
  if (!seenCallPairs.has(pair)) {
3759
4227
  seenCallPairs.add(pair);
3760
- addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "INFERRED", 0.8);
4228
+ addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
3761
4229
  }
3762
4230
  }
3763
4231
  }
@@ -3792,7 +4260,7 @@ async function extractObjc(filePath) {
3792
4260
  return { nodes: [], edges: [], error: String(e) };
3793
4261
  }
3794
4262
  const root = tree.rootNode;
3795
- const stem = basename2(filePath, extname2(filePath));
4263
+ const stem = basename2(filePath, extname3(filePath));
3796
4264
  const strPath = filePath;
3797
4265
  const nodes = [];
3798
4266
  const edges = [];
@@ -3963,7 +4431,7 @@ async function extractObjc(filePath) {
3963
4431
  const pair = `${callerNid}|${candidate}`;
3964
4432
  if (!seenCalls.has(pair) && callerNid !== candidate) {
3965
4433
  seenCalls.add(pair);
3966
- addEdge(callerNid, candidate, "calls", bodyNode.startPosition.row + 1, "INFERRED", 0.8);
4434
+ addEdge(callerNid, candidate, "calls", bodyNode.startPosition.row + 1, "EXTRACTED", 1);
3967
4435
  }
3968
4436
  }
3969
4437
  }
@@ -3995,7 +4463,7 @@ async function extractElixir(filePath) {
3995
4463
  return { nodes: [], edges: [], error: String(e) };
3996
4464
  }
3997
4465
  const root = tree.rootNode;
3998
- const stem = basename2(filePath, extname2(filePath));
4466
+ const stem = basename2(filePath, extname3(filePath));
3999
4467
  const strPath = filePath;
4000
4468
  const nodes = [];
4001
4469
  const edges = [];
@@ -4154,7 +4622,7 @@ async function extractElixir(filePath) {
4154
4622
  const pair = `${callerNid}|${tgtNid}`;
4155
4623
  if (!seenCallPairs.has(pair)) {
4156
4624
  seenCallPairs.add(pair);
4157
- addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "INFERRED", 0.8);
4625
+ addEdge(callerNid, tgtNid, "calls", node.startPosition.row + 1, "EXTRACTED", 1);
4158
4626
  }
4159
4627
  }
4160
4628
  }
@@ -4181,7 +4649,7 @@ async function _resolveCrossFileImports(perFile, paths) {
4181
4649
  for (const node of fileResult.nodes ?? []) {
4182
4650
  const src = node.source_file ?? "";
4183
4651
  if (!src) continue;
4184
- const fileStem = basename2(src, extname2(src));
4652
+ const fileStem = basename2(src, extname3(src));
4185
4653
  const label = node.label ?? "";
4186
4654
  const nid = node.id ?? "";
4187
4655
  if (label && !label.endsWith(")") && !label.endsWith(".py") && !label.startsWith("_")) {
@@ -4193,7 +4661,7 @@ async function _resolveCrossFileImports(perFile, paths) {
4193
4661
  const newEdges = [];
4194
4662
  const stemToPath = /* @__PURE__ */ new Map();
4195
4663
  for (const p of paths) {
4196
- stemToPath.set(basename2(p, extname2(p)), p);
4664
+ stemToPath.set(basename2(p, extname3(p)), p);
4197
4665
  }
4198
4666
  for (let idx = 0; idx < perFile.length; idx++) {
4199
4667
  let walkImports = function(node) {
@@ -4257,7 +4725,7 @@ async function _resolveCrossFileImports(perFile, paths) {
4257
4725
  };
4258
4726
  const fileResult = perFile[idx];
4259
4727
  const filePath = paths[idx];
4260
- const fileStem = basename2(filePath, extname2(filePath));
4728
+ const fileStem = basename2(filePath, extname3(filePath));
4261
4729
  const strPath = filePath;
4262
4730
  const localClasses = fileResult.nodes.filter(
4263
4731
  (n) => n.source_file === strPath && !n.label.endsWith(")") && !n.label.endsWith(".py") && n.id !== _makeId(fileStem)
@@ -4283,7 +4751,7 @@ async function extractWithDiagnostics(paths) {
4283
4751
  if (paths.length === 0) {
4284
4752
  root = ".";
4285
4753
  } else if (paths.length === 1) {
4286
- root = dirname(paths[0]);
4754
+ root = dirname2(paths[0]);
4287
4755
  } else {
4288
4756
  const parts = paths.map((p) => p.split(sep2));
4289
4757
  const minLen = Math.min(...parts.map((p) => p.length));
@@ -4306,7 +4774,7 @@ async function extractWithDiagnostics(paths) {
4306
4774
  `);
4307
4775
  }
4308
4776
  const filePath = paths[i];
4309
- const ext = extname2(filePath);
4777
+ const ext = extname3(filePath);
4310
4778
  const extractor = _DISPATCH[ext];
4311
4779
  if (!extractor) continue;
4312
4780
  const cached = loadCached(filePath, root);
@@ -4332,9 +4800,9 @@ async function extractWithDiagnostics(paths) {
4332
4800
  allNodes.push(...result.nodes ?? []);
4333
4801
  allEdges.push(...result.edges ?? []);
4334
4802
  }
4335
- const pyPaths = paths.filter((p) => extname2(p) === ".py");
4803
+ const pyPaths = paths.filter((p) => extname3(p) === ".py");
4336
4804
  if (pyPaths.length > 0) {
4337
- const pyResults = perFile.filter((_r, i) => extname2(paths[i]) === ".py");
4805
+ const pyResults = perFile.filter((_r, i) => extname3(paths[i]) === ".py");
4338
4806
  try {
4339
4807
  const crossFileEdges = await _resolveCrossFileImports(pyResults, pyPaths);
4340
4808
  allEdges.push(...crossFileEdges);
@@ -4388,7 +4856,7 @@ function collectFiles(target, options) {
4388
4856
  const real = realpathSync(fullPath);
4389
4857
  if (visited.has(real)) continue;
4390
4858
  visited.add(real);
4391
- const parentReal = realpathSync(dirname(fullPath));
4859
+ const parentReal = realpathSync(dirname2(fullPath));
4392
4860
  if (parentReal === real || parentReal.startsWith(real + sep2)) continue;
4393
4861
  } catch {
4394
4862
  continue;
@@ -4398,7 +4866,7 @@ function collectFiles(target, options) {
4398
4866
  if (pathParts.some((part) => part.startsWith("."))) continue;
4399
4867
  walkDir2(fullPath, visited);
4400
4868
  } else if (stat.isFile()) {
4401
- const ext = extname2(entry);
4869
+ const ext = extname3(entry);
4402
4870
  if (_EXTENSIONS.has(ext)) {
4403
4871
  results.push(fullPath);
4404
4872
  }
@@ -4673,16 +5141,8 @@ var init_extract = __esm({
4673
5141
  }
4674
5142
  });
4675
5143
 
4676
- // src/types.ts
4677
- var FileType = /* @__PURE__ */ ((FileType2) => {
4678
- FileType2["CODE"] = "code";
4679
- FileType2["DOCUMENT"] = "document";
4680
- FileType2["PAPER"] = "paper";
4681
- FileType2["IMAGE"] = "image";
4682
- return FileType2;
4683
- })(FileType || {});
4684
-
4685
5144
  // src/index.ts
5145
+ init_types();
4686
5146
  init_validate();
4687
5147
  init_build();
4688
5148
  init_cluster();
@@ -4692,22 +5152,23 @@ init_export();
4692
5152
 
4693
5153
  // src/wiki.ts
4694
5154
  init_collections();
4695
- import { mkdirSync, writeFileSync as writeFileSync2 } from "fs";
4696
- import { join } from "path";
5155
+ init_graph();
5156
+ import { mkdirSync as mkdirSync2, writeFileSync as writeFileSync3 } from "fs";
5157
+ import { join as join2 } from "path";
4697
5158
  function safeFilename(name) {
4698
- return name.replace(/\//g, "-").replace(/ /g, "_").replace(/:/g, "-");
5159
+ return name.replace(/\r\n/g, " ").replace(/\r/g, " ").replace(/\n/g, " ").replace(/\//g, "-").replace(/ /g, "_").replace(/:/g, "-");
4699
5160
  }
4700
5161
  function crossCommunityLinks(G, nodes, ownCid, labels) {
4701
5162
  const labelMap = toNumericMap(labels);
4702
5163
  const counts = /* @__PURE__ */ new Map();
4703
5164
  for (const nid of nodes) {
4704
- G.forEachNeighbor(nid, (neighbor) => {
5165
+ for (const neighbor of traversalNeighbors(G, nid)) {
4705
5166
  const ncid = G.getNodeAttribute(neighbor, "community");
4706
5167
  if (ncid !== void 0 && ncid !== ownCid) {
4707
5168
  const label = labelMap.get(ncid) ?? `Community ${ncid}`;
4708
5169
  counts.set(label, (counts.get(label) ?? 0) + 1);
4709
5170
  }
4710
- });
5171
+ }
4711
5172
  }
4712
5173
  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
4713
5174
  }
@@ -4771,15 +5232,15 @@ function godNodeArticle(G, nid, labels) {
4771
5232
  const nodeLabel = d.label ?? nid;
4772
5233
  const src = d.source_file ?? "";
4773
5234
  const cid = d.community;
4774
- const communityName = cid !== void 0 ? labels.get(cid) ?? `Community ${cid}` : void 0;
5235
+ const communityName2 = cid !== void 0 ? labels.get(cid) ?? `Community ${cid}` : void 0;
4775
5236
  const lines = [];
4776
5237
  lines.push(`# ${nodeLabel}`, "");
4777
5238
  lines.push(`> God node \xB7 ${G.degree(nid)} connections \xB7 \`${src}\``, "");
4778
- if (communityName) {
4779
- lines.push(`**Community:** [[${communityName}]]`, "");
5239
+ if (communityName2) {
5240
+ lines.push(`**Community:** [[${communityName2}]]`, "");
4780
5241
  }
4781
5242
  const byRelation = /* @__PURE__ */ new Map();
4782
- const neighbors = [...G.neighbors(nid)].sort((a, b) => G.degree(b) - G.degree(a));
5243
+ const neighbors = traversalNeighbors(G, nid).sort((a, b) => G.degree(b) - G.degree(a));
4783
5244
  for (const neighbor of neighbors) {
4784
5245
  const ed = G.getEdgeAttributes(G.edge(nid, neighbor));
4785
5246
  const rel = ed.relation ?? "related";
@@ -4787,421 +5248,90 @@ function godNodeArticle(G, nid, labels) {
4787
5248
  const conf = ed.confidence ?? "";
4788
5249
  const confStr = conf ? ` \`${conf}\`` : "";
4789
5250
  if (!byRelation.has(rel)) byRelation.set(rel, []);
4790
- byRelation.get(rel).push(`[[${neighborLabel}]]${confStr}`);
4791
- }
4792
- lines.push("## Connections by Relation", "");
4793
- for (const [rel, targets] of [...byRelation.entries()].sort()) {
4794
- lines.push(`### ${rel}`);
4795
- for (const t of targets.slice(0, 20)) {
4796
- lines.push(`- ${t}`);
4797
- }
4798
- lines.push("");
4799
- }
4800
- lines.push("---", "", "*Part of the graphify knowledge wiki. See [[index]] to navigate.*");
4801
- return lines.join("\n");
4802
- }
4803
- function indexMd(communities, labels, godNodesData, totalNodes, totalEdges) {
4804
- const lines = [
4805
- "# Knowledge Graph Index",
4806
- "",
4807
- "> Auto-generated by graphify. Start here \u2014 read community articles for context, then drill into god nodes for detail.",
4808
- "",
4809
- `**${totalNodes} nodes \xB7 ${totalEdges} edges \xB7 ${communities.size} communities**`,
4810
- "",
4811
- "---",
4812
- "",
4813
- "## Communities",
4814
- "(sorted by size, largest first)",
4815
- ""
4816
- ];
4817
- const sorted = [...communities.entries()].sort((a, b) => b[1].length - a[1].length);
4818
- for (const [cid, nodes] of sorted) {
4819
- const label = labels.get(cid) ?? `Community ${cid}`;
4820
- lines.push(`- [[${label}]] \u2014 ${nodes.length} nodes`);
4821
- }
4822
- lines.push("");
4823
- if (godNodesData.length > 0) {
4824
- lines.push("## God Nodes", "(most connected concepts \u2014 the load-bearing abstractions)", "");
4825
- for (const node of godNodesData) {
4826
- lines.push(`- [[${node.label}]] \u2014 ${node.edges} connections`);
4827
- }
4828
- lines.push("");
4829
- }
4830
- lines.push(
4831
- "---",
4832
- "",
4833
- "*Generated by [graphify](https://github.com/safishamsi/graphify)*"
4834
- );
4835
- return lines.join("\n");
4836
- }
4837
- function toWiki(G, communities, outputDir, options) {
4838
- const communityMap = toNumericMap(communities);
4839
- mkdirSync(outputDir, { recursive: true });
4840
- const labels = options?.communityLabels ? toNumericMap(options.communityLabels) : new Map([...communityMap.keys()].map((cid) => [cid, `Community ${cid}`]));
4841
- const cohesion = toNumericMap(options?.cohesion);
4842
- const godNodesData = options?.godNodesData ?? [];
4843
- let count = 0;
4844
- for (const [cid, nodes] of communityMap) {
4845
- const label = labels.get(cid) ?? `Community ${cid}`;
4846
- const article = communityArticle(G, cid, nodes, label, labels, cohesion.get(cid));
4847
- writeFileSync2(join(outputDir, `${safeFilename(label)}.md`), article);
4848
- count++;
4849
- }
4850
- for (const nodeData of godNodesData) {
4851
- const nid = nodeData.id;
4852
- if (nid && G.hasNode(nid)) {
4853
- const article = godNodeArticle(G, nid, labels);
4854
- writeFileSync2(join(outputDir, `${safeFilename(nodeData.label)}.md`), article);
4855
- count++;
4856
- }
4857
- }
4858
- writeFileSync2(
4859
- join(outputDir, "index.md"),
4860
- indexMd(communityMap, labels, godNodesData, G.order, G.size)
4861
- );
4862
- return count;
4863
- }
4864
-
4865
- // src/detect.ts
4866
- import {
4867
- readdirSync,
4868
- readFileSync,
4869
- writeFileSync as writeFileSync3,
4870
- statSync,
4871
- existsSync as existsSync2,
4872
- mkdirSync as mkdirSync2,
4873
- lstatSync
4874
- } from "fs";
4875
- import { join as join2, resolve, extname, basename, relative, sep } from "path";
4876
- import { createHash } from "crypto";
4877
- var MANIFEST_PATH = "graphify-out/manifest.json";
4878
- var CODE_EXTENSIONS2 = /* @__PURE__ */ new Set([
4879
- ".py",
4880
- ".ts",
4881
- ".js",
4882
- ".jsx",
4883
- ".tsx",
4884
- ".go",
4885
- ".rs",
4886
- ".java",
4887
- ".cpp",
4888
- ".cc",
4889
- ".cxx",
4890
- ".c",
4891
- ".h",
4892
- ".hpp",
4893
- ".rb",
4894
- ".swift",
4895
- ".kt",
4896
- ".kts",
4897
- ".cs",
4898
- ".scala",
4899
- ".php",
4900
- ".lua",
4901
- ".toc",
4902
- ".zig",
4903
- ".ps1",
4904
- ".ex",
4905
- ".exs",
4906
- ".m",
4907
- ".mm",
4908
- ".jl"
4909
- ]);
4910
- var DOC_EXTENSIONS = /* @__PURE__ */ new Set([".md", ".txt", ".rst"]);
4911
- var PAPER_EXTENSIONS2 = /* @__PURE__ */ new Set([".pdf"]);
4912
- var IMAGE_EXTENSIONS2 = /* @__PURE__ */ new Set([".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"]);
4913
- var OFFICE_EXTENSIONS = /* @__PURE__ */ new Set([".docx", ".xlsx"]);
4914
- var CORPUS_WARN_THRESHOLD = 5e4;
4915
- var CORPUS_UPPER_THRESHOLD = 5e5;
4916
- var FILE_COUNT_UPPER = 200;
4917
- var SENSITIVE_PATTERNS = [
4918
- /(^|[\\/])\.(env|envrc)(\.|$)/i,
4919
- /\.(pem|key|p12|pfx|cert|crt|der|p8)$/i,
4920
- /(credential|secret|passwd|password|token|private_key)/i,
4921
- /(id_rsa|id_dsa|id_ecdsa|id_ed25519)(\.pub)?$/,
4922
- /(\.netrc|\.pgpass|\.htpasswd)$/i,
4923
- /(aws_credentials|gcloud_credentials|service.account)/i
4924
- ];
4925
- var PAPER_SIGNALS = [
4926
- /\barxiv\b/i,
4927
- /\bdoi\s*:/i,
4928
- /\babstract\b/i,
4929
- /\bproceedings\b/i,
4930
- /\bjournal\b/i,
4931
- /\bpreprint\b/i,
4932
- /\\cite\{/,
4933
- /\[\d+\]/,
4934
- /\[\n\d+\n\]/,
4935
- /eq\.\s*\d+|equation\s+\d+/i,
4936
- /\d{4}\.\d{4,5}/,
4937
- /\bwe propose\b/i,
4938
- /\bliterature\b/i
4939
- ];
4940
- var PAPER_SIGNAL_THRESHOLD = 3;
4941
- function isSensitive(filePath) {
4942
- const name = basename(filePath);
4943
- return SENSITIVE_PATTERNS.some((p) => p.test(name) || p.test(filePath));
4944
- }
4945
- function looksLikePaper(filePath) {
4946
- try {
4947
- const text = readFileSync(filePath, "utf-8").slice(0, 3e3);
4948
- const hits = PAPER_SIGNALS.filter((p) => p.test(text)).length;
4949
- return hits >= PAPER_SIGNAL_THRESHOLD;
4950
- } catch {
4951
- return false;
4952
- }
4953
- }
4954
- var ASSET_DIR_MARKERS = /* @__PURE__ */ new Set([".imageset", ".xcassets", ".appiconset", ".colorset", ".launchimage"]);
4955
- function classifyFile(filePath) {
4956
- const ext = extname(filePath).toLowerCase();
4957
- if (CODE_EXTENSIONS2.has(ext)) return "code" /* CODE */;
4958
- if (PAPER_EXTENSIONS2.has(ext)) {
4959
- const parts = filePath.split(sep);
4960
- if (parts.some((p) => [...ASSET_DIR_MARKERS].some((m) => p.endsWith(m)))) return null;
4961
- return "paper" /* PAPER */;
4962
- }
4963
- if (IMAGE_EXTENSIONS2.has(ext)) return "image" /* IMAGE */;
4964
- if (DOC_EXTENSIONS.has(ext)) {
4965
- if (looksLikePaper(filePath)) return "paper" /* PAPER */;
4966
- return "document" /* DOCUMENT */;
4967
- }
4968
- if (OFFICE_EXTENSIONS.has(ext)) return "document" /* DOCUMENT */;
4969
- return null;
4970
- }
4971
- function countWords(filePath) {
4972
- try {
4973
- const text = readFileSync(filePath, "utf-8");
4974
- return text.split(/\s+/).filter(Boolean).length;
4975
- } catch {
4976
- return 0;
4977
- }
4978
- }
4979
- var SKIP_DIRS = /* @__PURE__ */ new Set([
4980
- "venv",
4981
- ".venv",
4982
- "env",
4983
- ".env",
4984
- "node_modules",
4985
- "__pycache__",
4986
- ".git",
4987
- "dist",
4988
- "build",
4989
- "target",
4990
- "out",
4991
- "site-packages",
4992
- "lib64",
4993
- ".pytest_cache",
4994
- ".mypy_cache",
4995
- ".ruff_cache",
4996
- ".tox",
4997
- ".eggs"
4998
- ]);
4999
- function isNoiseDir(part) {
5000
- if (SKIP_DIRS.has(part)) return true;
5001
- if (part.endsWith("_venv") || part.endsWith("_env")) return true;
5002
- if (part.endsWith(".egg-info")) return true;
5003
- return false;
5004
- }
5005
- function loadGraphifyignore(root) {
5006
- const ignoreFile = join2(root, ".graphifyignore");
5007
- if (!existsSync2(ignoreFile)) return [];
5008
- const patterns = [];
5009
- for (let line of readFileSync(ignoreFile, "utf-8").split("\n")) {
5010
- line = line.trim();
5011
- if (line && !line.startsWith("#")) {
5012
- patterns.push(line);
5013
- }
5014
- }
5015
- return patterns;
5016
- }
5017
- function matchGlob(text, pattern) {
5018
- const regex = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
5019
- return new RegExp(`^${regex}$`).test(text);
5020
- }
5021
- function isIgnored(filePath, root, patterns) {
5022
- if (patterns.length === 0) return false;
5023
- let rel;
5024
- try {
5025
- rel = relative(root, filePath).replace(/\\/g, "/");
5026
- } catch {
5027
- return false;
5028
- }
5029
- const parts = rel.split("/");
5030
- for (const pattern of patterns) {
5031
- const p = pattern.replace(/^\/+|\/+$/g, "");
5032
- if (!p) continue;
5033
- if (matchGlob(rel, p)) return true;
5034
- if (matchGlob(basename(filePath), p)) return true;
5035
- for (let i = 0; i < parts.length; i++) {
5036
- if (matchGlob(parts[i], p)) return true;
5037
- if (matchGlob(parts.slice(0, i + 1).join("/"), p)) return true;
5038
- }
5039
- }
5040
- return false;
5041
- }
5042
- function walkDir(dir, root, ignorePatterns, followSymlinks, skipPrune) {
5043
- const result = [];
5044
- let entries;
5045
- try {
5046
- entries = readdirSync(dir);
5047
- } catch {
5048
- return result;
5049
- }
5050
- for (const entry of entries) {
5051
- const full = join2(dir, entry);
5052
- let stat;
5053
- try {
5054
- stat = followSymlinks ? statSync(full) : lstatSync(full);
5055
- } catch {
5056
- continue;
5057
- }
5058
- if (stat.isDirectory()) {
5059
- if (!skipPrune) {
5060
- if (entry.startsWith(".")) continue;
5061
- if (isNoiseDir(entry)) continue;
5062
- if (isIgnored(full, root, ignorePatterns)) continue;
5063
- }
5064
- result.push(...walkDir(full, root, ignorePatterns, followSymlinks, skipPrune));
5065
- } else if (stat.isFile()) {
5066
- result.push(full);
5067
- }
5068
- }
5069
- return result;
5070
- }
5071
- function detect(root, options) {
5072
- const followSymlinks = options?.followSymlinks ?? false;
5073
- const rootResolved = resolve(root);
5074
- const ignorePatterns = loadGraphifyignore(rootResolved);
5075
- const convertedDir = join2(rootResolved, "graphify-out", "converted");
5076
- const memoryDir = join2(rootResolved, "graphify-out", "memory");
5077
- const files = {
5078
- code: [],
5079
- document: [],
5080
- paper: [],
5081
- image: []
5082
- };
5083
- let totalWords = 0;
5084
- const skippedSensitive = [];
5085
- const allFiles = walkDir(rootResolved, rootResolved, ignorePatterns, followSymlinks, false);
5086
- if (existsSync2(memoryDir)) {
5087
- allFiles.push(...walkDir(memoryDir, rootResolved, ignorePatterns, followSymlinks, true));
5088
- }
5089
- const seen = /* @__PURE__ */ new Set();
5090
- for (const p of allFiles) {
5091
- if (seen.has(p)) continue;
5092
- seen.add(p);
5093
- const inMemory = existsSync2(memoryDir) && p.startsWith(memoryDir);
5094
- if (!inMemory) {
5095
- if (basename(p).startsWith(".")) continue;
5096
- if (p.startsWith(convertedDir)) continue;
5097
- }
5098
- if (isIgnored(p, rootResolved, ignorePatterns)) continue;
5099
- if (isSensitive(p)) {
5100
- skippedSensitive.push(p);
5101
- continue;
5102
- }
5103
- const ftype = classifyFile(p);
5104
- if (!ftype) continue;
5105
- if (OFFICE_EXTENSIONS.has(extname(p).toLowerCase())) {
5106
- skippedSensitive.push(p + " [office conversion requires async - use pipeline]");
5107
- continue;
5108
- }
5109
- files[ftype].push(p);
5110
- totalWords += countWords(p);
5111
- }
5112
- const totalFiles = Object.values(files).reduce((s, v) => s + v.length, 0);
5113
- const needsGraph = totalWords >= CORPUS_WARN_THRESHOLD;
5114
- let warning = null;
5115
- if (!needsGraph) {
5116
- warning = `Corpus is ~${totalWords.toLocaleString()} words - fits in a single context window. You may not need a graph.`;
5117
- } else if (totalWords >= CORPUS_UPPER_THRESHOLD || totalFiles >= FILE_COUNT_UPPER) {
5118
- warning = `Large corpus: ${totalFiles} files \xB7 ~${totalWords.toLocaleString()} words. Semantic extraction will be expensive (many Claude tokens). Consider running on a subfolder, or use --no-semantic to run AST-only.`;
5251
+ byRelation.get(rel).push(`[[${neighborLabel}]]${confStr}`);
5119
5252
  }
5120
- return {
5121
- files,
5122
- total_files: totalFiles,
5123
- total_words: totalWords,
5124
- needs_graph: needsGraph,
5125
- warning,
5126
- skipped_sensitive: skippedSensitive,
5127
- graphifyignore_patterns: ignorePatterns.length
5128
- };
5129
- }
5130
- function loadManifest(manifestPath = MANIFEST_PATH) {
5131
- try {
5132
- return JSON.parse(readFileSync(manifestPath, "utf-8"));
5133
- } catch {
5134
- return {};
5253
+ lines.push("## Connections by Relation", "");
5254
+ for (const [rel, targets] of [...byRelation.entries()].sort()) {
5255
+ lines.push(`### ${rel}`);
5256
+ for (const t of targets.slice(0, 20)) {
5257
+ lines.push(`- ${t}`);
5258
+ }
5259
+ lines.push("");
5135
5260
  }
5261
+ lines.push("---", "", "*Part of the graphify knowledge wiki. See [[index]] to navigate.*");
5262
+ return lines.join("\n");
5136
5263
  }
5137
- function saveManifest(files, manifestPath = MANIFEST_PATH) {
5138
- const manifest = {};
5139
- for (const fileList of Object.values(files)) {
5140
- for (const f of fileList) {
5141
- try {
5142
- manifest[f] = statSync(f).mtimeMs;
5143
- } catch {
5144
- }
5264
+ function indexMd(communities, labels, godNodesData, totalNodes, totalEdges) {
5265
+ const lines = [
5266
+ "# Knowledge Graph Index",
5267
+ "",
5268
+ "> Auto-generated by graphify. Start here \u2014 read community articles for context, then drill into god nodes for detail.",
5269
+ "",
5270
+ `**${totalNodes} nodes \xB7 ${totalEdges} edges \xB7 ${communities.size} communities**`,
5271
+ "",
5272
+ "---",
5273
+ "",
5274
+ "## Communities",
5275
+ "(sorted by size, largest first)",
5276
+ ""
5277
+ ];
5278
+ const sorted = [...communities.entries()].sort((a, b) => b[1].length - a[1].length);
5279
+ for (const [cid, nodes] of sorted) {
5280
+ const label = labels.get(cid) ?? `Community ${cid}`;
5281
+ lines.push(`- [[${label}]] \u2014 ${nodes.length} nodes`);
5282
+ }
5283
+ lines.push("");
5284
+ if (godNodesData.length > 0) {
5285
+ lines.push("## God Nodes", "(most connected concepts \u2014 the load-bearing abstractions)", "");
5286
+ for (const node of godNodesData) {
5287
+ lines.push(`- [[${node.label}]] \u2014 ${node.edges} connections`);
5145
5288
  }
5289
+ lines.push("");
5146
5290
  }
5147
- const dir = join2(manifestPath, "..");
5148
- mkdirSync2(dir, { recursive: true });
5149
- writeFileSync3(manifestPath, JSON.stringify(manifest, null, 2));
5291
+ lines.push(
5292
+ "---",
5293
+ "",
5294
+ "*Generated by [graphify](https://github.com/safishamsi/graphify)*"
5295
+ );
5296
+ return lines.join("\n");
5150
5297
  }
5151
- function detectIncremental(root, manifestPath = MANIFEST_PATH) {
5152
- const full = detect(root);
5153
- const manifest = loadManifest(manifestPath);
5154
- if (Object.keys(manifest).length === 0) {
5155
- return {
5156
- ...full,
5157
- incremental: true,
5158
- new_files: full.files,
5159
- unchanged_files: Object.fromEntries(Object.keys(full.files).map((k) => [k, []])),
5160
- new_total: full.total_files
5161
- };
5162
- }
5163
- const newFiles = {};
5164
- const unchangedFiles = {};
5165
- for (const k of Object.keys(full.files)) {
5166
- newFiles[k] = [];
5167
- unchangedFiles[k] = [];
5298
+ function toWiki(G, communities, outputDir, options) {
5299
+ const communityMap = toNumericMap(communities);
5300
+ mkdirSync2(outputDir, { recursive: true });
5301
+ const labels = options?.communityLabels ? toNumericMap(options.communityLabels) : new Map([...communityMap.keys()].map((cid) => [cid, `Community ${cid}`]));
5302
+ const cohesion = toNumericMap(options?.cohesion);
5303
+ const godNodesData = options?.godNodesData ?? [];
5304
+ let count = 0;
5305
+ for (const [cid, nodes] of communityMap) {
5306
+ const label = labels.get(cid) ?? `Community ${cid}`;
5307
+ const article = communityArticle(G, cid, nodes, label, labels, cohesion.get(cid));
5308
+ writeFileSync3(join2(outputDir, `${safeFilename(label)}.md`), article);
5309
+ count++;
5168
5310
  }
5169
- for (const [ftype, fileList] of Object.entries(full.files)) {
5170
- for (const f of fileList) {
5171
- const storedMtime = manifest[f];
5172
- let currentMtime = 0;
5173
- try {
5174
- currentMtime = statSync(f).mtimeMs;
5175
- } catch {
5176
- }
5177
- if (storedMtime === void 0 || currentMtime > storedMtime) {
5178
- newFiles[ftype].push(f);
5179
- } else {
5180
- unchangedFiles[ftype].push(f);
5181
- }
5311
+ for (const nodeData of godNodesData) {
5312
+ const nid = nodeData.id;
5313
+ if (nid && G.hasNode(nid)) {
5314
+ const article = godNodeArticle(G, nid, labels);
5315
+ writeFileSync3(join2(outputDir, `${safeFilename(nodeData.label)}.md`), article);
5316
+ count++;
5182
5317
  }
5183
5318
  }
5184
- const currentFiles = new Set(Object.values(full.files).flat());
5185
- const deletedFiles = Object.keys(manifest).filter((f) => !currentFiles.has(f));
5186
- const newTotal = Object.values(newFiles).reduce((s, v) => s + v.length, 0);
5187
- return {
5188
- ...full,
5189
- incremental: true,
5190
- new_files: newFiles,
5191
- unchanged_files: unchangedFiles,
5192
- new_total: newTotal,
5193
- deleted_files: deletedFiles
5194
- };
5319
+ writeFileSync3(
5320
+ join2(outputDir, "index.md"),
5321
+ indexMd(communityMap, labels, godNodesData, G.order, G.size)
5322
+ );
5323
+ return count;
5195
5324
  }
5196
5325
 
5197
5326
  // src/index.ts
5327
+ init_detect();
5198
5328
  init_extract();
5199
5329
  init_cache();
5200
5330
  init_security();
5201
5331
 
5202
5332
  // src/benchmark.ts
5333
+ init_graph();
5203
5334
  import { readFileSync as readFileSync4, existsSync as existsSync5 } from "fs";
5204
- import Graph2 from "graphology";
5205
5335
  var CHARS_PER_TOKEN = 4;
5206
5336
  function estimateTokens(text) {
5207
5337
  return Math.max(1, Math.floor(text.length / CHARS_PER_TOKEN));
@@ -5223,7 +5353,7 @@ function querySubgraphTokens(G, question, depth = 3) {
5223
5353
  for (let d = 0; d < depth; d++) {
5224
5354
  const nextFrontier = /* @__PURE__ */ new Set();
5225
5355
  for (const n of frontier) {
5226
- G.forEachNeighbor(n, (neighbor) => {
5356
+ forEachTraversalNeighbor(G, n, (neighbor) => {
5227
5357
  if (!visited.has(neighbor)) {
5228
5358
  nextFrontier.add(neighbor);
5229
5359
  edgesSeen.push([n, neighbor]);
@@ -5258,21 +5388,7 @@ var SAMPLE_QUESTIONS = [
5258
5388
  ];
5259
5389
  function loadGraph(graphPath) {
5260
5390
  const raw = JSON.parse(readFileSync4(graphPath, "utf-8"));
5261
- const G = new Graph2({ type: "undirected" });
5262
- for (const node of raw.nodes ?? []) {
5263
- const { id, ...attrs } = node;
5264
- G.mergeNode(id, attrs);
5265
- }
5266
- for (const link of raw.links ?? []) {
5267
- const { source, target, ...attrs } = link;
5268
- if (G.hasNode(source) && G.hasNode(target)) {
5269
- try {
5270
- G.mergeEdge(source, target, attrs);
5271
- } catch {
5272
- }
5273
- }
5274
- }
5275
- return G;
5391
+ return loadGraphFromData(raw);
5276
5392
  }
5277
5393
  function runBenchmark(graphPath = "graphify-out/graph.json", corpusWordsOrOptions, questions) {
5278
5394
  const options = typeof corpusWordsOrOptions === "number" ? { corpusWords: corpusWordsOrOptions, questions } : corpusWordsOrOptions ?? {};
@@ -5336,8 +5452,404 @@ graphify token reduction benchmark`);
5336
5452
 
5337
5453
  // src/ingest.ts
5338
5454
  init_security();
5339
- import { existsSync as existsSync6, mkdirSync as mkdirSync4, writeFileSync as writeFileSync5 } from "fs";
5340
- import { resolve as pathResolve2, basename as basename3, extname as extname3 } from "path";
5455
+ import { existsSync as existsSync7, mkdirSync as mkdirSync5, writeFileSync as writeFileSync6 } from "fs";
5456
+ import { resolve as pathResolve2, basename as basename4, extname as extname5 } from "path";
5457
+
5458
+ // src/transcribe.ts
5459
+ import * as childProcess from "child_process";
5460
+ import { createHash as createHash3 } from "crypto";
5461
+ import {
5462
+ cpSync,
5463
+ createWriteStream,
5464
+ existsSync as existsSync6,
5465
+ mkdirSync as mkdirSync4,
5466
+ mkdtempSync,
5467
+ readdirSync as readdirSync4,
5468
+ renameSync as renameSync2,
5469
+ rmSync,
5470
+ writeFileSync as writeFileSync5
5471
+ } from "fs";
5472
+ import { homedir, platform, tmpdir } from "os";
5473
+ import { basename as basename3, dirname as dirname3, extname as extname4, join as join5, resolve as resolve5 } from "path";
5474
+ import { Readable } from "stream";
5475
+ import { pipeline } from "stream/promises";
5476
// Input prefixes that isUrl() treats as a remote URL rather than a local path.
var URL_PREFIXES = ["http://", "https://", "www."];

// Extensions probed by downloadAudio() when looking for an already-downloaded file.
var CACHED_AUDIO_EXTENSIONS = [".m4a", ".opus", ".mp3", ".ogg", ".wav", ".webm"];

// Model used when neither an argument nor GRAPHIFY_WHISPER_MODEL selects one.
var DEFAULT_MODEL = "base";

// Default directory that transcribe() writes transcript text files into.
var TRANSCRIPTS_DIR = "graphify-out/transcripts";

// Prompt used when no override and no graph-derived labels are available.
var FALLBACK_PROMPT = "Use proper punctuation and paragraph breaks.";

// Base URL from which ensureWhisperArtifacts() downloads model archives.
var SHERPA_RELEASE_BASE = "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models";

// Sample rate (Hz) used both for the ffmpeg conversion and the recognizer config.
var AUDIO_SAMPLE_RATE = 16000;

// Model names accepted by resolveRequestedModel(); insertion order matches the
// original listing (each base size is followed by its ".en" variant).
var SUPPORTED_MODELS = new Set([
  ...["tiny", "base", "small", "medium"].flatMap((size) => [size, `${size}.en`]),
  "large-v1",
  "large-v2",
  "large-v3",
  "turbo",
  "distil-small.en",
  "distil-medium.en",
  "distil-large-v2",
  "distil-large-v3",
  "distil-large-v3.5",
]);

// Friendly names mapped onto a concrete entry of SUPPORTED_MODELS.
var MODEL_ALIASES = { large: "large-v3" };

// Recognizer promises keyed by model directory (see getRecognizer()).
var recognizerCache = new Map();

// Memoized dynamic import of sherpa-onnx-node (see loadSherpaModule()).
var sherpaModulePromise = null;
5507
/**
 * Run an external program synchronously and return the `spawnSync` result.
 *
 * Output is decoded as UTF-8 unless `options` overrides the encoding.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `spawnSync` options, merged over the defaults.
 * @returns {object} The `spawnSync` result when the process exits with status 0.
 * @throws {Error} The spawn error itself, or an Error carrying trimmed stderr,
 *   trimmed stdout, or "<command> failed" when the exit status is not 0.
 */
function runCommand(command, args, options) {
  const spawnOptions = { encoding: "utf-8", ...options };
  const outcome = childProcess.spawnSync(command, args, spawnOptions);
  if (outcome.error) {
    throw outcome.error;
  }
  if (outcome.status === 0) {
    return outcome;
  }
  // Prefer whatever the process printed; fall back to a generic message.
  const message = outcome.stderr?.trim() || outcome.stdout?.trim() || `${command} failed`;
  throw new Error(message);
}
5520
/**
 * Resolve the directory used to cache downloaded Whisper model files.
 *
 * Precedence:
 *   1. GRAPHIFY_WHISPER_CACHE_DIR (resolved to an absolute path)
 *   2. On win32: %LOCALAPPDATA%\graphify\whisper, defaulting LOCALAPPDATA to
 *      <home>\AppData\Local
 *   3. Elsewhere: $XDG_CACHE_HOME/graphify/whisper, defaulting XDG_CACHE_HOME
 *      to <home>/.cache
 *
 * Environment variables that are set but empty are treated as unset. Using
 * `??` here would accept "" and produce a path relative to the current
 * working directory; the XDG spec also says an empty value means "use the default".
 *
 * @returns {string} Path of the cache directory (not created here).
 */
function defaultWhisperCacheDir() {
  const override = process.env.GRAPHIFY_WHISPER_CACHE_DIR;
  if (override) {
    return resolve5(override);
  }
  if (platform() === "win32") {
    const localAppData = process.env.LOCALAPPDATA || join5(homedir(), "AppData", "Local");
    return join5(localAppData, "graphify", "whisper");
  }
  const xdgCacheHome = process.env.XDG_CACHE_HOME || join5(homedir(), ".cache");
  return join5(xdgCacheHome, "graphify", "whisper");
}
5533
/**
 * Name or path of the ffmpeg executable to spawn.
 *
 * @returns {string} GRAPHIFY_FFMPEG_BIN when defined, otherwise "ffmpeg".
 */
function ffmpegBinary() {
  const { GRAPHIFY_FFMPEG_BIN: configured } = process.env;
  if (configured === undefined || configured === null) {
    return "ffmpeg";
  }
  return configured;
}
5536
/**
 * Name or path of the tar executable used to unpack model archives.
 *
 * @returns {string} GRAPHIFY_TAR_BIN when defined, otherwise "tar".
 */
function tarBinary() {
  const { GRAPHIFY_TAR_BIN: configured } = process.env;
  if (configured === undefined || configured === null) {
    return "tar";
  }
  return configured;
}
5539
/**
 * Determine which Whisper model to use and validate it.
 *
 * The name comes from `modelName`, then GRAPHIFY_WHISPER_MODEL, then
 * DEFAULT_MODEL. Aliases in MODEL_ALIASES are expanded before validation.
 *
 * @param {string} [modelName] - Explicit model name, if any.
 * @returns {{requested: string, resolved: string}} The name as requested and
 *   the name after alias expansion.
 * @throws {Error} When the resolved name is not in SUPPORTED_MODELS. The
 *   message starts with "Unsupported GRAPHIFY_WHISPER_MODEL", which
 *   transcribe() checks for.
 */
function resolveRequestedModel(modelName) {
  const requested = modelName ?? process.env.GRAPHIFY_WHISPER_MODEL ?? DEFAULT_MODEL;
  const alias = MODEL_ALIASES[requested];
  const resolved = alias ?? requested;
  if (SUPPORTED_MODELS.has(resolved)) {
    return { requested, resolved };
  }
  throw new Error(
    `Unsupported GRAPHIFY_WHISPER_MODEL "${requested}". Supported local TS models: ${[...SUPPORTED_MODELS].sort().join(", ")}`
  );
}
5549
/**
 * Recursively list every non-directory entry beneath `dir`.
 *
 * Entries are visited in `readdirSync` order and a directory's contents appear
 * at the position of the directory itself.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths formed by joining `dir` with each nested entry
 *   name; an empty array when `dir` does not exist.
 */
function walkFiles(dir) {
  if (!existsSync6(dir)) {
    return [];
  }
  return readdirSync4(dir, { withFileTypes: true }).flatMap((entry) => {
    const fullPath = join5(dir, entry.name);
    return entry.isDirectory() ? walkFiles(fullPath) : [fullPath];
  });
}
5562
/**
 * Look beneath `dir` for the three files a Whisper model needs.
 *
 * For encoder and decoder the ".int8.onnx" file is preferred over the plain
 * ".onnx" file when both exist.
 *
 * @param {string} dir - Directory to search recursively.
 * @returns {{modelDir: string, encoderPath: string, decoderPath: string, tokensPath: string} | null}
 *   The located paths, or null when any of the three files is missing.
 */
function findArtifactsIn(dir) {
  const candidates = walkFiles(dir);
  // First file matching the earliest suffix in the list wins.
  const firstWithSuffix = (...suffixes) => {
    for (const suffix of suffixes) {
      const hit = candidates.find((file) => file.endsWith(suffix));
      if (hit !== undefined) {
        return hit;
      }
    }
    return undefined;
  };
  const encoderPath = firstWithSuffix("-encoder.int8.onnx", "-encoder.onnx");
  const decoderPath = firstWithSuffix("-decoder.int8.onnx", "-decoder.onnx");
  const tokensPath = firstWithSuffix("-tokens.txt");
  if (encoderPath && decoderPath && tokensPath) {
    return { modelDir: dir, encoderPath, decoderPath, tokensPath };
  }
  return null;
}
5577
/**
 * Add a hint to model-download errors caused by a missing release asset.
 *
 * Only the "HTTP 404" status text produced by the downloader is matched. A
 * bare substring check for "404" would also fire on unrelated messages that
 * merely contain those digits, such as a tar error quoting a random temp
 * directory name.
 *
 * @param {string} detail - Original error message.
 * @returns {string} `detail`, with an explanatory sentence appended when it
 *   reports an HTTP 404.
 */
function normalizeModelError(detail) {
  if (!/\bHTTP 404\b/.test(detail)) {
    return detail;
  }
  return `${detail}. The local sherpa-onnx release asset was not found for this Whisper model name.`;
}
5583
/**
 * Stream a fetch `Response` body into a file.
 *
 * @param {Response} response - Response returned by `fetch`.
 * @param {string} destination - Path of the file to create or overwrite.
 * @returns {Promise<void>} Resolves once the body has been fully written.
 * @throws {Error} "HTTP <status> while downloading <url>" when the response is
 *   not OK or has no body; stream errors from the copy are propagated.
 */
async function writeResponseToFile(response, destination) {
  if (!response.ok || !response.body) {
    // Discard the unread body so the underlying stream is released instead of
    // being left pending. Best effort: a failure to cancel (e.g. a locked
    // stream) must not mask the HTTP error reported below.
    await response.body?.cancel().catch(() => {});
    throw new Error(`HTTP ${response.status} while downloading ${response.url}`);
  }
  await pipeline(Readable.fromWeb(response.body), createWriteStream(destination));
}
5589
/**
 * Make sure the model files for a Whisper model exist in the local cache,
 * downloading and unpacking the release archive when they do not.
 *
 * @param {string} [modelName] - Requested model; see resolveRequestedModel().
 * @returns {Promise<object>} `{ requestedModel, resolvedModel, modelDir,
 *   encoderPath, decoderPath, tokensPath }`.
 * @throws {Error} From resolveRequestedModel() for an unsupported model name
 *   (thrown unchanged), or a wrapped error when download, extraction, or cache
 *   population fails. The wrapped error keeps the original as `cause`.
 */
async function ensureWhisperArtifacts(modelName) {
  const { requested, resolved } = resolveRequestedModel(modelName);
  const cacheRoot = defaultWhisperCacheDir();
  mkdirSync4(cacheRoot, { recursive: true });
  const modelDir = join5(cacheRoot, `sherpa-onnx-whisper-${resolved}`);

  const cached = findArtifactsIn(modelDir);
  if (cached) {
    return { requestedModel: requested, resolvedModel: resolved, ...cached };
  }

  // Download and unpack in a scratch directory that is always removed.
  const tempDir = mkdtempSync(join5(tmpdir(), "graphify-whisper-model-"));
  const extractDir = join5(tempDir, "extract");
  const archiveName = `sherpa-onnx-whisper-${resolved}.tar.bz2`;
  const archivePath = join5(tempDir, archiveName);
  mkdirSync4(extractDir, { recursive: true });
  try {
    const url = `${SHERPA_RELEASE_BASE}/${archiveName}`;
    console.log(`  downloading whisper model: ${resolved}`);
    const response = await fetch(url);
    await writeResponseToFile(response, archivePath);
    runCommand(tarBinary(), ["-xjf", archivePath, "-C", extractDir]);

    // Probe each directory that contains a file once (not once per file),
    // in first-seen order, for a complete set of model files.
    const candidateDirs = [...new Set(walkFiles(extractDir).map((path) => dirname3(path)))];
    const extractedRoot = candidateDirs.find((path) => findArtifactsIn(path) !== null);
    const sourceDir = extractedRoot ?? readdirSync4(extractDir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => join5(extractDir, entry.name))
      .find((path) => findArtifactsIn(path) !== null);
    if (!sourceDir) {
      throw new Error(`Downloaded archive for ${resolved} but could not locate Whisper model files`);
    }

    // Replace any stale or incomplete cache entry.
    if (existsSync6(modelDir)) {
      rmSync(modelDir, { recursive: true, force: true });
    }
    try {
      renameSync2(sourceDir, modelDir);
    } catch {
      // The rename failed; fall back to a recursive copy. The scratch copy is
      // removed by the finally block below.
      cpSync(sourceDir, modelDir, { recursive: true });
    }

    const artifacts = findArtifactsIn(modelDir);
    if (!artifacts) {
      throw new Error(`Model cache for ${resolved} is incomplete after extraction`);
    }
    return { requestedModel: requested, resolvedModel: resolved, ...artifacts };
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    // Keep the original error (and its stack) reachable for debugging.
    throw new Error(normalizeModelError(detail), { cause: error });
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
}
5634
/**
 * Lazily import the optional `sherpa-onnx-node` package, memoizing the result.
 *
 * The in-flight promise is stored in `sherpaModulePromise` so repeated calls
 * share one import. On failure the memo is cleared so a later call can retry.
 *
 * @returns {Promise<object>} The module's `default` export when it has one,
 *   otherwise the module namespace itself.
 * @throws {Error} With installation guidance when the import fails.
 */
async function loadSherpaModule() {
  if (sherpaModulePromise) {
    return sherpaModulePromise;
  }
  sherpaModulePromise = (async () => {
    try {
      const imported = await import("sherpa-onnx-node");
      return Reflect.has(imported, "default") ? Reflect.get(imported, "default") : imported;
    } catch (error) {
      sherpaModulePromise = null;
      const detail = error instanceof Error ? error.message : String(error);
      throw new Error(
        `Video transcription requires the optional dependency sherpa-onnx-node. Install it locally, then retry. ${detail}`
      );
    }
  })();
  return sherpaModulePromise;
}
5646
/**
 * Return an offline recognizer for the requested model, creating it on first
 * use and reusing it afterwards.
 *
 * Recognizers are cached in `recognizerCache` as promises keyed by the model
 * directory. A failed creation removes its cache entry so it can be retried.
 *
 * @param {string} [modelName] - Requested model; see ensureWhisperArtifacts().
 * @param {object} [sherpa] - Already-loaded sherpa module; loaded via
 *   loadSherpaModule() when omitted.
 * @returns {Promise<{recognizer: object, artifacts: object}>}
 */
async function getRecognizer(modelName, sherpa) {
  const artifacts = await ensureWhisperArtifacts(modelName);
  const cacheKey = artifacts.modelDir;

  const inFlight = recognizerCache.get(cacheKey);
  if (inFlight) {
    return { recognizer: await inFlight, artifacts };
  }

  const buildRecognizer = async () => {
    const runtime = sherpa ?? (await loadSherpaModule());
    const config = {
      featConfig: {
        sampleRate: AUDIO_SAMPLE_RATE,
        featureDim: 80,
      },
      modelConfig: {
        whisper: {
          encoder: artifacts.encoderPath,
          decoder: artifacts.decoderPath,
          task: "transcribe",
        },
        tokens: artifacts.tokensPath,
        numThreads: 1,
        provider: "cpu",
        debug: 0,
      },
    };
    return runtime.OfflineRecognizer.createAsync(config);
  };

  const pending = buildRecognizer().catch((error) => {
    recognizerCache.delete(cacheKey);
    throw error;
  });
  recognizerCache.set(cacheKey, pending);
  return { recognizer: await pending, artifacts };
}
5682
/**
 * Convert a media file to a mono 16-bit PCM WAV at AUDIO_SAMPLE_RATE via ffmpeg.
 *
 * @param {string} audioPath - Input media file.
 * @param {string} workingDir - Directory that receives "<input stem>.wav".
 * @returns {string} Path of the WAV file that was written.
 * @throws {Error} With ffmpeg installation guidance when the command fails.
 */
function normalizeToWave(audioPath, workingDir) {
  const stem = basename3(audioPath, extname4(audioPath));
  const wavPath = join5(workingDir, `${stem}.wav`);
  try {
    const ffmpegArgs = [
      "-y",
      "-i", audioPath,
      "-vn",
      "-ac", "1",
      "-ar", String(AUDIO_SAMPLE_RATE),
      "-c:a", "pcm_s16le",
      wavPath,
    ];
    runCommand(ffmpegBinary(), ffmpegArgs);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Video transcription requires ffmpeg in PATH. Install ffmpeg locally, then retry. ${detail}`
    );
  }
  return wavPath;
}
5706
/**
 * Pull the transcript string out of a recognizer result.
 *
 * @param {{text?: unknown}} result - Decoding result object.
 * @returns {string} `result.text` coerced to a string and trimmed; "" when it
 *   is null or undefined.
 */
function extractTranscriptText(result) {
  const rawText = result.text;
  const text = rawText ?? "";
  return String(text).trim();
}
5709
/**
 * Tell whether an input should be treated as a remote URL.
 *
 * @param {string} pathLike - User-supplied path or URL.
 * @returns {boolean} True when `pathLike` starts with one of URL_PREFIXES
 *   (the comparison is case-sensitive).
 */
function isUrl(pathLike) {
  for (const prefix of URL_PREFIXES) {
    if (pathLike.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
5712
/**
 * Download the audio track of a URL with yt-dlp, reusing an earlier download
 * when one exists.
 *
 * Files are named "yt_<first 12 hex chars of sha1(url)>.<ext>" inside
 * `outputDir`, so the same URL always maps to the same file stem.
 *
 * @param {string} url - Media URL handed to yt-dlp.
 * @param {string} outputDir - Directory for downloaded audio (created if missing).
 * @returns {string} Path of the cached or freshly downloaded audio file.
 * @throws {Error} When yt-dlp fails, or finishes without leaving a matching file.
 */
function downloadAudio(url, outputDir) {
  mkdirSync4(outputDir, { recursive: true });
  const urlHash = createHash3("sha1").update(url).digest("hex").slice(0, 12);
  const stem = `yt_${urlHash}`;

  // Reuse a previous download if any known audio extension is present.
  const cachedFile = CACHED_AUDIO_EXTENSIONS
    .map((ext) => join5(outputDir, `${stem}${ext}`))
    .find((candidate) => existsSync6(candidate));
  if (cachedFile !== undefined) {
    console.log(`  cached audio: ${basename3(cachedFile)}`);
    return cachedFile;
  }

  const outTemplate = join5(outputDir, `${stem}.%(ext)s`);
  try {
    console.log(`  downloading audio: ${url.slice(0, 80)} ...`);
    const ytDlpArgs = [
      "-f", "bestaudio[ext=m4a]/bestaudio/best",
      "-o", outTemplate,
      "--quiet",
      "--no-warnings",
      "--no-playlist",
      url,
    ];
    runCommand("yt-dlp", ytDlpArgs);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      `YouTube/URL download requires yt-dlp. Install yt-dlp to enable video ingestion. ${detail}`
    );
  }

  // yt-dlp picks the extension, so locate the output by its stem.
  const produced = readdirSync4(outputDir).find((entry) => entry.startsWith(`${stem}.`));
  if (produced !== undefined) {
    return join5(outputDir, produced);
  }
  throw new Error(`yt-dlp finished without producing an audio file for ${url}`);
}
5748
/**
 * Build the initial prompt passed to the recognizer.
 *
 * GRAPHIFY_WHISPER_PROMPT, when non-empty, wins outright. Otherwise the first
 * five non-empty node labels are used as topic hints, followed by
 * FALLBACK_PROMPT; with no labels the result is FALLBACK_PROMPT alone.
 *
 * @param {Array<{label?: string}>} godNodes2 - Nodes whose labels seed the prompt.
 * @returns {string} The prompt text.
 */
function buildWhisperPrompt(godNodes2) {
  const override = process.env.GRAPHIFY_WHISPER_PROMPT;
  if (override) {
    return override;
  }
  const named = [];
  for (const node of godNodes2) {
    const label = node.label ?? "";
    if (label) {
      named.push(label);
    }
  }
  const labels = named.slice(0, 5);
  return labels.length === 0
    ? FALLBACK_PROMPT
    : `Technical discussion about ${labels.join(", ")}. ${FALLBACK_PROMPT}`;
}
5757
/**
 * Transcribe one video/audio file or URL into a ".txt" transcript.
 *
 * URLs (see isUrl()) are first downloaded into "<outputDir>/downloads". The
 * transcript is named after the audio file's stem; an existing transcript is
 * returned as-is unless `force` is set. The model name is read from
 * GRAPHIFY_WHISPER_MODEL, falling back to DEFAULT_MODEL.
 *
 * @param {string} videoPath - Local path or URL of the media to transcribe.
 * @param {string} [outputDir=TRANSCRIPTS_DIR] - Directory for transcripts.
 * @param {string} [initialPrompt] - Prompt hint; defaults to
 *   GRAPHIFY_WHISPER_PROMPT, then FALLBACK_PROMPT.
 * @param {boolean} [force=false] - Re-transcribe even if a transcript exists.
 * @returns {Promise<string>} Path of the transcript file.
 * @throws {Error} Unsupported-model errors are rethrown unchanged; every other
 *   failure is wrapped in a toolchain-guidance message.
 */
async function transcribe(videoPath, outputDir = TRANSCRIPTS_DIR, initialPrompt, force = false) {
  const outDir = resolve5(outputDir);
  mkdirSync4(outDir, { recursive: true });

  let audioPath;
  if (isUrl(videoPath)) {
    audioPath = downloadAudio(videoPath, join5(outDir, "downloads"));
  } else {
    audioPath = resolve5(videoPath);
  }

  const stem = basename3(audioPath, extname4(audioPath));
  const transcriptPath = join5(outDir, `${stem}.txt`);
  if (!force && existsSync6(transcriptPath)) {
    return transcriptPath;
  }

  const prompt = initialPrompt ?? process.env.GRAPHIFY_WHISPER_PROMPT ?? FALLBACK_PROMPT;
  const requestedModel = process.env.GRAPHIFY_WHISPER_MODEL ?? DEFAULT_MODEL;
  // Scratch space for the intermediate WAV; removed in the finally block.
  const tempDir = mkdtempSync(join5(tmpdir(), "graphify-transcribe-"));
  try {
    console.log(`  transcribing ${basename3(audioPath)} (model=${requestedModel}) ...`);
    const wavPath = normalizeToWave(audioPath, tempDir);
    const sherpa = await loadSherpaModule();
    const { recognizer, artifacts } = await getRecognizer(requestedModel, sherpa);
    const wave = sherpa.readWave(wavPath);
    const stream = recognizer.createStream();

    const canSetPrompt = typeof stream.setOption === "function";
    if (prompt && canSetPrompt) {
      try {
        stream.setOption("prompt", prompt);
      } catch {
        // The prompt is only a hint; continue without it if it is rejected.
      }
    }

    stream.acceptWaveform({ samples: wave.samples, sampleRate: wave.sampleRate });
    const result = await recognizer.decodeAsync(stream);
    writeFileSync5(transcriptPath, extractTranscriptText(result), "utf-8");

    if (artifacts.requestedModel !== artifacts.resolvedModel) {
      console.log(`  model alias: ${artifacts.requestedModel} -> ${artifacts.resolvedModel}`);
    }
  } catch (error) {
    const isError = error instanceof Error;
    if (isError && error.message.startsWith("Unsupported GRAPHIFY_WHISPER_MODEL")) {
      throw error;
    }
    const detail = isError ? error.message : String(error);
    throw new Error(
      `Video transcription requires the local TypeScript toolchain: sherpa-onnx-node + ffmpeg. Retry after installing them. ${detail}`
    );
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
  return transcriptPath;
}
5801
/**
 * Transcribe several files one after another, skipping those that fail.
 *
 * A failure for one file is logged as a warning and does not stop the rest.
 *
 * @param {string[]} videoFiles - Paths or URLs to transcribe.
 * @param {string} [outputDir] - Forwarded to transcribe().
 * @param {string} [initialPrompt] - Forwarded to transcribe().
 * @param {boolean} [force=false] - Forwarded to transcribe().
 * @returns {Promise<string[]>} Transcript paths for the files that succeeded,
 *   in input order.
 */
async function transcribeAll(videoFiles, outputDir, initialPrompt, force = false) {
  if (videoFiles.length === 0) {
    return [];
  }
  const completed = [];
  for (const videoFile of videoFiles) {
    let transcriptPath;
    try {
      transcriptPath = await transcribe(videoFile, outputDir, initialPrompt, force);
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      console.log(`  warning: could not transcribe ${videoFile}: ${detail}`);
      continue;
    }
    completed.push(transcriptPath);
  }
  return completed;
}
5816
/**
 * Deep-copy a detection result through a JSON round trip.
 *
 * Only JSON-representable data survives the copy; for example, properties
 * whose value is `undefined` are dropped.
 *
 * @param {object} detection - Detection result to copy.
 * @returns {object} An independent copy.
 */
function cloneDetection(detection) {
  const serialized = JSON.stringify(detection);
  return JSON.parse(serialized);
}
5819
/**
 * Transcribe the video files listed in a detection result and add the
 * resulting transcript paths to its document list.
 *
 * The input is not modified; a copy is made and returned. In incremental mode
 * with a `new_files` section present, that section is used instead of `files`
 * and transcription is forced. `options.whisperModel` is applied by setting
 * GRAPHIFY_WHISPER_MODEL for the duration of the call and restoring it after.
 *
 * @param {object} detection - Detection result with `files` (and optionally
 *   `new_files`) sections.
 * @param {object} [options] - `incremental`, `initialPrompt`, `godNodes`,
 *   `whisperModel`, `outputDir`.
 * @returns {Promise<{detection: object, transcriptPaths: string[], prompt: string}>}
 */
async function augmentDetectionWithTranscripts(detection, options) {
  const nextDetection = cloneDetection(detection);

  let source = nextDetection.files;
  if (options?.incremental && nextDetection.new_files) {
    source = nextDetection.new_files;
  }
  const videoFiles = [...(source.video ?? [])];
  const prompt = options?.initialPrompt ?? buildWhisperPrompt(options?.godNodes ?? []);
  if (videoFiles.length === 0) {
    return { detection: nextDetection, transcriptPaths: [], prompt };
  }

  // transcribe() reads the model from the environment, so override it
  // temporarily and restore the previous value afterwards.
  const previousModel = process.env.GRAPHIFY_WHISPER_MODEL;
  const overrideModel = Boolean(options?.whisperModel);
  if (overrideModel) {
    process.env.GRAPHIFY_WHISPER_MODEL = options.whisperModel;
  }
  try {
    const force = options?.incremental === true;
    const transcriptPaths = await transcribeAll(videoFiles, options?.outputDir, prompt, force);
    source.document = [...(source.document ?? []), ...transcriptPaths];
    return { detection: nextDetection, transcriptPaths, prompt };
  } finally {
    if (overrideModel) {
      if (previousModel === undefined) {
        delete process.env.GRAPHIFY_WHISPER_MODEL;
      } else {
        process.env.GRAPHIFY_WHISPER_MODEL = previousModel;
      }
    }
  }
}
5851
+
5852
+ // src/ingest.ts
5341
5853
/**
 * Escape a string for use inside a double-quoted YAML scalar.
 *
 * Backslashes and double quotes are backslash-escaped; line feeds and carriage
 * returns are each replaced by a single space.
 *
 * @param {string} s - Raw text.
 * @returns {string} Escaped, single-line text.
 */
function yamlStr(s) {
  const substitutions = {
    "\\": "\\\\",
    '"': '\\"',
    "\n": " ",
    "\r": " ",
  };
  // One pass suffices: no replacement introduces a character that another
  // rule would need to rewrite afterwards.
  return s.replace(/[\\"\n\r]/g, (ch) => substitutions[ch]);
}
@@ -5505,7 +6017,7 @@ async function downloadBinary(url, suffix, targetDir) {
5505
6017
  const filename = safeFilename2(url, suffix);
5506
6018
  const outPath = pathResolve2(targetDir, filename);
5507
6019
  const data = await safeFetch(url);
5508
- writeFileSync5(outPath, data);
6020
+ writeFileSync6(outPath, data);
5509
6021
  return outPath;
5510
6022
  }
5511
6023
  function normalizeIngestOptions(authorOrOptions, contributor) {
@@ -5521,7 +6033,7 @@ function normalizeIngestOptions(authorOrOptions, contributor) {
5521
6033
  };
5522
6034
  }
5523
6035
  async function ingest(url, targetDir, authorOrOptions = null, contributor = null) {
5524
- mkdirSync4(targetDir, { recursive: true });
6036
+ mkdirSync5(targetDir, { recursive: true });
5525
6037
  const urlType = detectUrlType(url);
5526
6038
  const { author, contributor: normalizedContributor } = normalizeIngestOptions(
5527
6039
  authorOrOptions,
@@ -5532,7 +6044,7 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
5532
6044
  let filename;
5533
6045
  if (urlType === "pdf") {
5534
6046
  const out = await downloadBinary(url, ".pdf", targetDir);
5535
- console.log(`Downloaded PDF: ${basename3(out)}`);
6047
+ console.log(`Downloaded PDF: ${basename4(out)}`);
5536
6048
  return out;
5537
6049
  }
5538
6050
  if (urlType === "image") {
@@ -5542,9 +6054,14 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
5542
6054
  } catch {
5543
6055
  throw new Error(`Invalid URL: ${url}`);
5544
6056
  }
5545
- const suffix = extname3(parsed.pathname) || ".jpg";
6057
+ const suffix = extname5(parsed.pathname) || ".jpg";
5546
6058
  const out = await downloadBinary(url, suffix, targetDir);
5547
- console.log(`Downloaded image: ${basename3(out)}`);
6059
+ console.log(`Downloaded image: ${basename4(out)}`);
6060
+ return out;
6061
+ }
6062
+ if (urlType === "youtube") {
6063
+ const out = downloadAudio(url, targetDir);
6064
+ console.log(`Downloaded audio: ${basename4(out)}`);
5548
6065
  return out;
5549
6066
  }
5550
6067
  if (urlType === "tweet") {
@@ -5556,13 +6073,13 @@ async function ingest(url, targetDir, authorOrOptions = null, contributor = null
5556
6073
  }
5557
6074
  let outPath = pathResolve2(targetDir, filename);
5558
6075
  let counter = 1;
5559
- while (existsSync6(outPath)) {
6076
+ while (existsSync7(outPath)) {
5560
6077
  const stem = filename.replace(/\.md$/, "");
5561
6078
  outPath = pathResolve2(targetDir, `${stem}_${counter}.md`);
5562
6079
  counter++;
5563
6080
  }
5564
- writeFileSync5(outPath, content, "utf-8");
5565
- console.log(`Saved ${urlType}: ${basename3(outPath)}`);
6081
+ writeFileSync6(outPath, content, "utf-8");
6082
+ console.log(`Saved ${urlType}: ${basename4(outPath)}`);
5566
6083
  return outPath;
5567
6084
  }
5568
6085
  function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "query", sourceNodes = null) {
@@ -5582,7 +6099,7 @@ function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "quer
5582
6099
  if (!payload.question) throw new Error("saveQueryResult requires a question");
5583
6100
  if (!payload.memoryDir) throw new Error("saveQueryResult requires a memoryDir");
5584
6101
  const effectiveAnswer = payload.answer ?? "";
5585
- mkdirSync4(payload.memoryDir, { recursive: true });
6102
+ mkdirSync5(payload.memoryDir, { recursive: true });
5586
6103
  const now = /* @__PURE__ */ new Date();
5587
6104
  const slug = payload.question.toLowerCase().replace(/[^\w]/g, "_").slice(0, 50).replace(/_+$/, "");
5588
6105
  const ts = now.toISOString().replace(/[-:]/g, "").replace("T", "_").slice(0, 15);
@@ -5615,10 +6132,10 @@ function saveQueryResult(questionOrOptions, answer, memoryDir, queryType = "quer
5615
6132
  }
5616
6133
  const content = [...frontmatterLines, ...bodyLines].join("\n");
5617
6134
  const outPath = pathResolve2(payload.memoryDir, filename);
5618
- writeFileSync5(outPath, content, "utf-8");
6135
+ writeFileSync6(outPath, content, "utf-8");
5619
6136
  return outPath;
5620
6137
  }
5621
- var isDirectExecution = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^ingest\.(?:js|mjs|cjs|ts)$/.test(basename3(process.argv[1]));
6138
+ var isDirectExecution = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^ingest\.(?:js|mjs|cjs|ts)$/.test(basename4(process.argv[1]));
5622
6139
  if (isDirectExecution) {
5623
6140
  const url = process.argv[2];
5624
6141
  const targetDir = process.argv[3] ?? "./raw";
@@ -5634,16 +6151,16 @@ if (isDirectExecution) {
5634
6151
  }
5635
6152
 
5636
6153
  // src/serve.ts
6154
+ init_graph();
5637
6155
  init_security();
5638
6156
  init_analyze();
5639
6157
  import { readFileSync as readFileSync5 } from "fs";
5640
- import Graph3 from "graphology";
5641
6158
  import { bidirectional } from "graphology-shortest-path/unweighted.js";
5642
- import { basename as basename4 } from "path";
6159
+ import { basename as basename5, dirname as dirname4, resolve as resolve7 } from "path";
5643
6160
  function loadGraph2(graphPath) {
5644
6161
  let safePath;
5645
6162
  try {
5646
- safePath = validateGraphPath(graphPath);
6163
+ safePath = validateGraphPath(graphPath, dirname4(resolve7(graphPath)));
5647
6164
  } catch (err) {
5648
6165
  console.error(`error: ${err instanceof Error ? err.message : err}`);
5649
6166
  process.exit(1);
@@ -5657,21 +6174,7 @@ function loadGraph2(graphPath) {
5657
6174
  );
5658
6175
  process.exit(1);
5659
6176
  }
5660
- const G = new Graph3({ type: "undirected", multi: false });
5661
- const nodes = data.nodes ?? [];
5662
- for (const node of nodes) {
5663
- const { id, ...attrs } = node;
5664
- G.mergeNode(id, attrs);
5665
- }
5666
- const links = data.links ?? data.edges ?? [];
5667
- for (const link of links) {
5668
- const { source, target, ...attrs } = link;
5669
- try {
5670
- G.mergeEdge(source, target, attrs);
5671
- } catch {
5672
- }
5673
- }
5674
- return G;
6177
+ return loadGraphFromData(data);
5675
6178
  }
5676
6179
  function communitiesFromGraph(G) {
5677
6180
  const communities = /* @__PURE__ */ new Map();
@@ -5684,6 +6187,15 @@ function communitiesFromGraph(G) {
5684
6187
  });
5685
6188
  return communities;
5686
6189
  }
6190
/**
 * Look up the human-readable label of a community from the graph-level
 * "community_labels" attribute.
 *
 * @param {object} G - Graph exposing `getAttribute(name)`.
 * @param {number|string|null|undefined} cid - Community id.
 * @returns {string|null} The label passed through sanitizeLabel(), or null when
 *   `cid` is null/undefined or no non-empty string label is stored for it.
 */
function communityName(G, cid) {
  if (cid === undefined || cid === null) {
    return null;
  }
  const stored = G.getAttribute("community_labels")?.[String(cid)];
  const hasLabel = typeof stored === "string" && stored.length > 0;
  return hasLabel ? sanitizeLabel(stored) : null;
}
5687
6199
  function scoreNodes(G, terms) {
5688
6200
  const scored = [];
5689
6201
  G.forEachNode((nid, data) => {
@@ -5702,7 +6214,7 @@ function bfs(G, startNodes, depth) {
5702
6214
  for (let i = 0; i < depth; i++) {
5703
6215
  const nextFrontier = /* @__PURE__ */ new Set();
5704
6216
  for (const n of frontier) {
5705
- G.forEachNeighbor(n, (neighbor) => {
6217
+ forEachTraversalNeighbor(G, n, (neighbor) => {
5706
6218
  if (!visited.has(neighbor)) {
5707
6219
  nextFrontier.add(neighbor);
5708
6220
  edges.push([n, neighbor]);
@@ -5722,7 +6234,7 @@ function dfs(G, startNodes, depth) {
5722
6234
  const [node, d] = stack.pop();
5723
6235
  if (visited.has(node) || d > depth) continue;
5724
6236
  visited.add(node);
5725
- G.forEachNeighbor(node, (neighbor) => {
6237
+ forEachTraversalNeighbor(G, node, (neighbor) => {
5726
6238
  if (!visited.has(neighbor)) {
5727
6239
  stack.push([neighbor, d + 1]);
5728
6240
  edges.push([node, neighbor]);
@@ -5801,7 +6313,7 @@ function toolGetNode(G, args) {
5801
6313
  ` ID: ${nid}`,
5802
6314
  ` Source: ${d.source_file ?? ""} ${d.source_location ?? ""}`,
5803
6315
  ` Type: ${d.file_type ?? ""}`,
5804
- ` Community: ${d.community ?? ""}`,
6316
+ ` Community: ${d.community_name ? `${d.community ?? ""} (${d.community_name})` : communityName(G, d.community) ?? String(d.community ?? "")}`,
5805
6317
  ` Degree: ${G.degree(nid)}`
5806
6318
  ].join("\n");
5807
6319
  }
@@ -5812,7 +6324,7 @@ function toolGetNeighbors(G, args) {
5812
6324
  if (matches.length === 0) return `No node matching '${label}' found.`;
5813
6325
  const nid = matches[0];
5814
6326
  const lines = [`Neighbors of ${G.getNodeAttribute(nid, "label") ?? nid}:`];
5815
- G.forEachNeighbor(nid, (neighbor) => {
6327
+ forEachTraversalNeighbor(G, nid, (neighbor) => {
5816
6328
  const edgeKey = G.edge(nid, neighbor);
5817
6329
  if (!edgeKey) return;
5818
6330
  const d = G.getEdgeAttributes(edgeKey);
@@ -5828,7 +6340,8 @@ function toolGetCommunity(communities, G, args) {
5828
6340
  const cid = Number(args.community_id);
5829
6341
  const nodes = communities.get(cid);
5830
6342
  if (!nodes || nodes.length === 0) return `Community ${cid} not found.`;
5831
- const lines = [`Community ${cid} (${nodes.length} nodes):`];
6343
+ const label = communityName(G, cid);
6344
+ const lines = [label ? `Community ${cid} - ${label} (${nodes.length} nodes):` : `Community ${cid} (${nodes.length} nodes):`];
5832
6345
  for (const n of nodes) {
5833
6346
  const d = G.getNodeAttributes(n);
5834
6347
  lines.push(` ${d.label ?? n} [${d.source_file ?? ""}]`);
@@ -6050,8 +6563,13 @@ async function serve(graphPath = "graphify-out/graph.json", transport) {
6050
6563
  if (!handler) {
6051
6564
  return { content: [{ type: "text", text: `Unknown tool: ${name}` }] };
6052
6565
  }
6053
- const text = handler(args ?? {});
6054
- return { content: [{ type: "text", text }] };
6566
+ try {
6567
+ const text = handler(args ?? {});
6568
+ return { content: [{ type: "text", text }] };
6569
+ } catch (err) {
6570
+ const message = err instanceof Error ? err.message : String(err);
6571
+ return { content: [{ type: "text", text: `Error executing ${name}: ${message}` }] };
6572
+ }
6055
6573
  });
6056
6574
  const serverTransport = transport ?? new StdioServerTransport();
6057
6575
  let keepAlive;
@@ -6059,14 +6577,14 @@ async function serve(graphPath = "graphify-out/graph.json", transport) {
6059
6577
  keepAlive = setInterval(() => void 0, 6e4);
6060
6578
  process.stdin?.resume();
6061
6579
  }
6062
- const closed = new Promise((resolve5) => {
6580
+ const closed = new Promise((resolve8) => {
6063
6581
  const previousOnClose = server.onclose;
6064
6582
  server.onclose = () => {
6065
6583
  if (keepAlive) {
6066
6584
  clearInterval(keepAlive);
6067
6585
  }
6068
6586
  previousOnClose?.();
6069
- resolve5();
6587
+ resolve8();
6070
6588
  };
6071
6589
  });
6072
6590
  await server.connect(serverTransport);
@@ -6074,7 +6592,7 @@ async function serve(graphPath = "graphify-out/graph.json", transport) {
6074
6592
  await closed;
6075
6593
  }
6076
6594
  }
6077
- var isDirectExecution2 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^serve\.(?:js|mjs|cjs|ts)$/.test(basename4(process.argv[1]));
6595
+ var isDirectExecution2 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^serve\.(?:js|mjs|cjs|ts)$/.test(basename5(process.argv[1]));
6078
6596
  if (isDirectExecution2) {
6079
6597
  const graphPath = process.argv[2] ?? "graphify-out/graph.json";
6080
6598
  serve(graphPath).catch((err) => {
@@ -6084,59 +6602,14 @@ if (isDirectExecution2) {
6084
6602
  }
6085
6603
 
6086
6604
  // src/watch.ts
6087
- import { existsSync as existsSync7, mkdirSync as mkdirSync5, writeFileSync as writeFileSync6, unlinkSync as unlinkSync2 } from "fs";
6088
- import { resolve as pathResolve3, extname as extname4, basename as basename5 } from "path";
6605
+ init_detect();
6606
+ import { existsSync as existsSync8, mkdirSync as mkdirSync6, writeFileSync as writeFileSync7, unlinkSync as unlinkSync2 } from "fs";
6607
+ import { resolve as pathResolve3, extname as extname6, basename as basename6 } from "path";
6089
6608
  var WATCHED_EXTENSIONS = /* @__PURE__ */ new Set([
6090
- ".py",
6091
- ".ts",
6092
- ".js",
6093
- ".go",
6094
- ".rs",
6095
- ".java",
6096
- ".cpp",
6097
- ".c",
6098
- ".rb",
6099
- ".swift",
6100
- ".kt",
6101
- ".cs",
6102
- ".scala",
6103
- ".php",
6104
- ".cc",
6105
- ".cxx",
6106
- ".hpp",
6107
- ".h",
6108
- ".kts",
6109
- ".md",
6110
- ".txt",
6111
- ".rst",
6112
- ".pdf",
6113
- ".png",
6114
- ".jpg",
6115
- ".jpeg",
6116
- ".webp",
6117
- ".gif",
6118
- ".svg"
6119
- ]);
6120
- var CODE_EXTENSIONS3 = /* @__PURE__ */ new Set([
6121
- ".py",
6122
- ".ts",
6123
- ".js",
6124
- ".go",
6125
- ".rs",
6126
- ".java",
6127
- ".cpp",
6128
- ".c",
6129
- ".rb",
6130
- ".swift",
6131
- ".kt",
6132
- ".cs",
6133
- ".scala",
6134
- ".php",
6135
- ".cc",
6136
- ".cxx",
6137
- ".hpp",
6138
- ".h",
6139
- ".kts"
6609
+ ...CODE_EXTENSIONS,
6610
+ ...DOC_EXTENSIONS,
6611
+ ...PAPER_EXTENSIONS,
6612
+ ...IMAGE_EXTENSIONS
6140
6613
  ]);
6141
6614
  async function rebuildCode(watchPath, followSymlinks = false) {
6142
6615
  try {
@@ -6189,7 +6662,7 @@ async function rebuildCode(watchPath, followSymlinks = false) {
6189
6662
  }
6190
6663
  const questions = suggestQuestions2(G, communities, labels);
6191
6664
  const outDir = pathResolve3(watchPath, "graphify-out");
6192
- mkdirSync5(outDir, { recursive: true });
6665
+ mkdirSync6(outDir, { recursive: true });
6193
6666
  const report = generate2(
6194
6667
  G,
6195
6668
  communities,
@@ -6202,10 +6675,10 @@ async function rebuildCode(watchPath, followSymlinks = false) {
6202
6675
  watchPath,
6203
6676
  questions
6204
6677
  );
6205
- writeFileSync6(pathResolve3(outDir, "GRAPH_REPORT.md"), report, "utf-8");
6206
- toJson2(G, communities, pathResolve3(outDir, "graph.json"));
6678
+ writeFileSync7(pathResolve3(outDir, "GRAPH_REPORT.md"), report, "utf-8");
6679
+ toJson2(G, communities, pathResolve3(outDir, "graph.json"), { communityLabels: labels });
6207
6680
  const flagPath = pathResolve3(outDir, "needs_update");
6208
- if (existsSync7(flagPath)) {
6681
+ if (existsSync8(flagPath)) {
6209
6682
  unlinkSync2(flagPath);
6210
6683
  }
6211
6684
  console.log(
@@ -6224,9 +6697,9 @@ async function rebuildCode(watchPath, followSymlinks = false) {
6224
6697
  }
6225
6698
  function notifyOnly(watchPath) {
6226
6699
  const outDir = pathResolve3(watchPath, "graphify-out");
6227
- mkdirSync5(outDir, { recursive: true });
6700
+ mkdirSync6(outDir, { recursive: true });
6228
6701
  const flagPath = pathResolve3(outDir, "needs_update");
6229
- writeFileSync6(flagPath, "1", "utf-8");
6702
+ writeFileSync7(flagPath, "1", "utf-8");
6230
6703
  console.log(`
6231
6704
  [graphify watch] New or changed files detected in ${watchPath}`);
6232
6705
  console.log(
@@ -6238,7 +6711,7 @@ function notifyOnly(watchPath) {
6238
6711
  console.log(`[graphify watch] Flag written to ${flagPath}`);
6239
6712
  }
6240
6713
  function hasNonCode(changedPaths) {
6241
- return changedPaths.some((p) => !CODE_EXTENSIONS3.has(extname4(p).toLowerCase()));
6714
+ return changedPaths.some((p) => !CODE_EXTENSIONS.has(extname6(p).toLowerCase()));
6242
6715
  }
6243
6716
  async function watch(watchPath, debounce = 3) {
6244
6717
  let chokidar;
@@ -6262,7 +6735,7 @@ async function watch(watchPath, debounce = 3) {
6262
6735
  ]
6263
6736
  });
6264
6737
  watcher.on("all", (_event, filePath) => {
6265
- const ext = extname4(filePath).toLowerCase();
6738
+ const ext = extname6(filePath).toLowerCase();
6266
6739
  if (!WATCHED_EXTENSIONS.has(ext)) return;
6267
6740
  const parts = filePath.split("/");
6268
6741
  if (parts.some((part) => part.startsWith(".") && part !== ".")) return;
@@ -6301,7 +6774,7 @@ async function watch(watchPath, debounce = 3) {
6301
6774
  process.on("SIGINT", cleanup);
6302
6775
  process.on("SIGTERM", cleanup);
6303
6776
  }
6304
- var isDirectExecution3 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^watch\.(?:js|mjs|cjs|ts)$/.test(basename5(process.argv[1]));
6777
+ var isDirectExecution3 = typeof process !== "undefined" && typeof process.argv[1] === "string" && /^watch\.(?:js|mjs|cjs|ts)$/.test(basename6(process.argv[1]));
6305
6778
  if (isDirectExecution3) {
6306
6779
  const watchPath = process.argv[2] ?? ".";
6307
6780
  const debounce = process.argv[3] ? parseFloat(process.argv[3]) : 3;
@@ -6313,8 +6786,10 @@ if (isDirectExecution3) {
6313
6786
  export {
6314
6787
  FileType,
6315
6788
  assertValid,
6789
+ augmentDetectionWithTranscripts,
6316
6790
  build,
6317
6791
  buildFromJson,
6792
+ buildWhisperPrompt,
6318
6793
  checkSemanticCache,
6319
6794
  classifyFile,
6320
6795
  cluster,
@@ -6322,6 +6797,7 @@ export {
6322
6797
  collectFiles,
6323
6798
  detect,
6324
6799
  detectIncremental,
6800
+ downloadAudio,
6325
6801
  extract,
6326
6802
  fileHash,
6327
6803
  generate as generateReport,
@@ -6351,6 +6827,8 @@ export {
6351
6827
  toJson,
6352
6828
  toSvg,
6353
6829
  toWiki,
6830
+ transcribe,
6831
+ transcribeAll,
6354
6832
  validateExtraction,
6355
6833
  validateGraphPath,
6356
6834
  validateUrl,