@blamechris/repo-memory 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +23 -6
  2. package/dist/cli/index-command.d.ts +30 -0
  3. package/dist/cli/index-command.d.ts.map +1 -0
  4. package/dist/cli/index-command.js +112 -0
  5. package/dist/cli/index-command.js.map +1 -0
  6. package/dist/grammars/tree-sitter-go.wasm +0 -0
  7. package/dist/grammars/tree-sitter-java.wasm +0 -0
  8. package/dist/grammars/tree-sitter-kotlin.wasm +0 -0
  9. package/dist/grammars/tree-sitter-python.wasm +0 -0
  10. package/dist/grammars/tree-sitter-rust.wasm +0 -0
  11. package/dist/indexer/ast-summarizer.d.ts +3 -3
  12. package/dist/indexer/ast-summarizer.d.ts.map +1 -1
  13. package/dist/indexer/ast-summarizer.js +900 -27
  14. package/dist/indexer/ast-summarizer.js.map +1 -1
  15. package/dist/indexer/imports.d.ts +2 -0
  16. package/dist/indexer/imports.d.ts.map +1 -1
  17. package/dist/indexer/imports.js +69 -0
  18. package/dist/indexer/imports.js.map +1 -1
  19. package/dist/indexer/summarize.d.ts.map +1 -1
  20. package/dist/indexer/summarize.js +15 -4
  21. package/dist/indexer/summarize.js.map +1 -1
  22. package/dist/persistence/db.d.ts +2 -0
  23. package/dist/persistence/db.d.ts.map +1 -1
  24. package/dist/persistence/db.js +5 -1
  25. package/dist/persistence/db.js.map +1 -1
  26. package/dist/server.js +14 -5
  27. package/dist/server.js.map +1 -1
  28. package/dist/tools/get-file-summary.d.ts +9 -1
  29. package/dist/tools/get-file-summary.d.ts.map +1 -1
  30. package/dist/tools/get-file-summary.js +9 -4
  31. package/dist/tools/get-file-summary.js.map +1 -1
  32. package/dist/tools/get-related-files.d.ts.map +1 -1
  33. package/dist/tools/get-related-files.js +4 -1
  34. package/dist/tools/get-related-files.js.map +1 -1
  35. package/package.json +1 -1
@@ -14,6 +14,12 @@ const EXT_TO_GRAMMAR = {
14
14
  '.jsx': 'javascript',
15
15
  '.mjs': 'javascript',
16
16
  '.cjs': 'javascript',
17
+ '.py': 'python',
18
+ '.go': 'go',
19
+ '.rs': 'rust',
20
+ '.kt': 'kotlin',
21
+ '.kts': 'kotlin',
22
+ '.java': 'java',
17
23
  };
18
24
  const MAX_PURPOSE_LENGTH = 160;
19
25
  const require = createRequire(import.meta.url);
@@ -68,6 +74,29 @@ async function getParser(grammar) {
68
74
  function stripQuotes(text) {
69
75
  return text.replace(/^['"`]|['"`]$/g, '');
70
76
  }
77
/**
 * Create a blank extraction result: every list field starts as its own empty
 * array and `fileDoc` starts as `null`. Each call returns fresh arrays, so
 * results never share state.
 */
function emptyResult() {
  const listFields = [
    'exports',
    'imports',
    'topLevelDeclarations',
    'classes',
    'functions',
    'typeNames',
    'constNames',
  ];
  return {
    ...Object.fromEntries(listFields.map((field) => [field, []])),
    fileDoc: null,
  };
}
89
/**
 * Remove duplicate names from the string lists of an extraction result,
 * keeping first-seen order. Mutates and returns `out`.
 *
 * `typeNames` and `constNames` are de-duplicated as well: extractors push to
 * them once per declaration, so a name declared twice at top level (a Python
 * constant assigned twice, a `#[cfg]`-gated pair of Rust items) would
 * otherwise be listed twice. `classes` and `functions` hold objects and are
 * left untouched.
 *
 * @param {object} out Extraction result in the shape produced by `emptyResult()`.
 * @returns {object} The same `out` object.
 */
function dedupeResult(out) {
  const unique = (list) => [...new Set(list)];
  out.exports = unique(out.exports);
  out.imports = unique(out.imports);
  out.topLevelDeclarations = unique(out.topLevelDeclarations);
  // Guarded so a caller passing a partial result does not gain new fields.
  if (Array.isArray(out.typeNames)) {
    out.typeNames = unique(out.typeNames);
  }
  if (Array.isArray(out.constNames)) {
    out.constNames = unique(out.constNames);
  }
  return out;
}
95
/**
 * Trim `line` to its first sentence, i.e. everything up to and including the
 * first period that is followed by a space. A line without such a period is
 * returned unchanged.
 */
function firstSentence(line) {
  const boundary = line.indexOf('. ');
  if (boundary === -1) {
    return line;
  }
  return line.substring(0, boundary + 1);
}
71
100
  /** First meaningful line of a comment, with `/**`, `*` and `//` markers removed. */
72
101
  function commentFirstLine(text) {
73
102
  const cleaned = text
@@ -76,9 +105,7 @@ function commentFirstLine(text) {
76
105
  for (const rawLine of cleaned.split('\n')) {
77
106
  const line = rawLine.replace(/^\s*(?:\*|\/\/)?\s*/, '').trim();
78
107
  if (line.length > 0 && !line.startsWith('@') && !line.startsWith('eslint')) {
79
- // Keep only the first sentence so multi-sentence doc lines stay short.
80
- const sentenceEnd = line.indexOf('. ');
81
- return sentenceEnd === -1 ? line : line.slice(0, sentenceEnd + 1);
108
+ return firstSentence(line);
82
109
  }
83
110
  }
84
111
  return null;
@@ -148,7 +175,7 @@ function collectDeclaration(node, exported, doc, out) {
148
175
  const name = node.childForFieldName('name')?.text;
149
176
  if (name) {
150
177
  out.topLevelDeclarations.push(`class ${name}`);
151
- out.classes.push({ name, methodCount: countMethods(node), doc, exported });
178
+ out.classes.push({ name, kind: 'class', methodCount: countMethods(node), doc, exported });
152
179
  if (exported)
153
180
  out.exports.push(name);
154
181
  }
@@ -238,17 +265,8 @@ function collectExportStatement(node, out) {
238
265
  }
239
266
  }
240
267
  }
241
- function extract(root) {
242
- const out = {
243
- exports: [],
244
- imports: [],
245
- topLevelDeclarations: [],
246
- classes: [],
247
- functions: [],
248
- typeNames: [],
249
- constNames: [],
250
- fileDoc: null,
251
- };
268
+ function extractTsJs(root) {
269
+ const out = emptyResult();
252
270
  let seenCode = false;
253
271
  for (const child of root.namedChildren) {
254
272
  if (!child)
@@ -276,16 +294,869 @@ function extract(root) {
276
294
  break;
277
295
  }
278
296
  }
279
- out.exports = [...new Set(out.exports)];
280
- out.imports = [...new Set(out.imports)];
281
- out.topLevelDeclarations = [...new Set(out.topLevelDeclarations)];
282
- return out;
297
+ return dedupeResult(out);
298
+ }
299
+ // ---------------------------------------------------------------------------
300
+ // Python extraction
301
+ // ---------------------------------------------------------------------------
302
/**
 * First non-blank line of a Python string literal, reduced to its first
 * sentence. Only the `string_content` child is read, so quotes and prefixes
 * are not part of the text. Returns `null` when the literal has no content
 * child or the content is blank.
 */
function pyStringFirstLine(stringNode) {
  const contentNode = stringNode.namedChildren.find((part) => part?.type === 'string_content');
  if (!contentNode) {
    return null;
  }
  const firstNonBlank = contentNode.text
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .find((line) => line.length > 0);
  return firstNonBlank === undefined ? null : firstSentence(firstNonBlank);
}
314
/**
 * Docstring summary of a Python `block` or `module` node. The docstring is
 * the string literal inside the first non-comment child, provided that child
 * is an expression statement. Returns `null` when `body` is missing or there
 * is no such literal.
 */
function pyDocstring(body) {
  const firstStatement = body?.namedChildren.find((c) => c !== null && c.type !== 'comment');
  if (firstStatement?.type !== 'expression_statement') {
    return null;
  }
  const literal = firstStatement.namedChildren.find((c) => c?.type === 'string');
  return literal ? pyStringFirstLine(literal) : null;
}
324
/**
 * Count the functions defined directly in a Python class body. Decorated
 * definitions are unwrapped before the check, and `__init__` is not counted.
 * Returns 0 when the class node has no `body` field.
 */
function countPyMethods(classNode) {
  const body = classNode.childForFieldName('body');
  if (!body) {
    return 0;
  }
  return body.namedChildren.reduce((total, member) => {
    if (!member) {
      return total;
    }
    const definition = member.type === 'decorated_definition'
      ? (member.childForFieldName('definition') ?? member)
      : member;
    const isCountedMethod = definition.type === 'function_definition' &&
      definition.childForFieldName('name')?.text !== '__init__';
    return isCountedMethod ? total + 1 : total;
  }, 0);
}
342
/**
 * Extract imports, top-level declarations and exported names from the root
 * node of a parsed Python file.
 *
 * A name counts as public when it does not start with an underscore. When the
 * module assigns a list or tuple literal to `__all__`, the string items of
 * that literal become the export list; otherwise every public top-level
 * function, class and plain assignment is exported.
 */
function extractPython(root) {
  const out = emptyResult();
  out.fileDoc = pyDocstring(root);
  let explicitExports = null;
  const implicitExports = [];

  for (const statement of root.namedChildren) {
    if (!statement) {
      continue;
    }
    // Look through decorators so the wrapped def/class is classified.
    const node = statement.type === 'decorated_definition'
      ? (statement.childForFieldName('definition') ?? statement)
      : statement;

    if (node.type === 'import_statement') {
      for (const item of node.namedChildren) {
        if (item?.type === 'dotted_name') {
          out.imports.push(item.text);
        }
        else if (item?.type === 'aliased_import') {
          // `import a.b as c` records the real module path, not the alias.
          const target = item.childForFieldName('name');
          if (target) {
            out.imports.push(target.text);
          }
        }
      }
    }
    else if (node.type === 'import_from_statement') {
      const moduleName = node.childForFieldName('module_name');
      if (moduleName) {
        out.imports.push(moduleName.text);
      }
    }
    else if (node.type === 'function_definition') {
      const name = node.childForFieldName('name')?.text;
      if (name) {
        const exported = !name.startsWith('_');
        out.topLevelDeclarations.push(`def ${name}`);
        out.functions.push({
          name,
          doc: pyDocstring(node.childForFieldName('body')),
          exported,
        });
        if (exported) {
          implicitExports.push(name);
        }
      }
    }
    else if (node.type === 'class_definition') {
      const name = node.childForFieldName('name')?.text;
      if (name) {
        const exported = !name.startsWith('_');
        out.topLevelDeclarations.push(`class ${name}`);
        out.classes.push({
          name,
          kind: 'class',
          methodCount: countPyMethods(node),
          doc: pyDocstring(node.childForFieldName('body')),
          exported,
        });
        if (exported) {
          implicitExports.push(name);
        }
      }
    }
    else if (node.type === 'expression_statement') {
      const assignment = node.namedChildren.find((c) => c?.type === 'assignment');
      const target = assignment?.childForFieldName('left');
      // Only simple `name = ...` bindings are considered.
      if (!assignment || !target || target.type !== 'identifier') {
        continue;
      }
      const name = target.text;
      if (name === '__all__') {
        const value = assignment.childForFieldName('right');
        if (value && (value.type === 'list' || value.type === 'tuple')) {
          explicitExports = [];
          for (const element of value.namedChildren) {
            if (element?.type !== 'string') {
              continue;
            }
            const content = element.namedChildren.find((c) => c?.type === 'string_content');
            if (content) {
              explicitExports.push(content.text);
            }
          }
        }
      }
      else if (!name.startsWith('_')) {
        implicitExports.push(name);
        // ALL_CAPS names are additionally listed as constants.
        if (/^[A-Z0-9_]+$/.test(name)) {
          out.constNames.push(name);
        }
      }
    }
  }

  out.exports = explicitExports ?? implicitExports;
  return dedupeResult(out);
}
441
+ // ---------------------------------------------------------------------------
442
+ // Go extraction
443
+ // ---------------------------------------------------------------------------
444
/**
 * Whether a Go identifier is exported. Per the Go specification an identifier
 * is exported when its first character is a Unicode uppercase letter
 * (category Lu), so non-ASCII capitals such as `Ä` count as well.
 *
 * @param {string} name Identifier text.
 * @returns {boolean} `true` when the identifier starts with an uppercase letter.
 */
function isGoExported(name) {
  return /^\p{Lu}/u.test(name);
}
447
/**
 * Summary line of the comment block sitting directly above `node`.
 *
 * Walks backwards over preceding `comment` siblings while each one ends on
 * the row just above the block found so far (no blank line in between), then
 * returns the first line of the topmost comment. Returns `null` when no
 * comment is attached.
 */
function goDocComment(node) {
  let topmost = null;
  let rowBelow = node.startPosition.row;
  for (
    let sibling = node.previousNamedSibling;
    sibling && sibling.type === 'comment' && sibling.endPosition.row === rowBelow - 1;
    sibling = sibling.previousNamedSibling
  ) {
    topmost = sibling;
    rowBelow = sibling.startPosition.row;
  }
  if (topmost === null) {
    return null;
  }
  return commentFirstLine(topmost.text);
}
462
/**
 * Append the unquoted import path of a Go `import_spec` to `out.imports`.
 * Specs without a `path` field are ignored.
 */
function goImportPath(spec, out) {
  const pathNode = spec.childForFieldName('path');
  if (!pathNode) {
    return;
  }
  out.imports.push(stripQuotes(pathNode.text));
}
467
/**
 * Identifiers bound by a `var_spec` / `const_spec`. Only the leading run of
 * `identifier` children is returned; collection stops at the first child of
 * any other type (the type or value part of the spec).
 */
function goSpecNames(spec) {
  const parts = spec.namedChildren;
  const firstNonIdentifier = parts.findIndex((part) => part?.type !== 'identifier');
  const leading = firstNonIdentifier === -1 ? parts : parts.slice(0, firstNonIdentifier);
  return leading.map((identifier) => identifier.text);
}
478
/**
 * Distribute collected named types over the result. A type with at least one
 * recorded method is pushed to `out.classes` (the entry plus its
 * `methodCount`); every other type contributes only its name to
 * `out.typeNames`.
 */
function resolveNamedTypes(types, methodCounts, out) {
  types.forEach((entry) => {
    const methodCount = methodCounts.get(entry.name) ?? 0;
    if (methodCount > 0) {
      out.classes.push({ ...entry, methodCount });
      return;
    }
    out.typeNames.push(entry.name);
  });
}
490
/**
 * Extract imports, top-level declarations and exported names from the root
 * node of a parsed Go file.
 *
 * Named types are collected first and methods are tallied per receiver type;
 * once the whole file has been walked, types that received methods become
 * class-like entries and the rest become plain type names.
 */
function extractGo(root) {
  const out = emptyResult();
  const namedTypes = [];
  const methodsByReceiver = new Map();
  const kindByTypeNode = new Map([
    ['struct_type', 'struct'],
    ['interface_type', 'interface'],
  ]);
  const exportIfPublic = (name) => {
    if (isGoExported(name)) {
      out.exports.push(name);
    }
  };

  for (const decl of root.namedChildren) {
    if (!decl) {
      continue;
    }
    switch (decl.type) {
      case 'package_clause':
        // The comment above `package` doubles as the file description.
        out.fileDoc = goDocComment(decl);
        break;
      case 'import_declaration': {
        // Covers both `import "x"` and the parenthesised list form.
        const specs = decl.namedChildren.flatMap((inner) => {
          if (inner?.type === 'import_spec') {
            return [inner];
          }
          if (inner?.type === 'import_spec_list') {
            return inner.namedChildren.filter((spec) => spec?.type === 'import_spec');
          }
          return [];
        });
        for (const spec of specs) {
          goImportPath(spec, out);
        }
        break;
      }
      case 'function_declaration': {
        const name = decl.childForFieldName('name')?.text;
        if (!name) {
          break;
        }
        out.topLevelDeclarations.push(`func ${name}`);
        out.functions.push({ name, doc: goDocComment(decl), exported: isGoExported(name) });
        exportIfPublic(name);
        break;
      }
      case 'method_declaration': {
        const name = decl.childForFieldName('name')?.text;
        if (!name) {
          break;
        }
        out.topLevelDeclarations.push(`func ${name}`);
        exportIfPublic(name);
        // Credit the method to the first type identifier found in its receiver.
        const receiverType = decl
          .childForFieldName('receiver')
          ?.descendantsOfType('type_identifier')[0]?.text;
        if (receiverType) {
          methodsByReceiver.set(receiverType, (methodsByReceiver.get(receiverType) ?? 0) + 1);
        }
        break;
      }
      case 'type_declaration': {
        for (const spec of decl.namedChildren) {
          if (!spec || (spec.type !== 'type_spec' && spec.type !== 'type_alias')) {
            continue;
          }
          const name = spec.childForFieldName('name')?.text;
          if (!name) {
            continue;
          }
          const kind = kindByTypeNode.get(spec.childForFieldName('type')?.type) ?? 'type';
          const label = kind === 'type' ? `type ${name}` : `type ${name} ${kind}`;
          out.topLevelDeclarations.push(label);
          namedTypes.push({ name, kind, doc: goDocComment(decl), exported: isGoExported(name) });
          exportIfPublic(name);
        }
        break;
      }
      case 'var_declaration':
      case 'const_declaration': {
        const isConst = decl.type === 'const_declaration';
        const keyword = isConst ? 'const' : 'var';
        for (const spec of decl.namedChildren) {
          if (!spec || (spec.type !== 'var_spec' && spec.type !== 'const_spec')) {
            continue;
          }
          for (const name of goSpecNames(spec)) {
            out.topLevelDeclarations.push(`${keyword} ${name}`);
            if (isConst) {
              out.constNames.push(name);
            }
            exportIfPublic(name);
          }
        }
        break;
      }
      default:
        break;
    }
  }

  resolveNamedTypes(namedTypes, methodsByReceiver, out);
  return dedupeResult(out);
}
584
+ // ---------------------------------------------------------------------------
585
+ // Rust extraction
586
+ // ---------------------------------------------------------------------------
587
/**
 * Whether a Rust item carries a bare `pub` visibility modifier. Restricted
 * forms such as `pub(crate)` or `pub(super)` have different modifier text and
 * therefore do not match.
 */
function isRustPub(node) {
  for (const child of node.namedChildren) {
    if (child?.type === 'visibility_modifier' && child.text === 'pub') {
      return true;
    }
  }
  return false;
}
591
/**
 * Summary line of the doc comment attached to a Rust item.
 *
 * Working upwards from `node`: adjacent `attribute_item` siblings are skipped
 * first, then the contiguous run of `///` line comments is walked to its
 * topmost line, whose text (marker removed) is reduced to one sentence. When
 * no `///` line is found, a preceding `/** ... *\/` block comment is used
 * instead. Returns `null` when nothing qualifies or the topmost `///` line is
 * empty.
 */
function rustDocComment(node) {
  let rowBelow = node.startPosition.row;
  let cursor = node.previousNamedSibling;
  // "Adjacent" means ending on the row right above what was accepted so far.
  const isAdjacent = (candidate) => candidate.endPosition.row === rowBelow - 1;

  while (cursor && cursor.type === 'attribute_item' && isAdjacent(cursor)) {
    rowBelow = cursor.startPosition.row;
    cursor = cursor.previousNamedSibling;
  }

  let topDocLine = null;
  while (
    cursor &&
    cursor.type === 'line_comment' &&
    cursor.text.startsWith('///') &&
    isAdjacent(cursor)
  ) {
    topDocLine = cursor;
    rowBelow = cursor.startPosition.row;
    cursor = cursor.previousNamedSibling;
  }

  if (topDocLine) {
    const stripped = topDocLine.text.replace(/^\/\/\/\s*/, '').trim();
    return stripped.length > 0 ? firstSentence(stripped) : null;
  }
  if (cursor?.type === 'block_comment' && cursor.text.startsWith('/**')) {
    return commentFirstLine(cursor.text);
  }
  return null;
}
620
/**
 * Base module path of a `use` argument. For grouped (`a::{b, c}`) and aliased
 * (`a::b as c`) forms the `path` field is returned; for a wildcard the first
 * non-null child is returned. Any other node, or a form missing the expected
 * child, falls back to the full argument text.
 */
function rustUseBasePath(argument) {
  const { type } = argument;
  if (type === 'scoped_use_list' || type === 'use_as_clause') {
    const path = argument.childForFieldName('path');
    return path ? path.text : argument.text;
  }
  if (type === 'use_wildcard') {
    const inner = argument.namedChildren.find((c) => c !== null);
    return inner ? inner.text : argument.text;
  }
  return argument.text;
}
639
/**
 * Collect into `out` the names a `use` argument brings into scope: the bare
 * identifier (or `crate`/`self`/`super`), the last segment of a scoped path,
 * the alias of an `as` clause, and — recursively — every item of a use list.
 * Other node types (e.g. wildcards) contribute nothing.
 */
function rustUseNames(argument, out) {
  const pushFieldText = (field) => {
    const node = argument.childForFieldName(field);
    if (node) {
      out.push(node.text);
    }
  };

  const kind = argument.type;
  if (kind === 'identifier' || kind === 'crate' || kind === 'self' || kind === 'super') {
    out.push(argument.text);
    return;
  }
  if (kind === 'scoped_identifier') {
    pushFieldText('name');
    return;
  }
  if (kind === 'use_as_clause') {
    pushFieldText('alias');
    return;
  }

  let items;
  if (kind === 'scoped_use_list') {
    items = argument.childForFieldName('list')?.namedChildren ?? [];
  }
  else if (kind === 'use_list') {
    items = argument.namedChildren;
  }
  else {
    return;
  }
  for (const item of items) {
    if (item) {
      rustUseNames(item, out);
    }
  }
}
679
/** Rust item node types that declare a named type, mapped to their keyword. */
const RUST_TYPE_ITEMS = {
  'struct_item': 'struct',
  'enum_item': 'enum',
  'trait_item': 'trait',
  'union_item': 'union',
};
685
/**
 * Extract imports, top-level declarations and exported names from the root
 * node of a parsed Rust file.
 *
 * Only items with a bare `pub` modifier are exported. Struct/enum/trait/union
 * items are collected and `impl` blocks are tallied per type name; after the
 * walk, types with methods become class-like entries and the rest become
 * plain type names. The file description is the first non-empty `//!` line
 * seen before any non-comment item.
 */
function extractRust(root) {
  const out = emptyResult();
  const namedTypes = [];
  const methodsByType = new Map();
  let seenCode = false;

  for (const item of root.namedChildren) {
    if (!item) {
      continue;
    }
    // Comments never count as code, so they must not flip `seenCode`.
    if (item.type === 'line_comment') {
      if (!seenCode && out.fileDoc === null && item.text.startsWith('//!')) {
        const line = item.text.replace(/^\/\/!\s*/, '').trim();
        if (line.length > 0) {
          out.fileDoc = firstSentence(line);
        }
      }
      continue;
    }
    if (item.type === 'block_comment') {
      continue;
    }

    const pub = isRustPub(item);
    const exportIfPub = (name) => {
      if (pub) {
        out.exports.push(name);
      }
    };

    switch (item.type) {
      case 'use_declaration': {
        const argument = item.childForFieldName('argument');
        if (argument) {
          out.imports.push(rustUseBasePath(argument));
          if (pub) {
            // `pub use` re-exports the imported names.
            rustUseNames(argument, out.exports);
          }
        }
        break;
      }
      case 'mod_item': {
        const name = item.childForFieldName('name')?.text;
        if (name) {
          out.topLevelDeclarations.push(`mod ${name}`);
          // A body-less `mod x;` is also recorded as an import.
          if (!item.childForFieldName('body')) {
            out.imports.push(name);
          }
          exportIfPub(name);
        }
        break;
      }
      case 'function_item': {
        const name = item.childForFieldName('name')?.text;
        if (name) {
          out.topLevelDeclarations.push(`fn ${name}`);
          out.functions.push({ name, doc: rustDocComment(item), exported: pub });
          exportIfPub(name);
        }
        break;
      }
      case 'struct_item':
      case 'enum_item':
      case 'trait_item':
      case 'union_item': {
        const name = item.childForFieldName('name')?.text;
        if (name) {
          const kind = RUST_TYPE_ITEMS[item.type];
          out.topLevelDeclarations.push(`${kind} ${name}`);
          namedTypes.push({ name, kind, doc: rustDocComment(item), exported: pub });
          exportIfPub(name);
        }
        break;
      }
      case 'impl_item': {
        // Drop generic arguments so `impl Config<T>` matches `struct Config`.
        const typeName = item.childForFieldName('type')?.text.replace(/<.*$/s, '');
        if (typeName) {
          out.topLevelDeclarations.push(`impl ${typeName}`);
          const members = item.childForFieldName('body')?.namedChildren ?? [];
          const methods = members.filter((member) => member?.type === 'function_item').length;
          if (methods > 0) {
            methodsByType.set(typeName, (methodsByType.get(typeName) ?? 0) + methods);
          }
        }
        break;
      }
      case 'type_item': {
        const name = item.childForFieldName('name')?.text;
        if (name) {
          out.topLevelDeclarations.push(`type ${name}`);
          out.typeNames.push(name);
          exportIfPub(name);
        }
        break;
      }
      case 'const_item':
      case 'static_item': {
        const name = item.childForFieldName('name')?.text;
        if (name) {
          out.topLevelDeclarations.push(`${item.type === 'const_item' ? 'const' : 'static'} ${name}`);
          out.constNames.push(name);
          exportIfPub(name);
        }
        break;
      }
      default:
        break;
    }
    seenCode = true;
  }

  resolveNamedTypes(namedTypes, methodsByType, out);
  return dedupeResult(out);
}
794
+ // ---------------------------------------------------------------------------
795
+ // Kotlin extraction
796
+ // ---------------------------------------------------------------------------
797
/**
 * Whether a Kotlin declaration is public. A declaration is treated as public
 * unless its `modifiers` child contains a visibility modifier other than
 * `public`; no modifier at all also means public.
 */
function ktIsPublic(node) {
  const modifiers = node.namedChildren.find((c) => c?.type === 'modifiers');
  const restricted = (modifiers?.namedChildren ?? []).some(
    (m) => m?.type === 'visibility_modifier' && m.text !== 'public',
  );
  return !restricted;
}
809
/**
 * Whether `node` has a direct child (named or anonymous) whose type equals
 * `token`. Used for keywords such as `interface` or `enum`, which this code
 * looks up among all children rather than only the named ones.
 */
function ktHasToken(node, token) {
  for (const child of node.children) {
    if (child !== null && child.type === token) {
      return true;
    }
  }
  return false;
}
813
/**
 * Whether the `modifiers` child of `node` contains a `class_modifier` whose
 * text equals `modifier` (for example `data`).
 */
function ktHasClassModifier(node, modifier) {
  const modifiers = node.namedChildren.find((c) => c?.type === 'modifiers');
  if (!modifiers) {
    return false;
  }
  return modifiers.namedChildren.some(
    (m) => m !== null && m.type === 'class_modifier' && m.text === modifier,
  );
}
817
/**
 * Summary line of the KDoc block (`/** ... *\/`) directly before `node`.
 *
 * When the previous sibling is the import list or the package header, its
 * deepest trailing descendant is inspected instead; the original author notes
 * that the Kotlin grammar attaches the comment preceding the first
 * declaration there. Returns `null` when no KDoc block is found.
 */
function ktPrecedingDoc(node) {
  let candidate = node.previousNamedSibling;
  if (candidate && (candidate.type === 'import_list' || candidate.type === 'package_header')) {
    // Descend along the last named child until a leaf is reached.
    while (candidate.lastNamedChild) {
      candidate = candidate.lastNamedChild;
    }
  }
  const isKDoc = Boolean(candidate) &&
    candidate.type === 'multiline_comment' &&
    candidate.text.startsWith('/**');
  return isKDoc ? commentFirstLine(candidate.text) : null;
}
835
/**
 * Number of functions declared in a Kotlin class/object body. Functions of a
 * nested `companion_object` are counted too, by recursing into its
 * `class_body`. A missing body counts as zero.
 */
function ktCountMethods(body) {
  const members = body?.namedChildren ?? [];
  return members.reduce((total, member) => {
    if (!member) {
      return total;
    }
    if (member.type === 'function_declaration') {
      return total + 1;
    }
    if (member.type === 'companion_object') {
      const companionBody = member.namedChildren.find((c) => c?.type === 'class_body');
      return total + ktCountMethods(companionBody);
    }
    return total;
  }, 0);
}
849
/**
 * Extract imports, top-level declarations and exported names from the root
 * node of a parsed Kotlin file.
 *
 * Declarations are exported unless `ktIsPublic` reports a restricting
 * visibility modifier. Interfaces, classes, data classes and objects become
 * class-like entries; enum classes and type aliases become type names. The
 * file description is the first KDoc block seen before any non-comment node.
 */
function extractKotlin(root) {
  const out = emptyResult();
  let seenCode = false;
  const childOfType = (node, type) => node.namedChildren.find((c) => c?.type === type);
  const exportIf = (isPublic, name) => {
    if (isPublic) {
      out.exports.push(name);
    }
  };

  for (const decl of root.namedChildren) {
    if (!decl) {
      continue;
    }
    // Comments never count as code, so they must not flip `seenCode`.
    if (decl.type === 'multiline_comment') {
      if (!seenCode && out.fileDoc === null && decl.text.startsWith('/**')) {
        out.fileDoc = commentFirstLine(decl.text);
      }
      continue;
    }
    if (decl.type === 'line_comment') {
      continue;
    }

    if (decl.type === 'import_list') {
      for (const header of decl.namedChildren) {
        if (header?.type !== 'import_header') {
          continue;
        }
        // Only the `identifier` child (the dotted path) is read, so whatever
        // follows it in the header is not part of the recorded import.
        const path = childOfType(header, 'identifier');
        if (path) {
          out.imports.push(path.text.replace(/\s/g, ''));
        }
      }
    }
    else if (decl.type === 'class_declaration') {
      const name = childOfType(decl, 'type_identifier')?.text;
      if (name) {
        const pub = ktIsPublic(decl);
        const doc = ktPrecedingDoc(decl);
        const body = childOfType(decl, 'class_body');
        if (ktHasToken(decl, 'interface')) {
          out.topLevelDeclarations.push(`interface ${name}`);
          out.classes.push({
            name,
            kind: 'interface',
            methodCount: ktCountMethods(body),
            doc,
            exported: pub,
          });
        }
        else if (ktHasToken(decl, 'enum')) {
          // Enum classes are listed as types, not as class-like entries.
          out.topLevelDeclarations.push(`enum class ${name}`);
          out.typeNames.push(name);
        }
        else {
          const kind = ktHasClassModifier(decl, 'data') ? 'data class' : 'class';
          out.topLevelDeclarations.push(`${kind} ${name}`);
          out.classes.push({ name, kind, methodCount: ktCountMethods(body), doc, exported: pub });
        }
        exportIf(pub, name);
      }
    }
    else if (decl.type === 'object_declaration') {
      const name = childOfType(decl, 'type_identifier')?.text;
      if (name) {
        const pub = ktIsPublic(decl);
        const body = childOfType(decl, 'class_body');
        out.topLevelDeclarations.push(`object ${name}`);
        out.classes.push({
          name,
          kind: 'object',
          methodCount: ktCountMethods(body),
          doc: ktPrecedingDoc(decl),
          exported: pub,
        });
        exportIf(pub, name);
      }
    }
    else if (decl.type === 'function_declaration') {
      const name = childOfType(decl, 'simple_identifier')?.text;
      if (name) {
        const pub = ktIsPublic(decl);
        out.topLevelDeclarations.push(`fun ${name}`);
        out.functions.push({ name, doc: ktPrecedingDoc(decl), exported: pub });
        exportIf(pub, name);
      }
    }
    else if (decl.type === 'property_declaration') {
      // `val` is assumed when the binding keyword node is absent.
      const kind = childOfType(decl, 'binding_pattern_kind')?.text ?? 'val';
      const variable = childOfType(decl, 'variable_declaration');
      const name = variable?.namedChildren.find((c) => c?.type === 'simple_identifier')?.text;
      if (name) {
        out.topLevelDeclarations.push(`${kind} ${name}`);
        if (kind === 'val') {
          out.constNames.push(name);
        }
        exportIf(ktIsPublic(decl), name);
      }
    }
    else if (decl.type === 'type_alias') {
      const name = childOfType(decl, 'type_identifier')?.text;
      if (name) {
        out.topLevelDeclarations.push(`typealias ${name}`);
        out.typeNames.push(name);
        exportIf(ktIsPublic(decl), name);
      }
    }
    // Anything that is not a comment (including `package_header`) is code.
    seenCode = true;
  }

  return dedupeResult(out);
}
966
+ // ---------------------------------------------------------------------------
967
+ // Java extraction
968
+ // ---------------------------------------------------------------------------
969
/**
 * Summary line of the Javadoc block (`/** ... *\/`) that is the previous
 * named sibling of `node`, or `null` when the previous sibling is missing or
 * is not such a block.
 */
function javaPrecedingDoc(node) {
  const previous = node.previousNamedSibling;
  const isJavadoc = Boolean(previous) &&
    previous.type === 'block_comment' &&
    previous.text.startsWith('/**');
  return isJavadoc ? commentFirstLine(previous.text) : null;
}
977
/**
 * Whether the `modifiers` child of `node` contains a token of type
 * `modifier` (e.g. `public`, `static`). All children of `modifiers` are
 * scanned, not just the named ones.
 */
function javaHasModifier(node, modifier) {
  const modifiers = node.namedChildren.find((c) => c?.type === 'modifiers');
  for (const token of modifiers?.children ?? []) {
    if (token !== null && token.type === modifier) {
      return true;
    }
  }
  return false;
}
982
/**
 * Walk the body of a top-level Java type, record its public members and
 * return how many methods it declares.
 *
 * - Every `method_declaration` is counted, public or not.
 * - Member names reach `out.exports` only when the enclosing type is public
 *   and the member is public; interface members are treated as public.
 * - A `static` method named `main` is additionally pushed to `out.functions`.
 * - Exported fields with ALL_CAPS names are also listed in `out.constNames`.
 */
function javaCollectMembers(typeNode, kind, typePublic, out) {
  const isInterface = kind === 'interface';
  const members = typeNode.childForFieldName('body')?.namedChildren ?? [];
  let methodCount = 0;

  for (const member of members) {
    if (!member) {
      continue;
    }
    if (member.type === 'method_declaration') {
      methodCount += 1;
      const name = member.childForFieldName('name')?.text;
      if (!name) {
        continue;
      }
      const memberPublic = isInterface || javaHasModifier(member, 'public');
      if (name === 'main' && javaHasModifier(member, 'static')) {
        out.functions.push({ name, doc: null, exported: memberPublic });
      }
      if (typePublic && memberPublic) {
        out.exports.push(name);
      }
      continue;
    }
    if (member.type !== 'field_declaration' || !typePublic) {
      continue;
    }
    if (!isInterface && !javaHasModifier(member, 'public')) {
      continue;
    }
    // One field declaration may declare several variables.
    for (const declarator of member.namedChildren) {
      if (declarator?.type !== 'variable_declarator') {
        continue;
      }
      const name = declarator.childForFieldName('name')?.text;
      if (!name) {
        continue;
      }
      out.exports.push(name);
      if (/^[A-Z0-9_]+$/.test(name)) {
        out.constNames.push(name);
      }
    }
  }
  return methodCount;
}
1025
/**
 * Extract imports, top-level declarations and exported names from the root
 * node of a parsed Java file.
 *
 * Classes, interfaces and records become class-like entries and have their
 * members scanned by `javaCollectMembers`; enums and annotation types become
 * type names. A type is exported when it carries the `public` modifier. The
 * file description is the first Javadoc block seen before any non-comment
 * node.
 */
function extractJava(root) {
  const out = emptyResult();
  let seenCode = false;
  const classLikeKinds = new Map([
    ['class_declaration', 'class'],
    ['interface_declaration', 'interface'],
    ['record_declaration', 'record'],
  ]);

  for (const decl of root.namedChildren) {
    if (!decl) {
      continue;
    }
    // Comments never count as code, so they must not flip `seenCode`.
    if (decl.type === 'block_comment') {
      if (!seenCode && out.fileDoc === null && decl.text.startsWith('/**')) {
        out.fileDoc = commentFirstLine(decl.text);
      }
      continue;
    }
    if (decl.type === 'line_comment') {
      continue;
    }

    if (decl.type === 'import_declaration') {
      // Only the dotted path node is read, so a wildcard suffix held in a
      // separate node is not part of the recorded import.
      const path = decl.namedChildren.find(
        (c) => c?.type === 'scoped_identifier' || c?.type === 'identifier',
      );
      if (path) {
        out.imports.push(path.text.replace(/\s/g, ''));
      }
    }
    else if (classLikeKinds.has(decl.type)) {
      const name = decl.childForFieldName('name')?.text;
      if (name) {
        const kind = classLikeKinds.get(decl.type);
        const pub = javaHasModifier(decl, 'public');
        out.topLevelDeclarations.push(`${kind} ${name}`);
        // Members are collected before the type itself is exported.
        const methodCount = javaCollectMembers(decl, kind, pub, out);
        out.classes.push({ name, kind, methodCount, doc: javaPrecedingDoc(decl), exported: pub });
        if (pub) {
          out.exports.push(name);
        }
      }
    }
    else if (decl.type === 'enum_declaration' || decl.type === 'annotation_type_declaration') {
      const name = decl.childForFieldName('name')?.text;
      if (name) {
        const kind = decl.type === 'enum_declaration' ? 'enum' : 'annotation';
        out.topLevelDeclarations.push(`${kind} ${name}`);
        out.typeNames.push(name);
        if (javaHasModifier(decl, 'public')) {
          out.exports.push(name);
        }
      }
    }
    // Anything that is not a comment (including `package_declaration`) is code.
    seenCode = true;
  }

  return dedupeResult(out);
}
1087
/**
 * Run the extractor that matches `grammar` on the syntax-tree root. Any
 * grammar without a dedicated extractor is handled by the TS/JS extractor.
 */
function extract(root, grammar) {
  const extractors = new Map([
    ['python', extractPython],
    ['go', extractGo],
    ['rust', extractRust],
    ['kotlin', extractKotlin],
    ['java', extractJava],
  ]);
  const run = extractors.get(grammar) ?? extractTsJs;
  return run(root);
}
284
1103
  // ---------------------------------------------------------------------------
285
1104
  // Purpose line generation
286
1105
  // ---------------------------------------------------------------------------
287
- function fileCategory(filePath) {
1106
+ function fileCategory(filePath, contents) {
288
1107
  const basename = getBasename(filePath);
1108
+ const ext = getExtension(filePath);
1109
+ const dir = filePath.replace(/\\/g, '/');
1110
+ const inTestsDir = dir.includes('/tests/') || dir.includes('/test/') || /^tests?\//.test(dir);
1111
+ if (ext === '.py') {
1112
+ if (basename.startsWith('test_') || basename.endsWith('_test.py'))
1113
+ return 'test';
1114
+ if (basename === 'conftest.py')
1115
+ return 'test';
1116
+ if (inTestsDir)
1117
+ return 'test';
1118
+ if (basename === 'setup.py' || basename === 'settings.py' || basename === 'config.py') {
1119
+ return 'config';
1120
+ }
1121
+ if (basename === '__init__.py' || basename === '__main__.py')
1122
+ return 'entry point';
1123
+ if (/\bif\s+__name__\s*==\s*['"]__main__['"]\s*:/.test(contents))
1124
+ return 'entry point';
1125
+ return null;
1126
+ }
1127
+ if (ext === '.go') {
1128
+ if (basename.endsWith('_test.go'))
1129
+ return 'test';
1130
+ if (basename === 'main.go')
1131
+ return 'entry point';
1132
+ return null;
1133
+ }
1134
+ if (ext === '.rs') {
1135
+ if (inTestsDir || basename.startsWith('test_'))
1136
+ return 'test';
1137
+ if (basename === 'main.rs' || basename === 'lib.rs' || basename === 'mod.rs') {
1138
+ return 'entry point';
1139
+ }
1140
+ if (basename === 'build.rs')
1141
+ return 'config';
1142
+ return null;
1143
+ }
1144
+ if (ext === '.kt' || ext === '.kts') {
1145
+ if (/Test\.kts?$/.test(basename) || inTestsDir || dir.includes('/androidTest/'))
1146
+ return 'test';
1147
+ if (basename.endsWith('.gradle.kts'))
1148
+ return 'config';
1149
+ if (basename === 'Main.kt')
1150
+ return 'entry point';
1151
+ return null;
1152
+ }
1153
+ if (ext === '.java') {
1154
+ if (/Tests?\.java$/.test(basename) || inTestsDir)
1155
+ return 'test';
1156
+ if (basename === 'Main.java')
1157
+ return 'entry point';
1158
+ return null;
1159
+ }
289
1160
  if (filePath.endsWith('.d.ts'))
290
1161
  return 'types';
291
1162
  if (/\.(?:test|spec)\.[tj]sx?$/.test(basename))
@@ -304,7 +1175,7 @@ function listNames(names, max) {
304
1175
  return `${names.slice(0, max).join(', ')} (+${names.length - max} more)`;
305
1176
  }
306
1177
  function buildPurpose(filePath, contents, info) {
307
- let category = fileCategory(filePath);
1178
+ let category = fileCategory(filePath, contents);
308
1179
  // Executable entry points: shebang line or a top-level main() function.
309
1180
  if (category === null &&
310
1181
  (contents.startsWith('#!') || info.functions.some((f) => f.name === 'main'))) {
@@ -318,7 +1189,7 @@ function buildPurpose(filePath, contents, info) {
318
1189
  if (classes.length > 0) {
319
1190
  const primary = [...classes].sort((a, b) => b.methodCount - a.methodCount)[0];
320
1191
  const desc = primary.doc ?? info.fileDoc;
321
- const head = `class ${primary.name} (${primary.methodCount} method${primary.methodCount === 1 ? '' : 's'})`;
1192
+ const head = `${primary.kind} ${primary.name} (${primary.methodCount} method${primary.methodCount === 1 ? '' : 's'})`;
322
1193
  const rest = classes.filter((c) => c !== primary).map((c) => c.name);
323
1194
  const suffix = rest.length > 0 ? ` +${listNames(rest, 2)}` : '';
324
1195
  detail = desc ? `${head}${suffix}: ${desc}` : `${head}${suffix}`;
@@ -355,9 +1226,9 @@ function buildPurpose(filePath, contents, info) {
355
1226
  // Entry point
356
1227
  // ---------------------------------------------------------------------------
357
1228
  /**
358
- * Summarize a TS/JS file from its syntax tree. Falls back to the regex
359
- * summarizer for unsupported extensions, empty files, parse errors, or when
360
- * the WASM runtime cannot be loaded.
1229
+ * Summarize a TS/JS, Python, Go, Rust, Kotlin or Java file from its syntax
1230
+ * tree. Falls back to the regex summarizer for unsupported extensions, empty
1231
+ * files, parse errors, or when the WASM runtime cannot be loaded.
361
1232
  */
362
1233
  export async function summarizeFileAst(filePath, contents) {
363
1234
  const grammar = EXT_TO_GRAMMAR[getExtension(filePath)];
@@ -373,14 +1244,16 @@ export async function summarizeFileAst(filePath, contents) {
373
1244
  runtimeBroken = true;
374
1245
  return summarizeFile(filePath, contents);
375
1246
  }
376
- const tree = parser.parse(contents);
1247
+ // Some grammars (Go) need a statement terminator after the last declaration;
1248
+ // parse with a trailing newline so files missing one don't report errors.
1249
+ const tree = parser.parse(contents.endsWith('\n') ? contents : `${contents}\n`);
377
1250
  if (!tree)
378
1251
  return summarizeFile(filePath, contents);
379
1252
  try {
380
1253
  if (tree.rootNode.hasError) {
381
1254
  return summarizeFile(filePath, contents);
382
1255
  }
383
- const info = extract(tree.rootNode);
1256
+ const info = extract(tree.rootNode, grammar);
384
1257
  return {
385
1258
  purpose: buildPurpose(filePath, contents, info),
386
1259
  exports: info.exports,