@swarmvaultai/engine 0.6.8 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.d.ts +3 -3
  2. package/dist/index.js +2536 -219
  3. package/package.json +10 -1
package/dist/index.js CHANGED
@@ -1434,6 +1434,7 @@ import path5 from "path";
1434
1434
  var require2 = createRequire(import.meta.url);
1435
1435
  var TREE_SITTER_RUNTIME_PACKAGE = "@vscode/tree-sitter-wasm";
1436
1436
  var TREE_SITTER_EXTRA_GRAMMARS_PACKAGE = "tree-sitter-wasms";
1437
+ var SWIFT_TREE_SITTER_OPT_IN_ENV = "SWARMVAULT_ENABLE_SWIFT_TREE_SITTER";
1437
1438
  var packageRootCache = /* @__PURE__ */ new Map();
1438
1439
  var RATIONALE_MARKERS = ["NOTE:", "IMPORTANT:", "HACK:", "WHY:", "RATIONALE:"];
1439
1440
  function stripKnownCommentPrefix(line) {
@@ -1464,7 +1465,16 @@ var grammarAssetByLanguage = {
1464
1465
  cpp: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-cpp.wasm" },
1465
1466
  php: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-php.wasm" },
1466
1467
  ruby: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-ruby.wasm" },
1467
- powershell: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-powershell.wasm" }
1468
+ powershell: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-powershell.wasm" },
1469
+ swift: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-swift.wasm" },
1470
+ elixir: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-elixir.wasm" },
1471
+ ocaml: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-ocaml.wasm" },
1472
+ objc: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-objc.wasm" },
1473
+ rescript: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-rescript.wasm" },
1474
+ solidity: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-solidity.wasm" },
1475
+ html: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-html.wasm" },
1476
+ css: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-css.wasm" },
1477
+ vue: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-vue.wasm" }
1468
1478
  };
1469
1479
  function resolvePackageRoot(packageName) {
1470
1480
  const cached = packageRootCache.get(packageName);
@@ -1524,7 +1534,7 @@ function normalizeSymbolReference(value) {
1524
1534
  }
1525
1535
  function stripCodeExtension(filePath) {
1526
1536
  return filePath.replace(
1527
- /\.(?:[cm]?jsx?|tsx?|mts|cts|sh|bash|zsh|py|go|rs|java|kt|kts|scala|sc|dart|lua|zig|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx)$/i,
1537
+ /\.(?:[cm]?jsx?|tsx?|mts|cts|sh|bash|zsh|py|go|rs|java|kt|kts|scala|sc|dart|lua|zig|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx|swift|exs?|mli?|mm|resi?|sol|html?|css|vue)$/i,
1528
1538
  ""
1529
1539
  );
1530
1540
  }
@@ -1812,6 +1822,72 @@ function descendantTypeNames(node) {
1812
1822
  function quotedPath(value) {
1813
1823
  return value.replace(/^['"<]+|['">]+$/g, "").trim();
1814
1824
  }
1825
/**
 * Blanks out C-preprocessor conditionals so that only the first branch of each
 * `#if`/`#ifdef`/`#ifndef` group survives.
 *
 * Every conditional directive line, and every line that belongs to an
 * alternate branch (`#elif`, `#elifdef`, `#elifndef`, `#else`), is replaced by
 * an empty line. The output therefore has exactly as many lines as the input,
 * which keeps line numbers stable. Other `#` directives are left alone while
 * their enclosing branch is kept.
 *
 * @param {string} content - Source text, split on "\n".
 * @returns {string} The text with conditional directives and alternate branches blanked.
 */
function neutralizePreprocessorDirectives(content) {
  // One flag per open conditional: true while the branch currently being read is kept.
  const branchKept = [];
  const insideKeptBranch = () => branchKept.every(Boolean);
  const directiveName = (line) => {
    const trimmed = line.trimStart();
    if (trimmed[0] !== "#") {
      return void 0;
    }
    // Whitespace is allowed between "#" and the directive keyword.
    const keyword = trimmed.slice(1).trimStart().match(/^([A-Za-z]+)/);
    return keyword?.[1]?.toLowerCase();
  };
  const out = [];
  for (const line of content.split("\n")) {
    switch (directiveName(line)) {
      case "if":
      case "ifdef":
      case "ifndef":
        // A nested conditional is only kept when every enclosing branch is kept.
        branchKept.push(insideKeptBranch());
        out.push("");
        break;
      case "elif":
      case "elifdef":
      case "elifndef":
      case "else":
        // Only the first branch is ever kept, so any alternate branch is dropped.
        if (branchKept.length > 0) {
          branchKept[branchKept.length - 1] = false;
        }
        out.push("");
        break;
      case "endif":
        // Tolerate an unbalanced #endif instead of underflowing the stack.
        if (branchKept.length > 0) {
          branchKept.pop();
        }
        out.push("");
        break;
      default:
        out.push(insideKeptBranch() ? line : "");
    }
  }
  return out.join("\n");
}
1875
/**
 * Guesses whether a shell script is written for zsh or bash.
 *
 * Only the first 4096 characters are inspected. The script is reported as
 * "zsh" when any of these signals is present, otherwise "bash":
 *   - a shebang whose interpreter is zsh, either directly (`#!/bin/zsh`,
 *     `#!/usr/local/bin/zsh -f`, `#!zsh`) or through `env` (`#!/usr/bin/env zsh`,
 *     `#!/usr/bin/env -S zsh`);
 *   - a `#compdef` line;
 *   - a `${(flag)...}` parameter expansion starting with one of f, F, s, q, @, %;
 *   - one of the words setopt, unsetopt, zmodload, compinit, or `autoload -Uz`.
 *
 * @param {string} content - Full script text.
 * @returns {"zsh" | "bash"} The detected dialect.
 */
function detectShellDialect(content) {
  const prefix = content.slice(0, 4096);
  const zshSignals = [
    // Optional interpreter directory, optional `env` with flags, then zsh
    // (optionally version-suffixed) ending at whitespace or end of line, so a
    // directory merely named "zsh" in the path does not count.
    /^#!\s*(?:\S*\/)?(?:env\s+(?:-\S+\s+)*)?zsh(?:-[\w.]+)?(?=\s|$)/m,
    /^\s*#compdef\b/m,
    /\$\{\([fFsq@%]/,
    /\b(?:setopt|unsetopt|zmodload|compinit|autoload\s+-Uz)\b/
  ];
  return zshSignals.some((signal) => signal.test(prefix)) ? "zsh" : "bash";
}
1815
1891
  function diagnosticsFromTree(rootNode) {
1816
1892
  if (!rootNode.hasError) {
1817
1893
  return [];
@@ -1861,6 +1937,18 @@ function treeSitterCompatibilityDiagnostic(language, error) {
1861
1937
  column: 1
1862
1938
  };
1863
1939
  }
1940
/**
 * Reports whether the Swift tree-sitter parser has been explicitly opted into.
 *
 * @returns {boolean} True only when the environment variable named by
 *   SWIFT_TREE_SITTER_OPT_IN_ENV is exactly the string "1".
 */
function swiftTreeSitterEnabled() {
  const { [SWIFT_TREE_SITTER_OPT_IN_ENV]: optIn } = process.env;
  return optIn === "1";
}
1943
/**
 * Builds the warning diagnostic reported when Swift parser-backed analysis is
 * skipped because the opt-in environment variable is not set.
 *
 * @returns {{code: number, category: string, message: string, line: number, column: number}}
 *   A warning (code 9012) anchored at line 1, column 1.
 */
function swiftTreeSitterDisabledDiagnostic() {
  const message = `Swift parser-backed analysis is disabled by default because the packaged tree-sitter grammar can trigger Node/V8 out-of-memory crashes during WASM compilation. Set ${SWIFT_TREE_SITTER_OPT_IN_ENV}=1 to opt in anyway.`;
  const diagnostic = { code: 9012, category: "warning", message, line: 1, column: 1 };
  return diagnostic;
}
1864
1952
  function flattenPythonDottedName(node) {
1865
1953
  if (!node) {
1866
1954
  return "";
@@ -2691,24 +2779,20 @@ function zigDeclarationKind(node) {
2691
2779
  }
2692
2780
  return void 0;
2693
2781
  }
2694
- function bashCodeAnalysis(manifest, rootNode, diagnostics) {
2782
+ function bashCodeAnalysis(manifest, rootNode, diagnostics, rawContent) {
2695
2783
  const imports = [];
2696
2784
  const draftSymbols = [];
2697
2785
  const exportLabels = [];
2698
- for (const child of rootNode.namedChildren) {
2699
- if (!child) {
2700
- continue;
2701
- }
2702
- if (child.type === "command") {
2703
- const parsed = parseBashImport(child);
2704
- if (parsed) {
2705
- imports.push(parsed);
2706
- }
2707
- continue;
2708
- }
2709
- if (child.type !== "function_definition") {
2710
- continue;
2786
+ const commandNodes = rootNode.descendantsOfType("command").filter((node) => node !== null);
2787
+ for (const command of commandNodes) {
2788
+ const parsed = parseBashImport(command);
2789
+ if (parsed) {
2790
+ imports.push(parsed);
2711
2791
  }
2792
+ }
2793
+ const functionNodes = rootNode.descendantsOfType("function_definition").filter((node) => node !== null);
2794
+ const functionByName = /* @__PURE__ */ new Map();
2795
+ for (const child of functionNodes) {
2712
2796
  const name = nodeText(child.childForFieldName("name") ?? child.namedChildren.at(0) ?? null).trim();
2713
2797
  if (!name) {
2714
2798
  continue;
@@ -2724,16 +2808,44 @@ function bashCodeAnalysis(manifest, rootNode, diagnostics) {
2724
2808
  bodyText: nodeText(child.childForFieldName("body") ?? findNamedChild(child, "compound_statement"))
2725
2809
  });
2726
2810
  exportLabels.push(name);
2811
+ if (!functionByName.has(name)) {
2812
+ functionByName.set(name, child);
2813
+ }
2727
2814
  }
2728
2815
  for (let index = 0; index < draftSymbols.length; index += 1) {
2729
- const functionNode = rootNode.namedChildren.find(
2730
- (child) => child?.type === "function_definition" && nodeText(child.childForFieldName("name") ?? child.namedChildren.at(0) ?? null).trim() === draftSymbols[index]?.name
2731
- );
2732
- draftSymbols[index].callNames = bashCallNamesFromBody(
2816
+ const symbol = draftSymbols[index];
2817
+ const functionNode = functionByName.get(symbol.name);
2818
+ symbol.callNames = bashCallNamesFromBody(
2733
2819
  functionNode?.childForFieldName("body") ?? findNamedChild(functionNode, "compound_statement"),
2734
- draftSymbols[index].name
2820
+ symbol.name
2735
2821
  );
2736
2822
  }
2823
+ if (draftSymbols.length === 0 && rawContent) {
2824
+ const seen = /* @__PURE__ */ new Set();
2825
+ for (const line of rawContent.split("\n")) {
2826
+ const trimmed = line.trimStart();
2827
+ let match = trimmed.match(/^function\s+([A-Za-z_][\w-]*)\s*(?:\(\))?/);
2828
+ if (!match) {
2829
+ match = trimmed.match(/^([A-Za-z_][\w-]*)\s*\(\)/);
2830
+ }
2831
+ const name = match?.[1];
2832
+ if (!name || seen.has(name)) {
2833
+ continue;
2834
+ }
2835
+ seen.add(name);
2836
+ draftSymbols.push({
2837
+ name,
2838
+ kind: "function",
2839
+ signature: singleLineSignature(trimmed),
2840
+ exported: true,
2841
+ callNames: [],
2842
+ extendsNames: [],
2843
+ implementsNames: [],
2844
+ bodyText: ""
2845
+ });
2846
+ exportLabels.push(name);
2847
+ }
2848
+ }
2737
2849
  return finalizeCodeAnalysis(manifest, "bash", imports, draftSymbols, exportLabels, diagnostics);
2738
2850
  }
2739
2851
  function dartCodeAnalysis(manifest, rootNode, diagnostics) {
@@ -3518,7 +3630,23 @@ function csharpCodeAnalysis(manifest, rootNode, diagnostics) {
3518
3630
  if (child.type === "file_scoped_namespace_declaration" || child.type === "namespace_declaration") {
3519
3631
  namespaceName = nodeText(child.childForFieldName("name")) || namespaceName;
3520
3632
  if (child.type === "namespace_declaration") {
3521
- for (const nested of child.namedChildren) {
3633
+ const nameNode = child.childForFieldName("name");
3634
+ const namespaceMembers = [];
3635
+ for (const directChild of child.namedChildren) {
3636
+ if (!directChild || directChild === nameNode) {
3637
+ continue;
3638
+ }
3639
+ if (directChild.type === "declaration_list") {
3640
+ for (const inner of directChild.namedChildren) {
3641
+ if (inner) {
3642
+ namespaceMembers.push(inner);
3643
+ }
3644
+ }
3645
+ continue;
3646
+ }
3647
+ namespaceMembers.push(directChild);
3648
+ }
3649
+ for (const nested of namespaceMembers) {
3522
3650
  if (nested && nested !== child.childForFieldName("name")) {
3523
3651
  if (["class_declaration", "interface_declaration", "enum_declaration", "struct_declaration", "record_declaration"].includes(
3524
3652
  nested.type
@@ -3806,64 +3934,174 @@ function powershellCodeAnalysis(manifest, rootNode, diagnostics) {
3806
3934
  }
3807
3935
  return finalizeCodeAnalysis(manifest, "powershell", imports, draftSymbols, exportLabels, diagnostics);
3808
3936
  }
3809
- function cFamilyCodeAnalysis(manifest, language, rootNode, diagnostics) {
3937
/**
 * Converts a Swift `import_declaration` node into an import record.
 *
 * @param {object} node - The import declaration syntax node.
 * @returns {{specifier: string, importedSymbols: string[], isExternal: boolean, reExport: boolean} | undefined}
 *   The import record, or undefined when the node has no non-empty
 *   `identifier` child.
 */
function parseSwiftImport(node) {
  const moduleNode = findNamedChild(node, "identifier");
  const moduleName = moduleNode ? moduleNode.text.trim() : "";
  if (moduleName === "") {
    return void 0;
  }
  // Swift does not have file-local relative imports; every `import` references
  // an external module (Foundation, UIKit, a SwiftPM package product, or the
  // current target's own module). Mark them all as external so the dependency
  // aggregator groups them with other package-level graph edges.
  return {
    specifier: moduleName,
    importedSymbols: [],
    isExternal: true,
    reExport: false
  };
}
3957
/**
 * Determines the symbol kind of a Swift type declaration from its keyword token.
 *
 * @param {object} node - Declaration node whose `children` include the keyword token.
 * @returns {"struct" | "enum" | "class"} The type of the first child that is
 *   `struct`, `enum`, or `class`; "class" when no such child exists.
 */
function swiftDeclarationKindFromKeyword(node) {
  const keywordKinds = new Set(["struct", "enum", "class"]);
  const keyword = node.children.find((child) => child && keywordKinds.has(child.type));
  return keyword ? keyword.type : "class";
}
3974
/**
 * Finds the access-level keyword attached to a Swift declaration.
 *
 * Looks for a `modifiers` child, then a `visibility_modifier` inside it, and
 * returns the type of the first token that is a recognised access level.
 *
 * @param {object} node - Declaration syntax node.
 * @returns {"public" | "private" | "fileprivate" | "internal" | "open" | undefined}
 *   The access level, or undefined when none is declared.
 */
function swiftVisibilityKeyword(node) {
  const accessLevels = ["public", "private", "fileprivate", "internal", "open"];
  const modifiers = findNamedChild(node, "modifiers");
  const visibility = modifiers ? findNamedChild(modifiers, "visibility_modifier") : null;
  if (!visibility) {
    return void 0;
  }
  const keyword = visibility.children.find((token) => token && accessLevels.includes(token.type));
  return keyword ? keyword.type : void 0;
}
3993
/**
 * Decides whether a Swift declaration counts as exported.
 *
 * @param {object} node - Declaration syntax node.
 * @returns {boolean} False only when the declaration's access level is
 *   `private` or `fileprivate`; true for every other level or none at all.
 */
function swiftExported(node) {
  const hiddenLevels = ["private", "fileprivate"];
  return !hiddenLevels.includes(swiftVisibilityKeyword(node));
}
3997
+ function swiftCodeAnalysis(manifest, rootNode, diagnostics) {
3810
3998
  const imports = [];
3811
3999
  const draftSymbols = [];
3812
4000
  const exportLabels = [];
3813
- const functionNameFromDeclarator = (node) => {
3814
- if (!node) {
3815
- return void 0;
4001
+ const recordParentTypes = (declaration) => {
4002
+ const specifiers = declaration.namedChildren.filter((item) => item?.type === "inheritance_specifier");
4003
+ if (specifiers.length === 0) {
4004
+ return [];
3816
4005
  }
3817
- const declarator = node.childForFieldName("declarator");
3818
- if (declarator) {
3819
- return functionNameFromDeclarator(declarator);
4006
+ const ordered = [];
4007
+ for (const specifier of specifiers) {
4008
+ const primary = findNamedChild(specifier, "user_type") ?? findNamedChild(specifier, "type_identifier") ?? specifier.namedChildren.find((item) => item !== null) ?? null;
4009
+ if (!primary) {
4010
+ continue;
4011
+ }
4012
+ const name = normalizeSymbolReference(primary.text);
4013
+ if (name) {
4014
+ ordered.push(name);
4015
+ }
3820
4016
  }
3821
- return extractIdentifier(node);
4017
+ return uniqueBy(ordered, (item) => item);
3822
4018
  };
3823
4019
  for (const child of rootNode.namedChildren) {
3824
4020
  if (!child) {
3825
4021
  continue;
3826
4022
  }
3827
- if (child.type === "preproc_include") {
3828
- const parsed = parseCppInclude(child);
4023
+ if (child.type === "import_declaration") {
4024
+ const parsed = parseSwiftImport(child);
3829
4025
  if (parsed) {
3830
4026
  imports.push(parsed);
3831
4027
  }
3832
4028
  continue;
3833
4029
  }
3834
- if (["class_specifier", "struct_specifier", "enum_specifier"].includes(child.type)) {
3835
- const name = extractIdentifier(child.childForFieldName("name"));
4030
+ if (child.type === "protocol_declaration") {
4031
+ const name = extractIdentifier(findNamedChild(child, "type_identifier"));
3836
4032
  if (!name) {
3837
4033
  continue;
3838
4034
  }
3839
- const kind = child.type === "enum_specifier" ? "enum" : child.type === "struct_specifier" ? "struct" : "class";
3840
- const baseClassClause = findNamedChild(child, "base_class_clause") ?? child.childForFieldName("base_class_clause");
3841
- const bases = baseClassClause ? uniqueBy(
3842
- baseClassClause.namedChildren.filter((item) => item !== null && item.type !== "access_specifier").map((item) => normalizeSymbolReference(item.text.replace(/\b(public|private|protected|virtual)\b/g, "").trim())).filter(Boolean),
3843
- (item) => item
3844
- ) : [];
3845
- const exported = !/\bstatic\b/.test(child.text);
4035
+ const parents = recordParentTypes(child);
4036
+ const exported = swiftExported(child);
4037
+ draftSymbols.push({
4038
+ name,
4039
+ kind: "interface",
4040
+ signature: singleLineSignature(child.text),
4041
+ exported,
4042
+ callNames: [],
4043
+ extendsNames: parents,
4044
+ implementsNames: [],
4045
+ bodyText: nodeText(findNamedChild(child, "protocol_body")) || child.text
4046
+ });
4047
+ if (exported) {
4048
+ exportLabels.push(name);
4049
+ }
4050
+ continue;
4051
+ }
4052
+ if (child.type === "class_declaration") {
4053
+ const name = extractIdentifier(findNamedChild(child, "type_identifier"));
4054
+ if (!name) {
4055
+ continue;
4056
+ }
4057
+ const kind = swiftDeclarationKindFromKeyword(child);
4058
+ const parentTypes = recordParentTypes(child);
4059
+ const extendsNames = kind === "class" && parentTypes.length > 0 ? [parentTypes[0]] : [];
4060
+ const implementsNames = kind === "class" ? parentTypes.slice(1) : parentTypes;
4061
+ const exported = swiftExported(child);
4062
+ const body = findNamedChild(child, "class_body") ?? findNamedChild(child, "enum_class_body");
3846
4063
  draftSymbols.push({
3847
4064
  name,
3848
4065
  kind,
3849
4066
  signature: singleLineSignature(child.text),
3850
4067
  exported,
3851
4068
  callNames: [],
3852
- extendsNames: bases,
4069
+ extendsNames,
4070
+ implementsNames,
4071
+ bodyText: nodeText(body) || child.text
4072
+ });
4073
+ if (exported) {
4074
+ exportLabels.push(name);
4075
+ }
4076
+ continue;
4077
+ }
4078
+ if (child.type === "typealias_declaration") {
4079
+ const name = extractIdentifier(findNamedChild(child, "type_identifier"));
4080
+ if (!name) {
4081
+ continue;
4082
+ }
4083
+ const exported = swiftExported(child);
4084
+ draftSymbols.push({
4085
+ name,
4086
+ kind: "type_alias",
4087
+ signature: singleLineSignature(child.text),
4088
+ exported,
4089
+ callNames: [],
4090
+ extendsNames: [],
3853
4091
  implementsNames: [],
3854
- bodyText: nodeText(child.childForFieldName("body")) || child.text
4092
+ bodyText: child.text
3855
4093
  });
3856
4094
  if (exported) {
3857
4095
  exportLabels.push(name);
3858
4096
  }
3859
4097
  continue;
3860
4098
  }
3861
- if (child.type === "function_definition") {
3862
- const name = functionNameFromDeclarator(child.childForFieldName("declarator"));
4099
+ if (child.type === "function_declaration") {
4100
+ const name = extractIdentifier(findNamedChild(child, "simple_identifier") ?? findNamedChild(child, "identifier"));
3863
4101
  if (!name) {
3864
4102
  continue;
3865
4103
  }
3866
- const exported = !/\bstatic\b/.test(child.text);
4104
+ const exported = swiftExported(child);
3867
4105
  draftSymbols.push({
3868
4106
  name,
3869
4107
  kind: "function",
@@ -3872,87 +4110,1163 @@ function cFamilyCodeAnalysis(manifest, language, rootNode, diagnostics) {
3872
4110
  callNames: [],
3873
4111
  extendsNames: [],
3874
4112
  implementsNames: [],
3875
- bodyText: nodeText(child.childForFieldName("body")) || child.text
4113
+ bodyText: nodeText(findNamedChild(child, "function_body")) || child.text
3876
4114
  });
3877
4115
  if (exported) {
3878
4116
  exportLabels.push(name);
3879
4117
  }
4118
+ continue;
4119
+ }
4120
+ if (child.type === "property_declaration") {
4121
+ const exported = swiftExported(child);
4122
+ const patterns = child.namedChildren.filter((item) => item?.type === "pattern");
4123
+ for (const pattern of patterns) {
4124
+ const name = extractIdentifier(findNamedChild(pattern, "simple_identifier") ?? pattern.namedChildren[0] ?? null);
4125
+ if (!name) {
4126
+ continue;
4127
+ }
4128
+ draftSymbols.push({
4129
+ name,
4130
+ kind: "variable",
4131
+ signature: singleLineSignature(child.text),
4132
+ exported,
4133
+ callNames: [],
4134
+ extendsNames: [],
4135
+ implementsNames: [],
4136
+ bodyText: child.text
4137
+ });
4138
+ if (exported) {
4139
+ exportLabels.push(name);
4140
+ }
4141
+ }
3880
4142
  }
3881
4143
  }
3882
- return finalizeCodeAnalysis(manifest, language, imports, draftSymbols, exportLabels, diagnostics);
4144
+ return finalizeCodeAnalysis(manifest, "swift", imports, draftSymbols, exportLabels, diagnostics);
3883
4145
  }
3884
- async function analyzeTreeSitterCode(manifest, content, language) {
3885
- let tree = null;
3886
- try {
3887
- const module = await getTreeSitterModule();
3888
- await ensureTreeSitterInit(module);
3889
- const parser = new module.Parser();
3890
- parser.setLanguage(await loadLanguage(language));
3891
- tree = parser.parse(content);
3892
- } catch (error) {
3893
- return {
3894
- code: finalizeCodeAnalysis(manifest, language, [], [], [], [treeSitterCompatibilityDiagnostic(language, error)]),
3895
- rationales: []
3896
- };
4146
/**
 * Returns the trimmed text of a call node's `identifier` child.
 *
 * @param {object} callNode - An Elixir `call` syntax node.
 * @returns {string | undefined} The identifier text, or undefined when the
 *   call has no identifier child or its text is empty.
 */
function elixirCallIdentifier(callNode) {
  const target = findNamedChild(callNode, "identifier");
  if (target == null) {
    return void 0;
  }
  const name = target.text.trim();
  return name ? name : void 0;
}
4149
/**
 * Picks the module reference out of a macro call's argument list.
 *
 * @param {object | null | undefined} argumentsNode - The call's `arguments` node.
 * @returns {string | undefined} Trimmed text of the first `alias` or
 *   `identifier` argument whose text is non-empty; undefined when there is no
 *   arguments node or no such argument.
 */
function elixirFirstModulePath(argumentsNode) {
  if (!argumentsNode) {
    return void 0;
  }
  const modulePathTypes = ["alias", "identifier"];
  const hit = argumentsNode.namedChildren.find(
    (argument) => argument && modulePathTypes.includes(argument.type) && argument.text.trim() !== ""
  );
  return hit ? hit.text.trim() : void 0;
}
4166
/**
 * Extracts the function name from the arguments of a `def`-style macro call.
 *
 * The first non-null argument is the function head. It may be:
 *   - a `call` node (`def name(args)`), whose identifier child is the name;
 *   - a bare `identifier` (`def name`);
 *   - a `binary_operator` carrying a `when` token (`def name(args) when guard`),
 *     in which case the real head is the operator's left operand.
 * Any other shape (for example an operator definition such as `def a + b`)
 * yields undefined.
 *
 * @param {object | null | undefined} argumentsNode - The macro call's `arguments` node.
 * @returns {string | undefined} The trimmed function name, or undefined when
 *   it cannot be determined.
 */
function elixirFunctionNameFromArguments(argumentsNode) {
  if (!argumentsNode) {
    return void 0;
  }
  let head = argumentsNode.namedChildren.find((item) => item !== null);
  if (!head) {
    return void 0;
  }
  // A guard clause wraps the head: unwrap `head when guard` to reach `head`.
  if (head.type === "binary_operator" && head.children.some((token) => token?.type === "when")) {
    head = head.childForFieldName("left") ?? head.namedChildren.find((item) => item !== null);
    if (!head) {
      return void 0;
    }
  }
  if (head.type === "call") {
    const inner = findNamedChild(head, "identifier");
    return inner?.text.trim() || void 0;
  }
  if (head.type === "identifier") {
    return head.text.trim() || void 0;
  }
  return void 0;
}
4183
// Macros whose first argument names another module; calls to these inside a
// module body are recorded as imports.
var ELIXIR_IMPORT_MACROS = /* @__PURE__ */ new Set([
  "alias",
  "import",
  "require",
  "use"
]);
// Definition macros whose functions are recorded as exported.
var ELIXIR_PUBLIC_DEF_MACROS = /* @__PURE__ */ new Set([
  "def",
  "defmacro"
]);
// Definition macros whose functions are recorded but not exported.
var ELIXIR_PRIVATE_DEF_MACROS = /* @__PURE__ */ new Set([
  "defp",
  "defmacrop"
]);
4186
/**
 * Builds the code analysis for an Elixir source file from its syntax tree.
 *
 * Only top-level `defmodule` / `defprotocol` calls are inspected. Each one
 * becomes an exported symbol ("class" for modules, "interface" for protocols).
 * Inside its `do` block, direct child calls are handled as follows:
 *   - alias / import / require / use  -> an external import record;
 *   - def / defmacro                  -> an exported function `Module.name`;
 *   - defp / defmacrop                -> a non-exported function `Module.name`.
 * The first module found is passed to finalizeCodeAnalysis as `moduleName`.
 *
 * @param {object} manifest - Passed through to finalizeCodeAnalysis.
 * @param {object} rootNode - Root syntax node of the parsed file.
 * @param {Array} diagnostics - Passed through to finalizeCodeAnalysis.
 * @returns The value returned by finalizeCodeAnalysis.
 */
function elixirCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const headerOf = (node) => node.text.split("\n")[0] ?? node.text;
  const isCall = (node) => Boolean(node) && node.type === "call";

  const recordImport = (macroCall) => {
    const specifier = elixirFirstModulePath(findNamedChild(macroCall, "arguments"));
    if (!specifier) {
      return;
    }
    imports.push({
      specifier,
      importedSymbols: [],
      // Elixir imports always target a compiled BEAM module; there is no
      // notion of "file-local" relative imports the way Python or JS use them.
      // Treat every entry as external.
      isExternal: true,
      reExport: false
    });
  };

  const recordFunction = (moduleName, macroName, defCall) => {
    const fnName = elixirFunctionNameFromArguments(findNamedChild(defCall, "arguments"));
    if (!fnName) {
      return;
    }
    const name = `${moduleName}.${fnName}`;
    const exported = ELIXIR_PUBLIC_DEF_MACROS.has(macroName);
    draftSymbols.push({
      name,
      kind: "function",
      signature: singleLineSignature(headerOf(defCall)),
      exported,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: nodeText(findNamedChild(defCall, "do_block")) || defCall.text
    });
    if (exported) {
      exportLabels.push(name);
    }
  };

  let primaryModuleName;
  for (const moduleCall of rootNode.namedChildren) {
    if (!isCall(moduleCall)) {
      continue;
    }
    const definer = elixirCallIdentifier(moduleCall);
    const isProtocol = definer === "defprotocol";
    if (!isProtocol && definer !== "defmodule") {
      continue;
    }
    const moduleName = elixirFirstModulePath(findNamedChild(moduleCall, "arguments"));
    if (!moduleName) {
      continue;
    }
    if (primaryModuleName === void 0) {
      primaryModuleName = moduleName;
    }
    draftSymbols.push({
      name: moduleName,
      kind: isProtocol ? "interface" : "class",
      signature: singleLineSignature(headerOf(moduleCall)),
      // Modules and protocols are always module-level public in Elixir.
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: moduleCall.text
    });
    exportLabels.push(moduleName);

    const doBlock = findNamedChild(moduleCall, "do_block");
    if (!doBlock) {
      continue;
    }
    for (const member of doBlock.namedChildren) {
      if (!isCall(member)) {
        continue;
      }
      const macroName = elixirCallIdentifier(member);
      if (!macroName) {
        continue;
      }
      if (ELIXIR_IMPORT_MACROS.has(macroName)) {
        recordImport(member);
      } else if (ELIXIR_PUBLIC_DEF_MACROS.has(macroName) || ELIXIR_PRIVATE_DEF_MACROS.has(macroName)) {
        recordFunction(moduleName, macroName, member);
      }
    }
  }
  return finalizeCodeAnalysis(manifest, "elixir", imports, draftSymbols, exportLabels, diagnostics, {
    moduleName: primaryModuleName
  });
}
4279
/**
 * Converts an OCaml `open_module` node into an import record.
 *
 * @param {object} node - The `open` statement syntax node.
 * @returns {{specifier: string, importedSymbols: string[], isExternal: boolean, reExport: boolean} | undefined}
 *   The import record, or undefined when the node has no non-empty
 *   `module_path` child.
 */
function parseOCamlOpen(node) {
  const pathNode = findNamedChild(node, "module_path");
  const specifier = pathNode ? pathNode.text.trim() : "";
  // Every OCaml `open` references a compiled module; there is no file-local
  // "./sibling" form. Classify as external and let resolveCodeImport's single-
  // candidate short-circuit promote it to local when an alias matches.
  return specifier ? { specifier, importedSymbols: [], isExternal: true, reExport: false } : void 0;
}
4297
+ }
4298
/**
 * Classifies an OCaml `let` binding as a function or a plain value.
 *
 * A binding counts as a function when it either declares parameters
 * (`let f x = ...`) or is bound directly to an anonymous function
 * (`let f = fun x -> ...` / `let f = function ...`).
 *
 * @param {object | null | undefined} letBinding - The `let_binding` node.
 * @returns {"function" | "variable" | undefined} The kind, or undefined when
 *   no binding node is given.
 */
function ocamlValueBindingKind(letBinding) {
  if (!letBinding) {
    return void 0;
  }
  // Child node types that make the bound value callable.
  const functionShapes = new Set(["parameter", "fun_expression", "function_expression"]);
  const isFunction = letBinding.namedChildren.some((child) => child != null && functionShapes.has(child.type));
  return isFunction ? "function" : "variable";
}
4305
/**
 * Maps an OCaml type binding to a symbol kind.
 *
 * @param {object | null | undefined} typeBinding - The `type_binding` node.
 * @returns {"struct" | "enum" | "type_alias"} "struct" or "enum" according to
 *   whichever of `record_declaration` / `variant_declaration` appears first
 *   among the binding's children; "type_alias" when neither appears or no
 *   binding is given.
 */
function ocamlTypeKind(typeBinding) {
  if (!typeBinding) {
    return "type_alias";
  }
  const kindByDeclaration = new Map([
    ["record_declaration", "struct"],
    ["variant_declaration", "enum"]
  ]);
  const declaration = typeBinding.namedChildren.find((child) => child && kindByDeclaration.has(child.type));
  return declaration ? kindByDeclaration.get(declaration.type) : "type_alias";
}
4322
/**
 * Builds the code analysis for an OCaml source file from its syntax tree.
 *
 * Only direct children of the root are inspected:
 *   - open_module            -> import record (via parseOCamlOpen);
 *   - module_definition      -> "class" symbol named after the module binding;
 *   - module_type_definition -> "interface" symbol;
 *   - type_definition        -> struct / enum / type_alias (via ocamlTypeKind);
 *   - value_definition       -> function / variable (via ocamlValueBindingKind).
 * For module, type and value definitions only the first binding child is read.
 * Every recorded symbol is marked exported.
 *
 * @param {object} manifest - Passed through to finalizeCodeAnalysis.
 * @param {object} rootNode - Root syntax node of the parsed file.
 * @param {Array} diagnostics - Passed through to finalizeCodeAnalysis.
 * @returns The value returned by finalizeCodeAnalysis.
 */
function ocamlCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const trimmedText = (node) => node?.text.trim();
  // OCaml's `let`/`module` bindings are exported from the containing
  // compilation unit unless an explicit `.mli` interface hides them.
  // Treat everything defined in a `.ml` file as exported; consumers who
  // want hiding should rely on the downstream interface-file merge.
  const declare = (declaration, name, kind, bodyText) => {
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(declaration.text.split("\n")[0] ?? declaration.text),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText
    });
    exportLabels.push(name);
  };

  for (const child of rootNode.namedChildren) {
    switch (child?.type) {
      case "open_module": {
        const opened = parseOCamlOpen(child);
        if (opened) {
          imports.push(opened);
        }
        break;
      }
      case "module_definition": {
        const binding = findNamedChild(child, "module_binding");
        const name = binding ? trimmedText(findNamedChild(binding, "module_name")) : void 0;
        if (name) {
          declare(child, name, "class", nodeText(findNamedChild(binding, "structure")) || child.text);
        }
        break;
      }
      case "module_type_definition": {
        const name = trimmedText(findNamedChild(child, "module_type_name"));
        if (name) {
          declare(child, name, "interface", nodeText(findNamedChild(child, "signature")) || child.text);
        }
        break;
      }
      case "type_definition": {
        const binding = findNamedChild(child, "type_binding");
        const name = binding ? trimmedText(findNamedChild(binding, "type_constructor")) : void 0;
        if (name) {
          declare(child, name, ocamlTypeKind(binding), child.text);
        }
        break;
      }
      case "value_definition": {
        const binding = findNamedChild(child, "let_binding");
        if (!binding) {
          break;
        }
        const name = trimmedText(findNamedChild(binding, "value_name"));
        if (name) {
          declare(child, name, ocamlValueBindingKind(binding) ?? "function", child.text);
        }
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "ocaml", imports, draftSymbols, exportLabels, diagnostics);
}
4427
/**
 * Builds the code analysis for an Objective-C source file from its syntax tree.
 *
 * Only direct named children of the root are inspected:
 *  - `preproc_include` nodes become imports (via `parseCppInclude`),
 *  - `protocol_declaration` nodes become "interface" symbols,
 *  - `class_interface` / `class_implementation` nodes become "class" symbols
 *    (an implementation is ignored when the same class name was already seen),
 *  - `function_definition` nodes become "function" symbols.
 * Every recorded symbol is treated as exported.
 *
 * @param manifest    Source manifest forwarded to `finalizeCodeAnalysis`.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for language "objc".
 */
function objcCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const declaredClassNames = /* @__PURE__ */ new Set();

  // Signature is derived from the first source line of the declaration.
  const headline = (node) => singleLineSignature(node.text.split("\n")[0] ?? node.text);

  // Trimmed, de-duplicated texts of the children whose type is accepted.
  const collectNames = (container, acceptedTypes) => uniqueBy(
    container.namedChildren.filter((item) => acceptedTypes.includes(item?.type)).map((item) => item.text.trim()).filter(Boolean),
    (item) => item
  );

  // Registers one exported symbol and its export label.
  const declare = (node, name, kind, extendsNames, implementsNames, bodyText) => {
    draftSymbols.push({
      name,
      kind,
      signature: headline(node),
      exported: true,
      callNames: [],
      extendsNames,
      implementsNames,
      bodyText
    });
    exportLabels.push(name);
  };

  // Follows the chain of `declarator` fields down to the innermost one and
  // extracts the identifier from it.
  const functionNameFromDeclarator = (node) => {
    let current = node;
    while (current) {
      const inner = current.childForFieldName("declarator");
      if (!inner) {
        return extractIdentifier(current);
      }
      current = inner;
    }
    return void 0;
  };

  for (const child of rootNode.namedChildren) {
    switch (child?.type) {
      case "preproc_include": {
        const include = parseCppInclude(child);
        if (include) {
          imports.push(include);
        }
        break;
      }
      case "protocol_declaration": {
        const protocolName = findNamedChild(child, "identifier")?.text.trim();
        if (!protocolName) {
          break;
        }
        const refList = findNamedChild(child, "protocol_reference_list");
        const inherited = refList ? collectNames(refList, ["identifier"]) : [];
        declare(child, protocolName, "interface", inherited, [], child.text);
        break;
      }
      case "class_interface": {
        // First identifier is the class name, the second (if any) is used as
        // the superclass.
        const [classNode, superNode] = child.namedChildren.filter((item) => item?.type === "identifier");
        const className = classNode?.text.trim();
        if (!className) {
          break;
        }
        const superclass = superNode?.text.trim();
        const parameterized = findNamedChild(child, "parameterized_arguments");
        const adopted = parameterized ? collectNames(parameterized, ["type_name", "identifier"]) : [];
        declaredClassNames.add(className);
        declare(child, className, "class", superclass ? [superclass] : [], adopted, child.text);
        break;
      }
      case "class_implementation": {
        const className = findNamedChild(child, "identifier")?.text.trim();
        // Skip unnamed implementations and classes that were already recorded.
        if (!className || declaredClassNames.has(className)) {
          break;
        }
        declaredClassNames.add(className);
        declare(child, className, "class", [], [], child.text);
        break;
      }
      case "function_definition": {
        const functionName = functionNameFromDeclarator(child.childForFieldName("declarator"));
        if (!functionName) {
          break;
        }
        declare(child, functionName, "function", [], [], nodeText(child.childForFieldName("body")) || child.text);
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "objc", imports, draftSymbols, exportLabels, diagnostics);
}
4546
/**
 * Builds the code analysis for a ReScript source file from its syntax tree.
 *
 * Direct named children of the root are mapped as follows:
 *  - `open_statement`      -> external import of the opened module,
 *  - `module_declaration`  -> "class" symbol,
 *  - `type_declaration`    -> "enum" / "struct" / "type_alias" symbol,
 *  - `let_declaration`     -> "function" / "variable" symbol.
 * Every recorded symbol is treated as exported.
 *
 * @param manifest    Source manifest forwarded to `finalizeCodeAnalysis`.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for language "rescript".
 */
function rescriptCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];

  // The first variant/record child of the binding decides the kind; anything
  // else (including a missing binding) is a plain alias.
  const rescriptTypeKind = (typeBinding) => {
    const shape = typeBinding?.namedChildren.find(
      (node) => node && (node.type === "variant_type" || node.type === "record_type")
    );
    if (!shape) {
      return "type_alias";
    }
    return shape.type === "variant_type" ? "enum" : "struct";
  };

  // A let binding is a function when one of its direct children is a
  // `function` node.
  const rescriptLetBindingKind = (letBinding) => {
    const bindsFunction = letBinding ? letBinding.namedChildren.some((node) => node?.type === "function") : false;
    return bindsFunction ? "function" : "variable";
  };

  // Registers one exported symbol and its export label.
  const declare = (node, name, kind, bodyText) => {
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(node.text.split("\n")[0] ?? node.text),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText
    });
    exportLabels.push(name);
  };

  // Name of a declaration: the `identifierType` child of its `bindingType` child.
  const boundName = (binding, identifierType) => {
    const identifier = binding ? findNamedChild(binding, identifierType) : null;
    return identifier?.text.trim();
  };

  for (const child of rootNode.namedChildren) {
    switch (child?.type) {
      case "open_statement": {
        const specifier = findNamedChild(child, "module_identifier")?.text.trim();
        if (specifier) {
          imports.push({
            specifier,
            importedSymbols: [],
            // ReScript modules resolve through the build system's own module
            // graph; they are never file-local in the "./relative" sense.
            isExternal: true,
            reExport: false
          });
        }
        break;
      }
      case "module_declaration": {
        const binding = findNamedChild(child, "module_binding");
        const moduleName = boundName(binding, "module_identifier");
        if (moduleName) {
          declare(child, moduleName, "class", nodeText(findNamedChild(binding, "block")) || child.text);
        }
        break;
      }
      case "type_declaration": {
        const binding = findNamedChild(child, "type_binding");
        const typeName = boundName(binding, "type_identifier");
        if (typeName) {
          declare(child, typeName, rescriptTypeKind(binding), child.text);
        }
        break;
      }
      case "let_declaration": {
        const binding = findNamedChild(child, "let_binding");
        const valueName = boundName(binding, "value_identifier");
        if (valueName) {
          declare(child, valueName, rescriptLetBindingKind(binding), child.text);
        }
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "rescript", imports, draftSymbols, exportLabels, diagnostics);
}
4662
/**
 * Converts a Solidity import node into a list with zero or one import records.
 *
 * The path is taken from the node's first `string` child; direct `identifier`
 * children are reported as the imported symbols. A path is considered local
 * when it starts with "./", "../" or "/".
 *
 * @param node Import node to inspect.
 * @returns An empty array when no usable path is found, otherwise a
 *          single-element array describing the import.
 */
function parseSolidityImport(node) {
  const children = node.namedChildren;
  const pathNode = children.find((item) => item?.type === "string");
  const specifier = pathNode ? quotedPath(pathNode.text) : void 0;
  if (!specifier) {
    return [];
  }
  const symbolNames = children.filter((item) => item?.type === "identifier").map((item) => item.text.trim()).filter(Boolean);
  const localPrefixes = ["./", "../", "/"];
  const isLocal = localPrefixes.some((prefix) => specifier.startsWith(prefix));
  const record = {
    specifier,
    importedSymbols: uniqueBy(symbolNames, (item) => item),
    isExternal: !isLocal,
    reExport: false
  };
  return [record];
}
4685
/**
 * Builds the code analysis for a Solidity source file from its syntax tree.
 *
 * Direct named children of the root are mapped as follows:
 *  - `import_directive`       -> imports (via `parseSolidityImport`),
 *  - `interface_declaration`  -> "interface" symbol, parents in `extendsNames`,
 *  - `contract_declaration`   -> "class" symbol, parents in `implementsNames`,
 *  - `library_declaration`    -> "class" symbol without parents,
 *  - `struct_declaration`     -> "struct" symbol,
 *  - `enum_declaration`       -> "enum" symbol,
 *  - `function_definition`    -> "function" symbol.
 * Every recorded symbol is treated as exported.
 *
 * @param manifest    Source manifest forwarded to `finalizeCodeAnalysis`.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for language "solidity".
 */
function solidityCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];

  // Symbol kind recorded for each supported declaration node type.
  const kindByNodeType = /* @__PURE__ */ new Map([
    ["interface_declaration", "interface"],
    ["library_declaration", "class"],
    ["contract_declaration", "class"],
    ["struct_declaration", "struct"],
    ["enum_declaration", "enum"],
    ["function_definition", "function"]
  ]);
  // Child node type holding the body, for declarations that have one; the
  // remaining declarations use their whole text as body.
  const bodyTypeByNodeType = /* @__PURE__ */ new Map([
    ["interface_declaration", "contract_body"],
    ["library_declaration", "contract_body"],
    ["contract_declaration", "contract_body"],
    ["function_definition", "function_body"]
  ]);

  // De-duplicated, normalized names found inside the declaration's
  // `inheritance_specifier` children.
  const collectParents = (declaration) => {
    const names = declaration.namedChildren.filter((item) => item?.type === "inheritance_specifier").flatMap((specifier) => specifier.namedChildren).filter((node) => node && (node.type === "user_defined_type" || node.type === "identifier")).map((node) => normalizeSymbolReference(node.text)).filter((text) => text);
    return uniqueBy(names, (item) => item);
  };

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    if (child.type === "import_directive") {
      imports.push(...parseSolidityImport(child));
      continue;
    }
    const kind = kindByNodeType.get(child.type);
    if (!kind) {
      continue;
    }
    const name = findNamedChild(child, "identifier")?.text.trim();
    if (!name) {
      continue;
    }
    const isInterface = child.type === "interface_declaration";
    const isContract = child.type === "contract_declaration";
    const parents = isInterface || isContract ? collectParents(child) : [];
    const bodyType = bodyTypeByNodeType.get(child.type);
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(child.text.split("\n")[0] ?? child.text),
      exported: true,
      callNames: [],
      extendsNames: isInterface ? parents : [],
      // Solidity supports multiple inheritance; list every parent contract
      // as an `implements` edge rather than arbitrarily promoting one to
      // `extends`.
      implementsNames: isContract ? parents : [],
      bodyText: bodyType ? nodeText(findNamedChild(child, bodyType)) || child.text : child.text
    });
    exportLabels.push(name);
  }
  return finalizeCodeAnalysis(manifest, "solidity", imports, draftSymbols, exportLabels, diagnostics);
}
4816
/**
 * Returns the trimmed value of an HTML `attribute` node.
 *
 * Lookup order:
 *  1. the `attribute_value` nested inside a `quoted_attribute_value` child,
 *  2. the raw text of the `quoted_attribute_value` with its quotes removed,
 *  3. a bare (unquoted) `attribute_value` child.
 *
 * When falling back to the raw quoted text, the trailing character is only
 * removed if it is the same quote character that opened the value. A value
 * recovered from malformed markup (e.g. `"abc` with no closing quote)
 * therefore keeps its last real character instead of being cut short.
 *
 * @param attribute Node exposing `namedChildren`; entries may be null.
 * @returns The trimmed value, or `undefined` when the attribute has no value.
 */
function htmlAttributeValue(attribute) {
  const childOfType = (node, type) => node.namedChildren.find((c) => c?.type === type);
  const quoted = childOfType(attribute, "quoted_attribute_value");
  if (!quoted) {
    return childOfType(attribute, "attribute_value")?.text.trim();
  }
  const inner = childOfType(quoted, "attribute_value");
  if (inner) {
    return inner.text.trim();
  }
  const raw = quoted.text;
  const opener = raw[0];
  if (opener !== '"' && opener !== "'") {
    return raw.trim();
  }
  // Drop the closing quote only when it really is there and matches.
  const hasMatchingCloser = raw.length >= 2 && raw.endsWith(opener);
  return raw.slice(1, hasMatchingCloser ? -1 : void 0).trim();
}
4832
/**
 * Collects the attributes of an HTML element into a Map.
 *
 * Attributes are read from the element's `start_tag` child, or from its
 * `self_closing_tag` child when there is no start tag. Keys are trimmed,
 * lower-cased attribute names; values come from `htmlAttributeValue`.
 * Attributes without a value are left out, and a repeated name keeps the
 * value seen last.
 *
 * @param element Element node to inspect.
 * @returns Map from attribute name to value (empty when no tag is found).
 */
function htmlAttributesOf(element) {
  const attributes = /* @__PURE__ */ new Map();
  const tag = findNamedChild(element, "start_tag") ?? findNamedChild(element, "self_closing_tag");
  if (tag) {
    for (const candidate of tag.namedChildren) {
      if (candidate?.type !== "attribute") {
        continue;
      }
      const key = findNamedChild(candidate, "attribute_name")?.text.trim().toLowerCase();
      if (!key) {
        continue;
      }
      const value = htmlAttributeValue(candidate);
      if (value === void 0) {
        continue;
      }
      attributes.set(key, value);
    }
  }
  return attributes;
}
4854
/**
 * Returns the trimmed, lower-cased tag name of an HTML element.
 *
 * The name is read from the `tag_name` child of the element's `start_tag`
 * (or `self_closing_tag` when there is no start tag).
 *
 * @param element Element node to inspect.
 * @returns The tag name, or `undefined` when no tag or tag name is found.
 */
function htmlTagName(element) {
  const tag = findNamedChild(element, "start_tag") ?? findNamedChild(element, "self_closing_tag");
  return tag ? findNamedChild(tag, "tag_name")?.text.trim().toLowerCase() : void 0;
}
4861
/**
 * Builds the code analysis for an HTML document from its syntax tree.
 *
 * Every `element`, `script_element` and `style_element` descendant of the
 * root is visited:
 *  - `<link>` elements whose `rel` contains the `stylesheet` keyword and that
 *    have an `href` become imports,
 *  - script elements with a `src` attribute become imports,
 *  - tags whose name contains "-" are recorded once as "class" symbols,
 *  - `id` attributes are recorded once as "variable" symbols.
 * Tag names and ids share one de-duplication set, so a name is only ever
 * recorded once across both categories.
 *
 * @param manifest    Source manifest forwarded to `finalizeCodeAnalysis`.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for language "html".
 */
function htmlCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const seenSymbolNames = /* @__PURE__ */ new Set();

  // Relative and root-relative paths are local; http(s) and protocol-relative
  // URLs are not; anything else is local unless it contains a ":" (scheme).
  const isLocalAssetSpecifier = (specifier) => {
    if (!specifier) {
      return false;
    }
    if (specifier.startsWith("./") || specifier.startsWith("../") || specifier.startsWith("/")) {
      return true;
    }
    if (specifier.startsWith("http://") || specifier.startsWith("https://") || specifier.startsWith("//")) {
      return false;
    }
    return !specifier.includes(":");
  };

  // `rel` is a whitespace-separated list of case-insensitive keywords, so
  // `rel="Stylesheet"` and `rel="alternate stylesheet"` both qualify.
  const isStylesheetRel = (rel) => (rel ?? "").toLowerCase().split(/\s+/).includes("stylesheet");

  const recordImport = (specifier) => {
    imports.push({
      specifier,
      importedSymbols: [],
      isExternal: !isLocalAssetSpecifier(specifier),
      reExport: false
    });
  };

  // Records an exported symbol unless a symbol with that name already exists.
  const recordSymbolOnce = (name, kind, element) => {
    if (seenSymbolNames.has(name)) {
      return;
    }
    seenSymbolNames.add(name);
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(element.text.split("\n")[0] ?? element.text),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: element.text
    });
    exportLabels.push(name);
  };

  const elements = rootNode.descendantsOfType(["element", "script_element", "style_element"]).filter((item) => item !== null);
  for (const element of elements) {
    const attrs = htmlAttributesOf(element);
    const tagName = htmlTagName(element);
    if (tagName === "link") {
      const href = attrs.get("href");
      if (isStylesheetRel(attrs.get("rel")) && href) {
        recordImport(href);
      }
      // <link> elements never contribute symbols.
      continue;
    }
    if (element.type === "script_element") {
      const src = attrs.get("src");
      if (src) {
        recordImport(src);
      }
      // Script elements never contribute symbols.
      continue;
    }
    if (tagName && tagName.includes("-")) {
      recordSymbolOnce(tagName, "class", element);
    }
    const id = attrs.get("id");
    if (id) {
      recordSymbolOnce(id, "variable", element);
    }
  }
  return finalizeCodeAnalysis(manifest, "html", imports, draftSymbols, exportLabels, diagnostics);
}
4941
/**
 * Converts a CSS import node into an import record.
 *
 * The path is taken from a direct `string_value` child when there is one,
 * otherwise from a `string_value` inside the `arguments` of a direct
 * `call_expression` child. A path is considered local when it starts with
 * "./", "../" or "/".
 *
 * @param node Import node to inspect.
 * @returns The import record, or `undefined` when no usable path is found.
 */
function parseCssImport(node) {
  const isStringValue = (c) => c?.type === "string_value";

  // Turns a string node into an import record (undefined for an empty path).
  const toImport = (stringNode) => {
    const specifier = quotedPath(stringNode.text);
    if (!specifier) {
      return void 0;
    }
    const isLocal = specifier.startsWith("./") || specifier.startsWith("../") || specifier.startsWith("/");
    return {
      specifier,
      importedSymbols: [],
      isExternal: !isLocal,
      reExport: false
    };
  };

  const directString = node.namedChildren.find(isStringValue);
  if (directString) {
    return toImport(directString);
  }
  const call = node.namedChildren.find((c) => c?.type === "call_expression");
  const nestedString = call ? findNamedChild(call, "arguments")?.namedChildren.find(isStringValue) : void 0;
  return nestedString ? toImport(nestedString) : void 0;
}
4974
/**
 * Builds the code analysis for a CSS stylesheet from its syntax tree.
 *
 * Direct named children of the root are mapped as follows:
 *  - `import_statement`    -> import (via `parseCssImport`),
 *  - `rule_set`            -> "class" symbol named after the whitespace-
 *                             normalized selector list,
 *  - `keyframes_statement` -> "class" symbol named `@keyframes <name>`.
 * Each distinct symbol name is recorded only once; all symbols are exported.
 *
 * @param manifest    Source manifest forwarded to `finalizeCodeAnalysis`.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for language "css".
 */
function cssCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const registered = /* @__PURE__ */ new Set();

  // Records a rule under `name` unless it is blank or already recorded.
  const addSelectorSymbol = (name, ruleText) => {
    const label = name.trim();
    if (label === "" || registered.has(label)) {
      return;
    }
    registered.add(label);
    const [firstLine = ruleText] = ruleText.split("\n");
    draftSymbols.push({
      name: label,
      kind: "class",
      signature: singleLineSignature(firstLine),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: ruleText
    });
    exportLabels.push(label);
  };

  for (const child of rootNode.namedChildren) {
    switch (child?.type) {
      case "import_statement": {
        const cssImport = parseCssImport(child);
        if (cssImport) {
          imports.push(cssImport);
        }
        break;
      }
      case "rule_set": {
        const selectors = findNamedChild(child, "selectors");
        if (selectors) {
          addSelectorSymbol(normalizeWhitespace(selectors.text), child.text);
        }
        break;
      }
      case "keyframes_statement": {
        const animation = child.namedChildren.find((c) => c?.type === "keyframes_name" || c?.type === "plain_value");
        const animationName = animation?.text.trim();
        if (animationName) {
          addSelectorSymbol(`@keyframes ${animationName}`, child.text);
        }
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "css", imports, draftSymbols, exportLabels, diagnostics);
}
5027
/**
 * Builds the code analysis for a Vue single-file component.
 *
 * Recorded symbols (all exported, each name at most once):
 *  - a "class" symbol named after the file's basename without its code
 *    extension, with the whole document as body,
 *  - a "class" symbol for every element in the template whose raw tag name
 *    starts with an upper-case ASCII letter,
 *  - a "variable" symbol for the `id` of every other template element whose
 *    tag name contains no "-".
 * No imports are collected by this function.
 *
 * @param manifest    Source manifest; `repoRelativePath`, `originalPath` and
 *                    `storedPath` are read to derive the component name.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for language "vue".
 */
function vueCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const seenSymbols = /* @__PURE__ */ new Set();

  // Records an exported symbol and marks its name as used.
  const record = (name, kind, signature, bodyText) => {
    seenSymbols.add(name);
    draftSymbols.push({
      name,
      kind,
      signature,
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText
    });
    exportLabels.push(name);
  };
  const firstLineSignature = (node) => singleLineSignature(node.text.split("\n")[0] ?? node.text);

  const repoPath = manifest.repoRelativePath ?? path5.basename(manifest.originalPath ?? manifest.storedPath);
  const componentName = path5.posix.basename(stripCodeExtension(toPosix(repoPath)));
  if (componentName) {
    record(componentName, "class", `vue component ${componentName}`, rootNode.text);
  }

  const templateElement = rootNode.namedChildren.find((c) => c?.type === "template_element");
  const templateNodes = templateElement ? templateElement.descendantsOfType(["element"]).filter((item) => item !== null) : [];
  for (const element of templateNodes) {
    const tagName = htmlTagName(element);
    const attrs = htmlAttributesOf(element);
    // The case-preserving tag name is needed to recognise PascalCase tags.
    const tag = findNamedChild(element, "start_tag") ?? findNamedChild(element, "self_closing_tag");
    const rawTagName = tag ? findNamedChild(tag, "tag_name")?.text.trim() : void 0;
    const isCapitalized = Boolean(rawTagName && /^[A-Z]/.test(rawTagName));

    if (isCapitalized && !seenSymbols.has(rawTagName)) {
      record(rawTagName, "class", firstLineSignature(element), element.text);
    }
    if (!tagName || tagName.includes("-") || isCapitalized) {
      continue;
    }
    const id = attrs.get("id");
    if (id && !seenSymbols.has(id)) {
      record(id, "variable", firstLineSignature(element), element.text);
    }
  }
  return finalizeCodeAnalysis(manifest, "vue", imports, draftSymbols, exportLabels, diagnostics);
}
5091
/**
 * Builds the code analysis for a C or C++ source file from its syntax tree.
 *
 * Direct named children of the root are mapped as follows:
 *  - `preproc_include`   -> import (via `parseCppInclude`),
 *  - `class_specifier` / `struct_specifier` / `enum_specifier`
 *                        -> "class" / "struct" / "enum" symbol, with the
 *                           entries of its base class clause as `extendsNames`,
 *  - `function_definition` -> "function" symbol.
 *
 * A symbol is treated as non-exported when the keyword `static` appears in
 * its declaration header, i.e. the text that precedes the `body` field.
 * Only the header is inspected so that `static` members of a class or
 * `static` locals inside a function body do not hide the enclosing symbol.
 *
 * @param manifest    Source manifest forwarded to `finalizeCodeAnalysis`.
 * @param language    Language label forwarded to `finalizeCodeAnalysis`.
 * @param rootNode    Root node of the parsed tree.
 * @param diagnostics Parser diagnostics forwarded to `finalizeCodeAnalysis`.
 * @returns Whatever `finalizeCodeAnalysis` produces for the given language.
 */
function cFamilyCodeAnalysis(manifest, language, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];

  // Follows nested `declarator` fields down to the innermost one and
  // extracts the identifier from it.
  const functionNameFromDeclarator = (node) => {
    if (!node) {
      return void 0;
    }
    const declarator = node.childForFieldName("declarator");
    if (declarator) {
      return functionNameFromDeclarator(declarator);
    }
    return extractIdentifier(node);
  };

  // Text of the declaration without its trailing body. Falls back to the
  // full text when there is no body or the body is not the node's suffix.
  const declarationHeader = (node) => {
    const fullText = node.text;
    const bodyText = node.childForFieldName("body")?.text;
    if (bodyText && fullText.endsWith(bodyText)) {
      return fullText.slice(0, fullText.length - bodyText.length);
    }
    return fullText;
  };
  const isExported = (node) => !/\bstatic\b/.test(declarationHeader(node));

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    if (child.type === "preproc_include") {
      const parsed = parseCppInclude(child);
      if (parsed) {
        imports.push(parsed);
      }
      continue;
    }
    if (["class_specifier", "struct_specifier", "enum_specifier"].includes(child.type)) {
      const name = extractIdentifier(child.childForFieldName("name"));
      if (!name) {
        continue;
      }
      const kind = child.type === "enum_specifier" ? "enum" : child.type === "struct_specifier" ? "struct" : "class";
      const baseClassClause = findNamedChild(child, "base_class_clause") ?? child.childForFieldName("base_class_clause");
      // Access/virtual keywords are stripped so only the base type remains.
      const bases = baseClassClause ? uniqueBy(
        baseClassClause.namedChildren.filter((item) => item !== null && item.type !== "access_specifier").map((item) => normalizeSymbolReference(item.text.replace(/\b(public|private|protected|virtual)\b/g, "").trim())).filter(Boolean),
        (item) => item
      ) : [];
      const exported = isExported(child);
      draftSymbols.push({
        name,
        kind,
        signature: singleLineSignature(child.text),
        exported,
        callNames: [],
        extendsNames: bases,
        implementsNames: [],
        bodyText: nodeText(child.childForFieldName("body")) || child.text
      });
      if (exported) {
        exportLabels.push(name);
      }
      continue;
    }
    if (child.type === "function_definition") {
      const name = functionNameFromDeclarator(child.childForFieldName("declarator"));
      if (!name) {
        continue;
      }
      const exported = isExported(child);
      draftSymbols.push({
        name,
        kind: "function",
        signature: singleLineSignature(child.text),
        exported,
        callNames: [],
        extendsNames: [],
        implementsNames: [],
        bodyText: nodeText(child.childForFieldName("body")) || child.text
      });
      if (exported) {
        exportLabels.push(name);
      }
    }
  }
  return finalizeCodeAnalysis(manifest, language, imports, draftSymbols, exportLabels, diagnostics);
}
5166
+ async function analyzeTreeSitterCode(manifest, content, language) {
5167
+ if (language === "swift" && !swiftTreeSitterEnabled()) {
5168
+ return {
5169
+ code: finalizeCodeAnalysis(manifest, language, [], [], [], [swiftTreeSitterDisabledDiagnostic()]),
5170
+ rationales: []
5171
+ };
5172
+ }
5173
+ const parseInput = language === "c" || language === "cpp" || language === "csharp" ? neutralizePreprocessorDirectives(content) : content;
5174
+ let tree = null;
5175
+ try {
5176
+ const module = await getTreeSitterModule();
5177
+ await ensureTreeSitterInit(module);
5178
+ const parser = new module.Parser();
5179
+ parser.setLanguage(await loadLanguage(language));
5180
+ tree = parser.parse(parseInput);
5181
+ } catch (error) {
5182
+ const diagnostic = treeSitterCompatibilityDiagnostic(language, error);
5183
+ if (language === "bash" && typeof diagnostic.message === "string" && diagnostic.message.includes("resolved is not a function")) {
5184
+ diagnostic.category = "warning";
5185
+ }
5186
+ return {
5187
+ code: finalizeCodeAnalysis(manifest, language, [], [], [], [diagnostic]),
5188
+ rationales: []
5189
+ };
5190
+ }
5191
+ if (!tree) {
5192
+ return {
5193
+ code: finalizeCodeAnalysis(
5194
+ manifest,
5195
+ language,
5196
+ [],
5197
+ [],
5198
+ [],
5199
+ [
5200
+ {
5201
+ code: 9e3,
5202
+ category: "error",
5203
+ message: `Failed to parse ${language} source.`,
5204
+ line: 1,
5205
+ column: 1
5206
+ }
5207
+ ]
5208
+ ),
5209
+ rationales: []
5210
+ };
5211
+ }
5212
+ try {
5213
+ const suppressDiagnostics = language === "lua" || language === "bash" && detectShellDialect(content) === "zsh";
5214
+ const rawDiagnostics = suppressDiagnostics ? [] : diagnosticsFromTree(tree.rootNode);
5215
+ const grammarGappedLanguages = /* @__PURE__ */ new Set(["c", "cpp", "csharp", "bash"]);
5216
+ const diagnostics = grammarGappedLanguages.has(language) ? rawDiagnostics.map((d) => d.category === "error" ? { ...d, category: "warning" } : d) : rawDiagnostics;
5217
+ const rationales = extractTreeSitterRationales(manifest, language, tree.rootNode);
5218
+ switch (language) {
5219
+ case "bash":
5220
+ return { code: bashCodeAnalysis(manifest, tree.rootNode, diagnostics, content), rationales };
5221
+ case "python":
5222
+ return { code: pythonCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5223
+ case "go":
5224
+ return { code: goCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5225
+ case "rust":
5226
+ return { code: rustCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5227
+ case "java":
5228
+ return { code: javaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5229
+ case "kotlin":
5230
+ return { code: kotlinCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5231
+ case "scala":
5232
+ return { code: scalaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5233
+ case "dart":
5234
+ return { code: dartCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5235
+ case "lua":
5236
+ return { code: luaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5237
+ case "zig":
5238
+ return { code: zigCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5239
+ case "csharp":
5240
+ return { code: csharpCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5241
+ case "php":
5242
+ return { code: phpCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5243
+ case "ruby":
5244
+ return { code: rubyCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5245
+ case "powershell":
5246
+ return { code: powershellCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5247
+ case "swift":
5248
+ return { code: swiftCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5249
+ case "elixir":
5250
+ return { code: elixirCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5251
+ case "ocaml":
5252
+ return { code: ocamlCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5253
+ case "objc":
5254
+ return { code: objcCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5255
+ case "rescript":
5256
+ return { code: rescriptCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5257
+ case "solidity":
5258
+ return { code: solidityCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5259
+ case "html":
5260
+ return { code: htmlCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5261
+ case "css":
5262
+ return { code: cssCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5263
+ case "vue":
5264
+ return { code: vueCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
5265
+ case "c":
5266
+ case "cpp":
5267
+ return { code: cFamilyCodeAnalysis(manifest, language, tree.rootNode, diagnostics), rationales };
5268
+ default:
5269
+ return {
3956
5270
  code: finalizeCodeAnalysis(
3957
5271
  manifest,
3958
5272
  language,
@@ -4641,6 +5955,33 @@ function inferCodeLanguage(filePath, mimeType = "", options = {}) {
4641
5955
  if (extension === ".ps1" || extension === ".psm1" || extension === ".psd1") {
4642
5956
  return "powershell";
4643
5957
  }
5958
+ if (extension === ".swift") {
5959
+ return "swift";
5960
+ }
5961
+ if (extension === ".ex" || extension === ".exs") {
5962
+ return "elixir";
5963
+ }
5964
+ if (extension === ".ml" || extension === ".mli") {
5965
+ return "ocaml";
5966
+ }
5967
+ if (extension === ".m" || extension === ".mm") {
5968
+ return "objc";
5969
+ }
5970
+ if (extension === ".res" || extension === ".resi") {
5971
+ return "rescript";
5972
+ }
5973
+ if (extension === ".sol") {
5974
+ return "solidity";
5975
+ }
5976
+ if (extension === ".html" || extension === ".htm") {
5977
+ return "html";
5978
+ }
5979
+ if (extension === ".css") {
5980
+ return "css";
5981
+ }
5982
+ if (extension === ".vue") {
5983
+ return "vue";
5984
+ }
4644
5985
  if (extension === ".c") {
4645
5986
  return "c";
4646
5987
  }
@@ -4829,6 +6170,24 @@ function candidateExtensionsFor(language) {
4829
6170
  return [".c", ".h"];
4830
6171
  case "cpp":
4831
6172
  return [".cc", ".cpp", ".cxx", ".h", ".hh", ".hpp", ".hxx"];
6173
+ case "swift":
6174
+ return [".swift"];
6175
+ case "elixir":
6176
+ return [".ex", ".exs"];
6177
+ case "ocaml":
6178
+ return [".ml", ".mli"];
6179
+ case "objc":
6180
+ return [".m", ".mm", ".h"];
6181
+ case "rescript":
6182
+ return [".res", ".resi"];
6183
+ case "solidity":
6184
+ return [".sol"];
6185
+ case "html":
6186
+ return [".css", ".js", ".mjs", ".cjs", ".html", ".htm"];
6187
+ case "css":
6188
+ return [".css"];
6189
+ default:
6190
+ return [];
4832
6191
  }
4833
6192
  }
4834
6193
  async function buildCodeIndex(rootDir, manifests, analyses) {
@@ -4932,6 +6291,39 @@ async function buildCodeIndex(rootDir, manifests, analyses) {
4932
6291
  case "powershell":
4933
6292
  recordAlias(aliases, basename);
4934
6293
  break;
6294
+ case "elixir":
6295
+ for (const symbol of analysis.code.symbols) {
6296
+ if (symbol.kind === "class" || symbol.kind === "interface") {
6297
+ recordAlias(aliases, symbol.name);
6298
+ }
6299
+ }
6300
+ break;
6301
+ case "ocaml": {
6302
+ if (basename) {
6303
+ const capitalized = basename.charAt(0).toUpperCase() + basename.slice(1);
6304
+ recordAlias(aliases, capitalized);
6305
+ recordAlias(aliases, basename);
6306
+ }
6307
+ for (const symbol of analysis.code.symbols) {
6308
+ if (symbol.kind === "class" || symbol.kind === "interface") {
6309
+ recordAlias(aliases, symbol.name);
6310
+ }
6311
+ }
6312
+ break;
6313
+ }
6314
+ case "rescript": {
6315
+ if (basename) {
6316
+ const capitalized = basename.charAt(0).toUpperCase() + basename.slice(1);
6317
+ recordAlias(aliases, capitalized);
6318
+ recordAlias(aliases, basename);
6319
+ }
6320
+ for (const symbol of analysis.code.symbols) {
6321
+ if (symbol.kind === "class") {
6322
+ recordAlias(aliases, symbol.name);
6323
+ }
6324
+ }
6325
+ break;
6326
+ }
4935
6327
  default:
4936
6328
  break;
4937
6329
  }
@@ -5133,6 +6525,9 @@ function findImportCandidates(manifest, codeImport, lookup) {
5133
6525
  case "kotlin":
5134
6526
  case "scala":
5135
6527
  case "csharp":
6528
+ case "elixir":
6529
+ case "ocaml":
6530
+ case "rescript":
5136
6531
  return aliasMatches(lookup, codeImport.specifier);
5137
6532
  case "dart":
5138
6533
  return repoRelativePath && dartSpecifierLooksLocal2(codeImport.specifier) ? repoPathMatches(lookup, ...importResolutionCandidates(repoRelativePath, codeImport.specifier, candidateExtensionsFor(language))) : aliasMatches(lookup, codeImport.specifier);
@@ -5167,6 +6562,10 @@ function findImportCandidates(manifest, codeImport, lookup) {
5167
6562
  }
5168
6563
  case "c":
5169
6564
  case "cpp":
6565
+ case "objc":
6566
+ case "solidity":
6567
+ case "html":
6568
+ case "css":
5170
6569
  return repoRelativePath && !codeImport.isExternal ? repoPathMatches(lookup, ...importResolutionCandidates(repoRelativePath, codeImport.specifier, candidateExtensionsFor(language))) : aliasMatches(lookup, codeImport.specifier);
5171
6570
  default:
5172
6571
  return [];
@@ -5192,8 +6591,10 @@ function importLooksLocal(manifest, codeImport, candidates) {
5192
6591
  case "powershell":
5193
6592
  case "c":
5194
6593
  case "cpp":
6594
+ case "objc":
5195
6595
  case "kotlin":
5196
6596
  case "scala":
6597
+ case "solidity":
5197
6598
  return !codeImport.isExternal;
5198
6599
  case "bash":
5199
6600
  return bashSpecifierLooksLocal2(codeImport.specifier);
@@ -5271,6 +6672,43 @@ import { strFromU8, unzipSync } from "fflate";
5271
6672
  import { JSDOM } from "jsdom";
5272
6673
  import TurndownService from "turndown";
5273
6674
  import { z } from "zod";
6675
+
6676
+ // src/markdown-ast.ts
6677
+ import { fromMarkdown } from "mdast-util-from-markdown";
6678
/**
 * Parse markdown source and return the top-level nodes of the resulting tree.
 *
 * @param {string} text - Markdown source handed to `fromMarkdown`.
 * @returns {Array} The root's `children` array, or an empty array when the
 *   parser throws or the root exposes no child array.
 */
function parseMarkdownNodes(text) {
  let topLevel;
  try {
    topLevel = fromMarkdown(text).children;
  } catch {
    // Best effort: a parse failure is reported as "no nodes".
    return [];
  }
  return Array.isArray(topLevel) ? topLevel : [];
}
6686
/**
 * Flatten a markdown AST node into whitespace-normalized plain text.
 *
 * - `text`, `inlineCode` and `code` nodes yield their `value`.
 * - `image` nodes yield their `alt` text.
 * - `break` and `thematicBreak` nodes yield a single space.
 * - Any other node yields the space-joined text of its children (recursively).
 *
 * @param {object} node - Markdown AST node.
 * @returns {string} Plain text for the node.
 */
function markdownNodeText(node) {
  switch (node.type) {
    case "text":
    case "inlineCode":
    case "code":
      return normalizeWhitespace(node.value ?? "");
    case "image":
      return normalizeWhitespace(node.alt ?? "");
    case "break":
    case "thematicBreak":
      return " ";
    default: {
      const childTexts = (node.children ?? []).map((child) => markdownNodeText(child));
      return normalizeWhitespace(childTexts.join(" "));
    }
  }
}
6698
/**
 * Find the first top-level heading in markdown source that has visible text.
 *
 * @param {string} text - Markdown source.
 * @returns {string|undefined} Trimmed heading text, or `undefined` when no
 *   top-level heading with non-empty text exists.
 */
function firstMarkdownHeading(text) {
  for (const candidate of parseMarkdownNodes(text)) {
    if (candidate.type !== "heading") {
      continue;
    }
    const headingText = markdownNodeText(candidate).trim();
    if (headingText) {
      return headingText;
    }
  }
  return void 0;
}
6710
+
6711
+ // src/extraction.ts
5274
6712
  var imageVisionExtractionSchema = z.object({
5275
6713
  title: z.string().min(1).nullable().optional(),
5276
6714
  summary: z.string().min(1),
@@ -5665,18 +7103,158 @@ async function extractDocxText(input) {
5665
7103
  metadata: parseOfficeCoreMetadata(input.bytes),
5666
7104
  warnings: warnings.length ? warnings : void 0
5667
7105
  };
5668
- if (!extractedText) {
5669
- artifact.warnings = [...artifact.warnings ?? [], "DOCX text extraction completed but produced no extractable text."];
7106
+ if (!extractedText) {
7107
+ artifact.warnings = [...artifact.warnings ?? [], "DOCX text extraction completed but produced no extractable text."];
7108
+ }
7109
+ return {
7110
+ extractedText: extractedText || void 0,
7111
+ artifact
7112
+ };
7113
+ } catch (error) {
7114
+ return {
7115
+ artifact: {
7116
+ ...extractionMetadata("docx", input.mimeType, "docx_text"),
7117
+ warnings: [`DOCX text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
7118
+ }
7119
+ };
7120
+ }
7121
+ }
7122
+ function jupyterCellSource(cell) {
7123
+ const source = cell.source;
7124
+ if (Array.isArray(source)) {
7125
+ return source.join("");
7126
+ }
7127
+ if (typeof source === "string") {
7128
+ return source;
7129
+ }
7130
+ return "";
7131
+ }
7132
+ function jupyterOutputSummary(outputs) {
7133
+ if (!Array.isArray(outputs) || outputs.length === 0) {
7134
+ return null;
7135
+ }
7136
+ const parts = [];
7137
+ for (const output of outputs) {
7138
+ const data = output.data;
7139
+ if (data && typeof data === "object") {
7140
+ const text = data["text/plain"] ?? data["text/markdown"];
7141
+ if (typeof text === "string") {
7142
+ parts.push(text.trim());
7143
+ continue;
7144
+ }
7145
+ if (Array.isArray(text)) {
7146
+ parts.push(text.join("").trim());
7147
+ continue;
7148
+ }
7149
+ }
7150
+ const textField = output.text;
7151
+ if (typeof textField === "string") {
7152
+ parts.push(textField.trim());
7153
+ continue;
7154
+ }
7155
+ if (Array.isArray(textField)) {
7156
+ parts.push(textField.join("").trim());
7157
+ }
7158
+ }
7159
+ const joined = parts.filter(Boolean).join("\n").trim();
7160
+ if (!joined) {
7161
+ return `[${outputs.length} non-text output${outputs.length === 1 ? "" : "s"}]`;
7162
+ }
7163
+ return joined.length > 1200 ? `${joined.slice(0, 1200)}
7164
+ [output truncated]` : joined;
7165
+ }
7166
+ async function extractJupyterNotebook(input) {
7167
+ try {
7168
+ const text = decodeTextBytes(input.bytes);
7169
+ const notebook = JSON.parse(text);
7170
+ const cells = Array.isArray(notebook.cells) ? notebook.cells : [];
7171
+ const kernelLanguage = notebook.metadata?.language_info?.name?.trim() || notebook.metadata?.kernelspec?.language?.trim() || "";
7172
+ const kernelDisplay = notebook.metadata?.kernelspec?.display_name?.trim() || "";
7173
+ let notebookTitle = typeof notebook.metadata?.title === "string" ? notebook.metadata.title.trim() : "";
7174
+ if (!notebookTitle) {
7175
+ for (const cell of cells) {
7176
+ if (cell.cell_type === "markdown") {
7177
+ const heading2 = firstMarkdownHeading(jupyterCellSource(cell));
7178
+ if (heading2) {
7179
+ notebookTitle = heading2;
7180
+ break;
7181
+ }
7182
+ }
7183
+ }
7184
+ }
7185
+ if (!notebookTitle && input.fileName) {
7186
+ notebookTitle = path7.basename(input.fileName, path7.extname(input.fileName));
7187
+ }
7188
+ const sections = [];
7189
+ let markdownCellCount = 0;
7190
+ let codeCellCount = 0;
7191
+ let outputCount = 0;
7192
+ for (const cell of cells) {
7193
+ const source = jupyterCellSource(cell).trim();
7194
+ if (!source) {
7195
+ continue;
7196
+ }
7197
+ if (cell.cell_type === "markdown") {
7198
+ markdownCellCount += 1;
7199
+ sections.push(source);
7200
+ sections.push("");
7201
+ continue;
7202
+ }
7203
+ if (cell.cell_type === "code") {
7204
+ codeCellCount += 1;
7205
+ const fence = kernelLanguage || "";
7206
+ sections.push(`\`\`\`${fence}`);
7207
+ sections.push(source);
7208
+ sections.push("```");
7209
+ const outputSummary = jupyterOutputSummary(cell.outputs);
7210
+ if (outputSummary) {
7211
+ outputCount += Array.isArray(cell.outputs) ? cell.outputs.length : 0;
7212
+ sections.push("");
7213
+ sections.push("_Output:_");
7214
+ sections.push("");
7215
+ sections.push(outputSummary);
7216
+ }
7217
+ sections.push("");
7218
+ continue;
7219
+ }
7220
+ sections.push(source);
7221
+ sections.push("");
7222
+ }
7223
+ const heading = notebookTitle ? [`# ${notebookTitle}`, ""] : [];
7224
+ const extractedText = [
7225
+ ...heading,
7226
+ `Jupyter Notebook (${cells.length} cell${cells.length === 1 ? "" : "s"}, kernel: ${kernelDisplay || kernelLanguage || "unknown"})`,
7227
+ "",
7228
+ ...sections
7229
+ ].join("\n").trim();
7230
+ const metadata = {
7231
+ cell_count: String(cells.length),
7232
+ markdown_cells: String(markdownCellCount),
7233
+ code_cells: String(codeCellCount),
7234
+ output_count: String(outputCount)
7235
+ };
7236
+ if (kernelLanguage) {
7237
+ metadata.kernel_language = kernelLanguage;
7238
+ }
7239
+ if (kernelDisplay) {
7240
+ metadata.kernel_display_name = kernelDisplay;
7241
+ }
7242
+ if (notebook.nbformat !== void 0) {
7243
+ metadata.nbformat = `${notebook.nbformat}${notebook.nbformat_minor !== void 0 ? `.${notebook.nbformat_minor}` : ""}`;
5670
7244
  }
5671
7245
  return {
7246
+ title: notebookTitle || void 0,
5672
7247
  extractedText: extractedText || void 0,
5673
- artifact
7248
+ artifact: {
7249
+ ...extractionMetadata("jupyter", input.mimeType, "jupyter_text"),
7250
+ metadata
7251
+ }
5674
7252
  };
5675
7253
  } catch (error) {
5676
7254
  return {
5677
7255
  artifact: {
5678
- ...extractionMetadata("docx", input.mimeType, "docx_text"),
5679
- warnings: [`DOCX text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
7256
+ ...extractionMetadata("jupyter", input.mimeType, "jupyter_text"),
7257
+ warnings: [`Jupyter notebook extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
5680
7258
  }
5681
7259
  };
5682
7260
  }
@@ -5731,7 +7309,7 @@ async function extractCsvText(input) {
5731
7309
  };
5732
7310
  }
5733
7311
  }
5734
- async function extractXlsxText(input) {
7312
+ async function extractSpreadsheetWorkbook(input, sourceKind, extractor) {
5735
7313
  try {
5736
7314
  const XLSX = await import("xlsx");
5737
7315
  const workbook = XLSX.read(input.bytes, { type: "buffer", cellFormula: false, cellHTML: false, cellStyles: false });
@@ -5772,7 +7350,7 @@ async function extractXlsxText(input) {
5772
7350
  title,
5773
7351
  extractedText,
5774
7352
  artifact: {
5775
- ...extractionMetadata("xlsx", input.mimeType, "xlsx_text"),
7353
+ ...extractionMetadata(sourceKind, input.mimeType, extractor),
5776
7354
  metadata,
5777
7355
  warnings
5778
7356
  }
@@ -5780,12 +7358,20 @@ async function extractXlsxText(input) {
5780
7358
  } catch (error) {
5781
7359
  return {
5782
7360
  artifact: {
5783
- ...extractionMetadata("xlsx", input.mimeType, "xlsx_text"),
5784
- warnings: [`XLSX extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
7361
+ ...extractionMetadata(sourceKind, input.mimeType, extractor),
7362
+ warnings: [
7363
+ `${sourceKind.toUpperCase()} extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`
7364
+ ]
5785
7365
  }
5786
7366
  };
5787
7367
  }
5788
7368
  }
7369
/**
 * Extract text from an XLSX workbook by delegating to the shared
 * spreadsheet extractor with the "xlsx" source kind and "xlsx_text" extractor id.
 *
 * @param {object} input - Extraction input forwarded unchanged.
 * @returns {Promise<object>} Whatever `extractSpreadsheetWorkbook` resolves to.
 */
async function extractXlsxText(input) {
  const sourceKind = "xlsx";
  const extractor = "xlsx_text";
  return extractSpreadsheetWorkbook(input, sourceKind, extractor);
}
7372
/**
 * Extract text from an ODS spreadsheet by delegating to the shared
 * spreadsheet extractor with the "ods" source kind and "ods_text" extractor id.
 *
 * @param {object} input - Extraction input forwarded unchanged.
 * @returns {Promise<object>} Whatever `extractSpreadsheetWorkbook` resolves to.
 */
async function extractOdsText(input) {
  const sourceKind = "ods";
  const extractor = "ods_text";
  return extractSpreadsheetWorkbook(input, sourceKind, extractor);
}
5789
7375
  async function extractPptxText(input) {
5790
7376
  try {
5791
7377
  const archive = unzipSync(new Uint8Array(input.bytes));
@@ -6038,66 +7624,707 @@ function calendarAttendees(value) {
6038
7624
  return name || address;
6039
7625
  }).filter(Boolean);
6040
7626
  }
6041
- function slackFormatSpeakerId(input, usersById) {
6042
- return usersById.get(input) ?? input;
7627
/**
 * Resolve a Slack user id to its display value.
 *
 * @param {string} input - Raw user id.
 * @param {Map<string, string>} usersById - Lookup from user id to display value.
 * @returns {string} The mapped value, or `input` itself when the lookup has
 *   no (non-nullish) entry for it.
 */
function slackFormatSpeakerId(input, usersById) {
  const displayName = usersById.get(input);
  return displayName ?? input;
}
7630
/**
 * Rewrite Slack markup in a message into plain text and normalize whitespace.
 *
 * Applied in order:
 *   1. `<@ID>`            -> `@<resolved speaker>`
 *   2. `<#ID|name>`       -> `#name`
 *   3. `<http(s)://u|lbl>` -> `lbl (http(s)://u)`
 *   4. `<http(s)://u>`     -> `http(s)://u`
 *
 * @param {string} text - Raw Slack message text.
 * @param {Map<string, string>} usersById - Lookup used to resolve mentions.
 * @returns {string} The rewritten text after `normalizeWhitespace`.
 */
function slackNormalizeText(text, usersById) {
  const withMentions = text.replace(/<@([A-Z0-9]+)>/g, (_, userId) => `@${slackFormatSpeakerId(userId, usersById)}`);
  const withChannels = withMentions.replace(/<#[A-Z0-9]+\|([^>]+)>/g, "#$1");
  const withLabeledLinks = withChannels.replace(/<(https?:\/\/[^>|]+)\|([^>]+)>/g, "$2 ($1)");
  const withBareLinks = withLabeledLinks.replace(/<(https?:\/\/[^>]+)>/g, "$1");
  return normalizeWhitespace(withBareLinks);
}
7635
/**
 * Convert a Slack message `ts` value to an ISO-8601 timestamp.
 *
 * `ts2` is read as seconds since the Unix epoch. When it is not a finite,
 * positive number, midnight UTC of `fallbackDate` is used instead.
 *
 * @param {string|number} ts2 - Slack timestamp in (possibly fractional) seconds.
 * @param {string} fallbackDate - Date in `YYYY-MM-DD` form for the fallback.
 * @returns {string} ISO-8601 string.
 */
function slackMessageTimestamp(ts2, fallbackDate) {
  const seconds = Number(ts2);
  const usable = Number.isFinite(seconds) && seconds > 0;
  const moment = usable ? new Date(seconds * 1e3) : /* @__PURE__ */ new Date(`${fallbackDate}T00:00:00.000Z`);
  return moment.toISOString();
}
7642
/**
 * Split a byte buffer into individual message buffers using `node-mbox`.
 *
 * The bytes are wrapped in a readable stream and fed through `MboxStream`;
 * every emitted chunk is collected as a `Buffer`.
 *
 * @param {Buffer|Uint8Array} bytes - Raw bytes to split.
 * @returns {Promise<Buffer[]>} Resolves with the collected messages on the
 *   stream's `finish` or `end` event; rejects on its `error` event.
 */
async function loadZipMessageBuffers(bytes) {
  const { MboxStream } = await import("node-mbox");
  const mbox = MboxStream(Readable.from([bytes]));
  return await new Promise((resolve, reject) => {
    const collected = [];
    const settle = () => resolve(collected);
    mbox.on("data", (chunk) => {
      collected.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    mbox.on("error", reject);
    // Either event marks completion; the second resolve() call is a no-op.
    mbox.on("finish", settle);
    mbox.on("end", settle);
  });
}
7655
/**
 * Decode every non-empty archive entry to text.
 *
 * @param {Object<string, Uint8Array>} archive - Entry path to raw bytes.
 * @returns {Map<string, string>} Entry path to the `strFromU8`-decoded text;
 *   entries with a falsy value are omitted.
 */
function archiveEntriesAsText(archive) {
  const decoded = /* @__PURE__ */ new Map();
  for (const [entryPath, value] of Object.entries(archive)) {
    if (value) {
      decoded.set(entryPath, strFromU8(value));
    }
  }
  return decoded;
}
7660
/**
 * Heuristically decide whether a list of archive paths is a Slack export.
 *
 * Requires both a root-level conversation index (`channels.json`,
 * `groups.json`, `dms.json` or `mpims.json`) and at least one
 * `<folder>/YYYY-MM-DD.json` day file.
 *
 * @param {Iterable<string>} entries - Archive entry paths.
 * @returns {boolean} `true` when both markers are present.
 */
function looksLikeSlackEntries(entries) {
  const indexFiles = /* @__PURE__ */ new Set(["channels.json", "groups.json", "dms.json", "mpims.json"]);
  const dayFilePattern = /^[^/]+\/\d{4}-\d{2}-\d{2}\.json$/i;
  let sawIndex = false;
  let sawDayFile = false;
  for (const entry of entries) {
    sawIndex = sawIndex || indexFiles.has(entry);
    sawDayFile = sawDayFile || dayFilePattern.test(entry);
  }
  return sawIndex && sawDayFile;
}
7668
/**
 * Build a title-keyed map of conversations from a parsed Slack index file.
 *
 * Items that are not objects, or whose normalized `name` is empty, are
 * skipped. Members come from the item's `members` array, or from its single
 * `user` field when there is no array, and are resolved through
 * `slackFormatSpeakerId`.
 *
 * @param {unknown} raw - Parsed index content; anything but an array yields an empty map.
 * @param {Map<string, string>} usersById - Lookup used to resolve member ids.
 * @returns {Map<string, {id: string, title: string, members: string[]}>}
 */
function slackEntriesFromChannelIndex(raw, usersById) {
  const byTitle = /* @__PURE__ */ new Map();
  if (!Array.isArray(raw)) {
    return byTitle;
  }
  for (const item of raw) {
    if (!item || typeof item !== "object") {
      continue;
    }
    const id = normalizeWhitespace(item.id ?? "");
    const title = normalizeWhitespace(item.name ?? "");
    if (!title) {
      continue;
    }
    let memberIds = [];
    if (Array.isArray(item.members)) {
      memberIds = item.members;
    } else if (item.user) {
      memberIds = [item.user];
    }
    const resolved = memberIds.map((member) => slackFormatSpeakerId(member, usersById));
    const members = resolved.filter(Boolean);
    byTitle.set(title, { id, title, members });
  }
  return byTitle;
}
7688
/**
 * Read document metadata from the `meta.xml` entry of an OpenDocument archive.
 *
 * The first non-empty text found for each lower-cased element local name is
 * recorded, then mapped onto the output keys listed in `fieldSources`.
 *
 * @param {Buffer|Uint8Array} bytes - Raw archive bytes.
 * @returns {Object<string, string>|undefined} Metadata record, or `undefined`
 *   when `meta.xml` is missing, yields no mapped values, or anything throws.
 */
function parseOdfMetadata(bytes) {
  // Output key -> element local name in meta.xml.
  const fieldSources = {
    title: "title",
    author: "creator",
    subject: "subject",
    description: "description",
    keywords: "keyword",
    initial_creator: "initial-creator",
    created: "creation-date",
    modified: "date"
  };
  try {
    const metaXml = zipEntryText(unzipSync(new Uint8Array(bytes)), "meta.xml");
    if (!metaXml) {
      return void 0;
    }
    const firstTextByName = /* @__PURE__ */ new Map();
    for (const element of Array.from(parseXmlDocument(metaXml).getElementsByTagName("*"))) {
      const localName = element.localName?.trim().toLowerCase();
      const text = normalizeWhitespace(element.textContent ?? "");
      if (localName && text && !firstTextByName.has(localName)) {
        firstTextByName.set(localName, text);
      }
    }
    const metadata = {};
    for (const [targetKey, sourceKey] of Object.entries(fieldSources)) {
      const found = firstTextByName.get(sourceKey);
      if (found) {
        metadata[targetKey] = found;
      }
    }
    return Object.keys(metadata).length ? metadata : void 0;
  } catch {
    // Metadata is optional: any failure is reported as "none available".
    return void 0;
  }
}
7727
/**
 * Collect headings and paragraphs from an OpenDocument `content.xml` string.
 *
 * Elements are visited in document order. `h` elements become heading nodes
 * whose level comes from `text:outline-level` (default 1). `p` elements become
 * plain text nodes. A `list-item` is emitted only when it has no nested
 * `p`/`h` element: those nested elements are visited on their own, so emitting
 * the wrapper as well would repeat every list item's text.
 *
 * @param {string} contentXml - XML text of `content.xml`.
 * @returns {Array<{heading?: number, text: string}>} Nodes in document order.
 */
function collectOdfTextNodes(contentXml) {
  const document = parseXmlDocument(contentXml);
  const nodes = [];
  // True when the element wraps paragraph/heading elements that are visited separately.
  const wrapsTextBlocks = (element) => Array.from(element.getElementsByTagName("*")).some((inner) => inner.localName === "p" || inner.localName === "h");
  for (const node of Array.from(document.getElementsByTagName("*"))) {
    const localName = node.localName ?? "";
    if (localName === "h") {
      const level = Number.parseInt(node.getAttribute("text:outline-level") ?? "1", 10);
      const text = normalizeWhitespace(node.textContent ?? "");
      if (text) {
        nodes.push({ heading: Number.isFinite(level) && level > 0 ? level : 1, text });
      }
      continue;
    }
    if (localName === "p" || localName === "list-item") {
      // Text inside a heading is already part of that heading's node.
      if (node.closest?.("h")) {
        continue;
      }
      if (localName === "list-item" && wrapsTextBlocks(node)) {
        continue;
      }
      const text = normalizeWhitespace(node.textContent ?? "");
      if (text) {
        nodes.push({ text });
      }
    }
  }
  return nodes;
}
7752
/**
 * Render collected ODF text nodes as markdown-style text.
 *
 * Heading nodes become `#`-prefixed lines (level capped at 6) surrounded by
 * blank lines; other nodes become their text followed by a blank line.
 *
 * @param {Iterable<{heading?: number, text: string}>} nodes - Nodes to render.
 * @returns {string} Newline-joined, trimmed text.
 */
function renderOdfTextNodes(nodes) {
  const out = [];
  for (const { heading, text } of nodes) {
    if (heading) {
      const marker = "#".repeat(Math.min(heading, 6));
      out.push("", `${marker} ${text}`, "");
    } else {
      out.push(text, "");
    }
  }
  return out.join("\n").trim();
}
7766
/**
 * Extract text from an OpenDocument Text (.odt) file.
 *
 * Reads `content.xml` from the archive, collects headings and paragraphs, and
 * renders them under a `# <title>` line. The title is the metadata title,
 * else the first level-1 heading, else the file name without its extension.
 *
 * @param {{bytes: Buffer|Uint8Array, mimeType?: string, fileName?: string}} input
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a warning when anything throws, including a
 *   missing `content.xml`.
 */
async function extractOdtText(input) {
  const baseArtifact = () => extractionMetadata("odt", input.mimeType, "odt_text");
  try {
    const contentXml = zipEntryText(unzipSync(new Uint8Array(input.bytes)), "content.xml");
    if (!contentXml) {
      throw new Error("Missing content.xml");
    }
    const metadata = parseOdfMetadata(input.bytes);
    const textNodes = collectOdfTextNodes(contentXml);
    let headingCount = 0;
    for (const entry of textNodes) {
      if (entry.heading) {
        headingCount += 1;
      }
    }
    const paragraphCount = textNodes.length - headingCount;
    let title = metadata?.title;
    if (!title) {
      title = textNodes.find((node) => node.heading === 1)?.text;
    }
    if (!title) {
      title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    }
    const body = renderOdfTextNodes(textNodes);
    const parts = title ? [`# ${title}`, "", body] : ["", body];
    const extractedText = parts.join("\n").trim();
    return {
      title,
      extractedText: extractedText || void 0,
      artifact: {
        ...baseArtifact(),
        metadata: {
          ...metadata ?? {},
          heading_count: String(headingCount),
          paragraph_count: String(paragraphCount)
        }
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...baseArtifact(),
        warnings: [`ODT extraction failed: ${reason}`]
      }
    };
  }
}
7801
/**
 * Extract text from an OpenDocument Presentation (.odp) file.
 *
 * Every element whose local name is `page` is treated as a slide. At most the
 * first 60 slides are rendered as `## Slide N: <name>` followed by the slide's
 * text; a warning is attached when more slides exist. Note that the final
 * truthiness filter also removes the empty-string separator lines, so the
 * output contains no blank lines.
 *
 * @param {{bytes: Buffer|Uint8Array, mimeType?: string, fileName?: string}} input
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a warning when anything throws.
 */
async function extractOdpText(input) {
  const baseArtifact = () => extractionMetadata("odp", input.mimeType, "odp_text");
  try {
    const contentXml = zipEntryText(unzipSync(new Uint8Array(input.bytes)), "content.xml");
    if (!contentXml) {
      throw new Error("Missing content.xml");
    }
    const metadata = parseOdfMetadata(input.bytes);
    const allElements = Array.from(parseXmlDocument(contentXml).getElementsByTagName("*"));
    const pages = allElements.filter((element) => element.localName === "page");
    const slideSections = [];
    let slideNumber = 0;
    for (const page of pages.slice(0, 60)) {
      slideNumber += 1;
      const slideName = page.getAttribute("draw:name") ?? `Slide ${slideNumber}`;
      const text = normalizeWhitespace(page.textContent ?? "");
      slideSections.push(`## Slide ${slideNumber}: ${slideName}`);
      if (text) {
        slideSections.push(text);
      }
      slideSections.push("");
    }
    const title = metadata?.title || (input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0);
    const candidateLines = [title ? `# ${title}` : null, `Slides: ${pages.length}`, "", ...slideSections];
    const extractedText = candidateLines.filter((line) => Boolean(line)).join("\n").trim();
    const warnings = pages.length > 60 ? ["ODP extraction truncated to the first 60 slides."] : void 0;
    return {
      title,
      extractedText: extractedText || void 0,
      artifact: {
        ...baseArtifact(),
        metadata: {
          ...metadata ?? {},
          slide_count: String(pages.length)
        },
        warnings
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...baseArtifact(),
        warnings: [`ODP extraction failed: ${reason}`]
      }
    };
  }
}
7845
/**
 * Infer a structured-data format from a MIME type and/or file name.
 *
 * Rules are tried in the order listed; a rule matches when the lower-cased
 * file name ends with one of its extensions or the MIME type equals one of its
 * MIME types. The first matching rule wins.
 *
 * @param {string} mimeType - MIME type, compared verbatim.
 * @param {string} [fileName] - File name; a nullish value is treated as empty.
 * @returns {"json"|"yaml"|"toml"|"xml"|"ini"|"env"|"properties"|null}
 */
function inferStructuredFormat(mimeType, fileName) {
  const rules = [
    { format: "json", extensions: [".jsonc", ".json", ".json5"], mimeTypes: ["application/json", "application/json5"] },
    { format: "yaml", extensions: [".yaml", ".yml"], mimeTypes: ["application/yaml", "application/x-yaml"] },
    { format: "toml", extensions: [".toml"], mimeTypes: ["application/toml"] },
    { format: "xml", extensions: [".xml"], mimeTypes: ["application/xml", "text/xml"] },
    { format: "ini", extensions: [".ini", ".conf", ".cfg"], mimeTypes: [] },
    { format: "env", extensions: [".env"], mimeTypes: [] },
    { format: "properties", extensions: [".properties"], mimeTypes: [] }
  ];
  const lower = (fileName ?? "").toLowerCase();
  const matched = rules.find(
    (rule) => rule.extensions.some((extension) => lower.endsWith(extension)) || rule.mimeTypes.includes(mimeType)
  );
  return matched ? matched.format : null;
}
7870
/**
 * Parse dotenv-style `KEY=value` text into a plain object.
 *
 * Blank lines, `#` comment lines, and lines without a key before the first
 * `=` are skipped. A leading `export ` (shell-sourceable form) is dropped so
 * the key is `KEY` rather than `export KEY`. One pair of matching surrounding
 * quotes is removed from the value; a value that is just a single quote
 * character is kept as-is instead of being stripped to an empty string.
 *
 * @param {string} text - File content.
 * @returns {Object<string, string>} Key/value pairs; later duplicates win.
 */
function parseEnvFile(text) {
  const result = {};
  for (const rawLine of text.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (!trimmed || trimmed.startsWith("#")) {
      continue;
    }
    const line = trimmed.replace(/^export\s+/, "");
    const eqIndex = line.indexOf("=");
    if (eqIndex <= 0) {
      continue;
    }
    const key = line.slice(0, eqIndex).trim();
    let value = line.slice(eqIndex + 1).trim();
    // Require two characters so a lone quote is not mistaken for a quoted pair.
    const isQuoted = value.length >= 2 && (value.startsWith('"') && value.endsWith('"') || value.startsWith("'") && value.endsWith("'"));
    if (isQuoted) {
      value = value.slice(1, -1);
    }
    result[key] = value;
  }
  return result;
}
7890
/**
 * Parse `.properties`-style text into a plain object.
 *
 * Blank lines and lines starting with `#` or `!` are skipped. The key/value
 * separator is the earliest `=` or `:` that is not preceded by a backslash,
 * so `url: http://h?a=b` splits at the colon rather than at the later `=`.
 * Lines with no separator or an empty key are skipped. Escape sequences are
 * not decoded; keys and values are only trimmed.
 *
 * @param {string} text - File content.
 * @returns {Object<string, string>} Key/value pairs; later duplicates win.
 */
function parsePropertiesFile(text) {
  const result = {};
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line || line.startsWith("#") || line.startsWith("!")) {
      continue;
    }
    let sep = -1;
    for (let index = 0; index < line.length; index += 1) {
      const char = line[index];
      if ((char === "=" || char === ":") && line[index - 1] !== "\\") {
        sep = index;
        break;
      }
    }
    if (sep <= 0) {
      continue;
    }
    const key = line.slice(0, sep).trim();
    const value = line.slice(sep + 1).trim();
    result[key] = value;
  }
  return result;
}
7910
/**
 * Summarize an XML document as counts of the root element's direct children.
 *
 * @param {string} text - XML source.
 * @returns {object} `{ [rootTag]: { [childTag]: { count } } }`, or `{}` when
 *   the parsed document has no root element.
 */
function parseXmlToSchema(text) {
  const root = parseXmlDocument(text).documentElement;
  if (!root) {
    return {};
  }
  const tally = /* @__PURE__ */ new Map();
  for (const child of Array.from(root.children)) {
    const name = child.tagName || child.localName || "";
    if (name) {
      tally.set(name, (tally.get(name) ?? 0) + 1);
    }
  }
  const summary = {};
  tally.forEach((count, name) => {
    summary[name] = { count };
  });
  return { [root.tagName || "root"]: summary };
}
7930
/**
 * Describe the shape of a parsed data value.
 *
 * @param {unknown} value - Any parsed value.
 * @returns {{type: string, size: number, depth: number}}
 *   - `type`: `"null"`, `"array"`, `"object"`, or the `typeof` result.
 *   - `size`: element count for arrays, own enumerable key count for objects, else 0.
 *   - `depth`: 0 for scalars; for containers, 1 plus the deepest child depth.
 */
function describeJsonShape(value) {
  if (value === null) {
    return { type: "null", size: 0, depth: 0 };
  }
  if (typeof value !== "object") {
    return { type: typeof value, size: 0, depth: 0 };
  }
  const isList = Array.isArray(value);
  const children = isList ? value : Object.values(value);
  // Track the maximum in a loop: spreading one argument per child into
  // Math.max() throws RangeError once a container has enough entries.
  let deepestChild = 0;
  for (const child of children) {
    const childDepth = describeJsonShape(child).depth;
    if (childDepth > deepestChild) {
      deepestChild = childDepth;
    }
  }
  return { type: isList ? "array" : "object", size: children.length, depth: 1 + deepestChild };
}
7945
/**
 * Produce one-line descriptions of a value's top-level structure.
 *
 * For a non-array object, returns `key: type` lines for at most the first 20
 * keys, with an item/key count appended for array and object children. For
 * anything else, returns a single `(root) type` line, with the size in
 * parentheses when it is non-zero.
 *
 * @param {unknown} value - Parsed data value.
 * @returns {string[]} Description lines.
 */
function describeTopLevelSchema(value) {
  const isKeyedObject = value !== null && typeof value === "object" && !Array.isArray(value);
  if (!isKeyedObject) {
    const { type, size } = describeJsonShape(value);
    return [`(root) ${type}${size ? ` (${size})` : ""}`];
  }
  const lines = [];
  for (const [key, child] of Object.entries(value).slice(0, 20)) {
    const { type, size } = describeJsonShape(child);
    let sizeHint = "";
    if (type === "array") {
      sizeHint = ` (${size} items)`;
    } else if (type === "object") {
      sizeHint = ` (${size} keys)`;
    }
    lines.push(`${key}: ${type}${sizeHint}`);
  }
  return lines;
}
7957
/**
 * Decode bytes and parse them according to a structured-data format.
 *
 * - `json`: `JSON.parse` after removing a leading byte-order mark.
 * - `yaml`: the `yaml` package. `toml`: the `smol-toml` package.
 * - `xml`: a child-count summary from `parseXmlToSchema`.
 * - `ini`: tried as TOML first; on failure, parsed as properties text.
 * - `env`: `parseEnvFile`. Any other format: `parsePropertiesFile`.
 *
 * @param {Buffer|Uint8Array} bytes - Raw file bytes.
 * @param {string} format - Format id as returned by `inferStructuredFormat`.
 * @returns {Promise<{format: string, value: unknown}>} Parsed payload.
 *   Parser errors propagate, except for the `ini` TOML attempt.
 */
async function parseStructuredPayload(bytes, format) {
  const text = decodeTextBytes(bytes);
  switch (format) {
    case "json": {
      const withoutBom = text.replace(/^\uFEFF/, "");
      return { format, value: JSON.parse(withoutBom) };
    }
    case "yaml": {
      const yamlModule = await import("yaml");
      return { format, value: yamlModule.parse(text) };
    }
    case "toml": {
      const tomlModule = await import("smol-toml");
      return { format, value: tomlModule.parse(text) };
    }
    case "xml":
      return { format, value: parseXmlToSchema(text) };
    case "ini":
      try {
        const tomlModule = await import("smol-toml");
        return { format, value: tomlModule.parse(text) };
      } catch {
        // Not valid TOML: fall back to the line-based key/value parser.
        return { format, value: parsePropertiesFile(text) };
      }
    case "env":
      return { format, value: parseEnvFile(text) };
    default:
      return { format, value: parsePropertiesFile(text) };
  }
}
7987
/**
 * Extract a summary of a structured data file (JSON, YAML, TOML, XML, INI,
 * env, properties).
 *
 * The summary lists the format, top-level type, size and nesting depth, a
 * schema section from `describeTopLevelSchema`, and a fenced preview of the
 * first 40 lines of the raw text (followed by an ellipsis line when cut).
 *
 * @param {{bytes: Buffer|Uint8Array, mimeType?: string, fileName?: string}} input
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a warning when the format is unrecognized or
 *   parsing throws.
 */
async function extractStructuredData(input) {
  const baseArtifact = () => extractionMetadata("data", input.mimeType, "structured_data");
  const format = inferStructuredFormat(input.mimeType, input.fileName);
  if (!format) {
    return {
      artifact: {
        ...baseArtifact(),
        warnings: ["Structured data extraction skipped: format not recognized."]
      }
    };
  }
  try {
    const { value } = await parseStructuredPayload(input.bytes, format);
    const shape = describeJsonShape(value);
    const schemaLines = describeTopLevelSchema(value);
    const allLines = decodeTextBytes(input.bytes).split(/\r?\n/);
    const previewLines = allLines.slice(0, 40);
    const truncated = allLines.length > previewLines.length;
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    const lines = [];
    if (title) {
      lines.push(`# ${title}`);
    }
    lines.push(`Format: ${format.toUpperCase()}`, `Top-level: ${shape.type}`);
    if (shape.type === "object" || shape.type === "array") {
      lines.push(`Size: ${shape.size}`);
    }
    lines.push(`Nested depth: ${shape.depth}`, "", "## Schema", "");
    for (const entry of schemaLines) {
      lines.push(`- ${entry}`);
    }
    lines.push("", "## Preview", "", `\`\`\`${format}`, ...previewLines);
    if (truncated) {
      lines.push("\u2026");
    }
    lines.push("```");
    const extractedText = lines.join("\n").trim();
    return {
      title,
      extractedText,
      artifact: {
        ...baseArtifact(),
        metadata: {
          format,
          top_level_type: shape.type,
          top_level_size: String(shape.size),
          nested_depth: String(shape.depth)
        }
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...baseArtifact(),
        warnings: [`Structured data extraction failed: ${reason}`]
      }
    };
  }
}
8045
/**
 * Format a bibliography creator record as a display name.
 *
 * @param {{name?: string, prefix?: string, firstName?: string, lastName?: string, suffix?: string}} creator
 * @returns {string} `creator.name` when set; otherwise the non-empty parts
 *   among prefix, first name, last name and suffix joined by single spaces.
 */
function formatBibCreator(creator) {
  if (creator.name) {
    return creator.name;
  }
  const { prefix, firstName, lastName, suffix } = creator;
  const presentParts = [prefix, firstName, lastName, suffix].filter((part) => Boolean(part));
  return presentParts.join(" ");
}
6044
- function slackNormalizeText(text, usersById) {
6045
- return normalizeWhitespace(
6046
- text.replace(/<@([A-Z0-9]+)>/g, (_, userId) => `@${slackFormatSpeakerId(userId, usersById)}`).replace(/<#[A-Z0-9]+\|([^>]+)>/g, "#$1").replace(/<(https?:\/\/[^>|]+)\|([^>]+)>/g, "$2 ($1)").replace(/<(https?:\/\/[^>]+)>/g, "$1")
6047
- );
8052
/**
 * Coerce a bibliography field value to a display string.
 *
 * - `null`/`undefined` -> `""`
 * - string -> trimmed
 * - number/boolean -> `String(value)`
 * - array -> non-empty converted items joined with `", "`
 * - other object -> its `name` property converted the same way (or `""`)
 * - anything else -> `String(value)`
 *
 * @param {unknown} value - Field value.
 * @returns {string} String form of the value.
 */
function bibFieldString(value) {
  if (value == null) {
    return "";
  }
  switch (typeof value) {
    case "string":
      return value.trim();
    case "number":
    case "boolean":
      return String(value);
    case "object": {
      if (Array.isArray(value)) {
        const converted = value.map((item) => bibFieldString(item));
        return converted.filter(Boolean).join(", ");
      }
      return bibFieldString(value.name ?? "");
    }
    default:
      return String(value);
  }
}
6049
- function slackMessageTimestamp(ts2, fallbackDate) {
6050
- const numeric = Number(ts2);
6051
- if (Number.isFinite(numeric) && numeric > 0) {
6052
- return new Date(numeric * 1e3).toISOString();
8070
/**
 * Extract a readable summary from a BibTeX library.
 *
 * Parses the decoded text with `@retorquere/bibtex-parser`, counts entries per
 * lower-cased citation type (defaulting to `misc`), and renders one bullet
 * line per entry for at most the first 200 entries. Each line shows the key,
 * title, authors (or editors, or "Unknown"), the year when present, and any
 * journal/booktitle/publisher, DOI and URL.
 *
 * @param {{bytes: Buffer|Uint8Array, mimeType?: string, fileName?: string}} input
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success,
 *   with a warning when the parser reports errors; `{ artifact }` carrying a
 *   warning when anything throws.
 */
async function extractBibTeXText(input) {
  const baseArtifact = () => extractionMetadata("bibtex", input.mimeType, "bibtex_text");
  try {
    const bibtex = await import("@retorquere/bibtex-parser");
    const text = decodeTextBytes(input.bytes);
    const library = bibtex.parse(text);
    const entries = Array.isArray(library.entries) ? library.entries : [];
    const citationTypes = /* @__PURE__ */ new Map();
    for (const entry of entries) {
      const type = (entry.type ?? "misc").toLowerCase();
      citationTypes.set(type, (citationTypes.get(type) ?? 0) + 1);
    }
    const creatorNames = (creators) => creators.map((creator) => formatBibCreator(creator)).filter(Boolean);
    const entrySections = entries.slice(0, 200).map((entry) => {
      const fields = entry.fields ?? {};
      const title2 = bibFieldString(fields.title);
      const authorList = Array.isArray(fields.author) ? creatorNames(fields.author) : bibFieldString(fields.author).split(/\s+and\s+/i).filter(Boolean);
      const editorList = Array.isArray(fields.editor) ? creatorNames(fields.editor) : [];
      const year = bibFieldString(fields.year ?? fields.date ?? "");
      const journal = bibFieldString(fields.journal ?? fields.booktitle ?? fields.publisher ?? "");
      const doi = bibFieldString(fields.doi);
      const url = bibFieldString(fields.url);
      let credit = "Unknown";
      if (authorList.length) {
        credit = authorList.join(", ");
      } else if (editorList.length) {
        credit = `${editorList.join(", ")} (eds.)`;
      }
      const descriptor = year ? `${credit}, ${year}` : credit;
      const trailing = [journal, doi ? `doi:${doi}` : "", url].filter(Boolean);
      const trailingText = trailing.length ? ` \u2014 ${trailing.join(", ")}` : "";
      return `- [${entry.key}] ${title2 || "(untitled)"} (${descriptor})${trailingText}`;
    });
    const totalEntries = entries.length;
    const truncated = entries.length > 200;
    // Most frequent type first; ties broken alphabetically.
    const typeSummary = [...citationTypes.entries()].sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0])).map(([type, count]) => `${type} (${count})`).join(", ");
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : "BibTeX library";
    const lines = [`# ${title}`, "", `BibTeX library with ${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}.`];
    if (typeSummary) {
      lines.push(`Citation types: ${typeSummary}.`);
    }
    lines.push("", "## Entries", "", ...entrySections);
    if (truncated) {
      lines.push("\n_Preview truncated to the first 200 entries._");
    }
    const extractedText = lines.join("\n").trim();
    const warnings = library.errors?.length ? [`BibTeX parser reported ${library.errors.length} parse error(s).`] : void 0;
    return {
      title,
      extractedText,
      artifact: {
        ...baseArtifact(),
        metadata: {
          entry_count: String(totalEntries),
          citation_types: [...citationTypes.keys()].sort().join(",")
        },
        warnings
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...baseArtifact(),
        warnings: [`BibTeX extraction failed: ${reason}`]
      }
    };
  }
}
6056
- async function loadZipMessageBuffers(bytes) {
6057
- const { MboxStream } = await import("node-mbox");
6058
- const stream = MboxStream(Readable.from([bytes]));
6059
- return await new Promise((resolve, reject) => {
6060
- const messages = [];
6061
- stream.on("data", (message) => {
6062
- messages.push(Buffer.isBuffer(message) ? message : Buffer.from(message));
8148
/**
 * Extract readable text from an RTF payload.
 *
 * The `rtf-parser` package is loaded lazily; its callback-style `string` parser is wrapped in a
 * promise. Every entry of the parsed document's `content` is reduced to the concatenated `value`
 * of its spans, whitespace-normalized, and kept when non-empty.
 *
 * @param {{ bytes: any, mimeType: string, fileName?: string }} input - Raw payload plus naming hints.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success. On any failure the
 *   result carries only an `artifact` with a single warning; the function never rejects.
 */
async function extractRtfText(input) {
  try {
    const rtfModule = await import("rtf-parser");
    // The parser may live on the namespace or on the default export depending on interop.
    const parseRtfString = rtfModule.string ?? rtfModule.default?.string;
    if (typeof parseRtfString !== "function") {
      throw new Error("rtf-parser did not expose a string parser.");
    }
    const rawRtf = decodeTextBytes(input.bytes);
    const parsed = await new Promise((resolve, reject) => {
      parseRtfString(rawRtf, (err, doc) => {
        if (!err && doc) {
          resolve(doc);
          return;
        }
        reject(err ?? new Error("RTF parse returned no document"));
      });
    });
    const paragraphs = [];
    for (const block of parsed.content ?? []) {
      const pieces = (block.content ?? []).map((span) => span.value ?? "");
      const line = normalizeWhitespace(pieces.join(""));
      if (!line) {
        continue;
      }
      paragraphs.push(line);
    }
    let title = void 0;
    if (input.fileName) {
      title = path7.basename(input.fileName, path7.extname(input.fileName));
    }
    const sections = [];
    if (title) {
      sections.push(`# ${title}`);
    }
    sections.push("", ...paragraphs);
    const extractedText = sections.join("\n\n").trim();
    const artifact = {
      ...extractionMetadata("rtf", input.mimeType, "rtf_text"),
      metadata: {
        paragraph_count: String(paragraphs.length)
      }
    };
    return {
      title,
      extractedText: extractedText || void 0,
      artifact
    };
  } catch (error) {
    // Best-effort extraction: report the failure as a warning rather than propagating it.
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("rtf", input.mimeType, "rtf_text"),
        warnings: [`RTF extraction failed: ${reason}`]
      }
    };
  }
}
6074
- function looksLikeSlackEntries(entries) {
6075
- const all = [...entries];
6076
- const hasChannelsIndex = all.some(
6077
- (entry) => entry === "channels.json" || entry === "groups.json" || entry === "dms.json" || entry === "mpims.json"
6078
- );
6079
- const hasChannelDayFiles = all.some((entry) => /^[^/]+\/\d{4}-\d{2}-\d{2}\.json$/i.test(entry));
6080
- return hasChannelsIndex && hasChannelDayFiles;
8194
/**
 * Flatten an Org AST node into the plain text it contains.
 *
 * A node with a string `value` contributes that value verbatim. Otherwise the text of its
 * `children` is concatenated in document order. Nodes with neither contribute nothing.
 *
 * @param {object} node - AST node (or subtree) produced by the Org parser.
 * @returns {string} The concatenated text, or "" when the node carries no text.
 */
function collectOrgNodeText(node) {
  // Tolerate holes or primitives in a children array instead of throwing on property access.
  if (node === null || typeof node !== "object") {
    return "";
  }
  if (typeof node.value === "string") {
    return node.value;
  }
  // NOTE(review): assumes the parser represents a soft line break as a value-less `newline`
  // node (confirm against the installed orga version). Without a separator, words on adjacent
  // source lines would be glued together; callers normalize whitespace afterwards.
  if (node.type === "newline") {
    return " ";
  }
  if (!Array.isArray(node.children)) {
    return "";
  }
  return node.children.map((child) => collectOrgNodeText(child)).join("");
}
6082
- function slackEntriesFromChannelIndex(raw, usersById) {
6083
- const entries = /* @__PURE__ */ new Map();
6084
- if (!Array.isArray(raw)) {
6085
- return entries;
8203
/**
 * Append the Markdown bullets for one Org list to `lines`.
 *
 * @param {object} listNode - A node of type "list".
 * @param {string[]} lines - Output buffer, mutated in place.
 * @param {number} depth - Nesting depth; each level indents by two spaces.
 */
function renderOrgListItems(listNode, lines, depth) {
  const indent = "  ".repeat(depth);
  for (const child of listNode.children ?? []) {
    if (child.type === "list.item") {
      const text = normalizeWhitespace(collectOrgNodeText(child));
      if (text) {
        lines.push(`${indent}- ${text}`);
      }
    } else if (child.type === "list") {
      // A list nested directly inside a list used to be dropped; keep it, indented.
      renderOrgListItems(child, lines, depth + 1);
    }
  }
}
/**
 * Render one Org AST node (and, for container nodes, its descendants) as Markdown lines.
 *
 * Handled node types:
 * - "headline": ATX heading clamped to levels 1-6, prefixed with the TODO keyword and
 *   suffixed with the tags in backticks, surrounded by blank lines.
 * - "paragraph": whitespace-normalized text followed by a blank line (skipped when empty).
 * - "list": one "- " bullet per non-empty item, then a blank line.
 * - "block": fenced code block; the fence label is the lower-cased block name, except that
 *   "src" blocks (in any letter case) get a bare fence. Blocks without a body are skipped.
 * Any other node is treated as a container and its children are rendered recursively.
 *
 * @param {object} node - Org AST node.
 * @param {string[]} lines - Output buffer, mutated in place.
 * @returns {void}
 */
function renderOrgNode(node, lines) {
  if (node.type === "headline") {
    const depth = Math.min(Math.max(node.level ?? 1, 1), 6);
    const keyword = node.keyword ? `${node.keyword} ` : "";
    const tags = node.tags?.length ? ` \`${node.tags.join(":")}\`` : "";
    const text = normalizeWhitespace(collectOrgNodeText(node));
    lines.push("");
    lines.push(`${"#".repeat(depth)} ${keyword}${text}${tags}`.trim());
    lines.push("");
    return;
  }
  if (node.type === "paragraph") {
    const text = normalizeWhitespace(collectOrgNodeText(node));
    if (text) {
      lines.push(text);
      lines.push("");
    }
    return;
  }
  if (node.type === "list") {
    renderOrgListItems(node, lines, 0);
    lines.push("");
    return;
  }
  if (node.type === "block") {
    // Lower-case before comparing so "SRC" and "src" are treated alike; previously an
    // upper-case name produced a "```src" fence.
    const name = (node.name ?? "").toLowerCase();
    const body = typeof node.value === "string" ? node.value.trimEnd() : "";
    if (body) {
      lines.push(`\`\`\`${name === "src" ? "" : name}`);
      lines.push(body);
      lines.push("```");
      lines.push("");
    }
    return;
  }
  for (const child of node.children ?? []) {
    renderOrgNode(child, lines);
  }
}
8249
/**
 * Extract Markdown-flavoured text from an Org document.
 *
 * The `orga` package is loaded lazily and used to parse the decoded bytes. The whole tree is
 * scanned to count headlines and headlines carrying a keyword, then each top-level child is
 * rendered through `renderOrgNode`. The title comes from the document's `title` property when
 * present, otherwise from the file name without its extension.
 *
 * @param {{ bytes: any, mimeType: string, fileName?: string }} input - Raw payload plus naming hints.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success. On any failure the
 *   result carries only an `artifact` with a single warning; the function never rejects.
 */
async function extractOrgText(input) {
  try {
    const orgaModule = await import("orga");
    const source = decodeTextBytes(input.bytes);
    const tree = orgaModule.parse(source);
    const props = tree.properties ?? {};
    let declaredTitle = "";
    if (Array.isArray(props.title)) {
      declaredTitle = props.title.join(" ");
    } else if (typeof props.title === "string") {
      declaredTitle = props.title;
    }
    // Tally headlines with an explicit work stack; visiting order does not affect the totals.
    const counts = { headlines: 0, todos: 0 };
    const pending = [tree];
    while (pending.length > 0) {
      const current = pending.pop();
      if (current.type === "headline") {
        counts.headlines += 1;
        if (current.keyword) {
          counts.todos += 1;
        }
      }
      for (const child of current.children ?? []) {
        pending.push(child);
      }
    }
    const rendered = [];
    for (const topLevel of tree.children ?? []) {
      renderOrgNode(topLevel, rendered);
    }
    let title = declaredTitle.trim();
    if (!title) {
      title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    }
    const sections = [];
    if (title) {
      sections.push(`# ${title}`);
    }
    sections.push("", ...rendered);
    const extractedText = sections.filter((item) => item !== null).join("\n").trim();
    const artifact = {
      ...extractionMetadata("org", input.mimeType, "org_text"),
      metadata: {
        headline_count: String(counts.headlines),
        todo_count: String(counts.todos)
      }
    };
    return {
      title,
      extractedText: extractedText || void 0,
      artifact
    };
  } catch (error) {
    // Best-effort extraction: report the failure as a warning rather than propagating it.
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("org", input.mimeType, "org_text"),
        warnings: [`Org extraction failed: ${reason}`]
      }
    };
  }
}
8296
/**
 * Extract Markdown text from an AsciiDoc document.
 *
 * `@asciidoctor/core` is loaded lazily. The source is loaded once (to read the document
 * title) and converted once to embedded HTML, both with `safe: "safe"`; the HTML is then
 * passed through `htmlToMarkdown`. The title is the document title when it is non-blank,
 * otherwise the file name without its extension.
 *
 * @param {{ bytes: any, mimeType: string, fileName?: string }} input - Raw payload plus naming hints.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success. On any failure the
 *   result carries only an `artifact` with a single warning; the function never rejects.
 */
async function extractAsciiDocText(input) {
  try {
    const imported = await import("@asciidoctor/core");
    // The factory may be the default export or the namespace itself depending on interop.
    const createProcessor = imported.default ?? imported;
    const asciidoctor = createProcessor();
    const rawSource = decodeTextBytes(input.bytes);
    const parsedDoc = asciidoctor.load(rawSource, { safe: "safe" });
    const html = asciidoctor.convert(rawSource, { safe: "safe", standalone: false });
    const markdown = htmlToMarkdown(html);
    let docTitle = void 0;
    if (typeof parsedDoc.getTitle === "function") {
      docTitle = parsedDoc.getTitle() ?? void 0;
    }
    let fileTitle = void 0;
    if (input.fileName) {
      fileTitle = path7.basename(input.fileName, path7.extname(input.fileName));
    }
    const title = docTitle?.trim() || fileTitle;
    const sections = [];
    if (title) {
      sections.push(`# ${title}`);
    }
    sections.push("", markdown);
    const extractedText = sections.filter((item) => item !== null).join("\n").trim();
    const artifact = {
      ...extractionMetadata("asciidoc", input.mimeType, "asciidoc_text"),
      metadata: {
        html_size: String(html.length),
        markdown_size: String(markdown.length)
      }
    };
    return {
      title,
      extractedText: extractedText || void 0,
      artifact
    };
  } catch (error) {
    // Best-effort extraction: report the failure as a warning rather than propagating it.
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("asciidoc", input.mimeType, "asciidoc_text"),
        warnings: [`AsciiDoc extraction failed: ${reason}`]
      }
    };
  }
}
6102
8329
  async function extractTranscriptText(input) {
6103
8330
  try {
@@ -6537,41 +8764,6 @@ async function appendWatchRun(rootDir, run) {
6537
8764
  await appendJsonLine(paths.jobsLogPath, run);
6538
8765
  }
6539
8766
 
6540
- // src/markdown-ast.ts
6541
- import { fromMarkdown } from "mdast-util-from-markdown";
6542
- function parseMarkdownNodes(text) {
6543
- try {
6544
- const root = fromMarkdown(text);
6545
- return Array.isArray(root.children) ? root.children : [];
6546
- } catch {
6547
- return [];
6548
- }
6549
- }
6550
- function markdownNodeText(node) {
6551
- if (node.type === "text" || node.type === "inlineCode" || node.type === "code") {
6552
- return normalizeWhitespace(node.value ?? "");
6553
- }
6554
- if (node.type === "image") {
6555
- return normalizeWhitespace(node.alt ?? "");
6556
- }
6557
- if (node.type === "break" || node.type === "thematicBreak") {
6558
- return " ";
6559
- }
6560
- return normalizeWhitespace((node.children ?? []).map((child) => markdownNodeText(child)).join(" "));
6561
- }
6562
- function firstMarkdownHeading(text) {
6563
- const nodes = parseMarkdownNodes(text);
6564
- for (const node of nodes) {
6565
- if (node.type === "heading") {
6566
- const title = markdownNodeText(node).trim();
6567
- if (title) {
6568
- return title;
6569
- }
6570
- }
6571
- }
6572
- return void 0;
6573
- }
6574
-
6575
8767
  // src/source-classification.ts
6576
8768
  import path9 from "path";
6577
8769
  var ALL_SOURCE_CLASSES = ["first_party", "third_party", "resource", "generated"];
@@ -6902,7 +9094,7 @@ function inferKind(mimeType, filePath, detectionOptions = {}) {
6902
9094
  if (isTranscriptFilePath(filePath) || mimeType === "application/x-subrip" || mimeType === "text/vtt") {
6903
9095
  return "transcript";
6904
9096
  }
6905
- if (mimeType.includes("markdown")) {
9097
+ if (mimeType.includes("markdown") || filePath.toLowerCase().endsWith(".mdx")) {
6906
9098
  return "markdown";
6907
9099
  }
6908
9100
  if (mimeType.includes("html")) {
@@ -6911,7 +9103,7 @@ function inferKind(mimeType, filePath, detectionOptions = {}) {
6911
9103
  if (mimeType === "application/pdf" || filePath.toLowerCase().endsWith(".pdf")) {
6912
9104
  return "pdf";
6913
9105
  }
6914
- if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || filePath.toLowerCase().endsWith(".docx")) {
9106
+ if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || mimeType === "application/vnd.ms-word.document.macroenabled.12" || mimeType === "application/vnd.ms-word.template.macroenabled.12" || mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.template" || filePath.toLowerCase().endsWith(".docx") || filePath.toLowerCase().endsWith(".docm") || filePath.toLowerCase().endsWith(".dotx") || filePath.toLowerCase().endsWith(".dotm")) {
6915
9107
  return "docx";
6916
9108
  }
6917
9109
  if (isEmailFilePath(filePath) || mimeType === "message/rfc822" || mimeType === "application/mbox") {
@@ -6926,20 +9118,66 @@ function inferKind(mimeType, filePath, detectionOptions = {}) {
6926
9118
  if (mimeType === "text/csv" || mimeType === "text/tab-separated-values" || filePath.toLowerCase().endsWith(".csv") || filePath.toLowerCase().endsWith(".tsv")) {
6927
9119
  return "csv";
6928
9120
  }
9121
+ if (mimeType === "application/x-ipynb+json" || filePath.toLowerCase().endsWith(".ipynb")) {
9122
+ return "jupyter";
9123
+ }
9124
+ if (mimeType === "application/vnd.oasis.opendocument.text" || filePath.toLowerCase().endsWith(".odt")) {
9125
+ return "odt";
9126
+ }
9127
+ if (mimeType === "application/vnd.oasis.opendocument.presentation" || filePath.toLowerCase().endsWith(".odp")) {
9128
+ return "odp";
9129
+ }
9130
+ if (mimeType === "application/vnd.oasis.opendocument.spreadsheet" || filePath.toLowerCase().endsWith(".ods")) {
9131
+ return "ods";
9132
+ }
9133
+ if (filePath.toLowerCase().endsWith(".bib") || mimeType === "application/x-bibtex") {
9134
+ return "bibtex";
9135
+ }
9136
+ if (mimeType === "application/rtf" || mimeType === "text/rtf" || filePath.toLowerCase().endsWith(".rtf")) {
9137
+ return "rtf";
9138
+ }
9139
+ if (filePath.toLowerCase().endsWith(".org") || mimeType === "text/x-org") {
9140
+ return "org";
9141
+ }
9142
+ if (filePath.toLowerCase().endsWith(".adoc") || filePath.toLowerCase().endsWith(".asciidoc") || mimeType === "text/x-asciidoc") {
9143
+ return "asciidoc";
9144
+ }
9145
+ if (isStructuredDataPath(filePath, mimeType)) {
9146
+ return "data";
9147
+ }
6929
9148
  if (mimeType.startsWith("text/") || isStructuredTextMime(mimeType)) {
6930
9149
  return "text";
6931
9150
  }
6932
- if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || filePath.toLowerCase().endsWith(".xlsx")) {
9151
+ if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || mimeType === "application/vnd.ms-excel" || mimeType === "application/vnd.ms-excel.sheet.macroenabled.12" || mimeType === "application/vnd.ms-excel.template.macroenabled.12" || mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.template" || filePath.toLowerCase().endsWith(".xlsx") || filePath.toLowerCase().endsWith(".xlsm") || filePath.toLowerCase().endsWith(".xltx") || filePath.toLowerCase().endsWith(".xltm") || filePath.toLowerCase().endsWith(".xls")) {
6933
9152
  return "xlsx";
6934
9153
  }
6935
- if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || filePath.toLowerCase().endsWith(".pptx")) {
9154
+ if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || mimeType === "application/vnd.ms-powerpoint.presentation.macroenabled.12" || mimeType === "application/vnd.ms-powerpoint.template.macroenabled.12" || mimeType === "application/vnd.openxmlformats-officedocument.presentationml.template" || filePath.toLowerCase().endsWith(".pptx") || filePath.toLowerCase().endsWith(".pptm") || filePath.toLowerCase().endsWith(".potx") || filePath.toLowerCase().endsWith(".potm")) {
6936
9155
  return "pptx";
6937
9156
  }
6938
- if (mimeType.startsWith("image/")) {
9157
+ if (mimeType.startsWith("image/") || isImagePath(filePath)) {
6939
9158
  return "image";
6940
9159
  }
6941
9160
  return "binary";
6942
9161
  }
9162
// File extensions (lower-case, leading dot included) that mark a path as an image.
var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([
  ".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".ico",
  ".tiff", ".tif", ".heic", ".heif", ".avif", ".jxl", ".svg"
]);
/**
 * Report whether a path's final extension, compared case-insensitively, is a known image
 * extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} True when the extension is listed in IMAGE_EXTENSIONS.
 */
function isImagePath(filePath) {
  const extension = path12.extname(filePath);
  const normalized = extension.toLowerCase();
  return IMAGE_EXTENSIONS.has(normalized);
}
6943
9181
  function isStructuredTextMime(mimeType) {
6944
9182
  switch (mimeType) {
6945
9183
  case "application/json":
@@ -6960,6 +9198,23 @@ function isStructuredTextMime(mimeType) {
6960
9198
  return false;
6961
9199
  }
6962
9200
  }
9201
// Manifests and lockfiles that must not be ingested as structured data, keyed by lower-cased
// base name.
var STRUCTURED_DATA_EXCLUDED_BASENAMES = /* @__PURE__ */ new Set([
  "package.json",
  "package-lock.json",
  "tsconfig.json",
  "pnpm-lock.yaml"
]);
// Lower-case path suffixes that mark a file as structured data.
var STRUCTURED_DATA_SUFFIXES = [
  ".yaml",
  ".yml",
  ".toml",
  ".xml",
  ".ini",
  ".env",
  ".properties",
  ".conf",
  ".cfg",
  ".json",
  ".jsonc",
  ".json5"
];
// MIME types that mark a file as structured data regardless of its extension.
var STRUCTURED_DATA_MIME_TYPES = /* @__PURE__ */ new Set([
  "application/toml",
  "application/yaml",
  "application/x-yaml",
  "application/xml",
  "text/xml",
  "application/json",
  "application/json5"
]);
/**
 * Decide whether a file should be treated as structured data (YAML, TOML, XML, INI-style
 * configuration, or JSON), judged by its path suffix or MIME type.
 *
 * The excluded base names are checked first. Previously that check sat inside the JSON branch,
 * so `pnpm-lock.yaml` could never reach it: the `.yaml` suffix had already returned true and
 * the lockfile was classified as data. As a side effect, an excluded base name now wins even
 * when the MIME type alone would have matched a non-JSON format.
 *
 * @param {string} filePath - Path of the file; matched case-insensitively.
 * @param {string} mimeType - MIME type guessed for the file.
 * @returns {boolean} True when the file counts as structured data.
 */
function isStructuredDataPath(filePath, mimeType) {
  const lower = filePath.toLowerCase();
  if (STRUCTURED_DATA_EXCLUDED_BASENAMES.has(path12.basename(lower))) {
    return false;
  }
  if (STRUCTURED_DATA_MIME_TYPES.has(mimeType)) {
    return true;
  }
  return STRUCTURED_DATA_SUFFIXES.some((suffix) => lower.endsWith(suffix));
}
6963
9218
  async function localCodeDetectionOptions(absolutePath, payloadBytes) {
6964
9219
  if (path12.extname(absolutePath)) {
6965
9220
  return {};
@@ -8615,6 +10870,60 @@ async function prepareFileInputs(rootDir, absoluteInput, repoRoot, sourceClass)
8615
10870
  title = extracted.title?.trim() || title;
8616
10871
  extractedText = extracted.extractedText;
8617
10872
  extractionArtifact = extracted.artifact;
10873
+ } else if (sourceKind === "jupyter") {
10874
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10875
+ const extracted = await extractJupyterNotebook({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10876
+ title = extracted.title?.trim() || title;
10877
+ extractedText = extracted.extractedText;
10878
+ extractionArtifact = extracted.artifact;
10879
+ } else if (sourceKind === "odt") {
10880
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10881
+ const extracted = await extractOdtText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10882
+ title = extracted.title?.trim() || title;
10883
+ extractedText = extracted.extractedText;
10884
+ extractionArtifact = extracted.artifact;
10885
+ } else if (sourceKind === "odp") {
10886
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10887
+ const extracted = await extractOdpText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10888
+ title = extracted.title?.trim() || title;
10889
+ extractedText = extracted.extractedText;
10890
+ extractionArtifact = extracted.artifact;
10891
+ } else if (sourceKind === "ods") {
10892
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10893
+ const extracted = await extractOdsText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10894
+ title = extracted.title?.trim() || title;
10895
+ extractedText = extracted.extractedText;
10896
+ extractionArtifact = extracted.artifact;
10897
+ } else if (sourceKind === "data") {
10898
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10899
+ const extracted = await extractStructuredData({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10900
+ title = extracted.title?.trim() || title;
10901
+ extractedText = extracted.extractedText;
10902
+ extractionArtifact = extracted.artifact;
10903
+ } else if (sourceKind === "bibtex") {
10904
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10905
+ const extracted = await extractBibTeXText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10906
+ title = extracted.title?.trim() || title;
10907
+ extractedText = extracted.extractedText;
10908
+ extractionArtifact = extracted.artifact;
10909
+ } else if (sourceKind === "rtf") {
10910
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10911
+ const extracted = await extractRtfText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10912
+ title = extracted.title?.trim() || title;
10913
+ extractedText = extracted.extractedText;
10914
+ extractionArtifact = extracted.artifact;
10915
+ } else if (sourceKind === "org") {
10916
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10917
+ const extracted = await extractOrgText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10918
+ title = extracted.title?.trim() || title;
10919
+ extractedText = extracted.extractedText;
10920
+ extractionArtifact = extracted.artifact;
10921
+ } else if (sourceKind === "asciidoc") {
10922
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
10923
+ const extracted = await extractAsciiDocText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
10924
+ title = extracted.title?.trim() || title;
10925
+ extractedText = extracted.extractedText;
10926
+ extractionArtifact = extracted.artifact;
8618
10927
  } else if (sourceKind === "epub") {
8619
10928
  title = path12.basename(absoluteInput, path12.extname(absoluteInput));
8620
10929
  const extracted = await extractEpubChapters({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
@@ -8941,7 +11250,11 @@ async function collectInboxAttachmentRefs(inputDir, files) {
8941
11250
  for (const absolutePath of files) {
8942
11251
  const mimeType = guessMimeType(absolutePath);
8943
11252
  const detectionOptions = await localCodeDetectionOptions(absolutePath);
8944
- const sourceKind = inferKind(mimeType, absolutePath, detectionOptions);
11253
+ let sourceKind = inferKind(mimeType, absolutePath, detectionOptions);
11254
+ const lowerExt = path12.extname(absolutePath).toLowerCase();
11255
+ if ((lowerExt === ".html" || lowerExt === ".htm") && sourceKind === "code") {
11256
+ sourceKind = "html";
11257
+ }
8945
11258
  if (sourceKind !== "markdown" && sourceKind !== "html") {
8946
11259
  continue;
8947
11260
  }
@@ -9285,7 +11598,11 @@ async function importInbox(rootDir, inputDir) {
9285
11598
  const mimeType = guessMimeType(absolutePath);
9286
11599
  const detectionOptions = await localCodeDetectionOptions(absolutePath);
9287
11600
  let sourceKind = inferKind(mimeType, absolutePath, detectionOptions);
9288
- if (sourceKind === "binary" && path12.extname(absolutePath).toLowerCase() === ".zip") {
11601
+ const lowerExt = path12.extname(absolutePath).toLowerCase();
11602
+ if ((lowerExt === ".html" || lowerExt === ".htm") && sourceKind === "code") {
11603
+ sourceKind = "html";
11604
+ }
11605
+ if (sourceKind === "binary" && lowerExt === ".zip") {
9289
11606
  const bytes = await fs11.readFile(absolutePath);
9290
11607
  if (isSlackExportArchive(bytes)) {
9291
11608
  sourceKind = "chat_export";
@@ -18270,7 +20587,7 @@ async function bootstrapDemo(rootDir, input) {
18270
20587
  }
18271
20588
 
18272
20589
  // src/mcp.ts
18273
- var SERVER_VERSION = "0.6.8";
20590
+ var SERVER_VERSION = "0.7.1";
18274
20591
  async function createMcpServer(rootDir) {
18275
20592
  const server = new McpServer({
18276
20593
  name: "swarmvault",