@swarmvaultai/engine 0.6.8 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +3 -3
- package/dist/index.js +2536 -219
- package/package.json +9 -1
package/dist/index.js
CHANGED
|
@@ -1434,6 +1434,7 @@ import path5 from "path";
|
|
|
1434
1434
|
var require2 = createRequire(import.meta.url);
|
|
1435
1435
|
var TREE_SITTER_RUNTIME_PACKAGE = "@vscode/tree-sitter-wasm";
|
|
1436
1436
|
var TREE_SITTER_EXTRA_GRAMMARS_PACKAGE = "tree-sitter-wasms";
|
|
1437
|
+
var SWIFT_TREE_SITTER_OPT_IN_ENV = "SWARMVAULT_ENABLE_SWIFT_TREE_SITTER";
|
|
1437
1438
|
var packageRootCache = /* @__PURE__ */ new Map();
|
|
1438
1439
|
var RATIONALE_MARKERS = ["NOTE:", "IMPORTANT:", "HACK:", "WHY:", "RATIONALE:"];
|
|
1439
1440
|
function stripKnownCommentPrefix(line) {
|
|
@@ -1464,7 +1465,16 @@ var grammarAssetByLanguage = {
|
|
|
1464
1465
|
cpp: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-cpp.wasm" },
|
|
1465
1466
|
php: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-php.wasm" },
|
|
1466
1467
|
ruby: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-ruby.wasm" },
|
|
1467
|
-
powershell: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-powershell.wasm" }
|
|
1468
|
+
powershell: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-powershell.wasm" },
|
|
1469
|
+
swift: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-swift.wasm" },
|
|
1470
|
+
elixir: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-elixir.wasm" },
|
|
1471
|
+
ocaml: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-ocaml.wasm" },
|
|
1472
|
+
objc: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-objc.wasm" },
|
|
1473
|
+
rescript: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-rescript.wasm" },
|
|
1474
|
+
solidity: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-solidity.wasm" },
|
|
1475
|
+
html: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-html.wasm" },
|
|
1476
|
+
css: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-css.wasm" },
|
|
1477
|
+
vue: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-vue.wasm" }
|
|
1468
1478
|
};
|
|
1469
1479
|
function resolvePackageRoot(packageName) {
|
|
1470
1480
|
const cached = packageRootCache.get(packageName);
|
|
@@ -1524,7 +1534,7 @@ function normalizeSymbolReference(value) {
|
|
|
1524
1534
|
}
|
|
1525
1535
|
/**
 * Remove one trailing source-code extension from a path.
 *
 * The match is case-insensitive and anchored to the end of the string, so
 * only the final extension is dropped and paths whose extension is not in
 * the list are returned unchanged.
 *
 * @param {string} filePath - Path or module-like string to trim.
 * @returns {string} The input without its recognised code extension.
 */
function stripCodeExtension(filePath) {
  const codeExtensionPattern = /\.(?:[cm]?jsx?|tsx?|mts|cts|sh|bash|zsh|py|go|rs|java|kt|kts|scala|sc|dart|lua|zig|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx|swift|exs?|mli?|mm|resi?|sol|html?|css|vue)$/i;
  const withoutExtension = filePath.replace(codeExtensionPattern, "");
  return withoutExtension;
}
|
|
@@ -1812,6 +1822,72 @@ function descendantTypeNames(node) {
|
|
|
1812
1822
|
function quotedPath(value) {
|
|
1813
1823
|
return value.replace(/^['"<]+|['">]+$/g, "").trim();
|
|
1814
1824
|
}
|
|
1825
|
+
/**
 * Blank out conditional-compilation directives while keeping line numbers stable.
 *
 * Every `#if` / `#ifdef` / `#ifndef` / `#elif` / `#else` / `#endif` line is
 * replaced with an empty line. Only the first branch of each conditional is
 * kept: once an `#elif` or `#else` is seen, the remaining lines of that
 * conditional (including anything nested inside them) are blanked as well.
 * Other `#` lines (for example `#include` or `#define`) are left untouched
 * when they sit in a kept branch.
 *
 * A conditional directive that is continued onto following lines with a
 * trailing backslash is blanked together with its continuation lines, so the
 * tail of a multi-line `#if` expression is never emitted as stray code.
 *
 * The output always has exactly as many lines as the input.
 *
 * @param {string} content - Source text, split on "\n".
 * @returns {string} The text with directives and inactive branches blanked.
 */
function neutralizePreprocessorDirectives(content) {
  // One entry per open conditional: true while its currently-kept branch is live.
  const active = [];
  const isActive = () => active.every(Boolean);
  const directiveHead = (line) => {
    const trimmed = line.trimStart();
    if (trimmed[0] !== "#") {
      return void 0;
    }
    const rest = trimmed.slice(1).trimStart();
    const match = rest.match(/^([A-Za-z]+)/);
    return match?.[1]?.toLowerCase();
  };
  // A backslash immediately before the line break (optionally before a CR) splices the next line.
  const continuesOnNextLine = (line) => /\\\r?$/.test(line);
  const closeCurrentBranch = () => {
    // Guard against unbalanced input: never write to index -1 of an empty stack.
    if (active.length > 0) {
      active[active.length - 1] = false;
    }
  };
  const out = [];
  let insideDirectiveContinuation = false;
  for (const line of content.split("\n")) {
    if (insideDirectiveContinuation) {
      // Still inside a blanked multi-line directive: drop this line as well.
      insideDirectiveContinuation = continuesOnNextLine(line);
      out.push("");
      continue;
    }
    const head = directiveHead(line);
    let isConditionalDirective = true;
    switch (head) {
      case "if":
      case "ifdef":
      case "ifndef":
        // The first branch is kept whenever the enclosing branches are kept.
        active.push(isActive());
        break;
      case "elif":
      case "else":
        closeCurrentBranch();
        break;
      case "endif":
        if (active.length > 0) {
          active.pop();
        }
        break;
      default:
        isConditionalDirective = false;
    }
    if (isConditionalDirective) {
      insideDirectiveContinuation = continuesOnNextLine(line);
      out.push("");
      continue;
    }
    out.push(isActive() ? line : "");
  }
  return out.join("\n");
}
|
|
1875
|
+
/**
 * Guess whether a shell script is written for zsh or bash.
 *
 * Only the first 4096 characters are inspected. The script is reported as
 * "zsh" when any of these is found there:
 *   - a `#!` line whose interpreter is `zsh`, either directly
 *     (`#!/bin/zsh`, `#!/usr/local/bin/zsh`, `#!zsh`) or through `env`
 *     (`#!/usr/bin/env zsh`, `#!/usr/bin/env -S zsh -f`);
 *   - a `#compdef` line;
 *   - a `${(f...` / `${(s...` style parameter-expansion flag;
 *   - one of the `setopt`, `unsetopt`, `zmodload`, `compinit` or
 *     `autoload -Uz` commands.
 * Everything else is reported as "bash".
 *
 * @param {string} content - Full text of the shell script.
 * @returns {"zsh" | "bash"} The detected dialect.
 */
function detectShellDialect(content) {
  const prefix = content.slice(0, 4096);
  // Accept an optional directory prefix before `zsh` or before `env`, optional
  // `env` flags, and require `zsh` to be the final path component so that a
  // directory merely named "zsh" (e.g. /home/zsh/bin/bash) does not match.
  const zshShebang = /^#![ \t]*(?:\S*\/)?(?:env[ \t]+(?:-\S+[ \t]+)*(?:\S*\/)?)?zsh(?![\w\/])/m;
  if (zshShebang.test(prefix)) {
    return "zsh";
  }
  if (/^\s*#compdef\b/m.test(prefix)) {
    return "zsh";
  }
  if (/\$\{\([fFsq@%]/.test(prefix)) {
    return "zsh";
  }
  if (/\b(?:setopt|unsetopt|zmodload|compinit|autoload\s+-Uz)\b/.test(prefix)) {
    return "zsh";
  }
  return "bash";
}
|
|
1815
1891
|
function diagnosticsFromTree(rootNode) {
|
|
1816
1892
|
if (!rootNode.hasError) {
|
|
1817
1893
|
return [];
|
|
@@ -1861,6 +1937,18 @@ function treeSitterCompatibilityDiagnostic(language, error) {
|
|
|
1861
1937
|
column: 1
|
|
1862
1938
|
};
|
|
1863
1939
|
}
|
|
1940
|
+
/**
 * Report whether the Swift tree-sitter opt-in is set.
 *
 * @returns {boolean} True only when the environment variable named by
 *   SWIFT_TREE_SITTER_OPT_IN_ENV is exactly the string "1".
 */
function swiftTreeSitterEnabled() {
  const optInValue = process.env[SWIFT_TREE_SITTER_OPT_IN_ENV];
  return optInValue === "1";
}
|
|
1943
|
+
/**
 * Build the warning diagnostic that explains why Swift parser-backed analysis
 * is off and which environment variable turns it on.
 *
 * @returns {{code: number, category: string, message: string, line: number, column: number}}
 *   A warning with code 9012 anchored at line 1, column 1.
 */
function swiftTreeSitterDisabledDiagnostic() {
  const message = `Swift parser-backed analysis is disabled by default because the packaged tree-sitter grammar can trigger Node/V8 out-of-memory crashes during WASM compilation. Set ${SWIFT_TREE_SITTER_OPT_IN_ENV}=1 to opt in anyway.`;
  const diagnostic = {
    code: 9012,
    category: "warning",
    message,
    line: 1,
    column: 1
  };
  return diagnostic;
}
|
|
1864
1952
|
function flattenPythonDottedName(node) {
|
|
1865
1953
|
if (!node) {
|
|
1866
1954
|
return "";
|
|
@@ -2691,24 +2779,20 @@ function zigDeclarationKind(node) {
|
|
|
2691
2779
|
}
|
|
2692
2780
|
return void 0;
|
|
2693
2781
|
}
|
|
2694
|
-
function bashCodeAnalysis(manifest, rootNode, diagnostics) {
|
|
2782
|
+
function bashCodeAnalysis(manifest, rootNode, diagnostics, rawContent) {
|
|
2695
2783
|
const imports = [];
|
|
2696
2784
|
const draftSymbols = [];
|
|
2697
2785
|
const exportLabels = [];
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
const parsed = parseBashImport(child);
|
|
2704
|
-
if (parsed) {
|
|
2705
|
-
imports.push(parsed);
|
|
2706
|
-
}
|
|
2707
|
-
continue;
|
|
2708
|
-
}
|
|
2709
|
-
if (child.type !== "function_definition") {
|
|
2710
|
-
continue;
|
|
2786
|
+
const commandNodes = rootNode.descendantsOfType("command").filter((node) => node !== null);
|
|
2787
|
+
for (const command of commandNodes) {
|
|
2788
|
+
const parsed = parseBashImport(command);
|
|
2789
|
+
if (parsed) {
|
|
2790
|
+
imports.push(parsed);
|
|
2711
2791
|
}
|
|
2792
|
+
}
|
|
2793
|
+
const functionNodes = rootNode.descendantsOfType("function_definition").filter((node) => node !== null);
|
|
2794
|
+
const functionByName = /* @__PURE__ */ new Map();
|
|
2795
|
+
for (const child of functionNodes) {
|
|
2712
2796
|
const name = nodeText(child.childForFieldName("name") ?? child.namedChildren.at(0) ?? null).trim();
|
|
2713
2797
|
if (!name) {
|
|
2714
2798
|
continue;
|
|
@@ -2724,16 +2808,44 @@ function bashCodeAnalysis(manifest, rootNode, diagnostics) {
|
|
|
2724
2808
|
bodyText: nodeText(child.childForFieldName("body") ?? findNamedChild(child, "compound_statement"))
|
|
2725
2809
|
});
|
|
2726
2810
|
exportLabels.push(name);
|
|
2811
|
+
if (!functionByName.has(name)) {
|
|
2812
|
+
functionByName.set(name, child);
|
|
2813
|
+
}
|
|
2727
2814
|
}
|
|
2728
2815
|
for (let index = 0; index < draftSymbols.length; index += 1) {
|
|
2729
|
-
const
|
|
2730
|
-
|
|
2731
|
-
|
|
2732
|
-
draftSymbols[index].callNames = bashCallNamesFromBody(
|
|
2816
|
+
const symbol = draftSymbols[index];
|
|
2817
|
+
const functionNode = functionByName.get(symbol.name);
|
|
2818
|
+
symbol.callNames = bashCallNamesFromBody(
|
|
2733
2819
|
functionNode?.childForFieldName("body") ?? findNamedChild(functionNode, "compound_statement"),
|
|
2734
|
-
|
|
2820
|
+
symbol.name
|
|
2735
2821
|
);
|
|
2736
2822
|
}
|
|
2823
|
+
if (draftSymbols.length === 0 && rawContent) {
|
|
2824
|
+
const seen = /* @__PURE__ */ new Set();
|
|
2825
|
+
for (const line of rawContent.split("\n")) {
|
|
2826
|
+
const trimmed = line.trimStart();
|
|
2827
|
+
let match = trimmed.match(/^function\s+([A-Za-z_][\w-]*)\s*(?:\(\))?/);
|
|
2828
|
+
if (!match) {
|
|
2829
|
+
match = trimmed.match(/^([A-Za-z_][\w-]*)\s*\(\)/);
|
|
2830
|
+
}
|
|
2831
|
+
const name = match?.[1];
|
|
2832
|
+
if (!name || seen.has(name)) {
|
|
2833
|
+
continue;
|
|
2834
|
+
}
|
|
2835
|
+
seen.add(name);
|
|
2836
|
+
draftSymbols.push({
|
|
2837
|
+
name,
|
|
2838
|
+
kind: "function",
|
|
2839
|
+
signature: singleLineSignature(trimmed),
|
|
2840
|
+
exported: true,
|
|
2841
|
+
callNames: [],
|
|
2842
|
+
extendsNames: [],
|
|
2843
|
+
implementsNames: [],
|
|
2844
|
+
bodyText: ""
|
|
2845
|
+
});
|
|
2846
|
+
exportLabels.push(name);
|
|
2847
|
+
}
|
|
2848
|
+
}
|
|
2737
2849
|
return finalizeCodeAnalysis(manifest, "bash", imports, draftSymbols, exportLabels, diagnostics);
|
|
2738
2850
|
}
|
|
2739
2851
|
function dartCodeAnalysis(manifest, rootNode, diagnostics) {
|
|
@@ -3518,7 +3630,23 @@ function csharpCodeAnalysis(manifest, rootNode, diagnostics) {
|
|
|
3518
3630
|
if (child.type === "file_scoped_namespace_declaration" || child.type === "namespace_declaration") {
|
|
3519
3631
|
namespaceName = nodeText(child.childForFieldName("name")) || namespaceName;
|
|
3520
3632
|
if (child.type === "namespace_declaration") {
|
|
3521
|
-
|
|
3633
|
+
const nameNode = child.childForFieldName("name");
|
|
3634
|
+
const namespaceMembers = [];
|
|
3635
|
+
for (const directChild of child.namedChildren) {
|
|
3636
|
+
if (!directChild || directChild === nameNode) {
|
|
3637
|
+
continue;
|
|
3638
|
+
}
|
|
3639
|
+
if (directChild.type === "declaration_list") {
|
|
3640
|
+
for (const inner of directChild.namedChildren) {
|
|
3641
|
+
if (inner) {
|
|
3642
|
+
namespaceMembers.push(inner);
|
|
3643
|
+
}
|
|
3644
|
+
}
|
|
3645
|
+
continue;
|
|
3646
|
+
}
|
|
3647
|
+
namespaceMembers.push(directChild);
|
|
3648
|
+
}
|
|
3649
|
+
for (const nested of namespaceMembers) {
|
|
3522
3650
|
if (nested && nested !== child.childForFieldName("name")) {
|
|
3523
3651
|
if (["class_declaration", "interface_declaration", "enum_declaration", "struct_declaration", "record_declaration"].includes(
|
|
3524
3652
|
nested.type
|
|
@@ -3806,64 +3934,174 @@ function powershellCodeAnalysis(manifest, rootNode, diagnostics) {
|
|
|
3806
3934
|
}
|
|
3807
3935
|
return finalizeCodeAnalysis(manifest, "powershell", imports, draftSymbols, exportLabels, diagnostics);
|
|
3808
3936
|
}
|
|
3809
|
-
function
|
|
3937
|
+
/**
 * Turn a Swift import node into an import record.
 *
 * @param {object} node - Syntax node for the import declaration.
 * @returns {object | undefined} The import record, or undefined when the node
 *   has no `identifier` child or that child's text is blank.
 */
function parseSwiftImport(node) {
  const moduleNode = findNamedChild(node, "identifier");
  const specifier = moduleNode ? moduleNode.text.trim() : "";
  if (specifier === "") {
    return void 0;
  }
  // Swift has no file-local relative imports: each `import` names a module
  // (Foundation, UIKit, a SwiftPM package product, or the current target's
  // own module). All of them are flagged external so the dependency
  // aggregator groups them with the other package-level graph edges.
  const isExternal = true;
  return {
    specifier,
    importedSymbols: [],
    isExternal,
    reExport: false
  };
}
|
|
3957
|
+
/**
 * Classify a Swift type declaration by its introducing keyword.
 *
 * Scans the node's direct children in order and returns the type of the first
 * one that is `struct`, `enum` or `class`. Falls back to "class" when none of
 * those keywords is present.
 *
 * @param {{children: Array<{type: string} | null>}} node - Declaration node.
 * @returns {"struct" | "enum" | "class"} The declaration kind.
 */
function swiftDeclarationKindFromKeyword(node) {
  const kindKeywords = ["struct", "enum", "class"];
  const keywordNode = node.children.find((candidate) => candidate && kindKeywords.includes(candidate.type));
  return keywordNode ? keywordNode.type : "class";
}
|
|
3974
|
+
/**
 * Find the explicit access-level keyword on a Swift declaration.
 *
 * Looks for a `modifiers` child, then a `visibility_modifier` inside it, and
 * returns the type of the first keyword child that is one of `public`,
 * `private`, `fileprivate`, `internal` or `open`.
 *
 * @param {object} node - Declaration node to inspect.
 * @returns {string | undefined} The access-level keyword, or undefined when
 *   the declaration carries none.
 */
function swiftVisibilityKeyword(node) {
  const accessLevels = ["public", "private", "fileprivate", "internal", "open"];
  const modifiers = findNamedChild(node, "modifiers");
  const visibility = modifiers ? findNamedChild(modifiers, "visibility_modifier") : null;
  if (!visibility) {
    return void 0;
  }
  const keyword = visibility.children.find((kw) => kw && accessLevels.includes(kw.type));
  return keyword ? keyword.type : void 0;
}
|
|
3993
|
+
/**
 * Decide whether a Swift declaration counts as exported.
 *
 * @param {object} node - Declaration node to inspect.
 * @returns {boolean} False when the declaration is marked `private` or
 *   `fileprivate`; true for every other access level, including none.
 */
function swiftExported(node) {
  const hiddenLevels = ["private", "fileprivate"];
  const level = swiftVisibilityKeyword(node);
  return !hiddenLevels.includes(level);
}
|
|
3997
|
+
function swiftCodeAnalysis(manifest, rootNode, diagnostics) {
|
|
3810
3998
|
const imports = [];
|
|
3811
3999
|
const draftSymbols = [];
|
|
3812
4000
|
const exportLabels = [];
|
|
3813
|
-
const
|
|
3814
|
-
|
|
3815
|
-
|
|
4001
|
+
const recordParentTypes = (declaration) => {
|
|
4002
|
+
const specifiers = declaration.namedChildren.filter((item) => item?.type === "inheritance_specifier");
|
|
4003
|
+
if (specifiers.length === 0) {
|
|
4004
|
+
return [];
|
|
3816
4005
|
}
|
|
3817
|
-
const
|
|
3818
|
-
|
|
3819
|
-
|
|
4006
|
+
const ordered = [];
|
|
4007
|
+
for (const specifier of specifiers) {
|
|
4008
|
+
const primary = findNamedChild(specifier, "user_type") ?? findNamedChild(specifier, "type_identifier") ?? specifier.namedChildren.find((item) => item !== null) ?? null;
|
|
4009
|
+
if (!primary) {
|
|
4010
|
+
continue;
|
|
4011
|
+
}
|
|
4012
|
+
const name = normalizeSymbolReference(primary.text);
|
|
4013
|
+
if (name) {
|
|
4014
|
+
ordered.push(name);
|
|
4015
|
+
}
|
|
3820
4016
|
}
|
|
3821
|
-
return
|
|
4017
|
+
return uniqueBy(ordered, (item) => item);
|
|
3822
4018
|
};
|
|
3823
4019
|
for (const child of rootNode.namedChildren) {
|
|
3824
4020
|
if (!child) {
|
|
3825
4021
|
continue;
|
|
3826
4022
|
}
|
|
3827
|
-
if (child.type === "
|
|
3828
|
-
const parsed =
|
|
4023
|
+
if (child.type === "import_declaration") {
|
|
4024
|
+
const parsed = parseSwiftImport(child);
|
|
3829
4025
|
if (parsed) {
|
|
3830
4026
|
imports.push(parsed);
|
|
3831
4027
|
}
|
|
3832
4028
|
continue;
|
|
3833
4029
|
}
|
|
3834
|
-
if (
|
|
3835
|
-
const name = extractIdentifier(child
|
|
4030
|
+
if (child.type === "protocol_declaration") {
|
|
4031
|
+
const name = extractIdentifier(findNamedChild(child, "type_identifier"));
|
|
3836
4032
|
if (!name) {
|
|
3837
4033
|
continue;
|
|
3838
4034
|
}
|
|
3839
|
-
const
|
|
3840
|
-
const
|
|
3841
|
-
|
|
3842
|
-
|
|
3843
|
-
|
|
3844
|
-
|
|
3845
|
-
|
|
4035
|
+
const parents = recordParentTypes(child);
|
|
4036
|
+
const exported = swiftExported(child);
|
|
4037
|
+
draftSymbols.push({
|
|
4038
|
+
name,
|
|
4039
|
+
kind: "interface",
|
|
4040
|
+
signature: singleLineSignature(child.text),
|
|
4041
|
+
exported,
|
|
4042
|
+
callNames: [],
|
|
4043
|
+
extendsNames: parents,
|
|
4044
|
+
implementsNames: [],
|
|
4045
|
+
bodyText: nodeText(findNamedChild(child, "protocol_body")) || child.text
|
|
4046
|
+
});
|
|
4047
|
+
if (exported) {
|
|
4048
|
+
exportLabels.push(name);
|
|
4049
|
+
}
|
|
4050
|
+
continue;
|
|
4051
|
+
}
|
|
4052
|
+
if (child.type === "class_declaration") {
|
|
4053
|
+
const name = extractIdentifier(findNamedChild(child, "type_identifier"));
|
|
4054
|
+
if (!name) {
|
|
4055
|
+
continue;
|
|
4056
|
+
}
|
|
4057
|
+
const kind = swiftDeclarationKindFromKeyword(child);
|
|
4058
|
+
const parentTypes = recordParentTypes(child);
|
|
4059
|
+
const extendsNames = kind === "class" && parentTypes.length > 0 ? [parentTypes[0]] : [];
|
|
4060
|
+
const implementsNames = kind === "class" ? parentTypes.slice(1) : parentTypes;
|
|
4061
|
+
const exported = swiftExported(child);
|
|
4062
|
+
const body = findNamedChild(child, "class_body") ?? findNamedChild(child, "enum_class_body");
|
|
3846
4063
|
draftSymbols.push({
|
|
3847
4064
|
name,
|
|
3848
4065
|
kind,
|
|
3849
4066
|
signature: singleLineSignature(child.text),
|
|
3850
4067
|
exported,
|
|
3851
4068
|
callNames: [],
|
|
3852
|
-
extendsNames
|
|
4069
|
+
extendsNames,
|
|
4070
|
+
implementsNames,
|
|
4071
|
+
bodyText: nodeText(body) || child.text
|
|
4072
|
+
});
|
|
4073
|
+
if (exported) {
|
|
4074
|
+
exportLabels.push(name);
|
|
4075
|
+
}
|
|
4076
|
+
continue;
|
|
4077
|
+
}
|
|
4078
|
+
if (child.type === "typealias_declaration") {
|
|
4079
|
+
const name = extractIdentifier(findNamedChild(child, "type_identifier"));
|
|
4080
|
+
if (!name) {
|
|
4081
|
+
continue;
|
|
4082
|
+
}
|
|
4083
|
+
const exported = swiftExported(child);
|
|
4084
|
+
draftSymbols.push({
|
|
4085
|
+
name,
|
|
4086
|
+
kind: "type_alias",
|
|
4087
|
+
signature: singleLineSignature(child.text),
|
|
4088
|
+
exported,
|
|
4089
|
+
callNames: [],
|
|
4090
|
+
extendsNames: [],
|
|
3853
4091
|
implementsNames: [],
|
|
3854
|
-
bodyText:
|
|
4092
|
+
bodyText: child.text
|
|
3855
4093
|
});
|
|
3856
4094
|
if (exported) {
|
|
3857
4095
|
exportLabels.push(name);
|
|
3858
4096
|
}
|
|
3859
4097
|
continue;
|
|
3860
4098
|
}
|
|
3861
|
-
if (child.type === "
|
|
3862
|
-
const name =
|
|
4099
|
+
if (child.type === "function_declaration") {
|
|
4100
|
+
const name = extractIdentifier(findNamedChild(child, "simple_identifier") ?? findNamedChild(child, "identifier"));
|
|
3863
4101
|
if (!name) {
|
|
3864
4102
|
continue;
|
|
3865
4103
|
}
|
|
3866
|
-
const exported =
|
|
4104
|
+
const exported = swiftExported(child);
|
|
3867
4105
|
draftSymbols.push({
|
|
3868
4106
|
name,
|
|
3869
4107
|
kind: "function",
|
|
@@ -3872,87 +4110,1163 @@ function cFamilyCodeAnalysis(manifest, language, rootNode, diagnostics) {
|
|
|
3872
4110
|
callNames: [],
|
|
3873
4111
|
extendsNames: [],
|
|
3874
4112
|
implementsNames: [],
|
|
3875
|
-
bodyText: nodeText(child
|
|
4113
|
+
bodyText: nodeText(findNamedChild(child, "function_body")) || child.text
|
|
3876
4114
|
});
|
|
3877
4115
|
if (exported) {
|
|
3878
4116
|
exportLabels.push(name);
|
|
3879
4117
|
}
|
|
4118
|
+
continue;
|
|
4119
|
+
}
|
|
4120
|
+
if (child.type === "property_declaration") {
|
|
4121
|
+
const exported = swiftExported(child);
|
|
4122
|
+
const patterns = child.namedChildren.filter((item) => item?.type === "pattern");
|
|
4123
|
+
for (const pattern of patterns) {
|
|
4124
|
+
const name = extractIdentifier(findNamedChild(pattern, "simple_identifier") ?? pattern.namedChildren[0] ?? null);
|
|
4125
|
+
if (!name) {
|
|
4126
|
+
continue;
|
|
4127
|
+
}
|
|
4128
|
+
draftSymbols.push({
|
|
4129
|
+
name,
|
|
4130
|
+
kind: "variable",
|
|
4131
|
+
signature: singleLineSignature(child.text),
|
|
4132
|
+
exported,
|
|
4133
|
+
callNames: [],
|
|
4134
|
+
extendsNames: [],
|
|
4135
|
+
implementsNames: [],
|
|
4136
|
+
bodyText: child.text
|
|
4137
|
+
});
|
|
4138
|
+
if (exported) {
|
|
4139
|
+
exportLabels.push(name);
|
|
4140
|
+
}
|
|
4141
|
+
}
|
|
3880
4142
|
}
|
|
3881
4143
|
}
|
|
3882
|
-
return finalizeCodeAnalysis(manifest,
|
|
4144
|
+
return finalizeCodeAnalysis(manifest, "swift", imports, draftSymbols, exportLabels, diagnostics);
|
|
3883
4145
|
}
|
|
3884
|
-
|
|
3885
|
-
|
|
3886
|
-
|
|
3887
|
-
|
|
3888
|
-
|
|
3889
|
-
|
|
3890
|
-
parser.setLanguage(await loadLanguage(language));
|
|
3891
|
-
tree = parser.parse(content);
|
|
3892
|
-
} catch (error) {
|
|
3893
|
-
return {
|
|
3894
|
-
code: finalizeCodeAnalysis(manifest, language, [], [], [], [treeSitterCompatibilityDiagnostic(language, error)]),
|
|
3895
|
-
rationales: []
|
|
3896
|
-
};
|
|
4146
|
+
/**
 * Read the name of the function or macro being invoked by a call node.
 *
 * @param {object} callNode - An Elixir `call` syntax node.
 * @returns {string | undefined} Trimmed text of the node's `identifier` child,
 *   or undefined when there is none or it is blank.
 */
function elixirCallIdentifier(callNode) {
  const target = findNamedChild(callNode, "identifier");
  const name = target?.text.trim();
  return name ? name : void 0;
}
|
|
4149
|
+
/**
 * Pick the first module reference out of a call's arguments.
 *
 * @param {{namedChildren: Array<{type: string, text: string} | null>} | null | undefined} argumentsNode
 *   The `arguments` node of a call, if any.
 * @returns {string | undefined} Trimmed text of the first `alias` or
 *   `identifier` child whose text is not blank; undefined when there is no
 *   arguments node or no such child.
 */
function elixirFirstModulePath(argumentsNode) {
  if (!argumentsNode) {
    return void 0;
  }
  const moduleNodeTypes = ["alias", "identifier"];
  const reference = argumentsNode.namedChildren.find(
    (candidate) => candidate && moduleNodeTypes.includes(candidate.type) && candidate.text.trim() !== ""
  );
  return reference ? reference.text.trim() : void 0;
}
|
|
4166
|
+
/**
 * Extract the defined function's name from the arguments of a
 * `def` / `defp` / `defmacro` / `defmacrop` call.
 *
 * Three argument shapes are understood, based on the first non-null child:
 *   - `identifier`        a zero-arity head without parentheses (`def run do`);
 *   - `call`              a regular head (`def run(x) do`), named by the call's
 *                         `identifier` child;
 *   - `binary_operator`   whose operator is `when`: a guarded head
 *                         (`def run(x) when is_integer(x) do`), unwrapped to
 *                         its left operand and then handled as above.
 * Any other binary operator (for example an operator definition such as
 * `def a + b`) and any other node type yield undefined.
 *
 * @param {object | null | undefined} argumentsNode - The `arguments` node of the definition call.
 * @returns {string | undefined} The trimmed function name, or undefined when it cannot be determined.
 */
function elixirFunctionNameFromArguments(argumentsNode) {
  if (!argumentsNode) {
    return void 0;
  }
  let head = argumentsNode.namedChildren.find((item) => item !== null);
  if (!head) {
    return void 0;
  }
  if (head.type === "binary_operator") {
    // Only a guard clause wraps a function head; other operators are not heads.
    const operator = head.childForFieldName("operator");
    if (operator?.text !== "when") {
      return void 0;
    }
    head = head.childForFieldName("left");
    if (!head) {
      return void 0;
    }
  }
  if (head.type === "call") {
    const inner = findNamedChild(head, "identifier");
    return inner?.text.trim() || void 0;
  }
  if (head.type === "identifier") {
    return head.text.trim() || void 0;
  }
  return void 0;
}
|
|
4183
|
+
// Macros inside a module body whose first module argument is recorded as an import.
var ELIXIR_IMPORT_MACROS = /* @__PURE__ */ new Set([
  "alias",
  "import",
  "require",
  "use"
]);
// Definition macros whose functions are recorded as exported.
var ELIXIR_PUBLIC_DEF_MACROS = /* @__PURE__ */ new Set([
  "def",
  "defmacro"
]);
// Definition macros whose functions are recorded but not exported.
var ELIXIR_PRIVATE_DEF_MACROS = /* @__PURE__ */ new Set([
  "defp",
  "defmacrop"
]);
|
|
4186
|
+
/**
 * Collect modules, imports and function definitions from an Elixir syntax tree.
 *
 * Only top-level `defmodule` / `defprotocol` calls are visited. Each one
 * yields an exported symbol ("class" for modules, "interface" for protocols).
 * Inside its `do` block, direct child calls are inspected:
 *   - `alias` / `import` / `require` / `use` become external imports;
 *   - `def` / `defmacro` become exported functions and `defp` / `defmacrop`
 *     non-exported ones, each named `<Module>.<function>`.
 * The first module found is passed to finalizeCodeAnalysis as `moduleName`.
 *
 * @param {object} manifest - Passed through to finalizeCodeAnalysis.
 * @param {object} rootNode - Root node of the parsed file.
 * @param {Array<object>} diagnostics - Passed through to finalizeCodeAnalysis.
 * @returns {*} Whatever finalizeCodeAnalysis returns for language "elixir".
 */
function elixirCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  let primaryModuleName;
  const firstLineOf = (node) => node.text.split("\n")[0] ?? node.text;
  const isCallNode = (node) => Boolean(node) && node.type === "call";

  for (const moduleCall of rootNode.namedChildren) {
    if (!isCallNode(moduleCall)) {
      continue;
    }
    const definer = elixirCallIdentifier(moduleCall);
    const isProtocol = definer === "defprotocol";
    if (!isProtocol && definer !== "defmodule") {
      continue;
    }
    const moduleName = elixirFirstModulePath(findNamedChild(moduleCall, "arguments"));
    if (!moduleName) {
      continue;
    }
    const headerLine = firstLineOf(moduleCall);
    // Remember only the first module of the file.
    primaryModuleName = primaryModuleName === void 0 ? moduleName : primaryModuleName;
    draftSymbols.push({
      name: moduleName,
      kind: isProtocol ? "interface" : "class",
      signature: singleLineSignature(headerLine),
      // Modules and protocols are always module-level public in Elixir.
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: moduleCall.text
    });
    exportLabels.push(moduleName);

    const moduleBody = findNamedChild(moduleCall, "do_block");
    if (!moduleBody) {
      continue;
    }
    for (const member of moduleBody.namedChildren) {
      if (!isCallNode(member)) {
        continue;
      }
      const macro = elixirCallIdentifier(member);
      if (!macro) {
        continue;
      }
      if (ELIXIR_IMPORT_MACROS.has(macro)) {
        const target = elixirFirstModulePath(findNamedChild(member, "arguments"));
        if (target) {
          imports.push({
            specifier: target,
            importedSymbols: [],
            // Elixir imports always point at a compiled BEAM module; there is
            // no "file-local" relative import as in Python or JS, so every
            // entry is treated as external.
            isExternal: true,
            reExport: false
          });
        }
        continue;
      }
      const isPublic = ELIXIR_PUBLIC_DEF_MACROS.has(macro);
      if (!isPublic && !ELIXIR_PRIVATE_DEF_MACROS.has(macro)) {
        continue;
      }
      const fnName = elixirFunctionNameFromArguments(findNamedChild(member, "arguments"));
      if (!fnName) {
        continue;
      }
      const qualifiedName = `${moduleName}.${fnName}`;
      const definitionHeader = firstLineOf(member);
      draftSymbols.push({
        name: qualifiedName,
        kind: "function",
        signature: singleLineSignature(definitionHeader),
        exported: isPublic,
        callNames: [],
        extendsNames: [],
        implementsNames: [],
        bodyText: nodeText(findNamedChild(member, "do_block")) || member.text
      });
      if (isPublic) {
        exportLabels.push(qualifiedName);
      }
    }
  }
  return finalizeCodeAnalysis(manifest, "elixir", imports, draftSymbols, exportLabels, diagnostics, {
    moduleName: primaryModuleName
  });
}
|
|
4279
|
+
/**
 * Turn an OCaml `open` node into an import record.
 *
 * @param {object} node - Syntax node for the `open` statement.
 * @returns {object | undefined} The import record, or undefined when the node
 *   has no `module_path` child or that child's text is blank.
 */
function parseOCamlOpen(node) {
  const pathNode = findNamedChild(node, "module_path");
  const specifier = pathNode ? pathNode.text.trim() : "";
  if (specifier === "") {
    return void 0;
  }
  // An OCaml `open` always names a compiled module; there is no file-local
  // "./sibling" form. It is classified as external here, leaving it to
  // resolveCodeImport's single-candidate short-circuit to promote it to
  // local when an alias matches.
  const isExternal = true;
  return {
    specifier,
    importedSymbols: [],
    isExternal,
    reExport: false
  };
}
|
|
4298
|
+
/**
 * Classify an OCaml `let` binding as a function or a plain value.
 *
 * @param {{namedChildren: Array<{type: string} | null>} | null | undefined} letBinding
 *   The `let_binding` node, if any.
 * @returns {"function" | "variable" | undefined} "function" when the binding
 *   has at least one `parameter` child, "variable" otherwise; undefined when
 *   no binding is given.
 */
function ocamlValueBindingKind(letBinding) {
  if (!letBinding) {
    return void 0;
  }
  for (const part of letBinding.namedChildren) {
    if (part?.type === "parameter") {
      return "function";
    }
  }
  return "variable";
}
|
|
4305
|
+
/**
 * Classify an OCaml type binding by the shape of its definition.
 *
 * The first direct child that is a `record_declaration` or a
 * `variant_declaration` decides the result.
 *
 * @param {{namedChildren: Array<{type: string} | null>} | null | undefined} typeBinding
 *   The `type_binding` node, if any.
 * @returns {"struct" | "enum" | "type_alias"} "struct" for records, "enum"
 *   for variants, "type_alias" for anything else or when no binding is given.
 */
function ocamlTypeKind(typeBinding) {
  if (!typeBinding) {
    return "type_alias";
  }
  const kindByDeclaration = /* @__PURE__ */ new Map([
    ["record_declaration", "struct"],
    ["variant_declaration", "enum"]
  ]);
  const declaration = typeBinding.namedChildren.find((part) => part && kindByDeclaration.has(part.type));
  return declaration ? kindByDeclaration.get(declaration.type) : "type_alias";
}
|
|
4322
|
+
/**
 * Collect imports and top-level definitions from an OCaml syntax tree.
 *
 * Only direct children of the root are visited:
 *   - `open_module`             becomes an import (via parseOCamlOpen);
 *   - `module_definition`       becomes a "class" symbol;
 *   - `module_type_definition`  becomes an "interface" symbol;
 *   - `type_definition`         becomes a "struct", "enum" or "type_alias" symbol;
 *   - `value_definition`        becomes a "function" or "variable" symbol.
 * Every symbol is recorded as exported, and for each definition only the
 * first binding child is read.
 *
 * @param {object} manifest - Passed through to finalizeCodeAnalysis.
 * @param {object} rootNode - Root node of the parsed file.
 * @param {Array<object>} diagnostics - Passed through to finalizeCodeAnalysis.
 * @returns {*} Whatever finalizeCodeAnalysis returns for language "ocaml".
 */
function ocamlCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  // OCaml's `let`/`module` bindings are exported from the containing
  // compilation unit unless an explicit `.mli` interface hides them, so
  // everything defined here is treated as exported; consumers who want
  // hiding should rely on the downstream interface-file merge.
  const declare = (name, kind, definition, bodyText) => {
    const headerLine = definition.text.split("\n")[0] ?? definition.text;
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(headerLine),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText
    });
    exportLabels.push(name);
  };

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    switch (child.type) {
      case "open_module": {
        const parsed = parseOCamlOpen(child);
        if (parsed) {
          imports.push(parsed);
        }
        break;
      }
      case "module_definition": {
        const binding = findNamedChild(child, "module_binding");
        const nameNode = binding ? findNamedChild(binding, "module_name") : null;
        const name = nameNode?.text.trim();
        if (name) {
          declare(name, "class", child, nodeText(findNamedChild(binding, "structure")) || child.text);
        }
        break;
      }
      case "module_type_definition": {
        const name = findNamedChild(child, "module_type_name")?.text.trim();
        if (name) {
          declare(name, "interface", child, nodeText(findNamedChild(child, "signature")) || child.text);
        }
        break;
      }
      case "type_definition": {
        const binding = findNamedChild(child, "type_binding");
        const nameNode = binding ? findNamedChild(binding, "type_constructor") : null;
        const name = nameNode?.text.trim();
        if (name) {
          declare(name, ocamlTypeKind(binding), child, child.text);
        }
        break;
      }
      case "value_definition": {
        const binding = findNamedChild(child, "let_binding");
        if (!binding) {
          break;
        }
        const name = findNamedChild(binding, "value_name")?.text.trim();
        if (name) {
          declare(name, ocamlValueBindingKind(binding) ?? "function", child, child.text);
        }
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "ocaml", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
4427
|
+
/**
 * Collects imports and top-level symbols from an Objective-C syntax tree.
 *
 * Only the direct named children of `rootNode` are inspected. `preproc_include`
 * nodes become imports; protocol declarations, class interfaces, class
 * implementations and function definitions become draft symbols that are all
 * flagged as exported.
 *
 * @param manifest Forwarded unchanged to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node whose `namedChildren` are walked.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis` for language "objc".
 */
function objcCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  // Names already emitted as classes, so an implementation that follows its
  // interface does not produce a second symbol.
  const declaredClassNames = new Set();

  // Signature text: the first source line of the node.
  const headline = (node) => singleLineSignature(node.text.split("\n")[0] ?? node.text);

  // Trimmed, de-duplicated texts of the named children having one of the given types.
  const childTexts = (listNode, acceptedTypes) => uniqueBy(
    listNode.namedChildren.filter((item) => acceptedTypes.includes(item?.type)).map((item) => item.text.trim()).filter(Boolean),
    (item) => item
  );

  // Appends one exported symbol together with its export label.
  const recordSymbol = (name, kind, node, extendsNames, implementsNames, bodyText) => {
    draftSymbols.push({
      name,
      kind,
      signature: headline(node),
      exported: true,
      callNames: [],
      extendsNames,
      implementsNames,
      bodyText
    });
    exportLabels.push(name);
  };

  // Follows the chain of `declarator` fields down to the innermost node and
  // extracts the identifier from it.
  const functionNameFromDeclarator = (node) => {
    let current = node;
    while (current) {
      const inner = current.childForFieldName("declarator");
      if (!inner) {
        return extractIdentifier(current);
      }
      current = inner;
    }
    return void 0;
  };

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    switch (child.type) {
      case "preproc_include": {
        const include = parseCppInclude(child);
        if (include) {
          imports.push(include);
        }
        break;
      }
      case "protocol_declaration": {
        const name = findNamedChild(child, "identifier")?.text.trim();
        if (!name) {
          break;
        }
        const refList = findNamedChild(child, "protocol_reference_list");
        const parents = refList ? childTexts(refList, ["identifier"]) : [];
        recordSymbol(name, "interface", child, parents, [], child.text);
        break;
      }
      case "class_interface": {
        // First identifier child is the class name, second (if any) the superclass.
        const identifiers = child.namedChildren.filter((item) => item?.type === "identifier");
        const name = identifiers[0]?.text.trim();
        if (!name) {
          break;
        }
        const superclass = identifiers[1]?.text.trim();
        const parameterized = findNamedChild(child, "parameterized_arguments");
        const protocols = parameterized ? childTexts(parameterized, ["type_name", "identifier"]) : [];
        declaredClassNames.add(name);
        recordSymbol(name, "class", child, superclass ? [superclass] : [], protocols, child.text);
        break;
      }
      case "class_implementation": {
        const name = findNamedChild(child, "identifier")?.text.trim();
        if (!name || declaredClassNames.has(name)) {
          break;
        }
        declaredClassNames.add(name);
        recordSymbol(name, "class", child, [], [], child.text);
        break;
      }
      case "function_definition": {
        const name = functionNameFromDeclarator(child.childForFieldName("declarator"));
        if (!name) {
          break;
        }
        recordSymbol(name, "function", child, [], [], nodeText(child.childForFieldName("body")) || child.text);
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "objc", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
4546
|
+
/**
 * Collects imports and top-level symbols from a ReScript syntax tree.
 *
 * Only the direct named children of `rootNode` are inspected. `open_statement`
 * nodes become imports; module, type and let declarations become draft symbols
 * that are all flagged as exported.
 *
 * @param manifest Forwarded unchanged to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node whose `namedChildren` are walked.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis` for language "rescript".
 */
function rescriptCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];

  // A type binding is an enum when a variant type comes first among its
  // children, a struct when a record type comes first, otherwise an alias.
  const rescriptTypeKind = (typeBinding) => {
    const shaped = typeBinding ? typeBinding.namedChildren.find((node) => node && (node.type === "variant_type" || node.type === "record_type")) : void 0;
    if (!shaped) {
      return "type_alias";
    }
    return shaped.type === "variant_type" ? "enum" : "struct";
  };

  // A let binding is a function when any direct child is a `function` node.
  const rescriptLetBindingKind = (letBinding) => {
    const bindsFunction = letBinding ? letBinding.namedChildren.some((node) => node?.type === "function") : false;
    return bindsFunction ? "function" : "variable";
  };

  // Appends one exported symbol together with its export label.
  const recordSymbol = (name, kind, node, bodyText) => {
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(node.text.split("\n")[0] ?? node.text),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText
    });
    exportLabels.push(name);
  };

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    switch (child.type) {
      case "open_statement": {
        const specifier = findNamedChild(child, "module_identifier")?.text.trim();
        if (specifier) {
          imports.push({
            specifier,
            importedSymbols: [],
            // ReScript modules resolve through the build system's own module
            // graph, so an opened module is never treated as file-local.
            isExternal: true,
            reExport: false
          });
        }
        break;
      }
      case "module_declaration": {
        const binding = findNamedChild(child, "module_binding");
        const name = (binding ? findNamedChild(binding, "module_identifier") : null)?.text.trim();
        if (name) {
          recordSymbol(name, "class", child, nodeText(findNamedChild(binding, "block")) || child.text);
        }
        break;
      }
      case "type_declaration": {
        const binding = findNamedChild(child, "type_binding");
        const name = (binding ? findNamedChild(binding, "type_identifier") : null)?.text.trim();
        if (name) {
          recordSymbol(name, rescriptTypeKind(binding), child, child.text);
        }
        break;
      }
      case "let_declaration": {
        const binding = findNamedChild(child, "let_binding");
        const name = (binding ? findNamedChild(binding, "value_identifier") : null)?.text.trim();
        if (name) {
          recordSymbol(name, rescriptLetBindingKind(binding), child, child.text);
        }
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "rescript", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
4662
|
+
/**
 * Converts a Solidity import node into a list of import records.
 *
 * @param node Import node; its `string` child holds the quoted path and its
 *   `identifier` children name the imported symbols.
 * @returns An array with a single import record, or an empty array when the
 *   node has no string child or `quotedPath` yields nothing for it.
 */
function parseSolidityImport(node) {
  const pathLiteral = node.namedChildren.find((item) => item?.type === "string");
  const specifier = pathLiteral ? quotedPath(pathLiteral.text) : void 0;
  if (!specifier) {
    return [];
  }

  const symbolNames = [];
  for (const item of node.namedChildren) {
    if (item?.type !== "identifier") {
      continue;
    }
    const text = item.text.trim();
    if (text) {
      symbolNames.push(text);
    }
  }

  // Paths starting with "./", "../" or "/" are local; anything else is external.
  const pointsIntoRepo = ["./", "../", "/"].some((prefix) => specifier.startsWith(prefix));
  return [
    {
      specifier,
      importedSymbols: uniqueBy(symbolNames, (item) => item),
      isExternal: !pointsIntoRepo,
      reExport: false
    }
  ];
}
|
|
4685
|
+
/**
 * Collects imports and top-level symbols from a Solidity syntax tree.
 *
 * Only the direct named children of `rootNode` are inspected. Import
 * directives become imports; interfaces, libraries, contracts, structs, enums
 * and function definitions become draft symbols that are all flagged as
 * exported.
 *
 * @param manifest Forwarded unchanged to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node whose `namedChildren` are walked.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis` for language "solidity".
 */
function solidityCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];

  // Normalized, de-duplicated parent names found under the declaration's
  // `inheritance_specifier` children.
  const collectParents = (declaration) => {
    const names = declaration.namedChildren
      .filter((item) => item?.type === "inheritance_specifier")
      .flatMap((specifier) => specifier.namedChildren)
      .filter((node) => node && (node.type === "user_defined_type" || node.type === "identifier"))
      .map((node) => normalizeSymbolReference(node.text))
      .filter((text) => text);
    return uniqueBy(names, (item) => item);
  };

  // How each handled declaration type maps onto a draft symbol:
  //   kind      - symbol kind to emit
  //   parentsAs - which edge list receives the inherited names (if any)
  //   inherits  - whether inheritance specifiers are collected at all
  //   bodyType  - child node type preferred for bodyText (falls back to the
  //               whole declaration text)
  // Solidity supports multiple inheritance, so every parent of a contract is
  // listed as an `implements` edge rather than promoting one to `extends`.
  const declarationShapes = new Map([
    ["interface_declaration", { kind: "interface", parentsAs: "extends", inherits: true, bodyType: "contract_body" }],
    ["library_declaration", { kind: "class", parentsAs: "implements", inherits: false, bodyType: "contract_body" }],
    ["contract_declaration", { kind: "class", parentsAs: "implements", inherits: true, bodyType: "contract_body" }],
    ["struct_declaration", { kind: "struct", parentsAs: null, inherits: false, bodyType: null }],
    ["enum_declaration", { kind: "enum", parentsAs: null, inherits: false, bodyType: null }],
    ["function_definition", { kind: "function", parentsAs: null, inherits: false, bodyType: "function_body" }]
  ]);

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    if (child.type === "import_directive") {
      imports.push(...parseSolidityImport(child));
      continue;
    }
    const shape = declarationShapes.get(child.type);
    if (!shape) {
      continue;
    }
    const name = findNamedChild(child, "identifier")?.text.trim();
    if (!name) {
      continue;
    }
    const parents = shape.inherits ? collectParents(child) : [];
    draftSymbols.push({
      name,
      kind: shape.kind,
      signature: singleLineSignature(child.text.split("\n")[0] ?? child.text),
      exported: true,
      callNames: [],
      extendsNames: shape.parentsAs === "extends" ? parents : [],
      implementsNames: shape.parentsAs === "implements" ? parents : [],
      bodyText: shape.bodyType ? nodeText(findNamedChild(child, shape.bodyType)) || child.text : child.text
    });
    exportLabels.push(name);
  }
  return finalizeCodeAnalysis(manifest, "solidity", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
4816
|
+
/**
 * Reads the trimmed value of an HTML attribute node.
 *
 * @param attribute Attribute node; its `namedChildren` are searched for a
 *   `quoted_attribute_value` or a bare `attribute_value`.
 * @returns The trimmed value text, or `undefined` when the attribute has
 *   neither kind of value child.
 */
function htmlAttributeValue(attribute) {
  const isPlainValue = (node) => node?.type === "attribute_value";
  const quoted = attribute.namedChildren.find((node) => node?.type === "quoted_attribute_value");
  if (!quoted) {
    // Unquoted form: the value node hangs directly off the attribute.
    return attribute.namedChildren.find(isPlainValue)?.text.trim();
  }
  const inner = quoted.namedChildren.find(isPlainValue);
  if (inner) {
    return inner.text.trim();
  }
  // No inner value node: fall back to the quoted text itself and drop the
  // first and last character when it opens with a quote mark.
  const raw = quoted.text;
  const opensWithQuote = raw.startsWith('"') || raw.startsWith("'");
  const unwrapped = raw.length >= 2 && opensWithQuote ? raw.slice(1, -1) : raw;
  return unwrapped.trim();
}
|
|
4832
|
+
/**
 * Builds a map of lower-cased attribute names to values for an element.
 *
 * Attributes are read from the element's `start_tag` child, or from its
 * `self_closing_tag` child when there is no start tag. Attributes for which
 * `htmlAttributeValue` yields `undefined` are left out.
 *
 * @param element Element node to inspect.
 * @returns A `Map` from attribute name to value; empty when no tag is found.
 */
function htmlAttributesOf(element) {
  const attributes = new Map();
  const tag = findNamedChild(element, "start_tag") ?? findNamedChild(element, "self_closing_tag");
  const candidates = tag ? tag.namedChildren : [];
  for (const node of candidates) {
    if (node?.type !== "attribute") {
      continue;
    }
    const name = findNamedChild(node, "attribute_name")?.text.trim().toLowerCase();
    if (!name) {
      continue;
    }
    const value = htmlAttributeValue(node);
    if (value === void 0) {
      continue;
    }
    attributes.set(name, value);
  }
  return attributes;
}
|
|
4854
|
+
/**
 * Returns the lower-cased, trimmed tag name of an element.
 *
 * @param element Element node; its `start_tag` child is preferred, with
 *   `self_closing_tag` as the fallback.
 * @returns The tag name, or `undefined` when no tag or tag-name node is found.
 */
function htmlTagName(element) {
  const tag = findNamedChild(element, "start_tag") ?? findNamedChild(element, "self_closing_tag");
  return tag ? findNamedChild(tag, "tag_name")?.text.trim().toLowerCase() : void 0;
}
|
|
4861
|
+
/**
 * Collects imports and symbols from an HTML syntax tree.
 *
 * Every `element`, `script_element` and `style_element` descendant of
 * `rootNode` is visited:
 *   - `<link>` elements whose `rel` contains the `stylesheet` token and that
 *     carry an `href` become imports;
 *   - script elements with a `src` attribute become imports;
 *   - tags whose name contains a hyphen become "class" symbols (once per name);
 *   - `id` attributes become "variable" symbols (once per id).
 * All emitted symbols are flagged as exported.
 *
 * @param manifest Forwarded unchanged to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node providing `descendantsOfType`.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis` for language "html".
 */
function htmlCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  // Shared between tag-name symbols and id symbols so a name is emitted once.
  const seenSymbolNames = /* @__PURE__ */ new Set();

  // Path-like specifiers are local; absolute http(s) and protocol-relative
  // URLs are not; any other specifier is local unless it contains a ":".
  const isLocalAssetSpecifier = (specifier) => {
    if (!specifier) {
      return false;
    }
    if (specifier.startsWith("./") || specifier.startsWith("../") || specifier.startsWith("/")) {
      return true;
    }
    if (specifier.startsWith("http://") || specifier.startsWith("https://") || specifier.startsWith("//")) {
      return false;
    }
    return !specifier.includes(":");
  };

  // `rel` holds a whitespace-separated list of case-insensitive link types,
  // so `rel="Stylesheet"` and `rel="stylesheet preload"` both name a
  // stylesheet. An exact string comparison would miss them.
  const isStylesheetRel = (rel) => typeof rel === "string" && rel.toLowerCase().split(/\s+/).includes("stylesheet");

  const pushAssetImport = (specifier) => {
    imports.push({
      specifier,
      importedSymbols: [],
      isExternal: !isLocalAssetSpecifier(specifier),
      reExport: false
    });
  };

  const pushSymbol = (name, kind, element) => {
    seenSymbolNames.add(name);
    draftSymbols.push({
      name,
      kind,
      signature: singleLineSignature(element.text.split("\n")[0] ?? element.text),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: element.text
    });
    exportLabels.push(name);
  };

  const elements = rootNode.descendantsOfType(["element", "script_element", "style_element"]).filter((item) => item !== null);
  for (const element of elements) {
    const attrs = htmlAttributesOf(element);
    const tagName = htmlTagName(element);
    if (tagName === "link") {
      const href = attrs.get("href");
      if (isStylesheetRel(attrs.get("rel")) && href) {
        pushAssetImport(href);
      }
      // Link elements never contribute symbols, whatever their rel.
      continue;
    }
    if (element.type === "script_element") {
      const src = attrs.get("src");
      if (src) {
        pushAssetImport(src);
      }
      continue;
    }
    // A hyphen in the tag name is what marks a tag as a symbol-worthy
    // (custom-element style) tag here.
    if (tagName && tagName.includes("-") && !seenSymbolNames.has(tagName)) {
      pushSymbol(tagName, "class", element);
    }
    const id = attrs.get("id");
    if (id && !seenSymbolNames.has(id)) {
      pushSymbol(id, "variable", element);
    }
  }
  return finalizeCodeAnalysis(manifest, "html", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
4941
|
+
/**
 * Converts a CSS import node into an import record.
 *
 * The path is taken from a direct `string_value` child, or failing that from
 * a `string_value` inside the `arguments` of a `call_expression` child.
 *
 * @param node Import node to inspect.
 * @returns The import record, or `undefined` when no usable path is found.
 */
function parseCssImport(node) {
  const isStringValue = (candidate) => candidate?.type === "string_value";

  // Builds the record for a string node; paths starting with "./", "../" or
  // "/" are local, anything else is external.
  const recordFor = (stringNode) => {
    const specifier = quotedPath(stringNode.text);
    if (!specifier) {
      return void 0;
    }
    const local = specifier.startsWith("./") || specifier.startsWith("../") || specifier.startsWith("/");
    return {
      specifier,
      importedSymbols: [],
      isExternal: !local,
      reExport: false
    };
  };

  const direct = node.namedChildren.find(isStringValue);
  if (direct) {
    return recordFor(direct);
  }
  const call = node.namedChildren.find((candidate) => candidate?.type === "call_expression");
  const args = call ? findNamedChild(call, "arguments") : void 0;
  const nested = args?.namedChildren.find(isStringValue);
  return nested ? recordFor(nested) : void 0;
}
|
|
4974
|
+
/**
 * Collects imports and symbols from a CSS syntax tree.
 *
 * Only the direct named children of `rootNode` are inspected. Import
 * statements become imports; each rule set contributes one "class" symbol
 * named after its whitespace-normalized selector list; each keyframes
 * statement contributes one symbol named `@keyframes <name>`. A given symbol
 * name is emitted only once.
 *
 * @param manifest Forwarded unchanged to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node whose `namedChildren` are walked.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis` for language "css".
 */
function cssCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const seenSymbols = new Set();

  // Registers a symbol for the given name unless it is blank or already known.
  const addSelectorSymbol = (name, ruleText) => {
    const label = name.trim();
    if (!label || seenSymbols.has(label)) {
      return;
    }
    seenSymbols.add(label);
    draftSymbols.push({
      name: label,
      kind: "class",
      signature: singleLineSignature(ruleText.split("\n")[0] ?? ruleText),
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText: ruleText
    });
    exportLabels.push(label);
  };

  for (const child of rootNode.namedChildren) {
    switch (child?.type) {
      case "import_statement": {
        const record = parseCssImport(child);
        if (record) {
          imports.push(record);
        }
        break;
      }
      case "rule_set": {
        const selectors = findNamedChild(child, "selectors");
        if (selectors) {
          addSelectorSymbol(normalizeWhitespace(selectors.text), child.text);
        }
        break;
      }
      case "keyframes_statement": {
        const nameNode = child.namedChildren.find((c) => c?.type === "keyframes_name" || c?.type === "plain_value");
        const name = nameNode?.text.trim();
        if (name) {
          addSelectorSymbol(`@keyframes ${name}`, child.text);
        }
        break;
      }
      default:
        break;
    }
  }
  return finalizeCodeAnalysis(manifest, "css", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
5027
|
+
/**
 * Collects symbols from a Vue single-file-component syntax tree.
 *
 * Emits one "class" symbol named after the file (path basename without its
 * code extension), then scans the `template_element` child of `rootNode`:
 * tags whose raw name starts with an upper-case ASCII letter become "class"
 * symbols, and `id` attributes on the remaining hyphen-free tags become
 * "variable" symbols. No imports are produced by this function.
 *
 * @param manifest Supplies `repoRelativePath`, `originalPath` or `storedPath`
 *   for the component name; also forwarded to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node for the whole file.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis` for language "vue".
 */
function vueCodeAnalysis(manifest, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];
  const seenSymbols = new Set();

  // Marks the name as seen and appends the exported symbol plus its label.
  const register = (name, kind, signature, bodyText) => {
    seenSymbols.add(name);
    draftSymbols.push({
      name,
      kind,
      signature,
      exported: true,
      callNames: [],
      extendsNames: [],
      implementsNames: [],
      bodyText
    });
    exportLabels.push(name);
  };
  const openingLine = (node) => singleLineSignature(node.text.split("\n")[0] ?? node.text);

  const repoPath = manifest.repoRelativePath ?? path5.basename(manifest.originalPath ?? manifest.storedPath);
  const componentName = path5.posix.basename(stripCodeExtension(toPosix(repoPath)));
  if (componentName) {
    register(componentName, "class", `vue component ${componentName}`, rootNode.text);
  }

  const template = rootNode.namedChildren.find((c) => c?.type === "template_element");
  const elements = template ? template.descendantsOfType(["element"]).filter((item) => item !== null) : [];
  for (const element of elements) {
    const tagName = htmlTagName(element);
    const attrs = htmlAttributesOf(element);
    // htmlTagName lower-cases its result, so the raw tag text is read again
    // here to see whether the tag starts with an upper-case letter.
    const startTag = findNamedChild(element, "start_tag") ?? findNamedChild(element, "self_closing_tag") ?? null;
    const rawTagName = startTag ? findNamedChild(startTag, "tag_name")?.text.trim() : void 0;
    const isComponentTag = Boolean(rawTagName) && /^[A-Z]/.test(rawTagName);

    if (isComponentTag && !seenSymbols.has(rawTagName)) {
      register(rawTagName, "class", openingLine(element), element.text);
    }
    if (!tagName || tagName.includes("-") || isComponentTag) {
      continue;
    }
    const id = attrs.get("id");
    if (id && !seenSymbols.has(id)) {
      register(id, "variable", openingLine(element), element.text);
    }
  }
  return finalizeCodeAnalysis(manifest, "vue", imports, draftSymbols, exportLabels, diagnostics);
}
|
|
5091
|
+
/**
 * Collects imports and top-level symbols from a C-family syntax tree.
 *
 * Only the direct named children of `rootNode` are inspected.
 * `preproc_include` nodes become imports; class, struct and enum specifiers
 * and function definitions become draft symbols. A symbol is exported unless
 * the `static` keyword appears in its declaration header, i.e. the text that
 * precedes its `body` field. `static` inside the body (a static local
 * variable, a static class member) does not affect the flag.
 *
 * @param manifest Forwarded unchanged to `finalizeCodeAnalysis`.
 * @param language Language label forwarded to `finalizeCodeAnalysis`.
 * @param rootNode Syntax node whose `namedChildren` are walked.
 * @param diagnostics Forwarded unchanged to `finalizeCodeAnalysis`.
 * @returns The value produced by `finalizeCodeAnalysis`.
 */
function cFamilyCodeAnalysis(manifest, language, rootNode, diagnostics) {
  const imports = [];
  const draftSymbols = [];
  const exportLabels = [];

  // Follows nested `declarator` fields down to the innermost node and
  // extracts the identifier from it.
  const functionNameFromDeclarator = (node) => {
    if (!node) {
      return void 0;
    }
    const declarator = node.childForFieldName("declarator");
    if (declarator) {
      return functionNameFromDeclarator(declarator);
    }
    return extractIdentifier(node);
  };

  // True when `static` occurs before the definition's body. The header is
  // cut out of the node text using the body's start offset relative to the
  // node; when there is no body, or the offsets are unusable, the whole text
  // is scanned instead.
  const hasStaticHeader = (node) => {
    const body = node.childForFieldName("body");
    const offset = body ? body.startIndex - node.startIndex : Number.NaN;
    const header = Number.isInteger(offset) && offset >= 0 ? node.text.slice(0, offset) : node.text;
    return /\bstatic\b/.test(header);
  };

  for (const child of rootNode.namedChildren) {
    if (!child) {
      continue;
    }
    if (child.type === "preproc_include") {
      const parsed = parseCppInclude(child);
      if (parsed) {
        imports.push(parsed);
      }
      continue;
    }
    if (["class_specifier", "struct_specifier", "enum_specifier"].includes(child.type)) {
      const name = extractIdentifier(child.childForFieldName("name"));
      if (!name) {
        continue;
      }
      const kind = child.type === "enum_specifier" ? "enum" : child.type === "struct_specifier" ? "struct" : "class";
      const baseClassClause = findNamedChild(child, "base_class_clause") ?? child.childForFieldName("base_class_clause");
      // Base names with access/virtual keywords stripped, normalized and de-duplicated.
      const bases = baseClassClause ? uniqueBy(
        baseClassClause.namedChildren.filter((item) => item !== null && item.type !== "access_specifier").map((item) => normalizeSymbolReference(item.text.replace(/\b(public|private|protected|virtual)\b/g, "").trim())).filter(Boolean),
        (item) => item
      ) : [];
      const exported = !hasStaticHeader(child);
      draftSymbols.push({
        name,
        kind,
        signature: singleLineSignature(child.text),
        exported,
        callNames: [],
        extendsNames: bases,
        implementsNames: [],
        bodyText: nodeText(child.childForFieldName("body")) || child.text
      });
      if (exported) {
        exportLabels.push(name);
      }
      continue;
    }
    if (child.type === "function_definition") {
      const name = functionNameFromDeclarator(child.childForFieldName("declarator"));
      if (!name) {
        continue;
      }
      const exported = !hasStaticHeader(child);
      draftSymbols.push({
        name,
        kind: "function",
        signature: singleLineSignature(child.text),
        exported,
        callNames: [],
        extendsNames: [],
        implementsNames: [],
        bodyText: nodeText(child.childForFieldName("body")) || child.text
      });
      if (exported) {
        exportLabels.push(name);
      }
    }
  }
  return finalizeCodeAnalysis(manifest, language, imports, draftSymbols, exportLabels, diagnostics);
}
|
|
5166
|
+
async function analyzeTreeSitterCode(manifest, content, language) {
|
|
5167
|
+
if (language === "swift" && !swiftTreeSitterEnabled()) {
|
|
5168
|
+
return {
|
|
5169
|
+
code: finalizeCodeAnalysis(manifest, language, [], [], [], [swiftTreeSitterDisabledDiagnostic()]),
|
|
5170
|
+
rationales: []
|
|
5171
|
+
};
|
|
5172
|
+
}
|
|
5173
|
+
const parseInput = language === "c" || language === "cpp" || language === "csharp" ? neutralizePreprocessorDirectives(content) : content;
|
|
5174
|
+
let tree = null;
|
|
5175
|
+
try {
|
|
5176
|
+
const module = await getTreeSitterModule();
|
|
5177
|
+
await ensureTreeSitterInit(module);
|
|
5178
|
+
const parser = new module.Parser();
|
|
5179
|
+
parser.setLanguage(await loadLanguage(language));
|
|
5180
|
+
tree = parser.parse(parseInput);
|
|
5181
|
+
} catch (error) {
|
|
5182
|
+
const diagnostic = treeSitterCompatibilityDiagnostic(language, error);
|
|
5183
|
+
if (language === "bash" && typeof diagnostic.message === "string" && diagnostic.message.includes("resolved is not a function")) {
|
|
5184
|
+
diagnostic.category = "warning";
|
|
5185
|
+
}
|
|
5186
|
+
return {
|
|
5187
|
+
code: finalizeCodeAnalysis(manifest, language, [], [], [], [diagnostic]),
|
|
5188
|
+
rationales: []
|
|
5189
|
+
};
|
|
5190
|
+
}
|
|
5191
|
+
if (!tree) {
|
|
5192
|
+
return {
|
|
5193
|
+
code: finalizeCodeAnalysis(
|
|
5194
|
+
manifest,
|
|
5195
|
+
language,
|
|
5196
|
+
[],
|
|
5197
|
+
[],
|
|
5198
|
+
[],
|
|
5199
|
+
[
|
|
5200
|
+
{
|
|
5201
|
+
code: 9e3,
|
|
5202
|
+
category: "error",
|
|
5203
|
+
message: `Failed to parse ${language} source.`,
|
|
5204
|
+
line: 1,
|
|
5205
|
+
column: 1
|
|
5206
|
+
}
|
|
5207
|
+
]
|
|
5208
|
+
),
|
|
5209
|
+
rationales: []
|
|
5210
|
+
};
|
|
5211
|
+
}
|
|
5212
|
+
try {
|
|
5213
|
+
const suppressDiagnostics = language === "lua" || language === "bash" && detectShellDialect(content) === "zsh";
|
|
5214
|
+
const rawDiagnostics = suppressDiagnostics ? [] : diagnosticsFromTree(tree.rootNode);
|
|
5215
|
+
const grammarGappedLanguages = /* @__PURE__ */ new Set(["c", "cpp", "csharp", "bash"]);
|
|
5216
|
+
const diagnostics = grammarGappedLanguages.has(language) ? rawDiagnostics.map((d) => d.category === "error" ? { ...d, category: "warning" } : d) : rawDiagnostics;
|
|
5217
|
+
const rationales = extractTreeSitterRationales(manifest, language, tree.rootNode);
|
|
5218
|
+
switch (language) {
|
|
5219
|
+
case "bash":
|
|
5220
|
+
return { code: bashCodeAnalysis(manifest, tree.rootNode, diagnostics, content), rationales };
|
|
5221
|
+
case "python":
|
|
5222
|
+
return { code: pythonCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5223
|
+
case "go":
|
|
5224
|
+
return { code: goCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5225
|
+
case "rust":
|
|
5226
|
+
return { code: rustCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5227
|
+
case "java":
|
|
5228
|
+
return { code: javaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5229
|
+
case "kotlin":
|
|
5230
|
+
return { code: kotlinCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5231
|
+
case "scala":
|
|
5232
|
+
return { code: scalaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5233
|
+
case "dart":
|
|
5234
|
+
return { code: dartCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5235
|
+
case "lua":
|
|
5236
|
+
return { code: luaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5237
|
+
case "zig":
|
|
5238
|
+
return { code: zigCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5239
|
+
case "csharp":
|
|
5240
|
+
return { code: csharpCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5241
|
+
case "php":
|
|
5242
|
+
return { code: phpCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5243
|
+
case "ruby":
|
|
5244
|
+
return { code: rubyCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5245
|
+
case "powershell":
|
|
5246
|
+
return { code: powershellCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5247
|
+
case "swift":
|
|
5248
|
+
return { code: swiftCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5249
|
+
case "elixir":
|
|
5250
|
+
return { code: elixirCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5251
|
+
case "ocaml":
|
|
5252
|
+
return { code: ocamlCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5253
|
+
case "objc":
|
|
5254
|
+
return { code: objcCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5255
|
+
case "rescript":
|
|
5256
|
+
return { code: rescriptCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5257
|
+
case "solidity":
|
|
5258
|
+
return { code: solidityCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5259
|
+
case "html":
|
|
5260
|
+
return { code: htmlCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5261
|
+
case "css":
|
|
5262
|
+
return { code: cssCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5263
|
+
case "vue":
|
|
5264
|
+
return { code: vueCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
|
|
5265
|
+
case "c":
|
|
5266
|
+
case "cpp":
|
|
5267
|
+
return { code: cFamilyCodeAnalysis(manifest, language, tree.rootNode, diagnostics), rationales };
|
|
5268
|
+
default:
|
|
5269
|
+
return {
|
|
3956
5270
|
code: finalizeCodeAnalysis(
|
|
3957
5271
|
manifest,
|
|
3958
5272
|
language,
|
|
@@ -4641,6 +5955,33 @@ function inferCodeLanguage(filePath, mimeType = "", options = {}) {
|
|
|
4641
5955
|
if (extension === ".ps1" || extension === ".psm1" || extension === ".psd1") {
|
|
4642
5956
|
return "powershell";
|
|
4643
5957
|
}
|
|
5958
|
+
if (extension === ".swift") {
|
|
5959
|
+
return "swift";
|
|
5960
|
+
}
|
|
5961
|
+
if (extension === ".ex" || extension === ".exs") {
|
|
5962
|
+
return "elixir";
|
|
5963
|
+
}
|
|
5964
|
+
if (extension === ".ml" || extension === ".mli") {
|
|
5965
|
+
return "ocaml";
|
|
5966
|
+
}
|
|
5967
|
+
if (extension === ".m" || extension === ".mm") {
|
|
5968
|
+
return "objc";
|
|
5969
|
+
}
|
|
5970
|
+
if (extension === ".res" || extension === ".resi") {
|
|
5971
|
+
return "rescript";
|
|
5972
|
+
}
|
|
5973
|
+
if (extension === ".sol") {
|
|
5974
|
+
return "solidity";
|
|
5975
|
+
}
|
|
5976
|
+
if (extension === ".html" || extension === ".htm") {
|
|
5977
|
+
return "html";
|
|
5978
|
+
}
|
|
5979
|
+
if (extension === ".css") {
|
|
5980
|
+
return "css";
|
|
5981
|
+
}
|
|
5982
|
+
if (extension === ".vue") {
|
|
5983
|
+
return "vue";
|
|
5984
|
+
}
|
|
4644
5985
|
if (extension === ".c") {
|
|
4645
5986
|
return "c";
|
|
4646
5987
|
}
|
|
@@ -4829,6 +6170,24 @@ function candidateExtensionsFor(language) {
|
|
|
4829
6170
|
return [".c", ".h"];
|
|
4830
6171
|
case "cpp":
|
|
4831
6172
|
return [".cc", ".cpp", ".cxx", ".h", ".hh", ".hpp", ".hxx"];
|
|
6173
|
+
case "swift":
|
|
6174
|
+
return [".swift"];
|
|
6175
|
+
case "elixir":
|
|
6176
|
+
return [".ex", ".exs"];
|
|
6177
|
+
case "ocaml":
|
|
6178
|
+
return [".ml", ".mli"];
|
|
6179
|
+
case "objc":
|
|
6180
|
+
return [".m", ".mm", ".h"];
|
|
6181
|
+
case "rescript":
|
|
6182
|
+
return [".res", ".resi"];
|
|
6183
|
+
case "solidity":
|
|
6184
|
+
return [".sol"];
|
|
6185
|
+
case "html":
|
|
6186
|
+
return [".css", ".js", ".mjs", ".cjs", ".html", ".htm"];
|
|
6187
|
+
case "css":
|
|
6188
|
+
return [".css"];
|
|
6189
|
+
default:
|
|
6190
|
+
return [];
|
|
4832
6191
|
}
|
|
4833
6192
|
}
|
|
4834
6193
|
async function buildCodeIndex(rootDir, manifests, analyses) {
|
|
@@ -4932,6 +6291,39 @@ async function buildCodeIndex(rootDir, manifests, analyses) {
|
|
|
4932
6291
|
case "powershell":
|
|
4933
6292
|
recordAlias(aliases, basename);
|
|
4934
6293
|
break;
|
|
6294
|
+
case "elixir":
|
|
6295
|
+
for (const symbol of analysis.code.symbols) {
|
|
6296
|
+
if (symbol.kind === "class" || symbol.kind === "interface") {
|
|
6297
|
+
recordAlias(aliases, symbol.name);
|
|
6298
|
+
}
|
|
6299
|
+
}
|
|
6300
|
+
break;
|
|
6301
|
+
case "ocaml": {
|
|
6302
|
+
if (basename) {
|
|
6303
|
+
const capitalized = basename.charAt(0).toUpperCase() + basename.slice(1);
|
|
6304
|
+
recordAlias(aliases, capitalized);
|
|
6305
|
+
recordAlias(aliases, basename);
|
|
6306
|
+
}
|
|
6307
|
+
for (const symbol of analysis.code.symbols) {
|
|
6308
|
+
if (symbol.kind === "class" || symbol.kind === "interface") {
|
|
6309
|
+
recordAlias(aliases, symbol.name);
|
|
6310
|
+
}
|
|
6311
|
+
}
|
|
6312
|
+
break;
|
|
6313
|
+
}
|
|
6314
|
+
case "rescript": {
|
|
6315
|
+
if (basename) {
|
|
6316
|
+
const capitalized = basename.charAt(0).toUpperCase() + basename.slice(1);
|
|
6317
|
+
recordAlias(aliases, capitalized);
|
|
6318
|
+
recordAlias(aliases, basename);
|
|
6319
|
+
}
|
|
6320
|
+
for (const symbol of analysis.code.symbols) {
|
|
6321
|
+
if (symbol.kind === "class") {
|
|
6322
|
+
recordAlias(aliases, symbol.name);
|
|
6323
|
+
}
|
|
6324
|
+
}
|
|
6325
|
+
break;
|
|
6326
|
+
}
|
|
4935
6327
|
default:
|
|
4936
6328
|
break;
|
|
4937
6329
|
}
|
|
@@ -5133,6 +6525,9 @@ function findImportCandidates(manifest, codeImport, lookup) {
|
|
|
5133
6525
|
case "kotlin":
|
|
5134
6526
|
case "scala":
|
|
5135
6527
|
case "csharp":
|
|
6528
|
+
case "elixir":
|
|
6529
|
+
case "ocaml":
|
|
6530
|
+
case "rescript":
|
|
5136
6531
|
return aliasMatches(lookup, codeImport.specifier);
|
|
5137
6532
|
case "dart":
|
|
5138
6533
|
return repoRelativePath && dartSpecifierLooksLocal2(codeImport.specifier) ? repoPathMatches(lookup, ...importResolutionCandidates(repoRelativePath, codeImport.specifier, candidateExtensionsFor(language))) : aliasMatches(lookup, codeImport.specifier);
|
|
@@ -5167,6 +6562,10 @@ function findImportCandidates(manifest, codeImport, lookup) {
|
|
|
5167
6562
|
}
|
|
5168
6563
|
case "c":
|
|
5169
6564
|
case "cpp":
|
|
6565
|
+
case "objc":
|
|
6566
|
+
case "solidity":
|
|
6567
|
+
case "html":
|
|
6568
|
+
case "css":
|
|
5170
6569
|
return repoRelativePath && !codeImport.isExternal ? repoPathMatches(lookup, ...importResolutionCandidates(repoRelativePath, codeImport.specifier, candidateExtensionsFor(language))) : aliasMatches(lookup, codeImport.specifier);
|
|
5171
6570
|
default:
|
|
5172
6571
|
return [];
|
|
@@ -5192,8 +6591,10 @@ function importLooksLocal(manifest, codeImport, candidates) {
|
|
|
5192
6591
|
case "powershell":
|
|
5193
6592
|
case "c":
|
|
5194
6593
|
case "cpp":
|
|
6594
|
+
case "objc":
|
|
5195
6595
|
case "kotlin":
|
|
5196
6596
|
case "scala":
|
|
6597
|
+
case "solidity":
|
|
5197
6598
|
return !codeImport.isExternal;
|
|
5198
6599
|
case "bash":
|
|
5199
6600
|
return bashSpecifierLooksLocal2(codeImport.specifier);
|
|
@@ -5271,6 +6672,43 @@ import { strFromU8, unzipSync } from "fflate";
|
|
|
5271
6672
|
import { JSDOM } from "jsdom";
|
|
5272
6673
|
import TurndownService from "turndown";
|
|
5273
6674
|
import { z } from "zod";
|
|
6675
|
+
|
|
6676
|
+
// src/markdown-ast.ts
|
|
6677
|
+
import { fromMarkdown } from "mdast-util-from-markdown";
|
|
6678
|
+
/**
 * Parse Markdown text with `fromMarkdown` and return the root's child nodes.
 *
 * Best-effort: if the parser throws, or the root has no `children` array,
 * an empty array is returned instead of propagating the failure.
 *
 * @param {string} text - Markdown source.
 * @returns {Array<object>} Top-level nodes of the parsed tree (possibly empty).
 */
function parseMarkdownNodes(text) {
  let topLevelNodes = [];
  try {
    const { children } = fromMarkdown(text);
    if (Array.isArray(children)) {
      topLevelNodes = children;
    }
  } catch {
    // Deliberately swallowed: callers treat unparseable input as "no nodes".
  }
  return topLevelNodes;
}
|
|
6686
|
+
/**
 * Flatten a Markdown AST node into whitespace-normalized plain text.
 *
 * - `text`, `inlineCode` and `code` nodes contribute their `value`.
 * - `image` nodes contribute their `alt` text.
 * - `break` and `thematicBreak` nodes contribute a single space.
 * - Any other node contributes the space-joined text of its children.
 *
 * @param {object} node - Markdown AST node.
 * @returns {string} Plain-text rendering of the node.
 */
function markdownNodeText(node) {
  switch (node.type) {
    case "text":
    case "inlineCode":
    case "code":
      return normalizeWhitespace(node.value ?? "");
    case "image":
      return normalizeWhitespace(node.alt ?? "");
    case "break":
    case "thematicBreak":
      return " ";
    default: {
      const children = node.children ?? [];
      const pieces = children.map((child) => markdownNodeText(child));
      return normalizeWhitespace(pieces.join(" "));
    }
  }
}
|
|
6698
|
+
/**
 * Return the text of the first top-level heading in a Markdown string.
 *
 * Only nodes returned by `parseMarkdownNodes` are inspected; headings whose
 * flattened text is empty are skipped.
 *
 * @param {string} text - Markdown source.
 * @returns {string | undefined} Heading text, or `undefined` when none is found.
 */
function firstMarkdownHeading(text) {
  for (const candidate of parseMarkdownNodes(text)) {
    if (candidate.type !== "heading") {
      continue;
    }
    const headingText = markdownNodeText(candidate).trim();
    if (headingText) {
      return headingText;
    }
  }
  return undefined;
}
|
|
6710
|
+
|
|
6711
|
+
// src/extraction.ts
|
|
5274
6712
|
var imageVisionExtractionSchema = z.object({
|
|
5275
6713
|
title: z.string().min(1).nullable().optional(),
|
|
5276
6714
|
summary: z.string().min(1),
|
|
@@ -5665,18 +7103,158 @@ async function extractDocxText(input) {
|
|
|
5665
7103
|
metadata: parseOfficeCoreMetadata(input.bytes),
|
|
5666
7104
|
warnings: warnings.length ? warnings : void 0
|
|
5667
7105
|
};
|
|
5668
|
-
if (!extractedText) {
|
|
5669
|
-
artifact.warnings = [...artifact.warnings ?? [], "DOCX text extraction completed but produced no extractable text."];
|
|
7106
|
+
if (!extractedText) {
|
|
7107
|
+
artifact.warnings = [...artifact.warnings ?? [], "DOCX text extraction completed but produced no extractable text."];
|
|
7108
|
+
}
|
|
7109
|
+
return {
|
|
7110
|
+
extractedText: extractedText || void 0,
|
|
7111
|
+
artifact
|
|
7112
|
+
};
|
|
7113
|
+
} catch (error) {
|
|
7114
|
+
return {
|
|
7115
|
+
artifact: {
|
|
7116
|
+
...extractionMetadata("docx", input.mimeType, "docx_text"),
|
|
7117
|
+
warnings: [`DOCX text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
|
|
7118
|
+
}
|
|
7119
|
+
};
|
|
7120
|
+
}
|
|
7121
|
+
}
|
|
7122
|
+
/**
 * Return a notebook cell's source as a single string.
 *
 * The `source` field may be an array of fragments (joined without a
 * separator) or a plain string; any other shape yields an empty string.
 *
 * @param {{ source?: unknown }} cell - Notebook cell object.
 * @returns {string} The cell source text.
 */
function jupyterCellSource(cell) {
  const { source } = cell;
  if (typeof source === "string") {
    return source;
  }
  return Array.isArray(source) ? source.join("") : "";
}
|
|
7132
|
+
/**
 * Build a short plain-text summary of a code cell's outputs.
 *
 * For each output, the first usable text is taken from
 * `data["text/plain"]` (or `data["text/markdown"]`), falling back to the
 * output's `text` field. Strings are trimmed; arrays are joined and trimmed.
 * Entries that are not objects (e.g. `null` in a malformed notebook) are
 * skipped instead of throwing.
 *
 * @param {unknown} outputs - The cell's `outputs` value.
 * @returns {string | null} `null` when there are no outputs; a
 *   `[N non-text output(s)]` placeholder when no text could be extracted;
 *   otherwise the joined text, cut to 1200 characters with a
 *   `[output truncated]` marker when longer.
 */
function jupyterOutputSummary(outputs) {
  if (!Array.isArray(outputs) || outputs.length === 0) {
    return null;
  }
  // Normalizes a string-or-fragment-array field; null means "no usable text".
  const asTrimmedText = (value) => {
    if (typeof value === "string") {
      return value.trim();
    }
    if (Array.isArray(value)) {
      return value.join("").trim();
    }
    return null;
  };
  const parts = [];
  for (const output of outputs) {
    if (!output || typeof output !== "object") {
      // Malformed entry: ignore it rather than failing the whole summary.
      continue;
    }
    const data = output.data;
    const richText = data && typeof data === "object" ? asTrimmedText(data["text/plain"] ?? data["text/markdown"]) : null;
    // Rich data wins (even when it trims to ""); `text` is only a fallback.
    const text = richText ?? asTrimmedText(output.text);
    if (text !== null) {
      parts.push(text);
    }
  }
  const joined = parts.filter(Boolean).join("\n").trim();
  if (!joined) {
    return `[${outputs.length} non-text output${outputs.length === 1 ? "" : "s"}]`;
  }
  return joined.length > 1200 ? `${joined.slice(0, 1200)}\n[output truncated]` : joined;
}
|
|
7166
|
+
async function extractJupyterNotebook(input) {
|
|
7167
|
+
try {
|
|
7168
|
+
const text = decodeTextBytes(input.bytes);
|
|
7169
|
+
const notebook = JSON.parse(text);
|
|
7170
|
+
const cells = Array.isArray(notebook.cells) ? notebook.cells : [];
|
|
7171
|
+
const kernelLanguage = notebook.metadata?.language_info?.name?.trim() || notebook.metadata?.kernelspec?.language?.trim() || "";
|
|
7172
|
+
const kernelDisplay = notebook.metadata?.kernelspec?.display_name?.trim() || "";
|
|
7173
|
+
let notebookTitle = typeof notebook.metadata?.title === "string" ? notebook.metadata.title.trim() : "";
|
|
7174
|
+
if (!notebookTitle) {
|
|
7175
|
+
for (const cell of cells) {
|
|
7176
|
+
if (cell.cell_type === "markdown") {
|
|
7177
|
+
const heading2 = firstMarkdownHeading(jupyterCellSource(cell));
|
|
7178
|
+
if (heading2) {
|
|
7179
|
+
notebookTitle = heading2;
|
|
7180
|
+
break;
|
|
7181
|
+
}
|
|
7182
|
+
}
|
|
7183
|
+
}
|
|
7184
|
+
}
|
|
7185
|
+
if (!notebookTitle && input.fileName) {
|
|
7186
|
+
notebookTitle = path7.basename(input.fileName, path7.extname(input.fileName));
|
|
7187
|
+
}
|
|
7188
|
+
const sections = [];
|
|
7189
|
+
let markdownCellCount = 0;
|
|
7190
|
+
let codeCellCount = 0;
|
|
7191
|
+
let outputCount = 0;
|
|
7192
|
+
for (const cell of cells) {
|
|
7193
|
+
const source = jupyterCellSource(cell).trim();
|
|
7194
|
+
if (!source) {
|
|
7195
|
+
continue;
|
|
7196
|
+
}
|
|
7197
|
+
if (cell.cell_type === "markdown") {
|
|
7198
|
+
markdownCellCount += 1;
|
|
7199
|
+
sections.push(source);
|
|
7200
|
+
sections.push("");
|
|
7201
|
+
continue;
|
|
7202
|
+
}
|
|
7203
|
+
if (cell.cell_type === "code") {
|
|
7204
|
+
codeCellCount += 1;
|
|
7205
|
+
const fence = kernelLanguage || "";
|
|
7206
|
+
sections.push(`\`\`\`${fence}`);
|
|
7207
|
+
sections.push(source);
|
|
7208
|
+
sections.push("```");
|
|
7209
|
+
const outputSummary = jupyterOutputSummary(cell.outputs);
|
|
7210
|
+
if (outputSummary) {
|
|
7211
|
+
outputCount += Array.isArray(cell.outputs) ? cell.outputs.length : 0;
|
|
7212
|
+
sections.push("");
|
|
7213
|
+
sections.push("_Output:_");
|
|
7214
|
+
sections.push("");
|
|
7215
|
+
sections.push(outputSummary);
|
|
7216
|
+
}
|
|
7217
|
+
sections.push("");
|
|
7218
|
+
continue;
|
|
7219
|
+
}
|
|
7220
|
+
sections.push(source);
|
|
7221
|
+
sections.push("");
|
|
7222
|
+
}
|
|
7223
|
+
const heading = notebookTitle ? [`# ${notebookTitle}`, ""] : [];
|
|
7224
|
+
const extractedText = [
|
|
7225
|
+
...heading,
|
|
7226
|
+
`Jupyter Notebook (${cells.length} cell${cells.length === 1 ? "" : "s"}, kernel: ${kernelDisplay || kernelLanguage || "unknown"})`,
|
|
7227
|
+
"",
|
|
7228
|
+
...sections
|
|
7229
|
+
].join("\n").trim();
|
|
7230
|
+
const metadata = {
|
|
7231
|
+
cell_count: String(cells.length),
|
|
7232
|
+
markdown_cells: String(markdownCellCount),
|
|
7233
|
+
code_cells: String(codeCellCount),
|
|
7234
|
+
output_count: String(outputCount)
|
|
7235
|
+
};
|
|
7236
|
+
if (kernelLanguage) {
|
|
7237
|
+
metadata.kernel_language = kernelLanguage;
|
|
7238
|
+
}
|
|
7239
|
+
if (kernelDisplay) {
|
|
7240
|
+
metadata.kernel_display_name = kernelDisplay;
|
|
7241
|
+
}
|
|
7242
|
+
if (notebook.nbformat !== void 0) {
|
|
7243
|
+
metadata.nbformat = `${notebook.nbformat}${notebook.nbformat_minor !== void 0 ? `.${notebook.nbformat_minor}` : ""}`;
|
|
5670
7244
|
}
|
|
5671
7245
|
return {
|
|
7246
|
+
title: notebookTitle || void 0,
|
|
5672
7247
|
extractedText: extractedText || void 0,
|
|
5673
|
-
artifact
|
|
7248
|
+
artifact: {
|
|
7249
|
+
...extractionMetadata("jupyter", input.mimeType, "jupyter_text"),
|
|
7250
|
+
metadata
|
|
7251
|
+
}
|
|
5674
7252
|
};
|
|
5675
7253
|
} catch (error) {
|
|
5676
7254
|
return {
|
|
5677
7255
|
artifact: {
|
|
5678
|
-
...extractionMetadata("
|
|
5679
|
-
warnings: [`
|
|
7256
|
+
...extractionMetadata("jupyter", input.mimeType, "jupyter_text"),
|
|
7257
|
+
warnings: [`Jupyter notebook extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
|
|
5680
7258
|
}
|
|
5681
7259
|
};
|
|
5682
7260
|
}
|
|
@@ -5731,7 +7309,7 @@ async function extractCsvText(input) {
|
|
|
5731
7309
|
};
|
|
5732
7310
|
}
|
|
5733
7311
|
}
|
|
5734
|
-
async function
|
|
7312
|
+
async function extractSpreadsheetWorkbook(input, sourceKind, extractor) {
|
|
5735
7313
|
try {
|
|
5736
7314
|
const XLSX = await import("xlsx");
|
|
5737
7315
|
const workbook = XLSX.read(input.bytes, { type: "buffer", cellFormula: false, cellHTML: false, cellStyles: false });
|
|
@@ -5772,7 +7350,7 @@ async function extractXlsxText(input) {
|
|
|
5772
7350
|
title,
|
|
5773
7351
|
extractedText,
|
|
5774
7352
|
artifact: {
|
|
5775
|
-
...extractionMetadata(
|
|
7353
|
+
...extractionMetadata(sourceKind, input.mimeType, extractor),
|
|
5776
7354
|
metadata,
|
|
5777
7355
|
warnings
|
|
5778
7356
|
}
|
|
@@ -5780,12 +7358,20 @@ async function extractXlsxText(input) {
|
|
|
5780
7358
|
} catch (error) {
|
|
5781
7359
|
return {
|
|
5782
7360
|
artifact: {
|
|
5783
|
-
...extractionMetadata(
|
|
5784
|
-
warnings: [
|
|
7361
|
+
...extractionMetadata(sourceKind, input.mimeType, extractor),
|
|
7362
|
+
warnings: [
|
|
7363
|
+
`${sourceKind.toUpperCase()} extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`
|
|
7364
|
+
]
|
|
5785
7365
|
}
|
|
5786
7366
|
};
|
|
5787
7367
|
}
|
|
5788
7368
|
}
|
|
7369
|
+
/**
 * Extract text from an XLSX workbook.
 *
 * Thin wrapper: delegates to `extractSpreadsheetWorkbook`, passing "xlsx" as
 * the source kind and "xlsx_text" as the extractor label.
 *
 * @param {object} input - Extraction input, forwarded unchanged.
 * @returns {Promise<object>} Whatever `extractSpreadsheetWorkbook` resolves to.
 */
async function extractXlsxText(input) {
  return extractSpreadsheetWorkbook(input, "xlsx", "xlsx_text");
}
|
|
7372
|
+
/**
 * Extract text from an ODS workbook.
 *
 * Thin wrapper: delegates to `extractSpreadsheetWorkbook`, passing "ods" as
 * the source kind and "ods_text" as the extractor label.
 *
 * @param {object} input - Extraction input, forwarded unchanged.
 * @returns {Promise<object>} Whatever `extractSpreadsheetWorkbook` resolves to.
 */
async function extractOdsText(input) {
  return extractSpreadsheetWorkbook(input, "ods", "ods_text");
}
|
|
5789
7375
|
async function extractPptxText(input) {
|
|
5790
7376
|
try {
|
|
5791
7377
|
const archive = unzipSync(new Uint8Array(input.bytes));
|
|
@@ -6038,66 +7624,707 @@ function calendarAttendees(value) {
|
|
|
6038
7624
|
return name || address;
|
|
6039
7625
|
}).filter(Boolean);
|
|
6040
7626
|
}
|
|
6041
|
-
function slackFormatSpeakerId(input, usersById) {
|
|
6042
|
-
return usersById.get(input) ?? input;
|
|
7627
|
+
/**
 * Resolve a Slack user id to the value stored for it in `usersById`.
 *
 * @param {string} input - Raw speaker/user id.
 * @param {Map<string, string>} usersById - Lookup from id to display value.
 * @returns {string} The mapped value, or `input` itself when the map has no
 *   (non-nullish) entry for it.
 */
function slackFormatSpeakerId(input, usersById) {
  const mapped = usersById.get(input);
  if (mapped === undefined || mapped === null) {
    return input;
  }
  return mapped;
}
|
|
7630
|
+
/**
 * Convert Slack message markup into readable plain text.
 *
 * Applied in order:
 *  1. `<@ID>` user mentions become `@<resolved speaker>`.
 *  2. `<#ID|name>` channel references become `#name`.
 *  3. `<url|label>` links become `label (url)`.
 *  4. `<url>` bare links become `url`.
 * The result is passed through `normalizeWhitespace`.
 *
 * @param {string} text - Raw Slack message text.
 * @param {Map<string, string>} usersById - Lookup used to resolve mentions.
 * @returns {string} Normalized message text.
 */
function slackNormalizeText(text, usersById) {
  const withMentions = text.replace(/<@([A-Z0-9]+)>/g, (_, userId) => `@${slackFormatSpeakerId(userId, usersById)}`);
  const withChannels = withMentions.replace(/<#[A-Z0-9]+\|([^>]+)>/g, "#$1");
  const withLabeledLinks = withChannels.replace(/<(https?:\/\/[^>|]+)\|([^>]+)>/g, "$2 ($1)");
  const withBareLinks = withLabeledLinks.replace(/<(https?:\/\/[^>]+)>/g, "$1");
  return normalizeWhitespace(withBareLinks);
}
|
|
7635
|
+
/**
 * Convert a Slack `ts` value (seconds since the Unix epoch) to an ISO-8601
 * string.
 *
 * Falls back to midnight UTC of `fallbackDate` when `ts2` is missing,
 * non-numeric, non-positive, or so large that it does not map to a
 * representable `Date` (previously that last case threw a RangeError from
 * `toISOString()`).
 *
 * @param {string | number | undefined} ts2 - Slack message timestamp.
 * @param {string} fallbackDate - Day in `YYYY-MM-DD` form.
 * @returns {string} ISO-8601 timestamp.
 * @throws {RangeError} If the fallback is needed and `fallbackDate` is not a
 *   valid date.
 */
function slackMessageTimestamp(ts2, fallbackDate) {
  const seconds = Number(ts2);
  if (Number.isFinite(seconds) && seconds > 0) {
    const parsed = new Date(seconds * 1e3);
    // A finite value can still exceed the Date range and yield an Invalid Date.
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toISOString();
    }
  }
  return new Date(`${fallbackDate}T00:00:00.000Z`).toISOString();
}
|
|
7642
|
+
/**
 * Split a byte buffer into individual message buffers using `MboxStream`
 * from the lazily imported `node-mbox` package.
 *
 * NOTE(review): despite the "Zip" in the name, nothing here unzips anything;
 * the bytes are fed straight into the mbox splitter — confirm whether the
 * name is a leftover.
 *
 * @param {Buffer | Uint8Array} bytes - Raw input handed to the stream as a single chunk.
 * @returns {Promise<Buffer[]>} Resolves with every chunk emitted on "data",
 *   each coerced to a `Buffer`; rejects on a stream "error".
 */
async function loadZipMessageBuffers(bytes) {
  // Imported on demand so the dependency is only loaded when this path runs.
  const { MboxStream } = await import("node-mbox");
  const stream = MboxStream(Readable.from([bytes]));
  return await new Promise((resolve, reject) => {
    const messages = [];
    stream.on("data", (message) => {
      // Chunks that are not already Buffers are converted with Buffer.from.
      messages.push(Buffer.isBuffer(message) ? message : Buffer.from(message));
    });
    stream.on("error", reject);
    // Both completion events resolve; a second resolve() call is a no-op.
    stream.on("finish", () => resolve(messages));
    stream.on("end", () => resolve(messages));
  });
}
|
|
7655
|
+
/**
 * Decode every entry of an unzipped archive to text.
 *
 * @param {Record<string, Uint8Array>} archive - Entry path to raw bytes.
 * @returns {Map<string, string>} Entry path to `strFromU8`-decoded content;
 *   entries with a falsy value are omitted.
 */
function archiveEntriesAsText(archive) {
  const textByPath = new Map();
  for (const [entryPath, value] of Object.entries(archive)) {
    if (value) {
      textByPath.set(entryPath, strFromU8(value));
    }
  }
  return textByPath;
}
|
|
7660
|
+
/**
 * Heuristically decide whether a set of archive entry paths is a Slack export.
 *
 * Requires both:
 *  - a root-level index file (`channels.json`, `groups.json`, `dms.json` or
 *    `mpims.json`), and
 *  - at least one `<folder>/<YYYY-MM-DD>.json` day file exactly one level deep.
 *
 * @param {Iterable<string>} entries - Archive entry paths.
 * @returns {boolean} `true` when both markers are present.
 */
function looksLikeSlackEntries(entries) {
  const indexFileNames = new Set(["channels.json", "groups.json", "dms.json", "mpims.json"]);
  const dayFilePattern = /^[^/]+\/\d{4}-\d{2}-\d{2}\.json$/i;
  let sawIndexFile = false;
  let sawDayFile = false;
  for (const entry of [...entries]) {
    if (!sawIndexFile && indexFileNames.has(entry)) {
      sawIndexFile = true;
    }
    if (!sawDayFile && dayFilePattern.test(entry)) {
      sawDayFile = true;
    }
  }
  return sawIndexFile && sawDayFile;
}
|
|
7668
|
+
/**
 * Build a lookup of conversations from a parsed Slack index file.
 *
 * Non-object items and items without a (normalized) `name` are skipped.
 * Members come from the item's `members` array, or from its single `user`
 * field when `members` is not an array; each is resolved through
 * `slackFormatSpeakerId` and empty results are dropped.
 *
 * @param {unknown} raw - Parsed JSON of the index file.
 * @param {Map<string, string>} usersById - Lookup used to resolve member ids.
 * @returns {Map<string, { id: string, title: string, members: string[] }>}
 *   Entries keyed by title; empty when `raw` is not an array.
 */
function slackEntriesFromChannelIndex(raw, usersById) {
  const entriesByTitle = new Map();
  if (!Array.isArray(raw)) {
    return entriesByTitle;
  }
  for (const item of raw) {
    if (!item || typeof item !== "object") {
      continue;
    }
    const id = normalizeWhitespace(item.id ?? "");
    const title = normalizeWhitespace(item.name ?? "");
    if (!title) {
      continue;
    }
    let memberIds = [];
    if (Array.isArray(item.members)) {
      memberIds = item.members;
    } else if (item.user) {
      memberIds = [item.user];
    }
    const members = memberIds.map((member) => slackFormatSpeakerId(member, usersById)).filter(Boolean);
    entriesByTitle.set(title, { id, title, members });
  }
  return entriesByTitle;
}
|
|
7688
|
+
/**
 * Read document metadata from the `meta.xml` entry of an OpenDocument archive.
 *
 * Every element in `meta.xml` is visited; for each lowercase local name, the
 * first non-empty (whitespace-normalized) text content is remembered. A fixed
 * set of those names is then copied into the result under friendlier keys
 * (e.g. `creator` -> `author`, `creation-date` -> `created`).
 *
 * @param {Buffer | Uint8Array} bytes - Raw archive bytes.
 * @returns {Record<string, string> | undefined} Extracted metadata, or
 *   `undefined` when `meta.xml` is missing, nothing was extracted, or any step
 *   throws.
 */
function parseOdfMetadata(bytes) {
  try {
    const metaXml = zipEntryText(unzipSync(new Uint8Array(bytes)), "meta.xml");
    if (!metaXml) {
      return undefined;
    }
    const firstTextByLocalName = new Map();
    const elements = Array.from(parseXmlDocument(metaXml).getElementsByTagName("*"));
    elements.forEach((element) => {
      const localName = element.localName?.trim().toLowerCase();
      const text = normalizeWhitespace(element.textContent ?? "");
      // First occurrence wins; later elements with the same name are ignored.
      if (localName && text && !firstTextByLocalName.has(localName)) {
        firstTextByLocalName.set(localName, text);
      }
    });
    // Output key -> element local name in meta.xml.
    const sourceNameByTargetKey = {
      title: "title",
      author: "creator",
      subject: "subject",
      description: "description",
      keywords: "keyword",
      initial_creator: "initial-creator",
      created: "creation-date",
      modified: "date"
    };
    const metadata = {};
    for (const [targetKey, sourceKey] of Object.entries(sourceNameByTargetKey)) {
      const found = firstTextByLocalName.get(sourceKey);
      if (found) {
        metadata[targetKey] = found;
      }
    }
    return Object.keys(metadata).length > 0 ? metadata : undefined;
  } catch {
    return undefined;
  }
}
|
|
7727
|
+
/**
 * Collect headings and paragraphs from an OpenDocument `content.xml` string.
 *
 * Elements are visited in document order:
 *  - `h` elements become `{ heading: level, text }`, with the level read from
 *    `text:outline-level` (defaulting to 1 when missing or invalid).
 *  - `p` elements become `{ text }`.
 *  - `list-item` elements become `{ text }` only when they contain no `p` or
 *    `h` descendant. Otherwise their text is already emitted through those
 *    descendants, and emitting the item as well would duplicate every list
 *    entry (and repeat nested lists once per nesting level).
 * Elements with empty normalized text are skipped.
 *
 * @param {string} contentXml - Contents of `content.xml`.
 * @returns {Array<{ heading?: number, text: string }>} Text nodes in order.
 */
function collectOdfTextNodes(contentXml) {
  const document = parseXmlDocument(contentXml);
  const nodes = [];
  // True when the element's text is carried by paragraph/heading descendants.
  const hasTextBlockDescendant = (element) => Array.from(element.getElementsByTagName("*")).some((descendant) => descendant.localName === "p" || descendant.localName === "h");
  for (const node of Array.from(document.getElementsByTagName("*"))) {
    const localName = node.localName ?? "";
    if (localName === "h") {
      const level = Number.parseInt(node.getAttribute("text:outline-level") ?? "1", 10);
      const text = normalizeWhitespace(node.textContent ?? "");
      if (text) {
        nodes.push({ heading: Number.isFinite(level) && level > 0 ? level : 1, text });
      }
      continue;
    }
    if (localName !== "p" && localName !== "list-item") {
      continue;
    }
    if (node.closest?.("h")) {
      continue;
    }
    if (localName === "list-item" && hasTextBlockDescendant(node)) {
      // The nested p/h elements are visited on their own; avoid duplicates.
      continue;
    }
    const text = normalizeWhitespace(node.textContent ?? "");
    if (text) {
      nodes.push({ text });
    }
  }
  return nodes;
}
|
|
7752
|
+
/**
 * Render collected text nodes as Markdown-style text.
 *
 * Heading nodes become `#`-prefixed lines (level capped at 6) surrounded by
 * blank lines; every other node becomes its text followed by a blank line.
 * The joined result is trimmed.
 *
 * @param {Array<{ heading?: number, text: string }>} nodes - Nodes to render.
 * @returns {string} Rendered text (empty string for no nodes).
 */
function renderOdfTextNodes(nodes) {
  const renderNode = (node) => {
    if (!node.heading) {
      return [node.text, ""];
    }
    const hashes = "#".repeat(Math.min(node.heading, 6));
    return ["", `${hashes} ${node.text}`, ""];
  };
  return Array.from(nodes).flatMap(renderNode).join("\n").trim();
}
|
|
7766
|
+
/**
 * Extract Markdown-style text and metadata from an ODT document.
 *
 * Reads `content.xml` from the archive, collects headings and paragraphs,
 * and prefixes the body with a `# <title>` line. The title is taken from the
 * document metadata, else the first level-1 heading, else the file name
 * without its extension.
 *
 * @param {{ bytes: Buffer | Uint8Array, mimeType?: string, fileName?: string }} input
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   on any failure, an object whose `artifact.warnings` carries a single
 *   "ODT extraction failed: ..." message.
 */
async function extractOdtText(input) {
  try {
    const contentXml = zipEntryText(unzipSync(new Uint8Array(input.bytes)), "content.xml");
    if (!contentXml) {
      throw new Error("Missing content.xml");
    }
    const metadata = parseOdfMetadata(input.bytes);
    const textNodes = collectOdfTextNodes(contentXml);
    let headingCount = 0;
    for (const node of textNodes) {
      if (node.heading) {
        headingCount += 1;
      }
    }
    // Every node is either a heading or a paragraph.
    const paragraphCount = textNodes.length - headingCount;
    const title = metadata?.title || textNodes.find((node) => node.heading === 1)?.text || (input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : undefined);
    const body = renderOdfTextNodes(textNodes);
    const titleLines = title ? [`# ${title}`] : [];
    const extractedText = [...titleLines, "", body].join("\n").trim();
    const artifact = {
      ...extractionMetadata("odt", input.mimeType, "odt_text"),
      metadata: {
        ...metadata ?? {},
        heading_count: String(headingCount),
        paragraph_count: String(paragraphCount)
      }
    };
    return {
      title,
      extractedText: extractedText || undefined,
      artifact
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("odt", input.mimeType, "odt_text"),
        warnings: [`ODT extraction failed: ${reason}`]
      }
    };
  }
}
|
|
7801
|
+
/**
 * Extract Markdown-style text and metadata from an ODP presentation.
 *
 * Every element whose local name is `page` in `content.xml` is treated as a
 * slide. At most the first 60 slides are rendered, each as a
 * `## Slide N: <name>` heading followed by the slide's normalized text and a
 * blank separator line; a warning is attached when slides were cut off.
 *
 * The blank separators are kept in the output: only the absent-title
 * placeholder (`null`) is filtered out, matching how `extractOdtText`
 * assembles its text. Filtering on truthiness would also strip the
 * separators pushed between slides.
 *
 * @param {{ bytes: Buffer | Uint8Array, mimeType?: string, fileName?: string }} input
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   on any failure, an object whose `artifact.warnings` carries a single
 *   "ODP extraction failed: ..." message.
 */
async function extractOdpText(input) {
  try {
    const archive = unzipSync(new Uint8Array(input.bytes));
    const contentXml = zipEntryText(archive, "content.xml");
    if (!contentXml) {
      throw new Error("Missing content.xml");
    }
    const metadata = parseOdfMetadata(input.bytes);
    const document = parseXmlDocument(contentXml);
    const pages = Array.from(document.getElementsByTagName("*")).filter((node) => node.localName === "page");
    const slideSections = [];
    pages.slice(0, 60).forEach((page, index) => {
      const slideName = page.getAttribute("draw:name") ?? `Slide ${index + 1}`;
      const text = normalizeWhitespace(page.textContent ?? "");
      slideSections.push(`## Slide ${index + 1}: ${slideName}`);
      if (text) {
        slideSections.push(text);
      }
      // Blank line between slides; must survive the filter below.
      slideSections.push("");
    });
    const title = metadata?.title || (input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0);
    const extractedText = [title ? `# ${title}` : null, `Slides: ${pages.length}`, "", ...slideSections].filter((item) => item !== null).join("\n").trim();
    const warnings = pages.length > 60 ? ["ODP extraction truncated to the first 60 slides."] : void 0;
    return {
      title,
      extractedText: extractedText || void 0,
      artifact: {
        ...extractionMetadata("odp", input.mimeType, "odp_text"),
        metadata: {
          ...metadata ?? {},
          slide_count: String(pages.length)
        },
        warnings
      }
    };
  } catch (error) {
    return {
      artifact: {
        ...extractionMetadata("odp", input.mimeType, "odp_text"),
        warnings: [`ODP extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
7845
|
+
/**
 * Infer the structured-data format of a file from its name and MIME type.
 *
 * Formats are checked in a fixed order (json, yaml, toml, xml, ini, env,
 * properties); the first one whose file extension or MIME type matches wins.
 * File names are compared case-insensitively. The MIME type is normalized
 * before comparison: parameters such as `; charset=utf-8` are dropped and the
 * media type is lowercased, since media types are case-insensitive and
 * commonly carry parameters.
 *
 * @param {string | undefined | null} mimeType - MIME type, if known.
 * @param {string | undefined | null} fileName - File name or path, if known.
 * @returns {"json" | "yaml" | "toml" | "xml" | "ini" | "env" | "properties" | null}
 *   The detected format, or `null` when nothing matches.
 */
function inferStructuredFormat(mimeType, fileName) {
  const lower = (fileName ?? "").toLowerCase();
  const mime = typeof mimeType === "string" ? mimeType.split(";", 1)[0].trim().toLowerCase() : "";
  // [format, file extensions, MIME types] — order defines precedence.
  const rules = [
    ["json", [".jsonc", ".json", ".json5"], ["application/json", "application/json5"]],
    ["yaml", [".yaml", ".yml"], ["application/yaml", "application/x-yaml"]],
    ["toml", [".toml"], ["application/toml"]],
    ["xml", [".xml"], ["application/xml", "text/xml"]],
    ["ini", [".ini", ".conf", ".cfg"], []],
    ["env", [".env"], []],
    ["properties", [".properties"], []]
  ];
  for (const [format, extensions, mimeTypes] of rules) {
    if (extensions.some((extension) => lower.endsWith(extension)) || mimeTypes.includes(mime)) {
      return format;
    }
  }
  return null;
}
|
|
7870
|
+
/**
 * Parse dotenv-style text into a plain key/value object.
 *
 * Per line (after trimming):
 *  - blank lines and lines starting with `#` are ignored;
 *  - an optional leading `export ` (shell style) is dropped, so
 *    `export KEY=value` yields the key `KEY`;
 *  - the line is split at the first `=`; lines without `=` or with an empty
 *    key are ignored;
 *  - a value wrapped in one matching pair of single or double quotes has the
 *    quotes removed (a value consisting of a lone quote character is left
 *    as-is rather than being treated as an empty quoted string).
 * Later occurrences of a key overwrite earlier ones.
 *
 * @param {string} text - File contents.
 * @returns {Record<string, string>} Parsed variables.
 */
function parseEnvFile(text) {
  const result = {};
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim().replace(/^export\s+/, "");
    if (!line || line.startsWith("#")) {
      continue;
    }
    const eqIndex = line.indexOf("=");
    if (eqIndex <= 0) {
      continue;
    }
    const key = line.slice(0, eqIndex).trim();
    let value = line.slice(eqIndex + 1).trim();
    const quote = value[0];
    // Require two characters so a lone quote is not mistaken for a pair.
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }
    result[key] = value;
  }
  return result;
}
|
|
7890
|
+
/**
 * Parse Java-style `.properties` text into a plain key/value object.
 *
 * Per line (after trimming):
 *  - blank lines and comment lines starting with `#` or `!` are ignored;
 *  - the key ends at the first `=` or `:`, whichever comes first, so a value
 *    may itself contain the other separator (`url: http://h/?a=b`);
 *  - lines with no separator, or with an empty key, are ignored.
 * Key and value are trimmed. Later occurrences of a key overwrite earlier
 * ones. Line continuations and escape sequences are not interpreted.
 *
 * @param {string} text - File contents.
 * @returns {Record<string, string>} Parsed properties.
 */
function parsePropertiesFile(text) {
  const result = {};
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line || line.startsWith("#") || line.startsWith("!")) {
      continue;
    }
    const eqIndex = line.indexOf("=");
    const colonIndex = line.indexOf(":");
    // Earliest separator wins; -1 means that separator is absent.
    let sep;
    if (eqIndex < 0) {
      sep = colonIndex;
    } else if (colonIndex < 0) {
      sep = eqIndex;
    } else {
      sep = Math.min(eqIndex, colonIndex);
    }
    if (sep <= 0) {
      continue;
    }
    const key = line.slice(0, sep).trim();
    const value = line.slice(sep + 1).trim();
    result[key] = value;
  }
  return result;
}
|
|
7910
|
+
/**
 * Summarize an XML document as `{ rootTag: { childTag: { count } } }`.
 *
 * Only the direct children of the document element are tallied. Returns an
 * empty object when the parsed document has no document element.
 *
 * @param {string} text - XML source handed to `parseXmlDocument`.
 * @returns {object} Per-tag child counts keyed under the root tag name
 *   (or "root" when the root has no tag name).
 */
function parseXmlToSchema(text) {
  const rootElement = parseXmlDocument(text).documentElement;
  if (!rootElement) {
    return {};
  }
  const tally = new Map();
  Array.from(rootElement.children).forEach((element) => {
    const tag = element.tagName || element.localName || "";
    if (tag) {
      tally.set(tag, (tally.get(tag) ?? 0) + 1);
    }
  });
  const summary = {};
  tally.forEach((count, tag) => {
    summary[tag] = { count };
  });
  return { [rootElement.tagName || "root"]: summary };
}
|
|
7930
|
+
/**
 * Describe the type, size, and nesting depth of a parsed value.
 *
 * - `null` and primitives have size 0 and depth 0.
 * - Arrays and objects report their element/key count and a depth of
 *   1 + the deepest child depth.
 *
 * The deepest child is tracked with a running maximum rather than
 * `Math.max(...depths)`, which passes one argument per child and throws a
 * RangeError on very large containers.
 *
 * @param {*} value - Any parsed value.
 * @returns {{ type: string, size: number, depth: number }} Shape summary.
 */
function describeJsonShape(value) {
  if (value === null) {
    return { type: "null", size: 0, depth: 0 };
  }
  if (typeof value !== "object") {
    return { type: typeof value, size: 0, depth: 0 };
  }
  const isArray = Array.isArray(value);
  const children = isArray ? value : Object.values(value);
  let deepest = 0;
  for (const child of children) {
    const childDepth = describeJsonShape(child).depth;
    if (childDepth > deepest) {
      deepest = childDepth;
    }
  }
  return { type: isArray ? "array" : "object", size: children.length, depth: 1 + deepest };
}
|
|
7945
|
+
/**
 * Produce one human-readable line per top-level key of a parsed value.
 *
 * Non-object roots (null, primitives, arrays) yield a single `(root) ...`
 * line. Object roots yield `key: type` lines for at most the first 20 keys,
 * with an item/key count hint for arrays and objects.
 *
 * @param {*} value - Parsed payload.
 * @returns {string[]} Schema description lines.
 */
function describeTopLevelSchema(value) {
  const isKeyedObject = value !== null && typeof value === "object" && !Array.isArray(value);
  if (!isKeyedObject) {
    const { type, size } = describeJsonShape(value);
    const suffix = size ? ` (${size})` : "";
    return [`(root) ${type}${suffix}`];
  }
  const lines = [];
  for (const [key, child] of Object.entries(value).slice(0, 20)) {
    const { type, size } = describeJsonShape(child);
    let sizeHint = "";
    if (type === "array") {
      sizeHint = ` (${size} items)`;
    } else if (type === "object") {
      sizeHint = ` (${size} keys)`;
    }
    lines.push(`${key}: ${type}${sizeHint}`);
  }
  return lines;
}
|
|
7957
|
+
/**
 * Decode a payload and parse it according to the given structured format.
 *
 * - "json": strips a leading BOM, then `JSON.parse`.
 * - "yaml" / "toml": parsed with the lazily imported `yaml` / `smol-toml` modules.
 * - "xml": summarized by `parseXmlToSchema`.
 * - "ini": tried as TOML first; any failure falls back to `parsePropertiesFile`.
 * - "env": `parseEnvFile`.
 * - anything else: `parsePropertiesFile`.
 *
 * @param {*} bytes - Raw payload passed to `decodeTextBytes`.
 * @param {string} format - Format identifier.
 * @returns {Promise<{ format: string, value: * }>} The format together with the parsed value.
 */
async function parseStructuredPayload(bytes, format) {
  const text = decodeTextBytes(bytes);
  switch (format) {
    case "json": {
      const withoutBom = text.replace(/^\uFEFF/, "");
      return { format, value: JSON.parse(withoutBom) };
    }
    case "yaml": {
      const yaml = await import("yaml");
      return { format, value: yaml.parse(text) };
    }
    case "toml": {
      const toml = await import("smol-toml");
      return { format, value: toml.parse(text) };
    }
    case "xml":
      return { format, value: parseXmlToSchema(text) };
    case "ini":
      try {
        const toml = await import("smol-toml");
        return { format, value: toml.parse(text) };
      } catch {
        // Not valid TOML (or the module failed to load): read it as key/value lines instead.
        return { format, value: parsePropertiesFile(text) };
      }
    case "env":
      return { format, value: parseEnvFile(text) };
    default:
      return { format, value: parsePropertiesFile(text) };
  }
}
|
|
7987
|
+
/**
 * Build a Markdown summary (shape, top-level schema, raw preview) for a
 * structured-data payload.
 *
 * The preview shows at most the first 40 source lines. Trailing empty lines
 * produced by a final line break are ignored, so a file that ends in a
 * newline is not reported as truncated and gets no blank preview line.
 *
 * @param {{ mimeType: string, bytes: *, fileName?: string }} input - Payload
 *   plus the MIME type and file name used to infer the format.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a `warnings` entry when the format is not
 *   recognized or parsing throws.
 */
async function extractStructuredData(input) {
  const format = inferStructuredFormat(input.mimeType, input.fileName);
  if (!format) {
    return {
      artifact: {
        ...extractionMetadata("data", input.mimeType, "structured_data"),
        warnings: ["Structured data extraction skipped: format not recognized."]
      }
    };
  }
  try {
    const { value } = await parseStructuredPayload(input.bytes, format);
    const shape = describeJsonShape(value);
    const schemaLines = describeTopLevelSchema(value);
    const sourceLines = decodeTextBytes(input.bytes).split(/\r?\n/);
    // A text ending in a line break splits into a trailing "" entry; drop
    // those so they neither count as content nor trigger the ellipsis.
    while (sourceLines.length > 1 && sourceLines[sourceLines.length - 1] === "") {
      sourceLines.pop();
    }
    const previewLines = sourceLines.slice(0, 40);
    const truncated = sourceLines.length > previewLines.length;
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    const extractedText = [
      title ? `# ${title}` : null,
      `Format: ${format.toUpperCase()}`,
      `Top-level: ${shape.type}`,
      shape.type === "object" || shape.type === "array" ? `Size: ${shape.size}` : null,
      `Nested depth: ${shape.depth}`,
      "",
      "## Schema",
      "",
      ...schemaLines.map((entry) => `- ${entry}`),
      "",
      "## Preview",
      "",
      `\`\`\`${format}`,
      ...previewLines,
      truncated ? "\u2026" : null,
      "```"
    ].filter((item) => item !== null).join("\n").trim();
    return {
      title,
      extractedText,
      artifact: {
        ...extractionMetadata("data", input.mimeType, "structured_data"),
        metadata: {
          format,
          top_level_type: shape.type,
          top_level_size: String(shape.size),
          nested_depth: String(shape.depth)
        }
      }
    };
  } catch (error) {
    // Parse failures are reported as a warning on the artifact instead of being thrown.
    return {
      artifact: {
        ...extractionMetadata("data", input.mimeType, "structured_data"),
        warnings: [`Structured data extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
8045
|
+
/**
 * Format a parsed BibTeX creator as a display name.
 *
 * A literal `name` is returned as-is. Otherwise the present parts are joined
 * in reading order: first name, name-particle prefix (e.g. "van", "von"),
 * last name, suffix (e.g. "Jr."). The prefix belongs between the first and
 * last name, giving "Ludwig van Beethoven" rather than "van Ludwig Beethoven".
 *
 * @param {{ name?: string, firstName?: string, prefix?: string, lastName?: string, suffix?: string }} creator
 * @returns {string} Display name, or "" when no part is present.
 */
function formatBibCreator(creator) {
  if (creator.name) {
    return creator.name;
  }
  const parts = [creator.firstName, creator.prefix, creator.lastName, creator.suffix].filter(Boolean);
  return parts.join(" ");
}
|
|
6044
|
-
function
|
|
6045
|
-
|
|
6046
|
-
|
|
6047
|
-
|
|
8052
|
+
/**
 * Coerce a BibTeX field value of unknown shape to a display string.
 *
 * - `null` / `undefined` become "".
 * - Strings are trimmed; numbers and booleans are stringified.
 * - Arrays are converted element by element, empty results dropped, and
 *   joined with ", ".
 * - Other objects contribute their `name` property (or "" when absent).
 * - Anything else goes through `String()`.
 *
 * @param {*} value - Field value.
 * @returns {string} String form of the value.
 */
function bibFieldString(value) {
  if (value == null) {
    return "";
  }
  if (Array.isArray(value)) {
    const pieces = [];
    for (const item of value) {
      const piece = bibFieldString(item);
      if (piece) {
        pieces.push(piece);
      }
    }
    return pieces.join(", ");
  }
  switch (typeof value) {
    case "string":
      return value.trim();
    case "object":
      return bibFieldString(value.name ?? "");
    default:
      // number, boolean, bigint, symbol, function
      return String(value);
  }
}
|
|
6049
|
-
function
|
|
6050
|
-
|
|
6051
|
-
|
|
6052
|
-
|
|
8070
|
+
/**
 * Summarize a BibTeX library as Markdown: entry count, citation-type tally,
 * and one line per entry (capped at the first 200 entries).
 *
 * Author and editor fields are normalized the same way: an array is
 * formatted creator by creator, and any other value is stringified and split
 * on " and ". Editors are used for the credit only when no author is present.
 *
 * @param {{ mimeType: string, bytes: *, fileName?: string }} input - Payload
 *   and the file name used for the title.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a `warnings` entry when parsing throws.
 */
async function extractBibTeXText(input) {
  // Single source of truth for the listing cap and its truncation notice.
  const maxListedEntries = 200;
  // Normalize a creator field (creator array or "A and B" string) to display names.
  const creatorNames = (field) => Array.isArray(field) ? field.map((creator) => formatBibCreator(creator)).filter(Boolean) : bibFieldString(field).split(/\s+and\s+/i).filter(Boolean);
  try {
    const bibtex = await import("@retorquere/bibtex-parser");
    const text = decodeTextBytes(input.bytes);
    const library = bibtex.parse(text);
    const entries = Array.isArray(library.entries) ? library.entries : [];
    const citationTypes = /* @__PURE__ */ new Map();
    for (const entry of entries) {
      const type = (entry.type ?? "misc").toLowerCase();
      citationTypes.set(type, (citationTypes.get(type) ?? 0) + 1);
    }
    const entrySections = [];
    for (const entry of entries.slice(0, maxListedEntries)) {
      const fields = entry.fields ?? {};
      const entryTitle = bibFieldString(fields.title);
      const authorList = creatorNames(fields.author);
      const editorList = creatorNames(fields.editor);
      const year = bibFieldString(fields.year ?? fields.date ?? "");
      const journal = bibFieldString(fields.journal ?? fields.booktitle ?? fields.publisher ?? "");
      const doi = bibFieldString(fields.doi);
      const url = bibFieldString(fields.url);
      const credit = authorList.length ? authorList.join(", ") : editorList.length ? `${editorList.join(", ")} (eds.)` : "Unknown";
      const descriptorParts = [credit];
      if (year) {
        descriptorParts.push(year);
      }
      const descriptor = descriptorParts.join(", ");
      const trailing = [];
      if (journal) {
        trailing.push(journal);
      }
      if (doi) {
        trailing.push(`doi:${doi}`);
      }
      if (url) {
        trailing.push(url);
      }
      const trailingText = trailing.length ? ` \u2014 ${trailing.join(", ")}` : "";
      entrySections.push(`- [${entry.key}] ${entryTitle || "(untitled)"} (${descriptor})${trailingText}`);
    }
    const totalEntries = entries.length;
    const truncated = totalEntries > maxListedEntries;
    // Most frequent type first; ties broken alphabetically.
    const typeSummary = [...citationTypes.entries()].sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0])).map(([type, count]) => `${type} (${count})`).join(", ");
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : "BibTeX library";
    const extractedText = [
      `# ${title}`,
      "",
      `BibTeX library with ${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}.`,
      typeSummary ? `Citation types: ${typeSummary}.` : null,
      "",
      "## Entries",
      "",
      ...entrySections,
      truncated ? `\n_Preview truncated to the first ${maxListedEntries} entries._` : null
    ].filter((item) => item !== null).join("\n").trim();
    const warnings = library.errors?.length ? [`BibTeX parser reported ${library.errors.length} parse error(s).`] : void 0;
    return {
      title,
      extractedText,
      artifact: {
        ...extractionMetadata("bibtex", input.mimeType, "bibtex_text"),
        metadata: {
          entry_count: String(totalEntries),
          citation_types: [...citationTypes.keys()].sort().join(",")
        },
        warnings
      }
    };
  } catch (error) {
    // Import or parse failures are reported as a warning on the artifact instead of being thrown.
    return {
      artifact: {
        ...extractionMetadata("bibtex", input.mimeType, "bibtex_text"),
        warnings: [`BibTeX extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
6056
|
-
async function
|
|
6057
|
-
|
|
6058
|
-
|
|
6059
|
-
|
|
6060
|
-
|
|
6061
|
-
|
|
6062
|
-
|
|
8148
|
+
/**
 * Extract plain paragraphs from an RTF payload using `rtf-parser`.
 *
 * Each entry of the parsed document's `content` that carries its own
 * `content` array is treated as a paragraph of spans. Entries without one
 * are treated as loose spans, and consecutive loose spans are merged into a
 * single paragraph so their text is kept.
 * NOTE(review): loose top-level spans are expected for documents without
 * paragraph breaks; confirm against the rtf-parser document model.
 *
 * @param {{ mimeType: string, bytes: *, fileName?: string }} input - Payload
 *   and the file name used for the title.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a `warnings` entry when parsing fails.
 */
async function extractRtfText(input) {
  try {
    const rtfParser = await import("rtf-parser");
    // The parser may be exposed on the namespace or on its default export.
    const parseString = rtfParser.string ?? rtfParser.default?.string;
    if (typeof parseString !== "function") {
      throw new Error("rtf-parser did not expose a string parser.");
    }
    const rtfText = decodeTextBytes(input.bytes);
    // Adapt the callback-style parser to a promise.
    const document = await new Promise((resolve, reject) => {
      parseString(rtfText, (err, doc) => {
        if (err || !doc) {
          reject(err ?? new Error("RTF parse returned no document"));
          return;
        }
        resolve(doc);
      });
    });
    const paragraphs = [];
    let looseText = "";
    const flushLooseText = () => {
      const text = normalizeWhitespace(looseText);
      if (text) {
        paragraphs.push(text);
      }
      looseText = "";
    };
    for (const node of document.content ?? []) {
      if (!Array.isArray(node.content)) {
        // Loose span: accumulate until the next paragraph or the end.
        looseText += node.value ?? "";
        continue;
      }
      flushLooseText();
      const text = normalizeWhitespace(node.content.map((span) => span.value ?? "").join(""));
      if (text) {
        paragraphs.push(text);
      }
    }
    flushLooseText();
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    const extractedText = [title ? `# ${title}` : null, "", ...paragraphs].filter((item) => item !== null).join("\n\n").trim();
    return {
      title,
      extractedText: extractedText || void 0,
      artifact: {
        ...extractionMetadata("rtf", input.mimeType, "rtf_text"),
        metadata: {
          paragraph_count: String(paragraphs.length)
        }
      }
    };
  } catch (error) {
    // Failures are reported as a warning on the artifact instead of being thrown.
    return {
      artifact: {
        ...extractionMetadata("rtf", input.mimeType, "rtf_text"),
        warnings: [`RTF extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
6074
|
-
function
|
|
6075
|
-
|
|
6076
|
-
|
|
6077
|
-
|
|
6078
|
-
)
|
|
6079
|
-
|
|
6080
|
-
|
|
8194
|
+
/**
 * Concatenate the text carried by a node and its descendants.
 *
 * A node with a string `value` contributes that value and its children are
 * not visited. Otherwise the texts of its `children` are joined in order;
 * a node with neither contributes "".
 *
 * @param {{ value?: *, children?: * }} node - Tree node.
 * @returns {string} Concatenated text.
 */
function collectOrgNodeText(node) {
  if (typeof node.value === "string") {
    return node.value;
  }
  return Array.isArray(node.children)
    ? node.children.reduce((text, child) => text + collectOrgNodeText(child), "")
    : "";
}
|
|
6082
|
-
function
|
|
6083
|
-
|
|
6084
|
-
|
|
6085
|
-
|
|
8203
|
+
/**
 * Append a Markdown rendering of an Org node to `lines`.
 *
 * - "headline": a heading clamped to levels 1-6, prefixed with the keyword
 *   and suffixed with the tags in backticks, surrounded by blank lines.
 * - "paragraph": the whitespace-normalized text followed by a blank line.
 * - "list": one `- ` bullet per "list.item" child, then a blank line.
 * - "block": a fenced block (no fence label for "src" blocks) when the body
 *   is non-empty.
 * - any other type: its children are rendered recursively.
 *
 * @param {object} node - Org tree node.
 * @param {string[]} lines - Output buffer, mutated in place.
 * @returns {void}
 */
function renderOrgNode(node, lines) {
  switch (node.type) {
    case "headline": {
      const level = Math.min(Math.max(node.level ?? 1, 1), 6);
      const todoPrefix = node.keyword ? `${node.keyword} ` : "";
      const tagSuffix = node.tags?.length ? ` \`${node.tags.join(":")}\`` : "";
      const heading = normalizeWhitespace(collectOrgNodeText(node));
      lines.push("", `${"#".repeat(level)} ${todoPrefix}${heading}${tagSuffix}`.trim(), "");
      return;
    }
    case "paragraph": {
      const paragraphText = normalizeWhitespace(collectOrgNodeText(node));
      if (paragraphText) {
        lines.push(paragraphText, "");
      }
      return;
    }
    case "list": {
      for (const item of node.children ?? []) {
        if (item.type !== "list.item") {
          continue;
        }
        const itemText = normalizeWhitespace(collectOrgNodeText(item));
        if (itemText) {
          lines.push(`- ${itemText}`);
        }
      }
      lines.push("");
      return;
    }
    case "block": {
      const blockName = node.name ?? "";
      const blockBody = typeof node.value === "string" ? node.value.trimEnd() : "";
      if (blockBody) {
        const fenceLabel = blockName === "src" ? "" : blockName.toLowerCase();
        lines.push(`\`\`\`${fenceLabel}`, blockBody, "```", "");
      }
      return;
    }
    default:
      // Container nodes: descend so nested headlines, paragraphs, etc. are rendered.
      for (const child of node.children ?? []) {
        renderOrgNode(child, lines);
      }
  }
}
|
|
8249
|
+
/**
 * Convert an Org-mode payload to Markdown text and count its headlines.
 *
 * The title comes from the document's `title` property when present,
 * otherwise from the file name. `todo_count` counts headlines that carry a
 * keyword.
 *
 * @param {{ mimeType: string, bytes: *, fileName?: string }} input - Payload
 *   and the file name used as the fallback title.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a `warnings` entry when parsing throws.
 */
async function extractOrgText(input) {
  try {
    const orga = await import("orga");
    const source = decodeTextBytes(input.bytes);
    const document = orga.parse(source);
    const properties = document.properties ?? {};
    let documentTitle = "";
    if (Array.isArray(properties.title)) {
      documentTitle = properties.title.join(" ");
    } else if (typeof properties.title === "string") {
      documentTitle = properties.title;
    }
    let headlineCount = 0;
    let todoCount = 0;
    // Explicit stack instead of recursion; only the totals matter, so visit order is irrelevant.
    const pending = [document];
    while (pending.length > 0) {
      const current = pending.pop();
      if (current.type === "headline") {
        headlineCount += 1;
        if (current.keyword) {
          todoCount += 1;
        }
      }
      for (const child of current.children ?? []) {
        pending.push(child);
      }
    }
    const bodyLines = [];
    for (const child of document.children ?? []) {
      renderOrgNode(child, bodyLines);
    }
    const fallbackTitle = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    const title = documentTitle.trim() || fallbackTitle;
    const header = title ? [`# ${title}`, ""] : [""];
    const extractedText = header.concat(bodyLines).join("\n").trim();
    return {
      title,
      extractedText: extractedText || void 0,
      artifact: {
        ...extractionMetadata("org", input.mimeType, "org_text"),
        metadata: {
          headline_count: String(headlineCount),
          todo_count: String(todoCount)
        }
      }
    };
  } catch (error) {
    // Failures are reported as a warning on the artifact instead of being thrown.
    return {
      artifact: {
        ...extractionMetadata("org", input.mimeType, "org_text"),
        warnings: [`Org extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
8296
|
+
/**
 * Convert an AsciiDoc payload to Markdown via Asciidoctor HTML output.
 *
 * The source is loaded (to read the document title) and converted to HTML
 * with the "safe" safe mode, then the HTML is passed to `htmlToMarkdown`.
 * The title is the document title when non-empty, otherwise the file name
 * without its extension.
 *
 * @param {{ mimeType: string, bytes: *, fileName?: string }} input - Payload
 *   and the file name used as the fallback title.
 * @returns {Promise<object>} `{ title, extractedText, artifact }` on success;
 *   `{ artifact }` carrying a `warnings` entry when conversion throws.
 */
async function extractAsciiDocText(input) {
  try {
    const asciidoctorModule = await import("@asciidoctor/core");
    const createProcessor = asciidoctorModule.default ?? asciidoctorModule;
    const processor = createProcessor();
    const source = decodeTextBytes(input.bytes);
    const loaded = processor.load(source, { safe: "safe" });
    const html = processor.convert(source, { safe: "safe", standalone: false });
    const markdown = htmlToMarkdown(html);
    let docTitle;
    if (typeof loaded.getTitle === "function") {
      docTitle = loaded.getTitle() ?? void 0;
    }
    let fileTitle;
    if (input.fileName) {
      fileTitle = path7.basename(input.fileName, path7.extname(input.fileName));
    }
    const title = docTitle?.trim() || fileTitle;
    const sections = [];
    if (title) {
      sections.push(`# ${title}`);
    }
    sections.push("");
    if (markdown !== null) {
      sections.push(markdown);
    }
    const extractedText = sections.join("\n").trim();
    return {
      title,
      extractedText: extractedText || void 0,
      artifact: {
        ...extractionMetadata("asciidoc", input.mimeType, "asciidoc_text"),
        metadata: {
          html_size: String(html.length),
          markdown_size: String(markdown.length)
        }
      }
    };
  } catch (error) {
    // Failures are reported as a warning on the artifact instead of being thrown.
    return {
      artifact: {
        ...extractionMetadata("asciidoc", input.mimeType, "asciidoc_text"),
        warnings: [`AsciiDoc extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
6102
8329
|
async function extractTranscriptText(input) {
|
|
6103
8330
|
try {
|
|
@@ -6537,41 +8764,6 @@ async function appendWatchRun(rootDir, run) {
|
|
|
6537
8764
|
await appendJsonLine(paths.jobsLogPath, run);
|
|
6538
8765
|
}
|
|
6539
8766
|
|
|
6540
|
-
// src/markdown-ast.ts
|
|
6541
|
-
import { fromMarkdown } from "mdast-util-from-markdown";
|
|
6542
|
-
/**
 * Parse Markdown and return the root's child nodes.
 *
 * Best-effort: any parser error, or a root without a `children` array,
 * yields an empty list.
 *
 * @param {string} text - Markdown source.
 * @returns {object[]} Top-level nodes, or [] on failure.
 */
function parseMarkdownNodes(text) {
  try {
    const { children } = fromMarkdown(text);
    if (Array.isArray(children)) {
      return children;
    }
    return [];
  } catch {
    // Deliberate best-effort: unparsable input is treated as having no nodes.
    return [];
  }
}
|
|
6550
|
-
/**
 * Flatten a Markdown node to whitespace-normalized plain text.
 *
 * Text and code nodes contribute their `value`, images their `alt`, and
 * breaks a single space. Every other node contributes the space-joined text
 * of its children.
 *
 * @param {object} node - Markdown tree node.
 * @returns {string} Plain-text content.
 */
function markdownNodeText(node) {
  switch (node.type) {
    case "text":
    case "inlineCode":
    case "code":
      return normalizeWhitespace(node.value ?? "");
    case "image":
      return normalizeWhitespace(node.alt ?? "");
    case "break":
    case "thematicBreak":
      return " ";
    default: {
      const childTexts = (node.children ?? []).map((child) => markdownNodeText(child));
      return normalizeWhitespace(childTexts.join(" "));
    }
  }
}
|
|
6562
|
-
/**
 * Return the text of the first top-level heading with non-empty text.
 *
 * @param {string} text - Markdown source.
 * @returns {string|undefined} Trimmed heading text, or undefined when there is none.
 */
function firstMarkdownHeading(text) {
  for (const node of parseMarkdownNodes(text)) {
    if (node.type !== "heading") {
      continue;
    }
    const headingText = markdownNodeText(node).trim();
    if (headingText) {
      return headingText;
    }
  }
  return void 0;
}
|
|
6574
|
-
|
|
6575
8767
|
// src/source-classification.ts
|
|
6576
8768
|
import path9 from "path";
|
|
6577
8769
|
var ALL_SOURCE_CLASSES = ["first_party", "third_party", "resource", "generated"];
|
|
@@ -6902,7 +9094,7 @@ function inferKind(mimeType, filePath, detectionOptions = {}) {
|
|
|
6902
9094
|
if (isTranscriptFilePath(filePath) || mimeType === "application/x-subrip" || mimeType === "text/vtt") {
|
|
6903
9095
|
return "transcript";
|
|
6904
9096
|
}
|
|
6905
|
-
if (mimeType.includes("markdown")) {
|
|
9097
|
+
if (mimeType.includes("markdown") || filePath.toLowerCase().endsWith(".mdx")) {
|
|
6906
9098
|
return "markdown";
|
|
6907
9099
|
}
|
|
6908
9100
|
if (mimeType.includes("html")) {
|
|
@@ -6911,7 +9103,7 @@ function inferKind(mimeType, filePath, detectionOptions = {}) {
|
|
|
6911
9103
|
if (mimeType === "application/pdf" || filePath.toLowerCase().endsWith(".pdf")) {
|
|
6912
9104
|
return "pdf";
|
|
6913
9105
|
}
|
|
6914
|
-
if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || filePath.toLowerCase().endsWith(".docx")) {
|
|
9106
|
+
if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || mimeType === "application/vnd.ms-word.document.macroenabled.12" || mimeType === "application/vnd.ms-word.template.macroenabled.12" || mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.template" || filePath.toLowerCase().endsWith(".docx") || filePath.toLowerCase().endsWith(".docm") || filePath.toLowerCase().endsWith(".dotx") || filePath.toLowerCase().endsWith(".dotm")) {
|
|
6915
9107
|
return "docx";
|
|
6916
9108
|
}
|
|
6917
9109
|
if (isEmailFilePath(filePath) || mimeType === "message/rfc822" || mimeType === "application/mbox") {
|
|
@@ -6926,20 +9118,66 @@ function inferKind(mimeType, filePath, detectionOptions = {}) {
|
|
|
6926
9118
|
if (mimeType === "text/csv" || mimeType === "text/tab-separated-values" || filePath.toLowerCase().endsWith(".csv") || filePath.toLowerCase().endsWith(".tsv")) {
|
|
6927
9119
|
return "csv";
|
|
6928
9120
|
}
|
|
9121
|
+
if (mimeType === "application/x-ipynb+json" || filePath.toLowerCase().endsWith(".ipynb")) {
|
|
9122
|
+
return "jupyter";
|
|
9123
|
+
}
|
|
9124
|
+
if (mimeType === "application/vnd.oasis.opendocument.text" || filePath.toLowerCase().endsWith(".odt")) {
|
|
9125
|
+
return "odt";
|
|
9126
|
+
}
|
|
9127
|
+
if (mimeType === "application/vnd.oasis.opendocument.presentation" || filePath.toLowerCase().endsWith(".odp")) {
|
|
9128
|
+
return "odp";
|
|
9129
|
+
}
|
|
9130
|
+
if (mimeType === "application/vnd.oasis.opendocument.spreadsheet" || filePath.toLowerCase().endsWith(".ods")) {
|
|
9131
|
+
return "ods";
|
|
9132
|
+
}
|
|
9133
|
+
if (filePath.toLowerCase().endsWith(".bib") || mimeType === "application/x-bibtex") {
|
|
9134
|
+
return "bibtex";
|
|
9135
|
+
}
|
|
9136
|
+
if (mimeType === "application/rtf" || mimeType === "text/rtf" || filePath.toLowerCase().endsWith(".rtf")) {
|
|
9137
|
+
return "rtf";
|
|
9138
|
+
}
|
|
9139
|
+
if (filePath.toLowerCase().endsWith(".org") || mimeType === "text/x-org") {
|
|
9140
|
+
return "org";
|
|
9141
|
+
}
|
|
9142
|
+
if (filePath.toLowerCase().endsWith(".adoc") || filePath.toLowerCase().endsWith(".asciidoc") || mimeType === "text/x-asciidoc") {
|
|
9143
|
+
return "asciidoc";
|
|
9144
|
+
}
|
|
9145
|
+
if (isStructuredDataPath(filePath, mimeType)) {
|
|
9146
|
+
return "data";
|
|
9147
|
+
}
|
|
6929
9148
|
if (mimeType.startsWith("text/") || isStructuredTextMime(mimeType)) {
|
|
6930
9149
|
return "text";
|
|
6931
9150
|
}
|
|
6932
|
-
if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || filePath.toLowerCase().endsWith(".xlsx")) {
|
|
9151
|
+
if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || mimeType === "application/vnd.ms-excel" || mimeType === "application/vnd.ms-excel.sheet.macroenabled.12" || mimeType === "application/vnd.ms-excel.template.macroenabled.12" || mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.template" || filePath.toLowerCase().endsWith(".xlsx") || filePath.toLowerCase().endsWith(".xlsm") || filePath.toLowerCase().endsWith(".xltx") || filePath.toLowerCase().endsWith(".xltm") || filePath.toLowerCase().endsWith(".xls")) {
|
|
6933
9152
|
return "xlsx";
|
|
6934
9153
|
}
|
|
6935
|
-
if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || filePath.toLowerCase().endsWith(".pptx")) {
|
|
9154
|
+
if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || mimeType === "application/vnd.ms-powerpoint.presentation.macroenabled.12" || mimeType === "application/vnd.ms-powerpoint.template.macroenabled.12" || mimeType === "application/vnd.openxmlformats-officedocument.presentationml.template" || filePath.toLowerCase().endsWith(".pptx") || filePath.toLowerCase().endsWith(".pptm") || filePath.toLowerCase().endsWith(".potx") || filePath.toLowerCase().endsWith(".potm")) {
|
|
6936
9155
|
return "pptx";
|
|
6937
9156
|
}
|
|
6938
|
-
if (mimeType.startsWith("image/")) {
|
|
9157
|
+
if (mimeType.startsWith("image/") || isImagePath(filePath)) {
|
|
6939
9158
|
return "image";
|
|
6940
9159
|
}
|
|
6941
9160
|
return "binary";
|
|
6942
9161
|
}
|
|
9162
|
+
// Lower-case file extensions (leading dot included) recognized as images.
var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([
  ".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".ico",
  ".tiff", ".tif", ".heic", ".heif", ".avif", ".jxl", ".svg"
]);

/**
 * Report whether a path's extension is a known image extension.
 * The comparison is case-insensitive.
 *
 * @param {string} filePath - File path or name.
 * @returns {boolean} True when the extension is in IMAGE_EXTENSIONS.
 */
function isImagePath(filePath) {
  const extension = path12.extname(filePath).toLowerCase();
  return IMAGE_EXTENSIONS.has(extension);
}
|
|
6943
9181
|
function isStructuredTextMime(mimeType) {
|
|
6944
9182
|
switch (mimeType) {
|
|
6945
9183
|
case "application/json":
|
|
@@ -6960,6 +9198,23 @@ function isStructuredTextMime(mimeType) {
|
|
|
6960
9198
|
return false;
|
|
6961
9199
|
}
|
|
6962
9200
|
}
|
|
9201
|
+
/**
 * Decide whether a file should be treated as structured data
 * (YAML, TOML, XML, INI-like, env, properties, or JSON).
 *
 * Well-known manifests and lockfiles (`package.json`, `package-lock.json`,
 * `tsconfig.json`, `pnpm-lock.yaml`) are excluded. The exclusion runs before
 * any extension check; previously it ran only inside the JSON branch, so
 * `pnpm-lock.yaml` was accepted by the `.yaml` check before it was reached.
 *
 * @param {string} filePath - File path; matched case-insensitively.
 * @param {string} mimeType - MIME type of the file.
 * @returns {boolean} True when the path or MIME type denotes structured data.
 */
function isStructuredDataPath(filePath, mimeType) {
  const lower = filePath.toLowerCase();
  const base = path12.basename(lower);
  if (base === "package.json" || base === "package-lock.json" || base === "tsconfig.json" || base === "pnpm-lock.yaml") {
    return false;
  }
  if (lower.endsWith(".yaml") || lower.endsWith(".yml") || lower.endsWith(".toml") || mimeType === "application/toml" || mimeType === "application/yaml" || mimeType === "application/x-yaml") {
    return true;
  }
  if (lower.endsWith(".xml") || lower.endsWith(".ini") || lower.endsWith(".env") || lower.endsWith(".properties") || lower.endsWith(".conf") || lower.endsWith(".cfg") || mimeType === "application/xml" || mimeType === "text/xml") {
    return true;
  }
  if (lower.endsWith(".json") || lower.endsWith(".jsonc") || lower.endsWith(".json5") || mimeType === "application/json" || mimeType === "application/json5") {
    return true;
  }
  return false;
}
|
|
6963
9218
|
async function localCodeDetectionOptions(absolutePath, payloadBytes) {
|
|
6964
9219
|
if (path12.extname(absolutePath)) {
|
|
6965
9220
|
return {};
|
|
@@ -8615,6 +10870,60 @@ async function prepareFileInputs(rootDir, absoluteInput, repoRoot, sourceClass)
|
|
|
8615
10870
|
title = extracted.title?.trim() || title;
|
|
8616
10871
|
extractedText = extracted.extractedText;
|
|
8617
10872
|
extractionArtifact = extracted.artifact;
|
|
10873
|
+
} else if (sourceKind === "jupyter") {
|
|
10874
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10875
|
+
const extracted = await extractJupyterNotebook({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10876
|
+
title = extracted.title?.trim() || title;
|
|
10877
|
+
extractedText = extracted.extractedText;
|
|
10878
|
+
extractionArtifact = extracted.artifact;
|
|
10879
|
+
} else if (sourceKind === "odt") {
|
|
10880
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10881
|
+
const extracted = await extractOdtText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10882
|
+
title = extracted.title?.trim() || title;
|
|
10883
|
+
extractedText = extracted.extractedText;
|
|
10884
|
+
extractionArtifact = extracted.artifact;
|
|
10885
|
+
} else if (sourceKind === "odp") {
|
|
10886
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10887
|
+
const extracted = await extractOdpText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10888
|
+
title = extracted.title?.trim() || title;
|
|
10889
|
+
extractedText = extracted.extractedText;
|
|
10890
|
+
extractionArtifact = extracted.artifact;
|
|
10891
|
+
} else if (sourceKind === "ods") {
|
|
10892
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10893
|
+
const extracted = await extractOdsText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10894
|
+
title = extracted.title?.trim() || title;
|
|
10895
|
+
extractedText = extracted.extractedText;
|
|
10896
|
+
extractionArtifact = extracted.artifact;
|
|
10897
|
+
} else if (sourceKind === "data") {
|
|
10898
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10899
|
+
const extracted = await extractStructuredData({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10900
|
+
title = extracted.title?.trim() || title;
|
|
10901
|
+
extractedText = extracted.extractedText;
|
|
10902
|
+
extractionArtifact = extracted.artifact;
|
|
10903
|
+
} else if (sourceKind === "bibtex") {
|
|
10904
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10905
|
+
const extracted = await extractBibTeXText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10906
|
+
title = extracted.title?.trim() || title;
|
|
10907
|
+
extractedText = extracted.extractedText;
|
|
10908
|
+
extractionArtifact = extracted.artifact;
|
|
10909
|
+
} else if (sourceKind === "rtf") {
|
|
10910
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10911
|
+
const extracted = await extractRtfText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10912
|
+
title = extracted.title?.trim() || title;
|
|
10913
|
+
extractedText = extracted.extractedText;
|
|
10914
|
+
extractionArtifact = extracted.artifact;
|
|
10915
|
+
} else if (sourceKind === "org") {
|
|
10916
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10917
|
+
const extracted = await extractOrgText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10918
|
+
title = extracted.title?.trim() || title;
|
|
10919
|
+
extractedText = extracted.extractedText;
|
|
10920
|
+
extractionArtifact = extracted.artifact;
|
|
10921
|
+
} else if (sourceKind === "asciidoc") {
|
|
10922
|
+
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
10923
|
+
const extracted = await extractAsciiDocText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
10924
|
+
title = extracted.title?.trim() || title;
|
|
10925
|
+
extractedText = extracted.extractedText;
|
|
10926
|
+
extractionArtifact = extracted.artifact;
|
|
8618
10927
|
} else if (sourceKind === "epub") {
|
|
8619
10928
|
title = path12.basename(absoluteInput, path12.extname(absoluteInput));
|
|
8620
10929
|
const extracted = await extractEpubChapters({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
|
|
@@ -8941,7 +11250,11 @@ async function collectInboxAttachmentRefs(inputDir, files) {
|
|
|
8941
11250
|
for (const absolutePath of files) {
|
|
8942
11251
|
const mimeType = guessMimeType(absolutePath);
|
|
8943
11252
|
const detectionOptions = await localCodeDetectionOptions(absolutePath);
|
|
8944
|
-
|
|
11253
|
+
let sourceKind = inferKind(mimeType, absolutePath, detectionOptions);
|
|
11254
|
+
const lowerExt = path12.extname(absolutePath).toLowerCase();
|
|
11255
|
+
if ((lowerExt === ".html" || lowerExt === ".htm") && sourceKind === "code") {
|
|
11256
|
+
sourceKind = "html";
|
|
11257
|
+
}
|
|
8945
11258
|
if (sourceKind !== "markdown" && sourceKind !== "html") {
|
|
8946
11259
|
continue;
|
|
8947
11260
|
}
|
|
@@ -9285,7 +11598,11 @@ async function importInbox(rootDir, inputDir) {
|
|
|
9285
11598
|
const mimeType = guessMimeType(absolutePath);
|
|
9286
11599
|
const detectionOptions = await localCodeDetectionOptions(absolutePath);
|
|
9287
11600
|
let sourceKind = inferKind(mimeType, absolutePath, detectionOptions);
|
|
9288
|
-
|
|
11601
|
+
const lowerExt = path12.extname(absolutePath).toLowerCase();
|
|
11602
|
+
if ((lowerExt === ".html" || lowerExt === ".htm") && sourceKind === "code") {
|
|
11603
|
+
sourceKind = "html";
|
|
11604
|
+
}
|
|
11605
|
+
if (sourceKind === "binary" && lowerExt === ".zip") {
|
|
9289
11606
|
const bytes = await fs11.readFile(absolutePath);
|
|
9290
11607
|
if (isSlackExportArchive(bytes)) {
|
|
9291
11608
|
sourceKind = "chat_export";
|
|
@@ -18270,7 +20587,7 @@ async function bootstrapDemo(rootDir, input) {
|
|
|
18270
20587
|
}
|
|
18271
20588
|
|
|
18272
20589
|
// src/mcp.ts
|
|
18273
|
-
var SERVER_VERSION = "0.6.8";
|
|
20590
|
+
var SERVER_VERSION = "0.7.0";
|
|
18274
20591
|
async function createMcpServer(rootDir) {
|
|
18275
20592
|
const server = new McpServer({
|
|
18276
20593
|
name: "swarmvault",
|