ai-contextify 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +237 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +93 -0
- package/dist/cli.js.map +1 -0
- package/dist/exporters/chunks.d.ts +7 -0
- package/dist/exporters/chunks.d.ts.map +1 -0
- package/dist/exporters/chunks.js +18 -0
- package/dist/exporters/chunks.js.map +1 -0
- package/dist/exporters/markdown.d.ts +9 -0
- package/dist/exporters/markdown.d.ts.map +1 -0
- package/dist/exporters/markdown.js +62 -0
- package/dist/exporters/markdown.js.map +1 -0
- package/dist/exporters/metadata.d.ts +3 -0
- package/dist/exporters/metadata.d.ts.map +1 -0
- package/dist/exporters/metadata.js +7 -0
- package/dist/exporters/metadata.js.map +1 -0
- package/dist/exporters/xml.d.ts +12 -0
- package/dist/exporters/xml.d.ts.map +1 -0
- package/dist/exporters/xml.js +35 -0
- package/dist/exporters/xml.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +128 -0
- package/dist/index.js.map +1 -0
- package/dist/parsers/code.d.ts +2 -0
- package/dist/parsers/code.d.ts.map +1 -0
- package/dist/parsers/code.js +6 -0
- package/dist/parsers/code.js.map +1 -0
- package/dist/parsers/docx.d.ts +2 -0
- package/dist/parsers/docx.d.ts.map +1 -0
- package/dist/parsers/docx.js +16 -0
- package/dist/parsers/docx.js.map +1 -0
- package/dist/parsers/index.d.ts +8 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +56 -0
- package/dist/parsers/index.js.map +1 -0
- package/dist/parsers/json.d.ts +2 -0
- package/dist/parsers/json.d.ts.map +1 -0
- package/dist/parsers/json.js +12 -0
- package/dist/parsers/json.js.map +1 -0
- package/dist/parsers/markdown.d.ts +2 -0
- package/dist/parsers/markdown.d.ts.map +1 -0
- package/dist/parsers/markdown.js +11 -0
- package/dist/parsers/markdown.js.map +1 -0
- package/dist/parsers/pdf.d.ts +2 -0
- package/dist/parsers/pdf.d.ts.map +1 -0
- package/dist/parsers/pdf.js +17 -0
- package/dist/parsers/pdf.js.map +1 -0
- package/dist/parsers/text.d.ts +2 -0
- package/dist/parsers/text.d.ts.map +1 -0
- package/dist/parsers/text.js +6 -0
- package/dist/parsers/text.js.map +1 -0
- package/dist/scanner/index.d.ts +15 -0
- package/dist/scanner/index.d.ts.map +1 -0
- package/dist/scanner/index.js +66 -0
- package/dist/scanner/index.js.map +1 -0
- package/dist/types.d.ts +50 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/chunking.d.ts +11 -0
- package/dist/utils/chunking.d.ts.map +1 -0
- package/dist/utils/chunking.js +62 -0
- package/dist/utils/chunking.js.map +1 -0
- package/dist/utils/language.d.ts +3 -0
- package/dist/utils/language.d.ts.map +1 -0
- package/dist/utils/language.js +72 -0
- package/dist/utils/language.js.map +1 -0
- package/dist/utils/logger.d.ts +9 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +10 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/tokens.d.ts +9 -0
- package/dist/utils/tokens.d.ts.map +1 -0
- package/dist/utils/tokens.js +22 -0
- package/dist/utils/tokens.js.map +1 -0
- package/package.json +62 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "fs-extra";
|
|
3
|
+
import ora from "ora";
|
|
4
|
+
import { detectKind, parseFile } from "./parsers/index.js";
|
|
5
|
+
import { scanDirectory } from "./scanner/index.js";
|
|
6
|
+
import { exportCombinedMarkdown } from "./exporters/markdown.js";
|
|
7
|
+
import { exportMetadata } from "./exporters/metadata.js";
|
|
8
|
+
import { exportClaudeXml } from "./exporters/xml.js";
|
|
9
|
+
import { exportChunks } from "./exporters/chunks.js";
|
|
10
|
+
import { estimateTokens } from "./utils/tokens.js";
|
|
11
|
+
import { detectLanguage } from "./utils/language.js";
|
|
12
|
+
import { logger } from "./utils/logger.js";
|
|
13
|
+
/**
 * Run `task`; if it throws, mark `spinner` as failed before rethrowing so the
 * spinner is stopped instead of being left animating after an error.
 *
 * @param {{ fail: () => unknown }} spinner - An ora spinner that is currently running.
 * @param {() => Promise<T>} task - The work the spinner is reporting on.
 * @returns {Promise<T>} Whatever `task` resolves to.
 * @template T
 */
async function failSpinnerOnError(spinner, task) {
  try {
    return await task();
  } catch (err) {
    spinner.fail();
    throw err;
  }
}

/**
 * Scan `options.input`, parse every matched file, and write the requested
 * artifacts (combined.md, context.xml, chunks, metadata.json) into
 * `options.output`.
 *
 * A file whose parser throws is still listed in the result: its content is
 * the empty string and `parseError` carries the error message.
 *
 * @param {object} options - Build options (see `BuildOptions` in types.d.ts).
 * @returns {Promise<object>} The build summary, including the artifact paths.
 * @throws Propagates errors from scanning and from the exporters.
 */
export async function build(options) {
  const startedAt = Date.now();
  const absoluteInput = path.resolve(options.input);
  const absoluteOutput = path.resolve(options.output);

  const scanSpinner = ora({
    text: `Scanning ${absoluteInput}`,
    color: "cyan",
  }).start();
  const scanned = await failSpinnerOnError(scanSpinner, () =>
    scanDirectory(absoluteInput, {
      include: options.include,
      exclude: options.exclude,
      followSymlinks: options.followSymlinks,
      maxFileSizeBytes: options.maxFileSizeBytes,
    }),
  );
  scanSpinner.succeed(`Found ${scanned.length} file(s)`);
  if (scanned.length === 0) {
    logger.warn("No files matched the scan filters. Nothing to do.");
  }

  const parseSpinner = ora({ color: "cyan" }).start();
  const parsed = await failSpinnerOnError(parseSpinner, async () => {
    const results = [];
    for (const [index, entry] of scanned.entries()) {
      const detected = detectKind(entry.absolutePath);
      parseSpinner.text = `Parsing [${index + 1}/${scanned.length}] ${entry.relativePath}`;
      let content = "";
      let parseError;
      try {
        content = await parseFile(entry.absolutePath, detected.kind);
      } catch (err) {
        // A single unreadable file must not abort the whole build; record the
        // failure on the entry and keep going with empty content.
        parseError = err instanceof Error ? err.message : String(err);
        content = "";
      }
      results.push({
        absolutePath: entry.absolutePath,
        relativePath: entry.relativePath,
        kind: detected.kind,
        extension: detected.extension,
        language: detectLanguage(detected.extension, entry.relativePath),
        bytes: entry.bytes,
        content,
        estimatedTokens: estimateTokens(content),
        parseError,
      });
    }
    return results;
  });
  parseSpinner.succeed(`Parsed ${parsed.length} file(s)`);

  // Surface parse failures instead of leaving them visible only in the summary.
  const failedCount = parsed.filter((f) => f.parseError !== undefined).length;
  if (failedCount > 0) {
    logger.warn(`${failedCount} file(s) could not be parsed; see parseError in the build summary.`);
  }

  await fs.ensureDir(absoluteOutput);
  const artifacts = {};
  const generatedAt = new Date().toISOString();
  const title = options.title ?? `Context bundle: ${path.basename(absoluteInput)}`;
  const combinedPath = path.join(absoluteOutput, "combined.md");

  // combined.md is written unless explicitly disabled.
  if (options.emitCombined !== false) {
    const exportSpinner = ora({
      text: "Writing combined.md",
      color: "cyan",
    }).start();
    await failSpinnerOnError(exportSpinner, () =>
      exportCombinedMarkdown(parsed, {
        outputPath: combinedPath,
        title,
        inputDir: absoluteInput,
        generatedAt,
      }),
    );
    artifacts.combined = combinedPath;
    exportSpinner.succeed(`combined.md written (${parsed.length} sections)`);
  }

  // context.xml is opt-in.
  if (options.emitXml) {
    const xmlSpinner = ora({ text: "Writing context.xml", color: "cyan" }).start();
    const xmlPath = path.join(absoluteOutput, "context.xml");
    await failSpinnerOnError(xmlSpinner, () =>
      exportClaudeXml(parsed, {
        outputPath: xmlPath,
        inputDir: absoluteInput,
        generatedAt,
      }),
    );
    artifacts.xml = xmlPath;
    xmlSpinner.succeed("context.xml written");
  }

  // Chunks are sliced from the combined.md written above, so they require it.
  const wantsChunks = Boolean(options.chunkSize && options.chunkSize > 0);
  if (wantsChunks && artifacts.combined) {
    const chunkSpinner = ora({
      text: `Chunking into ~${options.chunkSize}-token slices`,
      color: "cyan",
    }).start();
    const chunkDir = path.join(absoluteOutput, "chunks");
    artifacts.chunks = await failSpinnerOnError(chunkSpinner, async () => {
      const combinedMarkdown = await fs.readFile(artifacts.combined, "utf8");
      return exportChunks({
        outputDir: chunkDir,
        combinedMarkdown,
        chunkTokens: options.chunkSize,
      });
    });
    chunkSpinner.succeed(`Wrote ${artifacts.chunks.length} chunk(s) to ${chunkDir}`);
  } else if (wantsChunks) {
    logger.warn("chunkSize was set but combined.md is disabled (emitCombined: false); skipping chunk export.");
  }

  const summary = {
    inputDir: absoluteInput,
    outputDir: absoluteOutput,
    generatedAt,
    fileCount: parsed.length,
    totalBytes: parsed.reduce((acc, f) => acc + f.bytes, 0),
    totalTokens: parsed.reduce((acc, f) => acc + f.estimatedTokens, 0),
    durationMs: Date.now() - startedAt,
    files: parsed.map((f) => ({
      path: f.relativePath,
      kind: f.kind,
      bytes: f.bytes,
      tokens: f.estimatedTokens,
      parseError: f.parseError,
    })),
    artifacts,
  };

  if (options.emitMetadata !== false) {
    const metaPath = path.join(absoluteOutput, "metadata.json");
    await exportMetadata(metaPath, summary);
    // Recorded after the write: the returned summary lists metadata.json, but
    // the summary object handed to exportMetadata did not yet contain it.
    summary.artifacts.metadata = metaPath;
  }
  return summary;
}
|
|
128
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,GAAG,MAAM,KAAK,CAAC;AAEtB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AACjE,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,OAAqB;IAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAEpD,MAAM,WAAW,GAAG,GAAG,CAAC;QACtB,IAAI,EAAE,YAAY,aAAa,EAAE;QACjC,KAAK,EAAE,MAAM;KACd,CAAC,CAAC,KAAK,EAAE,CAAC;IAEX,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE;QACjD,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;KAC3C,CAAC,CAAC;IAEH,WAAW,CAAC,OAAO,CAAC,SAAS,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;IAEvD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,mDAAmD,CAAC,CAAC;IACnE,CAAC;IAED,MAAM,YAAY,GAAG,GAAG,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;IACpD,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,CAAC,IAAI,CAAC,CAAC;QACP,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAChD,YAAY,CAAC,IAAI,GAAG,YAAY,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,YAAY,EAAE,CAAC;QAE7E,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,UAA8B,CAAC;QACnC,IAAI,CAAC;YACH,OAAO,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,YAAY,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC/D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACV,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,
YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,QAAQ,EAAE,cAAc,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,YAAY,CAAC;YAChE,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO;YACP,eAAe,EAAE,cAAc,CAAC,OAAO,CAAC;YACxC,UAAU;SACX,CAAC,CAAC;IACL,CAAC;IAED,YAAY,CAAC,OAAO,CAAC,UAAU,MAAM,CAAC,MAAM,UAAU,CAAC,CAAC;IAExD,MAAM,EAAE,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IAEnC,MAAM,SAAS,GAA8B,EAAE,CAAC;IAChD,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,mBAAmB,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;IAEjF,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAC9D,IAAI,OAAO,CAAC,YAAY,KAAK,KAAK,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,GAAG,CAAC;YACxB,IAAI,EAAE,qBAAqB;YAC3B,KAAK,EAAE,MAAM;SACd,CAAC,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,sBAAsB,CAAC,MAAM,EAAE;YACnC,UAAU,EAAE,YAAY;YACxB,KAAK;YACL,QAAQ,EAAE,aAAa;YACvB,WAAW;SACZ,CAAC,CAAC;QACH,SAAS,CAAC,QAAQ,GAAG,YAAY,CAAC;QAClC,aAAa,CAAC,OAAO,CAAC,wBAAwB,MAAM,CAAC,MAAM,YAAY,CAAC,CAAC;IAC3E,CAAC;IAED,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,UAAU,GAAG,GAAG,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;QAC/E,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;QACzD,MAAM,eAAe,CAAC,MAAM,EAAE;YAC5B,UAAU,EAAE,OAAO;YACnB,QAAQ,EAAE,aAAa;YACvB,WAAW;SACZ,CAAC,CAAC;QACH,SAAS,CAAC,GAAG,GAAG,OAAO,CAAC;QACxB,UAAU,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACrE,MAAM,YAAY,GAAG,GAAG,CAAC;YACvB,IAAI,EAAE,kBAAkB,OAAO,CAAC,SAAS,eAAe;YACxD,KAAK,EAAE,MAAM;SACd,CAAC,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,gBAAgB,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;QACrD,SAAS,CAAC,MAAM,GAAG,MAAM,YAAY,CAAC;YACpC,SAAS,EAAE,QAAQ;YACnB,gBAAgB;YAChB,WAAW,EAAE,OAAO,CAAC,SAAS;SAC/B,CAAC,CAAC;QACH,YAAY,CAAC,OAAO,CAAC,SAAS,SAAS,CAAC,MAAM,CAAC,MAAM,gBAAgB,QAAQ,EAAE,CAAC,CAAC;IACnF,CAAC;IAED,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,aAAa;QACvB,SAA
S,EAAE,cAAc;QACzB,WAAW;QACX,SAAS,EAAE,MAAM,CAAC,MAAM;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACvD,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAClE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QAClC,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACxB,IAAI,EAAE,CAAC,CAAC,YAAY;YACpB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,MAAM,EAAE,CAAC,CAAC,eAAe;YACzB,UAAU,EAAE,CAAC,CAAC,UAAU;SACzB,CAAC,CAAC;QACH,SAAS;KACV,CAAC;IAEF,IAAI,OAAO,CAAC,YAAY,KAAK,KAAK,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAC5D,MAAM,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACxC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACxC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code.d.ts","sourceRoot":"","sources":["../../src/parsers/code.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAGrE"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code.js","sourceRoot":"","sources":["../../src/parsers/code.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;AAC7D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docx.d.ts","sourceRoot":"","sources":["../../src/parsers/docx.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAmBrE"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import mammoth from "mammoth";
|
|
2
|
+
/**
 * Extract the text of a .docx file, preferring Markdown output.
 *
 * `convertToMarkdown` exists on mammoth at runtime but is not part of its
 * public types, so it is feature-detected. If it is missing, or if the
 * Markdown conversion throws, the function falls back to mammoth's raw-text
 * extraction.
 *
 * @param {string} absolutePath - Path of the .docx file to read.
 * @returns {Promise<string>} Trimmed text with LF line endings.
 * @throws Propagates the error from `extractRawText` when the fallback also fails.
 */
export async function parseDocx(absolutePath) {
  if (typeof mammoth.convertToMarkdown === "function") {
    try {
      const { value } = await mammoth.convertToMarkdown({ path: absolutePath });
      return value
        .replace(/\r\n/g, "\n")
        // Collapse runs of blank lines down to a single blank line.
        .replace(/\n{3,}/g, "\n\n")
        .trim();
    } catch {
      // Markdown conversion failed; fall through to raw text. A genuinely
      // unreadable file will still raise from extractRawText below.
    }
  }
  const { value } = await mammoth.extractRawText({ path: absolutePath });
  return value.replace(/\r\n/g, "\n").trim();
}
|
|
16
|
+
//# sourceMappingURL=docx.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docx.js","sourceRoot":"","sources":["../../src/parsers/docx.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,6EAA6E;IAC7E,2EAA2E;IAC3E,MAAM,UAAU,GAAG,OAIlB,CAAC;IAEF,IAAI,OAAO,UAAU,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;QACvD,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,UAAU,CAAC,iBAAiB,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;QAC7E,OAAO,KAAK;aACT,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;aAC1B,IAAI,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;IACvE,OAAO,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAC7C,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { FileKind } from "../types.js";
|
|
2
|
+
/** Classification of a file path as produced by `detectKind`. */
export interface DetectedFile {
    /** Parser category selected for the file. */
    kind: FileKind;
    /** Extension associated with the file (including the leading dot). */
    extension: string;
}
/**
 * Classify a file by its path.
 *
 * @param absolutePath Path of the file to classify.
 * @returns The detected kind together with the extension it was derived from.
 */
export declare function detectKind(absolutePath: string): DetectedFile;
/**
 * Read a file and return its text using the parser that matches `kind`.
 *
 * @param absolutePath Path of the file to read.
 * @param kind Parser category, typically taken from `detectKind`.
 * @returns The extracted text.
 */
export declare function parseFile(absolutePath: string, kind: FileKind): Promise<string>;
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsers/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAS5C,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,UAAU,CAAC,YAAY,EAAE,MAAM,GAAG,YAAY,CA6B7D;AAED,wBAAsB,SAAS,CAC7B,YAAY,EAAE,MAAM,EACpB,IAAI,EAAE,QAAQ,GACb,OAAO,CAAC,MAAM,CAAC,CAiBjB"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { isCodeExtension } from "../utils/language.js";
|
|
3
|
+
import { parseCode } from "./code.js";
|
|
4
|
+
import { parseDocx } from "./docx.js";
|
|
5
|
+
import { parseJson } from "./json.js";
|
|
6
|
+
import { parseMarkdown } from "./markdown.js";
|
|
7
|
+
import { parsePdf } from "./pdf.js";
|
|
8
|
+
import { parseText } from "./text.js";
|
|
9
|
+
// Extensions that map directly onto a parser kind, checked before anything else.
const KIND_BY_EXTENSION = new Map([
  [".pdf", "pdf"],
  [".docx", "docx"],
  [".md", "markdown"],
  [".mdx", "markdown"],
  [".markdown", "markdown"],
  [".json", "json"],
  [".jsonc", "json"],
  [".txt", "text"],
  [".log", "text"],
  [".rst", "text"],
]);

/**
 * Classify a file by its path.
 *
 * Lookup order: the fixed extension table, then `isCodeExtension`, then the
 * extension-less build files (Dockerfile / Dockerfile.* and Makefile, matched
 * case-insensitively on the base name). Anything else is "unknown".
 *
 * @param {string} absolutePath - Path of the file to classify.
 * @returns {{ kind: string, extension: string }} The kind plus the lower-cased
 *   extension, or a synthetic ".dockerfile" / ".makefile" for build files.
 */
export function detectKind(absolutePath) {
  const extension = path.extname(absolutePath).toLowerCase();

  const mappedKind = KIND_BY_EXTENSION.get(extension);
  if (mappedKind !== undefined) {
    return { kind: mappedKind, extension };
  }
  if (isCodeExtension(extension)) {
    return { kind: "code", extension };
  }

  const fileName = path.basename(absolutePath).toLowerCase();
  const isDockerfile = fileName === "dockerfile" || fileName.startsWith("dockerfile.");
  if (isDockerfile) {
    return { kind: "code", extension: ".dockerfile" };
  }
  if (fileName === "makefile") {
    return { kind: "code", extension: ".makefile" };
  }
  return { kind: "unknown", extension };
}
|
|
38
|
+
/**
 * Read a file and return its text using the parser that matches `kind`.
 *
 * "text", "unknown", and any unrecognized kind are all read as plain text, so
 * the function always resolves to whatever a parser returned instead of
 * resolving to `undefined` for an unexpected kind.
 *
 * @param {string} absolutePath - Path of the file to read.
 * @param {string} kind - Parser category, typically from `detectKind`.
 * @returns {Promise<string>} The text produced by the selected parser.
 * @throws Propagates any error raised by the selected parser.
 */
export async function parseFile(absolutePath, kind) {
  switch (kind) {
    case "pdf":
      return parsePdf(absolutePath);
    case "docx":
      return parseDocx(absolutePath);
    case "markdown":
      return parseMarkdown(absolutePath);
    case "json":
      return parseJson(absolutePath);
    case "code":
      return parseCode(absolutePath);
    case "text":
    case "unknown":
    default:
      // Best effort: treat anything without a dedicated parser as plain text.
      return parseText(absolutePath);
  }
}
|
|
56
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsers/index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAOtC,MAAM,UAAU,UAAU,CAAC,YAAoB;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,WAAW,EAAE,CAAC;IAC3D,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,WAAW,EAAE,CAAC;IAEvD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,MAAM;YACT,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QACpC,KAAK,OAAO;YACV,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QACrC,KAAK,KAAK,CAAC;QACX,KAAK,MAAM,CAAC;QACZ,KAAK,WAAW;YACd,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC;QACzC,KAAK,OAAO,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QACrC,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM;YACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACvC,CAAC;IAED,IAAI,eAAe,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACnE,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC5D,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC;IACpD,CAAC;IACD,IAAI,IAAI,KAAK,UAAU;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IAEzE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;AACxC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,YAAoB,EACpB,IAAc;IAEd,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,KAAK;YACR,OAAO,QAAQ,CAAC,YAAY,CAAC,CAAC;QAChC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,UAAU;YACb,OAAO,aAAa,CAAC,YAAY,CAAC,CAAC;QACrC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,SAAS;YACZ,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/parsers/json.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAQrE"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import fs from "fs-extra";
|
|
2
|
+
/**
 * Read a JSON file and return it pretty-printed with two-space indentation.
 *
 * A leading UTF-8 byte-order mark is removed before parsing, because
 * `JSON.parse` rejects it and the file would otherwise never be reformatted.
 * Content that still fails to parse (for example JSON with comments) is
 * returned as-is, trimmed and with CRLF line endings converted to LF.
 *
 * @param {string} absolutePath - Path of the file to read.
 * @returns {Promise<string>} Pretty-printed JSON, or the normalized raw text.
 * @throws Propagates read errors from `fs.readFile`.
 */
export async function parseJson(absolutePath) {
  const raw = await fs.readFile(absolutePath, "utf8");
  // readFile does not strip the BOM when decoding as utf8.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  try {
    return JSON.stringify(JSON.parse(text), null, 2);
  } catch {
    // Deliberate best effort: keep unparseable content rather than failing.
    return text.replace(/\r\n/g, "\n").trim();
  }
}
|
|
12
|
+
//# sourceMappingURL=json.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/parsers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/B,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/parsers/markdown.ts"],"names":[],"mappings":"AAMA,wBAAsB,aAAa,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAKzE"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import fs from "fs-extra";
|
|
2
|
+
import { remark } from "remark";
|
|
3
|
+
import remarkGfm from "remark-gfm";
|
|
4
|
+
// Shared remark pipeline with the GFM plugin enabled; built once per module.
const gfmPipeline = remark().use(remarkGfm);

/**
 * Read a Markdown file and return it re-serialized by remark.
 *
 * The round trip through remark is what normalizes the document; afterwards
 * CRLF line endings are converted to LF and the result is trimmed.
 *
 * @param {string} absolutePath - Path of the Markdown file to read.
 * @returns {Promise<string>} The normalized Markdown text.
 */
export async function parseMarkdown(absolutePath) {
  const source = await fs.readFile(absolutePath, "utf8");
  const rendered = String(await gfmPipeline.process(source));
  const withUnixNewlines = rendered.replace(/\r\n/g, "\n");
  return withUnixNewlines.trim();
}
|
|
11
|
+
//# sourceMappingURL=markdown.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/parsers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,SAAS,MAAM,YAAY,CAAC;AAEnC,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,YAAoB;IACtD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,wEAAwE;IACxE,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC1C,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AACpD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/parsers/pdf.ts"],"names":[],"mappings":"AAIA,wBAAsB,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIpE"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import fs from "fs-extra";
|
|
2
|
+
// pdf-parse is CommonJS; import the default function.
|
|
3
|
+
import pdfParse from "pdf-parse";
|
|
4
|
+
/**
 * Extract the text of a PDF file.
 *
 * The file is read as a raw buffer, handed to pdf-parse, and the resulting
 * `text` is cleaned up by `normalize`.
 *
 * @param {string} absolutePath - Path of the PDF file to read.
 * @returns {Promise<string>} The normalized text of the document.
 */
export async function parsePdf(absolutePath) {
  const pdfBytes = await fs.readFile(absolutePath);
  const { text } = await pdfParse(pdfBytes);
  return normalize(text);
}
|
|
9
|
+
/**
 * Clean up text extracted from a PDF.
 *
 * Steps, in order: CRLF to LF, non-breaking spaces to regular spaces, trailing
 * spaces/tabs removed from each line, runs of three or more newlines collapsed
 * to one blank line, and surrounding whitespace trimmed.
 *
 * @param {string} text - Raw extracted text.
 * @returns {string} The normalized text.
 */
function normalize(text) {
  return text
    .replace(/\r\n/g, "\n")
    // Written as an escape so the non-breaking space is visible in source and
    // cannot be silently turned into a plain space by an editor or formatter.
    .replace(/\u00a0/g, " ")
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
|
|
17
|
+
//# sourceMappingURL=pdf.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/parsers/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,sDAAsD;AACtD,OAAO,QAAQ,MAAM,WAAW,CAAC;AAEjC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,YAAoB;IACjD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;IACtC,OAAO,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../src/parsers/text.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAGrE"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/parsers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Glob patterns that are always ignored, in addition to any caller-supplied excludes. */
export declare const DEFAULT_EXCLUDES: string[];
/** Glob patterns used when the caller supplies no include patterns. */
export declare const DEFAULT_INCLUDES: string[];
/** Filters applied while scanning a directory. */
export interface ScannerOptions {
    /** Glob patterns to match; an empty or missing list selects `DEFAULT_INCLUDES`. */
    include?: string[];
    /** Extra glob patterns to ignore, appended to `DEFAULT_EXCLUDES`. */
    exclude?: string[];
    /** Whether symbolic links are followed; treated as `false` when omitted. */
    followSymlinks?: boolean;
    /** Files larger than this many bytes are skipped; defaults to 10 MiB. */
    maxFileSizeBytes?: number;
}
/** One file selected by the scanner. */
export interface ScannedFile {
    /** Absolute path of the file. */
    absolutePath: string;
    /** Path relative to the scan root, or the base name when the root is itself a file. */
    relativePath: string;
    /** Size of the file in bytes. */
    bytes: number;
}
/**
 * List the files under `rootDir` that pass the scan filters.
 *
 * @param rootDir Directory to scan, or a single file.
 * @param opts Optional scan filters.
 * @returns The selected files, sorted by relative path.
 */
export declare function scanDirectory(rootDir: string, opts?: ScannerOptions): Promise<ScannedFile[]>;
|
|
15
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scanner/index.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,gBAAgB,UAiB5B,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,CAAC;AAEzC,MAAM,WAAW,cAAc;IAC7B,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,WAAW;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAsB,aAAa,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,GAAE,cAAmB,GACxB,OAAO,CAAC,WAAW,EAAE,CAAC,CA8CxB"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fg from "fast-glob";
|
|
3
|
+
import fs from "fs-extra";
|
|
4
|
+
// Directories that are ignored wholesale, wherever they appear in the tree.
const EXCLUDED_DIRECTORIES = [
  "node_modules",
  ".git",
  "dist",
  "build",
  "out",
  ".next",
  ".nuxt",
  ".cache",
  ".turbo",
  ".vercel",
  "coverage",
];

// Individual file names / file globs that are ignored at any depth.
const EXCLUDED_FILES = [
  ".DS_Store",
  "*.lock",
  "pnpm-lock.yaml",
  "package-lock.json",
  "yarn.lock",
];

/** Glob patterns that every scan ignores, before caller-supplied excludes are added. */
export const DEFAULT_EXCLUDES = [
  ...EXCLUDED_DIRECTORIES.map((dir) => `**/${dir}/**`),
  ...EXCLUDED_FILES.map((file) => `**/${file}`),
];

/** Glob patterns used when the caller does not supply any includes. */
export const DEFAULT_INCLUDES = ["**/*"];
|
|
23
|
+
/**
 * List the files under `rootDir` that pass the scan filters.
 *
 * When `rootDir` is itself a file, that single file is returned directly and
 * none of the glob or size filters are applied to it.
 *
 * @param {string} rootDir - Directory to scan, or a single file.
 * @param {object} [opts] - Scan filters.
 * @param {string[]} [opts.include] - Globs to match; defaults to `DEFAULT_INCLUDES` when empty.
 * @param {string[]} [opts.exclude] - Extra globs to ignore on top of `DEFAULT_EXCLUDES`.
 * @param {boolean} [opts.followSymlinks] - Follow symbolic links (default `false`).
 * @param {number} [opts.maxFileSizeBytes] - Skip files larger than this (default 10 MiB).
 * @returns {Promise<Array<{absolutePath: string, relativePath: string, bytes: number}>>}
 *   The selected files, sorted by relative path.
 * @throws {Error} When `rootDir` cannot be stat'ed.
 */
export async function scanDirectory(rootDir, opts = {}) {
  const absoluteRoot = path.resolve(rootDir);

  // Any stat failure is reported uniformly as "does not exist".
  const rootStat = await fs.stat(absoluteRoot).catch(() => null);
  if (!rootStat) {
    throw new Error(`Input path does not exist: ${rootDir}`);
  }

  if (rootStat.isFile()) {
    const onlyFile = {
      absolutePath: absoluteRoot,
      relativePath: path.basename(absoluteRoot),
      bytes: rootStat.size,
    };
    return [onlyFile];
  }

  const patterns = opts.include?.length ? opts.include : DEFAULT_INCLUDES;
  const ignored = [...DEFAULT_EXCLUDES, ...(opts.exclude ?? [])];
  const maxBytes = opts.maxFileSizeBytes ?? 10 * 1024 * 1024;

  const matches = await fg(patterns, {
    cwd: absoluteRoot,
    ignore: ignored,
    dot: false,
    onlyFiles: true,
    followSymbolicLinks: opts.followSymlinks ?? false,
    absolute: true,
    stats: true,
    suppressErrors: true,
  });

  const files = matches
    // Entries without stats count as size 0 and are dropped with the empty files.
    .map((match) => ({ match, size: match.stats?.size ?? 0 }))
    .filter(({ size }) => size !== 0 && !(size > maxBytes))
    .map(({ match, size }) => ({
      absolutePath: match.path,
      relativePath: path.relative(absoluteRoot, match.path),
      bytes: size,
    }));

  files.sort((left, right) => left.relativePath.localeCompare(right.relativePath));
  return files;
}
|
|
66
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scanner/index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,WAAW,CAAC;AAC3B,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,oBAAoB;IACpB,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,WAAW;IACX,aAAa;IACb,aAAa;IACb,cAAc;IACd,cAAc;IACd,eAAe;IACf,gBAAgB;IAChB,cAAc;IACd,WAAW;IACX,mBAAmB;IACnB,sBAAsB;IACtB,cAAc;CACf,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,MAAM,CAAC,CAAC;AAezC,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,OAAe,EACf,OAAuB,EAAE;IAEzB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,OAAO,EAAE,CAAC,CAAC;IAEpE,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QAClB,OAAO;YACL;gBACE,YAAY,EAAE,YAAY;gBAC1B,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC;gBACzC,KAAK,EAAE,IAAI,CAAC,IAAI;aACjB;SACF,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACvE,MAAM,OAAO,GAAG,CAAC,GAAG,gBAAgB,EAAE,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;IAE/D,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,EAAE;QAChC,GAAG,EAAE,YAAY;QACjB,MAAM,EAAE,OAAO;QACf,GAAG,EAAE,KAAK;QACV,SAAS,EAAE,IAAI;QACf,mBAAmB,EAAE,IAAI,CAAC,cAAc,IAAI,KAAK;QACjD,QAAQ,EAAE,IAAI;QACd,KAAK,EAAE,IAAI;QACX,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,IAAI,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3D,MAAM,KAAK,GAAkB,EAAE,CAAC;IAEhC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,CAAC;QACpC,IAAI,IAAI,KAAK,CAAC;YAAE,SAAS;QACzB,IAAI,IAAI,GAAG,QAAQ;YAAE,SAAS;QAE9B,KAAK,CAAC,IAAI,CAAC;YACT,YAAY,EAAE,KAAK,CAAC,IAAI;YACxB,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC;YACrD,KAAK,EAAE,IAAI;SACZ,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC;IACnE,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/** Parser category assigned to a scanned file. */
export type FileKind =
    | "pdf"
    | "docx"
    | "markdown"
    | "text"
    | "json"
    | "code"
    | "unknown";
|
|
2
|
+
/** Options that control which files are picked up and where output goes. */
export interface ScanOptions {
    /** Path to scan; resolved to an absolute path before use. */
    input: string;
    /** Directory that receives the generated artifacts; resolved to an absolute path. */
    output: string;
    /** Glob patterns of files to include. */
    include?: string[];
    /** Glob patterns of files to ignore. */
    exclude?: string[];
    /** Upper bound on file size, in bytes. */
    maxFileSizeBytes?: number;
    /** Whether symbolic links are followed during the scan. */
    followSymlinks?: boolean;
}
|
|
10
|
+
/** A scanned file together with its extracted text. */
export interface ParsedFile {
    /** Absolute path of the file. */
    absolutePath: string;
    /** Path of the file as reported by the scanner. */
    relativePath: string;
    /** Parser category that was used for the file. */
    kind: FileKind;
    /** Extension reported by kind detection. */
    extension: string;
    /** Language derived from the extension and relative path, when one was detected. */
    language?: string;
    /** Size of the file in bytes. */
    bytes: number;
    /** Extracted text; the empty string when parsing failed. */
    content: string;
    /** Token estimate computed from `content`. */
    estimatedTokens: number;
    /** Message of the error raised while parsing; absent when parsing succeeded. */
    parseError?: string;
}
|
|
21
|
+
/**
 * `ScanOptions` extended with optional build settings: a numeric `chunkSize`,
 * three boolean `emit*` switches and a `title` string.
 *
 * NOTE(review): `chunkSize` is presumably the per-chunk token target and the
 * `emit*` flags presumably toggle the XML, metadata and combined outputs.
 * Their defaults are not visible here; confirm in the build entry point.
 */
export interface BuildOptions extends ScanOptions {
|
|
22
|
+
chunkSize?: number;
|
|
23
|
+
emitXml?: boolean;
|
|
24
|
+
emitMetadata?: boolean;
|
|
25
|
+
emitCombined?: boolean;
|
|
26
|
+
title?: string;
|
|
27
|
+
}
|
|
28
|
+
/**
 * Result record for a build: `inputDir`/`outputDir` strings, a `generatedAt`
 * string, numeric aggregates (`fileCount`, `totalBytes`, `totalTokens`,
 * `durationMs`), a `files` array of per-file entries (path, kind, bytes,
 * tokens and an optional `parseError`), and an `artifacts` object whose
 * members are all optional.
 *
 * NOTE(review): the `artifacts` values look like output file paths and
 * `generatedAt` like a timestamp string; confirm the exact formats with the
 * code that fills this record in.
 */
export interface BuildSummary {
|
|
29
|
+
inputDir: string;
|
|
30
|
+
outputDir: string;
|
|
31
|
+
generatedAt: string;
|
|
32
|
+
fileCount: number;
|
|
33
|
+
totalBytes: number;
|
|
34
|
+
totalTokens: number;
|
|
35
|
+
durationMs: number;
|
|
36
|
+
files: Array<{
|
|
37
|
+
path: string;
|
|
38
|
+
kind: FileKind;
|
|
39
|
+
bytes: number;
|
|
40
|
+
tokens: number;
|
|
41
|
+
parseError?: string;
|
|
42
|
+
}>;
|
|
43
|
+
artifacts: {
|
|
44
|
+
combined?: string;
|
|
45
|
+
metadata?: string;
|
|
46
|
+
xml?: string;
|
|
47
|
+
chunks?: string[];
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAChB,KAAK,GACL,MAAM,GACN,UAAU,GACV,MAAM,GACN,MAAM,GACN,MAAM,GACN,SAAS,CAAC;AAEd,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,QAAQ,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAa,SAAQ,WAAW;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,KAAK,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,QAAQ,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC,CAAC;IACH,SAAS,EAAE;QACT,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;KACnB,CAAC;CACH"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * One piece of text produced by `chunkText`.
 * `index` is the chunk's zero-based position in the returned array,
 * `tokens` is the summed token estimate of the text packed into the chunk,
 * and `content` is the chunk's text.
 */
export interface Chunk {
|
|
2
|
+
index: number;
|
|
3
|
+
tokens: number;
|
|
4
|
+
content: string;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Split text into chunks of approximately `targetTokens` tokens,
 * preferring paragraph and line boundaries to avoid mid-sentence cuts.
 *
 * @param text - Text to split.
 * @param targetTokens - Optional approximate token budget per chunk; the
 *   implementation defaults it to 6_000.
 * @returns The chunks in document order; an empty array when `text` contains
 *   only whitespace.
 */
|
|
10
|
+
export declare function chunkText(text: string, targetTokens?: number): Chunk[];
|
|
11
|
+
//# sourceMappingURL=chunking.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunking.d.ts","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,KAAK;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,SAAQ,GAAG,KAAK,EAAE,CA0DrE"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { estimateTokens } from "./tokens.js";
|
|
2
|
+
/**
 * Split text into chunks of approximately `targetTokens` tokens,
 * preferring paragraph and line boundaries to avoid mid-sentence cuts.
 *
 * Paragraphs (runs separated by two or more newlines) are packed greedily
 * into a chunk until adding the next one would exceed the budget. A paragraph
 * that is itself over budget is packed line by line, and a single line that
 * is still over budget (minified code, one-line JSON) is cut into slices so
 * that it cannot produce one arbitrarily large chunk.
 *
 * @param {string} text - Text to split.
 * @param {number} [targetTokens=6000] - Approximate token budget per chunk.
 * @returns {Array<{index: number, tokens: number, content: string}>} Chunks in
 *   document order, `index` being the zero-based position in the array;
 *   empty when `text` contains only whitespace.
 */
export function chunkText(text, targetTokens = 6_000) {
  if (!text.trim()) return [];

  const chunks = [];

  // Single place where chunks are appended, so every path shares the same
  // rule: whitespace-only content never becomes a chunk.
  const emit = (content, tokens) => {
    if (!content.trim()) return;
    chunks.push({ index: chunks.length, tokens, content });
  };

  let buffer = "";
  let bufferTokens = 0;

  // Emit the pending paragraphs (trimmed) and start a new buffer. A blank
  // buffer is left untouched, exactly as before.
  const flush = () => {
    if (!buffer.trim()) return;
    emit(buffer.trim(), bufferTokens);
    buffer = "";
    bufferTokens = 0;
  };

  for (const paragraph of text.split(/\n{2,}/)) {
    const pTokens = estimateTokens(paragraph);

    if (pTokens > targetTokens) {
      flush();

      // Hard-split very large blocks line by line.
      let lineBuf = "";
      let lineTokens = 0;
      const flushLines = () => {
        if (lineBuf) emit(lineBuf, lineTokens);
        lineBuf = "";
        lineTokens = 0;
      };

      for (const line of paragraph.split("\n")) {
        const lt = estimateTokens(line);

        // A line that alone exceeds the budget cannot be packed; slice it.
        // The `targetTokens > 0` guard keeps a zero/negative budget from
        // degenerating into one chunk per character.
        if (lt > targetTokens && targetTokens > 0) {
          flushLines();
          for (const piece of sliceOversizedLine(line, lt, targetTokens)) {
            emit(piece, estimateTokens(piece));
          }
          continue;
        }

        if (lineTokens + lt > targetTokens && lineBuf) flushLines();
        lineBuf += (lineBuf ? "\n" : "") + line;
        lineTokens += lt;
      }

      flushLines();
      continue;
    }

    if (bufferTokens + pTokens > targetTokens) flush();
    buffer += (buffer ? "\n\n" : "") + paragraph;
    bufferTokens += pTokens;
  }

  flush();
  return chunks;
}

/**
 * Cut one over-budget line into consecutive slices of roughly `targetTokens`
 * tokens each. Concatenating the slices reproduces the line.
 *
 * @param {string} line - The line to cut; its estimate exceeds the budget.
 * @param {number} lineTokens - Token estimate of the whole line.
 * @param {number} targetTokens - Positive token budget per slice.
 * @returns {string[]} The slices in order.
 */
function sliceOversizedLine(line, lineTokens, targetTokens) {
  // Scale the character length by budget/estimate to get the slice length;
  // at least one character per slice guarantees progress.
  const sliceLength = Math.max(
    1,
    Math.floor((line.length * targetTokens) / lineTokens),
  );

  const pieces = [];
  let start = 0;
  while (start < line.length) {
    let end = Math.min(line.length, start + sliceLength);
    // Never cut between the two halves of a surrogate pair.
    const lastUnit = line.charCodeAt(end - 1);
    if (end < line.length && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end += 1;
    }
    pieces.push(line.slice(start, end));
    start = end;
  }
  return pieces;
}
|
|
62
|
+
//# sourceMappingURL=chunking.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAQ7C;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,YAAY,GAAG,KAAK;IAC1D,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAE5B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,MAAM,KAAK,GAAG,GAAS,EAAE;QACvB,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;YAAE,OAAO;QAC3B,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,MAAM,CAAC,MAAM;YACpB,MAAM,EAAE,YAAY;YACpB,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE;SACvB,CAAC,CAAC;QACH,MAAM,GAAG,EAAE,CAAC;QACZ,YAAY,GAAG,CAAC,CAAC;IACnB,CAAC,CAAC;IAEF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAE1C,IAAI,OAAO,GAAG,YAAY,EAAE,CAAC;YAC3B,KAAK,EAAE,CAAC;YACR,6CAA6C;YAC7C,IAAI,OAAO,GAAG,EAAE,CAAC;YACjB,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzC,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;gBAChC,IAAI,UAAU,GAAG,EAAE,GAAG,YAAY,IAAI,OAAO,EAAE,CAAC;oBAC9C,MAAM,CAAC,IAAI,CAAC;wBACV,KAAK,EAAE,MAAM,CAAC,MAAM;wBACpB,MAAM,EAAE,UAAU;wBAClB,OAAO,EAAE,OAAO;qBACjB,CAAC,CAAC;oBACH,OAAO,GAAG,EAAE,CAAC;oBACb,UAAU,GAAG,CAAC,CAAC;gBACjB,CAAC;gBACD,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;gBACxC,UAAU,IAAI,EAAE,CAAC;YACnB,CAAC;YACD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK,EAAE,MAAM,CAAC,MAAM;oBACpB,MAAM,EAAE,UAAU;oBAClB,OAAO,EAAE,OAAO;iBACjB,CAAC,CAAC;YACL,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,YAAY,GAAG,OAAO,GAAG,YAAY;YAAE,KAAK,EAAE,CAAC;QACnD,MAAM,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC;QAC7C,YAAY,IAAI,OAAO,CAAC;IAC1B,CAAC;IAED,KAAK,EAAE,CAAC;IACR,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"language.d.ts","sourceRoot":"","sources":["../../src/utils/language.ts"],"names":[],"mappings":"AA0DA,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAM1E;AAID,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAE1D"}
|