ai-contextify 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/LICENSE +21 -0
  2. package/README.md +237 -0
  3. package/dist/cli.d.ts +3 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +93 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/exporters/chunks.d.ts +7 -0
  8. package/dist/exporters/chunks.d.ts.map +1 -0
  9. package/dist/exporters/chunks.js +18 -0
  10. package/dist/exporters/chunks.js.map +1 -0
  11. package/dist/exporters/markdown.d.ts +9 -0
  12. package/dist/exporters/markdown.d.ts.map +1 -0
  13. package/dist/exporters/markdown.js +62 -0
  14. package/dist/exporters/markdown.js.map +1 -0
  15. package/dist/exporters/metadata.d.ts +3 -0
  16. package/dist/exporters/metadata.d.ts.map +1 -0
  17. package/dist/exporters/metadata.js +7 -0
  18. package/dist/exporters/metadata.js.map +1 -0
  19. package/dist/exporters/xml.d.ts +12 -0
  20. package/dist/exporters/xml.d.ts.map +1 -0
  21. package/dist/exporters/xml.js +35 -0
  22. package/dist/exporters/xml.js.map +1 -0
  23. package/dist/index.d.ts +4 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +128 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/parsers/code.d.ts +2 -0
  28. package/dist/parsers/code.d.ts.map +1 -0
  29. package/dist/parsers/code.js +6 -0
  30. package/dist/parsers/code.js.map +1 -0
  31. package/dist/parsers/docx.d.ts +2 -0
  32. package/dist/parsers/docx.d.ts.map +1 -0
  33. package/dist/parsers/docx.js +16 -0
  34. package/dist/parsers/docx.js.map +1 -0
  35. package/dist/parsers/index.d.ts +8 -0
  36. package/dist/parsers/index.d.ts.map +1 -0
  37. package/dist/parsers/index.js +56 -0
  38. package/dist/parsers/index.js.map +1 -0
  39. package/dist/parsers/json.d.ts +2 -0
  40. package/dist/parsers/json.d.ts.map +1 -0
  41. package/dist/parsers/json.js +12 -0
  42. package/dist/parsers/json.js.map +1 -0
  43. package/dist/parsers/markdown.d.ts +2 -0
  44. package/dist/parsers/markdown.d.ts.map +1 -0
  45. package/dist/parsers/markdown.js +11 -0
  46. package/dist/parsers/markdown.js.map +1 -0
  47. package/dist/parsers/pdf.d.ts +2 -0
  48. package/dist/parsers/pdf.d.ts.map +1 -0
  49. package/dist/parsers/pdf.js +17 -0
  50. package/dist/parsers/pdf.js.map +1 -0
  51. package/dist/parsers/text.d.ts +2 -0
  52. package/dist/parsers/text.d.ts.map +1 -0
  53. package/dist/parsers/text.js +6 -0
  54. package/dist/parsers/text.js.map +1 -0
  55. package/dist/scanner/index.d.ts +15 -0
  56. package/dist/scanner/index.d.ts.map +1 -0
  57. package/dist/scanner/index.js +66 -0
  58. package/dist/scanner/index.js.map +1 -0
  59. package/dist/types.d.ts +50 -0
  60. package/dist/types.d.ts.map +1 -0
  61. package/dist/types.js +2 -0
  62. package/dist/types.js.map +1 -0
  63. package/dist/utils/chunking.d.ts +11 -0
  64. package/dist/utils/chunking.d.ts.map +1 -0
  65. package/dist/utils/chunking.js +62 -0
  66. package/dist/utils/chunking.js.map +1 -0
  67. package/dist/utils/language.d.ts +3 -0
  68. package/dist/utils/language.d.ts.map +1 -0
  69. package/dist/utils/language.js +72 -0
  70. package/dist/utils/language.js.map +1 -0
  71. package/dist/utils/logger.d.ts +9 -0
  72. package/dist/utils/logger.d.ts.map +1 -0
  73. package/dist/utils/logger.js +10 -0
  74. package/dist/utils/logger.js.map +1 -0
  75. package/dist/utils/tokens.d.ts +9 -0
  76. package/dist/utils/tokens.d.ts.map +1 -0
  77. package/dist/utils/tokens.js +22 -0
  78. package/dist/utils/tokens.js.map +1 -0
  79. package/package.json +62 -0
package/dist/index.js ADDED
@@ -0,0 +1,128 @@
1
+ import path from "node:path";
2
+ import fs from "fs-extra";
3
+ import ora from "ora";
4
+ import { detectKind, parseFile } from "./parsers/index.js";
5
+ import { scanDirectory } from "./scanner/index.js";
6
+ import { exportCombinedMarkdown } from "./exporters/markdown.js";
7
+ import { exportMetadata } from "./exporters/metadata.js";
8
+ import { exportClaudeXml } from "./exporters/xml.js";
9
+ import { exportChunks } from "./exporters/chunks.js";
10
+ import { estimateTokens } from "./utils/tokens.js";
11
+ import { detectLanguage } from "./utils/language.js";
12
+ import { logger } from "./utils/logger.js";
13
/**
 * Run `task` while an ora spinner labelled `text` is shown.
 * The spinner is marked successful with `successText(result)` when the task
 * resolves, and marked failed (then the error is rethrown) when it rejects, so
 * a spinner is never left running after an error.
 */
async function withSpinner(text, task, successText) {
  const spinner = ora({ text, color: "cyan" }).start();
  try {
    const result = await task();
    spinner.succeed(successText(result));
    return result;
  } catch (err) {
    spinner.fail();
    throw err;
  }
}

/**
 * Scan `options.input`, parse every file that was found, and write the
 * requested artifacts into `options.output`.
 *
 * Artifacts:
 * - `combined.md`   unless `options.emitCombined === false`
 * - `context.xml`   when `options.emitXml` is truthy
 * - `chunks/`       when `options.chunkSize > 0` and `combined.md` was written
 * - `metadata.json` unless `options.emitMetadata === false`
 *
 * A file whose parser throws is still listed, with empty content and the error
 * message stored in `parseError`; it does not abort the build.
 *
 * @param options Build options (input/output paths, scan filters, artifact switches).
 * @returns The build summary, including per-file stats and the paths of the
 *   artifacts that were written.
 * @throws Whatever the scanner or an exporter throws (for example when the
 *   input path does not exist).
 */
export async function build(options) {
  const startedAt = Date.now();
  const absoluteInput = path.resolve(options.input);
  const absoluteOutput = path.resolve(options.output);

  const scanned = await withSpinner(
    `Scanning ${absoluteInput}`,
    () =>
      scanDirectory(absoluteInput, {
        include: options.include,
        exclude: options.exclude,
        followSymlinks: options.followSymlinks,
        maxFileSizeBytes: options.maxFileSizeBytes,
      }),
    (found) => `Found ${found.length} file(s)`,
  );
  if (scanned.length === 0) {
    logger.warn("No files matched the scan filters. Nothing to do.");
  }

  // Per-file parse errors are caught below, so this spinner cannot be
  // abandoned by a failing parser.
  const parseSpinner = ora({ color: "cyan" }).start();
  const parsed = [];
  let i = 0;
  for (const entry of scanned) {
    i += 1;
    const detected = detectKind(entry.absolutePath);
    parseSpinner.text = `Parsing [${i}/${scanned.length}] ${entry.relativePath}`;
    let content = "";
    let parseError;
    try {
      content = await parseFile(entry.absolutePath, detected.kind);
    } catch (err) {
      parseError = err instanceof Error ? err.message : String(err);
      content = "";
    }
    parsed.push({
      absolutePath: entry.absolutePath,
      relativePath: entry.relativePath,
      kind: detected.kind,
      extension: detected.extension,
      language: detectLanguage(detected.extension, entry.relativePath),
      bytes: entry.bytes,
      content,
      estimatedTokens: estimateTokens(content),
      parseError,
    });
  }
  // Surface parse failures instead of reporting an unconditional success.
  const failedCount = parsed.filter((f) => f.parseError !== undefined).length;
  if (failedCount > 0) {
    parseSpinner.warn(`Parsed ${parsed.length} file(s), ${failedCount} with parse errors`);
  } else {
    parseSpinner.succeed(`Parsed ${parsed.length} file(s)`);
  }

  await fs.ensureDir(absoluteOutput);
  const artifacts = {};
  const generatedAt = new Date().toISOString();
  const title = options.title ?? `Context bundle: ${path.basename(absoluteInput)}`;

  const combinedPath = path.join(absoluteOutput, "combined.md");
  if (options.emitCombined !== false) {
    await withSpinner(
      "Writing combined.md",
      () =>
        exportCombinedMarkdown(parsed, {
          outputPath: combinedPath,
          title,
          inputDir: absoluteInput,
          generatedAt,
        }),
      () => `combined.md written (${parsed.length} sections)`,
    );
    artifacts.combined = combinedPath;
  }

  if (options.emitXml) {
    const xmlPath = path.join(absoluteOutput, "context.xml");
    await withSpinner(
      "Writing context.xml",
      () =>
        exportClaudeXml(parsed, {
          outputPath: xmlPath,
          inputDir: absoluteInput,
          generatedAt,
        }),
      () => "context.xml written",
    );
    artifacts.xml = xmlPath;
  }

  if (options.chunkSize && options.chunkSize > 0) {
    if (artifacts.combined) {
      const chunkDir = path.join(absoluteOutput, "chunks");
      artifacts.chunks = await withSpinner(
        `Chunking into ~${options.chunkSize}-token slices`,
        async () => {
          // Chunks are cut from the combined markdown that was just written.
          const combinedMarkdown = await fs.readFile(artifacts.combined, "utf8");
          return exportChunks({
            outputDir: chunkDir,
            combinedMarkdown,
            chunkTokens: options.chunkSize,
          });
        },
        (chunks) => `Wrote ${chunks.length} chunk(s) to ${chunkDir}`,
      );
    } else {
      // Chunking needs combined.md; tell the user rather than skipping silently.
      logger.warn("chunkSize was set but combined.md is disabled; skipping chunk export.");
    }
  }

  const summary = {
    inputDir: absoluteInput,
    outputDir: absoluteOutput,
    generatedAt,
    fileCount: parsed.length,
    totalBytes: parsed.reduce((acc, f) => acc + f.bytes, 0),
    totalTokens: parsed.reduce((acc, f) => acc + f.estimatedTokens, 0),
    durationMs: Date.now() - startedAt,
    files: parsed.map((f) => ({
      path: f.relativePath,
      kind: f.kind,
      bytes: f.bytes,
      tokens: f.estimatedTokens,
      parseError: f.parseError,
    })),
    artifacts,
  };

  if (options.emitMetadata !== false) {
    const metaPath = path.join(absoluteOutput, "metadata.json");
    // Record the path before writing so metadata.json on disk lists the same
    // artifacts as the summary returned to the caller.
    summary.artifacts.metadata = metaPath;
    await exportMetadata(metaPath, summary);
  }
  return summary;
}
128
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,GAAG,MAAM,KAAK,CAAC;AAEtB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AACjE,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,OAAqB;IAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAEpD,MAAM,WAAW,GAAG,GAAG,CAAC;QACtB,IAAI,EAAE,YAAY,aAAa,EAAE;QACjC,KAAK,EAAE,MAAM;KACd,CAAC,CAAC,KAAK,EAAE,CAAC;IAEX,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE;QACjD,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;KAC3C,CAAC,CAAC;IAEH,WAAW,CAAC,OAAO,CAAC,SAAS,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;IAEvD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,mDAAmD,CAAC,CAAC;IACnE,CAAC;IAED,MAAM,YAAY,GAAG,GAAG,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;IACpD,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,CAAC,IAAI,CAAC,CAAC;QACP,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAChD,YAAY,CAAC,IAAI,GAAG,YAAY,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,YAAY,EAAE,CAAC;QAE7E,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,UAA8B,CAAC;QACnC,IAAI,CAAC;YACH,OAAO,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,YAAY,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC/D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QAED,MAAM,CAAC,IAAI,CAAC;YACV,YAAY,EAAE,KAAK,CAAC,YAAY;YACh
C,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,QAAQ,EAAE,cAAc,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,YAAY,CAAC;YAChE,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO;YACP,eAAe,EAAE,cAAc,CAAC,OAAO,CAAC;YACxC,UAAU;SACX,CAAC,CAAC;IACL,CAAC;IAED,YAAY,CAAC,OAAO,CAAC,UAAU,MAAM,CAAC,MAAM,UAAU,CAAC,CAAC;IAExD,MAAM,EAAE,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IAEnC,MAAM,SAAS,GAA8B,EAAE,CAAC;IAChD,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,mBAAmB,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;IAEjF,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAC9D,IAAI,OAAO,CAAC,YAAY,KAAK,KAAK,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,GAAG,CAAC;YACxB,IAAI,EAAE,qBAAqB;YAC3B,KAAK,EAAE,MAAM;SACd,CAAC,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,sBAAsB,CAAC,MAAM,EAAE;YACnC,UAAU,EAAE,YAAY;YACxB,KAAK;YACL,QAAQ,EAAE,aAAa;YACvB,WAAW;SACZ,CAAC,CAAC;QACH,SAAS,CAAC,QAAQ,GAAG,YAAY,CAAC;QAClC,aAAa,CAAC,OAAO,CAAC,wBAAwB,MAAM,CAAC,MAAM,YAAY,CAAC,CAAC;IAC3E,CAAC;IAED,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,UAAU,GAAG,GAAG,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;QAC/E,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;QACzD,MAAM,eAAe,CAAC,MAAM,EAAE;YAC5B,UAAU,EAAE,OAAO;YACnB,QAAQ,EAAE,aAAa;YACvB,WAAW;SACZ,CAAC,CAAC;QACH,SAAS,CAAC,GAAG,GAAG,OAAO,CAAC;QACxB,UAAU,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACrE,MAAM,YAAY,GAAG,GAAG,CAAC;YACvB,IAAI,EAAE,kBAAkB,OAAO,CAAC,SAAS,eAAe;YACxD,KAAK,EAAE,MAAM;SACd,CAAC,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,gBAAgB,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;QACrD,SAAS,CAAC,MAAM,GAAG,MAAM,YAAY,CAAC;YACpC,SAAS,EAAE,QAAQ;YACnB,gBAAgB;YAChB,WAAW,EAAE,OAAO,CAAC,SAAS;SAC/B,CAAC,CAAC;QACH,YAAY,CAAC,OAAO,CAAC,SAAS,SAAS,CAAC,MAAM,CAAC,MAAM,gBAAgB,QAAQ,EAAE,CAAC,CAAC;IACnF,CAAC;IAED,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,aAAa;QACvB,S
AAS,EAAE,cAAc;QACzB,WAAW;QACX,SAAS,EAAE,MAAM,CAAC,MAAM;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACvD,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAClE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QAClC,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACxB,IAAI,EAAE,CAAC,CAAC,YAAY;YACpB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,MAAM,EAAE,CAAC,CAAC,eAAe;YACzB,UAAU,EAAE,CAAC,CAAC,UAAU;SACzB,CAAC,CAAC;QACH,SAAS;KACV,CAAC;IAEF,IAAI,OAAO,CAAC,YAAY,KAAK,KAAK,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAC5D,MAAM,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACxC,OAAO,CAAC,SAAS,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACxC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Read a source-code file as UTF-8 text, convert CRLF line endings to LF and
 * strip trailing spaces/tabs from every line.
 *
 * @param absolutePath Path of the file to read.
 * @returns The normalized file contents.
 */
export declare function parseCode(absolutePath: string): Promise<string>;
2
+ //# sourceMappingURL=code.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code.d.ts","sourceRoot":"","sources":["../../src/parsers/code.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAGrE"}
@@ -0,0 +1,6 @@
1
+ import fs from "fs-extra";
2
/**
 * Load a source file as UTF-8 and tidy its whitespace: CRLF becomes LF and
 * trailing spaces/tabs are removed from each line. Leading and trailing blank
 * lines are kept as they are.
 */
export async function parseCode(absolutePath) {
  const source = await fs.readFile(absolutePath, "utf8");
  const unixNewlines = source.replace(/\r\n/g, "\n");
  const withoutTrailingBlanks = unixNewlines.replace(/[ \t]+$/gm, "");
  return withoutTrailingBlanks;
}
6
+ //# sourceMappingURL=code.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code.js","sourceRoot":"","sources":["../../src/parsers/code.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;AAC7D,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Extract the text of a `.docx` file using mammoth, as Markdown when mammoth
 * exposes `convertToMarkdown` and as raw text otherwise.
 *
 * @param absolutePath Path of the `.docx` file.
 * @returns The extracted text with LF line endings, trimmed.
 */
export declare function parseDocx(absolutePath: string): Promise<string>;
2
+ //# sourceMappingURL=docx.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docx.d.ts","sourceRoot":"","sources":["../../src/parsers/docx.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAmBrE"}
@@ -0,0 +1,16 @@
1
+ import mammoth from "mammoth";
2
/**
 * Extract the text of a `.docx` file.
 *
 * Markdown conversion is preferred. If mammoth does not expose
 * `convertToMarkdown`, or the conversion throws, the document is read again
 * as raw text instead.
 *
 * @param absolutePath Path of the `.docx` file.
 * @returns The extracted text with LF line endings, trimmed.
 * @throws Whatever `mammoth.extractRawText` throws when the raw-text path also fails.
 */
export async function parseDocx(absolutePath) {
  // `convertToMarkdown` exists at runtime but isn't on mammoth's public types,
  // so we narrow the shape ourselves and fall back to plain text on failure.
  const mammothAny = mammoth;
  if (typeof mammothAny.convertToMarkdown === "function") {
    try {
      const { value } = await mammothAny.convertToMarkdown({ path: absolutePath });
      return value
        .replace(/\r\n/g, "\n")
        .replace(/\n{3,}/g, "\n\n") // collapse runs of blank lines
        .trim();
    } catch {
      // Markdown conversion failed; fall through to raw-text extraction.
    }
  }
  const { value } = await mammoth.extractRawText({ path: absolutePath });
  return value.replace(/\r\n/g, "\n").trim();
}
16
+ //# sourceMappingURL=docx.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docx.js","sourceRoot":"","sources":["../../src/parsers/docx.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,6EAA6E;IAC7E,2EAA2E;IAC3E,MAAM,UAAU,GAAG,OAIlB,CAAC;IAEF,IAAI,OAAO,UAAU,CAAC,iBAAiB,KAAK,UAAU,EAAE,CAAC;QACvD,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,UAAU,CAAC,iBAAiB,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;QAC7E,OAAO,KAAK;aACT,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;aAC1B,IAAI,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;IACvE,OAAO,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAC7C,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { FileKind } from "../types.js";
2
/** Result of classifying a path with {@link detectKind}. */
export interface DetectedFile {
  /** Which parser family the file belongs to. */
  kind: FileKind;
  /**
   * Lower-cased extension including the leading dot, or the synthetic
   * `.dockerfile` / `.makefile` for extension-less Dockerfiles and Makefiles.
   */
  extension: string;
}

/**
 * Classify a file by its extension and, failing that, by its base name.
 *
 * @param absolutePath Path of the file; only the name is inspected.
 */
export declare function detectKind(absolutePath: string): DetectedFile;

/**
 * Read a file with the parser that matches `kind`; `"unknown"` files are read
 * as plain text.
 *
 * @param absolutePath Path of the file to read.
 * @param kind Kind previously returned by {@link detectKind}.
 * @returns The parsed text content.
 */
export declare function parseFile(absolutePath: string, kind: FileKind): Promise<string>;
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsers/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAS5C,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,UAAU,CAAC,YAAY,EAAE,MAAM,GAAG,YAAY,CA6B7D;AAED,wBAAsB,SAAS,CAC7B,YAAY,EAAE,MAAM,EACpB,IAAI,EAAE,QAAQ,GACb,OAAO,CAAC,MAAM,CAAC,CAiBjB"}
@@ -0,0 +1,56 @@
1
+ import path from "node:path";
2
+ import { isCodeExtension } from "../utils/language.js";
3
+ import { parseCode } from "./code.js";
4
+ import { parseDocx } from "./docx.js";
5
+ import { parseJson } from "./json.js";
6
+ import { parseMarkdown } from "./markdown.js";
7
+ import { parsePdf } from "./pdf.js";
8
+ import { parseText } from "./text.js";
9
/** Extensions that map directly to a non-code kind. */
const KIND_BY_EXTENSION = new Map([
  [".pdf", "pdf"],
  [".docx", "docx"],
  [".md", "markdown"],
  [".mdx", "markdown"],
  [".markdown", "markdown"],
  [".json", "json"],
  [".jsonc", "json"],
  [".txt", "text"],
  [".log", "text"],
  [".rst", "text"],
]);

/**
 * Classify a file by name.
 *
 * The checks run in this order: known document extensions, then code
 * extensions (per `isCodeExtension`), then the well-known extension-less
 * names `Dockerfile` / `Dockerfile.*` / `Makefile`, compared
 * case-insensitively. Anything else is `"unknown"`.
 */
export function detectKind(absolutePath) {
  const extension = path.extname(absolutePath).toLowerCase();

  const documentKind = KIND_BY_EXTENSION.get(extension);
  if (documentKind !== undefined) {
    return { kind: documentKind, extension };
  }
  if (isCodeExtension(extension)) {
    return { kind: "code", extension };
  }

  const fileName = path.basename(absolutePath).toLowerCase();
  const isDockerfile = fileName === "dockerfile" || fileName.startsWith("dockerfile.");
  if (isDockerfile) {
    return { kind: "code", extension: ".dockerfile" };
  }
  if (fileName === "makefile") {
    return { kind: "code", extension: ".makefile" };
  }
  return { kind: "unknown", extension };
}
38
/**
 * Dispatch to the parser that matches `kind`.
 * `"text"` and `"unknown"` files are both read as plain text. A `kind` outside
 * the known set resolves to `undefined`.
 */
export async function parseFile(absolutePath, kind) {
  if (kind === "pdf") return parsePdf(absolutePath);
  if (kind === "docx") return parseDocx(absolutePath);
  if (kind === "markdown") return parseMarkdown(absolutePath);
  if (kind === "json") return parseJson(absolutePath);
  if (kind === "code") return parseCode(absolutePath);
  if (kind === "text" || kind === "unknown") return parseText(absolutePath);
}
56
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsers/index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAOtC,MAAM,UAAU,UAAU,CAAC,YAAoB;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,WAAW,EAAE,CAAC;IAC3D,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,WAAW,EAAE,CAAC;IAEvD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,MAAM;YACT,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QACpC,KAAK,OAAO;YACV,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QACrC,KAAK,KAAK,CAAC;QACX,KAAK,MAAM,CAAC;QACZ,KAAK,WAAW;YACd,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC;QACzC,KAAK,OAAO,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QACrC,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM;YACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACvC,CAAC;IAED,IAAI,eAAe,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACnE,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC5D,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC;IACpD,CAAC;IACD,IAAI,IAAI,KAAK,UAAU;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IAEzE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;AACxC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,YAAoB,EACpB,IAAc;IAEd,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,KAAK;YACR,OAAO,QAAQ,CAAC,YAAY,CAAC,CAAC;QAChC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,UAAU;YACb,OAAO,aAAa,CAAC,YAAY,CAAC,CAAC;QACrC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;QACjC,KAAK,SAAS;YACZ,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;IACnC,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Read a JSON file and return it re-serialized with two-space indentation.
 * When the text is not valid JSON, the trimmed raw text is returned instead.
 *
 * @param absolutePath Path of the file to read.
 */
export declare function parseJson(absolutePath: string): Promise<string>;
2
+ //# sourceMappingURL=json.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/parsers/json.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAQrE"}
@@ -0,0 +1,12 @@
1
+ import fs from "fs-extra";
2
/**
 * Read a JSON file and return it pretty-printed with two-space indentation.
 *
 * A leading UTF-8 byte-order mark is removed before parsing, because
 * `JSON.parse` rejects it. When the text is still not valid JSON (for example
 * `.jsonc` with comments), the raw text is returned with CRLF converted to LF
 * and surrounding whitespace trimmed, matching the other text parsers.
 *
 * @param absolutePath Path of the file to read.
 * @returns Pretty-printed JSON, or the normalized raw text as a fallback.
 * @throws Whatever `fs.readFile` throws when the file cannot be read.
 */
export async function parseJson(absolutePath) {
  const raw = await fs.readFile(absolutePath, "utf8");
  // Node keeps the BOM when decoding UTF-8; drop it so valid JSON still parses.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  try {
    const parsed = JSON.parse(text);
    return JSON.stringify(parsed, null, 2);
  } catch {
    // Not strict JSON: hand back the text itself rather than failing the file.
    return text.replace(/\r\n/g, "\n").trim();
  }
}
12
+ //# sourceMappingURL=json.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/parsers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/B,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Read a Markdown file and round-trip it through remark (with GFM support) to
 * normalize its formatting.
 *
 * @param absolutePath Path of the file to read.
 * @returns The re-serialized Markdown with LF line endings, trimmed.
 */
export declare function parseMarkdown(absolutePath: string): Promise<string>;
2
+ //# sourceMappingURL=markdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/parsers/markdown.ts"],"names":[],"mappings":"AAMA,wBAAsB,aAAa,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAKzE"}
@@ -0,0 +1,11 @@
1
+ import fs from "fs-extra";
2
+ import { remark } from "remark";
3
+ import remarkGfm from "remark-gfm";
4
// One shared remark pipeline with GitHub-flavoured Markdown enabled.
const processor = remark().use(remarkGfm);

/**
 * Read a Markdown file and re-serialize it through remark so whitespace, list
 * markers and similar details come out in a normalized form.
 */
export async function parseMarkdown(absolutePath) {
  const source = await fs.readFile(absolutePath, "utf8");
  const rendered = String(await processor.process(source));
  return rendered.replace(/\r\n/g, "\n").trim();
}
11
+ //# sourceMappingURL=markdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/parsers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,SAAS,MAAM,YAAY,CAAC;AAEnC,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,YAAoB;IACtD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,wEAAwE;IACxE,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC1C,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AACpD,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Extract the text of a PDF file with `pdf-parse` and normalize its whitespace.
 *
 * @param absolutePath Path of the PDF file.
 * @returns The extracted text.
 */
export declare function parsePdf(absolutePath: string): Promise<string>;
2
+ //# sourceMappingURL=pdf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/parsers/pdf.ts"],"names":[],"mappings":"AAIA,wBAAsB,QAAQ,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIpE"}
@@ -0,0 +1,17 @@
1
+ import fs from "fs-extra";
2
+ // pdf-parse is CommonJS; import the default function.
3
+ import pdfParse from "pdf-parse";
4
/**
 * Read a PDF into memory, extract its text with `pdf-parse`, and return the
 * text after whitespace normalization.
 */
export async function parsePdf(absolutePath) {
  const bytes = await fs.readFile(absolutePath);
  const { text } = await pdfParse(bytes);
  return normalize(text);
}
9
/**
 * Clean up text extracted from a PDF.
 *
 * The steps, in order: CRLF becomes LF, non-breaking spaces become regular
 * spaces, trailing spaces/tabs before a newline are dropped, runs of three or
 * more newlines collapse to one blank line, and the result is trimmed.
 *
 * @param text Raw extracted text.
 * @returns The normalized text.
 */
function normalize(text) {
  return text
    .replace(/\r\n/g, "\n")
    // Written as an escape so the non-breaking space is visible in the source
    // and cannot be silently turned into a plain space by an editor.
    .replace(/\u00a0/g, " ")
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
17
+ //# sourceMappingURL=pdf.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/parsers/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,sDAAsD;AACtD,OAAO,QAAQ,MAAM,WAAW,CAAC;AAEjC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,YAAoB;IACjD,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;IACtC,OAAO,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AACZ,CAAC"}
@@ -0,0 +1,2 @@
1
/**
 * Read a plain-text file as UTF-8, convert CRLF line endings to LF and trim
 * surrounding whitespace.
 *
 * @param absolutePath Path of the file to read.
 */
export declare function parseText(absolutePath: string): Promise<string>;
2
+ //# sourceMappingURL=text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../src/parsers/text.ts"],"names":[],"mappings":"AAEA,wBAAsB,SAAS,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAGrE"}
@@ -0,0 +1,6 @@
1
+ import fs from "fs-extra";
2
/**
 * Load a text file as UTF-8, switch CRLF line endings to LF, and strip
 * whitespace from both ends.
 */
export async function parseText(absolutePath) {
  const contents = await fs.readFile(absolutePath, "utf8");
  const unixNewlines = contents.replace(/\r\n/g, "\n");
  return unixNewlines.trim();
}
6
+ //# sourceMappingURL=text.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/parsers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,YAAoB;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACpD,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC"}
@@ -0,0 +1,15 @@
1
/** Glob patterns that are always ignored, in addition to any caller-supplied excludes. */
export declare const DEFAULT_EXCLUDES: string[];

/** Glob patterns used when the caller supplies no include patterns. */
export declare const DEFAULT_INCLUDES: string[];

/** Filters applied by {@link scanDirectory}. */
export interface ScannerOptions {
  /** Glob patterns to match; the defaults are used when missing or empty. */
  include?: string[];
  /** Extra glob patterns to ignore, added to the default excludes. */
  exclude?: string[];
  /** Whether symbolic links are followed while globbing (off when omitted). */
  followSymlinks?: boolean;
  /** Files larger than this many bytes are skipped during a directory scan. */
  maxFileSizeBytes?: number;
}

/** One file found by {@link scanDirectory}. */
export interface ScannedFile {
  /** Absolute path of the file. */
  absolutePath: string;
  /** Path relative to the scanned root (the base name when the root is a file). */
  relativePath: string;
  /** File size in bytes. */
  bytes: number;
}

/**
 * List the files under `rootDir` that pass the include/exclude/size filters,
 * sorted by relative path. When `rootDir` is itself a file, that single file
 * is returned.
 *
 * @throws Error when `rootDir` does not exist.
 */
export declare function scanDirectory(rootDir: string, opts?: ScannerOptions): Promise<ScannedFile[]>;
15
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scanner/index.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,gBAAgB,UAiB5B,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,CAAC;AAEzC,MAAM,WAAW,cAAc;IAC7B,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,WAAW;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAsB,aAAa,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,GAAE,cAAmB,GACxB,OAAO,CAAC,WAAW,EAAE,CAAC,CA8CxB"}
@@ -0,0 +1,66 @@
1
+ import path from "node:path";
2
+ import fg from "fast-glob";
3
+ import fs from "fs-extra";
4
/** Glob patterns that every directory scan ignores. */
export const DEFAULT_EXCLUDES = [
  // Dependencies and version control
  "**/node_modules/**",
  "**/.git/**",
  // Build output
  "**/dist/**",
  "**/build/**",
  "**/out/**",
  // Framework and tool caches
  "**/.next/**",
  "**/.nuxt/**",
  "**/.cache/**",
  "**/.turbo/**",
  "**/.vercel/**",
  "**/coverage/**",
  // OS metadata
  "**/.DS_Store",
  // Lock files
  "**/*.lock",
  "**/pnpm-lock.yaml",
  "**/package-lock.json",
  "**/yarn.lock",
];

/** Glob patterns used when the caller gives no include list: match everything. */
export const DEFAULT_INCLUDES = ["**/*"];
23
/**
 * Collect the files to process under `rootDir`.
 *
 * - A missing path raises an error.
 * - A path that is a regular file yields exactly that file, with its base
 *   name as the relative path; no filters are applied to it.
 * - Otherwise the directory is globbed (dotfiles excluded) with the include
 *   patterns, minus the default and caller-supplied excludes. Empty files and
 *   files above the size limit (10 MiB unless overridden) are dropped, and the
 *   rest are sorted by relative path.
 */
export async function scanDirectory(rootDir, opts = {}) {
  const absoluteRoot = path.resolve(rootDir);

  let rootStat;
  try {
    rootStat = await fs.stat(absoluteRoot);
  } catch {
    rootStat = null;
  }
  if (!rootStat) {
    throw new Error(`Input path does not exist: ${rootDir}`);
  }

  if (rootStat.isFile()) {
    const onlyFile = {
      absolutePath: absoluteRoot,
      relativePath: path.basename(absoluteRoot),
      bytes: rootStat.size,
    };
    return [onlyFile];
  }

  const patterns = opts.include?.length ? opts.include : DEFAULT_INCLUDES;
  const ignored = [...DEFAULT_EXCLUDES, ...(opts.exclude ?? [])];
  const matches = await fg(patterns, {
    cwd: absoluteRoot,
    ignore: ignored,
    dot: false,
    onlyFiles: true,
    followSymbolicLinks: opts.followSymlinks ?? false,
    absolute: true,
    stats: true,
    suppressErrors: true,
  });

  const sizeLimit = opts.maxFileSizeBytes ?? 10 * 1024 * 1024;
  return matches
    .map((match) => ({ location: match.path, size: match.stats?.size ?? 0 }))
    // Keep only non-empty files that are not over the limit.
    .filter(({ size }) => size !== 0 && !(size > sizeLimit))
    .map(({ location, size }) => ({
      absolutePath: location,
      relativePath: path.relative(absoluteRoot, location),
      bytes: size,
    }))
    .sort((left, right) => left.relativePath.localeCompare(right.relativePath));
}
66
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scanner/index.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,WAAW,CAAC;AAC3B,OAAO,EAAE,MAAM,UAAU,CAAC;AAE1B,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,oBAAoB;IACpB,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,WAAW;IACX,aAAa;IACb,aAAa;IACb,cAAc;IACd,cAAc;IACd,eAAe;IACf,gBAAgB;IAChB,cAAc;IACd,WAAW;IACX,mBAAmB;IACnB,sBAAsB;IACtB,cAAc;CACf,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,MAAM,CAAC,CAAC;AAezC,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,OAAe,EACf,OAAuB,EAAE;IAEzB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,OAAO,EAAE,CAAC,CAAC;IAEpE,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QAClB,OAAO;YACL;gBACE,YAAY,EAAE,YAAY;gBAC1B,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC;gBACzC,KAAK,EAAE,IAAI,CAAC,IAAI;aACjB;SACF,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACvE,MAAM,OAAO,GAAG,CAAC,GAAG,gBAAgB,EAAE,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;IAE/D,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,EAAE;QAChC,GAAG,EAAE,YAAY;QACjB,MAAM,EAAE,OAAO;QACf,GAAG,EAAE,KAAK;QACV,SAAS,EAAE,IAAI;QACf,mBAAmB,EAAE,IAAI,CAAC,cAAc,IAAI,KAAK;QACjD,QAAQ,EAAE,IAAI;QACd,KAAK,EAAE,IAAI;QACX,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,IAAI,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3D,MAAM,KAAK,GAAkB,EAAE,CAAC;IAEhC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,CAAC;QACpC,IAAI,IAAI,KAAK,CAAC;YAAE,SAAS;QACzB,IAAI,IAAI,GAAG,QAAQ;YAAE,SAAS;QAE9B,KAAK,CAAC,IAAI,CAAC;YACT,YAAY,EAAE,KAAK,CAAC,IAAI;YACxB,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC;YACrD,KAAK,EAAE,IAAI;SACZ,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC;IACnE,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,50 @@
1
/** Parser family a file is assigned to. */
export type FileKind =
  | "pdf"
  | "docx"
  | "markdown"
  | "text"
  | "json"
  | "code"
  | "unknown";

/** Where to read from, where to write to, and which files to pick up. */
export interface ScanOptions {
  /** File or directory to scan. */
  input: string;
  /** Directory that receives the generated artifacts. */
  output: string;
  /** Glob patterns to include. */
  include?: string[];
  /** Glob patterns to exclude. */
  exclude?: string[];
  /** Size limit, in bytes, passed to the scanner. */
  maxFileSizeBytes?: number;
  /** Whether the scanner follows symbolic links. */
  followSymlinks?: boolean;
}

/** A scanned file together with its extracted text. */
export interface ParsedFile {
  absolutePath: string;
  relativePath: string;
  kind: FileKind;
  /** Extension reported by kind detection. */
  extension: string;
  /** Language label derived from the extension and relative path, if any. */
  language?: string;
  /** File size in bytes, as reported by the scanner. */
  bytes: number;
  /** Extracted text; empty when parsing failed. */
  content: string;
  /** Token estimate for `content`. */
  estimatedTokens: number;
  /** Message of the error raised while parsing, when parsing failed. */
  parseError?: string;
}

/** Options accepted by `build`. */
export interface BuildOptions extends ScanOptions {
  /** Target chunk size in tokens; chunks are written only when this is above 0. */
  chunkSize?: number;
  /** Write `context.xml` when true. */
  emitXml?: boolean;
  /** Write `metadata.json` unless explicitly set to false. */
  emitMetadata?: boolean;
  /** Write `combined.md` unless explicitly set to false. */
  emitCombined?: boolean;
  /** Title of the combined document; defaults to one derived from the input name. */
  title?: string;
}

/** Result returned by `build`. */
export interface BuildSummary {
  /** Absolute input path. */
  inputDir: string;
  /** Absolute output directory. */
  outputDir: string;
  /** ISO-8601 timestamp taken during the build. */
  generatedAt: string;
  fileCount: number;
  /** Sum of the sizes of all listed files. */
  totalBytes: number;
  /** Sum of the token estimates of all listed files. */
  totalTokens: number;
  /** Milliseconds elapsed between the start of the build and summary creation. */
  durationMs: number;
  /** Per-file statistics. */
  files: {
    /** Path relative to the input root. */
    path: string;
    kind: FileKind;
    bytes: number;
    tokens: number;
    parseError?: string;
  }[];
  /** Paths of the artifacts that were written; absent keys were not produced. */
  artifacts: {
    combined?: string;
    metadata?: string;
    xml?: string;
    chunks?: string[];
  };
}
50
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAChB,KAAK,GACL,MAAM,GACN,UAAU,GACV,MAAM,GACN,MAAM,GACN,MAAM,GACN,SAAS,CAAC;AAEd,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,QAAQ,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAa,SAAQ,WAAW;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,KAAK,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,QAAQ,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC,CAAC;IACH,SAAS,EAAE;QACT,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;KACnB,CAAC;CACH"}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,11 @@
1
/**
 * One element of the array returned by `chunkText`.
 *  - `index`: position of the chunk in the returned array (it is assigned from
 *    the array length at the moment the chunk is pushed).
 *  - `tokens`: sum of the `estimateTokens` results for the pieces that were
 *    merged into this chunk.
 *  - `content`: the chunk text.
 */
+ export interface Chunk {
2
+ index: number;
3
+ tokens: number;
4
+ content: string;
5
+ }
6
+ /**
7
+ * Split text into chunks of approximately `targetTokens` tokens,
8
+ * preferring paragraph and line boundaries to avoid mid-sentence cuts.
+ *
+ * @param text - Text to split. Blank (whitespace-only) text yields an empty array.
+ * @param targetTokens - Approximate per-chunk token budget. Optional; the
+ *   implementation in chunking.js defaults it to 6000.
+ * @returns The chunks in document order.
9
+ */
10
+ export declare function chunkText(text: string, targetTokens?: number): Chunk[];
11
+ //# sourceMappingURL=chunking.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.d.ts","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,KAAK;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,SAAQ,GAAG,KAAK,EAAE,CA0DrE"}
@@ -0,0 +1,62 @@
1
+ import { estimateTokens } from "./tokens.js";
2
/**
 * Split text into chunks of approximately `targetTokens` tokens,
 * preferring paragraph and line boundaries to avoid mid-sentence cuts.
 *
 * Strategy:
 *  1. Line endings are normalised to "\n" so that CRLF / CR documents get the
 *     same paragraph and line boundaries as LF documents.
 *  2. Paragraphs (separated by one or more blank lines) are packed greedily
 *     into a chunk until adding the next one would exceed the budget.
 *  3. A paragraph that is larger than the budget on its own is split line by
 *     line; a single line that is still larger than the budget is cut into
 *     budget-sized pieces so no chunk grossly exceeds `targetTokens`.
 *
 * Whitespace-only chunks are never emitted.
 *
 * @param text Text to split. Blank (whitespace-only) text yields `[]`.
 * @param targetTokens Approximate per-chunk budget, measured with
 *   `estimateTokens`. Values that are not greater than zero (0, negatives,
 *   NaN) fall back to the default of 6000. `Infinity` disables splitting.
 * @returns Chunks in document order; `index` is the position in the array and
 *   `tokens` is the sum of the estimates of the pieces merged into the chunk.
 */
export function chunkText(text, targetTokens = 6_000) {
    if (!text.trim())
        return [];
    // A non-positive or NaN budget would degenerate into one chunk per line
    // (or no splitting at all), so fall back to the default instead.
    const limit = targetTokens > 0 ? targetTokens : 6_000;
    // Normalise CRLF / lone CR first: /\n{2,}/ cannot match "\r\n\r\n".
    const paragraphs = text.replace(/\r\n?/g, "\n").split(/\n{2,}/);
    const chunks = [];
    // Single place where chunks are created, so every path skips blank content.
    const emit = (content, tokens) => {
        if (!content.trim())
            return;
        chunks.push({
            index: chunks.length,
            tokens,
            content,
        });
    };
    let buffer = "";
    let bufferTokens = 0;
    const flush = () => {
        emit(buffer.trim(), bufferTokens);
        buffer = "";
        bufferTokens = 0;
    };
    for (const paragraph of paragraphs) {
        const pTokens = estimateTokens(paragraph);
        if (pTokens > limit) {
            flush();
            // Hard-split very large blocks line by line.
            let lineBuf = "";
            let lineTokens = 0;
            const flushLines = () => {
                emit(lineBuf, lineTokens);
                lineBuf = "";
                lineTokens = 0;
            };
            for (const line of paragraph.split("\n")) {
                const pieces = splitOversizedLine(line, limit);
                for (let i = 0; i < pieces.length; i++) {
                    const piece = pieces[i];
                    const pieceTokens = estimateTokens(piece);
                    // i > 0 means `piece` continues the same physical line as
                    // the previous piece: always start a new chunk so the two
                    // are never glued together with an artificial "\n".
                    if (lineBuf && (i > 0 || lineTokens + pieceTokens > limit))
                        flushLines();
                    lineBuf += (lineBuf ? "\n" : "") + piece;
                    lineTokens += pieceTokens;
                }
            }
            flushLines();
            continue;
        }
        if (bufferTokens + pTokens > limit)
            flush();
        buffer += (buffer ? "\n\n" : "") + paragraph;
        bufferTokens += pTokens;
    }
    flush();
    return chunks;
}
/**
 * Cut a single line into consecutive pieces whose estimate fits `limit`.
 * Returns `[line]` unchanged when the line already fits. Concatenating the
 * pieces always reproduces the line exactly.
 *
 * Assumes `estimateTokens` does not decrease as a string gets longer; if that
 * does not hold the pieces are still lossless, just not maximal.
 */
function splitOversizedLine(line, limit) {
    if (!(estimateTokens(line) > limit))
        return [line];
    const pieces = [];
    let rest = line;
    while (rest && estimateTokens(rest) > limit) {
        // Gallop to bracket the cut point so each probe stays proportional to
        // the piece size rather than to the whole remaining line.
        let lo = 1; // cutting at least one code unit guarantees progress
        let hi = 2;
        while (hi < rest.length && estimateTokens(rest.slice(0, hi)) <= limit) {
            lo = hi;
            hi *= 2;
        }
        // `hi` (or the full remainder) is known not to fit.
        let upper = Math.min(hi, rest.length) - 1;
        // Binary search for the longest prefix that still fits.
        while (lo < upper) {
            const mid = lo + Math.ceil((upper - lo) / 2);
            if (estimateTokens(rest.slice(0, mid)) <= limit)
                lo = mid;
            else
                upper = mid - 1;
        }
        let cut = lo;
        // Never cut between the two halves of a UTF-16 surrogate pair.
        const last = rest.charCodeAt(cut - 1);
        if (last >= 0xd800 && last <= 0xdbff)
            cut = cut > 1 ? cut - 1 : Math.min(2, rest.length);
        pieces.push(rest.slice(0, cut));
        rest = rest.slice(cut);
    }
    if (rest)
        pieces.push(rest);
    return pieces;
}
62
+ //# sourceMappingURL=chunking.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAQ7C;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,YAAY,GAAG,KAAK;IAC1D,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAE5B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,MAAM,KAAK,GAAG,GAAS,EAAE;QACvB,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;YAAE,OAAO;QAC3B,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,MAAM,CAAC,MAAM;YACpB,MAAM,EAAE,YAAY;YACpB,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE;SACvB,CAAC,CAAC;QACH,MAAM,GAAG,EAAE,CAAC;QACZ,YAAY,GAAG,CAAC,CAAC;IACnB,CAAC,CAAC;IAEF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAE1C,IAAI,OAAO,GAAG,YAAY,EAAE,CAAC;YAC3B,KAAK,EAAE,CAAC;YACR,6CAA6C;YAC7C,IAAI,OAAO,GAAG,EAAE,CAAC;YACjB,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzC,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;gBAChC,IAAI,UAAU,GAAG,EAAE,GAAG,YAAY,IAAI,OAAO,EAAE,CAAC;oBAC9C,MAAM,CAAC,IAAI,CAAC;wBACV,KAAK,EAAE,MAAM,CAAC,MAAM;wBACpB,MAAM,EAAE,UAAU;wBAClB,OAAO,EAAE,OAAO;qBACjB,CAAC,CAAC;oBACH,OAAO,GAAG,EAAE,CAAC;oBACb,UAAU,GAAG,CAAC,CAAC;gBACjB,CAAC;gBACD,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;gBACxC,UAAU,IAAI,EAAE,CAAC;YACnB,CAAC;YACD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK,EAAE,MAAM,CAAC,MAAM;oBACpB,MAAM,EAAE,UAAU;oBAClB,OAAO,EAAE,OAAO;iBACjB,CAAC,CAAC;YACL,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,YAAY,GAAG,OAAO,GAAG,YAAY;YAAE,KAAK,EAAE,CAAC;QACnD,MAAM,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC;QAC7C,YAAY,IAAI,OAAO,CAAC;IAC1B,CAAC;IAED,KAAK,EAAE,CAAC;IACR,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,3 @@
1
/**
 * Returns a string derived from a file's extension and filename.
 * NOTE(review): presumably the result is a language identifier and `filename`
 * covers files with no usable extension — the implementation is not visible in
 * this declaration file, so confirm the fallback value and extension format.
 */
+ export declare function detectLanguage(extension: string, filename: string): string;
2
/**
 * Reports a boolean for the given file extension.
 * NOTE(review): presumably true when the extension belongs to a source-code
 * file type — the implementation is not visible here; confirm whether a
 * leading dot and letter case matter.
 */
+ export declare function isCodeExtension(extension: string): boolean;
3
+ //# sourceMappingURL=language.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"language.d.ts","sourceRoot":"","sources":["../../src/utils/language.ts"],"names":[],"mappings":"AA0DA,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAM1E;AAID,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAE1D"}