wikimem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/CHANGELOG.md +37 -0
  2. package/LICENSE +21 -0
  3. package/README.md +398 -0
  4. package/dist/cli/commands/duplicates.d.ts +3 -0
  5. package/dist/cli/commands/duplicates.d.ts.map +1 -0
  6. package/dist/cli/commands/duplicates.js +38 -0
  7. package/dist/cli/commands/duplicates.js.map +1 -0
  8. package/dist/cli/commands/improve.d.ts +3 -0
  9. package/dist/cli/commands/improve.d.ts.map +1 -0
  10. package/dist/cli/commands/improve.js +69 -0
  11. package/dist/cli/commands/improve.js.map +1 -0
  12. package/dist/cli/commands/ingest.d.ts +3 -0
  13. package/dist/cli/commands/ingest.d.ts.map +1 -0
  14. package/dist/cli/commands/ingest.js +181 -0
  15. package/dist/cli/commands/ingest.js.map +1 -0
  16. package/dist/cli/commands/init.d.ts +3 -0
  17. package/dist/cli/commands/init.d.ts.map +1 -0
  18. package/dist/cli/commands/init.js +91 -0
  19. package/dist/cli/commands/init.js.map +1 -0
  20. package/dist/cli/commands/lint.d.ts +3 -0
  21. package/dist/cli/commands/lint.d.ts.map +1 -0
  22. package/dist/cli/commands/lint.js +49 -0
  23. package/dist/cli/commands/lint.js.map +1 -0
  24. package/dist/cli/commands/query.d.ts +3 -0
  25. package/dist/cli/commands/query.d.ts.map +1 -0
  26. package/dist/cli/commands/query.js +51 -0
  27. package/dist/cli/commands/query.js.map +1 -0
  28. package/dist/cli/commands/scrape.d.ts +3 -0
  29. package/dist/cli/commands/scrape.d.ts.map +1 -0
  30. package/dist/cli/commands/scrape.js +47 -0
  31. package/dist/cli/commands/scrape.js.map +1 -0
  32. package/dist/cli/commands/serve.d.ts +3 -0
  33. package/dist/cli/commands/serve.d.ts.map +1 -0
  34. package/dist/cli/commands/serve.js +24 -0
  35. package/dist/cli/commands/serve.js.map +1 -0
  36. package/dist/cli/commands/status.d.ts +3 -0
  37. package/dist/cli/commands/status.d.ts.map +1 -0
  38. package/dist/cli/commands/status.js +30 -0
  39. package/dist/cli/commands/status.js.map +1 -0
  40. package/dist/cli/commands/watch.d.ts +3 -0
  41. package/dist/cli/commands/watch.d.ts.map +1 -0
  42. package/dist/cli/commands/watch.js +29 -0
  43. package/dist/cli/commands/watch.js.map +1 -0
  44. package/dist/cli/index.d.ts +3 -0
  45. package/dist/cli/index.d.ts.map +1 -0
  46. package/dist/cli/index.js +30 -0
  47. package/dist/cli/index.js.map +1 -0
  48. package/dist/core/config.d.ts +47 -0
  49. package/dist/core/config.d.ts.map +1 -0
  50. package/dist/core/config.js +11 -0
  51. package/dist/core/config.js.map +1 -0
  52. package/dist/core/improve.d.ts +19 -0
  53. package/dist/core/improve.d.ts.map +1 -0
  54. package/dist/core/improve.js +175 -0
  55. package/dist/core/improve.js.map +1 -0
  56. package/dist/core/index-manager.d.ts +9 -0
  57. package/dist/core/index-manager.d.ts.map +1 -0
  58. package/dist/core/index-manager.js +30 -0
  59. package/dist/core/index-manager.js.map +1 -0
  60. package/dist/core/ingest.d.ts +46 -0
  61. package/dist/core/ingest.d.ts.map +1 -0
  62. package/dist/core/ingest.js +366 -0
  63. package/dist/core/ingest.js.map +1 -0
  64. package/dist/core/lint.d.ts +19 -0
  65. package/dist/core/lint.d.ts.map +1 -0
  66. package/dist/core/lint.js +90 -0
  67. package/dist/core/lint.js.map +1 -0
  68. package/dist/core/log-manager.d.ts +2 -0
  69. package/dist/core/log-manager.d.ts.map +1 -0
  70. package/dist/core/log-manager.js +14 -0
  71. package/dist/core/log-manager.js.map +1 -0
  72. package/dist/core/obsidian.d.ts +89 -0
  73. package/dist/core/obsidian.d.ts.map +1 -0
  74. package/dist/core/obsidian.js +123 -0
  75. package/dist/core/obsidian.js.map +1 -0
  76. package/dist/core/query.d.ts +16 -0
  77. package/dist/core/query.d.ts.map +1 -0
  78. package/dist/core/query.js +77 -0
  79. package/dist/core/query.js.map +1 -0
  80. package/dist/core/scrape.d.ts +13 -0
  81. package/dist/core/scrape.d.ts.map +1 -0
  82. package/dist/core/scrape.js +103 -0
  83. package/dist/core/scrape.js.map +1 -0
  84. package/dist/core/vault.d.ts +35 -0
  85. package/dist/core/vault.d.ts.map +1 -0
  86. package/dist/core/vault.js +119 -0
  87. package/dist/core/vault.js.map +1 -0
  88. package/dist/core/watcher.d.ts +4 -0
  89. package/dist/core/watcher.d.ts.map +1 -0
  90. package/dist/core/watcher.js +34 -0
  91. package/dist/core/watcher.js.map +1 -0
  92. package/dist/index.d.ts +3 -0
  93. package/dist/index.d.ts.map +1 -0
  94. package/dist/index.js +5 -0
  95. package/dist/index.js.map +1 -0
  96. package/dist/processors/audio.d.ts +10 -0
  97. package/dist/processors/audio.d.ts.map +1 -0
  98. package/dist/processors/audio.js +139 -0
  99. package/dist/processors/audio.js.map +1 -0
  100. package/dist/processors/docx.d.ts +12 -0
  101. package/dist/processors/docx.d.ts.map +1 -0
  102. package/dist/processors/docx.js +98 -0
  103. package/dist/processors/docx.js.map +1 -0
  104. package/dist/processors/image.d.ts +9 -0
  105. package/dist/processors/image.d.ts.map +1 -0
  106. package/dist/processors/image.js +94 -0
  107. package/dist/processors/image.js.map +1 -0
  108. package/dist/processors/pdf.d.ts +10 -0
  109. package/dist/processors/pdf.d.ts.map +1 -0
  110. package/dist/processors/pdf.js +92 -0
  111. package/dist/processors/pdf.js.map +1 -0
  112. package/dist/processors/pptx.d.ts +13 -0
  113. package/dist/processors/pptx.d.ts.map +1 -0
  114. package/dist/processors/pptx.js +165 -0
  115. package/dist/processors/pptx.js.map +1 -0
  116. package/dist/processors/text.d.ts +7 -0
  117. package/dist/processors/text.d.ts.map +1 -0
  118. package/dist/processors/text.js +9 -0
  119. package/dist/processors/text.js.map +1 -0
  120. package/dist/processors/url.d.ts +7 -0
  121. package/dist/processors/url.d.ts.map +1 -0
  122. package/dist/processors/url.js +61 -0
  123. package/dist/processors/url.js.map +1 -0
  124. package/dist/processors/video.d.ts +10 -0
  125. package/dist/processors/video.d.ts.map +1 -0
  126. package/dist/processors/video.js +115 -0
  127. package/dist/processors/video.js.map +1 -0
  128. package/dist/processors/xlsx.d.ts +13 -0
  129. package/dist/processors/xlsx.d.ts.map +1 -0
  130. package/dist/processors/xlsx.js +138 -0
  131. package/dist/processors/xlsx.js.map +1 -0
  132. package/dist/providers/claude.d.ts +10 -0
  133. package/dist/providers/claude.d.ts.map +1 -0
  134. package/dist/providers/claude.js +44 -0
  135. package/dist/providers/claude.js.map +1 -0
  136. package/dist/providers/embeddings.d.ts +62 -0
  137. package/dist/providers/embeddings.d.ts.map +1 -0
  138. package/dist/providers/embeddings.js +206 -0
  139. package/dist/providers/embeddings.js.map +1 -0
  140. package/dist/providers/index.d.ts +7 -0
  141. package/dist/providers/index.d.ts.map +1 -0
  142. package/dist/providers/index.js +19 -0
  143. package/dist/providers/index.js.map +1 -0
  144. package/dist/providers/ollama.d.ts +10 -0
  145. package/dist/providers/ollama.d.ts.map +1 -0
  146. package/dist/providers/ollama.js +48 -0
  147. package/dist/providers/ollama.js.map +1 -0
  148. package/dist/providers/openai.d.ts +10 -0
  149. package/dist/providers/openai.d.ts.map +1 -0
  150. package/dist/providers/openai.js +38 -0
  151. package/dist/providers/openai.js.map +1 -0
  152. package/dist/providers/types.d.ts +33 -0
  153. package/dist/providers/types.d.ts.map +1 -0
  154. package/dist/providers/types.js +2 -0
  155. package/dist/providers/types.js.map +1 -0
  156. package/dist/search/bm25.d.ts +18 -0
  157. package/dist/search/bm25.d.ts.map +1 -0
  158. package/dist/search/bm25.js +52 -0
  159. package/dist/search/bm25.js.map +1 -0
  160. package/dist/search/index.d.ts +12 -0
  161. package/dist/search/index.d.ts.map +1 -0
  162. package/dist/search/index.js +64 -0
  163. package/dist/search/index.js.map +1 -0
  164. package/dist/search/semantic.d.ts +30 -0
  165. package/dist/search/semantic.d.ts.map +1 -0
  166. package/dist/search/semantic.js +162 -0
  167. package/dist/search/semantic.js.map +1 -0
  168. package/dist/templates/agents-md.d.ts +2 -0
  169. package/dist/templates/agents-md.d.ts.map +1 -0
  170. package/dist/templates/agents-md.js +85 -0
  171. package/dist/templates/agents-md.js.map +1 -0
  172. package/dist/templates/config-yaml.d.ts +2 -0
  173. package/dist/templates/config-yaml.d.ts.map +1 -0
  174. package/dist/templates/config-yaml.js +81 -0
  175. package/dist/templates/config-yaml.js.map +1 -0
  176. package/dist/web/server.d.ts +2 -0
  177. package/dist/web/server.d.ts.map +1 -0
  178. package/dist/web/server.js +170 -0
  179. package/dist/web/server.js.map +1 -0
  180. package/package.json +68 -0
@@ -0,0 +1,165 @@
1
+ /**
2
+ * PowerPoint (.pptx) processor.
3
+ * Extracts slide text and speaker notes from raw XML (no external deps).
4
+ */
5
+ import { readFileSync } from 'node:fs';
6
+ import { basename } from 'node:path';
7
/**
 * Converts a PowerPoint file into plain text and a Markdown document.
 *
 * @param {string} filePath - Path to the .pptx file.
 * @returns {Promise<{title: string, content: string, markdown: string, slideCount: number, sourcePath: string}>}
 *   Title (file name without extension), the slide text joined with `---`
 *   separators (or a placeholder when nothing was extracted), the rendered
 *   Markdown, the number of slides found and the original path.
 */
export async function processPptx(filePath) {
    const fileName = basename(filePath);
    // Strip the extension case-insensitively: basename(path, '.pptx') would
    // leave "Deck.PPTX" untouched. Fall back to the full name for a file that
    // is literally called ".pptx".
    const title = fileName.replace(/\.pptx$/i, '') || fileName;
    const slides = extractSlides(filePath);
    const slideCount = slides.length;
    const content = slideCount > 0
        ? slides.map((slide) => formatSlide(slide)).join('\n\n---\n\n')
        : `[PowerPoint — no text content extracted from ${fileName}]`;
    return {
        title,
        content,
        markdown: buildMarkdown(title, filePath, content, slideCount),
        slideCount,
        sourcePath: filePath,
    };
}
28
/**
 * Scans the raw bytes of a .pptx file for slide text and speaker notes.
 *
 * The file is not unzipped: its bytes are searched directly for
 * `slide{N}.xml` / `notesSlide{N}.xml` markers and `<a:t>` text elements.
 * NOTE(review): entries inside a .pptx are normally deflate-compressed, so
 * this presumably only sees text from entries stored uncompressed — confirm.
 *
 * @param {string} filePath - Path to the .pptx file.
 * @returns {Array<{slideNumber: number, texts: string[], notes: string[]}>}
 */
function extractSlides(filePath) {
    // latin1 maps each byte to exactly one character, so nothing is dropped
    // while scanning; multi-byte UTF-8 sequences are not decoded here.
    const raw = readFileSync(filePath).toString('latin1');
    const slides = [];
    // Pass 1: one candidate slide per marker-delimited section that has text.
    splitBySlides(raw).forEach((section, position) => {
        if (!section) {
            return;
        }
        const texts = extractTextElements(section);
        if (texts.length === 0) {
            return;
        }
        slides.push({ slideNumber: position + 1, texts, notes: [] });
    });
    // Pass 2 (fallback): no section produced text, so bundle every text
    // element found anywhere into pseudo-slides of five entries each.
    if (slides.length === 0) {
        const everything = extractTextElements(raw);
        const perSlide = 5;
        for (let offset = 0; offset < everything.length; offset += perSlide) {
            slides.push({
                slideNumber: slides.length + 1,
                texts: everything.slice(offset, offset + perSlide),
                notes: [],
            });
        }
    }
    // Pass 3: attach speaker notes; the n-th notes section goes to the n-th
    // collected slide (array position, not slideNumber).
    splitByNotes(raw).forEach((section, position) => {
        if (!section) {
            return;
        }
        const notes = extractTextElements(section);
        const target = slides[position];
        if (target && notes.length > 0) {
            target.notes = notes;
        }
    });
    return slides;
}
83
/**
 * Cuts the raw archive text into sections that each begin at a
 * `slide{N}.xml` marker.
 *
 * @param {string} content - Raw file contents as a string.
 * @returns {string[]} One section per marker occurrence, running up to the
 *   next marker (or end of input) and capped at 50000 characters.
 */
function splitBySlides(content) {
    const starts = Array.from(content.matchAll(/slide\d+\.xml/g), (hit) => hit.index);
    return starts.map((begin, idx) => {
        const nextStart = idx + 1 < starts.length ? starts[idx + 1] : content.length;
        return content.substring(begin, Math.min(nextStart, begin + 50000));
    });
}
99
/**
 * Cuts the raw archive text into sections that each begin at a
 * `notesSlide{N}.xml` marker.
 *
 * @param {string} content - Raw file contents as a string.
 * @returns {string[]} One section per marker occurrence, running up to the
 *   next marker (or end of input) and capped at 50000 characters.
 */
function splitByNotes(content) {
    const markerOffsets = [];
    for (const hit of content.matchAll(/notesSlide\d+\.xml/g)) {
        markerOffsets.push(hit.index);
    }
    const sections = [];
    markerOffsets.forEach((begin, idx) => {
        const isLast = idx === markerOffsets.length - 1;
        const boundary = isLast ? content.length : markerOffsets[idx + 1];
        const cappedEnd = Math.min(boundary, begin + 50000);
        sections.push(content.substring(begin, cappedEnd));
    });
    return sections;
}
114
/**
 * Collects the text of every `<a:t>` element found in an XML fragment.
 *
 * Field codes (`<a:fld>`) carry their visible text in a nested `<a:t>`, so
 * they are already covered by this single scan. The former second pass over
 * `<a:fld>` compared raw text against the decoded list and therefore added
 * any field text containing an entity (e.g. `R&amp;D`) a second time.
 *
 * @param {string} xml - XML fragment (may contain arbitrary surrounding bytes).
 * @returns {string[]} Trimmed, entity-decoded, non-empty text runs in document order.
 */
function extractTextElements(xml) {
    const texts = [];
    for (const [, inner] of xml.matchAll(/<a:t>([\s\S]*?)<\/a:t>/g)) {
        const text = inner.trim();
        if (text.length > 0) {
            texts.push(decodeXmlEntities(text));
        }
    }
    return texts;
}
135
/**
 * Decodes the five predefined XML entities plus decimal (`&#65;`) and
 * hexadecimal (`&#x41;`) character references.
 *
 * Everything is replaced in a single pass so that already-decoded output is
 * never decoded again (`&amp;lt;` becomes the literal text `&lt;`, not `<`).
 * Unknown entities and references outside the Unicode range are left as-is.
 *
 * @param {string} text - Text that may contain XML entities.
 * @returns {string} The decoded text.
 */
function decodeXmlEntities(text) {
    const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
    const entityPattern = /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g;
    return text.replace(entityPattern, (entity, name, decimal, hex) => {
        if (name !== undefined) {
            return named[name];
        }
        const codePoint = decimal !== undefined
            ? Number.parseInt(decimal, 10)
            : Number.parseInt(hex, 16);
        // fromCodePoint (unlike fromCharCode) handles astral characters but
        // throws above U+10FFFF, so out-of-range references stay untouched.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });
}
144
/**
 * Renders one slide as a Markdown section.
 *
 * @param {{slideNumber: number, texts: string[], notes: string[]}} slide
 * @returns {string} A `### Slide N` heading, the slide texts separated by
 *   blank lines and, when present, the speaker notes as a single block quote.
 */
function formatSlide(slide) {
    const pieces = [
        `### Slide ${slide.slideNumber}\n\n`,
        slide.texts.join('\n\n'),
    ];
    const hasNotes = slide.notes.length > 0;
    if (hasNotes) {
        pieces.push(`\n\n**Speaker Notes:**\n\n> ${slide.notes.join(' ')}`);
    }
    return pieces.join('');
}
152
/**
 * Assembles the Markdown document for a processed presentation.
 *
 * @param {string} title - Document heading.
 * @param {string} filePath - Source path; used as link target, its base name as link text.
 * @param {string} content - Pre-rendered slide Markdown.
 * @param {number} slideCount - Number of slides reported in the header.
 * @returns {string} Markdown with a metadata block quote (source, type, slide
 *   count, today's UTC date) followed by a "Slides" section.
 */
function buildMarkdown(title, filePath, content, slideCount) {
    const processedOn = new Date().toISOString().split('T')[0];
    const lines = [
        `# ${title}`,
        '',
        `> **Source:** [${basename(filePath)}](${filePath})`,
        '> **Type:** PowerPoint Presentation (.pptx)',
        `> **Slides:** ${slideCount}`,
        `> **Processed:** ${processedOn}`,
        '',
        '## Slides',
        '',
        `${content}`,
        '', // the document ends with a newline
    ];
    return lines.join('\n');
}
165
+ //# sourceMappingURL=pptx.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pptx.js","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAgBrC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IAEjC,IAAI,OAAe,CAAC;IACpB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,MAAM;aACb,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;aAClC,IAAI,CAAC,aAAa,CAAC,CAAC;IACzB,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,gDAAgD,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAClF,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,uCAAuC;IACvC,iDAAiD;IACjD,0DAA0D;IAC1D,oEAAoE;IAEpE,yEAAyE;IACzE,gEAAgE;IAChE,MAAM,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,WAAW,EAAE,CAAC,GAAG,CAAC;gBAClB,KAAK;gBACL,KAAK,EAAE,EAAE,EAAE,yBAAyB;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,kEAAkE;YAClE,MAAM,SAAS,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBACpD,MAAM,CAAC,IAAI,CAAC;oBACV,WAAW,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;oBAC9B,KAAK,EAAE,UAAU;oBACjB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;I
ACH,CAAC;IAED,uDAAuD;IACvD,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,iCAAiC;QACjC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,8CAA8C;IAC9C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,gBAAgB,CAAC;IACrC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,OAAe;IACnC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAG,qBAAqB,CAAC;IACzC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAW;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,gDAAgD;IAChD,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG
,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,8DAA8D,CAAC;IAChF,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACrD,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,IAAI;SACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AAC1F,CAAC;AAED,SAAS,WAAW,CAAC,KAAmB;IACtC,IAAI,EAAE,GAAG,aAAa,KAAK,CAAC,WAAW,MAAM,CAAC;IAC9C,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE/B,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,EAAE,IAAI,+BAA+B,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;gBAEhC,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -0,0 +1,7 @@
1
+ export interface ProcessedText { // Result returned by processText for one text file.
2
+ title: string; // File name with its extension removed.
3
+ content: string; // Full file contents, read as UTF-8.
4
+ wordCount: number; // Count of non-empty, whitespace-separated tokens in content.
5
+ }
6
+ export declare function processText(filePath: string): ProcessedText; // Reads filePath synchronously and returns its title, contents and word count.
7
+ //# sourceMappingURL=text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../src/processors/text.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAM3D"}
@@ -0,0 +1,9 @@
1
+ import { readFileSync } from 'node:fs';
2
+ import { basename, extname } from 'node:path';
3
/**
 * Reads a text file and summarizes it.
 *
 * @param {string} filePath - Path of the file to read (decoded as UTF-8).
 * @returns {{title: string, content: string, wordCount: number}} The file name
 *   without its extension, the full contents and the number of
 *   whitespace-separated words.
 */
export function processText(filePath) {
    const content = readFileSync(filePath, 'utf-8');
    const words = content.split(/\s+/).filter((token) => Boolean(token));
    return {
        title: basename(filePath, extname(filePath)),
        content,
        wordCount: words.length,
    };
}
9
+ //# sourceMappingURL=text.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/processors/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQ9C,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAE9D,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AACvC,CAAC"}
@@ -0,0 +1,7 @@
1
+ export interface ProcessedUrl { // Result returned by processUrl for one web page.
2
+ title: string; // Page title (Firecrawl metadata or the HTML <title>); falls back to the URL's hostname.
3
+ content: string; // Firecrawl markdown, or for the plain-fetch path a small header plus tag-stripped page text.
4
+ url: string; // The URL that was requested, unchanged.
5
+ }
6
+ export declare function processUrl(url: string): Promise<ProcessedUrl>; // Uses Firecrawl when FIRECRAWL_API_KEY is set, otherwise a plain fetch.
7
+ //# sourceMappingURL=url.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/processors/url.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CASnE"}
@@ -0,0 +1,61 @@
1
/**
 * Fetches a web page and converts it to text.
 *
 * When the FIRECRAWL_API_KEY environment variable is set (non-empty) the page
 * is scraped through Firecrawl; otherwise it is fetched directly and its HTML
 * is stripped.
 *
 * @param {string} url - Page to retrieve.
 * @returns {Promise<{title: string, content: string, url: string}>}
 */
export async function processUrl(url) {
    const firecrawlKey = process.env['FIRECRAWL_API_KEY'];
    const page = firecrawlKey
        ? await processWithFirecrawl(url, firecrawlKey)
        : await processWithFetch(url);
    return page;
}
10
/**
 * Scrapes a page through the Firecrawl API and returns its Markdown.
 *
 * @param {string} url - Page to scrape.
 * @param {string} apiKey - Firecrawl API key, sent as a Bearer token.
 * @returns {Promise<{title: string, content: string, url: string}>} Title from
 *   the response metadata (hostname of `url` when absent), the Markdown body
 *   and the requested URL.
 * @throws {Error} When the API answers with a non-2xx status or the response
 *   body carries no Markdown.
 */
async function processWithFirecrawl(url, apiKey) {
    const response = await fetch('https://api.firecrawl.dev/v1/scrape', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
            url,
            formats: ['markdown'],
        }),
    });
    if (!response.ok) {
        throw new Error(`Firecrawl API error: ${response.status}`);
    }
    const payload = await response.json();
    const markdown = payload?.data?.markdown;
    // Validate the shape up front: blindly dereferencing data.metadata.title
    // turned an unexpected payload into an opaque TypeError, or returned
    // `content: undefined` to the caller.
    if (typeof markdown !== 'string') {
        throw new Error(`Firecrawl API returned no markdown for ${url}`);
    }
    return {
        title: payload.data.metadata?.title ?? new URL(url).hostname,
        content: markdown,
        url,
    };
}
32
/**
 * Fetches a page directly and reduces its HTML to plain text.
 *
 * @param {string} url - Page to fetch.
 * @returns {Promise<{title: string, content: string, url: string}>} The page
 *   title (hostname of `url` when the page has none), a small Markdown header
 *   followed by at most 15000 characters of stripped text, and the URL.
 * @throws {Error} When the response status is not 2xx.
 */
async function processWithFetch(url) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const html = await response.text();
    // Accept attributes on the tag and titles spanning several lines; the
    // previous pattern (/<title>(.*?)<\/title>/) missed both.
    const titleMatch = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
    const rawTitle = titleMatch?.[1]?.replace(/\s+/g, ' ').trim();
    // An empty <title></title> is as useless as a missing one.
    const title = rawTitle || new URL(url).hostname;
    const content = stripHtml(html);
    return {
        title,
        content: `# ${title}\n\nSource: ${url}\n\n${content.substring(0, 15000)}`,
        url,
    };
}
49
/**
 * Reduces an HTML document to plain text.
 *
 * Removes `<script>` / `<style>` elements with their contents, replaces all
 * remaining tags with spaces, decodes a handful of common entities and
 * collapses whitespace.
 *
 * @param {string} html - HTML source.
 * @returns {string} Single-line text with runs of whitespace collapsed.
 */
function stripHtml(html) {
    const entities = {
        amp: '&',
        lt: '<',
        gt: '>',
        nbsp: ' ',
        quot: '"',
        apos: "'",
        '#39': "'",
    };
    return html
        // Closing tags may legally carry whitespace ("</script >").
        .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
        .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, '')
        .replace(/<[^>]+>/g, ' ')
        // Decode in one pass so "&amp;lt;" yields the text "&lt;" rather than
        // being decoded twice into "<".
        .replace(/&(amp|lt|gt|nbsp|quot|apos|#39);/g, (_, name) => entities[name])
        .replace(/\s+/g, ' ')
        .trim();
}
61
+ //# sourceMappingURL=url.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/processors/url.ts"],"names":[],"mappings":"AAMA,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,8CAA8C;IAC9C,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,MAAM,oBAAoB,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IACvD,CAAC;IAED,qCAAqC;IACrC,OAAO,MAAM,gBAAgB,CAAC,GAAG,CAAC,CAAC;AACrC,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW,EAAE,MAAc;IAC7D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,qCAAqC,EAAE;QAClE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,eAAe,EAAE,UAAU,MAAM,EAAE;SACpC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,GAAG;YACH,OAAO,EAAE,CAAC,UAAU,CAAC;SACtB,CAAC;KACH,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAElC,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ;QACxD,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ;QAC3B,GAAG;KACJ,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,KAAK,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAEnC,gBAAgB;IAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAEvD,iCAAiC;IACjC,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEhC,OAAO;QACL,KAAK;QACL,OAAO,EAAE,KAAK,KAAK,eAAe,GAAG,OAAO,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE;QACzE,GAAG;KACJ,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,mCAAmC;IACnC,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;IACjE,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC,CAAC;IAC3D,mBAAmB;IACnB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACrC,yBAAyB;IACzB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE
,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAC/H,uBAAuB;IACvB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACxC,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,10 @@
1
+ export interface VideoResult { // Result returned by processVideo for one video file.
2
+ title: string; // File name with its extension removed.
3
+ transcript: string; // Transcript of the extracted audio track; empty when ffmpeg is missing, extraction fails or transcription fails.
4
+ markdown: string; // Markdown document with a metadata header and the transcript (or a bracketed explanation of why there is none).
5
+ duration?: string; // Human-readable length such as "1h 2m 3s"; only present when transcription succeeded and ffprobe reported a duration.
6
+ sourcePath: string; // The path that was passed in.
7
+ }
8
+ export declare function isVideoFile(filePath: string): boolean; // True when the file extension (case-insensitive) is a supported video format.
9
+ export declare function processVideo(filePath: string): Promise<VideoResult>; // Extracts the audio track with ffmpeg and transcribes it; rejects for unsupported extensions.
10
+ //# sourceMappingURL=video.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"video.d.ts","sourceRoot":"","sources":["../../src/processors/video.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAmEzE"}
@@ -0,0 +1,115 @@
1
+ import { spawnSync } from 'node:child_process';
2
+ import { basename, extname, join } from 'node:path';
3
+ import { existsSync, unlinkSync } from 'node:fs';
4
+ import { tmpdir } from 'node:os';
5
+ import { processAudio } from './audio.js';
6
/** Lower-case file extensions (leading dot included) accepted as video input. */
const SUPPORTED_EXTENSIONS = new Set(['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']);
/**
 * Tells whether a path points to a supported video format, judged by its
 * extension only (case-insensitive).
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} True when the extension is in SUPPORTED_EXTENSIONS.
 */
export function isVideoFile(filePath) {
    const extension = extname(filePath).toLowerCase();
    return SUPPORTED_EXTENSIONS.has(extension);
}
10
/**
 * Transcribes a video by extracting its audio track with ffmpeg and handing
 * the resulting WAV file to the audio processor.
 *
 * Failures after the format check never reject: a missing ffmpeg, a failed
 * extraction or a failed transcription produce a result with an empty
 * transcript and an explanatory note in the Markdown.
 *
 * @param {string} filePath - Path to the video file.
 * @returns {Promise<{title: string, transcript: string, markdown: string, duration?: string, sourcePath: string}>}
 * @throws {Error} When the file extension is not a supported video format.
 */
export async function processVideo(filePath) {
    const originalExt = extname(filePath);
    const ext = originalExt.toLowerCase();
    // Strip the extension as written: basename() compares case-sensitively,
    // so passing the lower-cased form left "Clip.MP4" untouched.
    const title = basename(filePath, originalExt);
    if (!SUPPORTED_EXTENSIONS.has(ext)) {
        throw new Error(`Unsupported video format: ${ext}. Supported: ${[...SUPPORTED_EXTENSIONS].join(', ')}`);
    }
    // Result used for every "no transcript" outcome.
    const withoutTranscript = (note) => ({
        title,
        transcript: '',
        markdown: buildMarkdown(title, filePath, note),
        sourcePath: filePath,
    });
    if (!isFfmpegAvailable()) {
        return withoutTranscript('[Video file — install ffmpeg for audio extraction and transcription]');
    }
    // pid + timestamp + random suffix keeps concurrent runs from sharing a file.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const audioPath = join(tmpdir(), `llmwiki-video-${uniqueSuffix}.wav`);
    try {
        // Step 1: extract the audio track via ffmpeg
        const extractResult = spawnSync('ffmpeg', [
            '-i', filePath,
            '-vn', // no video
            '-acodec', 'pcm_s16le', // WAV format
            '-ar', '16000', // 16kHz (optimal for Whisper)
            '-ac', '1', // mono
            '-y', // overwrite
            audioPath,
        ], { encoding: 'utf-8', timeout: 120000 });
        if (extractResult.status !== 0) {
            // stderr is null when the process could not be spawned at all.
            const reason = extractResult.stderr?.substring(0, 200)
                || extractResult.error?.message
                || 'unknown error';
            return withoutTranscript(`[Video file — ffmpeg audio extraction failed: ${reason}]`);
        }
        // Step 2: transcribe the extracted audio
        try {
            const audioResult = await processAudio(audioPath);
            const duration = getDuration(filePath);
            return {
                title,
                transcript: audioResult.transcript,
                markdown: buildMarkdown(title, filePath, audioResult.transcript, duration),
                duration,
                sourcePath: filePath,
            };
        }
        catch (error) {
            return withoutTranscript(`[Video file — transcription failed: ${error instanceof Error ? error.message : String(error)}]`);
        }
    }
    finally {
        // Always remove the temp WAV, including a partial file left behind by
        // a failed or timed-out ffmpeg run. Cleanup is best-effort: a failure
        // to delete must not discard an otherwise valid result.
        try {
            if (existsSync(audioPath)) {
                unlinkSync(audioPath);
            }
        }
        catch {
            // intentionally ignored — see above
        }
    }
}
72
/**
 * Assembles the Markdown document for a processed video.
 *
 * @param {string} title - Document heading.
 * @param {string} filePath - Source path; used as link target, its base name as link text.
 * @param {string} transcript - Transcript text; a placeholder is printed when empty.
 * @param {string} [duration] - Formatted duration; the line is omitted when falsy.
 * @returns {string} Markdown with a metadata block quote followed by a "Transcript" section.
 */
function buildMarkdown(title, filePath, transcript, duration) {
    const header = [
        `> **Source:** [${basename(filePath)}](${filePath})`,
        '> **Type:** Video',
    ];
    if (duration) {
        header.push(`> **Duration:** ${duration}`);
    }
    header.push(`> **Processed:** ${new Date().toISOString().split('T')[0]}`);
    const body = transcript || '_No transcript available._';
    return [
        `# ${title}`,
        '',
        ...header,
        '',
        '## Transcript',
        '',
        `${body}`,
        '', // the document ends with a newline
    ].join('\n');
}
84
/**
 * Probes for a usable ffmpeg binary by running `ffmpeg -version`.
 *
 * @returns {boolean} True only when the command ran and exited with status 0
 *   within five seconds.
 */
function isFfmpegAvailable() {
    let probe;
    try {
        probe = spawnSync('ffmpeg', ['-version'], { encoding: 'utf-8', timeout: 5000 });
    }
    catch {
        return false;
    }
    return probe.status === 0;
}
93
/**
 * Asks ffprobe for the length of a media file and formats it for display.
 *
 * @param {string} filePath - Path to the media file.
 * @returns {string|undefined} "Hh Mm Ss", "Mm Ss" or "Ss" (leading zero units
 *   omitted), or undefined when ffprobe is unavailable or reports no usable
 *   duration.
 */
function getDuration(filePath) {
    let probe;
    try {
        probe = spawnSync('ffprobe', [
            '-v', 'error', '-show_entries', 'format=duration',
            '-of', 'default=noprint_wrappers=1:nokey=1', filePath,
        ], { encoding: 'utf-8', timeout: 10000 });
    }
    catch {
        return undefined;
    }
    // stdout is null when ffprobe could not be started (e.g. not installed);
    // check it explicitly instead of relying on a caught TypeError.
    if (typeof probe.stdout !== 'string') {
        return undefined;
    }
    const seconds = Number.parseFloat(probe.stdout.trim());
    // Rejects NaN ("N/A", empty output) as well as Infinity and negative
    // values, which would otherwise render as "Infinityh NaNm NaNs" or "-1s".
    if (!Number.isFinite(seconds) || seconds < 0) {
        return undefined;
    }
    const h = Math.floor(seconds / 3600);
    const m = Math.floor((seconds % 3600) / 60);
    const s = Math.floor(seconds % 60);
    if (h > 0) {
        return `${h}h ${m}m ${s}s`;
    }
    if (m > 0) {
        return `${m}m ${s}s`;
    }
    return `${s}s`;
}
115
+ //# sourceMappingURL=video.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"video.js","sourceRoot":"","sources":["../../src/processors/video.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAU1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAExF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,mBAAmB;IACnB,IAAI,CAAC,iBAAiB,EAAE,EAAE,CAAC;QACzB,OAAO;YACL,KAAK;YACL,UAAU,EAAE,EAAE;YACd,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,sEAAsE,CAAC;YAChH,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,yCAAyC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACpE,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,EAAE;QACxC,IAAI,EAAE,QAAQ;QACd,KAAK,EAAqB,WAAW;QACrC,SAAS,EAAE,WAAW,EAAG,aAAa;QACtC,KAAK,EAAE,OAAO,EAAY,8BAA8B;QACxD,KAAK,EAAE,GAAG,EAAgB,OAAO;QACjC,IAAI,EAAsB,YAAY;QACtC,SAAS;KACV,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IAE3C,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO;YACL,KAAK;YACL,UAAU,EAAE,EAAE;YACd,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,iDAAiD,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;YACrI,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,yCAAyC;IACzC,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;QAElD,qBAAqB;QACrB,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QAEvC,2BAA2B;QAC3B,IAAI,UAAU,CAAC,SAAS,CAAC;YAAE,UAAU,CAAC,SAAS,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK;YACL,UAAU,EAAE,WAAW,CAAC,UAAU;YAClC,QAAQ
,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC,UAAU,EAAE,QAAQ,CAAC;YAC1E,QAAQ;YACR,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB;QACtB,IAAI,UAAU,CAAC,SAAS,CAAC;YAAE,UAAU,CAAC,SAAS,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK;YACL,UAAU,EAAE,EAAE;YACd,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,uCAAuC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC;YAC1I,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,UAAkB,EAAE,QAAiB;IAC3F,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;mBAC7B,QAAQ,CAAC,CAAC,CAAC,qBAAqB,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC/C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,UAAU,IAAI,4BAA4B;CAC3C,CAAC;AACF,CAAC;AAED,SAAS,iBAAiB;IACxB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB;IACnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE;YAClC,IAAI,EAAE,OAAO,EAAE,eAAe,EAAE,iBAAiB;YACjD,KAAK,EAAE,oCAAoC,EAAE,QAAQ;SACtD,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;QACjD,IAAI,KAAK,CAAC,OAAO,CAAC;YAAE,OAAO,SAAS,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;QAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;QACnC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC;QACtC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC;QAChC,OAAO,GAAG,CAAC,GAAG,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Excel/spreadsheet (.xlsx, .xls) processor.
3
+ * Uses xlsx (SheetJS) for extraction, with a raw XML fallback.
4
+ */
5
/**
 * Shape of the object that processXlsx resolves with for one spreadsheet file.
 */
export interface XlsxResult {
    /** Base name of the source file, derived from its path. */
    title: string;
    /** Extracted spreadsheet text, or a placeholder message when nothing could be extracted. */
    content: string;
    /** Complete markdown document: title heading, source metadata and the extracted content. */
    markdown: string;
    /** Number of sheets reported by whichever extraction path produced the content. */
    sheetCount: number;
    /** The file path that was passed to processXlsx, unchanged. */
    sourcePath: string;
}
12
/**
 * Extract the contents of a spreadsheet file and wrap them in a markdown document.
 *
 * @param filePath Path of the spreadsheet file to read.
 * @returns A promise resolving to the extracted content plus its metadata.
 */
export declare function processXlsx(filePath: string): Promise<XlsxResult>;
13
+ //# sourceMappingURL=xlsx.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"xlsx.d.ts","sourceRoot":"","sources":["../../src/processors/xlsx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CA6BvE"}
@@ -0,0 +1,138 @@
1
+ /**
2
+ * Excel/spreadsheet (.xlsx, .xls) processor.
3
+ * Uses xlsx (SheetJS) for extraction, with a raw XML fallback.
4
+ */
5
+ import { readFileSync } from 'node:fs';
6
+ import { basename, extname } from 'node:path';
7
/**
 * Convert a spreadsheet file into a result object carrying both the raw
 * extracted text and a ready-to-save markdown document.
 *
 * SheetJS extraction is attempted first. If it throws for any reason, the
 * raw-XML scan is used instead; an error thrown by that fallback is not
 * caught here and propagates to the caller.
 *
 * @param {string} filePath - Path of the spreadsheet file to process.
 * @returns {Promise<{title: string, content: string, markdown: string, sheetCount: number, sourcePath: string}>}
 */
export async function processXlsx(filePath) {
    // Strip the extension exactly as it is spelled in the path. basename()
    // compares its suffix argument case-sensitively, so passing a lower-cased
    // copy would leave "Report.XLSX" with its extension still attached.
    const title = basename(filePath, extname(filePath));
    let extracted;
    try {
        extracted = await extractWithSheetJS(filePath);
    }
    catch {
        // Deliberate best-effort: any SheetJS failure falls back to the raw scan.
        extracted = extractFromRawXml(filePath);
    }
    const { sheetCount } = extracted;
    // Whitespace-only output counts as "nothing extracted".
    const content = extracted.content.trim()
        ? extracted.content
        : `[Spreadsheet — no data extracted from ${basename(filePath)}]`;
    return {
        title,
        content,
        markdown: buildMarkdown(title, filePath, content, sheetCount),
        sheetCount,
        sourcePath: filePath,
    };
}
34
/**
 * Extract every sheet of a workbook as a markdown table using SheetJS.
 *
 * @param {string} filePath - Path of the spreadsheet file to read.
 * @returns {Promise<{content: string, sheetCount: number}>} Rendered sheet
 *   sections joined by horizontal rules, and the number of sheet names in
 *   the workbook (including sheets that produced no table).
 */
async function extractWithSheetJS(filePath) {
    // Loaded lazily because xlsx is an optional dependency; a failed import
    // rejects this function's promise.
    const XLSX = await import('xlsx');
    const workbook = XLSX.read(readFileSync(filePath), { type: 'buffer' });
    // Yields zero or one markdown section per sheet name.
    const renderSheet = (sheetName) => {
        const sheet = workbook.Sheets[sheetName];
        if (!sheet) {
            return [];
        }
        // header: 1 makes SheetJS return an array of row arrays.
        const rowArrays = XLSX.utils.sheet_to_json(sheet, { header: 1 });
        if (rowArrays.length === 0) {
            return [];
        }
        const table = arrayToMarkdownTable(rowArrays);
        return table ? [`### ${sheetName}\n\n${table}`] : [];
    };
    return {
        content: workbook.SheetNames.flatMap(renderSheet).join('\n\n---\n\n'),
        sheetCount: workbook.SheetNames.length,
    };
}
58
/**
 * Fallback extractor: scan the raw bytes of a file for spreadsheet XML text.
 *
 * Collects the text of `<t>` elements and `<v>` elements, de-duplicates the
 * values, keeps at most 500 of them and presents them as a flat list because
 * the table structure cannot be reconstructed from this scan.
 *
 * @param {string} filePath - Path of the file to scan.
 * @returns {{content: string, sheetCount: number}} `content` is an empty
 *   string when no values were found. `sheetCount` is the number of
 *   `<sheet ` tags seen, or 1 when there are none.
 */
function extractFromRawXml(filePath) {
    // latin1 maps each byte to exactly one code unit, so arbitrary binary
    // content can be regex-scanned and captured bytes recovered afterwards.
    const raw = readFileSync(filePath).toString('latin1');
    const xmlEntities = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };
    // Turn a captured latin1 fragment back into its bytes and decode them as
    // UTF-8 (the encoding spreadsheet XML is written in), then resolve the
    // predefined XML entities in a single pass so "&amp;lt;" is not decoded
    // twice. Trimming happens last: in the latin1 view a trailing 0xA0
    // continuation byte would look like a non-breaking space and be cut off.
    const decodeText = (fragment) => Buffer.from(fragment, 'latin1')
        .toString('utf8')
        .replace(/&(lt|gt|quot|apos|amp);/g, (_, entity) => xmlEntities[entity])
        .trim();
    const textParts = [];
    const collect = (pattern) => {
        for (const match of raw.matchAll(pattern)) {
            const text = decodeText(match[1]);
            if (text) {
                textParts.push(text);
            }
        }
    };
    // Shared-string text. The tag name must be exactly "t": requiring
    // whitespace or ">" right after it keeps tags such as <tableParts> from
    // being mistaken for an opening <t>.
    collect(/<t(?:\s[^>]*)?>([\s\S]*?)<\/t>/g);
    // Raw cell values.
    collect(/<v>([\s\S]*?)<\/v>/g);
    const sheetCount = raw.match(/<sheet /g)?.length ?? 1;
    if (textParts.length === 0) {
        return { content: '', sheetCount };
    }
    const uniqueParts = [...new Set(textParts)].slice(0, 500);
    return {
        content: `**Extracted cell values:**\n\n${uniqueParts.join(' | ')}`,
        sheetCount,
    };
}
91
/**
 * Render an array of row arrays as a markdown table.
 *
 * Rows that are not arrays, or whose cells are all null/undefined/blank, are
 * dropped. The first remaining row is followed by a header separator. Every
 * row is padded to the width of the widest remaining row. At most 100 rows
 * are rendered; a trailing note reports how many were left out.
 *
 * @param {Array<Array<unknown>>} data - Sheet rows, one array per row.
 * @returns {string} The markdown table, or '' when no row has any content.
 */
function arrayToMarkdownTable(data) {
    const maxRows = 100;
    const rows = data.filter((row) => Array.isArray(row)
        && row.some((cell) => cell != null && String(cell).trim() !== ''));
    if (rows.length === 0) {
        return '';
    }
    // Fold rather than Math.max(...lengths): spreading one argument per row
    // can exceed the engine's argument limit on very large sheets.
    const maxCols = rows.reduce((widest, row) => Math.max(widest, row.length), 0);
    // Pipes would end the cell early and any line break (LF, CRLF or a lone
    // CR) would end the table row, so escape the former and flatten the latter.
    const renderCell = (cell) => (cell == null
        ? ''
        : String(cell).replace(/\|/g, '\\|').replace(/\r\n?|\n/g, ' '));
    const lines = [];
    rows.slice(0, maxRows).forEach((row, rowIndex) => {
        const cells = Array.from({ length: maxCols }, (_, col) => renderCell(row[col]));
        lines.push(`| ${cells.join(' | ')} |`);
        if (rowIndex === 0) {
            // Markdown needs a separator line directly after the header row.
            lines.push(`| ${cells.map(() => '---').join(' | ')} |`);
        }
    });
    if (rows.length > maxRows) {
        lines.push(`\n> _...and ${rows.length - maxRows} more rows (truncated)_`);
    }
    return lines.join('\n');
}
125
/**
 * Assemble the final markdown document for a processed spreadsheet.
 *
 * @param {string} title - Text of the top-level heading.
 * @param {string} filePath - Source path; used as the link target, with its
 *   base name as the link label and its extension as the reported type.
 * @param {string} content - Body placed under the "Data" heading.
 * @param {number} sheetCount - Value shown on the "Sheets" line.
 * @returns {string} The document, ending with a newline.
 */
function buildMarkdown(title, filePath, content, sheetCount) {
    const fileName = basename(filePath);
    const extension = extname(filePath);
    // Date portion (YYYY-MM-DD) of the current ISO timestamp.
    const processedOn = new Date().toISOString().split('T')[0];
    const metadata = [
        `> **Source:** [${fileName}](${filePath})`,
        `> **Type:** Spreadsheet (${extension})`,
        `> **Sheets:** ${sheetCount}`,
        `> **Processed:** ${processedOn}`,
    ];
    const documentLines = [
        `# ${title}`,
        '',
        ...metadata,
        '',
        '## Data',
        '',
        `${content}`,
        '',
    ];
    return documentLines.join('\n');
}
138
+ //# sourceMappingURL=xlsx.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"xlsx.js","sourceRoot":"","sources":["../../src/processors/xlsx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAU9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,yDAAyD;IACzD,IAAI,OAAe,CAAC;IACpB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAClD,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QACzB,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC3C,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QACzB,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACpB,OAAO,GAAG,yCAAyC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAC3E,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IAChD,kDAAkD;IAClD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEvD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACzC,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,mCAAmC;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAW,KAAK,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;QACtE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEhC,MAAM,KAAK,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;QACzC,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,OAAO,SAAS,OAAO,KAAK,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC;QACrC,UAAU,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;KACvC,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAgB;IACzC,uEAAuE;IACvE,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE1C,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,qD
AAqD;IACrD,MAAM,SAAS,GAAG,0BAA0B,CAAC;IAC7C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAClD,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;YAChC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,2CAA2C;IAC3C,MAAM,UAAU,GAAG,qBAAqB,CAAC;IACzC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;YAChC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,eAAe;IACf,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1D,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;IACrC,CAAC;IAED,sEAAsE;IACtE,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1D,OAAO;QACL,OAAO,EAAE,iCAAiC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE;QACnE,UAAU;KACX,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAiB;IAC7C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,mCAAmC;IACnC,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAC/B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAC5G,CAAC;IAEF,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,wBAAwB;IACxB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtF,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7B,uBAAuB;IACvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACpD,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;YAAE,SAAS
;QAElC,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACpB,MAAM,OAAO,GAAG,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClH,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,uCAAuC;QACvC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,MAAM,GAAG,GAAG,yBAAyB,CAAC,CAAC;IACxE,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;2BACrB,OAAO,CAAC,QAAQ,CAAC;gBAC5B,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { LLMProvider, LLMMessage, LLMResponse, LLMOptions } from './types.js';
2
/**
 * LLMProvider implementation that talks to Anthropic's Claude models.
 */
export declare class ClaudeProvider implements LLMProvider {
    /** Provider identifier. */
    name: string;
    /** Underlying SDK client used to send requests. */
    private client;
    /** Model used when a chat call does not specify one. */
    private defaultModel;
    /**
     * @param model Optional default model name.
     * @param apiKey Optional API key; when omitted the ANTHROPIC_API_KEY
     *   environment variable is used.
     */
    constructor(model?: string, apiKey?: string);
    /**
     * Send a conversation to the model and return its reply with token usage.
     *
     * @param messages Conversation messages, which may include system messages.
     * @param options Optional per-call overrides.
     */
    chat(messages: LLMMessage[], options?: LLMOptions): Promise<LLMResponse>;
    /** Resolves to true when an API key is available in the ANTHROPIC_API_KEY environment variable. */
    isAvailable(): Promise<boolean>;
}
10
+ //# sourceMappingURL=claude.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../src/providers/claude.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAEnF,qBAAa,cAAe,YAAW,WAAW;IAChD,IAAI,SAAY;IAChB,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,YAAY,CAAS;gBAEjB,KAAK,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAOrC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,EAAE,OAAO,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IAiCxE,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;CAGtC"}
@@ -0,0 +1,44 @@
1
+ import Anthropic from '@anthropic-ai/sdk';
2
/**
 * LLM provider that sends chat requests through the Anthropic SDK client.
 */
export class ClaudeProvider {
    name = 'claude';
    client;
    defaultModel;
    // Whether a usable key was resolved at construction time (explicit
    // argument or environment variable).
    hasApiKey;
    /**
     * @param {string} [model] - Default model; falls back to
     *   'claude-sonnet-4-20250514' when omitted.
     * @param {string} [apiKey] - API key; falls back to the
     *   ANTHROPIC_API_KEY environment variable when omitted.
     */
    constructor(model, apiKey) {
        const resolvedKey = apiKey ?? process.env['ANTHROPIC_API_KEY'];
        this.client = new Anthropic({
            apiKey: resolvedKey,
        });
        this.defaultModel = model ?? 'claude-sonnet-4-20250514';
        this.hasApiKey = Boolean(resolvedKey);
    }
    /**
     * Send a conversation to the model and collect its text reply.
     *
     * System-role messages are not sent as conversation turns. Unless
     * `options.systemPrompt` is given, their contents are joined with blank
     * lines and passed as the request's `system` field. The field is omitted
     * when the resulting prompt is empty.
     *
     * @param {Array<{role: string, content: string}>} messages - Conversation.
     * @param {{systemPrompt?: string, model?: string, maxTokens?: number}} [options]
     *   Per-call overrides; `maxTokens` defaults to 4096.
     * @returns {Promise<{content: string, model: string, tokensUsed: {input: number, output: number}}>}
     */
    async chat(messages, options) {
        const systemParts = [];
        const conversation = [];
        for (const { role, content } of messages) {
            if (role === 'system') {
                systemParts.push(content);
            }
            else {
                conversation.push({ role, content });
            }
        }
        // join() always yields a string, so no further fallback is needed.
        const systemPrompt = options?.systemPrompt ?? systemParts.join('\n\n');
        const response = await this.client.messages.create({
            model: options?.model ?? this.defaultModel,
            max_tokens: options?.maxTokens ?? 4096,
            ...(systemPrompt ? { system: systemPrompt } : {}),
            messages: conversation,
        });
        // Only text blocks contribute to the reply; other block types are skipped.
        const content = response.content
            .filter((block) => block.type === 'text')
            .map((block) => block.text)
            .join('');
        return {
            content,
            model: response.model,
            tokensUsed: {
                input: response.usage.input_tokens,
                output: response.usage.output_tokens,
            },
        };
    }
    /**
     * Report whether an API key is configured.
     *
     * A key passed to the constructor counts, not only the environment
     * variable; the environment is still consulted at call time so existing
     * callers that set it after construction see the same answer as before.
     *
     * @returns {Promise<boolean>}
     */
    async isAvailable() {
        return this.hasApiKey || Boolean(process.env['ANTHROPIC_API_KEY']);
    }
}
44
+ //# sourceMappingURL=claude.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude.js","sourceRoot":"","sources":["../../src/providers/claude.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAG1C,MAAM,OAAO,cAAc;IACzB,IAAI,GAAG,QAAQ,CAAC;IACR,MAAM,CAAY;IAClB,YAAY,CAAS;IAE7B,YAAY,KAAc,EAAE,MAAe;QACzC,IAAI,CAAC,MAAM,GAAG,IAAI,SAAS,CAAC;YAC1B,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC;SACnD,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,GAAG,KAAK,IAAI,0BAA0B,CAAC;IAC1D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,QAAsB,EAAE,OAAoB;QACrD,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QACnE,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY;eACrC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;eACjD,SAAS,CAAC;QAEf,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YACjD,KAAK,EAAE,OAAO,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY;YAC1C,UAAU,EAAE,OAAO,EAAE,SAAS,IAAI,IAAI;YACtC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjD,QAAQ,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtC,IAAI,EAAE,CAAC,CAAC,IAA4B;gBACpC,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC;SACJ,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO;aAC7B,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;aACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;aAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,OAAO;YACL,OAAO;YACP,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,UAAU,EAAE;gBACV,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;gBAClC,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,aAAa;aACrC;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,WAAW;QACf,OAAO,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAC5C,CAAC;CACF"}