wikimem 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/LICENSE +21 -0
- package/README.md +398 -0
- package/dist/cli/commands/duplicates.d.ts +3 -0
- package/dist/cli/commands/duplicates.d.ts.map +1 -0
- package/dist/cli/commands/duplicates.js +38 -0
- package/dist/cli/commands/duplicates.js.map +1 -0
- package/dist/cli/commands/improve.d.ts +3 -0
- package/dist/cli/commands/improve.d.ts.map +1 -0
- package/dist/cli/commands/improve.js +69 -0
- package/dist/cli/commands/improve.js.map +1 -0
- package/dist/cli/commands/ingest.d.ts +3 -0
- package/dist/cli/commands/ingest.d.ts.map +1 -0
- package/dist/cli/commands/ingest.js +181 -0
- package/dist/cli/commands/ingest.js.map +1 -0
- package/dist/cli/commands/init.d.ts +3 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +91 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/lint.d.ts +3 -0
- package/dist/cli/commands/lint.d.ts.map +1 -0
- package/dist/cli/commands/lint.js +49 -0
- package/dist/cli/commands/lint.js.map +1 -0
- package/dist/cli/commands/query.d.ts +3 -0
- package/dist/cli/commands/query.d.ts.map +1 -0
- package/dist/cli/commands/query.js +51 -0
- package/dist/cli/commands/query.js.map +1 -0
- package/dist/cli/commands/scrape.d.ts +3 -0
- package/dist/cli/commands/scrape.d.ts.map +1 -0
- package/dist/cli/commands/scrape.js +47 -0
- package/dist/cli/commands/scrape.js.map +1 -0
- package/dist/cli/commands/serve.d.ts +3 -0
- package/dist/cli/commands/serve.d.ts.map +1 -0
- package/dist/cli/commands/serve.js +24 -0
- package/dist/cli/commands/serve.js.map +1 -0
- package/dist/cli/commands/status.d.ts +3 -0
- package/dist/cli/commands/status.d.ts.map +1 -0
- package/dist/cli/commands/status.js +30 -0
- package/dist/cli/commands/status.js.map +1 -0
- package/dist/cli/commands/watch.d.ts +3 -0
- package/dist/cli/commands/watch.d.ts.map +1 -0
- package/dist/cli/commands/watch.js +29 -0
- package/dist/cli/commands/watch.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +30 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/config.d.ts +47 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +11 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/improve.d.ts +19 -0
- package/dist/core/improve.d.ts.map +1 -0
- package/dist/core/improve.js +175 -0
- package/dist/core/improve.js.map +1 -0
- package/dist/core/index-manager.d.ts +9 -0
- package/dist/core/index-manager.d.ts.map +1 -0
- package/dist/core/index-manager.js +30 -0
- package/dist/core/index-manager.js.map +1 -0
- package/dist/core/ingest.d.ts +46 -0
- package/dist/core/ingest.d.ts.map +1 -0
- package/dist/core/ingest.js +366 -0
- package/dist/core/ingest.js.map +1 -0
- package/dist/core/lint.d.ts +19 -0
- package/dist/core/lint.d.ts.map +1 -0
- package/dist/core/lint.js +90 -0
- package/dist/core/lint.js.map +1 -0
- package/dist/core/log-manager.d.ts +2 -0
- package/dist/core/log-manager.d.ts.map +1 -0
- package/dist/core/log-manager.js +14 -0
- package/dist/core/log-manager.js.map +1 -0
- package/dist/core/obsidian.d.ts +89 -0
- package/dist/core/obsidian.d.ts.map +1 -0
- package/dist/core/obsidian.js +123 -0
- package/dist/core/obsidian.js.map +1 -0
- package/dist/core/query.d.ts +16 -0
- package/dist/core/query.d.ts.map +1 -0
- package/dist/core/query.js +77 -0
- package/dist/core/query.js.map +1 -0
- package/dist/core/scrape.d.ts +13 -0
- package/dist/core/scrape.d.ts.map +1 -0
- package/dist/core/scrape.js +103 -0
- package/dist/core/scrape.js.map +1 -0
- package/dist/core/vault.d.ts +35 -0
- package/dist/core/vault.d.ts.map +1 -0
- package/dist/core/vault.js +119 -0
- package/dist/core/vault.js.map +1 -0
- package/dist/core/watcher.d.ts +4 -0
- package/dist/core/watcher.d.ts.map +1 -0
- package/dist/core/watcher.js +34 -0
- package/dist/core/watcher.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/processors/audio.d.ts +10 -0
- package/dist/processors/audio.d.ts.map +1 -0
- package/dist/processors/audio.js +139 -0
- package/dist/processors/audio.js.map +1 -0
- package/dist/processors/docx.d.ts +12 -0
- package/dist/processors/docx.d.ts.map +1 -0
- package/dist/processors/docx.js +98 -0
- package/dist/processors/docx.js.map +1 -0
- package/dist/processors/image.d.ts +9 -0
- package/dist/processors/image.d.ts.map +1 -0
- package/dist/processors/image.js +94 -0
- package/dist/processors/image.js.map +1 -0
- package/dist/processors/pdf.d.ts +10 -0
- package/dist/processors/pdf.d.ts.map +1 -0
- package/dist/processors/pdf.js +92 -0
- package/dist/processors/pdf.js.map +1 -0
- package/dist/processors/pptx.d.ts +13 -0
- package/dist/processors/pptx.d.ts.map +1 -0
- package/dist/processors/pptx.js +165 -0
- package/dist/processors/pptx.js.map +1 -0
- package/dist/processors/text.d.ts +7 -0
- package/dist/processors/text.d.ts.map +1 -0
- package/dist/processors/text.js +9 -0
- package/dist/processors/text.js.map +1 -0
- package/dist/processors/url.d.ts +7 -0
- package/dist/processors/url.d.ts.map +1 -0
- package/dist/processors/url.js +61 -0
- package/dist/processors/url.js.map +1 -0
- package/dist/processors/video.d.ts +10 -0
- package/dist/processors/video.d.ts.map +1 -0
- package/dist/processors/video.js +115 -0
- package/dist/processors/video.js.map +1 -0
- package/dist/processors/xlsx.d.ts +13 -0
- package/dist/processors/xlsx.d.ts.map +1 -0
- package/dist/processors/xlsx.js +138 -0
- package/dist/processors/xlsx.js.map +1 -0
- package/dist/providers/claude.d.ts +10 -0
- package/dist/providers/claude.d.ts.map +1 -0
- package/dist/providers/claude.js +44 -0
- package/dist/providers/claude.js.map +1 -0
- package/dist/providers/embeddings.d.ts +62 -0
- package/dist/providers/embeddings.d.ts.map +1 -0
- package/dist/providers/embeddings.js +206 -0
- package/dist/providers/embeddings.js.map +1 -0
- package/dist/providers/index.d.ts +7 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +19 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/ollama.d.ts +10 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +48 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openai.d.ts +10 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +38 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/types.d.ts +33 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/search/bm25.d.ts +18 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +52 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/index.d.ts +12 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +64 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/semantic.d.ts +30 -0
- package/dist/search/semantic.d.ts.map +1 -0
- package/dist/search/semantic.js +162 -0
- package/dist/search/semantic.js.map +1 -0
- package/dist/templates/agents-md.d.ts +2 -0
- package/dist/templates/agents-md.d.ts.map +1 -0
- package/dist/templates/agents-md.js +85 -0
- package/dist/templates/agents-md.js.map +1 -0
- package/dist/templates/config-yaml.d.ts +2 -0
- package/dist/templates/config-yaml.d.ts.map +1 -0
- package/dist/templates/config-yaml.js +81 -0
- package/dist/templates/config-yaml.js.map +1 -0
- package/dist/web/server.d.ts +2 -0
- package/dist/web/server.d.ts.map +1 -0
- package/dist/web/server.js +170 -0
- package/dist/web/server.js.map +1 -0
- package/package.json +68 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PowerPoint (.pptx) processor.
|
|
3
|
+
* Extracts slide text and speaker notes from raw XML (no external deps).
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync } from 'node:fs';
import { basename } from 'node:path';
import { inflateRawSync } from 'node:zlib';
|
|
7
|
+
/**
 * Convert a PowerPoint file into a title, plain content and a Markdown document.
 *
 * @param {string} filePath - Path to the .pptx file.
 * @returns {Promise<{title: string, content: string, markdown: string, slideCount: number, sourcePath: string}>}
 *   `content` is the formatted slides joined by horizontal rules, or a
 *   placeholder sentence when no slide text could be extracted.
 */
export async function processPptx(filePath) {
    const title = basename(filePath, '.pptx');
    const slides = extractSlides(filePath);
    const slideCount = slides.length;
    // Fall back to a visible placeholder so the generated page is never empty.
    const content = slideCount === 0
        ? `[PowerPoint — no text content extracted from ${basename(filePath)}]`
        : slides.map((slide) => formatSlide(slide)).join('\n\n---\n\n');
    const markdown = buildMarkdown(title, filePath, content, slideCount);
    return { title, content, markdown, slideCount, sourcePath: filePath };
}
|
|
28
|
+
/**
 * Read a .pptx file and return its slides as `{ slideNumber, texts, notes }`.
 *
 * A .pptx is a ZIP archive whose XML parts are normally deflate-compressed,
 * so the parts are located through the ZIP central directory, inflated and
 * decoded as UTF-8 before any text is extracted. When the file cannot be
 * read as an archive (or yields no slide text) the previous heuristic —
 * scanning the raw bytes for XML fragments — is used as a fallback.
 *
 * @param {string} filePath - Path to the .pptx file.
 * @returns {Array<{slideNumber: number, texts: string[], notes: string[]}>}
 *   Slides that contain at least one text run; empty when nothing was found.
 */
function extractSlides(filePath) {
    const buffer = readFileSync(filePath);
    const slides = extractSlidesFromArchive(buffer);
    if (slides.length > 0) {
        return slides;
    }
    return extractSlidesFromRawBytes(buffer.toString('latin1'));
}

/** Index the archive's central directory: entry name -> { method, compressedSize, localOffset }. */
function listZipEntries(buffer) {
    const entries = new Map();
    // The end-of-central-directory record is 22 bytes plus an optional
    // comment of at most 65535 bytes, so only the tail needs scanning.
    const lowest = Math.max(0, buffer.length - 22 - 0xffff);
    let eocd = -1;
    for (let pos = buffer.length - 22; pos >= lowest; pos--) {
        if (buffer.readUInt32LE(pos) === 0x06054b50) {
            eocd = pos;
            break;
        }
    }
    if (eocd < 0) {
        return entries;
    }
    const total = buffer.readUInt16LE(eocd + 10);
    let cursor = buffer.readUInt32LE(eocd + 16);
    for (let n = 0; n < total; n++) {
        // Stop quietly on a truncated or corrupt directory; keep what was read.
        if (cursor + 46 > buffer.length || buffer.readUInt32LE(cursor) !== 0x02014b50) {
            break;
        }
        const nameLength = buffer.readUInt16LE(cursor + 28);
        const extraLength = buffer.readUInt16LE(cursor + 30);
        const commentLength = buffer.readUInt16LE(cursor + 32);
        const name = buffer.toString('utf8', cursor + 46, cursor + 46 + nameLength);
        entries.set(name, {
            method: buffer.readUInt16LE(cursor + 10),
            compressedSize: buffer.readUInt32LE(cursor + 20),
            localOffset: buffer.readUInt32LE(cursor + 42),
        });
        cursor += 46 + nameLength + extraLength + commentLength;
    }
    return entries;
}

/** Return one archive entry as UTF-8 text, or null when it is missing or unreadable. */
function readZipEntryText(buffer, entry) {
    if (!entry) {
        return null;
    }
    const { method, compressedSize, localOffset } = entry;
    if (localOffset + 30 > buffer.length || buffer.readUInt32LE(localOffset) !== 0x04034b50) {
        return null;
    }
    // The local header repeats the name/extra lengths; the data starts after them.
    const nameLength = buffer.readUInt16LE(localOffset + 26);
    const extraLength = buffer.readUInt16LE(localOffset + 28);
    const start = localOffset + 30 + nameLength + extraLength;
    const raw = buffer.subarray(start, start + compressedSize);
    try {
        if (method === 0) {
            return raw.toString('utf8'); // stored
        }
        if (method === 8) {
            return inflateRawSync(raw).toString('utf8'); // deflate
        }
    }
    catch {
        // A corrupt part is treated as unreadable so the remaining slides
        // can still be extracted.
        return null;
    }
    // Other compression methods (and ZIP64 sizes) are not supported here.
    return null;
}

/** Extract slides by reading ppt/slides/slideN.xml parts from the ZIP archive. */
function extractSlidesFromArchive(buffer) {
    const entries = listZipEntries(buffer);
    const slideParts = [];
    for (const name of entries.keys()) {
        const hit = /^ppt\/slides\/slide(\d+)\.xml$/.exec(name);
        if (hit) {
            slideParts.push({ name, order: Number.parseInt(hit[1], 10), file: `slide${hit[1]}.xml` });
        }
    }
    // NOTE(review): part numbers are used as slide order; the authoritative
    // order lives in ppt/presentation.xml — confirm if reordered decks matter.
    slideParts.sort((a, b) => a.order - b.order);
    const slides = [];
    for (let index = 0; index < slideParts.length; index++) {
        const part = slideParts[index];
        const xml = readZipEntryText(buffer, entries.get(part.name));
        const texts = xml ? extractTextElements(xml) : [];
        if (texts.length === 0) {
            continue;
        }
        // The slide's relationship part names the notes part that belongs to
        // it, which is more reliable than pairing slides and notes by index.
        const rels = readZipEntryText(buffer, entries.get(`ppt/slides/_rels/${part.file}.rels`));
        const notesRef = rels ? /notesSlides\/(notesSlide\d+\.xml)/.exec(rels) : null;
        const notesXml = notesRef
            ? readZipEntryText(buffer, entries.get(`ppt/notesSlides/${notesRef[1]}`))
            : null;
        slides.push({
            slideNumber: index + 1,
            texts,
            notes: notesXml ? extractTextElements(notesXml) : [],
        });
    }
    return slides;
}

/** Legacy heuristic: look for XML fragments directly in the undecoded file bytes. */
function extractSlidesFromRawBytes(content) {
    const slides = [];
    const slideChunks = splitBySlides(content);
    for (let i = 0; i < slideChunks.length; i++) {
        const chunk = slideChunks[i];
        if (!chunk) {
            continue;
        }
        const texts = extractTextElements(chunk);
        if (texts.length > 0) {
            slides.push({ slideNumber: i + 1, texts, notes: [] });
        }
    }
    if (slides.length === 0) {
        // No slide boundaries found: group every five text runs into a pseudo-slide.
        const allTexts = extractTextElements(content);
        const chunkSize = 5;
        for (let i = 0; i < allTexts.length; i += chunkSize) {
            slides.push({
                slideNumber: slides.length + 1,
                texts: allTexts.slice(i, i + chunkSize),
                notes: [],
            });
        }
    }
    const noteChunks = splitByNotes(content);
    for (let i = 0; i < noteChunks.length; i++) {
        const chunk = noteChunks[i];
        if (!chunk) {
            continue;
        }
        const notes = extractTextElements(chunk);
        const slide = slides[i];
        if (slide && notes.length > 0) {
            slide.notes = notes;
        }
    }
    return slides;
}
|
|
83
|
+
/**
 * Cut `content` into chunks that each begin at a `slide<N>.xml` marker.
 *
 * @param {string} content - Text to scan for slide part names.
 * @returns {string[]} One chunk per marker, running to the next marker (or the
 *   end of the text) and capped at 50000 characters.
 */
function splitBySlides(content) {
    const starts = Array.from(content.matchAll(/slide\d+\.xml/g), (hit) => hit.index);
    return starts.map((begin, idx) => {
        const next = idx + 1 < starts.length ? starts[idx + 1] : content.length;
        return content.substring(begin, Math.min(next, begin + 50000));
    });
}
|
|
99
|
+
/**
 * Cut `content` into chunks that each begin at a `notesSlide<N>.xml` marker.
 *
 * @param {string} content - Text to scan for notes part names.
 * @returns {string[]} One chunk per marker, running to the next marker (or the
 *   end of the text) and capped at 50000 characters.
 */
function splitByNotes(content) {
    const starts = Array.from(content.matchAll(/notesSlide\d+\.xml/g), (hit) => hit.index);
    return starts.map((begin, idx) => {
        const next = idx + 1 < starts.length ? starts[idx + 1] : content.length;
        return content.substring(begin, Math.min(next, begin + 50000));
    });
}
|
|
114
|
+
/**
 * Collect the text of every `<a:t>` run in a fragment of Office Open XML.
 *
 * Runs inside `<a:fld>` field elements are ordinary `<a:t>` elements and are
 * therefore already covered by the single scan below. The former second pass
 * over `<a:fld>` compared raw text against already-decoded text, so any field
 * text containing an entity was emitted twice.
 *
 * @param {string} xml - XML (or text containing XML fragments) to scan.
 * @returns {string[]} Trimmed, entity-decoded, non-empty text runs in document order.
 */
function extractTextElements(xml) {
    const texts = [];
    // Accept attributes such as xml:space="preserve", but neither other
    // elements that merely start with "a:t" (<a:tab>, <a:tbl>, ...) nor
    // self-closing <a:t .../> tags, which carry no text.
    const textRun = /<a:t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/a:t>/g;
    for (const [, inner] of xml.matchAll(textRun)) {
        const text = inner.trim();
        if (text.length > 0) {
            texts.push(decodeXmlEntities(text));
        }
    }
    return texts;
}
|
|
135
|
+
/**
 * Decode the XML predefined entities and numeric character references.
 *
 * Everything is decoded in one pass so that the output of one replacement is
 * never decoded again (`&amp;lt;` becomes the literal text `&lt;`, not `<`).
 *
 * @param {string} text - Text taken from an XML document.
 * @returns {string} The text with `&amp; &lt; &gt; &quot; &apos;`, decimal
 *   (`&#65;`) and hexadecimal (`&#x41;`) references replaced by their characters.
 */
function decodeXmlEntities(text) {
    const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
    return text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g, (whole, dec, hex, name) => {
        if (name !== undefined) {
            return named[name];
        }
        const codePoint = dec !== undefined
            ? Number.parseInt(dec, 10)
            : Number.parseInt(hex, 16);
        // fromCodePoint (unlike fromCharCode) handles characters beyond the
        // BMP; references outside the Unicode range are left untouched.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });
}
|
|
144
|
+
/**
 * Render one slide as a Markdown section.
 *
 * @param {{slideNumber: number, texts: string[], notes: string[]}} slide
 * @returns {string} A `### Slide N` heading, the text runs as paragraphs and,
 *   when notes exist, a "Speaker Notes" blockquote with the notes joined by spaces.
 */
function formatSlide(slide) {
    const sections = [`### Slide ${slide.slideNumber}\n\n${slide.texts.join('\n\n')}`];
    if (slide.notes.length > 0) {
        sections.push(`**Speaker Notes:**\n\n> ${slide.notes.join(' ')}`);
    }
    return sections.join('\n\n');
}
|
|
152
|
+
/**
 * Assemble the Markdown page for a processed presentation.
 *
 * @param {string} title - Page heading.
 * @param {string} filePath - Source path; used as link target and, via its base name, as link text.
 * @param {string} content - Already formatted slide content.
 * @param {number} slideCount - Number of slides reported in the header.
 * @returns {string} Markdown with a metadata blockquote (source, type, slide
 *   count, UTC processing date) followed by a "Slides" section.
 */
function buildMarkdown(title, filePath, content, slideCount) {
    const processedOn = new Date().toISOString().split('T')[0];
    const lines = [
        `# ${title}`,
        '',
        `> **Source:** [${basename(filePath)}](${filePath})`,
        '> **Type:** PowerPoint Presentation (.pptx)',
        `> **Slides:** ${slideCount}`,
        `> **Processed:** ${processedOn}`,
        '',
        '## Slides',
        '',
        `${content}`,
        '', // keeps the trailing newline of the page
    ];
    return lines.join('\n');
}
|
|
165
|
+
//# sourceMappingURL=pptx.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pptx.js","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAgBrC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IAEjC,IAAI,OAAe,CAAC;IACpB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,MAAM;aACb,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;aAClC,IAAI,CAAC,aAAa,CAAC,CAAC;IACzB,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,gDAAgD,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAClF,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,uCAAuC;IACvC,iDAAiD;IACjD,0DAA0D;IAC1D,oEAAoE;IAEpE,yEAAyE;IACzE,gEAAgE;IAChE,MAAM,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,WAAW,EAAE,CAAC,GAAG,CAAC;gBAClB,KAAK;gBACL,KAAK,EAAE,EAAE,EAAE,yBAAyB;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,kEAAkE;YAClE,MAAM,SAAS,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBACpD,MAAM,CAAC,IAAI,CAAC;oBACV,WAAW,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;oBAC9B,KAAK,EAAE,UAAU;oBACjB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IAC
H,CAAC;IAED,uDAAuD;IACvD,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,iCAAiC;QACjC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,8CAA8C;IAC9C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,gBAAgB,CAAC;IACrC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,OAAe;IACnC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAG,qBAAqB,CAAC;IACzC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAW;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,gDAAgD;IAChD,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,C
AAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,8DAA8D,CAAC;IAChF,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACrD,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,IAAI;SACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AAC1F,CAAC;AAED,SAAS,WAAW,CAAC,KAAmB;IACtC,IAAI,EAAE,GAAG,aAAa,KAAK,CAAC,WAAW,MAAM,CAAC;IAC9C,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE/B,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,EAAE,IAAI,+BAA+B,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;gBAEhC,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../src/processors/text.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,CAM3D"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { basename, extname } from 'node:path';
|
|
3
|
+
/**
 * Load a UTF-8 text file and describe it.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {{title: string, content: string, wordCount: number}} The file name
 *   without its extension, the full text, and the number of
 *   whitespace-separated words.
 */
export function processText(filePath) {
    const content = readFileSync(filePath, 'utf-8');
    // Every maximal run of non-whitespace characters counts as one word.
    const words = content.match(/\S+/g) ?? [];
    return {
        title: basename(filePath, extname(filePath)),
        content,
        wordCount: words.length,
    };
}
|
|
9
|
+
//# sourceMappingURL=text.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/processors/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQ9C,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAE9D,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/processors/url.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CASnE"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
 * Fetch a web page and turn it into `{ title, content, url }`.
 *
 * Uses the Firecrawl API when the FIRECRAWL_API_KEY environment variable is
 * set; otherwise downloads the page directly and strips its HTML.
 *
 * @param {string} url - Address of the page to process.
 * @returns {Promise<{title: string, content: string, url: string}>}
 */
export async function processUrl(url) {
    const { FIRECRAWL_API_KEY: firecrawlKey } = process.env;
    return firecrawlKey
        ? await processWithFirecrawl(url, firecrawlKey)
        : await processWithFetch(url);
}
|
|
10
|
+
/**
 * Scrape a page through the Firecrawl API and return it as Markdown.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {string} apiKey - Firecrawl API key, sent as a bearer token.
 * @returns {Promise<{title: string, content: string, url: string}>} The page
 *   title (falling back to the URL's host name), the Markdown content (empty
 *   string when the response carries none) and the original URL.
 * @throws {Error} When the HTTP status is not OK, or when the response body
 *   reports failure or contains no `data` object.
 */
async function processWithFirecrawl(url, apiKey) {
    const response = await fetch('https://api.firecrawl.dev/v1/scrape', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`,
        },
        body: JSON.stringify({
            url,
            formats: ['markdown'],
        }),
    });
    if (!response.ok) {
        throw new Error(`Firecrawl API error: ${response.status}`);
    }
    const payload = await response.json();
    const page = payload?.data;
    // A 2xx response can still carry no usable page; fail with a clear
    // message instead of a TypeError from reading properties of undefined.
    // NOTE(review): `success` / `error` field names assumed from the
    // Firecrawl response format — confirm against the API reference.
    if (payload?.success === false || page === null || typeof page !== 'object') {
        const detail = typeof payload?.error === 'string' ? `: ${payload.error}` : '';
        throw new Error(`Firecrawl API returned no content for ${url}${detail}`);
    }
    return {
        title: page.metadata?.title ?? new URL(url).hostname,
        content: page.markdown ?? '',
        url,
    };
}
|
|
32
|
+
/**
 * Download a page directly and reduce it to plain text.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<{title: string, content: string, url: string}>} The page
 *   title (host name when the page has no usable `<title>`), a small Markdown
 *   document holding at most the first 15000 characters of the page text, and
 *   the original URL.
 * @throws {Error} When the HTTP status is not OK.
 */
async function processWithFetch(url) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const html = await response.text();
    // Titles may carry attributes or span several lines, so match across
    // newlines. Running the match through stripHtml removes nested markup and
    // collapses whitespace; an empty title falls back to the host name.
    const titleMatch = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
    const title = stripHtml(titleMatch?.[1] ?? '') || new URL(url).hostname;
    const content = stripHtml(html);
    return {
        title,
        content: `# ${title}\n\nSource: ${url}\n\n${content.substring(0, 15000)}`,
        url,
    };
}
|
|
49
|
+
/**
 * Reduce an HTML document to plain text.
 *
 * Script and style elements are dropped with their content, remaining tags
 * become spaces, common character entities are decoded and runs of
 * whitespace collapse to single spaces.
 *
 * @param {string} html - Markup to convert.
 * @returns {string} Trimmed plain text.
 */
function stripHtml(html) {
    const entities = { amp: '&', lt: '<', gt: '>', nbsp: ' ', quot: '"', apos: "'", '#39': "'" };
    return html
        .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
        .replace(/<[^>]+>/g, ' ')
        // Decode in a single pass so the result of one replacement is never
        // decoded again ("&amp;lt;" stays the literal text "&lt;").
        .replace(/&(amp|lt|gt|nbsp|quot|apos|#39);/g, (_, name) => entities[name])
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
61
|
+
//# sourceMappingURL=url.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/processors/url.ts"],"names":[],"mappings":"AAMA,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,8CAA8C;IAC9C,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACtD,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,MAAM,oBAAoB,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IACvD,CAAC;IAED,qCAAqC;IACrC,OAAO,MAAM,gBAAgB,CAAC,GAAG,CAAC,CAAC;AACrC,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW,EAAE,MAAc;IAC7D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,qCAAqC,EAAE;QAClE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,eAAe,EAAE,UAAU,MAAM,EAAE;SACpC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,GAAG;YACH,OAAO,EAAE,CAAC,UAAU,CAAC;SACtB,CAAC;KACH,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAElC,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ;QACxD,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ;QAC3B,GAAG;KACJ,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW;IACzC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,KAAK,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAEnC,gBAAgB;IAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAEvD,iCAAiC;IACjC,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEhC,OAAO;QACL,KAAK;QACL,OAAO,EAAE,KAAK,KAAK,eAAe,GAAG,OAAO,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE;QACzE,GAAG;KACJ,CAAC;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,mCAAmC;IACnC,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;IACjE,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC,CAAC;IAC3D,mBAAmB;IACnB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACrC,yBAAyB;IACzB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,G
AAG,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IAC/H,uBAAuB;IACvB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACxC,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** Outcome of processing one video file with processVideo. */
export interface VideoResult {
|
|
2
|
+
/** Base name of the source file, computed by stripping the file extension. */
title: string;
|
|
3
|
+
/** Transcript of the audio track; empty string when ffmpeg is missing, audio extraction fails or transcription throws. */
transcript: string;
|
|
4
|
+
/** Markdown page containing source link, type, optional duration, processing date and the transcript (or a bracketed status note). */
markdown: string;
|
|
5
|
+
/** Human-readable length such as "1h 2m 3s", "4m 5s" or "6s"; only set after a successful transcription when ffprobe reports a number. */
duration?: string;
|
|
6
|
+
/** The file path exactly as it was passed in. */
sourcePath: string;
|
|
7
|
+
}
|
|
8
|
+
/** True when the path's extension (case-insensitive) is one of .mp4, .mov, .avi, .mkv, .webm, .m4v. */
export declare function isVideoFile(filePath: string): boolean;
|
|
9
|
+
/** Extracts the audio track with ffmpeg and transcribes it; rejects with an Error for unsupported extensions, and otherwise resolves even when extraction or transcription fails. */
export declare function processVideo(filePath: string): Promise<VideoResult>;
|
|
10
|
+
//# sourceMappingURL=video.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"video.d.ts","sourceRoot":"","sources":["../../src/processors/video.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAmEzE"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { spawnSync } from 'node:child_process';
|
|
2
|
+
import { basename, extname, join } from 'node:path';
|
|
3
|
+
import { existsSync, unlinkSync } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { processAudio } from './audio.js';
|
|
6
|
+
// Lower-case extensions (including the dot) that are treated as video files.
const SUPPORTED_EXTENSIONS = new Set([
    '.mp4',
    '.mov',
    '.avi',
    '.mkv',
    '.webm',
    '.m4v',
]);

/**
 * Tell whether a path looks like a supported video file.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} True when the extension, compared case-insensitively, is supported.
 */
export function isVideoFile(filePath) {
    const extension = extname(filePath).toLowerCase();
    return SUPPORTED_EXTENSIONS.has(extension);
}
|
|
10
|
+
/**
 * Transcribe a video by extracting its audio track with ffmpeg and handing
 * the resulting WAV file to the audio processor.
 *
 * Failures after the format check are reported inside the returned result
 * (empty transcript plus a bracketed note in the Markdown) rather than thrown.
 *
 * @param {string} filePath - Path to the video file.
 * @returns {Promise<{title: string, transcript: string, markdown: string, duration?: string, sourcePath: string}>}
 * @throws {Error} When the file extension is not a supported video format.
 */
export async function processVideo(filePath) {
    const rawExt = extname(filePath);
    const ext = rawExt.toLowerCase();
    // Strip the extension exactly as written: basename() matches suffixes
    // case-sensitively, so passing the lower-cased form would leave
    // "CLIP.MP4" with its extension.
    const title = basename(filePath, rawExt);
    if (!SUPPORTED_EXTENSIONS.has(ext)) {
        throw new Error(`Unsupported video format: ${ext}. Supported: ${[...SUPPORTED_EXTENSIONS].join(', ')}`);
    }
    // Result shape shared by every "could not transcribe" outcome.
    const withoutTranscript = (note) => ({
        title,
        transcript: '',
        markdown: buildMarkdown(title, filePath, note),
        sourcePath: filePath,
    });
    if (!isFfmpegAvailable()) {
        return withoutTranscript('[Video file — install ffmpeg for audio extraction and transcription]');
    }
    // Include pid and a random part so concurrent runs cannot collide on the
    // same temporary file name.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const audioPath = join(tmpdir(), `llmwiki-video-${uniqueSuffix}.wav`);
    try {
        // Step 1: extract the audio track as 16 kHz mono PCM WAV.
        const extractResult = spawnSync('ffmpeg', [
            '-hide_banner', // keep stderr limited to real errors so the
            '-loglevel', 'error', // 200-character excerpt below is meaningful
            '-i', filePath,
            '-vn', // no video
            '-acodec', 'pcm_s16le', // WAV format
            '-ar', '16000', // 16kHz
            '-ac', '1', // mono
            '-y', // overwrite
            audioPath,
        ], { encoding: 'utf-8', timeout: 120000 });
        if (extractResult.status !== 0) {
            // stderr is empty when the process was killed (timeout) or could
            // not be started; fall back to the spawn error in that case.
            const reason = extractResult.stderr || extractResult.error?.message || 'unknown error';
            return withoutTranscript(`[Video file — ffmpeg audio extraction failed: ${reason.substring(0, 200)}]`);
        }
        // Step 2: transcribe the extracted audio.
        try {
            const audioResult = await processAudio(audioPath);
            const duration = getDuration(filePath);
            return {
                title,
                transcript: audioResult.transcript,
                markdown: buildMarkdown(title, filePath, audioResult.transcript, duration),
                duration,
                sourcePath: filePath,
            };
        }
        catch (error) {
            return withoutTranscript(`[Video file — transcription failed: ${error instanceof Error ? error.message : String(error)}]`);
        }
    }
    finally {
        // Remove the temporary WAV on every path, including a failed
        // extraction that left a partial file behind. Cleanup is best-effort:
        // a failure to delete must not replace the result being returned.
        try {
            if (existsSync(audioPath)) {
                unlinkSync(audioPath);
            }
        }
        catch {
            // Intentionally ignored; the file lives in the OS temp directory.
        }
    }
}
|
|
72
|
+
/**
 * Assemble the Markdown page for a processed video.
 *
 * @param {string} title - Page heading.
 * @param {string} filePath - Source path; used as link target and, via its base name, as link text.
 * @param {string} transcript - Transcript text or a status note; a placeholder is used when empty.
 * @param {string} [duration] - Human-readable duration; the line is omitted when falsy.
 * @returns {string} Markdown with a metadata blockquote followed by a "Transcript" section.
 */
function buildMarkdown(title, filePath, transcript, duration) {
    const header = [
        `> **Source:** [${basename(filePath)}](${filePath})`,
        '> **Type:** Video',
    ];
    if (duration) {
        header.push(`> **Duration:** ${duration}`);
    }
    header.push(`> **Processed:** ${new Date().toISOString().split('T')[0]}`);
    const body = transcript || '_No transcript available._';
    return `# ${title}\n\n${header.join('\n')}\n\n## Transcript\n\n${body}\n`;
}
|
|
84
|
+
/**
 * Check whether an `ffmpeg` executable can be run.
 *
 * @returns {boolean} True only when `ffmpeg -version` exits with status 0
 *   within five seconds.
 */
function isFfmpegAvailable() {
    let probe;
    try {
        probe = spawnSync('ffmpeg', ['-version'], { encoding: 'utf-8', timeout: 5000 });
    }
    catch {
        return false;
    }
    return probe.status === 0;
}
|
|
93
|
+
/**
 * Ask ffprobe for a media file's length and format it for display.
 *
 * @param {string} filePath - Path to the media file.
 * @returns {string|undefined} "Xh Ym Zs", "Ym Zs" or "Zs" (leading zero units
 *   omitted), or undefined when ffprobe cannot be run or prints no number.
 */
function getDuration(filePath) {
    let totalSeconds;
    try {
        const probe = spawnSync('ffprobe', [
            '-v', 'error', '-show_entries', 'format=duration',
            '-of', 'default=noprint_wrappers=1:nokey=1', filePath,
        ], { encoding: 'utf-8', timeout: 10000 });
        // stdout is not a string when ffprobe could not be started; the
        // resulting TypeError is handled like any other probe failure.
        totalSeconds = parseFloat(probe.stdout.trim());
    }
    catch {
        return undefined;
    }
    if (Number.isNaN(totalSeconds)) {
        return undefined;
    }
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds % 3600) / 60);
    const secs = Math.floor(totalSeconds % 60);
    const parts = [];
    if (hours > 0) {
        parts.push(`${hours}h`);
    }
    if (hours > 0 || minutes > 0) {
        parts.push(`${minutes}m`);
    }
    parts.push(`${secs}s`);
    return parts.join(' ');
}
|
|
115
|
+
//# sourceMappingURL=video.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"video.js","sourceRoot":"","sources":["../../src/processors/video.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAU1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAExF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,mBAAmB;IACnB,IAAI,CAAC,iBAAiB,EAAE,EAAE,CAAC;QACzB,OAAO;YACL,KAAK;YACL,UAAU,EAAE,EAAE;YACd,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,sEAAsE,CAAC;YAChH,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,yCAAyC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACpE,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,EAAE;QACxC,IAAI,EAAE,QAAQ;QACd,KAAK,EAAqB,WAAW;QACrC,SAAS,EAAE,WAAW,EAAG,aAAa;QACtC,KAAK,EAAE,OAAO,EAAY,8BAA8B;QACxD,KAAK,EAAE,GAAG,EAAgB,OAAO;QACjC,IAAI,EAAsB,YAAY;QACtC,SAAS;KACV,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IAE3C,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO;YACL,KAAK;YACL,UAAU,EAAE,EAAE;YACd,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,iDAAiD,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;YACrI,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,yCAAyC;IACzC,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;QAElD,qBAAqB;QACrB,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QAEvC,2BAA2B;QAC3B,IAAI,UAAU,CAAC,SAAS,CAAC;YAAE,UAAU,CAAC,SAAS,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK;YACL,UAAU,EAAE,WAAW,CAAC,UAAU;YAClC,QAAQ,E
AAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC,UAAU,EAAE,QAAQ,CAAC;YAC1E,QAAQ;YACR,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,sBAAsB;QACtB,IAAI,UAAU,CAAC,SAAS,CAAC;YAAE,UAAU,CAAC,SAAS,CAAC,CAAC;QAEjD,OAAO;YACL,KAAK;YACL,UAAU,EAAE,EAAE;YACd,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,uCAAuC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC;YAC1I,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,UAAkB,EAAE,QAAiB;IAC3F,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;mBAC7B,QAAQ,CAAC,CAAC,CAAC,qBAAqB,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC/C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,UAAU,IAAI,4BAA4B;CAC3C,CAAC;AACF,CAAC;AAED,SAAS,iBAAiB;IACxB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB;IACnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE;YAClC,IAAI,EAAE,OAAO,EAAE,eAAe,EAAE,iBAAiB;YACjD,KAAK,EAAE,oCAAoC,EAAE,QAAQ;SACtD,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;QACjD,IAAI,KAAK,CAAC,OAAO,CAAC;YAAE,OAAO,SAAS,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;QAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;QACnC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC;QACtC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC;QAChC,OAAO,GAAG,CAAC,GAAG,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Excel/spreadsheet (.xlsx, .xls) processor.
|
|
3
|
+
* Uses xlsx (SheetJS) for extraction, with a raw XML fallback.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Result of processing one spreadsheet file with `processXlsx`.
 *
 * - `title`: base name of the source path with its file extension removed.
 * - `content`: extracted spreadsheet data as text (Markdown tables per sheet,
 *   or a flat list of cell values from the raw-XML fallback); a bracketed
 *   placeholder message when nothing could be extracted.
 * - `markdown`: complete Markdown document (heading, source metadata block,
 *   then `content` under a "Data" section).
 * - `sheetCount`: number of sheets reported by whichever extractor ran.
 * - `sourcePath`: the `filePath` argument exactly as it was passed in.
 */
export interface XlsxResult {
|
|
6
|
+
title: string;
|
|
7
|
+
content: string;
|
|
8
|
+
markdown: string;
|
|
9
|
+
sheetCount: number;
|
|
10
|
+
sourcePath: string;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Extract the contents of a spreadsheet file (.xlsx / .xls) as Markdown.
 *
 * Tries the SheetJS (`xlsx`) package first and falls back to scanning the raw
 * file for XML text when that fails.
 *
 * @param filePath Path of the spreadsheet file to read.
 * @returns The extracted content together with title, sheet count and a
 *          ready-to-write Markdown document.
 */
export declare function processXlsx(filePath: string): Promise<XlsxResult>;
|
|
13
|
+
//# sourceMappingURL=xlsx.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"xlsx.d.ts","sourceRoot":"","sources":["../../src/processors/xlsx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CA6BvE"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Excel/spreadsheet (.xlsx, .xls) processor.
|
|
3
|
+
* Uses xlsx (SheetJS) for extraction, with a raw XML fallback.
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync } from 'node:fs';
|
|
6
|
+
import { basename, extname } from 'node:path';
|
|
7
|
+
/**
 * Convert a spreadsheet file into Markdown plus basic metadata.
 *
 * SheetJS is tried first because it reconstructs real tables; if it is not
 * installed or cannot parse the file, the raw-XML scanner is used as a
 * best-effort fallback. Errors from the fallback itself (for example a missing
 * file) propagate to the caller.
 *
 * @param {string} filePath - Path of the .xlsx / .xls file to process.
 * @returns {Promise<{title: string, content: string, markdown: string, sheetCount: number, sourcePath: string}>}
 */
export async function processXlsx(filePath) {
    // path.basename() matches its suffix argument case-sensitively, so the
    // extension must be passed exactly as spelled in the path. Lower-casing it
    // first left "Report.XLSX" with the title "Report.XLSX".
    const title = basename(filePath, extname(filePath));
    let extracted;
    try {
        extracted = await extractWithSheetJS(filePath);
    }
    catch {
        // Deliberate best-effort: any SheetJS failure (including the optional
        // dependency being absent) degrades to the raw XML scan.
        extracted = extractFromRawXml(filePath);
    }
    const { sheetCount } = extracted;
    const content = extracted.content.trim()
        ? extracted.content
        : `[Spreadsheet — no data extracted from ${basename(filePath)}]`;
    return {
        title,
        content,
        markdown: buildMarkdown(title, filePath, content, sheetCount),
        sheetCount,
        sourcePath: filePath,
    };
}
|
|
34
|
+
/**
 * Read a workbook with SheetJS and render every non-empty sheet as a
 * Markdown table under a `### <sheet name>` heading.
 *
 * @param {string} filePath - Path of the spreadsheet to load.
 * @returns {Promise<{content: string, sheetCount: number}>} Sections joined by
 *          horizontal rules, and the total number of sheets in the workbook
 *          (including sheets that produced no table).
 */
async function extractWithSheetJS(filePath) {
    // Loaded lazily because xlsx is an optional dependency; a failed import
    // rejects this function's promise.
    const sheetjs = await import('xlsx');
    const workbook = sheetjs.read(readFileSync(filePath), { type: 'buffer' });
    const sections = workbook.SheetNames.flatMap((sheetName) => {
        const sheet = workbook.Sheets[sheetName];
        if (!sheet) {
            return [];
        }
        // header: 1 yields an array of row arrays instead of keyed objects.
        const rows = sheetjs.utils.sheet_to_json(sheet, { header: 1 });
        if (rows.length === 0) {
            return [];
        }
        const table = arrayToMarkdownTable(rows);
        return table ? [`### ${sheetName}\n\n${table}`] : [];
    });
    return {
        content: sections.join('\n\n---\n\n'),
        sheetCount: workbook.SheetNames.length,
    };
}
|
|
58
|
+
/**
 * Best-effort fallback that scans the raw file bytes for SpreadsheetML text.
 *
 * The file is not unzipped; only `<t>` (string) and `<v>` (cell value)
 * elements that appear literally in the bytes are found. Table structure
 * cannot be reconstructed, so the values are returned as one flat,
 * de-duplicated list capped at 500 entries.
 *
 * @param {string} filePath - Path of the spreadsheet to scan.
 * @returns {{content: string, sheetCount: number}} `content` is empty when no
 *          values were found; `sheetCount` is the number of `<sheet ` tags
 *          seen, or 1 when there are none.
 */
function extractFromRawXml(filePath) {
    // latin1 maps each byte to exactly one code unit, so the mostly binary
    // file can be regex-scanned without losing bytes.
    const raw = readFileSync(filePath).toString('latin1');
    const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
    // Turn a matched latin1 slice back into real text: restore the original
    // bytes, decode them as UTF-8, then resolve XML entities in one pass so
    // "&amp;lt;" is not decoded twice.
    const decodeText = (latin1Text) => Buffer.from(latin1Text, 'latin1')
        .toString('utf8')
        .replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (entity, body) => {
            if (body[0] !== '#') {
                return namedEntities[body];
            }
            const codePoint = body[1] === 'x'
                ? Number.parseInt(body.slice(2), 16)
                : Number.parseInt(body.slice(1), 10);
            // Leave out-of-range references untouched instead of throwing.
            return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
        })
        .trim();
    const patterns = [
        // Exactly the <t> element (optionally with attributes), not every tag
        // that starts with "t" and not the self-closing form <t/>.
        /<t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/t>/g,
        // Cell values.
        /<v>([\s\S]*?)<\/v>/g,
    ];
    const textParts = [];
    for (const pattern of patterns) {
        for (const [, inner] of raw.matchAll(pattern)) {
            const text = decodeText(inner);
            if (text) {
                textParts.push(text);
            }
        }
    }
    const sheetMatches = raw.match(/<sheet /g);
    const sheetCount = sheetMatches ? sheetMatches.length : 1;
    if (textParts.length === 0) {
        return { content: '', sheetCount };
    }
    // Present as a simple list since table structure cannot be reconstructed.
    const uniqueParts = [...new Set(textParts)].slice(0, 500);
    return {
        content: `**Extracted cell values:**\n\n${uniqueParts.join(' | ')}`,
        sheetCount,
    };
}
|
|
91
|
+
/**
 * Render an array of row arrays as a Markdown table.
 *
 * The first non-empty row becomes the header. Rows that are not arrays or
 * contain only blank cells are dropped. At most 100 rows are rendered; when
 * more exist, a trailing note states how many were omitted.
 *
 * @param {unknown[][]} data - Rows of cell values.
 * @returns {string} The Markdown table, or '' when there is nothing to show.
 */
function arrayToMarkdownTable(data) {
    const maxRows = 100;
    if (!Array.isArray(data) || data.length === 0) {
        return '';
    }
    const isBlank = (cell) => cell === null || cell === undefined || String(cell).trim() === '';
    const rows = data.filter((row) => Array.isArray(row) && row.some((cell) => !isBlank(cell)));
    if (rows.length === 0) {
        return '';
    }
    // Fold instead of Math.max(...spread): spreading one argument per row
    // throws a RangeError on sheets with a very large number of rows.
    const maxCols = rows.reduce((widest, row) => Math.max(widest, row.length), 0);
    // Escape pipes so they do not split the cell, and flatten every kind of
    // line break (LF, CRLF, lone CR) so a cell cannot break the table row.
    const renderCell = (cell) => (cell === null || cell === undefined
        ? ''
        : String(cell).replace(/\|/g, '\\|').replace(/\r\n|\r|\n/g, ' '));
    const lines = [];
    rows.slice(0, maxRows).forEach((row, rowIndex) => {
        // Pad short rows to the full width so every line has the same columns.
        const cells = Array.from({ length: maxCols }, (_, col) => renderCell(row[col]));
        lines.push(`| ${cells.join(' | ')} |`);
        if (rowIndex === 0) {
            // Header separator directly after the first row.
            lines.push(`| ${cells.map(() => '---').join(' | ')} |`);
        }
    });
    if (rows.length > maxRows) {
        lines.push(`\n> _...and ${rows.length - maxRows} more rows (truncated)_`);
    }
    return lines.join('\n');
}
|
|
125
|
+
/**
 * Assemble the final Markdown document for a processed spreadsheet.
 *
 * Layout: an H1 title, a blockquote with source link, file type, sheet count
 * and processing date (YYYY-MM-DD, UTC), then the extracted content under a
 * "Data" heading. The document ends with a newline.
 *
 * @param {string} title - Document heading.
 * @param {string} filePath - Source path; used for the link and the type.
 * @param {string} content - Already rendered spreadsheet content.
 * @param {number} sheetCount - Number of sheets to report.
 * @returns {string} The complete Markdown text.
 */
function buildMarkdown(title, filePath, content, sheetCount) {
    const processedOn = new Date().toISOString().split('T')[0];
    const metadata = [
        `> **Source:** [${basename(filePath)}](${filePath})`,
        `> **Type:** Spreadsheet (${extname(filePath)})`,
        `> **Sheets:** ${sheetCount}`,
        `> **Processed:** ${processedOn}`,
    ];
    const documentLines = [
        `# ${title}`,
        '',
        ...metadata,
        '',
        '## Data',
        '',
        `${content}`,
        '',
    ];
    return documentLines.join('\n');
}
|
|
138
|
+
//# sourceMappingURL=xlsx.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"xlsx.js","sourceRoot":"","sources":["../../src/processors/xlsx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAU9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,yDAAyD;IACzD,IAAI,OAAe,CAAC;IACpB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAClD,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QACzB,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC3C,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QACzB,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACpB,OAAO,GAAG,yCAAyC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAC3E,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IAChD,kDAAkD;IAClD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEvD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACzC,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,mCAAmC;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAW,KAAK,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;QACtE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEhC,MAAM,KAAK,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;QACzC,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,OAAO,SAAS,OAAO,KAAK,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC;QACrC,UAAU,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;KACvC,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAgB;IACzC,uEAAuE;IACvE,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE1C,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,qDAA
qD;IACrD,MAAM,SAAS,GAAG,0BAA0B,CAAC;IAC7C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAClD,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;YAChC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,2CAA2C;IAC3C,MAAM,UAAU,GAAG,qBAAqB,CAAC;IACzC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;YAChC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,eAAe;IACf,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1D,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC;IACrC,CAAC;IAED,sEAAsE;IACtE,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1D,OAAO;QACL,OAAO,EAAE,iCAAiC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE;QACnE,UAAU;KACX,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAiB;IAC7C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,mCAAmC;IACnC,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAC/B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAC5G,CAAC;IAEF,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,wBAAwB;IACxB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtF,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7B,uBAAuB;IACvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACpD,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;YAAE,SAAS;Q
AElC,MAAM,KAAK,GAAG,EAAE,CAAC;QACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACpB,MAAM,OAAO,GAAG,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClH,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,uCAAuC;QACvC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,MAAM,GAAG,GAAG,yBAAyB,CAAC,CAAC;IACxE,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;2BACrB,OAAO,CAAC,QAAQ,CAAC;gBAC5B,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { LLMProvider, LLMMessage, LLMResponse, LLMOptions } from './types.js';
|
|
2
|
+
/**
 * LLM provider backed by the Anthropic SDK.
 *
 * - `constructor(model?, apiKey?)`: `apiKey` falls back to the
 *   `ANTHROPIC_API_KEY` environment variable; `model` falls back to the
 *   provider's built-in default model.
 * - `chat(messages, options?)`: sends the non-system messages to the Anthropic
 *   Messages API (system-role messages are passed as the system prompt unless
 *   `options.systemPrompt` is given) and resolves with the concatenated text
 *   of the reply, the model name and input/output token counts.
 * - `isAvailable()`: reports whether an Anthropic API key is configured
 *   (checks the `ANTHROPIC_API_KEY` environment variable).
 */
export declare class ClaudeProvider implements LLMProvider {
|
|
3
|
+
name: string;
|
|
4
|
+
private client;
|
|
5
|
+
private defaultModel;
|
|
6
|
+
constructor(model?: string, apiKey?: string);
|
|
7
|
+
chat(messages: LLMMessage[], options?: LLMOptions): Promise<LLMResponse>;
|
|
8
|
+
isAvailable(): Promise<boolean>;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=claude.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claude.d.ts","sourceRoot":"","sources":["../../src/providers/claude.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAEnF,qBAAa,cAAe,YAAW,WAAW;IAChD,IAAI,SAAY;IAChB,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,YAAY,CAAS;gBAEjB,KAAK,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAOrC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,EAAE,OAAO,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IAiCxE,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;CAGtC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
2
|
+
/**
 * LLM provider that talks to Anthropic's Messages API through the official SDK.
 */
export class ClaudeProvider {
    name = 'claude';
    client;
    defaultModel;
    // True when a non-empty API key was resolved at construction time.
    hasApiKey;
    /**
     * @param {string} [model] - Default model id; falls back to
     *        'claude-sonnet-4-20250514'.
     * @param {string} [apiKey] - API key; falls back to the ANTHROPIC_API_KEY
     *        environment variable.
     */
    constructor(model, apiKey) {
        const resolvedKey = apiKey ?? process.env['ANTHROPIC_API_KEY'];
        this.client = new Anthropic({
            apiKey: resolvedKey,
        });
        this.hasApiKey = Boolean(resolvedKey);
        this.defaultModel = model ?? 'claude-sonnet-4-20250514';
    }
    /**
     * Send a conversation and return the text of the reply.
     *
     * System-role messages are not sent as conversation turns; they are joined
     * with blank lines and passed as the `system` field, unless
     * `options.systemPrompt` is provided, which takes precedence.
     *
     * @param {{role: string, content: string}[]} messages - Conversation so far.
     * @param {{model?: string, maxTokens?: number, systemPrompt?: string}} [options]
     * @returns {Promise<{content: string, model: string, tokensUsed: {input: number, output: number}}>}
     */
    async chat(messages, options) {
        // Single pass: split system prompts from conversational turns.
        const systemTexts = [];
        const conversation = [];
        for (const { role, content } of messages) {
            if (role === 'system') {
                systemTexts.push(content);
            }
            else {
                conversation.push({ role, content });
            }
        }
        const systemPrompt = options?.systemPrompt ?? systemTexts.join('\n\n');
        const response = await this.client.messages.create({
            model: options?.model ?? this.defaultModel,
            max_tokens: options?.maxTokens ?? 4096,
            // Omit the field entirely when there is no system prompt.
            ...(systemPrompt ? { system: systemPrompt } : {}),
            messages: conversation,
        });
        // Only text blocks contribute to the returned content.
        const content = response.content
            .filter((block) => block.type === 'text')
            .map((block) => block.text)
            .join('');
        return {
            content,
            model: response.model,
            tokensUsed: {
                input: response.usage.input_tokens,
                output: response.usage.output_tokens,
            },
        };
    }
    /**
     * Report whether an API key is configured, either passed to the
     * constructor or present in the ANTHROPIC_API_KEY environment variable.
     *
     * @returns {Promise<boolean>}
     */
    async isAvailable() {
        return Boolean(this.hasApiKey || process.env['ANTHROPIC_API_KEY']);
    }
}
|
|
44
|
+
//# sourceMappingURL=claude.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claude.js","sourceRoot":"","sources":["../../src/providers/claude.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAG1C,MAAM,OAAO,cAAc;IACzB,IAAI,GAAG,QAAQ,CAAC;IACR,MAAM,CAAY;IAClB,YAAY,CAAS;IAE7B,YAAY,KAAc,EAAE,MAAe;QACzC,IAAI,CAAC,MAAM,GAAG,IAAI,SAAS,CAAC;YAC1B,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC;SACnD,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,GAAG,KAAK,IAAI,0BAA0B,CAAC;IAC1D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,QAAsB,EAAE,OAAoB;QACrD,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QACnE,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAEtE,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY;eACrC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;eACjD,SAAS,CAAC;QAEf,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YACjD,KAAK,EAAE,OAAO,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY;YAC1C,UAAU,EAAE,OAAO,EAAE,SAAS,IAAI,IAAI;YACtC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjD,QAAQ,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtC,IAAI,EAAE,CAAC,CAAC,IAA4B;gBACpC,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC;SACJ,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO;aAC7B,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;aACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;aAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,OAAO;YACL,OAAO;YACP,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,UAAU,EAAE;gBACV,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,YAAY;gBAClC,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,aAAa;aACrC;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,WAAW;QACf,OAAO,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAC5C,CAAC;CACF"}
|