@tyvm/knowhow 0.0.118 → 0.0.120
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -3
- package/src/agents/base/base.ts +72 -9
- package/src/agents/researcher/researcher.ts +9 -2
- package/src/agents/tools/list.ts +13 -2
- package/src/agents/tools/patch.ts +318 -32
- package/src/agents/tools/readFile.ts +48 -5
- package/src/chat/modules/AgentModule.ts +12 -0
- package/src/cli.ts +2 -0
- package/src/clients/anthropic.ts +12 -2
- package/src/clients/contextLimits.ts +77 -0
- package/src/commands/convert.ts +291 -0
- package/src/conversion.ts +15 -61
- package/src/index.ts +3 -0
- package/src/processors/CustomVariables.ts +45 -20
- package/src/processors/TokenCompressor.ts +95 -9
- package/src/services/AgentSyncFs.ts +26 -4
- package/src/services/AgentSyncKnowhowWeb.ts +26 -4
- package/src/services/SyncedAgentWatcher.ts +8 -0
- package/src/services/conversion/ConversionService.ts +763 -0
- package/src/services/conversion/index.ts +2 -0
- package/src/services/conversion/types.ts +79 -0
- package/src/services/index.ts +8 -1
- package/src/services/modules/types.ts +2 -0
- package/src/services/watchers/FsSyncer.ts +6 -0
- package/src/services/watchers/RemoteSyncer.ts +5 -0
- package/tests/agents/tools/readFile.test.ts +88 -0
- package/tests/clients/AIClient.test.ts +5 -0
- package/tests/clients/contextLimits.test.ts +71 -0
- package/tests/patching/patchFileOutput.test.ts +217 -0
- package/tests/patching/regression-2026.test.ts +278 -0
- package/tests/processors/CustomVariables.test.ts +4 -4
- package/tests/processors/TokenCompressor.test.ts +59 -1
- package/tests/processors/tools/grepToolResponse.test.ts +72 -0
- package/tests/services/ConversionService.test.ts +154 -0
- package/tests/test.spec.ts +1 -1
- package/tests/unit/clients/AIClient.test.ts +8 -0
- package/ts_build/package.json +1 -3
- package/ts_build/src/agents/base/base.d.ts +3 -0
- package/ts_build/src/agents/base/base.js +46 -3
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/researcher/researcher.js +5 -2
- package/ts_build/src/agents/researcher/researcher.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +10 -2
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/agents/tools/patch.js +202 -24
- package/ts_build/src/agents/tools/patch.js.map +1 -1
- package/ts_build/src/agents/tools/readFile.d.ts +1 -1
- package/ts_build/src/agents/tools/readFile.js +17 -4
- package/ts_build/src/agents/tools/readFile.js.map +1 -1
- package/ts_build/src/chat/modules/AgentModule.js +12 -0
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
- package/ts_build/src/cli.js +2 -0
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/anthropic.js +7 -2
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/clients/contextLimits.js +70 -0
- package/ts_build/src/clients/contextLimits.js.map +1 -1
- package/ts_build/src/commands/convert.d.ts +2 -0
- package/ts_build/src/commands/convert.js +275 -0
- package/ts_build/src/commands/convert.js.map +1 -0
- package/ts_build/src/conversion.js +6 -38
- package/ts_build/src/conversion.js.map +1 -1
- package/ts_build/src/index.d.ts +2 -0
- package/ts_build/src/index.js +4 -1
- package/ts_build/src/index.js.map +1 -1
- package/ts_build/src/processors/CustomVariables.js +14 -12
- package/ts_build/src/processors/CustomVariables.js.map +1 -1
- package/ts_build/src/processors/TokenCompressor.d.ts +2 -0
- package/ts_build/src/processors/TokenCompressor.js +57 -7
- package/ts_build/src/processors/TokenCompressor.js.map +1 -1
- package/ts_build/src/services/AgentSyncFs.d.ts +1 -0
- package/ts_build/src/services/AgentSyncFs.js +21 -4
- package/ts_build/src/services/AgentSyncFs.js.map +1 -1
- package/ts_build/src/services/AgentSyncKnowhowWeb.d.ts +1 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.js +21 -4
- package/ts_build/src/services/AgentSyncKnowhowWeb.js.map +1 -1
- package/ts_build/src/services/SyncedAgentWatcher.d.ts +3 -0
- package/ts_build/src/services/SyncedAgentWatcher.js +4 -0
- package/ts_build/src/services/SyncedAgentWatcher.js.map +1 -1
- package/ts_build/src/services/conversion/ConversionService.d.ts +18 -0
- package/ts_build/src/services/conversion/ConversionService.js +585 -0
- package/ts_build/src/services/conversion/ConversionService.js.map +1 -0
- package/ts_build/src/services/conversion/index.d.ts +2 -0
- package/ts_build/src/services/conversion/index.js +19 -0
- package/ts_build/src/services/conversion/index.js.map +1 -0
- package/ts_build/src/services/conversion/types.d.ts +49 -0
- package/ts_build/src/services/conversion/types.js +3 -0
- package/ts_build/src/services/conversion/types.js.map +1 -0
- package/ts_build/src/services/index.d.ts +3 -0
- package/ts_build/src/services/index.js +6 -1
- package/ts_build/src/services/index.js.map +1 -1
- package/ts_build/src/services/modules/index.d.ts +2 -0
- package/ts_build/src/services/modules/types.d.ts +2 -0
- package/ts_build/src/services/watchers/FsSyncer.d.ts +1 -0
- package/ts_build/src/services/watchers/FsSyncer.js +5 -0
- package/ts_build/src/services/watchers/FsSyncer.js.map +1 -1
- package/ts_build/src/services/watchers/RemoteSyncer.d.ts +1 -0
- package/ts_build/src/services/watchers/RemoteSyncer.js +4 -0
- package/ts_build/src/services/watchers/RemoteSyncer.js.map +1 -1
- package/ts_build/tests/agents/tools/readFile.test.d.ts +1 -0
- package/ts_build/tests/agents/tools/readFile.test.js +90 -0
- package/ts_build/tests/agents/tools/readFile.test.js.map +1 -0
- package/ts_build/tests/clients/AIClient.test.js +1 -0
- package/ts_build/tests/clients/AIClient.test.js.map +1 -1
- package/ts_build/tests/clients/contextLimits.test.d.ts +1 -0
- package/ts_build/tests/clients/contextLimits.test.js +57 -0
- package/ts_build/tests/clients/contextLimits.test.js.map +1 -0
- package/ts_build/tests/patching/patchFileOutput.test.d.ts +1 -0
- package/ts_build/tests/patching/patchFileOutput.test.js +187 -0
- package/ts_build/tests/patching/patchFileOutput.test.js.map +1 -0
- package/ts_build/tests/patching/regression-2026.test.js +214 -0
- package/ts_build/tests/patching/regression-2026.test.js.map +1 -1
- package/ts_build/tests/processors/CustomVariables.test.js +4 -4
- package/ts_build/tests/processors/CustomVariables.test.js.map +1 -1
- package/ts_build/tests/processors/TokenCompressor.test.js +37 -1
- package/ts_build/tests/processors/TokenCompressor.test.js.map +1 -1
- package/ts_build/tests/processors/tools/grepToolResponse.test.d.ts +1 -0
- package/ts_build/tests/processors/tools/grepToolResponse.test.js +40 -0
- package/ts_build/tests/processors/tools/grepToolResponse.test.js.map +1 -0
- package/ts_build/tests/services/ConversionService.test.d.ts +1 -0
- package/ts_build/tests/services/ConversionService.test.js +154 -0
- package/ts_build/tests/services/ConversionService.test.js.map +1 -0
- package/ts_build/tests/test.spec.js +1 -1
- package/ts_build/tests/test.spec.js.map +1 -1
- package/ts_build/tests/unit/clients/AIClient.test.js +3 -0
- package/ts_build/tests/unit/clients/AIClient.test.js.map +1 -1
|
@@ -0,0 +1,763 @@
|
|
|
1
|
+
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import * as crypto from "crypto";
import * as http from "http";
import * as https from "https";
import { execFileSync, execSync } from "child_process";
import { AIClient } from "../../clients";
import { MediaProcessorService } from "../MediaProcessorService";
import {
  Converter,
  ConverterContext,
  ConvertInput,
  ConvertOptions,
  ConvertResult,
  Modality,
} from "./types";
|
|
16
|
+
|
|
17
|
+
/**
 * Default quality gate applied to a conversion result.
 *
 * Only "text" and "html" results are inspected: the result is rejected when
 * the source file is larger than 500 KiB yet fewer than 50 characters of text
 * came out. Every other output type is accepted as-is, and so is any result
 * whose source file cannot be stat'ed.
 *
 * @param filePath Path of the original source file (used for its size only).
 * @param result   The converter output to judge.
 * @returns `false` only for the "big file, almost no text" case; otherwise `true`.
 */
function defaultIsGoodEnough(filePath: string, result: ConvertResult): boolean {
  const isTextual = result.outputType === "text" || result.outputType === "html";
  if (!isTextual) {
    return true;
  }

  const extractedLength = (result.text ?? "").length;

  let sourceBytes: number;
  try {
    sourceBytes = fs.statSync(filePath).size;
  } catch {
    // The size is unknown, so there is nothing to compare against: accept.
    return true;
  }

  const suspiciouslyEmpty = sourceBytes > 500 * 1024 && extractedLength < 50;
  return !suspiciouslyEmpty;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Derive a deterministic cache key for one conversion step.
 *
 * The key is the hex MD5 digest of the converter name, the output type, the
 * input file path and the six optional range bounds (page, line and time
 * start/end), joined with "|". Missing bounds contribute an empty segment.
 *
 * @param converterName Name of the converter performing the step.
 * @param outputType    Modality the step produces.
 * @param input         Step input; only `filePath` and the range fields are read.
 * @returns 32-character hexadecimal digest.
 */
function cacheKey(
  converterName: string,
  outputType: Modality,
  input: ConvertInput
): string {
  const rangeBounds = [
    input.startPage,
    input.endPage,
    input.startLine,
    input.endLine,
    input.startTime,
    input.endTime,
  ];

  const segments: string[] = [
    String(converterName),
    String(outputType),
    String(input.filePath),
  ];
  for (const bound of rangeBounds) {
    segments.push(String(bound ?? ""));
  }

  const hasher = crypto.createHash("md5");
  hasher.update(segments.join("|"));
  return hasher.digest("hex");
}
|
|
60
|
+
|
|
61
|
+
/**
 * Repeatedly query a video generation job until it finishes.
 *
 * The status is fetched through `clients.getVideoStatus`, then the function
 * waits `intervalMs` and tries again for as long as the deadline
 * (`maxWaitMs` from the moment of the call) has not passed.
 *
 * @param clients    Client used to query the job status.
 * @param provider   Provider identifier forwarded to `getVideoStatus`.
 * @param jobId      Identifier of the job to watch.
 * @param intervalMs Delay between two status checks (default 5000 ms).
 * @param maxWaitMs  Overall time budget (default 300 000 ms).
 * @returns An object carrying the `data` array of the completed job.
 * @throws Error when the job reports "failed" or "expired", or when the
 *         deadline passes without the job completing.
 */
async function pollVideoJob(
  clients: AIClient,
  provider: string,
  jobId: string,
  intervalMs = 5000,
  maxWaitMs = 300_000
): Promise<{ data?: { url?: string; b64_json?: string; fileUri?: string }[]; error?: string }> {
  const giveUpAt = Date.now() + maxWaitMs;
  const pause = (): Promise<unknown> =>
    new Promise((wake) => setTimeout(wake, intervalMs));

  while (Date.now() < giveUpAt) {
    const job = await clients.getVideoStatus(provider, { jobId });

    switch (job.status) {
      case "completed":
        return { data: job.data };
      case "failed":
      case "expired":
        throw new Error(`Video generation job ${jobId} ${job.status}: ${job.error ?? ""}`);
      default:
        // Any other status means the job is still running: wait and re-check.
        await pause();
    }
  }

  throw new Error(`Video generation job ${jobId} timed out after ${maxWaitMs / 1000}s`);
}
|
|
85
|
+
|
|
86
|
+
export class ConversionService {
|
|
87
|
+
private converters: Converter[] = [];
|
|
88
|
+
private clients: AIClient;
|
|
89
|
+
private mediaProcessor: MediaProcessorService;
|
|
90
|
+
|
|
91
|
+
/**
 * Keep references to the collaborators and register the built-in converters.
 *
 * @param clients        AI client used by the converters that call a model.
 * @param mediaProcessor Service used by the audio/video transcription converters.
 */
constructor(clients: AIClient, mediaProcessor: MediaProcessorService) {
  this.mediaProcessor = mediaProcessor;
  this.clients = clients;

  // Registered last so both collaborators are already set on the instance.
  this.initDefaults();
}
|
|
96
|
+
|
|
97
|
+
/**
 * Append a converter to the registry. Converters are kept in registration
 * order; nothing is de-duplicated.
 *
 * @param converter Converter to add.
 */
register(converter: Converter): void {
  const registry = this.converters;
  registry.push(converter);
}
|
|
100
|
+
|
|
101
|
+
/**
 * Snapshot of the registered converters, in registration order.
 *
 * @returns A shallow copy, so mutating the returned array does not affect
 *          the registry.
 */
list(): Converter[] {
  return this.converters.slice();
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Register built-in converters that are always available.
|
|
107
|
+
*/
|
|
108
|
+
private initDefaults(): void {
|
|
109
|
+
const self = this;
|
|
110
|
+
|
|
111
|
+
// ── audio → text ────────────────────────────────────────────────────────
// Transcribes an audio file through mediaProcessor.processAudio and joins
// the returned chunks with newlines. The converter context is not read
// here, so no per-call options are applied by this converter itself.
this.register({
  name: "whisper",
  cache: true,
  inputModality: "audio",
  outputType: "text",
  convert: async (input, _ctx): Promise<ConvertResult> => {
    const transcriptChunks = await self.mediaProcessor.processAudio(input.filePath);
    return { outputType: "text", text: transcriptChunks.join("\n") };
  },
});
|
|
124
|
+
|
|
125
|
+
// ── video → text via ffmpeg + transcription ─────────────────────────────
// When startTime and/or endTime is supplied, the clip is first cut with
// ffmpeg (stream copy, no re-encode) into the OS temp directory and the
// trimmed copy is transcribed instead of the original.
this.register({
  name: "ffmpeg-whisper",
  cache: true,
  inputModality: "video",
  outputType: "text",
  /**
   * Transcribe a video file, optionally restricted to a time range.
   *
   * @param input Step input; `filePath`, `startTime` and `endTime` are read.
   * @returns Text result with the transcription chunks joined by newlines.
   * @throws If ffmpeg cannot be started or exits with a non-zero status.
   */
  async convert(input, _ctx): Promise<ConvertResult> {
    let filePath = input.filePath;

    if (input.startTime !== undefined || input.endTime !== undefined) {
      const tmpDir = path.join(os.tmpdir(), "knowhow-convert");
      fs.mkdirSync(tmpDir, { recursive: true });
      const trimmed = path.join(tmpDir, `trim_${path.basename(filePath)}`);

      // Arguments are passed as an argv array and ffmpeg is started without
      // a shell, so quotes, spaces, `$` or backticks in the path or in the
      // time values cannot be interpreted as shell syntax.
      const ffmpegArgs: string[] = ["-y"];
      if (input.startTime !== undefined) {
        ffmpegArgs.push("-ss", String(input.startTime));
      }
      ffmpegArgs.push("-i", filePath);
      if (input.endTime !== undefined) {
        ffmpegArgs.push("-to", String(input.endTime));
      }
      ffmpegArgs.push("-c", "copy", trimmed);

      execFileSync("ffmpeg", ffmpegArgs, { stdio: "pipe" });
      filePath = trimmed;
    }

    const chunks = await self.mediaProcessor.processAudio(filePath);
    return { outputType: "text", text: chunks.join("\n") };
  },
});
|
|
153
|
+
|
|
154
|
+
// ── text passthrough (known extensions) ────────────────────────────────
// Files with one of the listed extensions are already text: the converter
// simply returns their UTF-8 content.
this.register({
  name: "text-passthrough",
  inputExts: ["txt", "md", "json", "yaml", "yml", "csv", "xml", "html", "htm", "js", "ts", "py", "rb", "sh", "text"],
  outputType: "text",
  convert: async (input, _ctx): Promise<ConvertResult> => ({
    outputType: "text",
    text: fs.readFileSync(input.filePath, "utf8"),
  }),
});
|
|
164
|
+
|
|
165
|
+
// ── text passthrough fallback ───────────────────────────────────────────
// Matches the generic "text" modality rather than a specific extension and
// returns the file's UTF-8 content unchanged.
this.register({
  name: "text-passthrough-fallback",
  inputModality: "text",
  outputType: "text",
  convert: async (input, _ctx): Promise<ConvertResult> => ({
    outputType: "text",
    text: fs.readFileSync(input.filePath, "utf8"),
  }),
});
|
|
175
|
+
|
|
176
|
+
// ── image → text via vision LLM ────────────────────────────────────────
// options: { model?, prompt?, provider? }
this.register({
  name: "image-to-text",
  cache: true,
  inputExts: ["png", "jpg", "jpeg", "gif", "webp", "bmp"],
  inputModality: "image",
  outputType: "text",
  /**
   * Send each image to a chat-completion model as a base64 data URL and
   * collect the replies.
   *
   * When the step input carries a `files` array, every entry is processed
   * (one request per image, sequentially); otherwise only `filePath` is.
   *
   * @param input Step input (`filePath`, optional `files`).
   * @param ctx   Converter context; `options.provider` (default "openai"),
   *              `options.model` (default "gpt-4o") and `options.prompt` are read.
   * @returns Text result with the per-image replies joined by blank lines and
   *          the summed `usd_cost` reported by the responses.
   */
  async convert(input, ctx): Promise<ConvertResult> {
    const provider = (ctx.options?.provider as string) || "openai";
    const model = (ctx.options?.model as string) || "gpt-4o";
    const prompt =
      (ctx.options?.prompt as string) ||
      "Extract and transcribe all text from this image. If it is a document or scan, return the full text content verbatim. If there is no text, describe the image in detail.";

    const handedOver: unknown = (input as any).files;
    const images: string[] = Array.isArray(handedOver)
      ? handedOver
      : [input.filePath];

    const parts: string[] = [];
    let totalCost = 0;
    for (const imgPath of images) {
      const ext =
        path.extname(imgPath).replace(/^\./, "").toLowerCase() || "png";
      // "jpg" is only a file extension; the registered media type is
      // image/jpeg, and strict consumers reject "image/jpg".
      const mediaSubtype = ext === "jpg" ? "jpeg" : ext;
      const base64 = fs.readFileSync(imgPath, { encoding: "base64" });
      const dataUrl = `data:image/${mediaSubtype};base64,${base64}`;

      const resp = await self.clients.createCompletion(provider, {
        model,
        max_tokens: 4000,
        messages: [
          {
            role: "user",
            content: [
              { type: "text", text: prompt },
              { type: "image_url", image_url: { url: dataUrl } },
            ],
          },
        ],
      } as any);

      parts.push(resp.choices?.[0]?.message?.content ?? "");
      totalCost += (resp as any).usd_cost ?? 0;
    }

    return {
      outputType: "text",
      text: parts.join("\n\n"),
      usd_cost: totalCost,
    };
  },
});
|
|
227
|
+
|
|
228
|
+
// ── text → audio via TTS ────────────────────────────────────────────────
// options: { model?, voice?, provider?, format? }
// The text file's content is sent to the audio-generation client and the
// returned audio is written to the cache dir as "<input name>.<format>".
this.register({
  name: "text-to-audio",
  inputModality: "text",
  outputType: "audio",
  async convert(input, ctx): Promise<ConvertResult> {
    const opts = ctx.options;
    const format =
      (opts?.format as "mp3" | "opus" | "aac" | "flac" | "wav" | "pcm") || "mp3";
    const voice = (opts?.voice as string) || "alloy";
    const model = (opts?.model as string) || "tts-1";
    const provider = (opts?.provider as string) || "openai";

    const speech = await self.clients.createAudioGeneration(provider, {
      model,
      input: fs.readFileSync(input.filePath, "utf8"),
      voice,
      response_format: format,
    });

    const stem = path.basename(input.filePath, path.extname(input.filePath));
    const outFile = path.join(ctx.cacheDir, `${stem}.${format}`);
    fs.mkdirSync(ctx.cacheDir, { recursive: true });
    fs.writeFileSync(outFile, speech.audio);

    return { outputType: "audio", files: [outFile], usd_cost: speech.usd_cost };
  },
});
|
|
264
|
+
|
|
265
|
+
// ── text → image via image generation ──────────────────────────────────
// options: { model?, provider?, size?, quality?, style?, n? }
// The text file's trimmed content is the prompt; each generated image is
// written to the cache dir as "<input name>_<index>.png".
this.register({
  name: "text-to-image",
  inputModality: "text",
  outputType: "image",
  /**
   * Generate image(s) from the prompt stored in `input.filePath`.
   *
   * Items delivered as base64 are decoded to disk; items delivered as a URL
   * are downloaded. An item carrying neither is skipped, so `files` only
   * lists paths that were actually written.
   *
   * @param input Step input; `filePath` is read as the prompt.
   * @param ctx   Converter context (`cacheDir`, `options`).
   * @returns Image result with the written file paths and reported cost.
   * @throws If a URL download fails, answers with a non-2xx status, or the
   *         output file cannot be written.
   */
  async convert(input, ctx): Promise<ConvertResult> {
    const provider = (ctx.options?.provider as string) || "openai";
    const model = (ctx.options?.model as string) || "dall-e-3";
    const size = (ctx.options?.size as any) || "1024x1024";
    const quality = (ctx.options?.quality as any) || "standard";
    const style = (ctx.options?.style as any) || "vivid";
    const n = (ctx.options?.n as number) || 1;

    const prompt = fs.readFileSync(input.filePath, "utf8").trim();

    const resp = await self.clients.createImageGeneration(provider, {
      model,
      prompt,
      size,
      quality,
      style,
      n,
      response_format: "b64_json",
    });

    fs.mkdirSync(ctx.cacheDir, { recursive: true });
    const stem = path.basename(input.filePath, path.extname(input.filePath));

    // Stream `url` into `dest`, rejecting on request errors, non-2xx
    // responses and write errors so a failed download can neither hang the
    // conversion nor leave an error page saved as a .png.
    const downloadTo = (url: string, dest: string): Promise<void> =>
      new Promise<void>((resolve, reject) => {
        const onResponse = (res: http.IncomingMessage): void => {
          const status = res.statusCode ?? 0;
          if (status < 200 || status >= 300) {
            res.resume(); // discard the body so the socket is released
            reject(new Error(`Image download failed with HTTP ${status}: ${url}`));
            return;
          }
          const out = fs.createWriteStream(dest);
          out.on("error", reject);
          out.on("finish", () => resolve());
          res.on("error", reject);
          res.pipe(out);
        };
        const request = url.startsWith("https:")
          ? https.get(url, onResponse)
          : http.get(url, onResponse);
        request.on("error", reject);
      });

    const files: string[] = [];
    const items = resp.data ?? [];
    for (let i = 0; i < items.length; i++) {
      const item = items[i];
      const outFile = path.join(ctx.cacheDir, `${stem}_${i}.png`);
      if (item.b64_json) {
        fs.writeFileSync(outFile, Buffer.from(item.b64_json, "base64"));
      } else if (item.url) {
        await downloadTo(item.url, outFile);
      } else {
        // Nothing to persist for this item; do not report a missing file.
        continue;
      }
      files.push(outFile);
    }

    return { outputType: "image", files, usd_cost: resp.usd_cost ?? 0 };
  },
});
|
|
323
|
+
|
|
324
|
+
// ── text → video via video generation ──────────────────────────────────
// options: { model?, provider?, duration?, resolution?, aspect_ratio? }
// The text file's trimmed content is the prompt. If the provider answers
// with a jobId the job is polled until it finishes; otherwise the clips in
// the immediate response are used. Clips are written to the cache dir as
// "output_<index>.mp4".
this.register({
  name: "text-to-video",
  inputModality: "text",
  outputType: "video",
  /**
   * Generate video clip(s) from the prompt stored in `input.filePath`.
   *
   * @param input Step input; `filePath` is read as the prompt.
   * @param ctx   Converter context (`cacheDir`, `options`).
   * @returns Video result with the written file paths and the cost reported
   *          by the generation call.
   * @throws Whatever the client calls or `pollVideoJob` throw.
   */
  async convert(input, ctx): Promise<ConvertResult> {
    const provider = (ctx.options?.provider as string) || "google";
    const model = (ctx.options?.model as string) || "veo-2.0-generate-001";
    const duration = (ctx.options?.duration as number) || undefined;
    const resolution = (ctx.options?.resolution as string) || undefined;
    const aspect_ratio = (ctx.options?.aspect_ratio as string) || "16:9";

    const prompt = fs.readFileSync(input.filePath, "utf8").trim();

    const genResp = await self.clients.createVideoGeneration(provider, {
      model,
      prompt,
      duration,
      resolution,
      aspect_ratio,
    });

    fs.mkdirSync(ctx.cacheDir, { recursive: true });
    const totalCost = genResp.usd_cost ?? 0;
    const files: string[] = [];

    // Persist one clip: inline base64 wins, otherwise the clip is fetched
    // through the client. A clip with neither is skipped.
    const saveClip = async (
      index: number,
      b64?: string | null,
      uri?: string | null
    ): Promise<void> => {
      const outFile = path.join(ctx.cacheDir, `output_${index}.mp4`);
      if (b64) {
        fs.writeFileSync(outFile, Buffer.from(b64, "base64"));
      } else if (uri) {
        const dlResp = await self.clients.downloadVideo(provider, { fileId: uri, uri });
        fs.writeFileSync(outFile, dlResp.data);
      } else {
        return;
      }
      files.push(outFile);
    };

    if (genResp.jobId) {
      // Asynchronous provider: wait for the job, then prefer fileUri over url.
      const polled = (await pollVideoJob(self.clients, provider, genResp.jobId)).data ?? [];
      for (let i = 0; i < polled.length; i++) {
        await saveClip(i, polled[i].b64_json, polled[i].fileUri || polled[i].url);
      }
    } else {
      // Synchronous provider: clips are in the response. `data` is guarded
      // so a response with neither jobId nor data yields an empty result
      // instead of a TypeError.
      const immediate = genResp.data ?? [];
      for (let i = 0; i < immediate.length; i++) {
        await saveClip(i, immediate[i].b64_json, immediate[i].url);
      }
    }

    return { outputType: "video", files, usd_cost: totalCost };
  },
});
|
|
387
|
+
|
|
388
|
+
// ── image → video via image-to-video generation ─────────────────────────
// options: { model?, provider?, prompt?, duration?, aspect_ratio? }
// The image is sent as a base64 data URL. If the provider answers with a
// jobId the job is polled until it finishes; otherwise the clips in the
// immediate response are used. Clips are written to the cache dir as
// "output_<index>.mp4".
this.register({
  name: "image-to-video",
  inputExts: ["png", "jpg", "jpeg", "webp"],
  inputModality: "image",
  outputType: "video",
  /**
   * Animate the image at `input.filePath` into video clip(s).
   *
   * @param input Step input; `filePath` is the source image.
   * @param ctx   Converter context (`cacheDir`, `options`).
   * @returns Video result with the written file paths and the cost reported
   *          by the generation call.
   * @throws Whatever the client calls or `pollVideoJob` throw.
   */
  async convert(input, ctx): Promise<ConvertResult> {
    const provider = (ctx.options?.provider as string) || "xai";
    const model = (ctx.options?.model as string) || "grok-2-image";
    const prompt = (ctx.options?.prompt as string) || "Animate this image naturally.";
    const duration = (ctx.options?.duration as number) || undefined;
    const aspect_ratio = (ctx.options?.aspect_ratio as string) || undefined;

    // Encode the image as a data URL for providers that accept image_url.
    const ext =
      path.extname(input.filePath).replace(/^\./, "").toLowerCase() || "png";
    // "jpg" is only a file extension; the registered media type is image/jpeg.
    const mediaSubtype = ext === "jpg" ? "jpeg" : ext;
    const base64 = fs.readFileSync(input.filePath, { encoding: "base64" });
    const imageDataUrl = `data:image/${mediaSubtype};base64,${base64}`;

    const genResp = await self.clients.createVideoGeneration(provider, {
      model,
      prompt,
      duration,
      aspect_ratio,
      image_url: imageDataUrl,
    });

    fs.mkdirSync(ctx.cacheDir, { recursive: true });
    const totalCost = genResp.usd_cost ?? 0;
    const files: string[] = [];

    // Persist one clip: inline base64 wins, otherwise the clip is fetched
    // through the client. A clip with neither is skipped.
    const saveClip = async (
      index: number,
      b64?: string | null,
      uri?: string | null
    ): Promise<void> => {
      const outFile = path.join(ctx.cacheDir, `output_${index}.mp4`);
      if (b64) {
        fs.writeFileSync(outFile, Buffer.from(b64, "base64"));
      } else if (uri) {
        const dlResp = await self.clients.downloadVideo(provider, { fileId: uri, uri });
        fs.writeFileSync(outFile, dlResp.data);
      } else {
        return;
      }
      files.push(outFile);
    };

    if (genResp.jobId) {
      // Asynchronous provider: wait for the job, then prefer fileUri over url.
      const polled = (await pollVideoJob(self.clients, provider, genResp.jobId)).data ?? [];
      for (let i = 0; i < polled.length; i++) {
        await saveClip(i, polled[i].b64_json, polled[i].fileUri || polled[i].url);
      }
    } else {
      // Synchronous provider: `data` is guarded so a response with neither
      // jobId nor data yields an empty result instead of a TypeError.
      const immediate = genResp.data ?? [];
      for (let i = 0; i < immediate.length; i++) {
        await saveClip(i, immediate[i].b64_json, immediate[i].url);
      }
    }

    return { outputType: "video", files, usd_cost: totalCost };
  },
});
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
/**
 * Breadth-first search over the registered converters, from `inputNode`
 * (a file extension or a modality) to `targetType`.
 *
 * A converter is an edge out of a node when the node is listed in its
 * `inputExts` or equals its `inputModality`; the edge leads to the
 * converter's `outputType`. When `inputNode` is neither one of the five
 * known modalities nor accepted by any converter, the search is additionally
 * seeded with "text", i.e. an unknown extension is treated as text.
 *
 * @param inputNode      Starting extension or modality.
 * @param targetType     Modality to reach.
 * @param preferredFirst Accepted for interface compatibility; not read here.
 * @returns A one-element array holding the first chain found (fewest steps,
 *          registration order breaking ties), or `null` when the target
 *          cannot be reached.
 */
private findPath(
  inputNode: string,
  targetType: Modality,
  preferredFirst: string[] = []
): Converter[][] | null {
  interface Frontier {
    node: string;
    chain: Converter[];
  }

  const seen = new Set<string>([inputNode]);
  const frontier: Frontier[] = [{ node: inputNode, chain: [] }];

  const modalities: Modality[] = ["text", "html", "image", "audio", "video"];
  const directlyHandled = this.converters.some(
    (c) => c.inputExts?.includes(inputNode) || c.inputModality === inputNode
  );
  if (!modalities.includes(inputNode as Modality) && !directlyHandled) {
    seen.add("text");
    frontier.push({ node: "text", chain: [] });
  }

  // The array doubles as the FIFO queue: entries pushed while iterating are
  // visited later by this same loop.
  for (const { node, chain } of frontier) {
    for (const candidate of this.converters) {
      const byExtension = Boolean(
        candidate.inputExts && candidate.inputExts.includes(node)
      );
      const byModality = Boolean(
        candidate.inputModality && candidate.inputModality === node
      );
      if (!byExtension && !byModality) continue;

      const extended = [...chain, candidate];
      if (candidate.outputType === targetType) {
        return [extended];
      }
      if (seen.has(candidate.outputType)) continue;

      seen.add(candidate.outputType);
      frontier.push({ node: candidate.outputType, chain: extended });
    }
  }

  return null;
}
|
|
504
|
+
|
|
505
|
+
/**
 * Pick a conversion chain, giving precedence to the caller's preferred
 * converters.
 *
 * Preferred names are tried in order. A preferred converter is used as the
 * first step when it is registered, accepts `inputNode` (by extension or
 * modality) and either produces `targetType` directly or has a path from its
 * output to `targetType`. When no preferred converter qualifies, the plain
 * search result from `findPath` is used.
 *
 * @param inputNode           Starting extension or modality.
 * @param targetType          Modality to reach.
 * @param preferredConverters Converter names in order of preference.
 * @returns The chosen chain, or `null` when no path exists.
 */
private findBestPath(
  inputNode: string,
  targetType: Modality,
  preferredConverters: string[]
): Converter[] | null {
  for (const wantedName of preferredConverters) {
    const preferred = this.converters.find((c) => c.name === wantedName);
    if (!preferred) continue;

    const acceptsInput =
      (preferred.inputExts && preferred.inputExts.includes(inputNode)) ||
      (preferred.inputModality && preferred.inputModality === inputNode);
    if (!acceptsInput) continue;

    if (preferred.outputType === targetType) {
      return [preferred];
    }

    const remainder = this.findPath(preferred.outputType, targetType);
    if (remainder) {
      return [preferred, ...remainder[0]];
    }
  }

  const fallback = this.findPath(inputNode, targetType);
  if (!fallback) {
    return null;
  }
  return fallback[0];
}
|
|
533
|
+
|
|
534
|
+
/**
 * Build a chain that visits the given intermediate modalities, in order,
 * before ending at `targetType`.
 *
 * Each leg (current node → next stop) is resolved independently with
 * `findPath` and the legs are concatenated.
 *
 * @param inputNode  Starting extension or modality.
 * @param via        Intermediate modalities to pass through, in order.
 * @param targetType Final modality.
 * @returns The concatenated chain, or `null` as soon as one leg has no path.
 */
private findPathVia(
  inputNode: string,
  via: Modality[],
  targetType: Modality
): Converter[] | null {
  const stops: Modality[] = via.concat(targetType);
  const chain: Converter[] = [];

  let from: string = inputNode;
  for (const stop of stops) {
    const legs = this.findPath(from, stop);
    if (!legs || legs.length === 0) {
      return null;
    }
    for (const step of legs[0]) {
      chain.push(step);
    }
    from = stop;
  }

  return chain;
}
|
|
554
|
+
|
|
555
|
+
/**
 * Convert a file to the target modality, with caching, fallback, and range support.
 *
 * The input node is the lower-cased file extension (or "text" when the path
 * has no extension). The converter chain comes from `findPathVia` when
 * `options.via` is non-empty, otherwise from `findBestPath`. Each step of the
 * chain is then executed in order:
 *
 * - When the step's converter has `cache` set, `options.force` is false and a
 *   `.done` marker exists, the step result is read from
 *   `<dir>/<file name without ext>/<converter name>/<key>.<outputType>.json`.
 * - Otherwise every candidate from `buildAlternatives` is tried in order until
 *   one returns a result accepted by `options.isGoodEnough` (or
 *   `defaultIsGoodEnough` when none is supplied).
 *
 * @param filePath Path of the file to convert.
 * @param targetType Modality the final result should have.
 * @param options Path selection, caching, range and progress options.
 * @returns The result of the last step; for "text"/"html" results the text is
 *   cut down to the requested 1-based, inclusive `startLine`..`endLine` range.
 * @throws Error when no conversion path exists, when every alternative of a
 *   step fails or is rejected, or when the chain produced no result.
 */
async convert(
  filePath: string,
  targetType: Modality,
  options: ConvertOptions = {}
): Promise<ConvertResult> {
  const ext = path.extname(filePath).replace(/^\./, "").toLowerCase();
  const inputNode = ext || "text";

  const {
    force = false,
    preferredConverters = [],
    via,
    isGoodEnough,
    startPage,
    endPage,
    startLine,
    endLine,
    startTime,
    endTime,
    onProgress,
    converterOptions = {},
  } = options;

  const converterPath =
    via && via.length > 0
      ? this.findPathVia(inputNode, via, targetType)
      : this.findBestPath(inputNode, targetType, preferredConverters);

  if (!converterPath) {
    throw new Error(
      `No conversion path found from "${inputNode}" to "${targetType}"`
    );
  }

  const checkGoodEnough = isGoodEnough
    ? (result: ConvertResult) => isGoodEnough({ filePath, result })
    : (result: ConvertResult) => defaultIsGoodEnough(filePath, result);

  // Cache artifacts live next to the source file, in a directory named after
  // the file without its extension.
  const parsed = path.parse(filePath);
  const baseDir = path.join(parsed.dir, parsed.name);

  let currentFilePath = filePath;
  let currentExt = inputNode;
  let currentFiles: string[] | undefined = undefined;
  let lastResult: ConvertResult | null = null;

  for (let stepIdx = 0; stepIdx < converterPath.length; stepIdx++) {
    const stepConverter = converterPath[stepIdx];
    const isLastStep = stepIdx === converterPath.length - 1;
    const stepInput: ConvertInput = {
      filePath: currentFilePath,
      inputExt: currentExt,
      files: currentFiles,
      startPage,
      endPage,
      startLine,
      endLine,
      startTime,
      endTime,
    };

    onProgress?.(stepConverter.name, stepIdx / converterPath.length);

    const cacheDir = path.join(baseDir, stepConverter.name);
    const key = cacheKey(
      stepConverter.name,
      stepConverter.outputType,
      stepInput
    );
    const cacheFile = path.join(
      cacheDir,
      `${key}.${stepConverter.outputType}.json`
    );
    // The marker is written after the payload, so its presence means the
    // payload write was attempted to completion.
    const doneFile = cacheFile + ".done";

    if (stepConverter.cache && !force && fs.existsSync(doneFile)) {
      try {
        const cached = JSON.parse(
          fs.readFileSync(cacheFile, "utf8")
        ) as ConvertResult;
        lastResult = cached;
        if (!isLastStep) {
          currentFilePath = cached.files?.[0] ?? currentFilePath;
          currentFiles = cached.files ?? currentFiles;
          currentExt = stepConverter.outputType;
        }
        continue;
      } catch {
        // cache read failed; fall through to run converter
      }
    }

    const stepAlternatives = this.buildAlternatives(
      currentExt,
      stepConverter.outputType,
      preferredConverters
    );

    let stepResult: ConvertResult | null = null;
    // One entry per alternative that threw or was rejected, so the final
    // error can say why the step failed instead of hiding every cause.
    const failures: string[] = [];
    for (const alt of stepAlternatives) {
      try {
        const ctx: ConverterContext = {
          clients: this.clients,
          cacheDir,
          options: converterOptions[alt.name] ?? {},
        };
        const result = await alt.convert(stepInput, ctx);
        if (!checkGoodEnough(result)) {
          failures.push(`${alt.name}: result rejected by quality check`);
          continue;
        }
        stepResult = result;
        break;
      } catch (err) {
        // Record the cause and try the next alternative.
        const reason = err instanceof Error ? err.message : String(err);
        failures.push(`${alt.name}: ${reason}`);
      }
    }

    if (!stepResult) {
      const detail = failures.length > 0 ? `: ${failures.join("; ")}` : "";
      throw new Error(
        `Conversion step "${stepConverter.name}" (${currentExt} -> ${stepConverter.outputType}) failed with all alternatives${detail}`
      );
    }

    if (stepConverter.cache) {
      try {
        fs.mkdirSync(cacheDir, { recursive: true });
        fs.writeFileSync(cacheFile, JSON.stringify(stepResult));
        fs.writeFileSync(doneFile, "1");
      } catch {
        // cache write failures are non-fatal
      }
    }

    lastResult = stepResult;
    // NOTE(review): the cache-hit branch above advances `currentExt` even when
    // the cached result has no files, while this branch only advances when
    // files were produced. Confirm which behaviour is intended.
    if (!isLastStep && stepResult.files?.length) {
      currentFilePath = stepResult.files[0];
      currentFiles = stepResult.files;
      currentExt = stepConverter.outputType;
    }
  }

  if (!lastResult) {
    throw new Error("Conversion produced no result");
  }

  // NOTE(review): startLine/endLine are also handed to every converter via
  // stepInput; if a converter already honours them this slices a second
  // time. Confirm against the converter implementations.
  if (
    (lastResult.outputType === "text" || lastResult.outputType === "html") &&
    lastResult.text &&
    (startLine !== undefined || endLine !== undefined)
  ) {
    const lines = lastResult.text.split("\n");
    // Clamp both bounds at 0: a negative index would make slice() count from
    // the end of the array and return the wrong lines (e.g. startLine = 0).
    const from = Math.max(0, (startLine ?? 1) - 1);
    const to = Math.max(0, endLine ?? lines.length);
    lastResult = {
      ...lastResult,
      text: lines.slice(from, to).join("\n"),
    };
  }

  onProgress?.("done", 1);
  return lastResult;
}
|
|
716
|
+
|
|
717
|
+
/**
 * Shorthand for `convert(filePath, "text", options)` that hands back only the
 * text of the result.
 *
 * @param filePath Path of the file to convert.
 * @param options Options forwarded unchanged to `convert`.
 * @returns The result's `text`, or an empty string when it is null or undefined.
 */
async convertToText(
  filePath: string,
  options: ConvertOptions = {}
): Promise<string> {
  const { text } = await this.convert(filePath, "text", options);
  return text ?? "";
}
|
|
727
|
+
|
|
728
|
+
/**
 * Collect the registered converters able to perform one step, i.e. those
 * that accept `inputNode` (through `inputExts` or `inputModality`) and
 * produce `outputType`.
 *
 * Converters named in `preferredNames` come first, ordered by their position
 * in that list; all remaining matches follow in registration order.
 *
 * @param inputNode Extension or modality the step starts from.
 * @param outputType Modality the step must produce.
 * @param preferredNames Converter names to try first, most preferred first.
 * @returns Ordered candidate converters; empty when nothing matches.
 */
private buildAlternatives(
  inputNode: string,
  outputType: Modality,
  preferredNames: string[]
): Converter[] {
  // A converter accepts the node either by extension or by modality.
  const acceptsInput = (candidate: Converter): boolean =>
    Boolean(candidate.inputExts?.includes(inputNode)) ||
    (Boolean(candidate.inputModality) &&
      candidate.inputModality === inputNode);

  const candidates = this.converters.filter(
    (candidate) =>
      acceptsInput(candidate) && candidate.outputType === outputType
  );

  // Position in the preference list, or -1 when the converter is not preferred.
  const rank = (candidate: Converter): number =>
    preferredNames.indexOf(candidate.name);

  const preferred = candidates
    .filter((candidate) => rank(candidate) !== -1)
    .sort((left, right) => rank(left) - rank(right));
  const others = candidates.filter((candidate) => rank(candidate) === -1);

  return preferred.concat(others);
}
|
|
763
|
+
}
|