@ztimson/ai-utils 0.8.13 → 0.8.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +22 -18
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +137 -133
- package/dist/index.mjs.map +1 -1
- package/dist/llm.d.ts +2 -2
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import * as O from "node:os";
|
|
2
|
-
import { tmpdir as
|
|
3
|
-
import { Anthropic as
|
|
4
|
-
import { objectMap as z, JSONAttemptParse as S, findByProp as R, JSONSanitize as _, clean as
|
|
5
|
-
import { OpenAI as
|
|
6
|
-
import { fileURLToPath as
|
|
2
|
+
import { tmpdir as N } from "node:os";
|
|
3
|
+
import { Anthropic as U } from "@anthropic-ai/sdk";
|
|
4
|
+
import { objectMap as z, JSONAttemptParse as S, findByProp as R, JSONSanitize as _, clean as W, Http as C, consoleInterceptor as J, fn as I, ASet as F } from "@ztimson/utils";
|
|
5
|
+
import { OpenAI as B } from "openai";
|
|
6
|
+
import { fileURLToPath as D } from "url";
|
|
7
7
|
import { join as H, dirname as G } from "path";
|
|
8
8
|
import { spawn as w, execSync as K } from "node:child_process";
|
|
9
9
|
import { mkdtempSync as Y } from "node:fs";
|
|
@@ -17,19 +17,19 @@ class L {
|
|
|
17
17
|
}
|
|
18
18
|
class Q extends L {
|
|
19
19
|
constructor(r, e, t) {
|
|
20
|
-
super(), this.ai = r, this.apiToken = e, this.model = t, this.client = new
|
|
20
|
+
super(), this.ai = r, this.apiToken = e, this.model = t, this.client = new U({ apiKey: e });
|
|
21
21
|
}
|
|
22
22
|
client;
|
|
23
23
|
toStandard(r) {
|
|
24
24
|
const e = Date.now(), t = [];
|
|
25
|
-
for (let
|
|
26
|
-
if (typeof
|
|
27
|
-
t.push({ timestamp: e, ...
|
|
25
|
+
for (let a of r)
|
|
26
|
+
if (typeof a.content == "string")
|
|
27
|
+
t.push({ timestamp: e, ...a });
|
|
28
28
|
else {
|
|
29
|
-
const o =
|
|
29
|
+
const o = a.content?.filter((n) => n.type == "text").map((n) => n.text).join(`
|
|
30
30
|
|
|
31
31
|
`);
|
|
32
|
-
o && t.push({ timestamp: e, role:
|
|
32
|
+
o && t.push({ timestamp: e, role: a.role, content: o }), a.content.forEach((n) => {
|
|
33
33
|
if (n.type == "tool_use")
|
|
34
34
|
t.push({ timestamp: e, role: "tool", id: n.id, name: n.name, args: n.input, content: void 0 });
|
|
35
35
|
else if (n.type == "tool_result") {
|
|
@@ -55,7 +55,7 @@ class Q extends L {
|
|
|
55
55
|
}
|
|
56
56
|
ask(r, e = {}) {
|
|
57
57
|
const t = new AbortController();
|
|
58
|
-
return Object.assign(new Promise(async (
|
|
58
|
+
return Object.assign(new Promise(async (a) => {
|
|
59
59
|
let o = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
60
60
|
const n = e.tools || this.ai.options.llm?.tools || [], m = {
|
|
61
61
|
model: e.model || this.model,
|
|
@@ -75,7 +75,7 @@ class Q extends L {
|
|
|
75
75
|
messages: o,
|
|
76
76
|
stream: !!e.stream
|
|
77
77
|
};
|
|
78
|
-
let l,
|
|
78
|
+
let l, i = !0;
|
|
79
79
|
do {
|
|
80
80
|
if (l = await this.client.messages.create(m).catch((s) => {
|
|
81
81
|
throw s.message += `
|
|
@@ -83,7 +83,7 @@ class Q extends L {
|
|
|
83
83
|
Messages:
|
|
84
84
|
${JSON.stringify(o, null, 2)}`, s;
|
|
85
85
|
}), e.stream) {
|
|
86
|
-
|
|
86
|
+
i ? i = !1 : e.stream({ text: `
|
|
87
87
|
|
|
88
88
|
` }), l.content = [];
|
|
89
89
|
for await (const s of l) {
|
|
@@ -120,13 +120,13 @@ ${JSON.stringify(o, null, 2)}`, s;
|
|
|
120
120
|
} while (!t.signal.aborted && l.content.some((u) => u.type === "tool_use"));
|
|
121
121
|
o.push({ role: "assistant", content: l.content.filter((u) => u.type == "text").map((u) => u.text).join(`
|
|
122
122
|
|
|
123
|
-
`) }), o = this.toStandard(o), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...o),
|
|
123
|
+
`) }), o = this.toStandard(o), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...o), a(o.at(-1)?.content);
|
|
124
124
|
}), { abort: () => t.abort() });
|
|
125
125
|
}
|
|
126
126
|
}
|
|
127
127
|
class P extends L {
|
|
128
|
-
constructor(r, e, t,
|
|
129
|
-
super(), this.ai = r, this.host = e, this.token = t, this.model =
|
|
128
|
+
constructor(r, e, t, a) {
|
|
129
|
+
super(), this.ai = r, this.host = e, this.token = t, this.model = a, this.client = new B(W({
|
|
130
130
|
baseURL: e,
|
|
131
131
|
apiKey: t || e ? "ignored" : void 0
|
|
132
132
|
}));
|
|
@@ -136,17 +136,17 @@ class P extends L {
|
|
|
136
136
|
for (let e = 0; e < r.length; e++) {
|
|
137
137
|
const t = r[e];
|
|
138
138
|
if (t.role === "assistant" && t.tool_calls) {
|
|
139
|
-
const
|
|
139
|
+
const a = t.tool_calls.map((o) => ({
|
|
140
140
|
role: "tool",
|
|
141
141
|
id: o.id,
|
|
142
142
|
name: o.function.name,
|
|
143
143
|
args: S(o.function.arguments, {}),
|
|
144
144
|
timestamp: t.timestamp
|
|
145
145
|
}));
|
|
146
|
-
r.splice(e, 1, ...
|
|
146
|
+
r.splice(e, 1, ...a), e += a.length - 1;
|
|
147
147
|
} else if (t.role === "tool" && t.content) {
|
|
148
|
-
const
|
|
149
|
-
|
|
148
|
+
const a = r.find((o) => t.tool_call_id == o.id);
|
|
149
|
+
a && (t.content.includes('"error":') ? a.error = t.content : a.content = t.content), r.splice(e, 1), e--;
|
|
150
150
|
}
|
|
151
151
|
r[e]?.timestamp || (r[e].timestamp = Date.now());
|
|
152
152
|
}
|
|
@@ -167,7 +167,7 @@ class P extends L {
|
|
|
167
167
|
content: t.error || t.content
|
|
168
168
|
});
|
|
169
169
|
else {
|
|
170
|
-
const { timestamp:
|
|
170
|
+
const { timestamp: a, ...o } = t;
|
|
171
171
|
e.push(o);
|
|
172
172
|
}
|
|
173
173
|
return e;
|
|
@@ -175,7 +175,7 @@ class P extends L {
|
|
|
175
175
|
}
|
|
176
176
|
ask(r, e = {}) {
|
|
177
177
|
const t = new AbortController();
|
|
178
|
-
return Object.assign(new Promise(async (
|
|
178
|
+
return Object.assign(new Promise(async (a, o) => {
|
|
179
179
|
e.system && (e.history?.[0]?.role != "system" ? e.history?.splice(0, 0, { role: "system", content: e.system, timestamp: Date.now() }) : e.history[0].content = e.system);
|
|
180
180
|
let n = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
181
181
|
const m = e.tools || this.ai.options.llm?.tools || [], l = {
|
|
@@ -197,9 +197,9 @@ class P extends L {
|
|
|
197
197
|
}
|
|
198
198
|
}))
|
|
199
199
|
};
|
|
200
|
-
let
|
|
200
|
+
let i, u = !0;
|
|
201
201
|
do {
|
|
202
|
-
if (
|
|
202
|
+
if (i = await this.client.chat.completions.create(l).catch((c) => {
|
|
203
203
|
throw c.message += `
|
|
204
204
|
|
|
205
205
|
Messages:
|
|
@@ -207,13 +207,13 @@ ${JSON.stringify(n, null, 2)}`, c;
|
|
|
207
207
|
}), e.stream) {
|
|
208
208
|
u ? u = !1 : e.stream({ text: `
|
|
209
209
|
|
|
210
|
-
` }),
|
|
211
|
-
for await (const c of
|
|
210
|
+
` }), i.choices = [{ message: { role: "assistant", content: "", tool_calls: [] } }];
|
|
211
|
+
for await (const c of i) {
|
|
212
212
|
if (t.signal.aborted) break;
|
|
213
|
-
if (c.choices[0].delta.content && (
|
|
213
|
+
if (c.choices[0].delta.content && (i.choices[0].message.content += c.choices[0].delta.content, e.stream({ text: c.choices[0].delta.content })), c.choices[0].delta.tool_calls)
|
|
214
214
|
for (const d of c.choices[0].delta.tool_calls) {
|
|
215
|
-
const p =
|
|
216
|
-
p ? (d.id && (p.id = d.id), d.type && (p.type = d.type), d.function && (p.function || (p.function = {}), d.function.name && (p.function.name = d.function.name), d.function.arguments && (p.function.arguments = (p.function.arguments || "") + d.function.arguments))) :
|
|
215
|
+
const p = i.choices[0].message.tool_calls.find((f) => f.index === d.index);
|
|
216
|
+
p ? (d.id && (p.id = d.id), d.type && (p.type = d.type), d.function && (p.function || (p.function = {}), d.function.name && (p.function.name = d.function.name), d.function.arguments && (p.function.arguments = (p.function.arguments || "") + d.function.arguments))) : i.choices[0].message.tool_calls.push({
|
|
217
217
|
index: d.index,
|
|
218
218
|
id: d.id || "",
|
|
219
219
|
type: d.type || "function",
|
|
@@ -225,9 +225,9 @@ ${JSON.stringify(n, null, 2)}`, c;
|
|
|
225
225
|
}
|
|
226
226
|
}
|
|
227
227
|
}
|
|
228
|
-
const s =
|
|
228
|
+
const s = i.choices[0].message.tool_calls || [];
|
|
229
229
|
if (s.length && !t.signal.aborted) {
|
|
230
|
-
n.push(
|
|
230
|
+
n.push(i.choices[0].message);
|
|
231
231
|
const c = await Promise.all(s.map(async (d) => {
|
|
232
232
|
const p = m?.find(R("name", d.function.name));
|
|
233
233
|
if (e.stream && e.stream({ tool: d.function.name }), !p) return { role: "tool", tool_call_id: d.id, content: '{"error": "Tool not found"}' };
|
|
@@ -240,8 +240,8 @@ ${JSON.stringify(n, null, 2)}`, c;
|
|
|
240
240
|
}));
|
|
241
241
|
n.push(...c), l.messages = n;
|
|
242
242
|
}
|
|
243
|
-
} while (!t.signal.aborted &&
|
|
244
|
-
n.push({ role: "assistant", content:
|
|
243
|
+
} while (!t.signal.aborted && i.choices?.[0]?.message?.tool_calls?.length);
|
|
244
|
+
n.push({ role: "assistant", content: i.choices[0].message.content || "" }), n = this.toStandard(n), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...n), a(n.at(-1)?.content);
|
|
245
245
|
}), { abort: () => t.abort() });
|
|
246
246
|
}
|
|
247
247
|
}
|
|
@@ -270,14 +270,14 @@ class X {
|
|
|
270
270
|
};
|
|
271
271
|
const t = e.model || this.defaultModel;
|
|
272
272
|
if (!this.models[t]) throw new Error(`Model does not exist: ${t}`);
|
|
273
|
-
let
|
|
273
|
+
let a = () => {
|
|
274
274
|
};
|
|
275
275
|
return Object.assign(new Promise(async (o) => {
|
|
276
276
|
if (e.history || (e.history = []), e.memory) {
|
|
277
|
-
const m = async (
|
|
277
|
+
const m = async (i, u, s = 10) => {
|
|
278
278
|
const [c, d] = await Promise.all([
|
|
279
279
|
u ? this.embedding(u) : Promise.resolve(null),
|
|
280
|
-
|
|
280
|
+
i ? this.embedding(i) : Promise.resolve(null)
|
|
281
281
|
]);
|
|
282
282
|
return (e.memory || []).map((p) => {
|
|
283
283
|
const f = (c ? this.cosineSimilarity(p.embeddings[0], c[0].embedding) : 0) + (d ? this.cosineSimilarity(p.embeddings[1], d[0].embedding) : 0);
|
|
@@ -298,9 +298,9 @@ ${l}` }), e.tools = [{
|
|
|
298
298
|
query: { type: "string", description: "Search memory based on a query, can be used with or without subject argument" },
|
|
299
299
|
topK: { type: "number", description: "Result limit, default 5" }
|
|
300
300
|
},
|
|
301
|
-
fn: (
|
|
302
|
-
if (!
|
|
303
|
-
return m(
|
|
301
|
+
fn: (i) => {
|
|
302
|
+
if (!i.subject && !i.query) throw new Error("Either a subject or query argument is required");
|
|
303
|
+
return m(i.query, i.subject, i.topK);
|
|
304
304
|
}
|
|
305
305
|
}, {
|
|
306
306
|
name: "remember",
|
|
@@ -309,12 +309,12 @@ ${l}` }), e.tools = [{
|
|
|
309
309
|
owner: { type: "string", description: "Subject/person this fact is about" },
|
|
310
310
|
fact: { type: "string", description: "The information to remember" }
|
|
311
311
|
},
|
|
312
|
-
fn: async (
|
|
312
|
+
fn: async (i) => {
|
|
313
313
|
if (!e.memory) return;
|
|
314
314
|
const u = await Promise.all([
|
|
315
|
-
this.embedding(
|
|
316
|
-
this.embedding(`${
|
|
317
|
-
]), s = { owner:
|
|
315
|
+
this.embedding(i.owner),
|
|
316
|
+
this.embedding(`${i.owner}: ${i.fact}`)
|
|
317
|
+
]), s = { owner: i.owner, fact: i.fact, embeddings: [u[0][0].embedding, u[1][0].embedding] };
|
|
318
318
|
return e.memory.splice(0, e.memory.length, ...e.memory.filter((c) => !(this.cosineSimilarity(s.embeddings[0], c.embeddings[0]) >= 0.9 && this.cosineSimilarity(s.embeddings[1], c.embeddings[1]) >= 0.8)), s), "Remembered!";
|
|
319
319
|
}
|
|
320
320
|
}, ...e.tools || []];
|
|
@@ -325,15 +325,15 @@ ${l}` }), e.tools = [{
|
|
|
325
325
|
e.history.splice(0, e.history.length, ...m);
|
|
326
326
|
}
|
|
327
327
|
return o(n);
|
|
328
|
-
}), { abort:
|
|
328
|
+
}), { abort: a });
|
|
329
329
|
}
|
|
330
330
|
async code(r, e) {
|
|
331
331
|
const t = await this.ask(r, { ...e, system: [
|
|
332
332
|
e?.system,
|
|
333
333
|
"Return your response in a code block"
|
|
334
334
|
].filter((o) => !!o).join(`
|
|
335
|
-
`) }),
|
|
336
|
-
return
|
|
335
|
+
`) }), a = /```(?:.+)?\s*([\s\S]*?)```/.exec(t);
|
|
336
|
+
return a ? a[1].trim() : null;
|
|
337
337
|
}
|
|
338
338
|
/**
|
|
339
339
|
* Compress chat history to reduce context size
|
|
@@ -343,16 +343,16 @@ ${l}` }), e.tools = [{
|
|
|
343
343
|
* @param {LLMRequest} options LLM options
|
|
344
344
|
* @returns {Promise<LLMMessage[]>} New chat history will summary at index 0
|
|
345
345
|
*/
|
|
346
|
-
async compressHistory(r, e, t,
|
|
346
|
+
async compressHistory(r, e, t, a) {
|
|
347
347
|
if (this.estimateTokens(r) < e) return r;
|
|
348
348
|
let o = 0, n = 0;
|
|
349
349
|
for (let d of r.toReversed())
|
|
350
350
|
if (n += this.estimateTokens(d.content), n < t) o++;
|
|
351
351
|
else break;
|
|
352
352
|
if (r.length <= o) return r;
|
|
353
|
-
const m = r[0].role == "system" ? r[0] : null, l = o == 0 ? [] : r.slice(-o),
|
|
353
|
+
const m = r[0].role == "system" ? r[0] : null, l = o == 0 ? [] : r.slice(-o), i = (o == 0 ? r : r.slice(0, -o)).filter((d) => d.role === "assistant" || d.role === "user"), u = await this.summarize(i.map((d) => `[${d.role}]: ${d.content}`).join(`
|
|
354
354
|
|
|
355
|
-
`), 500,
|
|
355
|
+
`), 500, a), s = Date.now(), c = [{ role: "tool", name: "summary", id: "summary_" + s, args: {}, content: `Conversation Summary: ${u?.summary}`, timestamp: s }, ...l];
|
|
356
356
|
return m && c.splice(0, 0, m), c;
|
|
357
357
|
}
|
|
358
358
|
/**
|
|
@@ -363,10 +363,10 @@ ${l}` }), e.tools = [{
|
|
|
363
363
|
*/
|
|
364
364
|
cosineSimilarity(r, e) {
|
|
365
365
|
if (r.length !== e.length) throw new Error("Vectors must be same length");
|
|
366
|
-
let t = 0,
|
|
366
|
+
let t = 0, a = 0, o = 0;
|
|
367
367
|
for (let m = 0; m < r.length; m++)
|
|
368
|
-
t += r[m] * e[m],
|
|
369
|
-
const n = Math.sqrt(
|
|
368
|
+
t += r[m] * e[m], a += r[m] * r[m], o += e[m] * e[m];
|
|
369
|
+
const n = Math.sqrt(a) * Math.sqrt(o);
|
|
370
370
|
return n === 0 ? 0 : t / n;
|
|
371
371
|
}
|
|
372
372
|
/**
|
|
@@ -377,21 +377,21 @@ ${l}` }), e.tools = [{
|
|
|
377
377
|
* @returns {string[]} Chunked strings
|
|
378
378
|
*/
|
|
379
379
|
chunk(r, e = 500, t = 50) {
|
|
380
|
-
const
|
|
381
|
-
const c =
|
|
382
|
-
return typeof s == "object" && !Array.isArray(s) ?
|
|
383
|
-
}) : [], n = (typeof r == "object" ?
|
|
380
|
+
const a = (l, i = "") => l ? Object.entries(l).flatMap(([u, s]) => {
|
|
381
|
+
const c = i ? `${i}${isNaN(+u) ? `.${u}` : `[${u}]`}` : u;
|
|
382
|
+
return typeof s == "object" && !Array.isArray(s) ? a(s, c) : `${c}: ${Array.isArray(s) ? s.join(", ") : s}`;
|
|
383
|
+
}) : [], n = (typeof r == "object" ? a(r) : r.toString().split(`
|
|
384
384
|
`)).flatMap((l) => [...l.split(/\s+/).filter(Boolean), `
|
|
385
385
|
`]), m = [];
|
|
386
386
|
for (let l = 0; l < n.length; ) {
|
|
387
|
-
let
|
|
387
|
+
let i = "", u = l;
|
|
388
388
|
for (; u < n.length; ) {
|
|
389
|
-
const c =
|
|
389
|
+
const c = i + (i ? " " : "") + n[u];
|
|
390
390
|
if (this.estimateTokens(c.replace(/\s*\n\s*/g, `
|
|
391
|
-
`)) > e &&
|
|
392
|
-
|
|
391
|
+
`)) > e && i) break;
|
|
392
|
+
i = c, u++;
|
|
393
393
|
}
|
|
394
|
-
const s =
|
|
394
|
+
const s = i.replace(/\s*\n\s*/g, `
|
|
395
395
|
`).trim();
|
|
396
396
|
s && m.push(s), l = Math.max(u - t, u === l ? l + 1 : u);
|
|
397
397
|
}
|
|
@@ -404,17 +404,17 @@ ${l}` }), e.tools = [{
|
|
|
404
404
|
* @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
|
|
405
405
|
*/
|
|
406
406
|
embedding(r, e = {}) {
|
|
407
|
-
let { maxTokens: t = 500, overlapTokens:
|
|
407
|
+
let { maxTokens: t = 500, overlapTokens: a = 50 } = e, o = !1;
|
|
408
408
|
const n = () => {
|
|
409
409
|
o = !0;
|
|
410
|
-
}, m = (
|
|
410
|
+
}, m = (i) => new Promise((u, s) => {
|
|
411
411
|
if (o) return s(new Error("Aborted"));
|
|
412
412
|
const c = [
|
|
413
|
-
H(G(
|
|
413
|
+
H(G(D(import.meta.url)), "embedder.js"),
|
|
414
414
|
this.ai.options.path,
|
|
415
415
|
this.ai.options?.embedder || "bge-small-en-v1.5"
|
|
416
416
|
], d = w("node", c, { stdio: ["pipe", "pipe", "ignore"] });
|
|
417
|
-
d.stdin.write(
|
|
417
|
+
d.stdin.write(i), d.stdin.end();
|
|
418
418
|
let p = "";
|
|
419
419
|
d.stdout.on("data", (f) => p += f.toString()), d.on("close", (f) => {
|
|
420
420
|
if (o) return s(new Error("Aborted"));
|
|
@@ -429,9 +429,9 @@ ${l}` }), e.tools = [{
|
|
|
429
429
|
s(new Error(`Embedder process exited with code ${f}`));
|
|
430
430
|
}), d.on("error", s);
|
|
431
431
|
}), l = (async () => {
|
|
432
|
-
const
|
|
433
|
-
for (let s = 0; s <
|
|
434
|
-
const c =
|
|
432
|
+
const i = this.chunk(r, t, a), u = [];
|
|
433
|
+
for (let s = 0; s < i.length && !o; s++) {
|
|
434
|
+
const c = i[s], d = await m(c);
|
|
435
435
|
u.push({ index: s, embedding: d, text: c, tokens: this.estimateTokens(c) });
|
|
436
436
|
}
|
|
437
437
|
return u;
|
|
@@ -455,7 +455,7 @@ ${l}` }), e.tools = [{
|
|
|
455
455
|
*/
|
|
456
456
|
fuzzyMatch(r, ...e) {
|
|
457
457
|
if (e.length < 2) throw new Error("Requires at least 2 strings to compare");
|
|
458
|
-
const t = (n, m = 10) => n.toLowerCase().split("").map((l,
|
|
458
|
+
const t = (n, m = 10) => n.toLowerCase().split("").map((l, i) => l.charCodeAt(0) * (i + 1) % m / m).slice(0, m), a = t(r), o = e.map((n) => t(n)).map((n) => this.cosineSimilarity(a, n));
|
|
459
459
|
return { avg: o.reduce((n, m) => n + m, 0) / o.length, max: Math.max(...o), similarities: o };
|
|
460
460
|
}
|
|
461
461
|
/**
|
|
@@ -466,61 +466,65 @@ ${l}` }), e.tools = [{
|
|
|
466
466
|
* @returns {Promise<{} | {} | RegExpExecArray | null>}
|
|
467
467
|
*/
|
|
468
468
|
async json(r, e, t) {
|
|
469
|
-
let
|
|
469
|
+
let a = `Your job is to convert input to JSON using tool calls. Call the \`submit\` tool at least once with JSON matching this schema:
|
|
470
470
|
\`\`\`json
|
|
471
471
|
${e}
|
|
472
|
-
|
|
473
|
-
|
|
472
|
+
\`\`\`
|
|
473
|
+
|
|
474
|
+
Responses are ignored`;
|
|
475
|
+
return t?.system && (a += `
|
|
474
476
|
|
|
475
477
|
` + t.system), new Promise(async (o, n) => {
|
|
476
478
|
let m = !1;
|
|
477
479
|
const l = await this.ask(r, {
|
|
478
480
|
temperature: 0.3,
|
|
479
481
|
...t,
|
|
480
|
-
system:
|
|
482
|
+
system: a,
|
|
481
483
|
tools: [{
|
|
482
484
|
name: "submit",
|
|
483
485
|
description: "Submit JSON",
|
|
484
486
|
args: { json: { type: "string", description: "Javascript parsable JSON string", required: !0 } },
|
|
485
|
-
fn: (
|
|
487
|
+
fn: (i) => {
|
|
486
488
|
try {
|
|
487
|
-
const u = JSON.parse(
|
|
489
|
+
const u = JSON.parse(i.json);
|
|
488
490
|
o(u), m = !0;
|
|
489
491
|
} catch {
|
|
490
492
|
return "Invalid JSON";
|
|
491
493
|
}
|
|
492
|
-
return "
|
|
494
|
+
return "Saved";
|
|
493
495
|
}
|
|
494
496
|
}, ...t?.tools || []]
|
|
495
497
|
});
|
|
496
|
-
m || n(`AI failed to create
|
|
498
|
+
m || n(`AI failed to create JSON:
|
|
499
|
+
${l}`);
|
|
497
500
|
});
|
|
498
501
|
}
|
|
499
502
|
/**
|
|
500
503
|
* Create a summary of some text
|
|
501
504
|
* @param {string} text Text to summarize
|
|
502
|
-
* @param {number}
|
|
505
|
+
* @param {number} length Max number of words
|
|
503
506
|
* @param options LLM request options
|
|
504
507
|
* @returns {Promise<string>} Summary
|
|
505
508
|
*/
|
|
506
509
|
async summarize(r, e = 500, t) {
|
|
507
|
-
let
|
|
508
|
-
return t?.system && (
|
|
510
|
+
let a = `Your job is to summarize the users message using tool calls. Call the \`submit\` tool at least once with the shortest summary possible that's <= ${e} words. The tool call will respond with the token count. Responses are ignored`;
|
|
511
|
+
return t?.system && (a += `
|
|
509
512
|
|
|
510
513
|
` + t.system), new Promise(async (o, n) => {
|
|
511
514
|
let m = !1;
|
|
512
515
|
const l = await this.ask(r, {
|
|
513
516
|
temperature: 0.3,
|
|
514
517
|
...t,
|
|
515
|
-
system:
|
|
518
|
+
system: a,
|
|
516
519
|
tools: [{
|
|
517
520
|
name: "submit",
|
|
518
521
|
description: "Submit summary",
|
|
519
522
|
args: { summary: { type: "string", description: "Text summarization", required: !0 } },
|
|
520
|
-
fn: (
|
|
523
|
+
fn: (i) => i.summary ? i.summary.split(" ").length > e ? `Too long: ${e} words` : (m = !0, o(i.summary || null), `Saved: ${e} words`) : "No summary provided"
|
|
521
524
|
}, ...t?.tools || []]
|
|
522
525
|
});
|
|
523
|
-
m || n(`AI failed to create summary:
|
|
526
|
+
m || n(`AI failed to create summary:
|
|
527
|
+
${l}`);
|
|
524
528
|
});
|
|
525
529
|
}
|
|
526
530
|
}
|
|
@@ -547,7 +551,7 @@ print(json.dumps(segments))
|
|
|
547
551
|
pyannote;
|
|
548
552
|
whisperModel;
|
|
549
553
|
async addPunctuation(r, e, t = 150) {
|
|
550
|
-
const
|
|
554
|
+
const a = (n) => {
|
|
551
555
|
if (n = n.toLowerCase().replace(/[^a-z]/g, ""), n.length <= 3) return 1;
|
|
552
556
|
const m = n.match(/[aeiouy]+/g);
|
|
553
557
|
let l = m ? m.length : 1;
|
|
@@ -556,10 +560,10 @@ print(json.dumps(segments))
|
|
|
556
560
|
let o = "";
|
|
557
561
|
return r.transcription.filter((n, m) => {
|
|
558
562
|
let l = !1;
|
|
559
|
-
const
|
|
560
|
-
return !n.text && u ? (u.offsets.from = n.offsets.from, u.timestamps.from = n.offsets.from) : n.text && n.text[0] != " " &&
|
|
563
|
+
const i = r.transcription[m - 1], u = r.transcription[m + 1];
|
|
564
|
+
return !n.text && u ? (u.offsets.from = n.offsets.from, u.timestamps.from = n.offsets.from) : n.text && n.text[0] != " " && i && (i.offsets.to = n.offsets.to, i.timestamps.to = n.timestamps.to, i.text += n.text, l = !0), !!n.text && !l;
|
|
561
565
|
}).forEach((n) => {
|
|
562
|
-
const m = /^[A-Z]/.test(n.text.trim()), l = n.offsets.to - n.offsets.from, u =
|
|
566
|
+
const m = /^[A-Z]/.test(n.text.trim()), l = n.offsets.to - n.offsets.from, u = a(n.text.trim()) * t;
|
|
563
567
|
m && l > u * 2 && n.text[0] == " " && (o += "."), o += n.text;
|
|
564
568
|
}), e ? this.ai.language.ask(o, {
|
|
565
569
|
system: "Remove any misplaced punctuation from the following ASR transcript using the replace tool. Avoid modifying words unless there is an obvious typo",
|
|
@@ -576,29 +580,29 @@ print(json.dumps(segments))
|
|
|
576
580
|
}).then(() => o) : o.trim();
|
|
577
581
|
}
|
|
578
582
|
async diarizeTranscript(r, e, t) {
|
|
579
|
-
const
|
|
583
|
+
const a = /* @__PURE__ */ new Map();
|
|
580
584
|
let o = 0;
|
|
581
585
|
e.forEach((p) => {
|
|
582
|
-
|
|
586
|
+
a.has(p.speaker) || a.set(p.speaker, ++o);
|
|
583
587
|
});
|
|
584
|
-
const n = await this.addPunctuation(r, t), m = n.match(/[^.!?]+[.!?]+/g) || [n], l = r.transcription.filter((p) => p.text.trim()),
|
|
588
|
+
const n = await this.addPunctuation(r, t), m = n.match(/[^.!?]+[.!?]+/g) || [n], l = r.transcription.filter((p) => p.text.trim()), i = m.map((p) => {
|
|
585
589
|
if (p = p.trim(), !p) return null;
|
|
586
590
|
const f = p.toLowerCase().replace(/[^\w\s]/g, "").split(/\s+/), g = /* @__PURE__ */ new Map();
|
|
587
591
|
f.forEach((x) => {
|
|
588
592
|
const k = l.find((y) => x === y.text.trim().toLowerCase().replace(/[^\w]/g, ""));
|
|
589
593
|
if (!k) return;
|
|
590
|
-
const
|
|
594
|
+
const E = k.offsets.from / 1e3, $ = e.find((y) => E >= y.start && E <= y.end);
|
|
591
595
|
if ($) {
|
|
592
|
-
const y =
|
|
596
|
+
const y = a.get($.speaker);
|
|
593
597
|
g.set(y, (g.get(y) || 0) + 1);
|
|
594
598
|
}
|
|
595
599
|
});
|
|
596
|
-
let T = 1,
|
|
600
|
+
let T = 1, v = 0;
|
|
597
601
|
return g.forEach((x, k) => {
|
|
598
|
-
x >
|
|
602
|
+
x > v && (v = x, T = k);
|
|
599
603
|
}), { speaker: T, text: p };
|
|
600
604
|
}).filter((p) => p !== null), u = [];
|
|
601
|
-
|
|
605
|
+
i.forEach((p) => {
|
|
602
606
|
const f = u[u.length - 1];
|
|
603
607
|
f && f.speaker === p.speaker ? f.text += " " + p.text : u.push({ ...p });
|
|
604
608
|
});
|
|
@@ -616,7 +620,7 @@ print(json.dumps(segments))
|
|
|
616
620
|
}
|
|
617
621
|
runAsr(r, e = {}) {
|
|
618
622
|
let t;
|
|
619
|
-
const
|
|
623
|
+
const a = new Promise((o, n) => {
|
|
620
624
|
this.downloadAsrModel(e.model).then((m) => {
|
|
621
625
|
if (e.diarization) {
|
|
622
626
|
let l = M.join(M.dirname(r), "transcript");
|
|
@@ -624,8 +628,8 @@ print(json.dumps(segments))
|
|
|
624
628
|
this.ai.options.whisper,
|
|
625
629
|
["-m", m, "-f", r, "-np", "-ml", "1", "-oj", "-of", l],
|
|
626
630
|
{ stdio: ["ignore", "ignore", "pipe"] }
|
|
627
|
-
), t.on("error", (
|
|
628
|
-
if (
|
|
631
|
+
), t.on("error", (i) => n(i)), t.on("close", async (i) => {
|
|
632
|
+
if (i === 0) {
|
|
629
633
|
l = await b.readFile(l + ".json", "utf-8"), b.rm(l + ".json").catch(() => {
|
|
630
634
|
});
|
|
631
635
|
try {
|
|
@@ -634,40 +638,40 @@ print(json.dumps(segments))
|
|
|
634
638
|
n(new Error("Failed to parse whisper JSON"));
|
|
635
639
|
}
|
|
636
640
|
} else
|
|
637
|
-
n(new Error(`Exit code ${
|
|
641
|
+
n(new Error(`Exit code ${i}`));
|
|
638
642
|
});
|
|
639
643
|
} else {
|
|
640
644
|
let l = "";
|
|
641
|
-
t = w(this.ai.options.whisper, ["-m", m, "-f", r, "-np", "-nt"]), t.on("error", (
|
|
642
|
-
|
|
645
|
+
t = w(this.ai.options.whisper, ["-m", m, "-f", r, "-np", "-nt"]), t.on("error", (i) => n(i)), t.stdout.on("data", (i) => l += i.toString()), t.on("close", async (i) => {
|
|
646
|
+
i === 0 ? o(l.trim() || null) : n(new Error(`Exit code ${i}`));
|
|
643
647
|
});
|
|
644
648
|
}
|
|
645
649
|
});
|
|
646
650
|
});
|
|
647
|
-
return Object.assign(
|
|
651
|
+
return Object.assign(a, { abort: () => t?.kill("SIGTERM") });
|
|
648
652
|
}
|
|
649
653
|
runDiarization(r) {
|
|
650
654
|
let e = !1, t = () => {
|
|
651
655
|
e = !0;
|
|
652
656
|
};
|
|
653
|
-
const
|
|
657
|
+
const a = (n) => new Promise((m) => {
|
|
654
658
|
const l = w(n, ["-W", "ignore", "-c", "import pyannote.audio"]);
|
|
655
|
-
l.on("close", (
|
|
659
|
+
l.on("close", (i) => m(i === 0)), l.on("error", () => m(!1));
|
|
656
660
|
}), o = Promise.all([
|
|
657
|
-
|
|
658
|
-
|
|
661
|
+
a("python"),
|
|
662
|
+
a("python3")
|
|
659
663
|
]).then((async ([n, m]) => {
|
|
660
664
|
if (e) return;
|
|
661
665
|
if (!n && !m) throw new Error("Pyannote is not installed: pip install pyannote.audio");
|
|
662
666
|
const l = m ? "python3" : "python";
|
|
663
|
-
return new Promise((
|
|
667
|
+
return new Promise((i, u) => {
|
|
664
668
|
if (e) return;
|
|
665
669
|
let s = "";
|
|
666
670
|
const c = w(l, ["-W", "ignore", "-c", this.pyannote, r]);
|
|
667
671
|
c.stdout.on("data", (d) => s += d.toString()), c.stderr.on("data", (d) => console.error(d.toString())), c.on("close", (d) => {
|
|
668
672
|
if (d === 0)
|
|
669
673
|
try {
|
|
670
|
-
|
|
674
|
+
i(JSON.parse(s));
|
|
671
675
|
} catch {
|
|
672
676
|
u(new Error("Failed to parse diarization output"));
|
|
673
677
|
}
|
|
@@ -680,23 +684,23 @@ print(json.dumps(segments))
|
|
|
680
684
|
}
|
|
681
685
|
asr(r, e = {}) {
|
|
682
686
|
if (!this.ai.options.whisper) throw new Error("Whisper not configured");
|
|
683
|
-
const t = A(Y(A(
|
|
687
|
+
const t = A(Y(A(N(), "audio-")), "converted.wav");
|
|
684
688
|
K(`ffmpeg -i "${r}" -ar 16000 -ac 1 -f wav "${t}"`, { stdio: "ignore" });
|
|
685
|
-
const
|
|
689
|
+
const a = () => b.rm(q.dirname(t), { recursive: !0, force: !0 }).catch(() => {
|
|
686
690
|
});
|
|
687
691
|
if (!e.diarization) return this.runAsr(t, { model: e.model });
|
|
688
692
|
const o = this.runAsr(t, { model: e.model, diarization: !0 }), n = this.runDiarization(t);
|
|
689
693
|
let m = !1, l = () => {
|
|
690
|
-
m = !0, o.abort(), n.abort(),
|
|
694
|
+
m = !0, o.abort(), n.abort(), a();
|
|
691
695
|
};
|
|
692
|
-
const
|
|
696
|
+
const i = Promise.allSettled([o, n]).then(async ([u, s]) => {
|
|
693
697
|
if (u.status == "rejected") throw new Error(`Whisper.cpp timestamps:
|
|
694
698
|
` + u.reason);
|
|
695
699
|
if (s.status == "rejected") throw new Error(`Pyannote:
|
|
696
700
|
` + s.reason);
|
|
697
701
|
return m || !e.diarization ? u.value : this.diarizeTranscript(u.value, s.value, e.diarization == "llm");
|
|
698
|
-
}).finally(() =>
|
|
699
|
-
return Object.assign(
|
|
702
|
+
}).finally(() => a());
|
|
703
|
+
return Object.assign(i, { abort: l });
|
|
700
704
|
}
|
|
701
705
|
async downloadAsrModel(r = this.whisperModel) {
|
|
702
706
|
if (!this.ai.options.whisper) throw new Error("Whisper not configured");
|
|
@@ -716,10 +720,10 @@ class te {
|
|
|
716
720
|
*/
|
|
717
721
|
ocr(r) {
|
|
718
722
|
let e;
|
|
719
|
-
const t = new Promise(async (
|
|
723
|
+
const t = new Promise(async (a) => {
|
|
720
724
|
e = await V(this.ai.options.ocr || "eng", 2, { cachePath: this.ai.options.path });
|
|
721
725
|
const { data: o } = await e.recognize(r);
|
|
722
|
-
await e.terminate(),
|
|
726
|
+
await e.terminate(), a(o.text.trim() || null);
|
|
723
727
|
});
|
|
724
728
|
return Object.assign(t, { abort: () => e?.terminate() });
|
|
725
729
|
}
|
|
@@ -818,26 +822,26 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
818
822
|
redirect: "follow"
|
|
819
823
|
}).catch((s) => {
|
|
820
824
|
throw new Error(`Failed to fetch: ${s.message}`);
|
|
821
|
-
}), t = e.headers.get("content-type") || "",
|
|
822
|
-
if (t.match(/charset=([^;]+)/)?.[1], h.mimeRegex && !new RegExp(h.mimeRegex, "i").test(
|
|
823
|
-
return { url: h.url, error: "MIME type rejected", mimeType:
|
|
824
|
-
if (
|
|
825
|
+
}), t = e.headers.get("content-type") || "", a = t.split(";")[0].trim().toLowerCase();
|
|
826
|
+
if (t.match(/charset=([^;]+)/)?.[1], h.mimeRegex && !new RegExp(h.mimeRegex, "i").test(a))
|
|
827
|
+
return { url: h.url, error: "MIME type rejected", mimeType: a, filter: h.mimeRegex };
|
|
828
|
+
if (a.startsWith("image/") || a.startsWith("audio/") || a.startsWith("video/")) {
|
|
825
829
|
const s = await e.arrayBuffer();
|
|
826
830
|
if (s.byteLength > 10485760)
|
|
827
|
-
return { url: h.url, type: "media", mimeType:
|
|
831
|
+
return { url: h.url, type: "media", mimeType: a, error: "File too large", size: s.byteLength, maxSize: 10485760 };
|
|
828
832
|
const c = Buffer.from(s).toString("base64");
|
|
829
|
-
return { url: h.url, type: "media", mimeType:
|
|
833
|
+
return { url: h.url, type: "media", mimeType: a, dataUrl: `data:${a};base64,${c}`, size: s.byteLength };
|
|
830
834
|
}
|
|
831
|
-
if (
|
|
835
|
+
if (a.match(/^(text\/(plain|csv|xml)|application\/(json|xml|csv|x-yaml))/) || h.url.match(/\.(txt|json|xml|csv|yaml|yml|md)$/i)) {
|
|
832
836
|
const s = await e.text();
|
|
833
|
-
return { url: h.url, type: "text", mimeType:
|
|
837
|
+
return { url: h.url, type: "text", mimeType: a, content: s.slice(0, 1e5) };
|
|
834
838
|
}
|
|
835
|
-
if (
|
|
839
|
+
if (a === "application/pdf" || a.startsWith("application/") && !a.includes("html")) {
|
|
836
840
|
const s = await e.arrayBuffer();
|
|
837
841
|
if (s.byteLength > 10485760)
|
|
838
|
-
return { url: h.url, type: "binary", mimeType:
|
|
842
|
+
return { url: h.url, type: "binary", mimeType: a, error: "File too large", size: s.byteLength, maxSize: 10485760 };
|
|
839
843
|
const c = Buffer.from(s).toString("base64");
|
|
840
|
-
return { url: h.url, type: "binary", mimeType:
|
|
844
|
+
return { url: h.url, type: "binary", mimeType: a, dataUrl: `data:${a};base64,${c}`, size: s.byteLength };
|
|
841
845
|
}
|
|
842
846
|
const o = await e.text(), n = Z.load(o);
|
|
843
847
|
n('script, style, nav, footer, header, aside, iframe, noscript, svg, [role="navigation"], [role="banner"], [role="complementary"], .ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
|
|
@@ -849,8 +853,8 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
849
853
|
image: n('meta[property="og:image"]').attr("content") || ""
|
|
850
854
|
};
|
|
851
855
|
let l = "";
|
|
852
|
-
const
|
|
853
|
-
for (const s of
|
|
856
|
+
const i = ["article", "main", '[role="main"]', ".content", ".post-content", ".entry-content", ".article-content", "body"];
|
|
857
|
+
for (const s of i) {
|
|
854
858
|
const c = n(s).first();
|
|
855
859
|
if (c.length && c.text().trim().length > 200) {
|
|
856
860
|
l = c.text();
|
|
@@ -887,12 +891,12 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
887
891
|
headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9" }
|
|
888
892
|
}).then((o) => o.text());
|
|
889
893
|
let e, t = /<a .*?href="(.+?)".+?<\/a>/g;
|
|
890
|
-
const
|
|
894
|
+
const a = new F();
|
|
891
895
|
for (; (e = t.exec(r)) !== null; ) {
|
|
892
896
|
let o = /uddg=(.+)&?/.exec(decodeURIComponent(e[1]))?.[1];
|
|
893
|
-
if (o && (o = decodeURIComponent(o)), o &&
|
|
897
|
+
if (o && (o = decodeURIComponent(o)), o && a.add(o), a.size >= (h.length || 5)) break;
|
|
894
898
|
}
|
|
895
|
-
return
|
|
899
|
+
return a;
|
|
896
900
|
}
|
|
897
901
|
};
|
|
898
902
|
export {
|