@ztimson/ai-utils 0.8.14 → 0.8.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +24 -24
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +168 -172
- package/dist/index.mjs.map +1 -1
- package/dist/llm.d.ts +2 -2
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -33,8 +33,8 @@ class Q extends L {
|
|
|
33
33
|
if (n.type == "tool_use")
|
|
34
34
|
t.push({ timestamp: e, role: "tool", id: n.id, name: n.name, args: n.input, content: void 0 });
|
|
35
35
|
else if (n.type == "tool_result") {
|
|
36
|
-
const
|
|
37
|
-
|
|
36
|
+
const m = t.findLast((l) => l.id == n.tool_use_id);
|
|
37
|
+
m && (m[n.is_error ? "error" : "content"] = n.content);
|
|
38
38
|
}
|
|
39
39
|
});
|
|
40
40
|
}
|
|
@@ -57,27 +57,27 @@ class Q extends L {
|
|
|
57
57
|
const t = new AbortController();
|
|
58
58
|
return Object.assign(new Promise(async (a) => {
|
|
59
59
|
let o = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
60
|
-
const n = e.tools || this.ai.options.llm?.tools || [],
|
|
60
|
+
const n = e.tools || this.ai.options.llm?.tools || [], m = {
|
|
61
61
|
model: e.model || this.model,
|
|
62
62
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
63
63
|
system: e.system || this.ai.options.llm?.system || "",
|
|
64
64
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
65
|
-
tools: n.map((
|
|
66
|
-
name:
|
|
67
|
-
description:
|
|
65
|
+
tools: n.map((u) => ({
|
|
66
|
+
name: u.name,
|
|
67
|
+
description: u.description,
|
|
68
68
|
input_schema: {
|
|
69
69
|
type: "object",
|
|
70
|
-
properties:
|
|
71
|
-
required:
|
|
70
|
+
properties: u.args ? z(u.args, (s, c) => ({ ...c, required: void 0 })) : {},
|
|
71
|
+
required: u.args ? Object.entries(u.args).filter((s) => s[1].required).map((s) => s[0]) : []
|
|
72
72
|
},
|
|
73
73
|
fn: void 0
|
|
74
74
|
})),
|
|
75
75
|
messages: o,
|
|
76
76
|
stream: !!e.stream
|
|
77
77
|
};
|
|
78
|
-
let
|
|
78
|
+
let l, i = !0;
|
|
79
79
|
do {
|
|
80
|
-
if (
|
|
80
|
+
if (l = await this.client.messages.create(m).catch((s) => {
|
|
81
81
|
throw s.message += `
|
|
82
82
|
|
|
83
83
|
Messages:
|
|
@@ -85,40 +85,40 @@ ${JSON.stringify(o, null, 2)}`, s;
|
|
|
85
85
|
}), e.stream) {
|
|
86
86
|
i ? i = !1 : e.stream({ text: `
|
|
87
87
|
|
|
88
|
-
` }),
|
|
89
|
-
for await (const s of
|
|
88
|
+
` }), l.content = [];
|
|
89
|
+
for await (const s of l) {
|
|
90
90
|
if (t.signal.aborted) break;
|
|
91
91
|
if (s.type === "content_block_start")
|
|
92
|
-
s.content_block.type === "text" ?
|
|
92
|
+
s.content_block.type === "text" ? l.content.push({ type: "text", text: "" }) : s.content_block.type === "tool_use" && l.content.push({ type: "tool_use", id: s.content_block.id, name: s.content_block.name, input: "" });
|
|
93
93
|
else if (s.type === "content_block_delta")
|
|
94
94
|
if (s.delta.type === "text_delta") {
|
|
95
|
-
const
|
|
96
|
-
|
|
97
|
-
} else s.delta.type === "input_json_delta" && (
|
|
95
|
+
const c = s.delta.text;
|
|
96
|
+
l.content.at(-1).text += c, e.stream({ text: c });
|
|
97
|
+
} else s.delta.type === "input_json_delta" && (l.content.at(-1).input += s.delta.partial_json);
|
|
98
98
|
else if (s.type === "content_block_stop") {
|
|
99
|
-
const
|
|
100
|
-
|
|
99
|
+
const c = l.content.at(-1);
|
|
100
|
+
c.input != null && (c.input = c.input ? S(c.input, {}) : {});
|
|
101
101
|
} else if (s.type === "message_stop")
|
|
102
102
|
break;
|
|
103
103
|
}
|
|
104
104
|
}
|
|
105
|
-
const
|
|
106
|
-
if (
|
|
107
|
-
o.push({ role: "assistant", content:
|
|
108
|
-
const s = await Promise.all(
|
|
109
|
-
const d = n.find(R("name",
|
|
110
|
-
if (e.stream && e.stream({ tool:
|
|
105
|
+
const u = l.content.filter((s) => s.type === "tool_use");
|
|
106
|
+
if (u.length && !t.signal.aborted) {
|
|
107
|
+
o.push({ role: "assistant", content: l.content });
|
|
108
|
+
const s = await Promise.all(u.map(async (c) => {
|
|
109
|
+
const d = n.find(R("name", c.name));
|
|
110
|
+
if (e.stream && e.stream({ tool: c.name }), !d) return { tool_use_id: c.id, is_error: !0, content: "Tool not found" };
|
|
111
111
|
try {
|
|
112
|
-
const p = await d.fn(
|
|
113
|
-
return { type: "tool_result", tool_use_id:
|
|
112
|
+
const p = await d.fn(c.input, e?.stream, this.ai);
|
|
113
|
+
return { type: "tool_result", tool_use_id: c.id, content: _(p) };
|
|
114
114
|
} catch (p) {
|
|
115
|
-
return { type: "tool_result", tool_use_id:
|
|
115
|
+
return { type: "tool_result", tool_use_id: c.id, is_error: !0, content: p?.message || p?.toString() || "Unknown" };
|
|
116
116
|
}
|
|
117
117
|
}));
|
|
118
|
-
o.push({ role: "user", content: s }),
|
|
118
|
+
o.push({ role: "user", content: s }), m.messages = o;
|
|
119
119
|
}
|
|
120
|
-
} while (!t.signal.aborted &&
|
|
121
|
-
o.push({ role: "assistant", content:
|
|
120
|
+
} while (!t.signal.aborted && l.content.some((u) => u.type === "tool_use"));
|
|
121
|
+
o.push({ role: "assistant", content: l.content.filter((u) => u.type == "text").map((u) => u.text).join(`
|
|
122
122
|
|
|
123
123
|
`) }), o = this.toStandard(o), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...o), a(o.at(-1)?.content);
|
|
124
124
|
}), { abort: () => t.abort() });
|
|
@@ -178,40 +178,40 @@ class P extends L {
|
|
|
178
178
|
return Object.assign(new Promise(async (a, o) => {
|
|
179
179
|
e.system && (e.history?.[0]?.role != "system" ? e.history?.splice(0, 0, { role: "system", content: e.system, timestamp: Date.now() }) : e.history[0].content = e.system);
|
|
180
180
|
let n = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
181
|
-
const
|
|
181
|
+
const m = e.tools || this.ai.options.llm?.tools || [], l = {
|
|
182
182
|
model: e.model || this.model,
|
|
183
183
|
messages: n,
|
|
184
184
|
stream: !!e.stream,
|
|
185
185
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
186
186
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
187
|
-
tools:
|
|
187
|
+
tools: m.map((s) => ({
|
|
188
188
|
type: "function",
|
|
189
189
|
function: {
|
|
190
190
|
name: s.name,
|
|
191
191
|
description: s.description,
|
|
192
192
|
parameters: {
|
|
193
193
|
type: "object",
|
|
194
|
-
properties: s.args ? z(s.args, (
|
|
195
|
-
required: s.args ? Object.entries(s.args).filter((
|
|
194
|
+
properties: s.args ? z(s.args, (c, d) => ({ ...d, required: void 0 })) : {},
|
|
195
|
+
required: s.args ? Object.entries(s.args).filter((c) => c[1].required).map((c) => c[0]) : []
|
|
196
196
|
}
|
|
197
197
|
}
|
|
198
198
|
}))
|
|
199
199
|
};
|
|
200
|
-
let i,
|
|
200
|
+
let i, u = !0;
|
|
201
201
|
do {
|
|
202
|
-
if (i = await this.client.chat.completions.create(
|
|
203
|
-
throw
|
|
202
|
+
if (i = await this.client.chat.completions.create(l).catch((c) => {
|
|
203
|
+
throw c.message += `
|
|
204
204
|
|
|
205
205
|
Messages:
|
|
206
|
-
${JSON.stringify(n, null, 2)}`,
|
|
206
|
+
${JSON.stringify(n, null, 2)}`, c;
|
|
207
207
|
}), e.stream) {
|
|
208
|
-
|
|
208
|
+
u ? u = !1 : e.stream({ text: `
|
|
209
209
|
|
|
210
210
|
` }), i.choices = [{ message: { role: "assistant", content: "", tool_calls: [] } }];
|
|
211
|
-
for await (const
|
|
211
|
+
for await (const c of i) {
|
|
212
212
|
if (t.signal.aborted) break;
|
|
213
|
-
if (
|
|
214
|
-
for (const d of
|
|
213
|
+
if (c.choices[0].delta.content && (i.choices[0].message.content += c.choices[0].delta.content, e.stream({ text: c.choices[0].delta.content })), c.choices[0].delta.tool_calls)
|
|
214
|
+
for (const d of c.choices[0].delta.tool_calls) {
|
|
215
215
|
const p = i.choices[0].message.tool_calls.find((f) => f.index === d.index);
|
|
216
216
|
p ? (d.id && (p.id = d.id), d.type && (p.type = d.type), d.function && (p.function || (p.function = {}), d.function.name && (p.function.name = d.function.name), d.function.arguments && (p.function.arguments = (p.function.arguments || "") + d.function.arguments))) : i.choices[0].message.tool_calls.push({
|
|
217
217
|
index: d.index,
|
|
@@ -228,8 +228,8 @@ ${JSON.stringify(n, null, 2)}`, l;
|
|
|
228
228
|
const s = i.choices[0].message.tool_calls || [];
|
|
229
229
|
if (s.length && !t.signal.aborted) {
|
|
230
230
|
n.push(i.choices[0].message);
|
|
231
|
-
const
|
|
232
|
-
const p =
|
|
231
|
+
const c = await Promise.all(s.map(async (d) => {
|
|
232
|
+
const p = m?.find(R("name", d.function.name));
|
|
233
233
|
if (e.stream && e.stream({ tool: d.function.name }), !p) return { role: "tool", tool_call_id: d.id, content: '{"error": "Tool not found"}' };
|
|
234
234
|
try {
|
|
235
235
|
const f = S(d.function.arguments, {}), g = await p.fn(f, e.stream, this.ai);
|
|
@@ -238,7 +238,7 @@ ${JSON.stringify(n, null, 2)}`, l;
|
|
|
238
238
|
return { role: "tool", tool_call_id: d.id, content: _({ error: f?.message || f?.toString() || "Unknown" }) };
|
|
239
239
|
}
|
|
240
240
|
}));
|
|
241
|
-
n.push(...
|
|
241
|
+
n.push(...c), l.messages = n;
|
|
242
242
|
}
|
|
243
243
|
} while (!t.signal.aborted && i.choices?.[0]?.message?.tool_calls?.length);
|
|
244
244
|
n.push({ role: "assistant", content: i.choices[0].message.content || "" }), n = this.toStandard(n), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...n), a(n.at(-1)?.content);
|
|
@@ -274,13 +274,13 @@ class X {
|
|
|
274
274
|
};
|
|
275
275
|
return Object.assign(new Promise(async (o) => {
|
|
276
276
|
if (e.history || (e.history = []), e.memory) {
|
|
277
|
-
const
|
|
278
|
-
const [
|
|
279
|
-
|
|
277
|
+
const m = async (i, u, s = 10) => {
|
|
278
|
+
const [c, d] = await Promise.all([
|
|
279
|
+
u ? this.embedding(u) : Promise.resolve(null),
|
|
280
280
|
i ? this.embedding(i) : Promise.resolve(null)
|
|
281
281
|
]);
|
|
282
282
|
return (e.memory || []).map((p) => {
|
|
283
|
-
const f = (
|
|
283
|
+
const f = (c ? this.cosineSimilarity(p.embeddings[0], c[0].embedding) : 0) + (d ? this.cosineSimilarity(p.embeddings[1], d[0].embedding) : 0);
|
|
284
284
|
return { ...p, score: f };
|
|
285
285
|
}).toSorted((p, f) => p.score - f.score).slice(0, s).map((p) => `- ${p.owner}: ${p.fact}`).join(`
|
|
286
286
|
`);
|
|
@@ -288,9 +288,9 @@ class X {
|
|
|
288
288
|
e.system += `
|
|
289
289
|
You have RAG memory and will be given the top_k closest memories regarding the users query. Save anything new you have learned worth remembering from the user message using the remember tool and feel free to recall memories manually.
|
|
290
290
|
`;
|
|
291
|
-
const
|
|
292
|
-
|
|
293
|
-
${
|
|
291
|
+
const l = await m(r);
|
|
292
|
+
l.length && e.history.push({ role: "tool", name: "recall", id: "auto_recall_" + Math.random().toString(), args: {}, content: `Things I remembered:
|
|
293
|
+
${l}` }), e.tools = [{
|
|
294
294
|
name: "recall",
|
|
295
295
|
description: "Recall the closest memories you have regarding a query using RAG",
|
|
296
296
|
args: {
|
|
@@ -300,7 +300,7 @@ ${m}` }), e.tools = [{
|
|
|
300
300
|
},
|
|
301
301
|
fn: (i) => {
|
|
302
302
|
if (!i.subject && !i.query) throw new Error("Either a subject or query argument is required");
|
|
303
|
-
return
|
|
303
|
+
return m(i.query, i.subject, i.topK);
|
|
304
304
|
}
|
|
305
305
|
}, {
|
|
306
306
|
name: "remember",
|
|
@@ -311,18 +311,18 @@ ${m}` }), e.tools = [{
|
|
|
311
311
|
},
|
|
312
312
|
fn: async (i) => {
|
|
313
313
|
if (!e.memory) return;
|
|
314
|
-
const
|
|
314
|
+
const u = await Promise.all([
|
|
315
315
|
this.embedding(i.owner),
|
|
316
316
|
this.embedding(`${i.owner}: ${i.fact}`)
|
|
317
|
-
]), s = { owner: i.owner, fact: i.fact, embeddings: [
|
|
318
|
-
return e.memory.splice(0, e.memory.length, ...e.memory.filter((
|
|
317
|
+
]), s = { owner: i.owner, fact: i.fact, embeddings: [u[0][0].embedding, u[1][0].embedding] };
|
|
318
|
+
return e.memory.splice(0, e.memory.length, ...e.memory.filter((c) => !(this.cosineSimilarity(s.embeddings[0], c.embeddings[0]) >= 0.9 && this.cosineSimilarity(s.embeddings[1], c.embeddings[1]) >= 0.8)), s), "Remembered!";
|
|
319
319
|
}
|
|
320
320
|
}, ...e.tools || []];
|
|
321
321
|
}
|
|
322
322
|
const n = await this.models[t].ask(r, e);
|
|
323
|
-
if (e.memory && e.history.splice(0, e.history.length, ...e.history.filter((
|
|
324
|
-
const
|
|
325
|
-
e.history.splice(0, e.history.length, ...
|
|
323
|
+
if (e.memory && e.history.splice(0, e.history.length, ...e.history.filter((m) => m.role != "tool" || m.name != "recall" && m.name != "remember")), e.compress) {
|
|
324
|
+
const m = await this.ai.language.compressHistory(e.history, e.compress.max, e.compress.min, e);
|
|
325
|
+
e.history.splice(0, e.history.length, ...m);
|
|
326
326
|
}
|
|
327
327
|
return o(n);
|
|
328
328
|
}), { abort: a });
|
|
@@ -350,10 +350,10 @@ ${m}` }), e.tools = [{
|
|
|
350
350
|
if (n += this.estimateTokens(d.content), n < t) o++;
|
|
351
351
|
else break;
|
|
352
352
|
if (r.length <= o) return r;
|
|
353
|
-
const
|
|
353
|
+
const m = r[0].role == "system" ? r[0] : null, l = o == 0 ? [] : r.slice(-o), i = (o == 0 ? r : r.slice(0, -o)).filter((d) => d.role === "assistant" || d.role === "user"), u = await this.summarize(i.map((d) => `[${d.role}]: ${d.content}`).join(`
|
|
354
354
|
|
|
355
|
-
`), 500, a), s = Date.now(),
|
|
356
|
-
return
|
|
355
|
+
`), 500, a), s = Date.now(), c = [{ role: "tool", name: "summary", id: "summary_" + s, args: {}, content: `Conversation Summary: ${u?.summary}`, timestamp: s }, ...l];
|
|
356
|
+
return m && c.splice(0, 0, m), c;
|
|
357
357
|
}
|
|
358
358
|
/**
|
|
359
359
|
* Compare the difference between embeddings (calculates the angle between two vectors)
|
|
@@ -364,8 +364,8 @@ ${m}` }), e.tools = [{
|
|
|
364
364
|
cosineSimilarity(r, e) {
|
|
365
365
|
if (r.length !== e.length) throw new Error("Vectors must be same length");
|
|
366
366
|
let t = 0, a = 0, o = 0;
|
|
367
|
-
for (let
|
|
368
|
-
t += r[
|
|
367
|
+
for (let m = 0; m < r.length; m++)
|
|
368
|
+
t += r[m] * e[m], a += r[m] * r[m], o += e[m] * e[m];
|
|
369
369
|
const n = Math.sqrt(a) * Math.sqrt(o);
|
|
370
370
|
return n === 0 ? 0 : t / n;
|
|
371
371
|
}
|
|
@@ -377,25 +377,25 @@ ${m}` }), e.tools = [{
|
|
|
377
377
|
* @returns {string[]} Chunked strings
|
|
378
378
|
*/
|
|
379
379
|
chunk(r, e = 500, t = 50) {
|
|
380
|
-
const a = (
|
|
381
|
-
const
|
|
382
|
-
return typeof s == "object" && !Array.isArray(s) ? a(s,
|
|
380
|
+
const a = (l, i = "") => l ? Object.entries(l).flatMap(([u, s]) => {
|
|
381
|
+
const c = i ? `${i}${isNaN(+u) ? `.${u}` : `[${u}]`}` : u;
|
|
382
|
+
return typeof s == "object" && !Array.isArray(s) ? a(s, c) : `${c}: ${Array.isArray(s) ? s.join(", ") : s}`;
|
|
383
383
|
}) : [], n = (typeof r == "object" ? a(r) : r.toString().split(`
|
|
384
|
-
`)).flatMap((
|
|
385
|
-
`]),
|
|
386
|
-
for (let
|
|
387
|
-
let i = "",
|
|
388
|
-
for (;
|
|
389
|
-
const
|
|
390
|
-
if (this.estimateTokens(
|
|
384
|
+
`)).flatMap((l) => [...l.split(/\s+/).filter(Boolean), `
|
|
385
|
+
`]), m = [];
|
|
386
|
+
for (let l = 0; l < n.length; ) {
|
|
387
|
+
let i = "", u = l;
|
|
388
|
+
for (; u < n.length; ) {
|
|
389
|
+
const c = i + (i ? " " : "") + n[u];
|
|
390
|
+
if (this.estimateTokens(c.replace(/\s*\n\s*/g, `
|
|
391
391
|
`)) > e && i) break;
|
|
392
|
-
i =
|
|
392
|
+
i = c, u++;
|
|
393
393
|
}
|
|
394
394
|
const s = i.replace(/\s*\n\s*/g, `
|
|
395
395
|
`).trim();
|
|
396
|
-
s &&
|
|
396
|
+
s && m.push(s), l = Math.max(u - t, u === l ? l + 1 : u);
|
|
397
397
|
}
|
|
398
|
-
return
|
|
398
|
+
return m;
|
|
399
399
|
}
|
|
400
400
|
/**
|
|
401
401
|
* Create a vector representation of a string
|
|
@@ -407,13 +407,13 @@ ${m}` }), e.tools = [{
|
|
|
407
407
|
let { maxTokens: t = 500, overlapTokens: a = 50 } = e, o = !1;
|
|
408
408
|
const n = () => {
|
|
409
409
|
o = !0;
|
|
410
|
-
},
|
|
410
|
+
}, m = (i) => new Promise((u, s) => {
|
|
411
411
|
if (o) return s(new Error("Aborted"));
|
|
412
|
-
const
|
|
412
|
+
const c = [
|
|
413
413
|
H(G(D(import.meta.url)), "embedder.js"),
|
|
414
414
|
this.ai.options.path,
|
|
415
415
|
this.ai.options?.embedder || "bge-small-en-v1.5"
|
|
416
|
-
], d = w("node",
|
|
416
|
+
], d = w("node", c, { stdio: ["pipe", "pipe", "ignore"] });
|
|
417
417
|
d.stdin.write(i), d.stdin.end();
|
|
418
418
|
let p = "";
|
|
419
419
|
d.stdout.on("data", (f) => p += f.toString()), d.on("close", (f) => {
|
|
@@ -421,22 +421,22 @@ ${m}` }), e.tools = [{
|
|
|
421
421
|
if (f === 0)
|
|
422
422
|
try {
|
|
423
423
|
const g = JSON.parse(p);
|
|
424
|
-
|
|
424
|
+
u(g.embedding);
|
|
425
425
|
} catch {
|
|
426
426
|
s(new Error("Failed to parse embedding output"));
|
|
427
427
|
}
|
|
428
428
|
else
|
|
429
429
|
s(new Error(`Embedder process exited with code ${f}`));
|
|
430
430
|
}), d.on("error", s);
|
|
431
|
-
}),
|
|
432
|
-
const i = this.chunk(r, t, a),
|
|
431
|
+
}), l = (async () => {
|
|
432
|
+
const i = this.chunk(r, t, a), u = [];
|
|
433
433
|
for (let s = 0; s < i.length && !o; s++) {
|
|
434
|
-
const
|
|
435
|
-
|
|
434
|
+
const c = i[s], d = await m(c);
|
|
435
|
+
u.push({ index: s, embedding: d, text: c, tokens: this.estimateTokens(c) });
|
|
436
436
|
}
|
|
437
|
-
return
|
|
437
|
+
return u;
|
|
438
438
|
})();
|
|
439
|
-
return Object.assign(
|
|
439
|
+
return Object.assign(l, { abort: n });
|
|
440
440
|
}
|
|
441
441
|
/**
|
|
442
442
|
* Estimate variable as tokens
|
|
@@ -455,8 +455,8 @@ ${m}` }), e.tools = [{
|
|
|
455
455
|
*/
|
|
456
456
|
fuzzyMatch(r, ...e) {
|
|
457
457
|
if (e.length < 2) throw new Error("Requires at least 2 strings to compare");
|
|
458
|
-
const t = (n,
|
|
459
|
-
return { avg: o.reduce((n,
|
|
458
|
+
const t = (n, m = 10) => n.toLowerCase().split("").map((l, i) => l.charCodeAt(0) * (i + 1) % m / m).slice(0, m), a = t(r), o = e.map((n) => t(n)).map((n) => this.cosineSimilarity(a, n));
|
|
459
|
+
return { avg: o.reduce((n, m) => n + m, 0) / o.length, max: Math.max(...o), similarities: o };
|
|
460
460
|
}
|
|
461
461
|
/**
|
|
462
462
|
* Ask a question with JSON response
|
|
@@ -475,8 +475,8 @@ Responses are ignored`;
|
|
|
475
475
|
return t?.system && (a += `
|
|
476
476
|
|
|
477
477
|
` + t.system), new Promise(async (o, n) => {
|
|
478
|
-
let
|
|
479
|
-
const
|
|
478
|
+
let m = !1;
|
|
479
|
+
const l = await this.ask(r, {
|
|
480
480
|
temperature: 0.3,
|
|
481
481
|
...t,
|
|
482
482
|
system: a,
|
|
@@ -486,8 +486,8 @@ Responses are ignored`;
|
|
|
486
486
|
args: { json: { type: "string", description: "Javascript parsable JSON string", required: !0 } },
|
|
487
487
|
fn: (i) => {
|
|
488
488
|
try {
|
|
489
|
-
const
|
|
490
|
-
o(
|
|
489
|
+
const u = JSON.parse(i.json);
|
|
490
|
+
o(u), m = !0;
|
|
491
491
|
} catch {
|
|
492
492
|
return "Invalid JSON";
|
|
493
493
|
}
|
|
@@ -495,24 +495,24 @@ Responses are ignored`;
|
|
|
495
495
|
}
|
|
496
496
|
}, ...t?.tools || []]
|
|
497
497
|
});
|
|
498
|
-
|
|
499
|
-
${
|
|
498
|
+
m || n(`AI failed to create JSON:
|
|
499
|
+
${l}`);
|
|
500
500
|
});
|
|
501
501
|
}
|
|
502
502
|
/**
|
|
503
503
|
* Create a summary of some text
|
|
504
504
|
* @param {string} text Text to summarize
|
|
505
|
-
* @param {number}
|
|
505
|
+
* @param {number} length Max number of words
|
|
506
506
|
* @param options LLM request options
|
|
507
507
|
* @returns {Promise<string>} Summary
|
|
508
508
|
*/
|
|
509
509
|
async summarize(r, e = 500, t) {
|
|
510
|
-
let a = `Your job is to summarize the users message using tool calls. Call the \`submit\` tool at least once with the shortest summary possible that's <= ${e}
|
|
510
|
+
let a = `Your job is to summarize the users message using tool calls. Call the \`submit\` tool at least once with the shortest summary possible that's <= ${e} words. The tool call will respond with the token count. Responses are ignored`;
|
|
511
511
|
return t?.system && (a += `
|
|
512
512
|
|
|
513
513
|
` + t.system), new Promise(async (o, n) => {
|
|
514
|
-
let
|
|
515
|
-
const
|
|
514
|
+
let m = !1;
|
|
515
|
+
const l = await this.ask(r, {
|
|
516
516
|
temperature: 0.3,
|
|
517
517
|
...t,
|
|
518
518
|
system: a,
|
|
@@ -520,15 +520,11 @@ ${m}`);
|
|
|
520
520
|
name: "submit",
|
|
521
521
|
description: "Submit summary",
|
|
522
522
|
args: { summary: { type: "string", description: "Text summarization", required: !0 } },
|
|
523
|
-
fn: (i) => {
|
|
524
|
-
if (!i.summary) return "No summary provided";
|
|
525
|
-
const c = this.estimateTokens(i.summary);
|
|
526
|
-
return c > e ? `Summary is too long (${c} tokens)` : (u = !0, o(i.summary || null), `Saved (${c} tokens)`);
|
|
527
|
-
}
|
|
523
|
+
fn: (i) => i.summary ? i.summary.split(" ").length > e ? `Too long: ${e} words` : (m = !0, o(i.summary || null), `Saved: ${e} words`) : "No summary provided"
|
|
528
524
|
}, ...t?.tools || []]
|
|
529
525
|
});
|
|
530
|
-
|
|
531
|
-
${
|
|
526
|
+
m || n(`AI failed to create summary:
|
|
527
|
+
${l}`);
|
|
532
528
|
});
|
|
533
529
|
}
|
|
534
530
|
}
|
|
@@ -557,18 +553,18 @@ print(json.dumps(segments))
|
|
|
557
553
|
async addPunctuation(r, e, t = 150) {
|
|
558
554
|
const a = (n) => {
|
|
559
555
|
if (n = n.toLowerCase().replace(/[^a-z]/g, ""), n.length <= 3) return 1;
|
|
560
|
-
const
|
|
561
|
-
let
|
|
562
|
-
return n.endsWith("e") &&
|
|
556
|
+
const m = n.match(/[aeiouy]+/g);
|
|
557
|
+
let l = m ? m.length : 1;
|
|
558
|
+
return n.endsWith("e") && l--, Math.max(1, l);
|
|
563
559
|
};
|
|
564
560
|
let o = "";
|
|
565
|
-
return r.transcription.filter((n,
|
|
566
|
-
let
|
|
567
|
-
const i = r.transcription[
|
|
568
|
-
return !n.text &&
|
|
561
|
+
return r.transcription.filter((n, m) => {
|
|
562
|
+
let l = !1;
|
|
563
|
+
const i = r.transcription[m - 1], u = r.transcription[m + 1];
|
|
564
|
+
return !n.text && u ? (u.offsets.from = n.offsets.from, u.timestamps.from = n.offsets.from) : n.text && n.text[0] != " " && i && (i.offsets.to = n.offsets.to, i.timestamps.to = n.timestamps.to, i.text += n.text, l = !0), !!n.text && !l;
|
|
569
565
|
}).forEach((n) => {
|
|
570
|
-
const
|
|
571
|
-
|
|
566
|
+
const m = /^[A-Z]/.test(n.text.trim()), l = n.offsets.to - n.offsets.from, u = a(n.text.trim()) * t;
|
|
567
|
+
m && l > u * 2 && n.text[0] == " " && (o += "."), o += n.text;
|
|
572
568
|
}), e ? this.ai.language.ask(o, {
|
|
573
569
|
system: "Remove any misplaced punctuation from the following ASR transcript using the replace tool. Avoid modifying words unless there is an obvious typo",
|
|
574
570
|
temperature: 0.1,
|
|
@@ -589,11 +585,11 @@ print(json.dumps(segments))
|
|
|
589
585
|
e.forEach((p) => {
|
|
590
586
|
a.has(p.speaker) || a.set(p.speaker, ++o);
|
|
591
587
|
});
|
|
592
|
-
const n = await this.addPunctuation(r, t),
|
|
588
|
+
const n = await this.addPunctuation(r, t), m = n.match(/[^.!?]+[.!?]+/g) || [n], l = r.transcription.filter((p) => p.text.trim()), i = m.map((p) => {
|
|
593
589
|
if (p = p.trim(), !p) return null;
|
|
594
590
|
const f = p.toLowerCase().replace(/[^\w\s]/g, "").split(/\s+/), g = /* @__PURE__ */ new Map();
|
|
595
591
|
f.forEach((x) => {
|
|
596
|
-
const k =
|
|
592
|
+
const k = l.find((y) => x === y.text.trim().toLowerCase().replace(/[^\w]/g, ""));
|
|
597
593
|
if (!k) return;
|
|
598
594
|
const E = k.offsets.from / 1e3, $ = e.find((y) => E >= y.start && E <= y.end);
|
|
599
595
|
if ($) {
|
|
@@ -605,17 +601,17 @@ print(json.dumps(segments))
|
|
|
605
601
|
return g.forEach((x, k) => {
|
|
606
602
|
x > v && (v = x, T = k);
|
|
607
603
|
}), { speaker: T, text: p };
|
|
608
|
-
}).filter((p) => p !== null),
|
|
604
|
+
}).filter((p) => p !== null), u = [];
|
|
609
605
|
i.forEach((p) => {
|
|
610
|
-
const f =
|
|
611
|
-
f && f.speaker === p.speaker ? f.text += " " + p.text :
|
|
606
|
+
const f = u[u.length - 1];
|
|
607
|
+
f && f.speaker === p.speaker ? f.text += " " + p.text : u.push({ ...p });
|
|
612
608
|
});
|
|
613
|
-
let s =
|
|
609
|
+
let s = u.map((p) => `[Speaker ${p.speaker}]: ${p.text}`).join(`
|
|
614
610
|
`).trim();
|
|
615
611
|
if (!t) return s;
|
|
616
|
-
let
|
|
617
|
-
|
|
618
|
-
const d = await this.ai.language.json(
|
|
612
|
+
let c = this.ai.language.chunk(s, 500, 0);
|
|
613
|
+
c.length > 4 && (c = [...c.slice(0, 3), c.at(-1)]);
|
|
614
|
+
const d = await this.ai.language.json(c.join(`
|
|
619
615
|
`), '{1: "Detected Name", 2: "Second Name"}', {
|
|
620
616
|
system: "Use the following transcript to identify speakers. Only identify speakers you are positive about, dont mention speakers you are unsure about in your response",
|
|
621
617
|
temperature: 0.1
|
|
@@ -625,19 +621,19 @@ print(json.dumps(segments))
|
|
|
625
621
|
runAsr(r, e = {}) {
|
|
626
622
|
let t;
|
|
627
623
|
const a = new Promise((o, n) => {
|
|
628
|
-
this.downloadAsrModel(e.model).then((
|
|
624
|
+
this.downloadAsrModel(e.model).then((m) => {
|
|
629
625
|
if (e.diarization) {
|
|
630
|
-
let
|
|
626
|
+
let l = M.join(M.dirname(r), "transcript");
|
|
631
627
|
t = w(
|
|
632
628
|
this.ai.options.whisper,
|
|
633
|
-
["-m",
|
|
629
|
+
["-m", m, "-f", r, "-np", "-ml", "1", "-oj", "-of", l],
|
|
634
630
|
{ stdio: ["ignore", "ignore", "pipe"] }
|
|
635
631
|
), t.on("error", (i) => n(i)), t.on("close", async (i) => {
|
|
636
632
|
if (i === 0) {
|
|
637
|
-
|
|
633
|
+
l = await b.readFile(l + ".json", "utf-8"), b.rm(l + ".json").catch(() => {
|
|
638
634
|
});
|
|
639
635
|
try {
|
|
640
|
-
o(JSON.parse(
|
|
636
|
+
o(JSON.parse(l));
|
|
641
637
|
} catch {
|
|
642
638
|
n(new Error("Failed to parse whisper JSON"));
|
|
643
639
|
}
|
|
@@ -645,9 +641,9 @@ print(json.dumps(segments))
|
|
|
645
641
|
n(new Error(`Exit code ${i}`));
|
|
646
642
|
});
|
|
647
643
|
} else {
|
|
648
|
-
let
|
|
649
|
-
t = w(this.ai.options.whisper, ["-m",
|
|
650
|
-
i === 0 ? o(
|
|
644
|
+
let l = "";
|
|
645
|
+
t = w(this.ai.options.whisper, ["-m", m, "-f", r, "-np", "-nt"]), t.on("error", (i) => n(i)), t.stdout.on("data", (i) => l += i.toString()), t.on("close", async (i) => {
|
|
646
|
+
i === 0 ? o(l.trim() || null) : n(new Error(`Exit code ${i}`));
|
|
651
647
|
});
|
|
652
648
|
}
|
|
653
649
|
});
|
|
@@ -658,30 +654,30 @@ print(json.dumps(segments))
|
|
|
658
654
|
let e = !1, t = () => {
|
|
659
655
|
e = !0;
|
|
660
656
|
};
|
|
661
|
-
const a = (n) => new Promise((
|
|
662
|
-
const
|
|
663
|
-
|
|
657
|
+
const a = (n) => new Promise((m) => {
|
|
658
|
+
const l = w(n, ["-W", "ignore", "-c", "import pyannote.audio"]);
|
|
659
|
+
l.on("close", (i) => m(i === 0)), l.on("error", () => m(!1));
|
|
664
660
|
}), o = Promise.all([
|
|
665
661
|
a("python"),
|
|
666
662
|
a("python3")
|
|
667
|
-
]).then((async ([n,
|
|
663
|
+
]).then((async ([n, m]) => {
|
|
668
664
|
if (e) return;
|
|
669
|
-
if (!n && !
|
|
670
|
-
const
|
|
671
|
-
return new Promise((i,
|
|
665
|
+
if (!n && !m) throw new Error("Pyannote is not installed: pip install pyannote.audio");
|
|
666
|
+
const l = m ? "python3" : "python";
|
|
667
|
+
return new Promise((i, u) => {
|
|
672
668
|
if (e) return;
|
|
673
669
|
let s = "";
|
|
674
|
-
const
|
|
675
|
-
|
|
670
|
+
const c = w(l, ["-W", "ignore", "-c", this.pyannote, r]);
|
|
671
|
+
c.stdout.on("data", (d) => s += d.toString()), c.stderr.on("data", (d) => console.error(d.toString())), c.on("close", (d) => {
|
|
676
672
|
if (d === 0)
|
|
677
673
|
try {
|
|
678
674
|
i(JSON.parse(s));
|
|
679
675
|
} catch {
|
|
680
|
-
|
|
676
|
+
u(new Error("Failed to parse diarization output"));
|
|
681
677
|
}
|
|
682
678
|
else
|
|
683
|
-
|
|
684
|
-
}),
|
|
679
|
+
u(new Error(`Python process exited with code ${d}`));
|
|
680
|
+
}), c.on("error", u), t = () => c.kill("SIGTERM");
|
|
685
681
|
});
|
|
686
682
|
}));
|
|
687
683
|
return Object.assign(o, { abort: t });
|
|
@@ -694,17 +690,17 @@ print(json.dumps(segments))
|
|
|
694
690
|
});
|
|
695
691
|
if (!e.diarization) return this.runAsr(t, { model: e.model });
|
|
696
692
|
const o = this.runAsr(t, { model: e.model, diarization: !0 }), n = this.runDiarization(t);
|
|
697
|
-
let
|
|
698
|
-
|
|
693
|
+
let m = !1, l = () => {
|
|
694
|
+
m = !0, o.abort(), n.abort(), a();
|
|
699
695
|
};
|
|
700
|
-
const i = Promise.allSettled([o, n]).then(async ([
|
|
701
|
-
if (
|
|
702
|
-
` +
|
|
696
|
+
const i = Promise.allSettled([o, n]).then(async ([u, s]) => {
|
|
697
|
+
if (u.status == "rejected") throw new Error(`Whisper.cpp timestamps:
|
|
698
|
+
` + u.reason);
|
|
703
699
|
if (s.status == "rejected") throw new Error(`Pyannote:
|
|
704
700
|
` + s.reason);
|
|
705
|
-
return
|
|
701
|
+
return m || !e.diarization ? u.value : this.diarizeTranscript(u.value, s.value, e.diarization == "llm");
|
|
706
702
|
}).finally(() => a());
|
|
707
|
-
return Object.assign(i, { abort:
|
|
703
|
+
return Object.assign(i, { abort: l });
|
|
708
704
|
}
|
|
709
705
|
async downloadAsrModel(r = this.whisperModel) {
|
|
710
706
|
if (!this.ai.options.whisper) throw new Error("Whisper not configured");
|
|
@@ -833,8 +829,8 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
833
829
|
const s = await e.arrayBuffer();
|
|
834
830
|
if (s.byteLength > 10485760)
|
|
835
831
|
return { url: h.url, type: "media", mimeType: a, error: "File too large", size: s.byteLength, maxSize: 10485760 };
|
|
836
|
-
const
|
|
837
|
-
return { url: h.url, type: "media", mimeType: a, dataUrl: `data:${a};base64,${
|
|
832
|
+
const c = Buffer.from(s).toString("base64");
|
|
833
|
+
return { url: h.url, type: "media", mimeType: a, dataUrl: `data:${a};base64,${c}`, size: s.byteLength };
|
|
838
834
|
}
|
|
839
835
|
if (a.match(/^(text\/(plain|csv|xml)|application\/(json|xml|csv|x-yaml))/) || h.url.match(/\.(txt|json|xml|csv|yaml|yml|md)$/i)) {
|
|
840
836
|
const s = await e.text();
|
|
@@ -844,43 +840,43 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
844
840
|
const s = await e.arrayBuffer();
|
|
845
841
|
if (s.byteLength > 10485760)
|
|
846
842
|
return { url: h.url, type: "binary", mimeType: a, error: "File too large", size: s.byteLength, maxSize: 10485760 };
|
|
847
|
-
const
|
|
848
|
-
return { url: h.url, type: "binary", mimeType: a, dataUrl: `data:${a};base64,${
|
|
843
|
+
const c = Buffer.from(s).toString("base64");
|
|
844
|
+
return { url: h.url, type: "binary", mimeType: a, dataUrl: `data:${a};base64,${c}`, size: s.byteLength };
|
|
849
845
|
}
|
|
850
846
|
const o = await e.text(), n = Z.load(o);
|
|
851
847
|
n('script, style, nav, footer, header, aside, iframe, noscript, svg, [role="navigation"], [role="banner"], [role="complementary"], .ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
|
|
852
|
-
const
|
|
848
|
+
const m = {
|
|
853
849
|
title: n('meta[property="og:title"]').attr("content") || n("title").text() || "",
|
|
854
850
|
description: n('meta[name="description"]').attr("content") || n('meta[property="og:description"]').attr("content") || "",
|
|
855
851
|
author: n('meta[name="author"]').attr("content") || "",
|
|
856
852
|
published: n('meta[property="article:published_time"]').attr("content") || n("time").attr("datetime") || "",
|
|
857
853
|
image: n('meta[property="og:image"]').attr("content") || ""
|
|
858
854
|
};
|
|
859
|
-
let
|
|
855
|
+
let l = "";
|
|
860
856
|
const i = ["article", "main", '[role="main"]', ".content", ".post-content", ".entry-content", ".article-content", "body"];
|
|
861
857
|
for (const s of i) {
|
|
862
|
-
const
|
|
863
|
-
if (
|
|
864
|
-
|
|
858
|
+
const c = n(s).first();
|
|
859
|
+
if (c.length && c.text().trim().length > 200) {
|
|
860
|
+
l = c.text();
|
|
865
861
|
break;
|
|
866
862
|
}
|
|
867
863
|
}
|
|
868
|
-
|
|
864
|
+
l || (l = n("body").text()), l = l.replace(/\n\s*\n\s*\n/g, `
|
|
869
865
|
|
|
870
866
|
`).replace(/[ \t]+/g, " ").trim().slice(0, 5e4);
|
|
871
|
-
let
|
|
872
|
-
return
|
|
873
|
-
const d = n(
|
|
874
|
-
d && p && !d.startsWith("#") &&
|
|
875
|
-
}),
|
|
867
|
+
let u = [];
|
|
868
|
+
return l.length < 500 && (n("a[href]").each((s, c) => {
|
|
869
|
+
const d = n(c).attr("href"), p = n(c).text().trim();
|
|
870
|
+
d && p && !d.startsWith("#") && u.push({ text: p, href: d });
|
|
871
|
+
}), u = u.slice(0, 50)), {
|
|
876
872
|
url: h.url,
|
|
877
873
|
type: "html",
|
|
878
|
-
title:
|
|
879
|
-
description:
|
|
880
|
-
author:
|
|
881
|
-
published:
|
|
882
|
-
content:
|
|
883
|
-
links:
|
|
874
|
+
title: m.title.trim(),
|
|
875
|
+
description: m.description.trim(),
|
|
876
|
+
author: m.author.trim(),
|
|
877
|
+
published: m.published,
|
|
878
|
+
content: l,
|
|
879
|
+
links: u.length ? u : void 0
|
|
884
880
|
};
|
|
885
881
|
}
|
|
886
882
|
}, je = {
|