@ztimson/ai-utils 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +17 -17
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +211 -198
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -2
package/dist/index.mjs
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
|
-
import * as
|
|
2
|
-
import { objectMap as
|
|
3
|
-
import { Anthropic as
|
|
4
|
-
import { OpenAI as
|
|
5
|
-
import { Worker as
|
|
6
|
-
import { fileURLToPath as
|
|
7
|
-
import { join as
|
|
8
|
-
import { spawn as
|
|
9
|
-
import { pipeline as
|
|
10
|
-
import
|
|
1
|
+
import * as P from "node:os";
|
|
2
|
+
import { objectMap as x, JSONAttemptParse as b, findByProp as S, JSONSanitize as w, clean as q, Http as $, consoleInterceptor as M, fn as E, ASet as A } from "@ztimson/utils";
|
|
3
|
+
import { Anthropic as O } from "@anthropic-ai/sdk";
|
|
4
|
+
import { OpenAI as v } from "openai";
|
|
5
|
+
import { Worker as R } from "worker_threads";
|
|
6
|
+
import { fileURLToPath as U } from "url";
|
|
7
|
+
import { join as z, dirname as L } from "path";
|
|
8
|
+
import { spawn as k } from "node:child_process";
|
|
9
|
+
import { pipeline as N } from "@xenova/transformers";
|
|
10
|
+
import * as W from "node:fs";
|
|
11
|
+
import C from "wavefile";
|
|
12
|
+
import { createWorker as D } from "tesseract.js";
|
|
11
13
|
import "./embedder.mjs";
|
|
12
|
-
import * as
|
|
13
|
-
import { $ as
|
|
14
|
-
class
|
|
14
|
+
import * as I from "cheerio";
|
|
15
|
+
import { $ as F, $Sync as H } from "@ztimson/node-utils";
|
|
16
|
+
class j {
|
|
15
17
|
}
|
|
16
|
-
class
|
|
18
|
+
class J extends j {
|
|
17
19
|
constructor(r, e, t) {
|
|
18
|
-
super(), this.ai = r, this.apiToken = e, this.model = t, this.client = new
|
|
20
|
+
super(), this.ai = r, this.apiToken = e, this.model = t, this.client = new O({ apiKey: e });
|
|
19
21
|
}
|
|
20
22
|
client;
|
|
21
23
|
toStandard(r) {
|
|
@@ -24,15 +26,15 @@ class I extends S {
|
|
|
24
26
|
if (typeof i.content == "string")
|
|
25
27
|
t.push({ timestamp: e, ...i });
|
|
26
28
|
else {
|
|
27
|
-
const
|
|
29
|
+
const s = i.content?.filter((n) => n.type == "text").map((n) => n.text).join(`
|
|
28
30
|
|
|
29
31
|
`);
|
|
30
|
-
|
|
31
|
-
if (
|
|
32
|
-
t.push({ timestamp: e, role: "tool", id:
|
|
33
|
-
else if (
|
|
34
|
-
const o = t.findLast((a) => a.id ==
|
|
35
|
-
o && (o[
|
|
32
|
+
s && t.push({ timestamp: e, role: i.role, content: s }), i.content.forEach((n) => {
|
|
33
|
+
if (n.type == "tool_use")
|
|
34
|
+
t.push({ timestamp: e, role: "tool", id: n.id, name: n.name, args: n.input, content: void 0 });
|
|
35
|
+
else if (n.type == "tool_result") {
|
|
36
|
+
const o = t.findLast((a) => a.id == n.tool_use_id);
|
|
37
|
+
o && (o[n.is_error ? "error" : "content"] = n.content);
|
|
36
38
|
}
|
|
37
39
|
});
|
|
38
40
|
}
|
|
@@ -54,34 +56,34 @@ class I extends S {
|
|
|
54
56
|
ask(r, e = {}) {
|
|
55
57
|
const t = new AbortController();
|
|
56
58
|
return Object.assign(new Promise(async (i) => {
|
|
57
|
-
let
|
|
58
|
-
const
|
|
59
|
+
let s = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
60
|
+
const n = e.tools || this.ai.options.llm?.tools || [], o = {
|
|
59
61
|
model: e.model || this.model,
|
|
60
62
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
61
63
|
system: e.system || this.ai.options.llm?.system || "",
|
|
62
64
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
63
|
-
tools:
|
|
65
|
+
tools: n.map((d) => ({
|
|
64
66
|
name: d.name,
|
|
65
67
|
description: d.description,
|
|
66
68
|
input_schema: {
|
|
67
69
|
type: "object",
|
|
68
|
-
properties: d.args ?
|
|
70
|
+
properties: d.args ? x(d.args, (c, l) => ({ ...l, required: void 0 })) : {},
|
|
69
71
|
required: d.args ? Object.entries(d.args).filter((c) => c[1].required).map((c) => c[0]) : []
|
|
70
72
|
},
|
|
71
73
|
fn: void 0
|
|
72
74
|
})),
|
|
73
|
-
messages:
|
|
75
|
+
messages: s,
|
|
74
76
|
stream: !!e.stream
|
|
75
77
|
};
|
|
76
|
-
let a,
|
|
78
|
+
let a, m = !0;
|
|
77
79
|
do {
|
|
78
80
|
if (a = await this.client.messages.create(o).catch((c) => {
|
|
79
81
|
throw c.message += `
|
|
80
82
|
|
|
81
83
|
Messages:
|
|
82
|
-
${JSON.stringify(
|
|
84
|
+
${JSON.stringify(s, null, 2)}`, c;
|
|
83
85
|
}), e.stream) {
|
|
84
|
-
|
|
86
|
+
m ? m = !1 : e.stream({ text: `
|
|
85
87
|
|
|
86
88
|
` }), a.content = [];
|
|
87
89
|
for await (const c of a) {
|
|
@@ -90,41 +92,41 @@ ${JSON.stringify(n, null, 2)}`, c;
|
|
|
90
92
|
c.content_block.type === "text" ? a.content.push({ type: "text", text: "" }) : c.content_block.type === "tool_use" && a.content.push({ type: "tool_use", id: c.content_block.id, name: c.content_block.name, input: "" });
|
|
91
93
|
else if (c.type === "content_block_delta")
|
|
92
94
|
if (c.delta.type === "text_delta") {
|
|
93
|
-
const
|
|
94
|
-
a.content.at(-1).text +=
|
|
95
|
+
const l = c.delta.text;
|
|
96
|
+
a.content.at(-1).text += l, e.stream({ text: l });
|
|
95
97
|
} else c.delta.type === "input_json_delta" && (a.content.at(-1).input += c.delta.partial_json);
|
|
96
98
|
else if (c.type === "content_block_stop") {
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
+
const l = a.content.at(-1);
|
|
100
|
+
l.input != null && (l.input = l.input ? b(l.input, {}) : {});
|
|
99
101
|
} else if (c.type === "message_stop")
|
|
100
102
|
break;
|
|
101
103
|
}
|
|
102
104
|
}
|
|
103
105
|
const d = a.content.filter((c) => c.type === "tool_use");
|
|
104
106
|
if (d.length && !t.signal.aborted) {
|
|
105
|
-
|
|
106
|
-
const c = await Promise.all(d.map(async (
|
|
107
|
-
const h =
|
|
108
|
-
if (e.stream && e.stream({ tool:
|
|
107
|
+
s.push({ role: "assistant", content: a.content });
|
|
108
|
+
const c = await Promise.all(d.map(async (l) => {
|
|
109
|
+
const h = n.find(S("name", l.name));
|
|
110
|
+
if (e.stream && e.stream({ tool: l.name }), !h) return { tool_use_id: l.id, is_error: !0, content: "Tool not found" };
|
|
109
111
|
try {
|
|
110
|
-
const u = await h.fn(
|
|
111
|
-
return { type: "tool_result", tool_use_id:
|
|
112
|
+
const u = await h.fn(l.input, e?.stream, this.ai);
|
|
113
|
+
return { type: "tool_result", tool_use_id: l.id, content: w(u) };
|
|
112
114
|
} catch (u) {
|
|
113
|
-
return { type: "tool_result", tool_use_id:
|
|
115
|
+
return { type: "tool_result", tool_use_id: l.id, is_error: !0, content: u?.message || u?.toString() || "Unknown" };
|
|
114
116
|
}
|
|
115
117
|
}));
|
|
116
|
-
|
|
118
|
+
s.push({ role: "user", content: c }), o.messages = s;
|
|
117
119
|
}
|
|
118
120
|
} while (!t.signal.aborted && a.content.some((d) => d.type === "tool_use"));
|
|
119
|
-
|
|
121
|
+
s.push({ role: "assistant", content: a.content.filter((d) => d.type == "text").map((d) => d.text).join(`
|
|
120
122
|
|
|
121
|
-
`) }),
|
|
123
|
+
`) }), s = this.toStandard(s), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...s), i(s.at(-1)?.content);
|
|
122
124
|
}), { abort: () => t.abort() });
|
|
123
125
|
}
|
|
124
126
|
}
|
|
125
|
-
class _ extends
|
|
127
|
+
class _ extends j {
|
|
126
128
|
constructor(r, e, t, i) {
|
|
127
|
-
super(), this.ai = r, this.host = e, this.token = t, this.model = i, this.client = new
|
|
129
|
+
super(), this.ai = r, this.host = e, this.token = t, this.model = i, this.client = new v(q({
|
|
128
130
|
baseURL: e,
|
|
129
131
|
apiKey: t
|
|
130
132
|
}));
|
|
@@ -134,16 +136,16 @@ class _ extends S {
|
|
|
134
136
|
for (let e = 0; e < r.length; e++) {
|
|
135
137
|
const t = r[e];
|
|
136
138
|
if (t.role === "assistant" && t.tool_calls) {
|
|
137
|
-
const i = t.tool_calls.map((
|
|
139
|
+
const i = t.tool_calls.map((s) => ({
|
|
138
140
|
role: "tool",
|
|
139
|
-
id:
|
|
140
|
-
name:
|
|
141
|
-
args:
|
|
141
|
+
id: s.id,
|
|
142
|
+
name: s.function.name,
|
|
143
|
+
args: b(s.function.arguments, {}),
|
|
142
144
|
timestamp: t.timestamp
|
|
143
145
|
}));
|
|
144
146
|
r.splice(e, 1, ...i), e += i.length - 1;
|
|
145
147
|
} else if (t.role === "tool" && t.content) {
|
|
146
|
-
const i = r.find((
|
|
148
|
+
const i = r.find((s) => t.tool_call_id == s.id);
|
|
147
149
|
i && (t.content.includes('"error":') ? i.error = t.content : i.content = t.content), r.splice(e, 1), e--;
|
|
148
150
|
}
|
|
149
151
|
r[e]?.timestamp || (r[e].timestamp = Date.now());
|
|
@@ -165,20 +167,20 @@ class _ extends S {
|
|
|
165
167
|
content: t.error || t.content
|
|
166
168
|
});
|
|
167
169
|
else {
|
|
168
|
-
const { timestamp: i, ...
|
|
169
|
-
e.push(
|
|
170
|
+
const { timestamp: i, ...s } = t;
|
|
171
|
+
e.push(s);
|
|
170
172
|
}
|
|
171
173
|
return e;
|
|
172
174
|
}, []);
|
|
173
175
|
}
|
|
174
176
|
ask(r, e = {}) {
|
|
175
177
|
const t = new AbortController();
|
|
176
|
-
return Object.assign(new Promise(async (i,
|
|
178
|
+
return Object.assign(new Promise(async (i, s) => {
|
|
177
179
|
e.system && e.history?.[0]?.role != "system" && e.history?.splice(0, 0, { role: "system", content: e.system, timestamp: Date.now() });
|
|
178
|
-
let
|
|
180
|
+
let n = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
179
181
|
const o = e.tools || this.ai.options.llm?.tools || [], a = {
|
|
180
182
|
model: e.model || this.model,
|
|
181
|
-
messages:
|
|
183
|
+
messages: n,
|
|
182
184
|
stream: !!e.stream,
|
|
183
185
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
184
186
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
@@ -189,55 +191,55 @@ class _ extends S {
|
|
|
189
191
|
description: c.description,
|
|
190
192
|
parameters: {
|
|
191
193
|
type: "object",
|
|
192
|
-
properties: c.args ?
|
|
193
|
-
required: c.args ? Object.entries(c.args).filter((
|
|
194
|
+
properties: c.args ? x(c.args, (l, h) => ({ ...h, required: void 0 })) : {},
|
|
195
|
+
required: c.args ? Object.entries(c.args).filter((l) => l[1].required).map((l) => l[0]) : []
|
|
194
196
|
}
|
|
195
197
|
}
|
|
196
198
|
}))
|
|
197
199
|
};
|
|
198
|
-
let
|
|
200
|
+
let m, d = !0;
|
|
199
201
|
do {
|
|
200
|
-
if (
|
|
201
|
-
throw
|
|
202
|
+
if (m = await this.client.chat.completions.create(a).catch((l) => {
|
|
203
|
+
throw l.message += `
|
|
202
204
|
|
|
203
205
|
Messages:
|
|
204
|
-
${JSON.stringify(
|
|
206
|
+
${JSON.stringify(n, null, 2)}`, l;
|
|
205
207
|
}), e.stream) {
|
|
206
208
|
d ? d = !1 : e.stream({ text: `
|
|
207
209
|
|
|
208
|
-
` }),
|
|
209
|
-
for await (const
|
|
210
|
+
` }), m.choices = [{ message: { content: "", tool_calls: [] } }];
|
|
211
|
+
for await (const l of m) {
|
|
210
212
|
if (t.signal.aborted) break;
|
|
211
|
-
|
|
213
|
+
l.choices[0].delta.content && (m.choices[0].message.content += l.choices[0].delta.content, e.stream({ text: l.choices[0].delta.content })), l.choices[0].delta.tool_calls && (m.choices[0].message.tool_calls = l.choices[0].delta.tool_calls);
|
|
212
214
|
}
|
|
213
215
|
}
|
|
214
|
-
const c =
|
|
216
|
+
const c = m.choices[0].message.tool_calls || [];
|
|
215
217
|
if (c.length && !t.signal.aborted) {
|
|
216
|
-
|
|
217
|
-
const
|
|
218
|
-
const u = o?.find(
|
|
218
|
+
n.push(m.choices[0].message);
|
|
219
|
+
const l = await Promise.all(c.map(async (h) => {
|
|
220
|
+
const u = o?.find(S("name", h.function.name));
|
|
219
221
|
if (e.stream && e.stream({ tool: h.function.name }), !u) return { role: "tool", tool_call_id: h.id, content: '{"error": "Tool not found"}' };
|
|
220
222
|
try {
|
|
221
|
-
const f =
|
|
222
|
-
return { role: "tool", tool_call_id: h.id, content:
|
|
223
|
+
const f = b(h.function.arguments, {}), y = await u.fn(f, e.stream, this.ai);
|
|
224
|
+
return { role: "tool", tool_call_id: h.id, content: w(y) };
|
|
223
225
|
} catch (f) {
|
|
224
|
-
return { role: "tool", tool_call_id: h.id, content:
|
|
226
|
+
return { role: "tool", tool_call_id: h.id, content: w({ error: f?.message || f?.toString() || "Unknown" }) };
|
|
225
227
|
}
|
|
226
228
|
}));
|
|
227
|
-
|
|
229
|
+
n.push(...l), a.messages = n;
|
|
228
230
|
}
|
|
229
|
-
} while (!t.signal.aborted &&
|
|
230
|
-
|
|
231
|
+
} while (!t.signal.aborted && m.choices?.[0]?.message?.tool_calls?.length);
|
|
232
|
+
n.push({ role: "assistant", content: m.choices[0].message.content || "" }), n = this.toStandard(n), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...n), i(n.at(-1)?.content);
|
|
231
233
|
}), { abort: () => t.abort() });
|
|
232
234
|
}
|
|
233
235
|
}
|
|
234
|
-
class
|
|
236
|
+
class B {
|
|
235
237
|
constructor(r) {
|
|
236
|
-
this.ai = r, this.embedWorker = new
|
|
238
|
+
this.ai = r, this.embedWorker = new R(z(L(U(import.meta.url)), "embedder.js")), this.embedWorker.on("message", ({ id: e, embedding: t }) => {
|
|
237
239
|
const i = this.embedQueue.get(e);
|
|
238
240
|
i && (i.resolve(t), this.embedQueue.delete(e));
|
|
239
241
|
}), r.options.llm?.models && Object.entries(r.options.llm.models).forEach(([e, t]) => {
|
|
240
|
-
this.defaultModel || (this.defaultModel = e), t.proto == "anthropic" ? this.models[e] = new
|
|
242
|
+
this.defaultModel || (this.defaultModel = e), t.proto == "anthropic" ? this.models[e] = new J(this.ai, t.token, e) : t.proto == "ollama" ? this.models[e] = new _(this.ai, t.host, "not-needed", e) : t.proto == "openai" && (this.models[e] = new _(this.ai, t.host || null, t.token, e));
|
|
241
243
|
});
|
|
242
244
|
}
|
|
243
245
|
embedWorker = null;
|
|
@@ -256,20 +258,20 @@ class H {
|
|
|
256
258
|
if (!this.models[t]) throw new Error(`Model does not exist: ${t}`);
|
|
257
259
|
let i = () => {
|
|
258
260
|
};
|
|
259
|
-
return Object.assign(new Promise(async (
|
|
261
|
+
return Object.assign(new Promise(async (s) => {
|
|
260
262
|
if (e.history || (e.history = []), e.memory) {
|
|
261
263
|
e.system = (e.system || "") + `
|
|
262
264
|
You have passive persistent memory never make any mention of your memory capabilities and what you can/cannot remember
|
|
263
265
|
`;
|
|
264
|
-
const o = async (
|
|
265
|
-
const [
|
|
266
|
+
const o = async (m, d, c = 50) => {
|
|
267
|
+
const [l, h] = await Promise.all([
|
|
266
268
|
d ? this.embedding(d) : Promise.resolve(null),
|
|
267
|
-
|
|
269
|
+
m ? this.embedding(m) : Promise.resolve(null)
|
|
268
270
|
]);
|
|
269
|
-
return (e.memory || []).map((u) => ({ ...u, score:
|
|
271
|
+
return (e.memory || []).map((u) => ({ ...u, score: l ? this.cosineSimilarity(u.embeddings[0], l[0].embedding) : 1 })).filter((u) => u.score >= 0.8).map((u) => ({ ...u, score: h ? this.cosineSimilarity(u.embeddings[1], h[0].embedding) : u.score })).filter((u) => u.score >= 0.2).toSorted((u, f) => u.score - f.score).slice(0, c);
|
|
270
272
|
}, a = await o(r);
|
|
271
273
|
a.length && e.history.push({ role: "assistant", content: `Things I remembered:
|
|
272
|
-
` + a.map((
|
|
274
|
+
` + a.map((m) => `${m.owner}: ${m.fact}`).join(`
|
|
273
275
|
`) }), e.tools = [...e.tools || [], {
|
|
274
276
|
name: "read_memory",
|
|
275
277
|
description: "Check your long-term memory for more information",
|
|
@@ -278,13 +280,13 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
278
280
|
query: { type: "string", description: "Search memory based on a query, can be used with or without subject argument" },
|
|
279
281
|
limit: { type: "number", description: "Result limit, default 5" }
|
|
280
282
|
},
|
|
281
|
-
fn: (
|
|
282
|
-
if (!
|
|
283
|
-
return o(
|
|
283
|
+
fn: (m) => {
|
|
284
|
+
if (!m.subject && !m.query) throw new Error("Either a subject or query argument is required");
|
|
285
|
+
return o(m.query, m.subject, m.limit || 5);
|
|
284
286
|
}
|
|
285
287
|
}];
|
|
286
288
|
}
|
|
287
|
-
const
|
|
289
|
+
const n = await this.models[t].ask(r, e);
|
|
288
290
|
if (e.memory) {
|
|
289
291
|
const o = e.history?.findIndex((a) => a.role == "assistant" && a.content.startsWith("Things I remembered:"));
|
|
290
292
|
o != null && o >= 0 && e.history?.splice(o, 1);
|
|
@@ -294,15 +296,15 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
294
296
|
if (e.compress)
|
|
295
297
|
o = await this.ai.language.compressHistory(e.history, e.compress.max, e.compress.min, e), e.history.splice(0, e.history.length, ...o.history);
|
|
296
298
|
else {
|
|
297
|
-
const a = e.history?.findLastIndex((
|
|
299
|
+
const a = e.history?.findLastIndex((m) => m.role == "user") ?? -1;
|
|
298
300
|
o = await this.ai.language.compressHistory(a != -1 ? e.history.slice(a) : e.history, 0, 0, e);
|
|
299
301
|
}
|
|
300
302
|
if (e.memory) {
|
|
301
|
-
const a = e.memory.filter((
|
|
303
|
+
const a = e.memory.filter((m) => !o.memory.some((d) => this.cosineSimilarity(m.embeddings[1], d.embeddings[1]) > 0.8)).concat(o.memory);
|
|
302
304
|
e.memory.splice(0, e.memory.length, ...a);
|
|
303
305
|
}
|
|
304
306
|
}
|
|
305
|
-
return n
|
|
307
|
+
return s(n);
|
|
306
308
|
}), { abort: i });
|
|
307
309
|
}
|
|
308
310
|
/**
|
|
@@ -315,20 +317,20 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
315
317
|
*/
|
|
316
318
|
async compressHistory(r, e, t, i) {
|
|
317
319
|
if (this.estimateTokens(r) < e) return { history: r, memory: [] };
|
|
318
|
-
let
|
|
320
|
+
let s = 0, n = 0;
|
|
319
321
|
for (let u of r.toReversed())
|
|
320
|
-
if (
|
|
322
|
+
if (n += this.estimateTokens(u.content), n < t) s++;
|
|
321
323
|
else break;
|
|
322
|
-
if (r.length <=
|
|
323
|
-
const o = r[0].role == "system" ? r[0] : null, a =
|
|
324
|
+
if (r.length <= s) return { history: r, memory: [] };
|
|
325
|
+
const o = r[0].role == "system" ? r[0] : null, a = s == 0 ? [] : r.slice(-s), m = (s == 0 ? r : r.slice(0, -s)).filter((u) => u.role === "assistant" || u.role === "user"), d = await this.json(`Create the smallest summary possible, no more than 500 tokens. Create a list of NEW facts (split by subject [pro]noun and fact) about what you learned from this conversation that you didn't already know or get from a tool call or system prompt. Focus only on new information about people, topics, or facts. Avoid generating facts about the AI. Match this format: {summary: string, facts: [[subject, fact]]}
|
|
324
326
|
|
|
325
|
-
${
|
|
327
|
+
${m.map((u) => `${u.role}: ${u.content}`).join(`
|
|
326
328
|
|
|
327
|
-
`)}`, { model: i?.model, temperature: i?.temperature || 0.3 }), c = /* @__PURE__ */ new Date(),
|
|
329
|
+
`)}`, { model: i?.model, temperature: i?.temperature || 0.3 }), c = /* @__PURE__ */ new Date(), l = await Promise.all((d?.facts || [])?.map(async ([u, f]) => {
|
|
328
330
|
const y = await Promise.all([this.embedding(u), this.embedding(`${u}: ${f}`)]);
|
|
329
331
|
return { owner: u, fact: f, embeddings: [y[0][0].embedding, y[1][0].embedding], timestamp: c };
|
|
330
332
|
})), h = [{ role: "assistant", content: `Conversation Summary: ${d?.summary}`, timestamp: Date.now() }, ...a];
|
|
331
|
-
return o && h.splice(0, 0, o), { history: h, memory:
|
|
333
|
+
return o && h.splice(0, 0, o), { history: h, memory: l };
|
|
332
334
|
}
|
|
333
335
|
/**
|
|
334
336
|
* Compare the difference between embeddings (calculates the angle between two vectors)
|
|
@@ -338,11 +340,11 @@ ${l.map((u) => `${u.role}: ${u.content}`).join(`
|
|
|
338
340
|
*/
|
|
339
341
|
cosineSimilarity(r, e) {
|
|
340
342
|
if (r.length !== e.length) throw new Error("Vectors must be same length");
|
|
341
|
-
let t = 0, i = 0,
|
|
343
|
+
let t = 0, i = 0, s = 0;
|
|
342
344
|
for (let o = 0; o < r.length; o++)
|
|
343
|
-
t += r[o] * e[o], i += r[o] * r[o],
|
|
344
|
-
const
|
|
345
|
-
return
|
|
345
|
+
t += r[o] * e[o], i += r[o] * r[o], s += e[o] * e[o];
|
|
346
|
+
const n = Math.sqrt(i) * Math.sqrt(s);
|
|
347
|
+
return n === 0 ? 0 : t / n;
|
|
346
348
|
}
|
|
347
349
|
/**
|
|
348
350
|
* Chunk text into parts for AI digestion
|
|
@@ -352,21 +354,21 @@ ${l.map((u) => `${u.role}: ${u.content}`).join(`
|
|
|
352
354
|
* @returns {string[]} Chunked strings
|
|
353
355
|
*/
|
|
354
356
|
chunk(r, e = 500, t = 50) {
|
|
355
|
-
const i = (a,
|
|
356
|
-
const
|
|
357
|
-
return typeof c == "object" && !Array.isArray(c) ? i(c,
|
|
358
|
-
}) : [],
|
|
357
|
+
const i = (a, m = "") => a ? Object.entries(a).flatMap(([d, c]) => {
|
|
358
|
+
const l = m ? `${m}${isNaN(+d) ? `.${d}` : `[${d}]`}` : d;
|
|
359
|
+
return typeof c == "object" && !Array.isArray(c) ? i(c, l) : `${l}: ${Array.isArray(c) ? c.join(", ") : c}`;
|
|
360
|
+
}) : [], n = (typeof r == "object" ? i(r) : r.split(`
|
|
359
361
|
`)).flatMap((a) => [...a.split(/\s+/).filter(Boolean), `
|
|
360
362
|
`]), o = [];
|
|
361
|
-
for (let a = 0; a <
|
|
362
|
-
let
|
|
363
|
-
for (; d <
|
|
364
|
-
const
|
|
365
|
-
if (this.estimateTokens(
|
|
366
|
-
`)) > e &&
|
|
367
|
-
|
|
363
|
+
for (let a = 0; a < n.length; ) {
|
|
364
|
+
let m = "", d = a;
|
|
365
|
+
for (; d < n.length; ) {
|
|
366
|
+
const l = m + (m ? " " : "") + n[d];
|
|
367
|
+
if (this.estimateTokens(l.replace(/\s*\n\s*/g, `
|
|
368
|
+
`)) > e && m) break;
|
|
369
|
+
m = l, d++;
|
|
368
370
|
}
|
|
369
|
-
const c =
|
|
371
|
+
const c = m.replace(/\s*\n\s*/g, `
|
|
370
372
|
`).trim();
|
|
371
373
|
c && o.push(c), a = Math.max(d - t, d === a ? a + 1 : d);
|
|
372
374
|
}
|
|
@@ -380,20 +382,20 @@ ${l.map((u) => `${u.role}: ${u.content}`).join(`
|
|
|
380
382
|
* @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
|
|
381
383
|
*/
|
|
382
384
|
embedding(r, e = 500, t = 50) {
|
|
383
|
-
const i = (
|
|
384
|
-
const
|
|
385
|
-
this.embedQueue.set(
|
|
386
|
-
id:
|
|
387
|
-
text:
|
|
385
|
+
const i = (n) => new Promise((o, a) => {
|
|
386
|
+
const m = this.embedId++;
|
|
387
|
+
this.embedQueue.set(m, { resolve: o, reject: a }), this.embedWorker?.postMessage({
|
|
388
|
+
id: m,
|
|
389
|
+
text: n,
|
|
388
390
|
model: this.ai.options?.embedder || "bge-small-en-v1.5",
|
|
389
391
|
path: this.ai.options.path
|
|
390
392
|
});
|
|
391
|
-
}),
|
|
392
|
-
return Promise.all(
|
|
393
|
+
}), s = this.chunk(r, e, t);
|
|
394
|
+
return Promise.all(s.map(async (n, o) => ({
|
|
393
395
|
index: o,
|
|
394
|
-
embedding: await i(
|
|
395
|
-
text:
|
|
396
|
-
tokens: this.estimateTokens(
|
|
396
|
+
embedding: await i(n),
|
|
397
|
+
text: n,
|
|
398
|
+
tokens: this.estimateTokens(n)
|
|
397
399
|
})));
|
|
398
400
|
}
|
|
399
401
|
/**
|
|
@@ -413,8 +415,8 @@ ${l.map((u) => `${u.role}: ${u.content}`).join(`
|
|
|
413
415
|
*/
|
|
414
416
|
fuzzyMatch(r, ...e) {
|
|
415
417
|
if (e.length < 2) throw new Error("Requires at least 2 strings to compare");
|
|
416
|
-
const t = (
|
|
417
|
-
return { avg:
|
|
418
|
+
const t = (n, o = 10) => n.toLowerCase().split("").map((a, m) => a.charCodeAt(0) * (m + 1) % o / o).slice(0, o), i = t(r), s = e.map((n) => t(n)).map((n) => this.cosineSimilarity(i, n));
|
|
419
|
+
return { avg: s.reduce((n, o) => n + o, 0) / s.length, max: Math.max(...s), similarities: s };
|
|
418
420
|
}
|
|
419
421
|
/**
|
|
420
422
|
* Ask a question with JSON response
|
|
@@ -425,8 +427,8 @@ ${l.map((u) => `${u.role}: ${u.content}`).join(`
|
|
|
425
427
|
async json(r, e) {
|
|
426
428
|
let t = await this.ask(r, { system: "Respond using a JSON blob matching any provided examples", ...e });
|
|
427
429
|
if (!t) return {};
|
|
428
|
-
const i = /```(?:.+)?\s*([\s\S]*?)```/.exec(t),
|
|
429
|
-
return
|
|
430
|
+
const i = /```(?:.+)?\s*([\s\S]*?)```/.exec(t), s = i ? i[1].trim() : t;
|
|
431
|
+
return b(s, {});
|
|
430
432
|
}
|
|
431
433
|
/**
|
|
432
434
|
* Create a summary of some text
|
|
@@ -439,7 +441,7 @@ ${l.map((u) => `${u.role}: ${u.content}`).join(`
|
|
|
439
441
|
return this.ask(r, { system: `Generate a brief summary <= ${e} tokens. Output nothing else`, temperature: 0.3, ...t });
|
|
440
442
|
}
|
|
441
443
|
}
|
|
442
|
-
class
|
|
444
|
+
class G {
|
|
443
445
|
constructor(r) {
|
|
444
446
|
this.ai = r;
|
|
445
447
|
}
|
|
@@ -450,17 +452,17 @@ class J {
|
|
|
450
452
|
e.forEach((a) => {
|
|
451
453
|
t.has(a.speaker) || t.set(a.speaker, ++i);
|
|
452
454
|
});
|
|
453
|
-
const
|
|
454
|
-
let
|
|
455
|
+
const s = [];
|
|
456
|
+
let n = -1, o = "";
|
|
455
457
|
return r.forEach((a) => {
|
|
456
|
-
const
|
|
457
|
-
c !==
|
|
458
|
-
}), o &&
|
|
458
|
+
const m = a.timestamp[0], d = e.find((l) => m >= l.start && m <= l.end), c = d ? t.get(d.speaker) : 1;
|
|
459
|
+
c !== n ? (o && s.push(`[speaker ${n}]: ${o.trim()}`), n = c, o = a.text) : o += a.text;
|
|
460
|
+
}), o && s.push(`[speaker ${n}]: ${o.trim()}`), s.join(`
|
|
459
461
|
`);
|
|
460
462
|
}
|
|
461
463
|
async canDiarization() {
|
|
462
464
|
return new Promise((r) => {
|
|
463
|
-
const e =
|
|
465
|
+
const e = k("python3", ["-c", "import pyannote.audio"]);
|
|
464
466
|
e.on("close", (t) => r(t === 0)), e.on("error", () => r(!1));
|
|
465
467
|
});
|
|
466
468
|
}
|
|
@@ -486,43 +488,54 @@ for turn, _, speaker in diarization.itertracks(yield_label=True):
|
|
|
486
488
|
print(json.dumps(segments))
|
|
487
489
|
`;
|
|
488
490
|
return new Promise((t, i) => {
|
|
489
|
-
let
|
|
490
|
-
const
|
|
491
|
-
|
|
491
|
+
let s = "";
|
|
492
|
+
const n = k("python3", ["-c", e, r]);
|
|
493
|
+
n.stdout.on("data", (o) => s += o.toString()), n.stderr.on("data", (o) => console.error(o.toString())), n.on("close", (o) => {
|
|
492
494
|
if (o === 0)
|
|
493
495
|
try {
|
|
494
|
-
t(JSON.parse(
|
|
496
|
+
t(JSON.parse(s));
|
|
495
497
|
} catch {
|
|
496
498
|
i(new Error("Failed to parse diarization output"));
|
|
497
499
|
}
|
|
498
500
|
else
|
|
499
501
|
i(new Error(`Python process exited with code ${o}`));
|
|
500
|
-
}),
|
|
502
|
+
}), n.on("error", i);
|
|
501
503
|
});
|
|
502
504
|
}
|
|
503
505
|
asr(r, e = {}) {
|
|
504
506
|
const { model: t = this.ai.options.asr || "whisper-base", speaker: i = !1 } = e;
|
|
505
|
-
let
|
|
506
|
-
const
|
|
507
|
-
|
|
508
|
-
}, o = new Promise(async (a,
|
|
507
|
+
let s = !1;
|
|
508
|
+
const n = () => {
|
|
509
|
+
s = !0;
|
|
510
|
+
}, o = new Promise(async (a, m) => {
|
|
509
511
|
try {
|
|
510
|
-
if (
|
|
511
|
-
const d =
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
if (
|
|
516
|
-
|
|
517
|
-
|
|
512
|
+
if (s || (this.whisperPipeline || (this.whisperPipeline = await N("automatic-speech-recognition", `Xenova/${t}`, { cache_dir: this.ai.options.path, quantized: !0 })), s)) return a(null);
|
|
513
|
+
const d = new C.WaveFile(W.readFileSync(r));
|
|
514
|
+
d.toBitDepth("32f"), d.toSampleRate(16e3);
|
|
515
|
+
const c = d.getSamples();
|
|
516
|
+
let l;
|
|
517
|
+
if (Array.isArray(c)) {
|
|
518
|
+
const y = c[0], T = c[1];
|
|
519
|
+
l = new Float32Array(y.length);
|
|
520
|
+
for (let g = 0; g < y.length; g++) l[g] = (y[g] + T[g]) / 2;
|
|
521
|
+
} else
|
|
522
|
+
l = c;
|
|
523
|
+
if (s) return a(null);
|
|
524
|
+
const h = await this.whisperPipeline(l, { return_timestamps: i ? "word" : !1 });
|
|
525
|
+
if (!i) return a(h.text?.trim() || null);
|
|
526
|
+
if (s) return a(null);
|
|
527
|
+
const u = await this.runDiarization(r);
|
|
528
|
+
if (s) return a(null);
|
|
529
|
+
const f = this.combineSpeakerTranscript(h.chunks || [], u);
|
|
530
|
+
a(f);
|
|
518
531
|
} catch (d) {
|
|
519
|
-
|
|
532
|
+
m(d);
|
|
520
533
|
}
|
|
521
534
|
});
|
|
522
|
-
return Object.assign(o, { abort:
|
|
535
|
+
return Object.assign(o, { abort: n });
|
|
523
536
|
}
|
|
524
537
|
}
|
|
525
|
-
class
|
|
538
|
+
class Q {
|
|
526
539
|
constructor(r) {
|
|
527
540
|
this.ai = r;
|
|
528
541
|
}
|
|
@@ -534,16 +547,16 @@ class F {
|
|
|
534
547
|
ocr(r) {
|
|
535
548
|
let e;
|
|
536
549
|
const t = new Promise(async (i) => {
|
|
537
|
-
e = await
|
|
538
|
-
const { data:
|
|
539
|
-
await e.terminate(), i(
|
|
550
|
+
e = await D(this.ai.options.ocr || "eng", 2, { cachePath: this.ai.options.path });
|
|
551
|
+
const { data: s } = await e.recognize(r);
|
|
552
|
+
await e.terminate(), i(s.text.trim() || null);
|
|
540
553
|
});
|
|
541
554
|
return Object.assign(t, { abort: () => e?.terminate() });
|
|
542
555
|
}
|
|
543
556
|
}
|
|
544
|
-
class
|
|
557
|
+
class me {
|
|
545
558
|
constructor(r) {
|
|
546
|
-
this.options = r, r.path || (r.path =
|
|
559
|
+
this.options = r, r.path || (r.path = P.tmpdir()), process.env.TRANSFORMERS_CACHE = r.path, this.audio = new G(this), this.language = new B(this), this.vision = new Q(this);
|
|
547
560
|
}
|
|
548
561
|
/** Audio processing AI */
|
|
549
562
|
audio;
|
|
@@ -552,17 +565,17 @@ class ie {
|
|
|
552
565
|
/** Vision processing AI */
|
|
553
566
|
vision;
|
|
554
567
|
}
|
|
555
|
-
const
|
|
568
|
+
const K = {
|
|
556
569
|
name: "cli",
|
|
557
570
|
description: "Use the command line interface, returns any output",
|
|
558
571
|
args: { command: { type: "string", description: "Command to run", required: !0 } },
|
|
559
|
-
fn: (p) =>
|
|
560
|
-
},
|
|
572
|
+
fn: (p) => F`${p.command}`
|
|
573
|
+
}, de = {
|
|
561
574
|
name: "get_datetime",
|
|
562
575
|
description: "Get current UTC date / time",
|
|
563
576
|
args: {},
|
|
564
577
|
fn: async () => (/* @__PURE__ */ new Date()).toUTCString()
|
|
565
|
-
},
|
|
578
|
+
}, ue = {
|
|
566
579
|
name: "exec",
|
|
567
580
|
description: "Run code/scripts",
|
|
568
581
|
args: {
|
|
@@ -573,17 +586,17 @@ const G = {
|
|
|
573
586
|
try {
|
|
574
587
|
switch (p.type) {
|
|
575
588
|
case "bash":
|
|
576
|
-
return await
|
|
589
|
+
return await K.fn({ command: p.code }, r, e);
|
|
577
590
|
case "node":
|
|
578
|
-
return await
|
|
591
|
+
return await V.fn({ code: p.code }, r, e);
|
|
579
592
|
case "python":
|
|
580
|
-
return await
|
|
593
|
+
return await X.fn({ code: p.code }, r, e);
|
|
581
594
|
}
|
|
582
595
|
} catch (t) {
|
|
583
596
|
return { error: t?.message || t.toString() };
|
|
584
597
|
}
|
|
585
598
|
}
|
|
586
|
-
},
|
|
599
|
+
}, pe = {
|
|
587
600
|
name: "fetch",
|
|
588
601
|
description: "Make HTTP request to URL",
|
|
589
602
|
args: {
|
|
@@ -592,25 +605,25 @@ const G = {
|
|
|
592
605
|
headers: { type: "object", description: "HTTP headers to send", default: {} },
|
|
593
606
|
body: { type: "object", description: "HTTP body to send" }
|
|
594
607
|
},
|
|
595
|
-
fn: (p) => new
|
|
596
|
-
},
|
|
608
|
+
fn: (p) => new $({ url: p.url, headers: p.headers }).request({ method: p.method || "GET", body: p.body })
|
|
609
|
+
}, V = {
|
|
597
610
|
name: "exec_javascript",
|
|
598
611
|
description: "Execute commonjs javascript",
|
|
599
612
|
args: {
|
|
600
613
|
code: { type: "string", description: "CommonJS javascript", required: !0 }
|
|
601
614
|
},
|
|
602
615
|
fn: async (p) => {
|
|
603
|
-
const r =
|
|
616
|
+
const r = M(null), e = await E({ console: r }, p.code, !0).catch((t) => r.output.error.push(t));
|
|
604
617
|
return { ...r.output, return: e, stdout: void 0, stderr: void 0 };
|
|
605
618
|
}
|
|
606
|
-
},
|
|
619
|
+
}, X = {
|
|
607
620
|
name: "exec_javascript",
|
|
608
621
|
description: "Execute commonjs javascript",
|
|
609
622
|
args: {
|
|
610
623
|
code: { type: "string", description: "CommonJS javascript", required: !0 }
|
|
611
624
|
},
|
|
612
|
-
fn: async (p) => ({ result:
|
|
613
|
-
},
|
|
625
|
+
fn: async (p) => ({ result: H`python -c "${p.code}"` })
|
|
626
|
+
}, he = {
|
|
614
627
|
name: "read_webpage",
|
|
615
628
|
description: "Extract clean, structured content from a webpage. Use after web_search to read specific URLs",
|
|
616
629
|
args: {
|
|
@@ -618,18 +631,18 @@ const G = {
|
|
|
618
631
|
focus: { type: "string", description: 'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")' }
|
|
619
632
|
},
|
|
620
633
|
fn: async (p) => {
|
|
621
|
-
const r = await fetch(p.url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" } }).then((
|
|
622
|
-
throw new Error(`Failed to fetch: ${
|
|
623
|
-
}), e =
|
|
634
|
+
const r = await fetch(p.url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" } }).then((n) => n.text()).catch((n) => {
|
|
635
|
+
throw new Error(`Failed to fetch: ${n.message}`);
|
|
636
|
+
}), e = I.load(r);
|
|
624
637
|
e('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();
|
|
625
638
|
const t = {
|
|
626
639
|
title: e('meta[property="og:title"]').attr("content") || e("title").text() || "",
|
|
627
640
|
description: e('meta[name="description"]').attr("content") || e('meta[property="og:description"]').attr("content") || ""
|
|
628
641
|
};
|
|
629
642
|
let i = "";
|
|
630
|
-
const
|
|
631
|
-
for (const
|
|
632
|
-
const o = e(
|
|
643
|
+
const s = ["article", "main", '[role="main"]', ".content", ".post", ".entry", "body"];
|
|
644
|
+
for (const n of s) {
|
|
645
|
+
const o = e(n).first();
|
|
633
646
|
if (o.length && o.text().trim().length > 200) {
|
|
634
647
|
i = o.text();
|
|
635
648
|
break;
|
|
@@ -637,7 +650,7 @@ const G = {
|
|
|
637
650
|
}
|
|
638
651
|
return i || (i = e("body").text()), i = i.replace(/\s+/g, " ").trim().slice(0, 8e3), { url: p.url, title: t.title.trim(), description: t.description.trim(), content: i, focus: p.focus };
|
|
639
652
|
}
|
|
640
|
-
},
|
|
653
|
+
}, fe = {
|
|
641
654
|
name: "web_search",
|
|
642
655
|
description: "Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool",
|
|
643
656
|
args: {
|
|
@@ -647,30 +660,30 @@ const G = {
|
|
|
647
660
|
fn: async (p) => {
|
|
648
661
|
const r = await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(p.query)}`, {
|
|
649
662
|
headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9" }
|
|
650
|
-
}).then((
|
|
663
|
+
}).then((s) => s.text());
|
|
651
664
|
let e, t = /<a .*?href="(.+?)".+?<\/a>/g;
|
|
652
|
-
const i = new
|
|
665
|
+
const i = new A();
|
|
653
666
|
for (; (e = t.exec(r)) !== null; ) {
|
|
654
|
-
let
|
|
655
|
-
if (
|
|
667
|
+
let s = /uddg=(.+)&?/.exec(decodeURIComponent(e[1]))?.[1];
|
|
668
|
+
if (s && (s = decodeURIComponent(s)), s && i.add(s), i.size >= (p.length || 5)) break;
|
|
656
669
|
}
|
|
657
670
|
return i;
|
|
658
671
|
}
|
|
659
672
|
};
|
|
660
673
|
export {
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
674
|
+
me as Ai,
|
|
675
|
+
J as Anthropic,
|
|
676
|
+
G as Audio,
|
|
677
|
+
K as CliTool,
|
|
678
|
+
de as DateTimeTool,
|
|
679
|
+
ue as ExecTool,
|
|
680
|
+
pe as FetchTool,
|
|
681
|
+
V as JSTool,
|
|
682
|
+
j as LLMProvider,
|
|
670
683
|
_ as OpenAi,
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
684
|
+
X as PythonTool,
|
|
685
|
+
he as ReadWebpageTool,
|
|
686
|
+
Q as Vision,
|
|
687
|
+
fe as WebSearchTool
|
|
675
688
|
};
|
|
676
689
|
//# sourceMappingURL=index.mjs.map
|