@ztimson/ai-utils 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +17 -16
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +219 -214
- package/dist/index.mjs.map +1 -1
- package/dist/llm.d.ts +6 -8
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
import * as
|
|
2
|
-
import { objectMap as _, JSONAttemptParse as
|
|
3
|
-
import { Anthropic as
|
|
4
|
-
import { OpenAI as
|
|
5
|
-
import { Worker as
|
|
6
|
-
import { fileURLToPath as
|
|
7
|
-
import { join as
|
|
8
|
-
import { canDiarization as
|
|
9
|
-
import { createWorker as
|
|
1
|
+
import * as M from "node:os";
|
|
2
|
+
import { objectMap as _, JSONAttemptParse as b, findByProp as x, JSONSanitize as w, clean as E, Http as P, consoleInterceptor as A, fn as O, ASet as v } from "@ztimson/utils";
|
|
3
|
+
import { Anthropic as U } from "@anthropic-ai/sdk";
|
|
4
|
+
import { OpenAI as R } from "openai";
|
|
5
|
+
import { Worker as j } from "worker_threads";
|
|
6
|
+
import { fileURLToPath as S } from "url";
|
|
7
|
+
import { join as T, dirname as q } from "path";
|
|
8
|
+
import { canDiarization as L } from "./asr.mjs";
|
|
9
|
+
import { createWorker as D } from "tesseract.js";
|
|
10
10
|
import "./embedder.mjs";
|
|
11
|
-
import * as
|
|
12
|
-
import { $ as
|
|
13
|
-
class
|
|
11
|
+
import * as N from "cheerio";
|
|
12
|
+
import { $ as C, $Sync as W } from "@ztimson/node-utils";
|
|
13
|
+
class $ {
|
|
14
14
|
}
|
|
15
|
-
class
|
|
15
|
+
class z extends $ {
|
|
16
16
|
constructor(s, e, t) {
|
|
17
|
-
super(), this.ai = s, this.apiToken = e, this.model = t, this.client = new
|
|
17
|
+
super(), this.ai = s, this.apiToken = e, this.model = t, this.client = new U({ apiKey: e });
|
|
18
18
|
}
|
|
19
19
|
client;
|
|
20
20
|
toStandard(s) {
|
|
@@ -23,15 +23,15 @@ class W extends q {
|
|
|
23
23
|
if (typeof l.content == "string")
|
|
24
24
|
t.push({ timestamp: e, ...l });
|
|
25
25
|
else {
|
|
26
|
-
const r = l.content?.filter((
|
|
26
|
+
const r = l.content?.filter((o) => o.type == "text").map((o) => o.text).join(`
|
|
27
27
|
|
|
28
28
|
`);
|
|
29
|
-
r && t.push({ timestamp: e, role: l.role, content: r }), l.content.forEach((
|
|
30
|
-
if (
|
|
31
|
-
t.push({ timestamp: e, role: "tool", id:
|
|
32
|
-
else if (
|
|
33
|
-
const
|
|
34
|
-
|
|
29
|
+
r && t.push({ timestamp: e, role: l.role, content: r }), l.content.forEach((o) => {
|
|
30
|
+
if (o.type == "tool_use")
|
|
31
|
+
t.push({ timestamp: e, role: "tool", id: o.id, name: o.name, args: o.input, content: void 0 });
|
|
32
|
+
else if (o.type == "tool_result") {
|
|
33
|
+
const m = t.findLast((i) => i.id == o.tool_use_id);
|
|
34
|
+
m && (m[o.is_error ? "error" : "content"] = o.content);
|
|
35
35
|
}
|
|
36
36
|
});
|
|
37
37
|
}
|
|
@@ -54,76 +54,76 @@ class W extends q {
|
|
|
54
54
|
const t = new AbortController();
|
|
55
55
|
return Object.assign(new Promise(async (l) => {
|
|
56
56
|
let r = this.fromStandard([...e.history || [], { role: "user", content: s, timestamp: Date.now() }]);
|
|
57
|
-
const
|
|
57
|
+
const o = e.tools || this.ai.options.llm?.tools || [], m = {
|
|
58
58
|
model: e.model || this.model,
|
|
59
59
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
60
60
|
system: e.system || this.ai.options.llm?.system || "",
|
|
61
61
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
62
|
-
tools:
|
|
62
|
+
tools: o.map((d) => ({
|
|
63
63
|
name: d.name,
|
|
64
64
|
description: d.description,
|
|
65
65
|
input_schema: {
|
|
66
66
|
type: "object",
|
|
67
|
-
properties: d.args ? _(d.args, (
|
|
68
|
-
required: d.args ? Object.entries(d.args).filter((
|
|
67
|
+
properties: d.args ? _(d.args, (n, a) => ({ ...a, required: void 0 })) : {},
|
|
68
|
+
required: d.args ? Object.entries(d.args).filter((n) => n[1].required).map((n) => n[0]) : []
|
|
69
69
|
},
|
|
70
70
|
fn: void 0
|
|
71
71
|
})),
|
|
72
72
|
messages: r,
|
|
73
73
|
stream: !!e.stream
|
|
74
74
|
};
|
|
75
|
-
let
|
|
75
|
+
let i, c = !0;
|
|
76
76
|
do {
|
|
77
|
-
if (
|
|
78
|
-
throw
|
|
77
|
+
if (i = await this.client.messages.create(m).catch((n) => {
|
|
78
|
+
throw n.message += `
|
|
79
79
|
|
|
80
80
|
Messages:
|
|
81
|
-
${JSON.stringify(r, null, 2)}`,
|
|
81
|
+
${JSON.stringify(r, null, 2)}`, n;
|
|
82
82
|
}), e.stream) {
|
|
83
|
-
|
|
83
|
+
c ? c = !1 : e.stream({ text: `
|
|
84
84
|
|
|
85
|
-
` }),
|
|
86
|
-
for await (const
|
|
85
|
+
` }), i.content = [];
|
|
86
|
+
for await (const n of i) {
|
|
87
87
|
if (t.signal.aborted) break;
|
|
88
|
-
if (
|
|
89
|
-
|
|
90
|
-
else if (
|
|
91
|
-
if (
|
|
92
|
-
const
|
|
93
|
-
|
|
94
|
-
} else
|
|
95
|
-
else if (
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
} else if (
|
|
88
|
+
if (n.type === "content_block_start")
|
|
89
|
+
n.content_block.type === "text" ? i.content.push({ type: "text", text: "" }) : n.content_block.type === "tool_use" && i.content.push({ type: "tool_use", id: n.content_block.id, name: n.content_block.name, input: "" });
|
|
90
|
+
else if (n.type === "content_block_delta")
|
|
91
|
+
if (n.delta.type === "text_delta") {
|
|
92
|
+
const a = n.delta.text;
|
|
93
|
+
i.content.at(-1).text += a, e.stream({ text: a });
|
|
94
|
+
} else n.delta.type === "input_json_delta" && (i.content.at(-1).input += n.delta.partial_json);
|
|
95
|
+
else if (n.type === "content_block_stop") {
|
|
96
|
+
const a = i.content.at(-1);
|
|
97
|
+
a.input != null && (a.input = a.input ? b(a.input, {}) : {});
|
|
98
|
+
} else if (n.type === "message_stop")
|
|
99
99
|
break;
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
|
-
const d =
|
|
102
|
+
const d = i.content.filter((n) => n.type === "tool_use");
|
|
103
103
|
if (d.length && !t.signal.aborted) {
|
|
104
|
-
r.push({ role: "assistant", content:
|
|
105
|
-
const
|
|
106
|
-
const p =
|
|
107
|
-
if (e.stream && e.stream({ tool:
|
|
104
|
+
r.push({ role: "assistant", content: i.content });
|
|
105
|
+
const n = await Promise.all(d.map(async (a) => {
|
|
106
|
+
const p = o.find(x("name", a.name));
|
|
107
|
+
if (e.stream && e.stream({ tool: a.name }), !p) return { tool_use_id: a.id, is_error: !0, content: "Tool not found" };
|
|
108
108
|
try {
|
|
109
|
-
const u = await p.fn(
|
|
110
|
-
return { type: "tool_result", tool_use_id:
|
|
109
|
+
const u = await p.fn(a.input, e?.stream, this.ai);
|
|
110
|
+
return { type: "tool_result", tool_use_id: a.id, content: w(u) };
|
|
111
111
|
} catch (u) {
|
|
112
|
-
return { type: "tool_result", tool_use_id:
|
|
112
|
+
return { type: "tool_result", tool_use_id: a.id, is_error: !0, content: u?.message || u?.toString() || "Unknown" };
|
|
113
113
|
}
|
|
114
114
|
}));
|
|
115
|
-
r.push({ role: "user", content:
|
|
115
|
+
r.push({ role: "user", content: n }), m.messages = r;
|
|
116
116
|
}
|
|
117
|
-
} while (!t.signal.aborted &&
|
|
118
|
-
r.push({ role: "assistant", content:
|
|
117
|
+
} while (!t.signal.aborted && i.content.some((d) => d.type === "tool_use"));
|
|
118
|
+
r.push({ role: "assistant", content: i.content.filter((d) => d.type == "text").map((d) => d.text).join(`
|
|
119
119
|
|
|
120
120
|
`) }), r = this.toStandard(r), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...r), l(r.at(-1)?.content);
|
|
121
121
|
}), { abort: () => t.abort() });
|
|
122
122
|
}
|
|
123
123
|
}
|
|
124
|
-
class
|
|
124
|
+
class k extends $ {
|
|
125
125
|
constructor(s, e, t, l) {
|
|
126
|
-
super(), this.ai = s, this.host = e, this.token = t, this.model = l, this.client = new
|
|
126
|
+
super(), this.ai = s, this.host = e, this.token = t, this.model = l, this.client = new R(E({
|
|
127
127
|
baseURL: e,
|
|
128
128
|
apiKey: t
|
|
129
129
|
}));
|
|
@@ -137,7 +137,7 @@ class w extends q {
|
|
|
137
137
|
role: "tool",
|
|
138
138
|
id: r.id,
|
|
139
139
|
name: r.function.name,
|
|
140
|
-
args:
|
|
140
|
+
args: b(r.function.arguments, {}),
|
|
141
141
|
timestamp: t.timestamp
|
|
142
142
|
}));
|
|
143
143
|
s.splice(e, 1, ...l), e += l.length - 1;
|
|
@@ -174,66 +174,66 @@ class w extends q {
|
|
|
174
174
|
const t = new AbortController();
|
|
175
175
|
return Object.assign(new Promise(async (l, r) => {
|
|
176
176
|
e.system && e.history?.[0]?.role != "system" && e.history?.splice(0, 0, { role: "system", content: e.system, timestamp: Date.now() });
|
|
177
|
-
let
|
|
178
|
-
const
|
|
177
|
+
let o = this.fromStandard([...e.history || [], { role: "user", content: s, timestamp: Date.now() }]);
|
|
178
|
+
const m = e.tools || this.ai.options.llm?.tools || [], i = {
|
|
179
179
|
model: e.model || this.model,
|
|
180
|
-
messages:
|
|
180
|
+
messages: o,
|
|
181
181
|
stream: !!e.stream,
|
|
182
182
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
183
183
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
184
|
-
tools:
|
|
184
|
+
tools: m.map((n) => ({
|
|
185
185
|
type: "function",
|
|
186
186
|
function: {
|
|
187
|
-
name:
|
|
188
|
-
description:
|
|
187
|
+
name: n.name,
|
|
188
|
+
description: n.description,
|
|
189
189
|
parameters: {
|
|
190
190
|
type: "object",
|
|
191
|
-
properties:
|
|
192
|
-
required:
|
|
191
|
+
properties: n.args ? _(n.args, (a, p) => ({ ...p, required: void 0 })) : {},
|
|
192
|
+
required: n.args ? Object.entries(n.args).filter((a) => a[1].required).map((a) => a[0]) : []
|
|
193
193
|
}
|
|
194
194
|
}
|
|
195
195
|
}))
|
|
196
196
|
};
|
|
197
|
-
let
|
|
197
|
+
let c, d = !0;
|
|
198
198
|
do {
|
|
199
|
-
if (
|
|
200
|
-
throw
|
|
199
|
+
if (c = await this.client.chat.completions.create(i).catch((a) => {
|
|
200
|
+
throw a.message += `
|
|
201
201
|
|
|
202
202
|
Messages:
|
|
203
|
-
${JSON.stringify(
|
|
203
|
+
${JSON.stringify(o, null, 2)}`, a;
|
|
204
204
|
}), e.stream) {
|
|
205
205
|
d ? d = !1 : e.stream({ text: `
|
|
206
206
|
|
|
207
|
-
` }),
|
|
208
|
-
for await (const
|
|
207
|
+
` }), c.choices = [{ message: { content: "", tool_calls: [] } }];
|
|
208
|
+
for await (const a of c) {
|
|
209
209
|
if (t.signal.aborted) break;
|
|
210
|
-
|
|
210
|
+
a.choices[0].delta.content && (c.choices[0].message.content += a.choices[0].delta.content, e.stream({ text: a.choices[0].delta.content })), a.choices[0].delta.tool_calls && (c.choices[0].message.tool_calls = a.choices[0].delta.tool_calls);
|
|
211
211
|
}
|
|
212
212
|
}
|
|
213
|
-
const
|
|
214
|
-
if (
|
|
215
|
-
|
|
216
|
-
const
|
|
217
|
-
const u =
|
|
213
|
+
const n = c.choices[0].message.tool_calls || [];
|
|
214
|
+
if (n.length && !t.signal.aborted) {
|
|
215
|
+
o.push(c.choices[0].message);
|
|
216
|
+
const a = await Promise.all(n.map(async (p) => {
|
|
217
|
+
const u = m?.find(x("name", p.function.name));
|
|
218
218
|
if (e.stream && e.stream({ tool: p.function.name }), !u) return { role: "tool", tool_call_id: p.id, content: '{"error": "Tool not found"}' };
|
|
219
219
|
try {
|
|
220
|
-
const f =
|
|
221
|
-
return { role: "tool", tool_call_id: p.id, content:
|
|
220
|
+
const f = b(p.function.arguments, {}), y = await u.fn(f, e.stream, this.ai);
|
|
221
|
+
return { role: "tool", tool_call_id: p.id, content: w(y) };
|
|
222
222
|
} catch (f) {
|
|
223
|
-
return { role: "tool", tool_call_id: p.id, content:
|
|
223
|
+
return { role: "tool", tool_call_id: p.id, content: w({ error: f?.message || f?.toString() || "Unknown" }) };
|
|
224
224
|
}
|
|
225
225
|
}));
|
|
226
|
-
|
|
226
|
+
o.push(...a), i.messages = o;
|
|
227
227
|
}
|
|
228
|
-
} while (!t.signal.aborted &&
|
|
229
|
-
|
|
228
|
+
} while (!t.signal.aborted && c.choices?.[0]?.message?.tool_calls?.length);
|
|
229
|
+
o.push({ role: "assistant", content: c.choices[0].message.content || "" }), o = this.toStandard(o), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...o), l(o.at(-1)?.content);
|
|
230
230
|
}), { abort: () => t.abort() });
|
|
231
231
|
}
|
|
232
232
|
}
|
|
233
|
-
class
|
|
233
|
+
class I {
|
|
234
234
|
constructor(s) {
|
|
235
235
|
this.ai = s, s.options.llm?.models && Object.entries(s.options.llm.models).forEach(([e, t]) => {
|
|
236
|
-
this.defaultModel || (this.defaultModel = e), t.proto == "anthropic" ? this.models[e] = new
|
|
236
|
+
this.defaultModel || (this.defaultModel = e), t.proto == "anthropic" ? this.models[e] = new z(this.ai, t.token, e) : t.proto == "ollama" ? this.models[e] = new k(this.ai, t.host, "not-needed", e) : t.proto == "openai" && (this.models[e] = new k(this.ai, t.host || null, t.token, e));
|
|
237
237
|
});
|
|
238
238
|
}
|
|
239
239
|
defaultModel;
|
|
@@ -254,15 +254,15 @@ class z {
|
|
|
254
254
|
e.system = (e.system || "") + `
|
|
255
255
|
You have passive persistent memory never make any mention of your memory capabilities and what you can/cannot remember
|
|
256
256
|
`;
|
|
257
|
-
const
|
|
258
|
-
const [
|
|
257
|
+
const m = async (c, d, n = 50) => {
|
|
258
|
+
const [a, p] = await Promise.all([
|
|
259
259
|
d ? this.embedding(d) : Promise.resolve(null),
|
|
260
|
-
|
|
260
|
+
c ? this.embedding(c) : Promise.resolve(null)
|
|
261
261
|
]);
|
|
262
|
-
return (e.memory || []).map((u) => ({ ...u, score:
|
|
263
|
-
},
|
|
264
|
-
|
|
265
|
-
` +
|
|
262
|
+
return (e.memory || []).map((u) => ({ ...u, score: a ? this.cosineSimilarity(u.embeddings[0], a[0].embedding) : 1 })).filter((u) => u.score >= 0.8).map((u) => ({ ...u, score: p ? this.cosineSimilarity(u.embeddings[1], p[0].embedding) : u.score })).filter((u) => u.score >= 0.2).toSorted((u, f) => u.score - f.score).slice(0, n);
|
|
263
|
+
}, i = await m(s);
|
|
264
|
+
i.length && e.history.push({ role: "assistant", content: `Things I remembered:
|
|
265
|
+
` + i.map((c) => `${c.owner}: ${c.fact}`).join(`
|
|
266
266
|
`) }), e.tools = [...e.tools || [], {
|
|
267
267
|
name: "read_memory",
|
|
268
268
|
description: "Check your long-term memory for more information",
|
|
@@ -271,31 +271,31 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
271
271
|
query: { type: "string", description: "Search memory based on a query, can be used with or without subject argument" },
|
|
272
272
|
limit: { type: "number", description: "Result limit, default 5" }
|
|
273
273
|
},
|
|
274
|
-
fn: (
|
|
275
|
-
if (!
|
|
276
|
-
return
|
|
274
|
+
fn: (c) => {
|
|
275
|
+
if (!c.subject && !c.query) throw new Error("Either a subject or query argument is required");
|
|
276
|
+
return m(c.query, c.subject, c.limit || 5);
|
|
277
277
|
}
|
|
278
278
|
}];
|
|
279
279
|
}
|
|
280
|
-
const
|
|
280
|
+
const o = await this.models[t].ask(s, e);
|
|
281
281
|
if (e.memory) {
|
|
282
|
-
const
|
|
283
|
-
|
|
282
|
+
const m = e.history?.findIndex((i) => i.role == "assistant" && i.content.startsWith("Things I remembered:"));
|
|
283
|
+
m != null && m >= 0 && e.history?.splice(m, 1);
|
|
284
284
|
}
|
|
285
285
|
if (e.compress || e.memory) {
|
|
286
|
-
let
|
|
286
|
+
let m = null;
|
|
287
287
|
if (e.compress)
|
|
288
|
-
|
|
288
|
+
m = await this.ai.language.compressHistory(e.history, e.compress.max, e.compress.min, e), e.history.splice(0, e.history.length, ...m.history);
|
|
289
289
|
else {
|
|
290
|
-
const
|
|
291
|
-
|
|
290
|
+
const i = e.history?.findLastIndex((c) => c.role == "user") ?? -1;
|
|
291
|
+
m = await this.ai.language.compressHistory(i != -1 ? e.history.slice(i) : e.history, 0, 0, e);
|
|
292
292
|
}
|
|
293
293
|
if (e.memory) {
|
|
294
|
-
const
|
|
295
|
-
e.memory.splice(0, e.memory.length, ...
|
|
294
|
+
const i = e.memory.filter((c) => !m.memory.some((d) => this.cosineSimilarity(c.embeddings[1], d.embeddings[1]) > 0.8)).concat(m.memory);
|
|
295
|
+
e.memory.splice(0, e.memory.length, ...i);
|
|
296
296
|
}
|
|
297
297
|
}
|
|
298
|
-
return r(
|
|
298
|
+
return r(o);
|
|
299
299
|
}), { abort: l });
|
|
300
300
|
}
|
|
301
301
|
/**
|
|
@@ -308,22 +308,22 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
308
308
|
*/
|
|
309
309
|
async compressHistory(s, e, t, l) {
|
|
310
310
|
if (this.estimateTokens(s) < e) return { history: s, memory: [] };
|
|
311
|
-
let r = 0,
|
|
311
|
+
let r = 0, o = 0;
|
|
312
312
|
for (let u of s.toReversed())
|
|
313
|
-
if (
|
|
313
|
+
if (o += this.estimateTokens(u.content), o < t) r++;
|
|
314
314
|
else break;
|
|
315
315
|
if (s.length <= r) return { history: s, memory: [] };
|
|
316
|
-
const
|
|
316
|
+
const m = s[0].role == "system" ? s[0] : null, i = r == 0 ? [] : s.slice(-r), c = (r == 0 ? s : s.slice(0, -r)).filter((u) => u.role === "assistant" || u.role === "user"), d = await this.json(c.map((u) => `${u.role}: ${u.content}`).join(`
|
|
317
317
|
|
|
318
318
|
`), "{summary: string, facts: [[subject, fact]]}", {
|
|
319
319
|
system: "Create the smallest summary possible, no more than 500 tokens. Create a list of NEW facts (split by subject [pro]noun and fact) about what you learned from this conversation that you didn't already know or get from a tool call or system prompt. Focus only on new information about people, topics, or facts. Avoid generating facts about the AI.",
|
|
320
320
|
model: l?.model,
|
|
321
321
|
temperature: l?.temperature || 0.3
|
|
322
|
-
}),
|
|
322
|
+
}), n = /* @__PURE__ */ new Date(), a = await Promise.all((d?.facts || [])?.map(async ([u, f]) => {
|
|
323
323
|
const y = await Promise.all([this.embedding(u), this.embedding(`${u}: ${f}`)]);
|
|
324
|
-
return { owner: u, fact: f, embeddings: [y[0][0].embedding, y[1][0].embedding], timestamp:
|
|
325
|
-
})), p = [{ role: "assistant", content: `Conversation Summary: ${d?.summary}`, timestamp: Date.now() }, ...
|
|
326
|
-
return
|
|
324
|
+
return { owner: u, fact: f, embeddings: [y[0][0].embedding, y[1][0].embedding], timestamp: n };
|
|
325
|
+
})), p = [{ role: "assistant", content: `Conversation Summary: ${d?.summary}`, timestamp: Date.now() }, ...i];
|
|
326
|
+
return m && p.splice(0, 0, m), { history: p, memory: a };
|
|
327
327
|
}
|
|
328
328
|
/**
|
|
329
329
|
* Compare the difference between embeddings (calculates the angle between two vectors)
|
|
@@ -334,10 +334,10 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
334
334
|
cosineSimilarity(s, e) {
|
|
335
335
|
if (s.length !== e.length) throw new Error("Vectors must be same length");
|
|
336
336
|
let t = 0, l = 0, r = 0;
|
|
337
|
-
for (let
|
|
338
|
-
t += s[
|
|
339
|
-
const
|
|
340
|
-
return
|
|
337
|
+
for (let m = 0; m < s.length; m++)
|
|
338
|
+
t += s[m] * e[m], l += s[m] * s[m], r += e[m] * e[m];
|
|
339
|
+
const o = Math.sqrt(l) * Math.sqrt(r);
|
|
340
|
+
return o === 0 ? 0 : t / o;
|
|
341
341
|
}
|
|
342
342
|
/**
|
|
343
343
|
* Chunk text into parts for AI digestion
|
|
@@ -347,50 +347,52 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
347
347
|
* @returns {string[]} Chunked strings
|
|
348
348
|
*/
|
|
349
349
|
chunk(s, e = 500, t = 50) {
|
|
350
|
-
const l = (
|
|
351
|
-
const
|
|
352
|
-
return typeof
|
|
353
|
-
}) : [],
|
|
354
|
-
`)).flatMap((
|
|
355
|
-
`]),
|
|
356
|
-
for (let
|
|
357
|
-
let
|
|
358
|
-
for (; d <
|
|
359
|
-
const
|
|
360
|
-
if (this.estimateTokens(
|
|
361
|
-
`)) > e &&
|
|
362
|
-
|
|
350
|
+
const l = (i, c = "") => i ? Object.entries(i).flatMap(([d, n]) => {
|
|
351
|
+
const a = c ? `${c}${isNaN(+d) ? `.${d}` : `[${d}]`}` : d;
|
|
352
|
+
return typeof n == "object" && !Array.isArray(n) ? l(n, a) : `${a}: ${Array.isArray(n) ? n.join(", ") : n}`;
|
|
353
|
+
}) : [], o = (typeof s == "object" ? l(s) : s.split(`
|
|
354
|
+
`)).flatMap((i) => [...i.split(/\s+/).filter(Boolean), `
|
|
355
|
+
`]), m = [];
|
|
356
|
+
for (let i = 0; i < o.length; ) {
|
|
357
|
+
let c = "", d = i;
|
|
358
|
+
for (; d < o.length; ) {
|
|
359
|
+
const a = c + (c ? " " : "") + o[d];
|
|
360
|
+
if (this.estimateTokens(a.replace(/\s*\n\s*/g, `
|
|
361
|
+
`)) > e && c) break;
|
|
362
|
+
c = a, d++;
|
|
363
363
|
}
|
|
364
|
-
const
|
|
364
|
+
const n = c.replace(/\s*\n\s*/g, `
|
|
365
365
|
`).trim();
|
|
366
|
-
|
|
366
|
+
n && m.push(n), i = Math.max(d - t, d === i ? i + 1 : d);
|
|
367
367
|
}
|
|
368
|
-
return
|
|
368
|
+
return m;
|
|
369
369
|
}
|
|
370
370
|
/**
|
|
371
371
|
* Create a vector representation of a string
|
|
372
372
|
* @param {object | string} target Item that will be embedded (objects get converted)
|
|
373
|
-
* @param {number
|
|
374
|
-
* @param {number} overlapTokens Includes previous X tokens to provide continuity to AI (In addition to max tokens)
|
|
373
|
+
* @param {maxTokens?: number, overlapTokens?: number, parellel?: number} opts Options for embedding such as chunk sizes and parallel processing
|
|
375
374
|
* @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
|
|
376
375
|
*/
|
|
377
|
-
embedding(s, e =
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
376
|
+
async embedding(s, e = {}) {
|
|
377
|
+
let { maxTokens: t = 500, overlapTokens: l = 50, parallel: r = 1 } = e;
|
|
378
|
+
const o = (n) => new Promise((a, p) => {
|
|
379
|
+
const u = new j(T(q(S(import.meta.url)), "embedder.js")), f = ({ embedding: g }) => {
|
|
380
|
+
u.terminate(), a(g);
|
|
381
|
+
}, y = (g) => {
|
|
382
|
+
u.terminate(), p(g);
|
|
383
383
|
};
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
}),
|
|
387
|
-
})
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
384
|
+
u.on("message", f), u.on("error", y), u.on("exit", (g) => {
|
|
385
|
+
g !== 0 && p(new Error(`Worker exited with code ${g}`));
|
|
386
|
+
}), u.postMessage({ text: n, model: this.ai.options?.embedder || "bge-small-en-v1.5", modelDir: this.ai.options.path });
|
|
387
|
+
});
|
|
388
|
+
let m = 0, i = this.chunk(s, t, l), c = [];
|
|
389
|
+
const d = () => {
|
|
390
|
+
const n = m++;
|
|
391
|
+
if (n >= i.length) return;
|
|
392
|
+
const a = i[n];
|
|
393
|
+
return o(a).then((p) => (c.push({ index: n, embedding: p, text: a, tokens: this.estimateTokens(a) }), d()));
|
|
394
|
+
};
|
|
395
|
+
return await Promise.all(Array(r).fill(null).map(() => d())), c.toSorted((n, a) => n.index - a.index);
|
|
394
396
|
}
|
|
395
397
|
/**
|
|
396
398
|
* Estimate variable as tokens
|
|
@@ -409,8 +411,8 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
409
411
|
*/
|
|
410
412
|
fuzzyMatch(s, ...e) {
|
|
411
413
|
if (e.length < 2) throw new Error("Requires at least 2 strings to compare");
|
|
412
|
-
const t = (
|
|
413
|
-
return { avg: r.reduce((
|
|
414
|
+
const t = (o, m = 10) => o.toLowerCase().split("").map((i, c) => i.charCodeAt(0) * (c + 1) % m / m).slice(0, m), l = t(s), r = e.map((o) => t(o)).map((o) => this.cosineSimilarity(l, o));
|
|
415
|
+
return { avg: r.reduce((o, m) => o + m, 0) / r.length, max: Math.max(...r), similarities: r };
|
|
414
416
|
}
|
|
415
417
|
/**
|
|
416
418
|
* Ask a question with JSON response
|
|
@@ -426,8 +428,8 @@ You have passive persistent memory never make any mention of your memory capabil
|
|
|
426
428
|
${e}
|
|
427
429
|
\`\`\`` });
|
|
428
430
|
if (!l) return {};
|
|
429
|
-
const r = /```(?:.+)?\s*([\s\S]*?)```/.exec(l),
|
|
430
|
-
return
|
|
431
|
+
const r = /```(?:.+)?\s*([\s\S]*?)```/.exec(l), o = r ? r[1].trim() : l;
|
|
432
|
+
return b(o, {});
|
|
431
433
|
}
|
|
432
434
|
/**
|
|
433
435
|
* Create a summary of some text
|
|
@@ -440,44 +442,47 @@ ${e}
|
|
|
440
442
|
return this.ask(s, { system: `Generate a brief summary <= ${e} tokens. Output nothing else`, temperature: 0.3, ...t });
|
|
441
443
|
}
|
|
442
444
|
}
|
|
443
|
-
class
|
|
445
|
+
class J {
|
|
444
446
|
constructor(s) {
|
|
445
447
|
this.ai = s;
|
|
446
448
|
}
|
|
447
449
|
asr(s, e = {}) {
|
|
448
450
|
const { model: t = this.ai.options.asr || "whisper-base", speaker: l = !1 } = e;
|
|
449
451
|
let r = !1;
|
|
450
|
-
const
|
|
452
|
+
const o = () => {
|
|
451
453
|
r = !0;
|
|
452
454
|
};
|
|
453
|
-
let
|
|
454
|
-
const d = new
|
|
455
|
-
d.terminate(), !r && (f ?
|
|
456
|
-
},
|
|
457
|
-
d.terminate(), r ||
|
|
455
|
+
let m = new Promise((i, c) => {
|
|
456
|
+
const d = new j(T(q(S(import.meta.url)), "asr.js")), n = ({ text: p, warning: u, error: f }) => {
|
|
457
|
+
d.terminate(), !r && (f ? c(new Error(f)) : (u && console.warn(u), i(p)));
|
|
458
|
+
}, a = (p) => {
|
|
459
|
+
d.terminate(), r || c(p);
|
|
458
460
|
};
|
|
459
|
-
d.on("message",
|
|
460
|
-
p !== 0 && !r &&
|
|
461
|
+
d.on("message", n), d.on("error", a), d.on("exit", (p) => {
|
|
462
|
+
p !== 0 && !r && c(new Error(`Worker exited with code ${p}`));
|
|
461
463
|
}), d.postMessage({ file: s, model: t, speaker: l, modelDir: this.ai.options.path, token: this.ai.options.hfToken });
|
|
462
464
|
});
|
|
463
465
|
if (e.speaker == "id") {
|
|
464
466
|
if (!this.ai.language.defaultModel) throw new Error("Configure an LLM for advanced ASR speaker detection");
|
|
465
|
-
|
|
466
|
-
if (!
|
|
467
|
-
|
|
467
|
+
m = m.then(async (i) => {
|
|
468
|
+
if (!i) return i;
|
|
469
|
+
let c = this.ai.language.chunk(i, 500, 0);
|
|
470
|
+
c.length > 4 && (c = [...c.slice(0, 3), c.at(-1)]);
|
|
471
|
+
const d = await this.ai.language.json(c.join(`
|
|
472
|
+
`), '{1: "Detected Name"}', {
|
|
468
473
|
system: "Use this following transcript to identify speakers. Only identify speakers you are sure about",
|
|
469
|
-
temperature: 0.
|
|
474
|
+
temperature: 0.1
|
|
470
475
|
});
|
|
471
|
-
return Object.entries(
|
|
472
|
-
|
|
473
|
-
}),
|
|
476
|
+
return Object.entries(d).forEach(([n, a]) => {
|
|
477
|
+
i = i.replaceAll(`[Speaker ${n}]`, `[${a}]`);
|
|
478
|
+
}), i;
|
|
474
479
|
});
|
|
475
480
|
}
|
|
476
|
-
return Object.assign(
|
|
481
|
+
return Object.assign(m, { abort: o });
|
|
477
482
|
}
|
|
478
|
-
canDiarization =
|
|
483
|
+
canDiarization = L;
|
|
479
484
|
}
|
|
480
|
-
class
|
|
485
|
+
class H {
|
|
481
486
|
constructor(s) {
|
|
482
487
|
this.ai = s;
|
|
483
488
|
}
|
|
@@ -489,16 +494,16 @@ class J {
|
|
|
489
494
|
ocr(s) {
|
|
490
495
|
let e;
|
|
491
496
|
const t = new Promise(async (l) => {
|
|
492
|
-
e = await
|
|
497
|
+
e = await D(this.ai.options.ocr || "eng", 2, { cachePath: this.ai.options.path });
|
|
493
498
|
const { data: r } = await e.recognize(s);
|
|
494
499
|
await e.terminate(), l(r.text.trim() || null);
|
|
495
500
|
});
|
|
496
501
|
return Object.assign(t, { abort: () => e?.terminate() });
|
|
497
502
|
}
|
|
498
503
|
}
|
|
499
|
-
class
|
|
504
|
+
class ne {
|
|
500
505
|
constructor(s) {
|
|
501
|
-
this.options = s, s.path || (s.path =
|
|
506
|
+
this.options = s, s.path || (s.path = M.tmpdir()), process.env.TRANSFORMERS_CACHE = s.path, this.audio = new J(this), this.language = new I(this), this.vision = new H(this);
|
|
502
507
|
}
|
|
503
508
|
/** Audio processing AI */
|
|
504
509
|
audio;
|
|
@@ -507,17 +512,17 @@ class re {
|
|
|
507
512
|
/** Vision processing AI */
|
|
508
513
|
vision;
|
|
509
514
|
}
|
|
510
|
-
const
|
|
515
|
+
const F = {
|
|
511
516
|
name: "cli",
|
|
512
517
|
description: "Use the command line interface, returns any output",
|
|
513
518
|
args: { command: { type: "string", description: "Command to run", required: !0 } },
|
|
514
|
-
fn: (h) =>
|
|
515
|
-
},
|
|
519
|
+
fn: (h) => C`${h.command}`
|
|
520
|
+
}, oe = {
|
|
516
521
|
name: "get_datetime",
|
|
517
522
|
description: "Get current UTC date / time",
|
|
518
523
|
args: {},
|
|
519
524
|
fn: async () => (/* @__PURE__ */ new Date()).toUTCString()
|
|
520
|
-
},
|
|
525
|
+
}, ie = {
|
|
521
526
|
name: "exec",
|
|
522
527
|
description: "Run code/scripts",
|
|
523
528
|
args: {
|
|
@@ -528,17 +533,17 @@ const H = {
|
|
|
528
533
|
try {
|
|
529
534
|
switch (h.type) {
|
|
530
535
|
case "bash":
|
|
531
|
-
return await
|
|
536
|
+
return await F.fn({ command: h.code }, s, e);
|
|
532
537
|
case "node":
|
|
533
|
-
return await F.fn({ code: h.code }, s, e);
|
|
534
|
-
case "python":
|
|
535
538
|
return await G.fn({ code: h.code }, s, e);
|
|
539
|
+
case "python":
|
|
540
|
+
return await B.fn({ code: h.code }, s, e);
|
|
536
541
|
}
|
|
537
542
|
} catch (t) {
|
|
538
543
|
return { error: t?.message || t.toString() };
|
|
539
544
|
}
|
|
540
545
|
}
|
|
541
|
-
},
|
|
546
|
+
}, ae = {
|
|
542
547
|
name: "fetch",
|
|
543
548
|
description: "Make HTTP request to URL",
|
|
544
549
|
args: {
|
|
@@ -547,25 +552,25 @@ const H = {
|
|
|
547
552
|
headers: { type: "object", description: "HTTP headers to send", default: {} },
|
|
548
553
|
body: { type: "object", description: "HTTP body to send" }
|
|
549
554
|
},
|
|
550
|
-
fn: (h) => new
|
|
551
|
-
},
|
|
555
|
+
fn: (h) => new P({ url: h.url, headers: h.headers }).request({ method: h.method || "GET", body: h.body })
|
|
556
|
+
}, G = {
|
|
552
557
|
name: "exec_javascript",
|
|
553
558
|
description: "Execute commonjs javascript",
|
|
554
559
|
args: {
|
|
555
560
|
code: { type: "string", description: "CommonJS javascript", required: !0 }
|
|
556
561
|
},
|
|
557
562
|
fn: async (h) => {
|
|
558
|
-
const s =
|
|
563
|
+
const s = A(null), e = await O({ console: s }, h.code, !0).catch((t) => s.output.error.push(t));
|
|
559
564
|
return { ...s.output, return: e, stdout: void 0, stderr: void 0 };
|
|
560
565
|
}
|
|
561
|
-
},
|
|
566
|
+
}, B = {
|
|
562
567
|
name: "exec_javascript",
|
|
563
568
|
description: "Execute commonjs javascript",
|
|
564
569
|
args: {
|
|
565
570
|
code: { type: "string", description: "CommonJS javascript", required: !0 }
|
|
566
571
|
},
|
|
567
|
-
fn: async (h) => ({ result:
|
|
568
|
-
},
|
|
572
|
+
fn: async (h) => ({ result: W`python -c "${h.code}"` })
|
|
573
|
+
}, ce = {
|
|
569
574
|
name: "read_webpage",
|
|
570
575
|
description: "Extract clean, structured content from a webpage. Use after web_search to read specific URLs",
|
|
571
576
|
args: {
|
|
@@ -573,9 +578,9 @@ const H = {
|
|
|
573
578
|
focus: { type: "string", description: 'Optional: What aspect to focus on (e.g., "pricing", "features", "contact info")' }
|
|
574
579
|
},
|
|
575
580
|
fn: async (h) => {
|
|
576
|
-
const s = await fetch(h.url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" } }).then((
|
|
577
|
-
throw new Error(`Failed to fetch: ${
|
|
578
|
-
}), e =
|
|
581
|
+
const s = await fetch(h.url, { headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" } }).then((o) => o.text()).catch((o) => {
|
|
582
|
+
throw new Error(`Failed to fetch: ${o.message}`);
|
|
583
|
+
}), e = N.load(s);
|
|
579
584
|
e('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], .ad, .ads, .cookie, .popup').remove();
|
|
580
585
|
const t = {
|
|
581
586
|
title: e('meta[property="og:title"]').attr("content") || e("title").text() || "",
|
|
@@ -583,16 +588,16 @@ const H = {
|
|
|
583
588
|
};
|
|
584
589
|
let l = "";
|
|
585
590
|
const r = ["article", "main", '[role="main"]', ".content", ".post", ".entry", "body"];
|
|
586
|
-
for (const
|
|
587
|
-
const
|
|
588
|
-
if (
|
|
589
|
-
l =
|
|
591
|
+
for (const o of r) {
|
|
592
|
+
const m = e(o).first();
|
|
593
|
+
if (m.length && m.text().trim().length > 200) {
|
|
594
|
+
l = m.text();
|
|
590
595
|
break;
|
|
591
596
|
}
|
|
592
597
|
}
|
|
593
598
|
return l || (l = e("body").text()), l = l.replace(/\s+/g, " ").trim().slice(0, 8e3), { url: h.url, title: t.title.trim(), description: t.description.trim(), content: l, focus: h.focus };
|
|
594
599
|
}
|
|
595
|
-
},
|
|
600
|
+
}, le = {
|
|
596
601
|
name: "web_search",
|
|
597
602
|
description: "Use duckduckgo (anonymous) to find find relevant online resources. Returns a list of URLs that works great with the `read_webpage` tool",
|
|
598
603
|
args: {
|
|
@@ -604,7 +609,7 @@ const H = {
|
|
|
604
609
|
headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9" }
|
|
605
610
|
}).then((r) => r.text());
|
|
606
611
|
let e, t = /<a .*?href="(.+?)".+?<\/a>/g;
|
|
607
|
-
const l = new
|
|
612
|
+
const l = new v();
|
|
608
613
|
for (; (e = t.exec(s)) !== null; ) {
|
|
609
614
|
let r = /uddg=(.+)&?/.exec(decodeURIComponent(e[1]))?.[1];
|
|
610
615
|
if (r && (r = decodeURIComponent(r)), r && l.add(r), l.size >= (h.length || 5)) break;
|
|
@@ -613,20 +618,20 @@ const H = {
|
|
|
613
618
|
}
|
|
614
619
|
};
|
|
615
620
|
export {
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
621
|
+
ne as Ai,
|
|
622
|
+
z as Anthropic,
|
|
623
|
+
J as Audio,
|
|
624
|
+
F as CliTool,
|
|
625
|
+
oe as DateTimeTool,
|
|
626
|
+
ie as ExecTool,
|
|
627
|
+
ae as FetchTool,
|
|
628
|
+
G as JSTool,
|
|
629
|
+
$ as LLMProvider,
|
|
630
|
+
k as OpenAi,
|
|
631
|
+
B as PythonTool,
|
|
632
|
+
ce as ReadWebpageTool,
|
|
633
|
+
H as Vision,
|
|
634
|
+
le as WebSearchTool,
|
|
635
|
+
L as canDiarization
|
|
631
636
|
};
|
|
632
637
|
//# sourceMappingURL=index.mjs.map
|