@ztimson/ai-utils 0.8.13 → 0.8.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +29 -25
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +265 -257
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import * as O from "node:os";
|
|
2
|
-
import { tmpdir as
|
|
3
|
-
import { Anthropic as
|
|
4
|
-
import { objectMap as z, JSONAttemptParse as S, findByProp as R, JSONSanitize as _, clean as
|
|
5
|
-
import { OpenAI as
|
|
6
|
-
import { fileURLToPath as
|
|
2
|
+
import { tmpdir as N } from "node:os";
|
|
3
|
+
import { Anthropic as U } from "@anthropic-ai/sdk";
|
|
4
|
+
import { objectMap as z, JSONAttemptParse as S, findByProp as R, JSONSanitize as _, clean as W, Http as C, consoleInterceptor as J, fn as I, ASet as F } from "@ztimson/utils";
|
|
5
|
+
import { OpenAI as B } from "openai";
|
|
6
|
+
import { fileURLToPath as D } from "url";
|
|
7
7
|
import { join as H, dirname as G } from "path";
|
|
8
8
|
import { spawn as w, execSync as K } from "node:child_process";
|
|
9
9
|
import { mkdtempSync as Y } from "node:fs";
|
|
@@ -17,24 +17,24 @@ class L {
|
|
|
17
17
|
}
|
|
18
18
|
class Q extends L {
|
|
19
19
|
constructor(r, e, t) {
|
|
20
|
-
super(), this.ai = r, this.apiToken = e, this.model = t, this.client = new
|
|
20
|
+
super(), this.ai = r, this.apiToken = e, this.model = t, this.client = new U({ apiKey: e });
|
|
21
21
|
}
|
|
22
22
|
client;
|
|
23
23
|
toStandard(r) {
|
|
24
24
|
const e = Date.now(), t = [];
|
|
25
|
-
for (let
|
|
26
|
-
if (typeof
|
|
27
|
-
t.push({ timestamp: e, ...
|
|
25
|
+
for (let a of r)
|
|
26
|
+
if (typeof a.content == "string")
|
|
27
|
+
t.push({ timestamp: e, ...a });
|
|
28
28
|
else {
|
|
29
|
-
const o =
|
|
29
|
+
const o = a.content?.filter((n) => n.type == "text").map((n) => n.text).join(`
|
|
30
30
|
|
|
31
31
|
`);
|
|
32
|
-
o && t.push({ timestamp: e, role:
|
|
32
|
+
o && t.push({ timestamp: e, role: a.role, content: o }), a.content.forEach((n) => {
|
|
33
33
|
if (n.type == "tool_use")
|
|
34
34
|
t.push({ timestamp: e, role: "tool", id: n.id, name: n.name, args: n.input, content: void 0 });
|
|
35
35
|
else if (n.type == "tool_result") {
|
|
36
|
-
const
|
|
37
|
-
|
|
36
|
+
const u = t.findLast((m) => m.id == n.tool_use_id);
|
|
37
|
+
u && (u[n.is_error ? "error" : "content"] = n.content);
|
|
38
38
|
}
|
|
39
39
|
});
|
|
40
40
|
}
|
|
@@ -55,78 +55,78 @@ class Q extends L {
|
|
|
55
55
|
}
|
|
56
56
|
ask(r, e = {}) {
|
|
57
57
|
const t = new AbortController();
|
|
58
|
-
return Object.assign(new Promise(async (
|
|
58
|
+
return Object.assign(new Promise(async (a) => {
|
|
59
59
|
let o = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
60
|
-
const n = e.tools || this.ai.options.llm?.tools || [],
|
|
60
|
+
const n = e.tools || this.ai.options.llm?.tools || [], u = {
|
|
61
61
|
model: e.model || this.model,
|
|
62
62
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
63
63
|
system: e.system || this.ai.options.llm?.system || "",
|
|
64
64
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
65
|
-
tools: n.map((
|
|
66
|
-
name:
|
|
67
|
-
description:
|
|
65
|
+
tools: n.map((c) => ({
|
|
66
|
+
name: c.name,
|
|
67
|
+
description: c.description,
|
|
68
68
|
input_schema: {
|
|
69
69
|
type: "object",
|
|
70
|
-
properties:
|
|
71
|
-
required:
|
|
70
|
+
properties: c.args ? z(c.args, (s, l) => ({ ...l, required: void 0 })) : {},
|
|
71
|
+
required: c.args ? Object.entries(c.args).filter((s) => s[1].required).map((s) => s[0]) : []
|
|
72
72
|
},
|
|
73
73
|
fn: void 0
|
|
74
74
|
})),
|
|
75
75
|
messages: o,
|
|
76
76
|
stream: !!e.stream
|
|
77
77
|
};
|
|
78
|
-
let
|
|
78
|
+
let m, i = !0;
|
|
79
79
|
do {
|
|
80
|
-
if (
|
|
80
|
+
if (m = await this.client.messages.create(u).catch((s) => {
|
|
81
81
|
throw s.message += `
|
|
82
82
|
|
|
83
83
|
Messages:
|
|
84
84
|
${JSON.stringify(o, null, 2)}`, s;
|
|
85
85
|
}), e.stream) {
|
|
86
|
-
|
|
86
|
+
i ? i = !1 : e.stream({ text: `
|
|
87
87
|
|
|
88
|
-
` }),
|
|
89
|
-
for await (const s of
|
|
88
|
+
` }), m.content = [];
|
|
89
|
+
for await (const s of m) {
|
|
90
90
|
if (t.signal.aborted) break;
|
|
91
91
|
if (s.type === "content_block_start")
|
|
92
|
-
s.content_block.type === "text" ?
|
|
92
|
+
s.content_block.type === "text" ? m.content.push({ type: "text", text: "" }) : s.content_block.type === "tool_use" && m.content.push({ type: "tool_use", id: s.content_block.id, name: s.content_block.name, input: "" });
|
|
93
93
|
else if (s.type === "content_block_delta")
|
|
94
94
|
if (s.delta.type === "text_delta") {
|
|
95
|
-
const
|
|
96
|
-
|
|
97
|
-
} else s.delta.type === "input_json_delta" && (
|
|
95
|
+
const l = s.delta.text;
|
|
96
|
+
m.content.at(-1).text += l, e.stream({ text: l });
|
|
97
|
+
} else s.delta.type === "input_json_delta" && (m.content.at(-1).input += s.delta.partial_json);
|
|
98
98
|
else if (s.type === "content_block_stop") {
|
|
99
|
-
const
|
|
100
|
-
|
|
99
|
+
const l = m.content.at(-1);
|
|
100
|
+
l.input != null && (l.input = l.input ? S(l.input, {}) : {});
|
|
101
101
|
} else if (s.type === "message_stop")
|
|
102
102
|
break;
|
|
103
103
|
}
|
|
104
104
|
}
|
|
105
|
-
const
|
|
106
|
-
if (
|
|
107
|
-
o.push({ role: "assistant", content:
|
|
108
|
-
const s = await Promise.all(
|
|
109
|
-
const d = n.find(R("name",
|
|
110
|
-
if (e.stream && e.stream({ tool:
|
|
105
|
+
const c = m.content.filter((s) => s.type === "tool_use");
|
|
106
|
+
if (c.length && !t.signal.aborted) {
|
|
107
|
+
o.push({ role: "assistant", content: m.content });
|
|
108
|
+
const s = await Promise.all(c.map(async (l) => {
|
|
109
|
+
const d = n.find(R("name", l.name));
|
|
110
|
+
if (e.stream && e.stream({ tool: l.name }), !d) return { tool_use_id: l.id, is_error: !0, content: "Tool not found" };
|
|
111
111
|
try {
|
|
112
|
-
const p = await d.fn(
|
|
113
|
-
return { type: "tool_result", tool_use_id:
|
|
112
|
+
const p = await d.fn(l.input, e?.stream, this.ai);
|
|
113
|
+
return { type: "tool_result", tool_use_id: l.id, content: _(p) };
|
|
114
114
|
} catch (p) {
|
|
115
|
-
return { type: "tool_result", tool_use_id:
|
|
115
|
+
return { type: "tool_result", tool_use_id: l.id, is_error: !0, content: p?.message || p?.toString() || "Unknown" };
|
|
116
116
|
}
|
|
117
117
|
}));
|
|
118
|
-
o.push({ role: "user", content: s }),
|
|
118
|
+
o.push({ role: "user", content: s }), u.messages = o;
|
|
119
119
|
}
|
|
120
|
-
} while (!t.signal.aborted &&
|
|
121
|
-
o.push({ role: "assistant", content:
|
|
120
|
+
} while (!t.signal.aborted && m.content.some((c) => c.type === "tool_use"));
|
|
121
|
+
o.push({ role: "assistant", content: m.content.filter((c) => c.type == "text").map((c) => c.text).join(`
|
|
122
122
|
|
|
123
|
-
`) }), o = this.toStandard(o), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...o),
|
|
123
|
+
`) }), o = this.toStandard(o), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...o), a(o.at(-1)?.content);
|
|
124
124
|
}), { abort: () => t.abort() });
|
|
125
125
|
}
|
|
126
126
|
}
|
|
127
127
|
class P extends L {
|
|
128
|
-
constructor(r, e, t,
|
|
129
|
-
super(), this.ai = r, this.host = e, this.token = t, this.model =
|
|
128
|
+
constructor(r, e, t, a) {
|
|
129
|
+
super(), this.ai = r, this.host = e, this.token = t, this.model = a, this.client = new B(W({
|
|
130
130
|
baseURL: e,
|
|
131
131
|
apiKey: t || e ? "ignored" : void 0
|
|
132
132
|
}));
|
|
@@ -136,17 +136,17 @@ class P extends L {
|
|
|
136
136
|
for (let e = 0; e < r.length; e++) {
|
|
137
137
|
const t = r[e];
|
|
138
138
|
if (t.role === "assistant" && t.tool_calls) {
|
|
139
|
-
const
|
|
139
|
+
const a = t.tool_calls.map((o) => ({
|
|
140
140
|
role: "tool",
|
|
141
141
|
id: o.id,
|
|
142
142
|
name: o.function.name,
|
|
143
143
|
args: S(o.function.arguments, {}),
|
|
144
144
|
timestamp: t.timestamp
|
|
145
145
|
}));
|
|
146
|
-
r.splice(e, 1, ...
|
|
146
|
+
r.splice(e, 1, ...a), e += a.length - 1;
|
|
147
147
|
} else if (t.role === "tool" && t.content) {
|
|
148
|
-
const
|
|
149
|
-
|
|
148
|
+
const a = r.find((o) => t.tool_call_id == o.id);
|
|
149
|
+
a && (t.content.includes('"error":') ? a.error = t.content : a.content = t.content), r.splice(e, 1), e--;
|
|
150
150
|
}
|
|
151
151
|
r[e]?.timestamp || (r[e].timestamp = Date.now());
|
|
152
152
|
}
|
|
@@ -167,7 +167,7 @@ class P extends L {
|
|
|
167
167
|
content: t.error || t.content
|
|
168
168
|
});
|
|
169
169
|
else {
|
|
170
|
-
const { timestamp:
|
|
170
|
+
const { timestamp: a, ...o } = t;
|
|
171
171
|
e.push(o);
|
|
172
172
|
}
|
|
173
173
|
return e;
|
|
@@ -175,45 +175,45 @@ class P extends L {
|
|
|
175
175
|
}
|
|
176
176
|
ask(r, e = {}) {
|
|
177
177
|
const t = new AbortController();
|
|
178
|
-
return Object.assign(new Promise(async (
|
|
178
|
+
return Object.assign(new Promise(async (a, o) => {
|
|
179
179
|
e.system && (e.history?.[0]?.role != "system" ? e.history?.splice(0, 0, { role: "system", content: e.system, timestamp: Date.now() }) : e.history[0].content = e.system);
|
|
180
180
|
let n = this.fromStandard([...e.history || [], { role: "user", content: r, timestamp: Date.now() }]);
|
|
181
|
-
const
|
|
181
|
+
const u = e.tools || this.ai.options.llm?.tools || [], m = {
|
|
182
182
|
model: e.model || this.model,
|
|
183
183
|
messages: n,
|
|
184
184
|
stream: !!e.stream,
|
|
185
185
|
max_tokens: e.max_tokens || this.ai.options.llm?.max_tokens || 4096,
|
|
186
186
|
temperature: e.temperature || this.ai.options.llm?.temperature || 0.7,
|
|
187
|
-
tools:
|
|
187
|
+
tools: u.map((s) => ({
|
|
188
188
|
type: "function",
|
|
189
189
|
function: {
|
|
190
190
|
name: s.name,
|
|
191
191
|
description: s.description,
|
|
192
192
|
parameters: {
|
|
193
193
|
type: "object",
|
|
194
|
-
properties: s.args ? z(s.args, (
|
|
195
|
-
required: s.args ? Object.entries(s.args).filter((
|
|
194
|
+
properties: s.args ? z(s.args, (l, d) => ({ ...d, required: void 0 })) : {},
|
|
195
|
+
required: s.args ? Object.entries(s.args).filter((l) => l[1].required).map((l) => l[0]) : []
|
|
196
196
|
}
|
|
197
197
|
}
|
|
198
198
|
}))
|
|
199
199
|
};
|
|
200
|
-
let
|
|
200
|
+
let i, c = !0;
|
|
201
201
|
do {
|
|
202
|
-
if (
|
|
203
|
-
throw
|
|
202
|
+
if (i = await this.client.chat.completions.create(m).catch((l) => {
|
|
203
|
+
throw l.message += `
|
|
204
204
|
|
|
205
205
|
Messages:
|
|
206
|
-
${JSON.stringify(n, null, 2)}`,
|
|
206
|
+
${JSON.stringify(n, null, 2)}`, l;
|
|
207
207
|
}), e.stream) {
|
|
208
|
-
|
|
208
|
+
c ? c = !1 : e.stream({ text: `
|
|
209
209
|
|
|
210
|
-
` }),
|
|
211
|
-
for await (const
|
|
210
|
+
` }), i.choices = [{ message: { role: "assistant", content: "", tool_calls: [] } }];
|
|
211
|
+
for await (const l of i) {
|
|
212
212
|
if (t.signal.aborted) break;
|
|
213
|
-
if (
|
|
214
|
-
for (const d of
|
|
215
|
-
const p =
|
|
216
|
-
p ? (d.id && (p.id = d.id), d.type && (p.type = d.type), d.function && (p.function || (p.function = {}), d.function.name && (p.function.name = d.function.name), d.function.arguments && (p.function.arguments = (p.function.arguments || "") + d.function.arguments))) :
|
|
213
|
+
if (l.choices[0].delta.content && (i.choices[0].message.content += l.choices[0].delta.content, e.stream({ text: l.choices[0].delta.content })), l.choices[0].delta.tool_calls)
|
|
214
|
+
for (const d of l.choices[0].delta.tool_calls) {
|
|
215
|
+
const p = i.choices[0].message.tool_calls.find((f) => f.index === d.index);
|
|
216
|
+
p ? (d.id && (p.id = d.id), d.type && (p.type = d.type), d.function && (p.function || (p.function = {}), d.function.name && (p.function.name = d.function.name), d.function.arguments && (p.function.arguments = (p.function.arguments || "") + d.function.arguments))) : i.choices[0].message.tool_calls.push({
|
|
217
217
|
index: d.index,
|
|
218
218
|
id: d.id || "",
|
|
219
219
|
type: d.type || "function",
|
|
@@ -225,11 +225,11 @@ ${JSON.stringify(n, null, 2)}`, c;
|
|
|
225
225
|
}
|
|
226
226
|
}
|
|
227
227
|
}
|
|
228
|
-
const s =
|
|
228
|
+
const s = i.choices[0].message.tool_calls || [];
|
|
229
229
|
if (s.length && !t.signal.aborted) {
|
|
230
|
-
n.push(
|
|
231
|
-
const
|
|
232
|
-
const p =
|
|
230
|
+
n.push(i.choices[0].message);
|
|
231
|
+
const l = await Promise.all(s.map(async (d) => {
|
|
232
|
+
const p = u?.find(R("name", d.function.name));
|
|
233
233
|
if (e.stream && e.stream({ tool: d.function.name }), !p) return { role: "tool", tool_call_id: d.id, content: '{"error": "Tool not found"}' };
|
|
234
234
|
try {
|
|
235
235
|
const f = S(d.function.arguments, {}), g = await p.fn(f, e.stream, this.ai);
|
|
@@ -238,10 +238,10 @@ ${JSON.stringify(n, null, 2)}`, c;
|
|
|
238
238
|
return { role: "tool", tool_call_id: d.id, content: _({ error: f?.message || f?.toString() || "Unknown" }) };
|
|
239
239
|
}
|
|
240
240
|
}));
|
|
241
|
-
n.push(...
|
|
241
|
+
n.push(...l), m.messages = n;
|
|
242
242
|
}
|
|
243
|
-
} while (!t.signal.aborted &&
|
|
244
|
-
n.push({ role: "assistant", content:
|
|
243
|
+
} while (!t.signal.aborted && i.choices?.[0]?.message?.tool_calls?.length);
|
|
244
|
+
n.push({ role: "assistant", content: i.choices[0].message.content || "" }), n = this.toStandard(n), e.stream && e.stream({ done: !0 }), e.history && e.history.splice(0, e.history.length, ...n), a(n.at(-1)?.content);
|
|
245
245
|
}), { abort: () => t.abort() });
|
|
246
246
|
}
|
|
247
247
|
}
|
|
@@ -270,17 +270,17 @@ class X {
|
|
|
270
270
|
};
|
|
271
271
|
const t = e.model || this.defaultModel;
|
|
272
272
|
if (!this.models[t]) throw new Error(`Model does not exist: ${t}`);
|
|
273
|
-
let
|
|
273
|
+
let a = () => {
|
|
274
274
|
};
|
|
275
275
|
return Object.assign(new Promise(async (o) => {
|
|
276
276
|
if (e.history || (e.history = []), e.memory) {
|
|
277
|
-
const
|
|
278
|
-
const [
|
|
279
|
-
|
|
280
|
-
|
|
277
|
+
const u = async (i, c, s = 10) => {
|
|
278
|
+
const [l, d] = await Promise.all([
|
|
279
|
+
c ? this.embedding(c) : Promise.resolve(null),
|
|
280
|
+
i ? this.embedding(i) : Promise.resolve(null)
|
|
281
281
|
]);
|
|
282
282
|
return (e.memory || []).map((p) => {
|
|
283
|
-
const f = (
|
|
283
|
+
const f = (l ? this.cosineSimilarity(p.embeddings[0], l[0].embedding) : 0) + (d ? this.cosineSimilarity(p.embeddings[1], d[0].embedding) : 0);
|
|
284
284
|
return { ...p, score: f };
|
|
285
285
|
}).toSorted((p, f) => p.score - f.score).slice(0, s).map((p) => `- ${p.owner}: ${p.fact}`).join(`
|
|
286
286
|
`);
|
|
@@ -288,9 +288,9 @@ class X {
|
|
|
288
288
|
e.system += `
|
|
289
289
|
You have RAG memory and will be given the top_k closest memories regarding the users query. Save anything new you have learned worth remembering from the user message using the remember tool and feel free to recall memories manually.
|
|
290
290
|
`;
|
|
291
|
-
const
|
|
292
|
-
|
|
293
|
-
${
|
|
291
|
+
const m = await u(r);
|
|
292
|
+
m.length && e.history.push({ role: "tool", name: "recall", id: "auto_recall_" + Math.random().toString(), args: {}, content: `Things I remembered:
|
|
293
|
+
${m}` }), e.tools = [{
|
|
294
294
|
name: "recall",
|
|
295
295
|
description: "Recall the closest memories you have regarding a query using RAG",
|
|
296
296
|
args: {
|
|
@@ -298,9 +298,9 @@ ${l}` }), e.tools = [{
|
|
|
298
298
|
query: { type: "string", description: "Search memory based on a query, can be used with or without subject argument" },
|
|
299
299
|
topK: { type: "number", description: "Result limit, default 5" }
|
|
300
300
|
},
|
|
301
|
-
fn: (
|
|
302
|
-
if (!
|
|
303
|
-
return
|
|
301
|
+
fn: (i) => {
|
|
302
|
+
if (!i.subject && !i.query) throw new Error("Either a subject or query argument is required");
|
|
303
|
+
return u(i.query, i.subject, i.topK);
|
|
304
304
|
}
|
|
305
305
|
}, {
|
|
306
306
|
name: "remember",
|
|
@@ -309,31 +309,31 @@ ${l}` }), e.tools = [{
|
|
|
309
309
|
owner: { type: "string", description: "Subject/person this fact is about" },
|
|
310
310
|
fact: { type: "string", description: "The information to remember" }
|
|
311
311
|
},
|
|
312
|
-
fn: async (
|
|
312
|
+
fn: async (i) => {
|
|
313
313
|
if (!e.memory) return;
|
|
314
|
-
const
|
|
315
|
-
this.embedding(
|
|
316
|
-
this.embedding(`${
|
|
317
|
-
]), s = { owner:
|
|
318
|
-
return e.memory.splice(0, e.memory.length, ...e.memory.filter((
|
|
314
|
+
const c = await Promise.all([
|
|
315
|
+
this.embedding(i.owner),
|
|
316
|
+
this.embedding(`${i.owner}: ${i.fact}`)
|
|
317
|
+
]), s = { owner: i.owner, fact: i.fact, embeddings: [c[0][0].embedding, c[1][0].embedding] };
|
|
318
|
+
return e.memory.splice(0, e.memory.length, ...e.memory.filter((l) => !(this.cosineSimilarity(s.embeddings[0], l.embeddings[0]) >= 0.9 && this.cosineSimilarity(s.embeddings[1], l.embeddings[1]) >= 0.8)), s), "Remembered!";
|
|
319
319
|
}
|
|
320
320
|
}, ...e.tools || []];
|
|
321
321
|
}
|
|
322
322
|
const n = await this.models[t].ask(r, e);
|
|
323
|
-
if (e.memory && e.history.splice(0, e.history.length, ...e.history.filter((
|
|
324
|
-
const
|
|
325
|
-
e.history.splice(0, e.history.length, ...
|
|
323
|
+
if (e.memory && e.history.splice(0, e.history.length, ...e.history.filter((u) => u.role != "tool" || u.name != "recall" && u.name != "remember")), e.compress) {
|
|
324
|
+
const u = await this.ai.language.compressHistory(e.history, e.compress.max, e.compress.min, e);
|
|
325
|
+
e.history.splice(0, e.history.length, ...u);
|
|
326
326
|
}
|
|
327
327
|
return o(n);
|
|
328
|
-
}), { abort:
|
|
328
|
+
}), { abort: a });
|
|
329
329
|
}
|
|
330
330
|
async code(r, e) {
|
|
331
331
|
const t = await this.ask(r, { ...e, system: [
|
|
332
332
|
e?.system,
|
|
333
333
|
"Return your response in a code block"
|
|
334
334
|
].filter((o) => !!o).join(`
|
|
335
|
-
`) }),
|
|
336
|
-
return
|
|
335
|
+
`) }), a = /```(?:.+)?\s*([\s\S]*?)```/.exec(t);
|
|
336
|
+
return a ? a[1].trim() : null;
|
|
337
337
|
}
|
|
338
338
|
/**
|
|
339
339
|
* Compress chat history to reduce context size
|
|
@@ -343,17 +343,17 @@ ${l}` }), e.tools = [{
|
|
|
343
343
|
* @param {LLMRequest} options LLM options
|
|
344
344
|
* @returns {Promise<LLMMessage[]>} New chat history will summary at index 0
|
|
345
345
|
*/
|
|
346
|
-
async compressHistory(r, e, t,
|
|
346
|
+
async compressHistory(r, e, t, a) {
|
|
347
347
|
if (this.estimateTokens(r) < e) return r;
|
|
348
348
|
let o = 0, n = 0;
|
|
349
349
|
for (let d of r.toReversed())
|
|
350
350
|
if (n += this.estimateTokens(d.content), n < t) o++;
|
|
351
351
|
else break;
|
|
352
352
|
if (r.length <= o) return r;
|
|
353
|
-
const
|
|
353
|
+
const u = r[0].role == "system" ? r[0] : null, m = o == 0 ? [] : r.slice(-o), i = (o == 0 ? r : r.slice(0, -o)).filter((d) => d.role === "assistant" || d.role === "user"), c = await this.summarize(i.map((d) => `[${d.role}]: ${d.content}`).join(`
|
|
354
354
|
|
|
355
|
-
`), 500,
|
|
356
|
-
return
|
|
355
|
+
`), 500, a), s = Date.now(), l = [{ role: "tool", name: "summary", id: "summary_" + s, args: {}, content: `Conversation Summary: ${c?.summary}`, timestamp: s }, ...m];
|
|
356
|
+
return u && l.splice(0, 0, u), l;
|
|
357
357
|
}
|
|
358
358
|
/**
|
|
359
359
|
* Compare the difference between embeddings (calculates the angle between two vectors)
|
|
@@ -363,10 +363,10 @@ ${l}` }), e.tools = [{
|
|
|
363
363
|
*/
|
|
364
364
|
cosineSimilarity(r, e) {
|
|
365
365
|
if (r.length !== e.length) throw new Error("Vectors must be same length");
|
|
366
|
-
let t = 0,
|
|
367
|
-
for (let
|
|
368
|
-
t += r[
|
|
369
|
-
const n = Math.sqrt(
|
|
366
|
+
let t = 0, a = 0, o = 0;
|
|
367
|
+
for (let u = 0; u < r.length; u++)
|
|
368
|
+
t += r[u] * e[u], a += r[u] * r[u], o += e[u] * e[u];
|
|
369
|
+
const n = Math.sqrt(a) * Math.sqrt(o);
|
|
370
370
|
return n === 0 ? 0 : t / n;
|
|
371
371
|
}
|
|
372
372
|
/**
|
|
@@ -377,25 +377,25 @@ ${l}` }), e.tools = [{
|
|
|
377
377
|
* @returns {string[]} Chunked strings
|
|
378
378
|
*/
|
|
379
379
|
chunk(r, e = 500, t = 50) {
|
|
380
|
-
const
|
|
381
|
-
const
|
|
382
|
-
return typeof s == "object" && !Array.isArray(s) ?
|
|
383
|
-
}) : [], n = (typeof r == "object" ?
|
|
384
|
-
`)).flatMap((
|
|
385
|
-
`]),
|
|
386
|
-
for (let
|
|
387
|
-
let
|
|
388
|
-
for (;
|
|
389
|
-
const
|
|
390
|
-
if (this.estimateTokens(
|
|
391
|
-
`)) > e &&
|
|
392
|
-
|
|
380
|
+
const a = (m, i = "") => m ? Object.entries(m).flatMap(([c, s]) => {
|
|
381
|
+
const l = i ? `${i}${isNaN(+c) ? `.${c}` : `[${c}]`}` : c;
|
|
382
|
+
return typeof s == "object" && !Array.isArray(s) ? a(s, l) : `${l}: ${Array.isArray(s) ? s.join(", ") : s}`;
|
|
383
|
+
}) : [], n = (typeof r == "object" ? a(r) : r.toString().split(`
|
|
384
|
+
`)).flatMap((m) => [...m.split(/\s+/).filter(Boolean), `
|
|
385
|
+
`]), u = [];
|
|
386
|
+
for (let m = 0; m < n.length; ) {
|
|
387
|
+
let i = "", c = m;
|
|
388
|
+
for (; c < n.length; ) {
|
|
389
|
+
const l = i + (i ? " " : "") + n[c];
|
|
390
|
+
if (this.estimateTokens(l.replace(/\s*\n\s*/g, `
|
|
391
|
+
`)) > e && i) break;
|
|
392
|
+
i = l, c++;
|
|
393
393
|
}
|
|
394
|
-
const s =
|
|
394
|
+
const s = i.replace(/\s*\n\s*/g, `
|
|
395
395
|
`).trim();
|
|
396
|
-
s &&
|
|
396
|
+
s && u.push(s), m = Math.max(c - t, c === m ? m + 1 : c);
|
|
397
397
|
}
|
|
398
|
-
return
|
|
398
|
+
return u;
|
|
399
399
|
}
|
|
400
400
|
/**
|
|
401
401
|
* Create a vector representation of a string
|
|
@@ -404,39 +404,39 @@ ${l}` }), e.tools = [{
|
|
|
404
404
|
* @returns {Promise<Awaited<{index: number, embedding: number[], text: string, tokens: number}>[]>} Chunked embeddings
|
|
405
405
|
*/
|
|
406
406
|
embedding(r, e = {}) {
|
|
407
|
-
let { maxTokens: t = 500, overlapTokens:
|
|
407
|
+
let { maxTokens: t = 500, overlapTokens: a = 50 } = e, o = !1;
|
|
408
408
|
const n = () => {
|
|
409
409
|
o = !0;
|
|
410
|
-
},
|
|
410
|
+
}, u = (i) => new Promise((c, s) => {
|
|
411
411
|
if (o) return s(new Error("Aborted"));
|
|
412
|
-
const
|
|
413
|
-
H(G(
|
|
412
|
+
const l = [
|
|
413
|
+
H(G(D(import.meta.url)), "embedder.js"),
|
|
414
414
|
this.ai.options.path,
|
|
415
415
|
this.ai.options?.embedder || "bge-small-en-v1.5"
|
|
416
|
-
], d = w("node",
|
|
417
|
-
d.stdin.write(
|
|
416
|
+
], d = w("node", l, { stdio: ["pipe", "pipe", "ignore"] });
|
|
417
|
+
d.stdin.write(i), d.stdin.end();
|
|
418
418
|
let p = "";
|
|
419
419
|
d.stdout.on("data", (f) => p += f.toString()), d.on("close", (f) => {
|
|
420
420
|
if (o) return s(new Error("Aborted"));
|
|
421
421
|
if (f === 0)
|
|
422
422
|
try {
|
|
423
423
|
const g = JSON.parse(p);
|
|
424
|
-
|
|
424
|
+
c(g.embedding);
|
|
425
425
|
} catch {
|
|
426
426
|
s(new Error("Failed to parse embedding output"));
|
|
427
427
|
}
|
|
428
428
|
else
|
|
429
429
|
s(new Error(`Embedder process exited with code ${f}`));
|
|
430
430
|
}), d.on("error", s);
|
|
431
|
-
}),
|
|
432
|
-
const
|
|
433
|
-
for (let s = 0; s <
|
|
434
|
-
const
|
|
435
|
-
|
|
431
|
+
}), m = (async () => {
|
|
432
|
+
const i = this.chunk(r, t, a), c = [];
|
|
433
|
+
for (let s = 0; s < i.length && !o; s++) {
|
|
434
|
+
const l = i[s], d = await u(l);
|
|
435
|
+
c.push({ index: s, embedding: d, text: l, tokens: this.estimateTokens(l) });
|
|
436
436
|
}
|
|
437
|
-
return
|
|
437
|
+
return c;
|
|
438
438
|
})();
|
|
439
|
-
return Object.assign(
|
|
439
|
+
return Object.assign(m, { abort: n });
|
|
440
440
|
}
|
|
441
441
|
/**
|
|
442
442
|
* Estimate variable as tokens
|
|
@@ -455,8 +455,8 @@ ${l}` }), e.tools = [{
|
|
|
455
455
|
*/
|
|
456
456
|
fuzzyMatch(r, ...e) {
|
|
457
457
|
if (e.length < 2) throw new Error("Requires at least 2 strings to compare");
|
|
458
|
-
const t = (n,
|
|
459
|
-
return { avg: o.reduce((n,
|
|
458
|
+
const t = (n, u = 10) => n.toLowerCase().split("").map((m, i) => m.charCodeAt(0) * (i + 1) % u / u).slice(0, u), a = t(r), o = e.map((n) => t(n)).map((n) => this.cosineSimilarity(a, n));
|
|
459
|
+
return { avg: o.reduce((n, u) => n + u, 0) / o.length, max: Math.max(...o), similarities: o };
|
|
460
460
|
}
|
|
461
461
|
/**
|
|
462
462
|
* Ask a question with JSON response
|
|
@@ -466,34 +466,37 @@ ${l}` }), e.tools = [{
|
|
|
466
466
|
* @returns {Promise<{} | {} | RegExpExecArray | null>}
|
|
467
467
|
*/
|
|
468
468
|
async json(r, e, t) {
|
|
469
|
-
let
|
|
469
|
+
let a = `Your job is to convert input to JSON using tool calls. Call the \`submit\` tool at least once with JSON matching this schema:
|
|
470
470
|
\`\`\`json
|
|
471
471
|
${e}
|
|
472
|
-
|
|
473
|
-
|
|
472
|
+
\`\`\`
|
|
473
|
+
|
|
474
|
+
Responses are ignored`;
|
|
475
|
+
return t?.system && (a += `
|
|
474
476
|
|
|
475
477
|
` + t.system), new Promise(async (o, n) => {
|
|
476
|
-
let
|
|
477
|
-
const
|
|
478
|
+
let u = !1;
|
|
479
|
+
const m = await this.ask(r, {
|
|
478
480
|
temperature: 0.3,
|
|
479
481
|
...t,
|
|
480
|
-
system:
|
|
482
|
+
system: a,
|
|
481
483
|
tools: [{
|
|
482
484
|
name: "submit",
|
|
483
485
|
description: "Submit JSON",
|
|
484
486
|
args: { json: { type: "string", description: "Javascript parsable JSON string", required: !0 } },
|
|
485
|
-
fn: (
|
|
487
|
+
fn: (i) => {
|
|
486
488
|
try {
|
|
487
|
-
const
|
|
488
|
-
o(
|
|
489
|
+
const c = JSON.parse(i.json);
|
|
490
|
+
o(c), u = !0;
|
|
489
491
|
} catch {
|
|
490
492
|
return "Invalid JSON";
|
|
491
493
|
}
|
|
492
|
-
return "
|
|
494
|
+
return "Saved";
|
|
493
495
|
}
|
|
494
496
|
}, ...t?.tools || []]
|
|
495
497
|
});
|
|
496
|
-
|
|
498
|
+
u || n(`AI failed to create JSON:
|
|
499
|
+
${m}`);
|
|
497
500
|
});
|
|
498
501
|
}
|
|
499
502
|
/**
|
|
@@ -504,23 +507,28 @@ ${e}
|
|
|
504
507
|
* @returns {Promise<string>} Summary
|
|
505
508
|
*/
|
|
506
509
|
async summarize(r, e = 500, t) {
|
|
507
|
-
let
|
|
508
|
-
return t?.system && (
|
|
510
|
+
let a = `Your job is to summarize the users message using tool calls. Call the \`submit\` tool at least once with the shortest summary possible that's <= ${e} tokens. The tool call will respond with the token count. Responses are ignored`;
|
|
511
|
+
return t?.system && (a += `
|
|
509
512
|
|
|
510
513
|
` + t.system), new Promise(async (o, n) => {
|
|
511
|
-
let
|
|
512
|
-
const
|
|
514
|
+
let u = !1;
|
|
515
|
+
const m = await this.ask(r, {
|
|
513
516
|
temperature: 0.3,
|
|
514
517
|
...t,
|
|
515
|
-
system:
|
|
518
|
+
system: a,
|
|
516
519
|
tools: [{
|
|
517
520
|
name: "submit",
|
|
518
521
|
description: "Submit summary",
|
|
519
522
|
args: { summary: { type: "string", description: "Text summarization", required: !0 } },
|
|
520
|
-
fn: (
|
|
523
|
+
fn: (i) => {
|
|
524
|
+
if (!i.summary) return "No summary provided";
|
|
525
|
+
const c = this.estimateTokens(i.summary);
|
|
526
|
+
return c > e ? `Summary is too long (${c} tokens)` : (u = !0, o(i.summary || null), `Saved (${c} tokens)`);
|
|
527
|
+
}
|
|
521
528
|
}, ...t?.tools || []]
|
|
522
529
|
});
|
|
523
|
-
|
|
530
|
+
u || n(`AI failed to create summary:
|
|
531
|
+
${m}`);
|
|
524
532
|
});
|
|
525
533
|
}
|
|
526
534
|
}
|
|
@@ -547,20 +555,20 @@ print(json.dumps(segments))
|
|
|
547
555
|
pyannote;
|
|
548
556
|
whisperModel;
|
|
549
557
|
async addPunctuation(r, e, t = 150) {
|
|
550
|
-
const
|
|
558
|
+
const a = (n) => {
|
|
551
559
|
if (n = n.toLowerCase().replace(/[^a-z]/g, ""), n.length <= 3) return 1;
|
|
552
|
-
const
|
|
553
|
-
let
|
|
554
|
-
return n.endsWith("e") &&
|
|
560
|
+
const u = n.match(/[aeiouy]+/g);
|
|
561
|
+
let m = u ? u.length : 1;
|
|
562
|
+
return n.endsWith("e") && m--, Math.max(1, m);
|
|
555
563
|
};
|
|
556
564
|
let o = "";
|
|
557
|
-
return r.transcription.filter((n,
|
|
558
|
-
let
|
|
559
|
-
const
|
|
560
|
-
return !n.text &&
|
|
565
|
+
return r.transcription.filter((n, u) => {
|
|
566
|
+
let m = !1;
|
|
567
|
+
const i = r.transcription[u - 1], c = r.transcription[u + 1];
|
|
568
|
+
return !n.text && c ? (c.offsets.from = n.offsets.from, c.timestamps.from = n.offsets.from) : n.text && n.text[0] != " " && i && (i.offsets.to = n.offsets.to, i.timestamps.to = n.timestamps.to, i.text += n.text, m = !0), !!n.text && !m;
|
|
561
569
|
}).forEach((n) => {
|
|
562
|
-
const
|
|
563
|
-
|
|
570
|
+
const u = /^[A-Z]/.test(n.text.trim()), m = n.offsets.to - n.offsets.from, c = a(n.text.trim()) * t;
|
|
571
|
+
u && m > c * 2 && n.text[0] == " " && (o += "."), o += n.text;
|
|
564
572
|
}), e ? this.ai.language.ask(o, {
|
|
565
573
|
system: "Remove any misplaced punctuation from the following ASR transcript using the replace tool. Avoid modifying words unless there is an obvious typo",
|
|
566
574
|
temperature: 0.1,
|
|
@@ -576,38 +584,38 @@ print(json.dumps(segments))
|
|
|
576
584
|
}).then(() => o) : o.trim();
|
|
577
585
|
}
|
|
578
586
|
async diarizeTranscript(r, e, t) {
|
|
579
|
-
const
|
|
587
|
+
const a = /* @__PURE__ */ new Map();
|
|
580
588
|
let o = 0;
|
|
581
589
|
e.forEach((p) => {
|
|
582
|
-
|
|
590
|
+
a.has(p.speaker) || a.set(p.speaker, ++o);
|
|
583
591
|
});
|
|
584
|
-
const n = await this.addPunctuation(r, t),
|
|
592
|
+
const n = await this.addPunctuation(r, t), u = n.match(/[^.!?]+[.!?]+/g) || [n], m = r.transcription.filter((p) => p.text.trim()), i = u.map((p) => {
|
|
585
593
|
if (p = p.trim(), !p) return null;
|
|
586
594
|
const f = p.toLowerCase().replace(/[^\w\s]/g, "").split(/\s+/), g = /* @__PURE__ */ new Map();
|
|
587
595
|
f.forEach((x) => {
|
|
588
|
-
const k =
|
|
596
|
+
const k = m.find((y) => x === y.text.trim().toLowerCase().replace(/[^\w]/g, ""));
|
|
589
597
|
if (!k) return;
|
|
590
|
-
const
|
|
598
|
+
const E = k.offsets.from / 1e3, $ = e.find((y) => E >= y.start && E <= y.end);
|
|
591
599
|
if ($) {
|
|
592
|
-
const y =
|
|
600
|
+
const y = a.get($.speaker);
|
|
593
601
|
g.set(y, (g.get(y) || 0) + 1);
|
|
594
602
|
}
|
|
595
603
|
});
|
|
596
|
-
let T = 1,
|
|
604
|
+
let T = 1, v = 0;
|
|
597
605
|
return g.forEach((x, k) => {
|
|
598
|
-
x >
|
|
606
|
+
x > v && (v = x, T = k);
|
|
599
607
|
}), { speaker: T, text: p };
|
|
600
|
-
}).filter((p) => p !== null),
|
|
601
|
-
|
|
602
|
-
const f =
|
|
603
|
-
f && f.speaker === p.speaker ? f.text += " " + p.text :
|
|
608
|
+
}).filter((p) => p !== null), c = [];
|
|
609
|
+
i.forEach((p) => {
|
|
610
|
+
const f = c[c.length - 1];
|
|
611
|
+
f && f.speaker === p.speaker ? f.text += " " + p.text : c.push({ ...p });
|
|
604
612
|
});
|
|
605
|
-
let s =
|
|
613
|
+
let s = c.map((p) => `[Speaker ${p.speaker}]: ${p.text}`).join(`
|
|
606
614
|
`).trim();
|
|
607
615
|
if (!t) return s;
|
|
608
|
-
let
|
|
609
|
-
|
|
610
|
-
const d = await this.ai.language.json(
|
|
616
|
+
let l = this.ai.language.chunk(s, 500, 0);
|
|
617
|
+
l.length > 4 && (l = [...l.slice(0, 3), l.at(-1)]);
|
|
618
|
+
const d = await this.ai.language.json(l.join(`
|
|
611
619
|
`), '{1: "Detected Name", 2: "Second Name"}', {
|
|
612
620
|
system: "Use the following transcript to identify speakers. Only identify speakers you are positive about, dont mention speakers you are unsure about in your response",
|
|
613
621
|
temperature: 0.1
|
|
@@ -616,87 +624,87 @@ print(json.dumps(segments))
|
|
|
616
624
|
}
|
|
617
625
|
runAsr(r, e = {}) {
|
|
618
626
|
let t;
|
|
619
|
-
const
|
|
620
|
-
this.downloadAsrModel(e.model).then((
|
|
627
|
+
const a = new Promise((o, n) => {
|
|
628
|
+
this.downloadAsrModel(e.model).then((u) => {
|
|
621
629
|
if (e.diarization) {
|
|
622
|
-
let
|
|
630
|
+
let m = M.join(M.dirname(r), "transcript");
|
|
623
631
|
t = w(
|
|
624
632
|
this.ai.options.whisper,
|
|
625
|
-
["-m",
|
|
633
|
+
["-m", u, "-f", r, "-np", "-ml", "1", "-oj", "-of", m],
|
|
626
634
|
{ stdio: ["ignore", "ignore", "pipe"] }
|
|
627
|
-
), t.on("error", (
|
|
628
|
-
if (
|
|
629
|
-
|
|
635
|
+
), t.on("error", (i) => n(i)), t.on("close", async (i) => {
|
|
636
|
+
if (i === 0) {
|
|
637
|
+
m = await b.readFile(m + ".json", "utf-8"), b.rm(m + ".json").catch(() => {
|
|
630
638
|
});
|
|
631
639
|
try {
|
|
632
|
-
o(JSON.parse(
|
|
640
|
+
o(JSON.parse(m));
|
|
633
641
|
} catch {
|
|
634
642
|
n(new Error("Failed to parse whisper JSON"));
|
|
635
643
|
}
|
|
636
644
|
} else
|
|
637
|
-
n(new Error(`Exit code ${
|
|
645
|
+
n(new Error(`Exit code ${i}`));
|
|
638
646
|
});
|
|
639
647
|
} else {
|
|
640
|
-
let
|
|
641
|
-
t = w(this.ai.options.whisper, ["-m",
|
|
642
|
-
|
|
648
|
+
let m = "";
|
|
649
|
+
t = w(this.ai.options.whisper, ["-m", u, "-f", r, "-np", "-nt"]), t.on("error", (i) => n(i)), t.stdout.on("data", (i) => m += i.toString()), t.on("close", async (i) => {
|
|
650
|
+
i === 0 ? o(m.trim() || null) : n(new Error(`Exit code ${i}`));
|
|
643
651
|
});
|
|
644
652
|
}
|
|
645
653
|
});
|
|
646
654
|
});
|
|
647
|
-
return Object.assign(
|
|
655
|
+
return Object.assign(a, { abort: () => t?.kill("SIGTERM") });
|
|
648
656
|
}
|
|
649
657
|
runDiarization(r) {
|
|
650
658
|
let e = !1, t = () => {
|
|
651
659
|
e = !0;
|
|
652
660
|
};
|
|
653
|
-
const
|
|
654
|
-
const
|
|
655
|
-
|
|
661
|
+
const a = (n) => new Promise((u) => {
|
|
662
|
+
const m = w(n, ["-W", "ignore", "-c", "import pyannote.audio"]);
|
|
663
|
+
m.on("close", (i) => u(i === 0)), m.on("error", () => u(!1));
|
|
656
664
|
}), o = Promise.all([
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
]).then((async ([n,
|
|
665
|
+
a("python"),
|
|
666
|
+
a("python3")
|
|
667
|
+
]).then((async ([n, u]) => {
|
|
660
668
|
if (e) return;
|
|
661
|
-
if (!n && !
|
|
662
|
-
const
|
|
663
|
-
return new Promise((
|
|
669
|
+
if (!n && !u) throw new Error("Pyannote is not installed: pip install pyannote.audio");
|
|
670
|
+
const m = u ? "python3" : "python";
|
|
671
|
+
return new Promise((i, c) => {
|
|
664
672
|
if (e) return;
|
|
665
673
|
let s = "";
|
|
666
|
-
const
|
|
667
|
-
|
|
674
|
+
const l = w(m, ["-W", "ignore", "-c", this.pyannote, r]);
|
|
675
|
+
l.stdout.on("data", (d) => s += d.toString()), l.stderr.on("data", (d) => console.error(d.toString())), l.on("close", (d) => {
|
|
668
676
|
if (d === 0)
|
|
669
677
|
try {
|
|
670
|
-
|
|
678
|
+
i(JSON.parse(s));
|
|
671
679
|
} catch {
|
|
672
|
-
|
|
680
|
+
c(new Error("Failed to parse diarization output"));
|
|
673
681
|
}
|
|
674
682
|
else
|
|
675
|
-
|
|
676
|
-
}),
|
|
683
|
+
c(new Error(`Python process exited with code ${d}`));
|
|
684
|
+
}), l.on("error", c), t = () => l.kill("SIGTERM");
|
|
677
685
|
});
|
|
678
686
|
}));
|
|
679
687
|
return Object.assign(o, { abort: t });
|
|
680
688
|
}
|
|
681
689
|
asr(r, e = {}) {
|
|
682
690
|
if (!this.ai.options.whisper) throw new Error("Whisper not configured");
|
|
683
|
-
const t = A(Y(A(
|
|
691
|
+
const t = A(Y(A(N(), "audio-")), "converted.wav");
|
|
684
692
|
K(`ffmpeg -i "${r}" -ar 16000 -ac 1 -f wav "${t}"`, { stdio: "ignore" });
|
|
685
|
-
const
|
|
693
|
+
const a = () => b.rm(q.dirname(t), { recursive: !0, force: !0 }).catch(() => {
|
|
686
694
|
});
|
|
687
695
|
if (!e.diarization) return this.runAsr(t, { model: e.model });
|
|
688
696
|
const o = this.runAsr(t, { model: e.model, diarization: !0 }), n = this.runDiarization(t);
|
|
689
|
-
let
|
|
690
|
-
|
|
697
|
+
let u = !1, m = () => {
|
|
698
|
+
u = !0, o.abort(), n.abort(), a();
|
|
691
699
|
};
|
|
692
|
-
const
|
|
693
|
-
if (
|
|
694
|
-
` +
|
|
700
|
+
const i = Promise.allSettled([o, n]).then(async ([c, s]) => {
|
|
701
|
+
if (c.status == "rejected") throw new Error(`Whisper.cpp timestamps:
|
|
702
|
+
` + c.reason);
|
|
695
703
|
if (s.status == "rejected") throw new Error(`Pyannote:
|
|
696
704
|
` + s.reason);
|
|
697
|
-
return
|
|
698
|
-
}).finally(() =>
|
|
699
|
-
return Object.assign(
|
|
705
|
+
return u || !e.diarization ? c.value : this.diarizeTranscript(c.value, s.value, e.diarization == "llm");
|
|
706
|
+
}).finally(() => a());
|
|
707
|
+
return Object.assign(i, { abort: m });
|
|
700
708
|
}
|
|
701
709
|
async downloadAsrModel(r = this.whisperModel) {
|
|
702
710
|
if (!this.ai.options.whisper) throw new Error("Whisper not configured");
|
|
@@ -716,10 +724,10 @@ class te {
|
|
|
716
724
|
*/
|
|
717
725
|
ocr(r) {
|
|
718
726
|
let e;
|
|
719
|
-
const t = new Promise(async (
|
|
727
|
+
const t = new Promise(async (a) => {
|
|
720
728
|
e = await V(this.ai.options.ocr || "eng", 2, { cachePath: this.ai.options.path });
|
|
721
729
|
const { data: o } = await e.recognize(r);
|
|
722
|
-
await e.terminate(),
|
|
730
|
+
await e.terminate(), a(o.text.trim() || null);
|
|
723
731
|
});
|
|
724
732
|
return Object.assign(t, { abort: () => e?.terminate() });
|
|
725
733
|
}
|
|
@@ -818,61 +826,61 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
818
826
|
redirect: "follow"
|
|
819
827
|
}).catch((s) => {
|
|
820
828
|
throw new Error(`Failed to fetch: ${s.message}`);
|
|
821
|
-
}), t = e.headers.get("content-type") || "",
|
|
822
|
-
if (t.match(/charset=([^;]+)/)?.[1], h.mimeRegex && !new RegExp(h.mimeRegex, "i").test(
|
|
823
|
-
return { url: h.url, error: "MIME type rejected", mimeType:
|
|
824
|
-
if (
|
|
829
|
+
}), t = e.headers.get("content-type") || "", a = t.split(";")[0].trim().toLowerCase();
|
|
830
|
+
if (t.match(/charset=([^;]+)/)?.[1], h.mimeRegex && !new RegExp(h.mimeRegex, "i").test(a))
|
|
831
|
+
return { url: h.url, error: "MIME type rejected", mimeType: a, filter: h.mimeRegex };
|
|
832
|
+
if (a.startsWith("image/") || a.startsWith("audio/") || a.startsWith("video/")) {
|
|
825
833
|
const s = await e.arrayBuffer();
|
|
826
834
|
if (s.byteLength > 10485760)
|
|
827
|
-
return { url: h.url, type: "media", mimeType:
|
|
828
|
-
const
|
|
829
|
-
return { url: h.url, type: "media", mimeType:
|
|
835
|
+
return { url: h.url, type: "media", mimeType: a, error: "File too large", size: s.byteLength, maxSize: 10485760 };
|
|
836
|
+
const l = Buffer.from(s).toString("base64");
|
|
837
|
+
return { url: h.url, type: "media", mimeType: a, dataUrl: `data:${a};base64,${l}`, size: s.byteLength };
|
|
830
838
|
}
|
|
831
|
-
if (
|
|
839
|
+
if (a.match(/^(text\/(plain|csv|xml)|application\/(json|xml|csv|x-yaml))/) || h.url.match(/\.(txt|json|xml|csv|yaml|yml|md)$/i)) {
|
|
832
840
|
const s = await e.text();
|
|
833
|
-
return { url: h.url, type: "text", mimeType:
|
|
841
|
+
return { url: h.url, type: "text", mimeType: a, content: s.slice(0, 1e5) };
|
|
834
842
|
}
|
|
835
|
-
if (
|
|
843
|
+
if (a === "application/pdf" || a.startsWith("application/") && !a.includes("html")) {
|
|
836
844
|
const s = await e.arrayBuffer();
|
|
837
845
|
if (s.byteLength > 10485760)
|
|
838
|
-
return { url: h.url, type: "binary", mimeType:
|
|
839
|
-
const
|
|
840
|
-
return { url: h.url, type: "binary", mimeType:
|
|
846
|
+
return { url: h.url, type: "binary", mimeType: a, error: "File too large", size: s.byteLength, maxSize: 10485760 };
|
|
847
|
+
const l = Buffer.from(s).toString("base64");
|
|
848
|
+
return { url: h.url, type: "binary", mimeType: a, dataUrl: `data:${a};base64,${l}`, size: s.byteLength };
|
|
841
849
|
}
|
|
842
850
|
const o = await e.text(), n = Z.load(o);
|
|
843
851
|
n('script, style, nav, footer, header, aside, iframe, noscript, svg, [role="navigation"], [role="banner"], [role="complementary"], .ad, .ads, .advertisement, .cookie, .popup, .modal, .sidebar, .related, .comments, .social-share').remove();
|
|
844
|
-
const
|
|
852
|
+
const u = {
|
|
845
853
|
title: n('meta[property="og:title"]').attr("content") || n("title").text() || "",
|
|
846
854
|
description: n('meta[name="description"]').attr("content") || n('meta[property="og:description"]').attr("content") || "",
|
|
847
855
|
author: n('meta[name="author"]').attr("content") || "",
|
|
848
856
|
published: n('meta[property="article:published_time"]').attr("content") || n("time").attr("datetime") || "",
|
|
849
857
|
image: n('meta[property="og:image"]').attr("content") || ""
|
|
850
858
|
};
|
|
851
|
-
let
|
|
852
|
-
const
|
|
853
|
-
for (const s of
|
|
854
|
-
const
|
|
855
|
-
if (
|
|
856
|
-
|
|
859
|
+
let m = "";
|
|
860
|
+
const i = ["article", "main", '[role="main"]', ".content", ".post-content", ".entry-content", ".article-content", "body"];
|
|
861
|
+
for (const s of i) {
|
|
862
|
+
const l = n(s).first();
|
|
863
|
+
if (l.length && l.text().trim().length > 200) {
|
|
864
|
+
m = l.text();
|
|
857
865
|
break;
|
|
858
866
|
}
|
|
859
867
|
}
|
|
860
|
-
|
|
868
|
+
m || (m = n("body").text()), m = m.replace(/\n\s*\n\s*\n/g, `
|
|
861
869
|
|
|
862
870
|
`).replace(/[ \t]+/g, " ").trim().slice(0, 5e4);
|
|
863
|
-
let
|
|
864
|
-
return
|
|
865
|
-
const d = n(
|
|
866
|
-
d && p && !d.startsWith("#") &&
|
|
867
|
-
}),
|
|
871
|
+
let c = [];
|
|
872
|
+
return m.length < 500 && (n("a[href]").each((s, l) => {
|
|
873
|
+
const d = n(l).attr("href"), p = n(l).text().trim();
|
|
874
|
+
d && p && !d.startsWith("#") && c.push({ text: p, href: d });
|
|
875
|
+
}), c = c.slice(0, 50)), {
|
|
868
876
|
url: h.url,
|
|
869
877
|
type: "html",
|
|
870
|
-
title:
|
|
871
|
-
description:
|
|
872
|
-
author:
|
|
873
|
-
published:
|
|
874
|
-
content:
|
|
875
|
-
links:
|
|
878
|
+
title: u.title.trim(),
|
|
879
|
+
description: u.description.trim(),
|
|
880
|
+
author: u.author.trim(),
|
|
881
|
+
published: u.published,
|
|
882
|
+
content: m,
|
|
883
|
+
links: c.length ? c : void 0
|
|
876
884
|
};
|
|
877
885
|
}
|
|
878
886
|
}, je = {
|
|
@@ -887,12 +895,12 @@ const re = () => O.platform() == "win32" ? "cmd" : j`echo $SHELL`?.split("/").po
|
|
|
887
895
|
headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US,en;q=0.9" }
|
|
888
896
|
}).then((o) => o.text());
|
|
889
897
|
let e, t = /<a .*?href="(.+?)".+?<\/a>/g;
|
|
890
|
-
const
|
|
898
|
+
const a = new F();
|
|
891
899
|
for (; (e = t.exec(r)) !== null; ) {
|
|
892
900
|
let o = /uddg=(.+)&?/.exec(decodeURIComponent(e[1]))?.[1];
|
|
893
|
-
if (o && (o = decodeURIComponent(o)), o &&
|
|
901
|
+
if (o && (o = decodeURIComponent(o)), o && a.add(o), a.size >= (h.length || 5)) break;
|
|
894
902
|
}
|
|
895
|
-
return
|
|
903
|
+
return a;
|
|
896
904
|
}
|
|
897
905
|
};
|
|
898
906
|
export {
|