@fs/mycroft 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ import {
2
+ getModels,
3
+ logInfo,
4
+ logWarn
5
+ } from "./chunk-LV52FEMB.js";
6
+
7
+ // src/services/batch-embedder.ts
8
+ import OpenAI from "openai";
9
// Serialize chunks into OpenAI Batch API JSONL: one embedding request per
// line, keyed by the chunk's index (as custom_id) so results can be matched
// back to their source chunk after the batch completes.
var buildJsonl = (chunks, model) => {
  const requestLines = [];
  for (let index = 0; index < chunks.length; index++) {
    requestLines.push(
      JSON.stringify({
        custom_id: String(index),
        method: "POST",
        url: "/v1/embeddings",
        body: { model, input: chunks[index].content }
      })
    );
  }
  return requestLines.join("\n");
};
17
// Upload the chunks as a JSONL file and open a 24h embedding batch job.
// Resolves with { batchId, inputFileId } so callers can poll the job and
// later clean up the uploaded file.
var submitBatchEmbeddings = async (chunks) => {
  const models = await getModels();
  const client = new OpenAI();
  logInfo(`[BatchEmbedder] Preparing batch request for ${chunks.length} chunks`);
  const payload = new Blob([buildJsonl(chunks, models.embedding)], {
    type: "application/jsonl"
  });
  const uploaded = await client.files.create({
    file: new File([payload], "embeddings.jsonl", { type: "application/jsonl" }),
    purpose: "batch"
  });
  logInfo(`[BatchEmbedder] Uploaded input file ${uploaded.id}`);
  const job = await client.batches.create({
    input_file_id: uploaded.id,
    endpoint: "/v1/embeddings",
    completion_window: "24h"
  });
  logInfo(`[BatchEmbedder] Created batch ${job.id} \u2014 status: ${job.status}`);
  return { batchId: job.id, inputFileId: uploaded.id };
};
36
// Retrieve a batch job and summarize its progress: status string, request
// counters (defaulting to 0 when the API omits them), and the output/error
// file ids (null until the provider populates them).
var checkBatchStatus = async (batchId) => {
  const batch = await new OpenAI().batches.retrieve(batchId);
  const counts = batch.request_counts ?? {};
  return {
    status: batch.status,
    completed: counts.completed ?? 0,
    failed: counts.failed ?? 0,
    total: counts.total ?? 0,
    outputFileId: batch.output_file_id ?? null,
    errorFileId: batch.error_file_id ?? null
  };
};
48
// Download a completed batch's output file and attach each embedding to its
// source chunk (matched by custom_id == chunk index). Requests that failed or
// returned no embedding yield an empty vector so the caller can detect and
// skip them; chunk order is preserved.
//
// Fix: blank lines (an empty output file, trailing newlines, interior blank
// lines) are now skipped silently instead of being JSON.parse'd and logged as
// spurious "malformed JSONL line" warnings.
var downloadBatchResults = async (outputFileId, chunks) => {
  const client = new OpenAI();
  logInfo(`[BatchEmbedder] Downloading results from ${outputFileId}`);
  const response = await client.files.content(outputFileId);
  const text = await response.text();
  const vectors = new Map();
  for (const line of text.split("\n")) {
    // Blank lines are structural, not malformed — skip without warning.
    if (line.trim() === "") continue;
    let result;
    try {
      result = JSON.parse(line);
    } catch {
      logWarn(`[BatchEmbedder] Skipping malformed JSONL line`);
      continue;
    }
    const idx = Number(result.custom_id);
    if (result.response?.status_code === 200) {
      const embedding = result.response.body?.data?.[0]?.embedding;
      if (embedding) {
        vectors.set(idx, embedding);
      }
    } else {
      // Prefer the per-request error body; fall back to the line-level error.
      logWarn(
        `[BatchEmbedder] Request ${idx} failed: ${JSON.stringify(result.response?.body?.error ?? result.error)}`
      );
    }
  }
  const embedded = chunks.map((chunk, i) => {
    const vector = vectors.get(i) ?? [];
    if (vector.length === 0) {
      logWarn(`[BatchEmbedder] Chunk ${i} has empty embedding \u2014 skipping vector insertion`);
    }
    return { ...chunk, vector };
  });
  const missing = embedded.filter((e) => e.vector.length === 0).length;
  if (missing > 0) {
    logWarn(`[BatchEmbedder] ${missing} chunk(s) have empty embeddings due to batch errors`);
  }
  logInfo(`[BatchEmbedder] Successfully processed ${embedded.length} chunks via batch API`);
  return embedded;
};
89
// Best-effort deletion of the batch's input file and, when provided, its
// output file. Failures are deliberately swallowed: a stale file left on the
// provider is harmless and must not mask the outcome of the batch itself.
// The two deletions are independent, so they run in parallel.
var cleanupBatchFiles = async (inputFileId, outputFileId) => {
  const client = new OpenAI();
  const deletions = [client.files.del(inputFileId).catch(() => void 0)];
  if (outputFileId) {
    deletions.push(client.files.del(outputFileId).catch(() => void 0));
  }
  await Promise.all(deletions);
};
96
+
97
+ export {
98
+ submitBatchEmbeddings,
99
+ checkBatchStatus,
100
+ downloadBatchResults,
101
+ cleanupBatchFiles
102
+ };
103
+ //# sourceMappingURL=chunk-VBEGUDHG.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/services/batch-embedder.ts"],"sourcesContent":["import OpenAI from \"openai\";\nimport type { BookChunk } from \"../shared/types.js\";\nimport type { EmbeddedChunk } from \"./embedder.js\";\nimport { getModels, logInfo, logWarn } from \"./constants.js\";\n\ntype BatchRequestLine = {\n custom_id: string;\n method: \"POST\";\n url: \"/v1/embeddings\";\n body: { model: string; input: string };\n};\n\nconst buildJsonl = (chunks: BookChunk[], model: string): string =>\n chunks\n .map(\n (chunk, i): BatchRequestLine => ({\n custom_id: String(i),\n method: \"POST\",\n url: \"/v1/embeddings\",\n body: { model, input: chunk.content },\n })\n )\n .map((line) => JSON.stringify(line))\n .join(\"\\n\");\n\nexport type BatchSubmitResult = {\n batchId: string;\n inputFileId: string;\n};\n\nexport const submitBatchEmbeddings = async (chunks: BookChunk[]): Promise<BatchSubmitResult> => {\n const models = await getModels();\n const client = new OpenAI();\n\n logInfo(`[BatchEmbedder] Preparing batch request for ${chunks.length} chunks`);\n\n const jsonl = buildJsonl(chunks, models.embedding);\n const blob = new Blob([jsonl], { type: \"application/jsonl\" });\n const file = await client.files.create({\n file: new File([blob], \"embeddings.jsonl\", { type: \"application/jsonl\" }),\n purpose: \"batch\",\n });\n logInfo(`[BatchEmbedder] Uploaded input file ${file.id}`);\n\n const batch = await client.batches.create({\n input_file_id: file.id,\n endpoint: \"/v1/embeddings\",\n completion_window: \"24h\",\n });\n logInfo(`[BatchEmbedder] Created batch ${batch.id} — status: ${batch.status}`);\n\n return { batchId: batch.id, inputFileId: file.id };\n};\n\nexport type BatchStatus = {\n status: string;\n completed: number;\n failed: number;\n total: number;\n outputFileId: string | null;\n errorFileId: string | null;\n};\n\nexport const checkBatchStatus = async (batchId: string): Promise<BatchStatus> => {\n const client = new OpenAI();\n const batch = await 
client.batches.retrieve(batchId);\n return {\n status: batch.status,\n completed: batch.request_counts?.completed ?? 0,\n failed: batch.request_counts?.failed ?? 0,\n total: batch.request_counts?.total ?? 0,\n outputFileId: batch.output_file_id ?? null,\n errorFileId: batch.error_file_id ?? null,\n };\n};\n\nexport const downloadBatchResults = async (\n outputFileId: string,\n chunks: BookChunk[],\n): Promise<EmbeddedChunk[]> => {\n const client = new OpenAI();\n\n logInfo(`[BatchEmbedder] Downloading results from ${outputFileId}`);\n const response = await client.files.content(outputFileId);\n const text = await response.text();\n const lines = text.trim().split(\"\\n\");\n\n const vectors = new Map<number, number[]>();\n for (const line of lines) {\n let result: any;\n try {\n result = JSON.parse(line);\n } catch {\n logWarn(`[BatchEmbedder] Skipping malformed JSONL line`);\n continue;\n }\n const idx = Number(result.custom_id);\n if (result.response?.status_code === 200) {\n const embedding = result.response.body?.data?.[0]?.embedding;\n if (embedding) {\n vectors.set(idx, embedding);\n }\n } else {\n logWarn(\n `[BatchEmbedder] Request ${idx} failed: ${JSON.stringify(result.response?.body?.error ?? result.error)}`\n );\n }\n }\n\n const embedded: EmbeddedChunk[] = chunks.map((chunk, i) => {\n const vector = vectors.get(i) ?? 
[];\n if (vector.length === 0) {\n logWarn(`[BatchEmbedder] Chunk ${i} has empty embedding — skipping vector insertion`);\n }\n return { ...chunk, vector };\n });\n\n const missing = embedded.filter((e) => e.vector.length === 0).length;\n if (missing > 0) {\n logWarn(`[BatchEmbedder] ${missing} chunk(s) have empty embeddings due to batch errors`);\n }\n\n logInfo(`[BatchEmbedder] Successfully processed ${embedded.length} chunks via batch API`);\n return embedded;\n};\n\nexport const cleanupBatchFiles = async (inputFileId: string, outputFileId?: string | null) => {\n const client = new OpenAI();\n await client.files.del(inputFileId).catch(() => undefined);\n if (outputFileId) {\n await client.files.del(outputFileId).catch(() => undefined);\n }\n};\n"],"mappings":";;;;;;;AAAA,OAAO,YAAY;AAYnB,IAAM,aAAa,CAAC,QAAqB,UACvC,OACG;AAAA,EACC,CAAC,OAAO,OAAyB;AAAA,IAC/B,WAAW,OAAO,CAAC;AAAA,IACnB,QAAQ;AAAA,IACR,KAAK;AAAA,IACL,MAAM,EAAE,OAAO,OAAO,MAAM,QAAQ;AAAA,EACtC;AACF,EACC,IAAI,CAAC,SAAS,KAAK,UAAU,IAAI,CAAC,EAClC,KAAK,IAAI;AAOP,IAAM,wBAAwB,OAAO,WAAoD;AAC9F,QAAM,SAAS,MAAM,UAAU;AAC/B,QAAM,SAAS,IAAI,OAAO;AAE1B,UAAQ,+CAA+C,OAAO,MAAM,SAAS;AAE7E,QAAM,QAAQ,WAAW,QAAQ,OAAO,SAAS;AACjD,QAAM,OAAO,IAAI,KAAK,CAAC,KAAK,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAC5D,QAAM,OAAO,MAAM,OAAO,MAAM,OAAO;AAAA,IACrC,MAAM,IAAI,KAAK,CAAC,IAAI,GAAG,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAAA,IACxE,SAAS;AAAA,EACX,CAAC;AACD,UAAQ,uCAAuC,KAAK,EAAE,EAAE;AAExD,QAAM,QAAQ,MAAM,OAAO,QAAQ,OAAO;AAAA,IACxC,eAAe,KAAK;AAAA,IACpB,UAAU;AAAA,IACV,mBAAmB;AAAA,EACrB,CAAC;AACD,UAAQ,iCAAiC,MAAM,EAAE,mBAAc,MAAM,MAAM,EAAE;AAE7E,SAAO,EAAE,SAAS,MAAM,IAAI,aAAa,KAAK,GAAG;AACnD;AAWO,IAAM,mBAAmB,OAAO,YAA0C;AAC/E,QAAM,SAAS,IAAI,OAAO;AAC1B,QAAM,QAAQ,MAAM,OAAO,QAAQ,SAAS,OAAO;AACnD,SAAO;AAAA,IACL,QAAQ,MAAM;AAAA,IACd,WAAW,MAAM,gBAAgB,aAAa;AAAA,IAC9C,QAAQ,MAAM,gBAAgB,UAAU;AAAA,IACxC,OAAO,MAAM,gBAAgB,SAAS;AAAA,IACtC,cAAc,MAAM,kBAAkB;AAAA,IACtC,aAAa,MAAM,iBAAiB;AAAA,EACtC;AACF;AAEO,IAAM,uBAAuB,OAClC,cACA,WAC6B;AAC7B,QAAM,SAAS,IAAI,OAAO;AAE1B,UAAQ,4CAA4C,YAAY,EAAE;A
AClE,QAAM,WAAW,MAAM,OAAO,MAAM,QAAQ,YAAY;AACxD,QAAM,OAAO,MAAM,SAAS,KAAK;AACjC,QAAM,QAAQ,KAAK,KAAK,EAAE,MAAM,IAAI;AAEpC,QAAM,UAAU,oBAAI,IAAsB;AAC1C,aAAW,QAAQ,OAAO;AACxB,QAAI;AACJ,QAAI;AACF,eAAS,KAAK,MAAM,IAAI;AAAA,IAC1B,QAAQ;AACN,cAAQ,+CAA+C;AACvD;AAAA,IACF;AACA,UAAM,MAAM,OAAO,OAAO,SAAS;AACnC,QAAI,OAAO,UAAU,gBAAgB,KAAK;AACxC,YAAM,YAAY,OAAO,SAAS,MAAM,OAAO,CAAC,GAAG;AACnD,UAAI,WAAW;AACb,gBAAQ,IAAI,KAAK,SAAS;AAAA,MAC5B;AAAA,IACF,OAAO;AACL;AAAA,QACE,2BAA2B,GAAG,YAAY,KAAK,UAAU,OAAO,UAAU,MAAM,SAAS,OAAO,KAAK,CAAC;AAAA,MACxG;AAAA,IACF;AAAA,EACF;AAEA,QAAM,WAA4B,OAAO,IAAI,CAAC,OAAO,MAAM;AACzD,UAAM,SAAS,QAAQ,IAAI,CAAC,KAAK,CAAC;AAClC,QAAI,OAAO,WAAW,GAAG;AACvB,cAAQ,yBAAyB,CAAC,uDAAkD;AAAA,IACtF;AACA,WAAO,EAAE,GAAG,OAAO,OAAO;AAAA,EAC5B,CAAC;AAED,QAAM,UAAU,SAAS,OAAO,CAAC,MAAM,EAAE,OAAO,WAAW,CAAC,EAAE;AAC9D,MAAI,UAAU,GAAG;AACf,YAAQ,mBAAmB,OAAO,qDAAqD;AAAA,EACzF;AAEA,UAAQ,0CAA0C,SAAS,MAAM,uBAAuB;AACxF,SAAO;AACT;AAEO,IAAM,oBAAoB,OAAO,aAAqB,iBAAiC;AAC5F,QAAM,SAAS,IAAI,OAAO;AAC1B,QAAM,OAAO,MAAM,IAAI,WAAW,EAAE,MAAM,MAAM,MAAS;AACzD,MAAI,cAAc;AAChB,UAAM,OAAO,MAAM,IAAI,YAAY,EAAE,MAAM,MAAM,MAAS;AAAA,EAC5D;AACF;","names":[]}