@astrofoundry/grimoire 3.12.3 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/admin-HA6FNUV4.js +1516 -0
  2. package/dist/admin-HA6FNUV4.js.map +7 -0
  3. package/dist/chunk-BRS6X3AE.js +12 -0
  4. package/dist/chunk-BRS6X3AE.js.map +7 -0
  5. package/dist/cli.js +291 -722
  6. package/dist/cli.js.map +7 -1
  7. package/package.json +11 -12
  8. package/dist/apikey.d.ts +0 -5
  9. package/dist/apikey.d.ts.map +0 -1
  10. package/dist/apikey.js +0 -84
  11. package/dist/apikey.js.map +0 -1
  12. package/dist/chunker.d.ts +0 -7
  13. package/dist/chunker.d.ts.map +0 -1
  14. package/dist/chunker.js +0 -158
  15. package/dist/chunker.js.map +0 -1
  16. package/dist/cli.d.ts +0 -3
  17. package/dist/cli.d.ts.map +0 -1
  18. package/dist/config.d.ts +0 -23
  19. package/dist/config.d.ts.map +0 -1
  20. package/dist/config.js +0 -89
  21. package/dist/config.js.map +0 -1
  22. package/dist/consumer-config.d.ts +0 -11
  23. package/dist/consumer-config.d.ts.map +0 -1
  24. package/dist/consumer-config.js +0 -60
  25. package/dist/consumer-config.js.map +0 -1
  26. package/dist/consumer.d.ts +0 -11
  27. package/dist/consumer.d.ts.map +0 -1
  28. package/dist/consumer.js +0 -84
  29. package/dist/consumer.js.map +0 -1
  30. package/dist/converter.d.ts +0 -12
  31. package/dist/converter.d.ts.map +0 -1
  32. package/dist/converter.js +0 -95
  33. package/dist/converter.js.map +0 -1
  34. package/dist/embedder.d.ts +0 -9
  35. package/dist/embedder.d.ts.map +0 -1
  36. package/dist/embedder.js +0 -108
  37. package/dist/embedder.js.map +0 -1
  38. package/dist/format.d.ts +0 -5
  39. package/dist/format.d.ts.map +0 -1
  40. package/dist/format.js +0 -6
  41. package/dist/format.js.map +0 -1
  42. package/dist/llms-ingest.d.ts +0 -3
  43. package/dist/llms-ingest.d.ts.map +0 -1
  44. package/dist/llms-ingest.js +0 -85
  45. package/dist/llms-ingest.js.map +0 -1
  46. package/dist/reranker.d.ts +0 -6
  47. package/dist/reranker.d.ts.map +0 -1
  48. package/dist/reranker.js +0 -21
  49. package/dist/reranker.js.map +0 -1
  50. package/dist/scraper.d.ts +0 -9
  51. package/dist/scraper.d.ts.map +0 -1
  52. package/dist/scraper.js +0 -98
  53. package/dist/scraper.js.map +0 -1
  54. package/dist/search.d.ts +0 -8
  55. package/dist/search.d.ts.map +0 -1
  56. package/dist/search.js +0 -43
  57. package/dist/search.js.map +0 -1
  58. package/dist/store.d.ts +0 -15
  59. package/dist/store.d.ts.map +0 -1
  60. package/dist/store.js +0 -149
  61. package/dist/store.js.map +0 -1
  62. package/dist/types.d.ts +0 -26
  63. package/dist/types.d.ts.map +0 -1
  64. package/dist/types.js +0 -2
  65. package/dist/types.js.map +0 -1
@@ -1,60 +0,0 @@
1
- import { readFile, writeFile, mkdir } from "node:fs/promises";
2
- import { join } from "node:path";
3
- import { homedir } from "node:os";
4
- import { createInterface } from "node:readline";
5
// Consumer settings are kept in a ".grimoire" folder inside the user's home
// directory; the settings themselves are stored in "config.json" in that folder.
const CONFIG_DIR = join(homedir(), ".grimoire"),
    CONFIG_FILE = join(CONFIG_DIR, "config.json");
7
/**
 * Reads the saved consumer configuration from CONFIG_FILE.
 *
 * @returns {Promise<{apiUrl: string, apiKey: string} | null>} The stored
 *   `{ apiUrl, apiKey }` pair when the file holds a JSON object with both
 *   fields as strings; `null` when the file cannot be read, is empty, is not
 *   valid JSON, or lacks either field.
 */
export async function loadConsumerConfig() {
    // A missing or unreadable file simply means "nothing saved yet".
    const raw = await readFile(CONFIG_FILE, "utf-8").catch(() => null);
    if (!raw)
        return null;
    let data;
    try {
        data = JSON.parse(raw);
    }
    catch {
        // A corrupt file is treated like an absent one so callers can fall
        // through to their "not configured" path instead of crashing with a
        // SyntaxError.
        return null;
    }
    // JSON.parse may legitimately yield null or a primitive (e.g. the file
    // contains just `null`); property access on null would throw.
    if (data === null || typeof data !== "object")
        return null;
    if (typeof data.apiUrl === "string" && typeof data.apiKey === "string") {
        return { apiUrl: data.apiUrl, apiKey: data.apiKey };
    }
    return null;
}
17
/**
 * Persists the consumer configuration to CONFIG_FILE as pretty-printed JSON
 * followed by a trailing newline, creating CONFIG_DIR first if needed.
 *
 * The file contains the API key, so the directory and file are created with
 * owner-only permissions. The `mode` values only take effect when the
 * directory/file is newly created; existing entries keep their permissions.
 *
 * @param {{apiUrl: string, apiKey: string}} config Configuration to store.
 * @returns {Promise<void>}
 */
export async function saveConsumerConfig(config) {
    await mkdir(CONFIG_DIR, { recursive: true, mode: 0o700 });
    const serialized = JSON.stringify(config, null, 2) + "\n";
    await writeFile(CONFIG_FILE, serialized, { encoding: "utf-8", mode: 0o600 });
}
21
/**
 * Determines which API URL and key to use.
 *
 * The GRIMOIRE_API_URL / GRIMOIRE_API_KEY environment variables win when both
 * are non-empty; otherwise the saved configuration file is used.
 *
 * @returns {Promise<{apiUrl: string, apiKey: string}>}
 * @throws {Error} When neither the environment nor the saved file provides a
 *   configuration.
 */
export async function resolveConsumerConfig() {
    const { GRIMOIRE_API_URL: apiUrl, GRIMOIRE_API_KEY: apiKey } = process.env;
    if (apiUrl && apiKey) {
        return { apiUrl, apiKey };
    }
    const saved = await loadConsumerConfig();
    if (!saved) {
        throw new Error("Grimoire is not configured. Run 'grimoire init' to set up.");
    }
    return saved;
}
32
/**
 * Synchronous check based purely on the environment.
 *
 * @returns {boolean} `true` when GRIMOIRE_API_URL is set to a non-empty value.
 */
export function isConsumerMode() {
    return Boolean(process.env.GRIMOIRE_API_URL);
}
35
/**
 * Decides whether consumer mode applies, checking in this order:
 *   1. GOOGLE_APPLICATION_CREDENTIALS set  -> false
 *   2. GRIMOIRE_API_URL set                -> true
 *   3. otherwise, true only if a saved consumer configuration can be loaded.
 *
 * @returns {Promise<boolean>}
 */
export async function detectConsumerMode() {
    const { GOOGLE_APPLICATION_CREDENTIALS, GRIMOIRE_API_URL } = process.env;
    if (GOOGLE_APPLICATION_CREDENTIALS) {
        return false;
    }
    if (GRIMOIRE_API_URL) {
        return true;
    }
    return (await loadConsumerConfig()) !== null;
}
43
/**
 * Interactive `init` command: prompts on stdin/stdout for the API URL and API
 * key, then saves them to the consumer configuration file.
 *
 * When a configuration already exists, its URL is shown as the default and
 * the key is shown masked; pressing Enter on a prompt keeps the stored value.
 *
 * @returns {Promise<void>}
 * @throws {Error} When either value ends up empty.
 */
export async function cmdInit() {
    const rl = createInterface({ input: process.stdin, output: process.stdout });
    const ask = (q) => new Promise((resolve) => rl.question(q, resolve));
    let config;
    try {
        const existing = await loadConsumerConfig();
        const apiUrl = await ask(`API URL${existing ? ` [${existing.apiUrl}]` : ""}: `);
        const apiKey = await ask(`API Key${existing ? " [****]" : ""}: `);
        config = {
            apiUrl: apiUrl.trim() || existing?.apiUrl || "",
            apiKey: apiKey.trim() || existing?.apiKey || "",
        };
    }
    finally {
        // Always release stdin, even if reading the existing config or a
        // prompt throws; an open readline interface keeps the process alive.
        rl.close();
    }
    if (!config.apiUrl || !config.apiKey) {
        throw new Error("Both API URL and API Key are required.");
    }
    await saveConsumerConfig(config);
    console.log(`\nSaved to ${CONFIG_FILE}`);
}
60
- //# sourceMappingURL=consumer-config.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"consumer-config.js","sourceRoot":"","sources":["../src/consumer-config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAEhD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,WAAW,CAAC,CAAC;AAChD,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAOpD,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACtC,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACvE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,MAAsB;IAC7D,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAChF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB;IACzC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;IAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;IAE5C,IAAI,MAAM,IAAI,MAAM,EAAE,CAAC;QACrB,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;IAC5C,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC9C,IAAI,UAAU;QAAE,OAAO,UAAU,CAAC;IAElC,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;AAChF,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,OAAO,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;AACxC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACtC,IAAI,OAAO,CAAC,GAAG,CAAC,8BAA8B;QAAE,OAAO,KAAK,CAAC;IAC7D,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB;QAAE,OAAO,IAAI,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC1C,OAAO,MAAM,KAAK,IAAI,CAAC;AACzB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO;IAC3B,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7E,MAAM,GAAG,GAAG,CAAC,CAAS,EAAmB,EAAE,CACzC,IAAI,OAAO,CAAC,CAAC,OAAO,EA
AE,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;IAEpD,MAAM,QAAQ,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAE5C,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,UAAU,QAAQ,CAAC,CAAC,CAAC,KAAK,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChF,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,UAAU,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAElE,MAAM,MAAM,GAAmB;QAC7B,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,IAAI,QAAQ,EAAE,MAAM,IAAI,EAAE;QAC/C,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,IAAI,QAAQ,EAAE,MAAM,IAAI,EAAE;KAChD,CAAC;IAEF,EAAE,CAAC,KAAK,EAAE,CAAC;IAEX,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACrC,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IAED,MAAM,kBAAkB,CAAC,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,cAAc,WAAW,EAAE,CAAC,CAAC;AAC3C,CAAC"}
@@ -1,11 +0,0 @@
1
- import type { ConsumerConfig } from "./consumer-config.js";
2
/**
 * Sends `query` (plus the optional `source` filter and `topN` limit) to the
 * API's `/search` endpoint and prints the results to stdout. With `compact`
 * each result is printed as a single pipe-separated line; otherwise a
 * multi-line entry is printed per result. Prints "No results found." when the
 * result list is empty.
 */
- export declare function cmdConsumerSearch(config: ConsumerConfig, query: string, options: {
3
- source?: string;
4
- topN?: number;
5
- compact?: boolean;
6
- }): Promise<void>;
7
/**
 * Requests `/list` from the API and prints the available sources. With
 * `names` only the source names are printed, one per line; otherwise each
 * source is printed with its version (when present), chunk count, URL count
 * and last-refreshed value.
 */
- export declare function cmdConsumerList(config: ConsumerConfig, options?: {
8
- names?: boolean;
9
- }): Promise<void>;
10
/**
 * Requests `/stats` from the API and prints per-source statistics followed by
 * overall totals.
 */
- export declare function cmdConsumerStats(config: ConsumerConfig): Promise<void>;
11
- //# sourceMappingURL=consumer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"consumer.d.ts","sourceRoot":"","sources":["../src/consumer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AA+B3D,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAA;CAAE,GAC7D,OAAO,CAAC,IAAI,CAAC,CAyBf;AAED,wBAAsB,eAAe,CAAC,MAAM,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,OAAO,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAqB1G;AAED,wBAAsB,gBAAgB,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAkB5E"}
package/dist/consumer.js DELETED
@@ -1,84 +0,0 @@
1
- import { bold, cyan, yellow } from "./format.js";
2
- async function apiRequest(config, path, options) {
3
- const url = `${config.apiUrl.replace(/\/$/, "")}${path}`;
4
- let response;
5
- try {
6
- response = await fetch(url, {
7
- ...options,
8
- headers: {
9
- "Content-Type": "application/json",
10
- "x-api-key": config.apiKey,
11
- ...options?.headers,
12
- },
13
- });
14
- }
15
- catch {
16
- throw new Error(`Cannot reach Grimoire API at ${config.apiUrl}. Check your GRIMOIRE_API_URL.`);
17
- }
18
- if (response.status === 401 || response.status === 403) {
19
- throw new Error("Invalid API key. Check your GRIMOIRE_API_KEY or run 'grimoire init'.");
20
- }
21
- if (!response.ok) {
22
- throw new Error(`API error: ${response.status} ${response.statusText}`);
23
- }
24
- return response.json();
25
- }
26
/**
 * Runs a search through the API's `/search` endpoint and prints the hits.
 *
 * @param {{apiUrl: string, apiKey: string}} config API connection settings.
 * @param {string} query Search text.
 * @param {{source?: string, topN?: number, compact?: boolean}} options
 *   `source` and `topN` are forwarded in the request body; `compact` switches
 *   the output to one pipe-separated line per hit.
 * @returns {Promise<void>}
 */
export async function cmdConsumerSearch(config, query, options) {
    const payload = { query, source: options.source, topN: options.topN };
    const data = await apiRequest(config, "/search", {
        method: "POST",
        body: JSON.stringify(payload),
    });
    const hits = data.results;
    if (hits.length === 0) {
        console.log("No results found.");
        return;
    }
    if (options.compact) {
        for (const hit of hits) {
            const score = hit.relevance_score.toFixed(4);
            const headings = hit.heading_path.join(" > ");
            console.log(`${score} | ${hit.source} | ${hit.title} | ${headings} | ${hit.url}`);
        }
        return;
    }
    for (const [position, hit] of hits.entries()) {
        const heading = bold(`[${position + 1}] ${hit.title}`);
        console.log(`\n${heading} (${hit.relevance_score.toFixed(4)})`);
        console.log(` ${cyan(hit.url)}`);
        console.log(` ${yellow(hit.heading_path.join(" > "))}`);
        // Flatten the excerpt onto a single line.
        console.log(` ${hit.content.replace(/\n/g, " ")}`);
    }
}
49
/**
 * Prints the sources reported by the API's `/list` endpoint.
 *
 * @param {{apiUrl: string, apiKey: string}} config API connection settings.
 * @param {{names?: boolean}} [options] With `names`, only the source names
 *   are printed, one per line.
 * @returns {Promise<void>}
 */
export async function cmdConsumerList(config, options) {
    const data = await apiRequest(config, "/list");
    const sources = data.sources;
    if (sources.length === 0) {
        console.log("No sources available.");
        return;
    }
    if (options?.names) {
        for (const entry of sources) {
            console.log(entry.source);
        }
        return;
    }
    console.log("\nSources:\n");
    for (const entry of sources) {
        // The version suffix is shown only when the source reports one.
        const versionSuffix = entry.version ? ` v${entry.version}` : "";
        console.log(` ${bold(entry.source)}${versionSuffix}`);
        console.log(` ${entry.chunk_count} chunks, ${entry.url_count} URLs, last refreshed ${entry.last_refreshed}`);
    }
}
68
/**
 * Prints per-source statistics from the API's `/stats` endpoint, followed by
 * a totals line.
 *
 * @param {{apiUrl: string, apiKey: string}} config API connection settings.
 * @returns {Promise<void>}
 */
export async function cmdConsumerStats(config) {
    const data = await apiRequest(config, "/stats");
    const sources = data.sources;
    if (sources.length === 0) {
        console.log("No sources have been refreshed yet.");
        return;
    }
    console.log("\nSource Statistics:\n");
    for (const entry of sources) {
        const versionSuffix = entry.version ? ` v${entry.version}` : "";
        const report = [
            ` ${bold(entry.source)}${versionSuffix}`,
            ` Chunks: ${entry.chunk_count}`,
            ` URLs: ${entry.url_count}`,
            ` Last refreshed: ${entry.last_refreshed}`,
        ];
        for (const line of report) {
            console.log(line);
        }
    }
    console.log(`\n Total: ${data.totalChunks} chunks across ${data.totalUrls} URLs from ${data.sources.length} sources`);
}
84
- //# sourceMappingURL=consumer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"consumer.js","sourceRoot":"","sources":["../src/consumer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAEjD,KAAK,UAAU,UAAU,CAAI,MAAsB,EAAE,IAAY,EAAE,OAAqB;IACtF,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC;IAEzD,IAAI,QAAkB,CAAC;IACvB,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC1B,GAAG,OAAO;YACV,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,GAAG,OAAO,EAAE,OAAO;aACpB;SACF,CAAC,CAAC;IACL,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,gCAAgC,MAAM,CAAC,MAAM,gCAAgC,CAAC,CAAC;IACjG,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QACvD,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;IAC1F,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,cAAc,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,EAAgB,CAAC;AACvC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,MAAsB,EACtB,KAAa,EACb,OAA8D;IAE9D,MAAM,IAAI,GAAG,MAAM,UAAU,CAA8B,MAAM,EAAE,SAAS,EAAE;QAC5E,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;KAC5E,CAAC,CAAC;IAEH,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QACjC,OAAO;IACT,CAAC;IAED,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QACvH,CAAC;QACD,OAAO;IACT,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpF,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,GAAG,CAA
C,EAAE,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,OAAO,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;QACzD,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IACtD,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAsB,EAAE,OAA6B;IACzF,MAAM,IAAI,GAAG,MAAM,UAAU,CAA4B,MAAM,EAAE,OAAO,CAAC,CAAC;IAE1E,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,OAAO;IACT,CAAC;IAED,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;QACnB,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACxB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAC5B,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,WAAW,YAAY,CAAC,CAAC,SAAS,yBAAyB,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC;IACtG,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,MAAsB;IAC3D,MAAM,IAAI,GAAG,MAAM,UAAU,CAAoE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEnH,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;QACnD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC;QACzC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,CAAC,WAAW,kBAAkB,IAAI,CAAC,SAAS,cAAc,IAAI,CAAC,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;AACzH,CAAC"}
@@ -1,12 +0,0 @@
1
/** A page after HTML-to-Markdown conversion. */
- export interface ConvertedPage {
2
- source: string;
3
- url: string;
4
- title: string;
5
/** Frontmatter block followed by a blank line and the converted content. */
- markdown: string;
6
- }
7
/**
 * Converts the element matched by `contentSelector` to Markdown, after
 * removing `removeSelectors` matches inside it and deleting text that matches
 * any of the `removeTextPatterns` regular-expression sources. Falls back to
 * the document body when the selector matches nothing.
 */
- export declare function extractContent(html: string, contentSelector: string, removeSelectors?: string[], removeTextPatterns?: string[]): string;
8
/**
 * Returns the text of the document's `<title>` with the part from the first
 * `|`, dash or hyphen separator onward removed; "Untitled" when there is no
 * `<title>` element.
 */
- export declare function extractTitle(html: string): string;
9
/**
 * Builds a `---`-delimited frontmatter block with `source`, `url`, `title`
 * and a `fetched_at` timestamp taken at call time.
 */
- export declare function buildFrontmatter(source: string, url: string, title: string): string;
10
/** Extracts title and content from `html` and combines them with frontmatter. */
- export declare function convertPage(html: string, source: string, url: string, contentSelector: string, removeSelectors?: string[], removeTextPatterns?: string[]): ConvertedPage;
11
/**
 * Converts the saved raw HTML of every URL in `urls` (read from
 * `<dataDir>/raw/<sourceName>`) and writes the Markdown to
 * `<dataDir>/markdown/<sourceName>`. `onProgress` is invoked after each page.
 * The returned array is in the same order as `urls`.
 */
- export declare function convertSource(sourceName: string, urls: string[], contentSelector: string, removeSelectors: string[] | undefined, removeTextPatterns: string[] | undefined, dataDir: string, concurrency?: number, onProgress?: (current: number, total: number, url: string) => void): Promise<ConvertedPage[]>;
12
- //# sourceMappingURL=converter.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"converter.d.ts","sourceRoot":"","sources":["../src/converter.ts"],"names":[],"mappings":"AAYA,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAyBD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,eAAe,EAAE,MAAM,EACvB,eAAe,CAAC,EAAE,MAAM,EAAE,EAC1B,kBAAkB,CAAC,EAAE,MAAM,EAAE,GAC5B,MAAM,CAiBR;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAKjD;AAED,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CASnF;AAED,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM,EACd,GAAG,EAAE,MAAM,EACX,eAAe,EAAE,MAAM,EACvB,eAAe,CAAC,EAAE,MAAM,EAAE,EAC1B,kBAAkB,CAAC,EAAE,MAAM,EAAE,GAC5B,aAAa,CAOf;AAID,wBAAsB,aAAa,CACjC,UAAU,EAAE,MAAM,EAClB,IAAI,EAAE,MAAM,EAAE,EACd,eAAe,EAAE,MAAM,EACvB,eAAe,EAAE,MAAM,EAAE,GAAG,SAAS,EACrC,kBAAkB,EAAE,MAAM,EAAE,GAAG,SAAS,EACxC,OAAO,EAAE,MAAM,EACf,WAAW,SAAsB,EACjC,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,IAAI,GACjE,OAAO,CAAC,aAAa,EAAE,CAAC,CAgC1B"}
package/dist/converter.js DELETED
@@ -1,95 +0,0 @@
1
- import { readFile, writeFile, mkdir } from "node:fs/promises";
2
- import { join } from "node:path";
3
- import { JSDOM } from "jsdom";
4
- import TurndownService from "turndown";
5
- import { slugifyUrl } from "./scraper.js";
6
// Shared HTML-to-Markdown converter: "#"-style headings, fenced code blocks
// and "-" as the bullet marker.
const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced", bulletListMarker: "-" });
// Elements that are always removed from the selected content before conversion.
const GENERIC_REMOVE = ["style", "script", "noscript", "iframe", "svg"];
18
/**
 * Tidies converted Markdown: drops headings that have no text, removes text
 * matching the caller's patterns, squeezes runs of three or more newlines
 * down to two, and trims the result.
 *
 * @param {string} md Markdown to clean.
 * @param {string[]} [textPatterns] Regular-expression sources; each is
 *   compiled with the "gm" flags and every match is deleted.
 * @returns {string}
 */
function cleanMarkdown(md, textPatterns) {
    const squeezeBlankLines = (text) => text.replace(/\n{3,}/g, "\n\n");
    const withoutEmptyHeadings = md.replace(/^(#+)\s*$/gm, "");
    let result = squeezeBlankLines(withoutEmptyHeadings);
    if (textPatterns) {
        const stripped = textPatterns.reduce((text, source) => text.replace(new RegExp(source, "gm"), ""), result);
        // Deleting text can leave new blank-line runs behind, so squeeze again.
        result = squeezeBlankLines(stripped);
    }
    return result.trim();
}
30
/**
 * Converts the main content of an HTML document to cleaned Markdown.
 *
 * The element matched by `contentSelector` is used as the root; when nothing
 * matches, the whole `<body>` is used instead. In both cases the elements in
 * GENERIC_REMOVE and any `removeSelectors` matches are removed from the root
 * before conversion, so script/style text never ends up in the Markdown.
 *
 * @param {string} html Full HTML document.
 * @param {string} contentSelector CSS selector for the content root.
 * @param {string[]} [removeSelectors] Extra CSS selectors to remove.
 * @param {string[]} [removeTextPatterns] Regular-expression sources passed to
 *   cleanMarkdown.
 * @returns {string}
 */
export function extractContent(html, contentSelector, removeSelectors, removeTextPatterns) {
    const doc = new JSDOM(html).window.document;
    const root = doc.querySelector(contentSelector) ?? doc.body;
    const unwanted = [...GENERIC_REMOVE, ...(removeSelectors ?? [])];
    for (const selector of unwanted) {
        for (const el of root.querySelectorAll(selector)) {
            el.remove();
        }
    }
    return cleanMarkdown(turndown.turndown(root.innerHTML), removeTextPatterns);
}
45
/**
 * Extracts a page title from the document's `<title>` element.
 *
 * Everything from the first `|`, en/em dash or hyphen separator onward is
 * removed (typically a site-name suffix) and the rest is trimmed.
 *
 * @param {string} html Full HTML document.
 * @returns {string} The cleaned title, or "Untitled" when there is no
 *   `<title>` element or nothing is left after cleaning.
 */
export function extractTitle(html) {
    const titleEl = new JSDOM(html).window.document.querySelector("title");
    const cleaned = titleEl?.textContent?.replace(/\s*[|–—-]\s*.+$/, "").trim();
    // `||` rather than `??`: an empty string must also fall back to "Untitled".
    return cleaned || "Untitled";
}
52
/**
 * Builds the frontmatter block placed at the top of a converted page.
 *
 * `url` and `title` are written as double-quoted values with backslashes and
 * double quotes escaped, so values containing those characters still form a
 * well-formed quoted string. `fetched_at` is the current time in ISO 8601.
 *
 * @param {string} source Source name (written unquoted).
 * @param {string} url Page URL.
 * @param {string} title Page title.
 * @returns {string} The frontmatter lines joined by "\n", delimited by "---".
 */
export function buildFrontmatter(source, url, title) {
    // Escape the backslash first so the backslashes added for quotes are not
    // themselves doubled.
    const quote = (value) => `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
    return [
        "---",
        `source: ${source}`,
        `url: ${quote(url)}`,
        `title: ${quote(title)}`,
        `fetched_at: "${new Date().toISOString()}"`,
        "---",
    ].join("\n");
}
62
/**
 * Turns one HTML document into a converted page: the title and content are
 * extracted, and the Markdown is the frontmatter block, a blank line, then
 * the content.
 *
 * @param {string} html Full HTML document.
 * @param {string} source Source name recorded in the frontmatter.
 * @param {string} url Page URL recorded in the frontmatter.
 * @param {string} contentSelector CSS selector for the content root.
 * @param {string[]} [removeSelectors] Extra CSS selectors to remove.
 * @param {string[]} [removeTextPatterns] Regular-expression sources to delete.
 * @returns {{source: string, url: string, title: string, markdown: string}}
 */
export function convertPage(html, source, url, contentSelector, removeSelectors, removeTextPatterns) {
    const title = extractTitle(html);
    const body = extractContent(html, contentSelector, removeSelectors, removeTextPatterns);
    const sections = [buildFrontmatter(source, url, title), body];
    return { source, url, title, markdown: sections.join("\n\n") };
}
69
// Number of pages converted in parallel when the caller does not specify one.
const DEFAULT_CONCURRENCY = 10;
/**
 * Converts every saved raw HTML page of a source to Markdown.
 *
 * For each URL the HTML is read from `<dataDir>/raw/<sourceName>/<slug>.html`
 * and the result is written to `<dataDir>/markdown/<sourceName>/<slug>.md`.
 * Work is shared by a pool of workers that pull the next unclaimed URL index.
 *
 * @param {string} sourceName Source whose pages are converted.
 * @param {string[]} urls URLs to convert.
 * @param {string} contentSelector CSS selector for the content root.
 * @param {string[] | undefined} removeSelectors Extra CSS selectors to remove.
 * @param {string[] | undefined} removeTextPatterns Regular-expression sources
 *   to delete from the Markdown.
 * @param {string} dataDir Base data directory.
 * @param {number} [concurrency] Maximum number of parallel workers. Values
 *   below 1 (or NaN) are treated as 1 so every URL is still processed.
 * @param {(current: number, total: number, url: string) => void} [onProgress]
 *   Called after each page with the number completed so far.
 * @returns {Promise<object[]>} Converted pages in the same order as `urls`.
 */
export async function convertSource(sourceName, urls, contentSelector, removeSelectors, removeTextPatterns, dataDir, concurrency = DEFAULT_CONCURRENCY, onProgress) {
    const rawDir = join(dataDir, "raw", sourceName);
    const mdDir = join(dataDir, "markdown", sourceName);
    await mkdir(mdDir, { recursive: true });
    const pages = new Array(urls.length);
    let completed = 0;
    let nextIndex = 0;
    async function worker() {
        while (nextIndex < urls.length) {
            // Claim the index synchronously, before the first await, so no two
            // workers take the same URL.
            const i = nextIndex++;
            const url = urls[i];
            const slug = slugifyUrl(url);
            const htmlPath = join(rawDir, `${slug}.html`);
            const html = await readFile(htmlPath, "utf-8");
            const page = convertPage(html, sourceName, url, contentSelector, removeSelectors, removeTextPatterns);
            await writeFile(join(mdDir, `${slug}.md`), page.markdown, "utf-8");
            pages[i] = page;
            completed++;
            onProgress?.(completed, urls.length, url);
        }
    }
    // Without this clamp a concurrency of 0, a negative number or NaN would
    // start no workers and silently return an array of empty slots.
    const requested = Math.floor(concurrency);
    const workerCount = Math.min(requested >= 1 ? requested : 1, urls.length);
    const workers = Array.from({ length: workerCount }, () => worker());
    await Promise.all(workers);
    return pages;
}
95
- //# sourceMappingURL=converter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"converter.js","sourceRoot":"","sources":["../src/converter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,gBAAgB,EAAE,GAAG;CACtB,CAAC,CAAC;AASH,MAAM,cAAc,GAAG;IACrB,OAAO;IACP,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,KAAK;CACN,CAAC;AAEF,SAAS,aAAa,CAAC,EAAU,EAAE,YAAuB;IACxD,IAAI,OAAO,GAAG,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE9B,IAAI,YAAY,EAAE,CAAC;QACjB,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACnC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;QAC3D,CAAC;QACD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC/C,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,eAAuB,EACvB,eAA0B,EAC1B,kBAA6B;IAE7B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;IAEhC,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC;IACrD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,kBAAkB,CAAC,CAAC;IAClF,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,GAAG,cAAc,EAAE,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IACrE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,KAAK,MAAM,EAAE,IAAI,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;YACtD,EAAE,CAAC,MAAM,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,kBAAkB,CAAC,CAAC;AACnF,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC3D,IAAI,CAAC,OAAO;QAAE,OAAO,UAAU,CAAC;IAChC,OAAO,OAAO,CAAC,WAAW,EAAE,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,UAAU,CAAC;AAClF,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,MAAc,EAAE,GAAW,EAAE,KAAa;IACzE,
OAAO;QACL,KAAK;QACL,WAAW,MAAM,EAAE;QACnB,SAAS,GAAG,GAAG;QACf,WAAW,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG;QACxC,gBAAgB,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,GAAG;QAC3C,KAAK;KACN,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,MAAM,UAAU,WAAW,CACzB,IAAY,EACZ,MAAc,EACd,GAAW,EACX,eAAuB,EACvB,eAA0B,EAC1B,kBAA6B;IAE7B,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACjC,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,eAAe,EAAE,eAAe,EAAE,kBAAkB,CAAC,CAAC;IAC3F,MAAM,WAAW,GAAG,gBAAgB,CAAC,MAAM,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;IACzD,MAAM,QAAQ,GAAG,GAAG,WAAW,OAAO,OAAO,EAAE,CAAC;IAEhD,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC;AAED,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAE/B,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,UAAkB,EAClB,IAAc,EACd,eAAuB,EACvB,eAAqC,EACrC,kBAAwC,EACxC,OAAe,EACf,WAAW,GAAG,mBAAmB,EACjC,UAAkE;IAElE,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;IACpD,MAAM,KAAK,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,MAAM,KAAK,GAAoB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACtD,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,UAAU,MAAM;QACnB,OAAO,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC/B,MAAM,CAAC,GAAG,SAAS,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACpB,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,OAAO,CAAC,CAAC;YAC9C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAE/C,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,eAAe,EAAE,eAAe,EAAE,kBAAkB,CAAC,CAAC;YACtG,MAAM,SAAS,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACnE,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;YAChB,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CACxB,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,EAC9C,GAAG,EAAE,CAAC,MAAM,EAAE,CACf,CAAC;IACF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,OAAO,KAAK,CAAC;AACf,CA
AC"}
@@ -1,9 +0,0 @@
1
/** Optional hooks and resume state for `embedTexts`. */
- export interface EmbedOptions {
2
/** Called after each batch with the number of texts processed so far and the total. */
- onProgress?: (done: number, total: number) => void;
3
/**
 * Called with all embeddings accumulated so far: periodically between
 * batches, and once more after the final batch.
 */
- onCheckpoint?: (embeddings: number[][]) => Promise<void>;
4
/** Number of batches between periodic checkpoints. */
- checkpointEveryBatches?: number;
5
/**
 * Embeddings from an earlier run. They are reused as the leading results;
 * a trailing partial batch is discarded and recomputed.
 */
- resumeFrom?: number[][];
6
- }
7
/** Embeds `texts` in batches and resolves to the embedding vectors. */
- export declare function embedTexts(texts: string[], options?: EmbedOptions): Promise<number[][]>;
8
/** Embeds a single text by delegating to `embedTexts`. */
- export declare function embedText(text: string): Promise<number[]>;
9
- //# sourceMappingURL=embedder.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AA6CA,MAAM,WAAW,YAAY;IAC3B,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACnD,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACzD,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC,UAAU,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;CACzB;AAED,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA8ErB;AAED,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAG/D"}
package/dist/embedder.js DELETED
@@ -1,108 +0,0 @@
1
- import { GoogleGenerativeAI } from "@google/generative-ai";
2
// Number of texts sent per batchEmbedContents request.
const BATCH_SIZE = 50;
// Embedding model name and the vector size requested from it.
const MODEL = "gemini-embedding-001";
const OUTPUT_DIMENSIONALITY = 768;
// Attempts per batch (including the first) before giving up.
const MAX_RETRIES = 5;
// Initial back-off delays; each further retry doubles the delay.
const RATE_LIMIT_BASE_DELAY_MS = 60 * 1000;
const NETWORK_BASE_DELAY_MS = 10 * 1000;
// Pause inserted between consecutive batches.
const BATCH_DELAY_MS = 2500;
// Default number of batches between onCheckpoint calls.
const DEFAULT_CHECKPOINT_EVERY_BATCHES = 20;
// Substrings of an error message that mark a failure as a network error.
const NETWORK_ERROR_PATTERNS = ["fetch failed", "ECONNRESET", "ETIMEDOUT", "ECONNREFUSED", "EAI_AGAIN", "ENOTFOUND", "socket hang up", "UND_ERR_"];
20
// Client instance created on first use and reused by later calls.
let genAI;
/**
 * Returns the shared GoogleGenerativeAI client, creating it from the
 * GEMINI_API_KEY environment variable on the first call.
 *
 * @throws {Error} When GEMINI_API_KEY is not set and no client exists yet.
 */
function getClient() {
    if (genAI) {
        return genAI;
    }
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
        throw new Error("GEMINI_API_KEY environment variable is not set");
    }
    genAI = new GoogleGenerativeAI(apiKey);
    return genAI;
}
31
/**
 * Classifies an error message for the retry logic.
 *
 * @param {string} message Error message text.
 * @returns {"rate_limit" | "network" | "other"} "rate_limit" when the message
 *   contains "429" or "503"; "network" when it contains one of
 *   NETWORK_ERROR_PATTERNS; otherwise "other".
 */
function classifyError(message) {
    const mentions = (needle) => message.includes(needle);
    if (mentions("429") || mentions("503")) {
        return "rate_limit";
    }
    return NETWORK_ERROR_PATTERNS.some((pattern) => mentions(pattern)) ? "network" : "other";
}
40
/** Resolves after `ms` milliseconds. */
function pause(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Sends one batch to the embedding model, retrying rate-limit and network
 * failures with exponential back-off (up to MAX_RETRIES attempts in total).
 * Any other failure, or the last failed attempt, is logged and rethrown.
 *
 * @param {object} model Model handle exposing `batchEmbedContents`.
 * @param {string[]} batch Texts in this batch.
 * @param {number} batchNumber 1-based batch number, used in log output.
 * @param {number} offset Index of the batch's first text within all texts.
 */
async function embedBatchWithRetry(model, batch, batchNumber, offset) {
    for (let attempt = 0;; attempt++) {
        try {
            return await model.batchEmbedContents({
                requests: batch.map((text) => ({
                    content: { role: "user", parts: [{ text }] },
                    outputDimensionality: OUTPUT_DIMENSIONALITY,
                })),
            });
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            const kind = classifyError(message);
            if (kind !== "other" && attempt < MAX_RETRIES - 1) {
                const baseDelay = kind === "rate_limit" ? RATE_LIMIT_BASE_DELAY_MS : NETWORK_BASE_DELAY_MS;
                const delay = baseDelay * Math.pow(2, attempt);
                const label = kind === "rate_limit" ? "Rate limited" : "Network error";
                console.log(` ${label} (batch ${batchNumber}), retrying in ${delay / 1000}s...`);
                await pause(delay);
                continue;
            }
            console.error(` Embedding failed at batch ${batchNumber} (chunks ${offset + 1}-${offset + batch.length}): ${message}`);
            throw err;
        }
    }
}
/**
 * Embeds `texts` in batches of BATCH_SIZE and returns one vector per text, in
 * input order.
 *
 * @param {string[]} texts Texts to embed.
 * @param {object} [options]
 * @param {(done: number, total: number) => void} [options.onProgress] Called
 *   after every batch.
 * @param {(embeddings: number[][]) => Promise<void>} [options.onCheckpoint]
 *   Called with everything embedded so far every `checkpointEveryBatches`
 *   batches, and once more after the final batch.
 * @param {number} [options.checkpointEveryBatches] Defaults to
 *   DEFAULT_CHECKPOINT_EVERY_BATCHES.
 * @param {number[][]} [options.resumeFrom] Embeddings from an earlier run;
 *   they are reused up to the last complete batch boundary.
 * @returns {Promise<number[][]>}
 * @throws {Error} When GEMINI_API_KEY is missing, when a batch ultimately
 *   fails, or when the API returns a different number of embeddings than
 *   texts were sent.
 */
export async function embedTexts(texts, options = {}) {
    const client = getClient();
    const model = client.getGenerativeModel({ model: MODEL });
    const { onProgress, onCheckpoint, resumeFrom } = options;
    const checkpointEveryBatches = options.checkpointEveryBatches ?? DEFAULT_CHECKPOINT_EVERY_BATCHES;
    const embeddings = resumeFrom ? [...resumeFrom] : [];
    // Resume on a batch boundary: a trailing partial batch is dropped and
    // recomputed so that batch offsets stay aligned.
    const startIndex = Math.floor(embeddings.length / BATCH_SIZE) * BATCH_SIZE;
    if (embeddings.length > startIndex) {
        embeddings.length = startIndex;
    }
    if (startIndex > 0) {
        console.log(` Resuming from chunk ${startIndex} of ${texts.length} (${embeddings.length} cached).`);
    }
    if (startIndex >= texts.length) {
        return embeddings.slice(0, texts.length);
    }
    let batchesSinceCheckpoint = 0;
    for (let i = startIndex; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE);
        const batchNumber = i / BATCH_SIZE + 1;
        const result = await embedBatchWithRetry(model, batch, batchNumber, i);
        // A short or malformed response would silently shift every later
        // embedding onto the wrong text, so fail loudly instead.
        const returned = Array.isArray(result?.embeddings) ? result.embeddings.length : 0;
        if (returned !== batch.length) {
            throw new Error(`Embedding API returned ${returned} embeddings for batch ${batchNumber}, expected ${batch.length}`);
        }
        for (const embedding of result.embeddings) {
            embeddings.push(embedding.values);
        }
        onProgress?.(Math.min(i + BATCH_SIZE, texts.length), texts.length);
        batchesSinceCheckpoint++;
        const hasMoreBatches = i + BATCH_SIZE < texts.length;
        // The final batch is checkpointed after the loop, not here.
        if (onCheckpoint && batchesSinceCheckpoint >= checkpointEveryBatches && hasMoreBatches) {
            await onCheckpoint(embeddings);
            batchesSinceCheckpoint = 0;
        }
        if (hasMoreBatches) {
            await pause(BATCH_DELAY_MS);
        }
    }
    if (onCheckpoint) {
        await onCheckpoint(embeddings);
    }
    return embeddings;
}
104
/**
 * Embeds a single text.
 *
 * @param {string} text Text to embed.
 * @returns {Promise<number[]>} The first (only) vector returned by embedTexts.
 */
export async function embedText(text) {
    const vectors = await embedTexts([text]);
    return vectors[0];
}
108
- //# sourceMappingURL=embedder.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAE3D,MAAM,UAAU,GAAG,EAAE,CAAC;AACtB,MAAM,KAAK,GAAG,sBAAsB,CAAC;AACrC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAClC,MAAM,WAAW,GAAG,CAAC,CAAC;AACtB,MAAM,wBAAwB,GAAG,KAAK,CAAC;AACvC,MAAM,qBAAqB,GAAG,KAAK,CAAC;AACpC,MAAM,cAAc,GAAG,IAAI,CAAC;AAC5B,MAAM,gCAAgC,GAAG,EAAE,CAAC;AAE5C,MAAM,sBAAsB,GAAG;IAC7B,cAAc;IACd,YAAY;IACZ,WAAW;IACX,cAAc;IACd,WAAW;IACX,WAAW;IACX,gBAAgB;IAChB,UAAU;CACX,CAAC;AAEF,IAAI,KAAqC,CAAC;AAE1C,SAAS,SAAS;IAChB,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACpE,CAAC;QACD,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACvD,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,IAAI,sBAAsB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5D,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AASD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAe,EACf,UAAwB,EAAE;IAE1B,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,kBAAkB,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;IAE1D,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;IACzD,MAAM,sBAAsB,GAAG,OAAO,CAAC,sBAAsB,IAAI,gCAAgC,CAAC;IAElG,MAAM,UAAU,GAAe,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC,GAAG,UAAU,CAAC;IAE3E,IAAI,UAAU,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC;QACnC,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC;IACjC,CAAC;IAED,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,OAAO,KAAK,CAAC,MAAM,KAAK,UAAU,CAAC,MAAM,WAAW,CAAC,CAAC;IACvG,CAAC;IAED,IAAI,UAAU,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAC/B,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;IAED,IAAI,sBAAsB,GAAG,CAAC,CAAC;IAE/B,KAAK,IAAI,CAAC,GAAG,UA
AU,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;QAC7C,MAAM,WAAW,GAAG,CAAC,GAAG,UAAU,GAAG,CAAC,CAAC;QAEvC,IAAI,MAAM,CAAC;QACX,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,WAAW,EAAE,OAAO,EAAE,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,KAAK,CAAC,kBAAkB,CAAC;oBACtC,QAAQ,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;wBAC7B,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE;wBAC5C,oBAAoB,EAAE,qBAAqB;qBAC5C,CAAC,CAAC;iBACJ,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;gBAEpC,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,GAAG,WAAW,GAAG,CAAC,EAAE,CAAC;oBAClD,MAAM,SAAS,GAAG,IAAI,KAAK,YAAY,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,qBAAqB,CAAC;oBAC3F,MAAM,KAAK,GAAG,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;oBAC/C,MAAM,KAAK,GAAG,IAAI,KAAK,YAAY,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,CAAC;oBACvE,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,WAAW,WAAW,kBAAkB,KAAK,GAAG,IAAI,MAAM,CAAC,CAAC;oBAClF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;oBAC3D,SAAS;gBACX,CAAC;gBAED,OAAO,CAAC,KAAK,CAAC,+BAA+B,WAAW,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,MAAM,OAAO,EAAE,CAAC,CAAC;gBAC9G,MAAM,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,KAAK,MAAM,SAAS,IAAI,MAAO,CAAC,UAAU,EAAE,CAAC;YAC3C,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACpC,CAAC;QAED,UAAU,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAEnE,sBAAsB,EAAE,CAAC;QACzB,IAAI,YAAY,IAAI,sBAAsB,IAAI,sBAAsB,IAAI,CAAC,GAAG,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YACtG,MAAM,YAAY,CAAC,UAAU,CAAC,CAAC;YAC/B,sBAAsB,GAAG,CAAC,CAAC;QAC7B,CAAC;QAED,IAAI,CAAC,GAAG,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAClC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC;QACtE,CAAC;IACH,CAAC;IAED,
IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,YAAY,CAAC,UAAU,CAAC,CAAC;IACjC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAY;IAC1C,MAAM,CAAC,SAAS,CAAC,GAAG,MAAM,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7C,OAAO,SAAS,CAAC;AACnB,CAAC"}
package/dist/format.d.ts DELETED
@@ -1,5 +0,0 @@
1
- export declare const bold: (s: string) => string;
2
- export declare const cyan: (s: string) => string;
3
- export declare const yellow: (s: string) => string;
4
- export declare const red: (s: string) => string;
5
- //# sourceMappingURL=format.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../src/format.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,IAAI,GAAI,GAAG,MAAM,KAAG,MAA0C,CAAC;AAC5E,eAAO,MAAM,IAAI,GAAI,GAAG,MAAM,KAAG,MAA2C,CAAC;AAC7E,eAAO,MAAM,MAAM,GAAI,GAAG,MAAM,KAAG,MAA2C,CAAC;AAC/E,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,KAAG,MAA2C,CAAC"}
package/dist/format.js DELETED
@@ -1,6 +0,0 @@
1
- const isTTY = process.stdout.isTTY ?? false;
2
- export const bold = (s) => isTTY ? `\x1b[1m${s}\x1b[0m` : s;
3
- export const cyan = (s) => isTTY ? `\x1b[36m${s}\x1b[0m` : s;
4
- export const yellow = (s) => isTTY ? `\x1b[33m${s}\x1b[0m` : s;
5
- export const red = (s) => isTTY ? `\x1b[31m${s}\x1b[0m` : s;
6
- //# sourceMappingURL=format.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"format.js","sourceRoot":"","sources":["../src/format.ts"],"names":[],"mappings":"AAAA,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,IAAI,KAAK,CAAC;AAE5C,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;AAC5E,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7E,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;AAC/E,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC"}
@@ -1,3 +0,0 @@
1
- import { type ConvertedPage } from "./converter.js";
2
- export declare function ingestLlmsFull(llmsFullUrl: string, sourceName: string, baseUrl: string, dataDir: string, onProgress?: (current: number, total: number) => void): Promise<ConvertedPage[]>;
3
- //# sourceMappingURL=llms-ingest.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"llms-ingest.d.ts","sourceRoot":"","sources":["../src/llms-ingest.ts"],"names":[],"mappings":"AAGA,OAAO,EAAoB,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAsEtE,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EACf,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACpD,OAAO,CAAC,aAAa,EAAE,CAAC,CAiC1B"}
@@ -1,85 +0,0 @@
1
- import { writeFile, mkdir } from "node:fs/promises";
2
- import { join } from "node:path";
3
- import { slugifyUrl } from "./scraper.js";
4
- import { buildFrontmatter } from "./converter.js";
5
- const BOILERPLATE_PATTERNS = [
6
- /^\[Skip to content\]\([^)]*\)\s*$/gm,
7
- /^Was this helpful\?\s*$/gm,
8
- /^YesNo\s*$/gm,
9
- /^\[ Edit page \]\([^)]+\) \[ Report issue \]\([^)]+\)\s*$/gm,
10
- /^Copy page\s*$/gm,
11
- /^```json\n\{"@context":"https:\/\/schema\.org","@type":"BreadcrumbList"[^`]*```\s*$/gm,
12
- ];
13
- function splitPages(content) {
14
- const pages = [];
15
- const frontmatterPattern = /^---\ntitle: (.+)\n/gm;
16
- const boundaries = [];
17
- let match;
18
- while ((match = frontmatterPattern.exec(content)) !== null) {
19
- boundaries.push({ index: match.index, title: match[1] });
20
- }
21
- for (let i = 0; i < boundaries.length; i++) {
22
- const start = boundaries[i].index;
23
- const end = i + 1 < boundaries.length ? boundaries[i + 1].index : content.length;
24
- const raw = content.slice(start, end).trimEnd();
25
- const url = extractUrl(raw);
26
- if (!url)
27
- continue;
28
- const bodyStart = raw.indexOf("---", 3);
29
- if (bodyStart === -1)
30
- continue;
31
- const body = raw.slice(raw.indexOf("\n", bodyStart) + 1);
32
- let cleaned = body;
33
- for (const pattern of BOILERPLATE_PATTERNS) {
34
- cleaned = cleaned.replace(pattern, "");
35
- }
36
- cleaned = cleaned.replace(/\n{3,}/g, "\n\n").trim();
37
- if (!cleaned)
38
- continue;
39
- pages.push({
40
- title: boundaries[i].title,
41
- url,
42
- markdown: cleaned,
43
- });
44
- }
45
- return pages;
46
- }
47
- function extractUrl(pageContent) {
48
- const match = pageContent.match(/```json\n\{"@context":"https:\/\/schema\.org","@type":"BreadcrumbList","itemListElement":\[(.+?)\]\}\n```/);
49
- if (!match)
50
- return null;
51
- const items = JSON.parse(`[${match[1]}]`);
52
- const last = items[items.length - 1];
53
- if (!last?.item?.["@id"])
54
- return null;
55
- return `https://developers.cloudflare.com${last.item["@id"]}`;
56
- }
57
- export async function ingestLlmsFull(llmsFullUrl, sourceName, baseUrl, dataDir, onProgress) {
58
- const response = await fetch(llmsFullUrl);
59
- if (!response.ok) {
60
- throw new Error(`Failed to fetch ${llmsFullUrl}: ${response.status} ${response.statusText}`);
61
- }
62
- const content = await response.text();
63
- const pages = splitPages(content);
64
- const mdDir = join(dataDir, "markdown", sourceName);
65
- await mkdir(mdDir, { recursive: true });
66
- const results = [];
67
- for (let i = 0; i < pages.length; i++) {
68
- const page = pages[i];
69
- const frontmatter = buildFrontmatter(sourceName, page.url, page.title);
70
- const fullMarkdown = `${frontmatter}\n\n${page.markdown}`;
71
- const slug = slugifyUrl(page.url);
72
- await writeFile(join(mdDir, `${slug}.md`), fullMarkdown, "utf-8");
73
- results.push({
74
- source: sourceName,
75
- url: page.url,
76
- title: page.title,
77
- markdown: fullMarkdown,
78
- });
79
- if (onProgress && ((i + 1) % 100 === 0 || i + 1 === pages.length)) {
80
- onProgress(i + 1, pages.length);
81
- }
82
- }
83
- return results;
84
- }
85
- //# sourceMappingURL=llms-ingest.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"llms-ingest.js","sourceRoot":"","sources":["../src/llms-ingest.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,gBAAgB,EAAsB,MAAM,gBAAgB,CAAC;AAQtE,MAAM,oBAAoB,GAAG;IAC3B,qCAAqC;IACrC,2BAA2B;IAC3B,cAAc;IACd,6DAA6D;IAC7D,kBAAkB;IAClB,uFAAuF;CACxF,CAAC;AAEF,SAAS,UAAU,CAAC,OAAe;IACjC,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,MAAM,kBAAkB,GAAG,uBAAuB,CAAC;IACnD,MAAM,UAAU,GAAuC,EAAE,CAAC;IAE1D,IAAI,KAAK,CAAC;IACV,OAAO,CAAC,KAAK,GAAG,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3D,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QAClC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;QACjF,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC;QAEhD,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,SAAS,KAAK,CAAC,CAAC;YAAE,SAAS;QAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC;QAEzD,IAAI,OAAO,GAAG,IAAI,CAAC;QACnB,KAAK,MAAM,OAAO,IAAI,oBAAoB,EAAE,CAAC;YAC3C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC;QACD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAEpD,IAAI,CAAC,OAAO;YAAE,SAAS;QAEvB,KAAK,CAAC,IAAI,CAAC;YACT,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK;YAC1B,GAAG;YACH,QAAQ,EAAE,OAAO;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,UAAU,CAAC,WAAmB;IACrC,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAC7B,2GAA2G,CAC5G,CAAC;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC
,GAAG,CAAmC,CAAC;IAC5E,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,OAAO,oCAAoC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;AAChE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,WAAmB,EACnB,UAAkB,EAClB,OAAe,EACf,OAAe,EACf,UAAqD;IAErD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,WAAW,CAAC,CAAC;IAC1C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,WAAW,KAAK,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;IAC/F,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAEtC,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;IACpD,MAAM,KAAK,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,MAAM,OAAO,GAAoB,EAAE,CAAC;IACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,WAAW,GAAG,gBAAgB,CAAC,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QACvE,MAAM,YAAY,GAAG,GAAG,WAAW,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC1D,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,SAAS,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,IAAI,KAAK,CAAC,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAElE,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,UAAU;YAClB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,YAAY;SACvB,CAAC,CAAC;QAEH,IAAI,UAAU,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;YAClE,UAAU,CAAC,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -1,6 +0,0 @@
1
- export interface RerankResult {
2
- index: number;
3
- relevance_score: number;
4
- }
5
- export declare function rerank(query: string, documents: string[], topN?: number): Promise<RerankResult[]>;
6
- //# sourceMappingURL=reranker.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"reranker.d.ts","sourceRoot":"","sources":["../src/reranker.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;CACzB;AAcD,wBAAsB,MAAM,CAC1B,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EAAE,EACnB,IAAI,SAAI,GACP,OAAO,CAAC,YAAY,EAAE,CAAC,CAczB"}
package/dist/reranker.js DELETED
@@ -1,21 +0,0 @@
1
- function getRerankerUrl() {
2
- const url = process.env.RERANKER_URL;
3
- if (!url) {
4
- throw new Error("RERANKER_URL environment variable is not set");
5
- }
6
- return url;
7
- }
8
- export async function rerank(query, documents, topN = 5) {
9
- const baseUrl = getRerankerUrl();
10
- const response = await fetch(`${baseUrl}/v1/rerank`, {
11
- method: "POST",
12
- headers: { "Content-Type": "application/json" },
13
- body: JSON.stringify({ query, documents, top_n: topN }),
14
- });
15
- if (!response.ok) {
16
- throw new Error(`Reranker request failed: ${response.status} ${response.statusText}`);
17
- }
18
- const data = (await response.json());
19
- return data.results;
20
- }
21
- //# sourceMappingURL=reranker.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"reranker.js","sourceRoot":"","sources":["../src/reranker.ts"],"names":[],"mappings":"AASA,SAAS,cAAc;IACrB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC;IACrC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,MAAM,CAC1B,KAAa,EACb,SAAmB,EACnB,IAAI,GAAG,CAAC;IAER,MAAM,OAAO,GAAG,cAAc,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,YAAY,EAAE;QACnD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;KACxD,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA2B,CAAC;IAC/D,OAAO,IAAI,CAAC,OAAO,CAAC;AACtB,CAAC"}
package/dist/scraper.d.ts DELETED
@@ -1,9 +0,0 @@
1
- import { type Browser, type Page } from "playwright";
2
- import type { SourceConfig } from "./config.js";
3
- export declare function slugifyUrl(url: string): string;
4
- export declare function filterUrls(urls: string[], includePatterns?: string[], excludePatterns?: string[]): string[];
5
- export declare function discoverUrls(page: Page, source: SourceConfig): Promise<string[]>;
6
- export declare function fetchPage(page: Page, url: string, headed?: boolean): Promise<string>;
7
- export declare function scrapeSource(source: SourceConfig, sourceName: string, dataDir: string, onProgress?: (current: number, total: number, url: string) => void): Promise<string[]>;
8
- export declare function createBrowser(): Promise<Browser>;
9
- //# sourceMappingURL=scraper.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scraper.d.ts","sourceRoot":"","sources":["../src/scraper.ts"],"names":[],"mappings":"AAEA,OAAO,EAAY,KAAK,OAAO,EAAE,KAAK,IAAI,EAAE,MAAM,YAAY,CAAC;AAC/D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAO9C;AAED,wBAAgB,UAAU,CACxB,IAAI,EAAE,MAAM,EAAE,EACd,eAAe,CAAC,EAAE,MAAM,EAAE,EAC1B,eAAe,CAAC,EAAE,MAAM,EAAE,GACzB,MAAM,EAAE,CAkBV;AAoBD,wBAAsB,YAAY,CAChC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,YAAY,GACnB,OAAO,CAAC,MAAM,EAAE,CAAC,CAiBnB;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,MAAM,EACX,MAAM,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,MAAM,CAAC,CAGjB;AAsBD,wBAAsB,YAAY,CAChC,MAAM,EAAE,YAAY,EACpB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM,EACf,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,KAAK,IAAI,GACjE,OAAO,CAAC,MAAM,EAAE,CAAC,CAiCnB;AAED,wBAAsB,aAAa,IAAI,OAAO,CAAC,OAAO,CAAC,CAEtD"}