@staticn0va/wigolo 0.6.6 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/dist/cache/store.d.ts +9 -1
  2. package/dist/cache/store.d.ts.map +1 -1
  3. package/dist/cache/store.js +30 -4
  4. package/dist/cache/store.js.map +1 -1
  5. package/dist/cli/doctor.d.ts.map +1 -1
  6. package/dist/cli/doctor.js +56 -2
  7. package/dist/cli/doctor.js.map +1 -1
  8. package/dist/cli/status.js +1 -1
  9. package/dist/cli/status.js.map +1 -1
  10. package/dist/cli/tui/hooks/useInstall.js +1 -1
  11. package/dist/cli/tui/hooks/useInstall.js.map +1 -1
  12. package/dist/cli/tui/hooks/useVerify.js +1 -1
  13. package/dist/cli/tui/hooks/useVerify.js.map +1 -1
  14. package/dist/cli/tui/status-format.d.ts +1 -1
  15. package/dist/cli/tui/status-format.d.ts.map +1 -1
  16. package/dist/cli/tui/status-format.js +1 -1
  17. package/dist/cli/tui/status-format.js.map +1 -1
  18. package/dist/cli/tui/status-python.d.ts +1 -1
  19. package/dist/cli/tui/status-python.d.ts.map +1 -1
  20. package/dist/cli/tui/status-python.js +17 -1
  21. package/dist/cli/tui/status-python.js.map +1 -1
  22. package/dist/cli/tui/verify-suggestions.d.ts +1 -1
  23. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -1
  24. package/dist/cli/tui/verify-suggestions.js +3 -3
  25. package/dist/cli/tui/verify-suggestions.js.map +1 -1
  26. package/dist/cli/tui/verify.d.ts +2 -2
  27. package/dist/cli/tui/verify.d.ts.map +1 -1
  28. package/dist/cli/tui/verify.js +32 -6
  29. package/dist/cli/tui/verify.js.map +1 -1
  30. package/dist/cli/warmup.d.ts.map +1 -1
  31. package/dist/cli/warmup.js +16 -12
  32. package/dist/cli/warmup.js.map +1 -1
  33. package/dist/config.d.ts +6 -1
  34. package/dist/config.d.ts.map +1 -1
  35. package/dist/config.js +15 -2
  36. package/dist/config.js.map +1 -1
  37. package/dist/crawl/dedup.d.ts +1 -0
  38. package/dist/crawl/dedup.d.ts.map +1 -1
  39. package/dist/crawl/dedup.js +47 -1
  40. package/dist/crawl/dedup.js.map +1 -1
  41. package/dist/extraction/boilerplate.d.ts +15 -0
  42. package/dist/extraction/boilerplate.d.ts.map +1 -0
  43. package/dist/extraction/boilerplate.js +49 -0
  44. package/dist/extraction/boilerplate.js.map +1 -0
  45. package/dist/extraction/defuddle.d.ts.map +1 -1
  46. package/dist/extraction/defuddle.js +7 -3
  47. package/dist/extraction/defuddle.js.map +1 -1
  48. package/dist/extraction/jsonld.js +1 -1
  49. package/dist/extraction/jsonld.js.map +1 -1
  50. package/dist/extraction/lang-hints.d.ts +2 -0
  51. package/dist/extraction/lang-hints.d.ts.map +1 -0
  52. package/dist/extraction/lang-hints.js +28 -0
  53. package/dist/extraction/lang-hints.js.map +1 -0
  54. package/dist/extraction/llm/anthropic.d.ts +3 -0
  55. package/dist/extraction/llm/anthropic.d.ts.map +1 -0
  56. package/dist/extraction/llm/anthropic.js +33 -0
  57. package/dist/extraction/llm/anthropic.js.map +1 -0
  58. package/dist/extraction/llm/cache.d.ts +5 -0
  59. package/dist/extraction/llm/cache.d.ts.map +1 -0
  60. package/dist/extraction/llm/cache.js +35 -0
  61. package/dist/extraction/llm/cache.js.map +1 -0
  62. package/dist/extraction/llm/gemini.d.ts +3 -0
  63. package/dist/extraction/llm/gemini.d.ts.map +1 -0
  64. package/dist/extraction/llm/gemini.js +35 -0
  65. package/dist/extraction/llm/gemini.js.map +1 -0
  66. package/dist/extraction/llm/groq.d.ts +3 -0
  67. package/dist/extraction/llm/groq.d.ts.map +1 -0
  68. package/dist/extraction/llm/groq.js +63 -0
  69. package/dist/extraction/llm/groq.js.map +1 -0
  70. package/dist/extraction/llm/hash.d.ts +3 -0
  71. package/dist/extraction/llm/hash.d.ts.map +1 -0
  72. package/dist/extraction/llm/hash.js +22 -0
  73. package/dist/extraction/llm/hash.js.map +1 -0
  74. package/dist/extraction/llm/openai.d.ts +3 -0
  75. package/dist/extraction/llm/openai.d.ts.map +1 -0
  76. package/dist/extraction/llm/openai.js +38 -0
  77. package/dist/extraction/llm/openai.js.map +1 -0
  78. package/dist/extraction/llm/select.d.ts +5 -0
  79. package/dist/extraction/llm/select.d.ts.map +1 -0
  80. package/dist/extraction/llm/select.js +27 -0
  81. package/dist/extraction/llm/select.js.map +1 -0
  82. package/dist/extraction/llm/types.d.ts +24 -0
  83. package/dist/extraction/llm/types.d.ts.map +1 -0
  84. package/dist/extraction/llm/types.js +2 -0
  85. package/dist/extraction/llm/types.js.map +1 -0
  86. package/dist/extraction/llm/validate.d.ts +6 -0
  87. package/dist/extraction/llm/validate.d.ts.map +1 -0
  88. package/dist/extraction/llm/validate.js +63 -0
  89. package/dist/extraction/llm/validate.js.map +1 -0
  90. package/dist/extraction/llm-fallback.d.ts +17 -0
  91. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  92. package/dist/extraction/llm-fallback.js +129 -0
  93. package/dist/extraction/llm-fallback.js.map +1 -0
  94. package/dist/extraction/markdown.d.ts +9 -0
  95. package/dist/extraction/markdown.d.ts.map +1 -1
  96. package/dist/extraction/markdown.js +52 -3
  97. package/dist/extraction/markdown.js.map +1 -1
  98. package/dist/extraction/pipeline.d.ts.map +1 -1
  99. package/dist/extraction/pipeline.js +17 -5
  100. package/dist/extraction/pipeline.js.map +1 -1
  101. package/dist/extraction/readability.d.ts.map +1 -1
  102. package/dist/extraction/readability.js +2 -3
  103. package/dist/extraction/readability.js.map +1 -1
  104. package/dist/extraction/schema.d.ts +12 -0
  105. package/dist/extraction/schema.d.ts.map +1 -1
  106. package/dist/extraction/schema.js +81 -11
  107. package/dist/extraction/schema.js.map +1 -1
  108. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  109. package/dist/extraction/site-extractors/docs-generic.js +2 -3
  110. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  111. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  112. package/dist/extraction/site-extractors/github.js +4 -5
  113. package/dist/extraction/site-extractors/github.js.map +1 -1
  114. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  115. package/dist/extraction/site-extractors/mdn.js +2 -3
  116. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  117. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  118. package/dist/extraction/site-extractors/stackoverflow.js +3 -4
  119. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  120. package/dist/extraction/structured-data.d.ts +4 -0
  121. package/dist/extraction/structured-data.d.ts.map +1 -0
  122. package/dist/extraction/structured-data.js +203 -0
  123. package/dist/extraction/structured-data.js.map +1 -0
  124. package/dist/fetch/router.d.ts +2 -1
  125. package/dist/fetch/router.d.ts.map +1 -1
  126. package/dist/fetch/router.js +19 -1
  127. package/dist/fetch/router.js.map +1 -1
  128. package/dist/instructions.d.ts +7 -7
  129. package/dist/instructions.d.ts.map +1 -1
  130. package/dist/instructions.js +43 -36
  131. package/dist/instructions.js.map +1 -1
  132. package/dist/logger.d.ts +1 -1
  133. package/dist/logger.d.ts.map +1 -1
  134. package/dist/research/brief.js +1 -1
  135. package/dist/research/brief.js.map +1 -1
  136. package/dist/search/evidence.d.ts +25 -0
  137. package/dist/search/evidence.d.ts.map +1 -0
  138. package/dist/search/evidence.js +260 -0
  139. package/dist/search/evidence.js.map +1 -0
  140. package/dist/search/highlights.d.ts +11 -2
  141. package/dist/search/highlights.d.ts.map +1 -1
  142. package/dist/search/highlights.js +131 -48
  143. package/dist/search/highlights.js.map +1 -1
  144. package/dist/search/multi-query.d.ts +1 -0
  145. package/dist/search/multi-query.d.ts.map +1 -1
  146. package/dist/search/multi-query.js +13 -0
  147. package/dist/search/multi-query.js.map +1 -1
  148. package/dist/search/rerank.d.ts +3 -2
  149. package/dist/search/rerank.d.ts.map +1 -1
  150. package/dist/search/rerank.js +16 -44
  151. package/dist/search/rerank.js.map +1 -1
  152. package/dist/search/reranker/download.d.ts +9 -0
  153. package/dist/search/reranker/download.d.ts.map +1 -0
  154. package/dist/search/reranker/download.js +77 -0
  155. package/dist/search/reranker/download.js.map +1 -0
  156. package/dist/search/reranker/models.d.ts +14 -0
  157. package/dist/search/reranker/models.d.ts.map +1 -0
  158. package/dist/search/reranker/models.js +37 -0
  159. package/dist/search/reranker/models.js.map +1 -0
  160. package/dist/search/reranker/onnx.d.ts +13 -0
  161. package/dist/search/reranker/onnx.d.ts.map +1 -0
  162. package/dist/search/reranker/onnx.js +70 -0
  163. package/dist/search/reranker/onnx.js.map +1 -0
  164. package/dist/search/reranker/recency-boost.d.ts +3 -0
  165. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  166. package/dist/search/reranker/recency-boost.js +12 -0
  167. package/dist/search/reranker/recency-boost.js.map +1 -0
  168. package/dist/search/reranker/recency.d.ts +3 -0
  169. package/dist/search/reranker/recency.d.ts.map +1 -0
  170. package/dist/search/reranker/recency.js +26 -0
  171. package/dist/search/reranker/recency.js.map +1 -0
  172. package/dist/search/reranker/tokenizer.d.ts +30 -0
  173. package/dist/search/reranker/tokenizer.d.ts.map +1 -0
  174. package/dist/search/reranker/tokenizer.js +49 -0
  175. package/dist/search/reranker/tokenizer.js.map +1 -0
  176. package/dist/search/tokens.d.ts +3 -0
  177. package/dist/search/tokens.d.ts.map +1 -0
  178. package/dist/search/tokens.js +38 -0
  179. package/dist/search/tokens.js.map +1 -0
  180. package/dist/search/truncate.d.ts +4 -0
  181. package/dist/search/truncate.d.ts.map +1 -1
  182. package/dist/search/truncate.js +13 -0
  183. package/dist/search/truncate.js.map +1 -1
  184. package/dist/server/tool-schemas.d.ts +503 -0
  185. package/dist/server/tool-schemas.d.ts.map +1 -0
  186. package/dist/server/tool-schemas.js +425 -0
  187. package/dist/server/tool-schemas.js.map +1 -0
  188. package/dist/server.d.ts.map +1 -1
  189. package/dist/server.js +1 -326
  190. package/dist/server.js.map +1 -1
  191. package/dist/tools/agent.d.ts.map +1 -1
  192. package/dist/tools/agent.js +36 -0
  193. package/dist/tools/agent.js.map +1 -1
  194. package/dist/tools/crawl.d.ts.map +1 -1
  195. package/dist/tools/crawl.js +37 -2
  196. package/dist/tools/crawl.js.map +1 -1
  197. package/dist/tools/extract.d.ts.map +1 -1
  198. package/dist/tools/extract.js +19 -3
  199. package/dist/tools/extract.js.map +1 -1
  200. package/dist/tools/fetch.d.ts.map +1 -1
  201. package/dist/tools/fetch.js +44 -7
  202. package/dist/tools/fetch.js.map +1 -1
  203. package/dist/tools/find-similar.d.ts.map +1 -1
  204. package/dist/tools/find-similar.js +32 -1
  205. package/dist/tools/find-similar.js.map +1 -1
  206. package/dist/tools/research.d.ts.map +1 -1
  207. package/dist/tools/research.js +34 -1
  208. package/dist/tools/research.js.map +1 -1
  209. package/dist/tools/search.d.ts.map +1 -1
  210. package/dist/tools/search.js +101 -55
  211. package/dist/tools/search.js.map +1 -1
  212. package/dist/types.d.ts +65 -1
  213. package/dist/types.d.ts.map +1 -1
  214. package/dist/types.js +1 -1
  215. package/dist/types.js.map +1 -1
  216. package/dist/util/mode.d.ts +4 -0
  217. package/dist/util/mode.d.ts.map +1 -0
  218. package/dist/util/mode.js +13 -0
  219. package/dist/util/mode.js.map +1 -0
  220. package/package.json +9 -1
  221. package/dist/search/flashrank.d.ts +0 -12
  222. package/dist/search/flashrank.d.ts.map +0 -1
  223. package/dist/search/flashrank.js +0 -64
  224. package/dist/search/flashrank.js.map +0 -1
@@ -0,0 +1,35 @@
1
+ import { GoogleGenAI } from '@google/genai';
2
/** Model used when the caller does not pass `opts.modelOverride`. */
const DEFAULT_MODEL = 'gemini-2.5-flash-lite';

/**
 * Gemini adapter: sends `opts.prompt` in a single `generateContent` call that
 * asks for JSON shaped by `opts.jsonSchema`, then parses the returned text.
 *
 * @param {object} opts - Call options: `prompt`, `jsonSchema`, optional
 *   `modelOverride` and optional `signal` (forwarded as `abortSignal`).
 * @param {string} apiKey - Key handed to the `GoogleGenAI` client.
 * @returns {Promise<object>} `{ values, provider: 'gemini', model, cached: false, latencyMs }`.
 * @throws {Error} When the response has no text, or the text is not valid
 *   JSON (the underlying parse error is attached as `cause`).
 */
export async function callGemini(opts, apiKey) {
    const client = new GoogleGenAI({ apiKey });
    const model = opts.modelOverride ?? DEFAULT_MODEL;
    const start = Date.now();
    const response = await client.models.generateContent({
        model,
        contents: opts.prompt,
        config: {
            responseMimeType: 'application/json',
            responseJsonSchema: opts.jsonSchema,
            abortSignal: opts.signal,
        },
    });
    const text = response.text;
    if (!text) {
        throw new Error('gemini: empty text in response');
    }
    let values;
    try {
        values = JSON.parse(text);
    }
    catch (e) {
        // Keep the original SyntaxError reachable so callers can inspect it;
        // the message itself is unchanged.
        throw new Error(`gemini: invalid JSON in response: ${e.message}`, { cause: e });
    }
    return {
        values,
        provider: 'gemini',
        model,
        cached: false,
        // Measured from just before the request was issued.
        latencyMs: Date.now() - start,
    };
}
35
+ //# sourceMappingURL=gemini.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gemini.js","sourceRoot":"","sources":["../../../src/extraction/llm/gemini.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,MAAM,aAAa,GAAG,uBAAuB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC;QACnD,KAAK;QACL,QAAQ,EAAE,IAAI,CAAC,MAAM;QACrB,MAAM,EAAE;YACN,gBAAgB,EAAE,kBAAkB;YACpC,kBAAkB,EAAE,IAAI,CAAC,UAAU;YACnC,WAAW,EAAE,IAAI,CAAC,MAAM;SACzB;KACF,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;IAC3B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,qCAAsC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,OAAO;QACL,MAAM;QACN,QAAQ,EAAE,QAAQ;QAClB,KAAK;QACL,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { LLMCallOpts, LLMExtractResult } from './types.js';
2
+ export declare function callGroq(opts: LLMCallOpts, apiKey: string): Promise<LLMExtractResult>; // Groq adapter: requests a JSON-object completion, checks it against opts.jsonSchema, retries once with the validation errors fed back, and rejects if the retry still fails validation.
3
+ //# sourceMappingURL=groq.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"groq.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/groq.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAKhE,wBAAsB,QAAQ,CAC5B,IAAI,EAAE,WAAW,EACjB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,gBAAgB,CAAC,CA2B3B"}
@@ -0,0 +1,63 @@
1
+ import Groq from 'groq-sdk';
2
+ import { validateAgainstSchema } from './validate.js';
3
/** Model used when the caller does not pass `opts.modelOverride`. */
const DEFAULT_MODEL = 'llama-3.3-70b-versatile';

/**
 * Groq adapter. The request only asks for a generic JSON object, so the
 * schema is embedded in the prompt and the reply is validated locally. A
 * reply that fails validation gets exactly one retry, with the validation
 * errors appended to the conversation.
 *
 * @param {object} opts - Call options: `prompt`, `jsonSchema`, optional
 *   `modelOverride` and optional `signal`.
 * @param {string} apiKey - Key handed to the `Groq` client.
 * @returns {Promise<object>} `{ values, provider: 'groq', model, cached: false, latencyMs }`.
 * @throws {Error} When the retried reply still fails schema validation, or
 *   when either request fails inside `runOnce`.
 */
export async function callGroq(opts, apiKey) {
    const client = new Groq({ apiKey });
    const model = opts.modelOverride ?? DEFAULT_MODEL;
    const start = Date.now();
    const conversation = [
        { role: 'user', content: buildPrompt(opts.prompt, opts.jsonSchema) },
    ];
    const attempt = () => runOnce(client, model, conversation, opts.signal);
    const check = (candidate) => validateAgainstSchema(candidate, opts.jsonSchema);

    const initial = await attempt();
    const initialErrors = check(initial.values);
    if (initialErrors.length === 0) {
        return done(initial.values, initial.responseModel ?? model, start);
    }

    // Second and last attempt: show the model its own reply plus what was wrong.
    conversation.push({ role: 'assistant', content: initial.raw });
    conversation.push({ role: 'user', content: retryPrompt(initialErrors) });
    const retried = await attempt();
    const retryErrors = check(retried.values);
    if (retryErrors.length === 0) {
        return done(retried.values, retried.responseModel ?? model, start);
    }
    throw new Error(`groq: response failed schema validation after retry: ${formatErrors(retryErrors)}`);
}
26
/**
 * Issues one JSON-mode chat completion and parses the first choice.
 *
 * @param {object} client - Object exposing `chat.completions.create(body, options)`.
 * @param {string} model - Model id to request.
 * @param {Array<{role: string, content: string}>} messages - Conversation so far.
 * @param {AbortSignal} [signal] - Passed through as a request option.
 * @returns {Promise<{values: unknown, raw: string, responseModel: string}>}
 *   Parsed JSON, the raw content string, and the model id reported by the response.
 * @throws {Error} When the first choice has no content, or the content is not
 *   valid JSON (the underlying parse error is attached as `cause`).
 */
async function runOnce(client, model, messages, signal) {
    const response = await client.chat.completions.create({
        model,
        messages,
        response_format: { type: 'json_object' },
    }, { signal });
    const content = response.choices?.[0]?.message?.content;
    if (!content) {
        throw new Error('groq: empty content in response');
    }
    let values;
    try {
        values = JSON.parse(content);
    }
    catch (e) {
        // Keep the original SyntaxError reachable; the message is unchanged.
        throw new Error(`groq: invalid JSON in response: ${e.message}`, { cause: e });
    }
    return { values, raw: content, responseModel: response.model };
}
45
/**
 * Appends the serialized JSON schema to the prompt so the model can see the
 * expected shape.
 *
 * @param {string} prompt - Base prompt text.
 * @param {object} schema - JSON schema to serialize into the prompt.
 * @returns {string} Prompt followed by the schema instruction.
 */
function buildPrompt(prompt, schema) {
    const serializedSchema = JSON.stringify(schema);
    return `${prompt}\n\nReturn JSON matching this schema:\n${serializedSchema}`;
}
48
/**
 * Builds the follow-up message that reports validation failures back to the
 * model.
 *
 * @param {Array<{path: string, message: string}>} errors - Validation errors to report.
 * @returns {string} Retry instruction containing the formatted errors.
 */
function retryPrompt(errors) {
    const details = formatErrors(errors);
    return `Your previous response failed schema validation:\n${details}\nReturn corrected JSON only.`;
}
51
/**
 * Renders validation errors as `path: message` pairs separated by `; `.
 *
 * @param {Array<{path: string, message: string}>} errors - Errors to render.
 * @returns {string} Single-line summary; empty string for an empty list.
 */
function formatErrors(errors) {
    const describe = ({ path, message }) => `${path}: ${message}`;
    return errors.map(describe).join('; ');
}
54
/**
 * Wraps validated values in the result object returned by the Groq adapter.
 *
 * @param {object} values - Parsed, validated response values.
 * @param {string} model - Model id to report.
 * @param {number} start - `Date.now()` timestamp taken before the first request.
 * @returns {object} `{ values, provider: 'groq', model, cached: false, latencyMs }`.
 */
function done(values, model, start) {
    const latencyMs = Date.now() - start;
    return { values, provider: 'groq', model, cached: false, latencyMs };
}
63
+ //# sourceMappingURL=groq.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"groq.js","sourceRoot":"","sources":["../../../src/extraction/llm/groq.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,UAAU,CAAC;AAE5B,OAAO,EAAE,qBAAqB,EAAwB,MAAM,eAAe,CAAC;AAE5E,MAAM,aAAa,GAAG,yBAAyB,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAA2D;QACvE,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE;KACrE,CAAC;IAEF,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IAClE,IAAI,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAClE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK,EAAE,KAAK,CAAC,CAAC;IACjE,CAAC;IAED,2DAA2D;IAC3D,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;IACzD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAE9D,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACnE,MAAM,GAAG,qBAAqB,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAC/D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACb,wDAAwD,YAAY,CAAC,MAAM,CAAC,EAAE,CAC/E,CAAC;IACJ,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,aAAa,IAAI,KAAK,EAAE,KAAK,CAAC,CAAC;AACnE,CAAC;AAQD,KAAK,UAAU,OAAO,CACpB,MAAY,EACZ,KAAa,EACb,QAAgE,EAChE,MAA+B;IAE/B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CACnD;QACE,KAAK;QACL,QAAQ;QACR,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;KACzC,EACD,EAAE,MAAM,EAAE,CACX,CAAC;IACF,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;IACxD,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAA
C,mCAAoC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC;AACjE,CAAC;AAED,SAAS,WAAW,CAAC,MAAc,EAAE,MAA+B;IAClE,OAAO,GAAG,MAAM,0CAA0C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,SAAS,WAAW,CAAC,MAAyB;IAC5C,OAAO,qDAAqD,YAAY,CAAC,MAAM,CAAC,+BAA+B,CAAC;AAClH,CAAC;AAED,SAAS,YAAY,CAAC,MAAyB;IAC7C,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjE,CAAC;AAED,SAAS,IAAI,CACX,MAA+B,EAC/B,KAAa,EACb,KAAa;IAEb,OAAO;QACL,MAAM;QACN,QAAQ,EAAE,MAAM;QAChB,KAAK;QACL,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,3 @@
1
+ export declare function hashPrompt(prompt: string): string; // SHA-256 hex digest of the prompt after collapsing whitespace runs to one space and trimming.
2
+ export declare function hashSchema(schema: unknown): string; // SHA-256 hex digest of a serialization that sorts object keys, so key order does not affect the hash.
3
+ //# sourceMappingURL=hash.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/hash.ts"],"names":[],"mappings":"AAEA,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAGjD;AAED,wBAAgB,UAAU,CAAC,MAAM,EAAE,OAAO,GAAG,MAAM,CAElD"}
@@ -0,0 +1,22 @@
1
+ import { createHash } from 'node:crypto';
2
/**
 * Hashes a prompt so that differences in whitespace alone do not change the
 * result: whitespace runs collapse to a single space and the ends are trimmed
 * before hashing.
 *
 * @param {string} prompt - Prompt text.
 * @returns {string} SHA-256 digest as lowercase hex.
 */
export function hashPrompt(prompt) {
    const collapsed = prompt.replace(/\s+/g, ' ');
    const hasher = createHash('sha256');
    hasher.update(collapsed.trim());
    return hasher.digest('hex');
}
6
/**
 * Hashes a schema independently of object key order.
 *
 * @param {unknown} schema - Schema (or any JSON-like value) to fingerprint.
 * @returns {string} SHA-256 digest, as lowercase hex, of the stable serialization.
 */
export function hashSchema(schema) {
    return createHash('sha256').update(stableStringify(schema)).digest('hex');
}

/**
 * Serializes a value like `JSON.stringify`, but with object keys sorted so
 * that equal structures always produce the same text. Output for
 * JSON-compatible input is unchanged from the previous implementation.
 *
 * Values JSON cannot represent (undefined, functions, symbols) follow JSON
 * rules: they are dropped from objects and become `null` elsewhere. This
 * guarantees the function always returns a string.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} Deterministic JSON text.
 */
function stableStringify(value) {
    if (value === null || typeof value !== 'object') {
        // JSON.stringify yields undefined (not a string) for undefined,
        // functions and symbols; fall back to "null" so hashing never throws.
        return JSON.stringify(value) ?? 'null';
    }
    if (Array.isArray(value)) {
        // Array.from visits holes too, so they serialize as null like in JSON.
        return `[${Array.from(value, (item) => stableStringify(item)).join(',')}]`;
    }
    const parts = [];
    for (const key of Object.keys(value).sort()) {
        const member = value[key];
        const kind = typeof member;
        if (kind === 'undefined' || kind === 'function' || kind === 'symbol') {
            // JSON omits such members; `{ a: 1, b: undefined }` must hash like `{ a: 1 }`.
            continue;
        }
        parts.push(`${JSON.stringify(key)}:${stableStringify(member)}`);
    }
    return `{${parts.join(',')}}`;
}
22
+ //# sourceMappingURL=hash.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hash.js","sourceRoot":"","sources":["../../../src/extraction/llm/hash.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,MAAM,UAAU,UAAU,CAAC,MAAc;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC/D,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAe;IACxC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5E,CAAC;AAED,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IACD,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;IAC1D,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,KAAgC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CACpB,CAAC,CAAC,EAAE,EAAE,CACJ,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QACjB,GAAG;QACH,eAAe,CAAE,KAAiC,CAAC,CAAC,CAAC,CAAC,CACzD,CAAC;IACF,OAAO,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;AACrC,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { LLMCallOpts, LLMExtractResult } from './types.js';
2
+ export declare function callOpenAI(opts: LLMCallOpts, apiKey: string): Promise<LLMExtractResult>; // OpenAI adapter: one chat completion using a strict json_schema response format built from opts.jsonSchema; rejects on empty or non-JSON content.
3
+ //# sourceMappingURL=openai.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/openai.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAIhE,wBAAsB,UAAU,CAC9B,IAAI,EAAE,WAAW,EACjB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,gBAAgB,CAAC,CAwC3B"}
@@ -0,0 +1,38 @@
1
+ import OpenAI from 'openai';
2
/** Model used when the caller does not pass `opts.modelOverride`. */
const DEFAULT_MODEL = 'gpt-4o-mini';

/**
 * OpenAI adapter: sends `opts.prompt` as a single user message and requests a
 * strict `json_schema` response format built from `opts.jsonSchema`.
 *
 * @param {object} opts - Call options: `prompt`, `jsonSchema`, optional
 *   `modelOverride` and optional `signal` (passed as a request option).
 * @param {string} apiKey - Key handed to the `OpenAI` client.
 * @returns {Promise<object>} `{ values, provider: 'openai', model, cached: false, latencyMs }`,
 *   where `model` is the id reported by the response when present.
 * @throws {Error} When the first choice has no content, or the content is not
 *   valid JSON (the underlying parse error is attached as `cause`).
 */
export async function callOpenAI(opts, apiKey) {
    const client = new OpenAI({ apiKey });
    const model = opts.modelOverride ?? DEFAULT_MODEL;
    const start = Date.now();
    const response = await client.chat.completions.create({
        model,
        messages: [{ role: 'user', content: opts.prompt }],
        response_format: {
            type: 'json_schema',
            json_schema: {
                name: 'extract',
                schema: opts.jsonSchema,
                strict: true,
            },
        },
    }, { signal: opts.signal });
    const content = response.choices?.[0]?.message?.content;
    if (!content) {
        throw new Error('openai: empty content in response');
    }
    let values;
    try {
        values = JSON.parse(content);
    }
    catch (e) {
        // Keep the original SyntaxError reachable; the message is unchanged.
        throw new Error(`openai: invalid JSON in response: ${e.message}`, { cause: e });
    }
    return {
        values,
        provider: 'openai',
        // Prefer the model id the API says it served over the one requested.
        model: response.model ?? model,
        cached: false,
        latencyMs: Date.now() - start,
    };
}
38
+ //# sourceMappingURL=openai.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openai.js","sourceRoot":"","sources":["../../../src/extraction/llm/openai.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAG5B,MAAM,aAAa,GAAG,aAAa,CAAC;AAEpC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CACnD;QACE,KAAK;QACL,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;QAClD,eAAe,EAAE;YACf,IAAI,EAAE,aAAa;YACnB,WAAW,EAAE;gBACX,IAAI,EAAE,SAAS;gBACf,MAAM,EAAE,IAAI,CAAC,UAAU;gBACvB,MAAM,EAAE,IAAI;aACb;SACF;KACF,EACD,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CACxB,CAAC;IAEF,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;IACxD,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,qCAAsC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,OAAO;QACL,MAAM;QACN,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,KAAK;QAC9B,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,5 @@
1
+ import type { LLMProvider } from './types.js';
2
+ export declare function selectProvider(env: Record<string, string | undefined>): LLMProvider | null; // Picks the provider named by WIGOLO_LLM_PROVIDER when that provider's API-key variable is set in env; otherwise the first provider in priority order whose key is set; null when no key is set.
3
+ export declare function providerEnvVar(p: LLMProvider): string; // Name of the environment variable that holds provider p's API key.
4
+ export declare function allProviders(): readonly LLMProvider[]; // All providers in selection-priority order.
5
+ //# sourceMappingURL=select.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"select.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/select.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAW9C,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,GACtC,WAAW,GAAG,IAAI,CAUpB;AAED,wBAAgB,cAAc,CAAC,CAAC,EAAE,WAAW,GAAG,MAAM,CAErD;AAED,wBAAgB,YAAY,IAAI,SAAS,WAAW,EAAE,CAErD"}
@@ -0,0 +1,27 @@
1
/** Providers in the order they are tried when no usable override is given. */
const PROVIDER_ORDER = ['anthropic', 'openai', 'gemini', 'groq'];

/** Environment variable holding each provider's API key. */
const PROVIDER_ENV = {
    anthropic: 'ANTHROPIC_API_KEY',
    openai: 'OPENAI_API_KEY',
    gemini: 'GOOGLE_API_KEY',
    groq: 'GROQ_API_KEY',
};

/**
 * Chooses which LLM provider to use based on the environment.
 *
 * `WIGOLO_LLM_PROVIDER` wins when it names a known provider whose API key is
 * set. The override is trimmed and lower-cased first, so values such as
 * `OpenAI` or ` groq ` are honoured instead of silently ignored. Otherwise
 * the first provider in `PROVIDER_ORDER` with a non-empty key is used.
 *
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @returns {string | null} Provider id, or `null` when no provider has a key.
 */
export function selectProvider(env) {
    const hasKey = (provider) => Boolean(env[PROVIDER_ENV[provider]]);
    const override = env.WIGOLO_LLM_PROVIDER?.trim().toLowerCase();
    // Membership is checked before indexing PROVIDER_ENV so arbitrary
    // override strings never reach the lookup table.
    if (override && PROVIDER_ORDER.includes(override) && hasKey(override)) {
        return override;
    }
    return PROVIDER_ORDER.find(hasKey) ?? null;
}

/**
 * @param {string} p - Provider id.
 * @returns {string} Name of the environment variable holding that provider's API key.
 */
export function providerEnvVar(p) {
    return PROVIDER_ENV[p];
}

/**
 * @returns {readonly string[]} All provider ids in selection-priority order.
 */
export function allProviders() {
    return PROVIDER_ORDER;
}
27
+ //# sourceMappingURL=select.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"select.js","sourceRoot":"","sources":["../../../src/extraction/llm/select.ts"],"names":[],"mappings":"AAEA,MAAM,cAAc,GAAkB,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;AAEhF,MAAM,YAAY,GAAgC;IAChD,SAAS,EAAE,mBAAmB;IAC9B,MAAM,EAAE,gBAAgB;IACxB,MAAM,EAAE,gBAAgB;IACxB,IAAI,EAAE,cAAc;CACrB,CAAC;AAEF,MAAM,UAAU,cAAc,CAC5B,GAAuC;IAEvC,MAAM,QAAQ,GAAG,GAAG,CAAC,mBAAmB,CAAC;IACzC,IAAI,QAAQ,IAAK,cAA2B,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChE,MAAM,CAAC,GAAG,QAAuB,CAAC;QAClC,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,CAAC;IACrC,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,CAAC;IACrC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,CAAc;IAC3C,OAAO,YAAY,CAAC,CAAC,CAAC,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,YAAY;IAC1B,OAAO,cAAc,CAAC;AACxB,CAAC"}
@@ -0,0 +1,24 @@
1
+ export type LLMProvider = 'anthropic' | 'openai' | 'gemini' | 'groq'; // Identifiers of the supported LLM providers.
2
+ export interface LLMExtractResult { // Result object returned by each provider adapter.
3
+ values: Record<string, unknown>; // JSON parsed from the provider's response.
4
+ provider: LLMProvider; // Provider that produced the values.
5
+ model: string; // Model id, as requested or as reported back by the provider.
6
+ cached: boolean; // The provider adapters always set this to false.
7
+ latencyMs: number; // Milliseconds elapsed since the adapter started the call.
8
+ warnings?: string[];
9
+ }
10
+ export interface LLMCallRecord { // NOTE(review): presumably one row of the LLM response cache (llm/cache) - confirm against that module.
11
+ modelId: string;
12
+ promptHash: string; // presumably the output of hashPrompt - confirm.
13
+ schemaHash: string; // presumably the output of hashSchema - confirm.
14
+ response: string;
15
+ createdAt: number;
16
+ expiresAt: number;
17
+ }
18
+ export interface LLMCallOpts { // Options accepted by every provider adapter.
19
+ prompt: string; // Prompt text sent to the model.
20
+ jsonSchema: Record<string, unknown>; // JSON schema the response is expected to satisfy.
21
+ modelOverride?: string; // Replaces the adapter's default model when set.
22
+ signal?: AbortSignal; // Forwarded to the provider SDK call.
23
+ }
24
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;AAErE,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/extraction/llm/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,6 @@
1
+ export interface ValidationError { // One schema violation reported by validateAgainstSchema.
2
+ path: string; // Location of the offending value, rooted at "$" (e.g. "$.items[0].name").
3
+ message: string; // "required", or "expected type X but got Y".
4
+ }
5
+ export declare function validateAgainstSchema(value: unknown, schema: Record<string, unknown>): ValidationError[]; // Checks required keys and types only; returns an empty array when no violation is found.
6
+ //# sourceMappingURL=validate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/validate.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AASD,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,OAAO,EACd,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC9B,eAAe,EAAE,CAInB"}
@@ -0,0 +1,63 @@
1
+ // Minimal recursive JSON Schema validator: required + type checks.
2
+ // Sufficient for post-hoc validation of provider responses where the SDK
3
+ // does not enforce a schema natively (e.g. Groq json_object).
4
/**
 * Validates a value against the `type`, `required`, `properties` and `items`
 * keywords of a JSON schema. Other keywords are ignored.
 *
 * @param {unknown} value - Value to check (typically parsed JSON).
 * @param {object} schema - JSON schema.
 * @returns {Array<{path: string, message: string}>} Violations found; empty when valid.
 */
export function validateAgainstSchema(value, schema) {
    const errors = [];
    walk(value, schema, '$', errors);
    return errors;
}

/**
 * Recursively checks `value` against `schema`, appending violations to `errors`.
 *
 * @param {unknown} value - Value at the current position.
 * @param {object} schema - Schema that applies at the current position.
 * @param {string} path - Path of the current position, rooted at `$`.
 * @param {Array<{path: string, message: string}>} errors - Accumulator, mutated in place.
 */
function walk(value, schema, path, errors) {
    // Boolean or missing sub-schemas carry no keyword this validator understands.
    if (schema === null || typeof schema !== 'object') {
        return;
    }
    let types = [];
    if (schema.type) {
        types = Array.isArray(schema.type) ? schema.type : [schema.type];
        if (!types.some((t) => matchesType(value, t))) {
            errors.push({
                path,
                message: `expected type ${types.join('|')} but got ${actualType(value)}`,
            });
            // Children of a wrongly-typed value would only add noise.
            return;
        }
    }
    // Descend whenever `type` allows the container kind, including union
    // declarations such as ['object', 'null'], which were previously skipped.
    if (types.includes('object') && matchesType(value, 'object')) {
        for (const req of schema.required ?? []) {
            if (ownValue(value, req) === undefined) {
                errors.push({ path: `${path}.${req}`, message: 'required' });
            }
        }
        for (const [k, sub] of Object.entries(schema.properties ?? {})) {
            const child = ownValue(value, k);
            if (child !== undefined) {
                walk(child, sub, `${path}.${k}`, errors);
            }
        }
    }
    if (types.includes('array') && Array.isArray(value) && schema.items) {
        value.forEach((item, i) => walk(item, schema.items, `${path}[${i}]`, errors));
    }
}

/**
 * Reads an own property only, so inherited members such as `toString` or
 * `constructor` never count as present data.
 *
 * @param {object} obj - Object to read from.
 * @param {string} key - Property name.
 * @returns {unknown} The own property's value, or `undefined` when absent.
 */
function ownValue(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : undefined;
}

/**
 * Tests a value against a single JSON Schema type name. Unknown type names
 * are accepted.
 *
 * @param {unknown} value - Value to test.
 * @param {string} type - JSON Schema type name.
 * @returns {boolean} Whether the value satisfies the type.
 */
function matchesType(value, type) {
    switch (type) {
        case 'string':
            return typeof value === 'string';
        case 'number':
            return typeof value === 'number';
        case 'integer':
            // A fractional number such as 1.5 is a number but not an integer.
            return Number.isInteger(value);
        case 'boolean':
            return typeof value === 'boolean';
        case 'null':
            return value === null;
        case 'array':
            return Array.isArray(value);
        case 'object':
            return value !== null && typeof value === 'object' && !Array.isArray(value);
        default:
            return true;
    }
}

/**
 * Names a value's type for error messages, distinguishing `null` and arrays
 * from plain objects.
 *
 * @param {unknown} value - Value to describe.
 * @returns {string} `'null'`, `'array'`, or the `typeof` result.
 */
function actualType(value) {
    if (value === null)
        return 'null';
    if (Array.isArray(value))
        return 'array';
    return typeof value;
}
63
+ //# sourceMappingURL=validate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validate.js","sourceRoot":"","sources":["../../../src/extraction/llm/validate.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,yEAAyE;AACzE,8DAA8D;AAc9D,MAAM,UAAU,qBAAqB,CACnC,KAAc,EACd,MAA+B;IAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,MAAqB,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;IAChD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,IAAI,CACX,KAAc,EACd,MAAmB,EACnB,IAAY,EACZ,MAAyB;IAEzB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACvE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9C,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI;gBACJ,OAAO,EAAE,iBAAiB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,UAAU,CAAC,KAAK,CAAC,EAAE;aACzE,CAAC,CAAC;YACH,OAAO;QACT,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACnE,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;YACxC,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,IAAI,IAAI,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/D,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,IAAI,IAAI,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACpE,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACxB,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,KAAoB,EAAE,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CACjE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,KAAc,EAAE,IAAY;IAC/C,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC;QACnC,KAAK,QAAQ,CAAC;QACd,KAAK,SAAS;YACZ,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC;QACnC,KAAK,SAAS;YACZ,OAAO,OAAO,KAAK,KAAK,SAAS,CAAC;QACpC,KAAK,MAAM;YACT,O
AAO,KAAK,KAAK,IAAI,CAAC;QACxB,KAAK,OAAO;YACV,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC9B,KAAK,QAAQ;YACX,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC9E;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,MAAM,CAAC;IAClC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IACzC,OAAO,OAAO,KAAK,CAAC;AACtB,CAAC"}
@@ -0,0 +1,17 @@
1
+ import type { LLMExtractResult } from './llm/types.js';
2
/**
 * Mutable per-request counter of LLM calls that may still be made.
 * extractWithLLM skips the provider call when `remaining` is <= 0 and
 * decrements it (never below 0) after each provider attempt.
 */
+ export interface LLMFallbackBudget {
3
/** Number of provider calls still allowed. */
+ remaining: number;
4
+ }
5
/** Arguments accepted by extractWithLLM. */
+ export interface LLMFallbackInput {
6
/** Page HTML embedded (after truncation) at the end of the prompt. */
+ html: string;
7
/** JSON schema passed to the provider adapter, hashed for the cache key, and used to validate the response. */
+ jsonSchema: Record<string, unknown>;
8
/** Values already extracted; copied into the result and only extended, never mutated. */
+ partial: Record<string, unknown>;
9
/** Names of the fields to fill; when empty, no LLM call is made. */
+ missing: string[];
10
/** Forwarded unchanged to the provider adapter. */
+ signal?: AbortSignal;
11
/** Shared call budget; when omitted, a fresh one is created from the configured per-request cap. */
+ budget?: LLMFallbackBudget;
12
+ }
13
/** Value resolved by extractWithLLM: the LLMExtractResult fields plus a list of warnings. */
+ export interface LLMFallbackResult extends LLMExtractResult {
14
/** Human-readable notes, e.g. why the fallback was skipped or why a provider response was rejected. */
+ warnings: string[];
15
+ }
16
+ export declare function extractWithLLM(input: LLMFallbackInput): Promise<LLMFallbackResult>;
17
+ //# sourceMappingURL=llm-fallback.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-fallback.d.ts","sourceRoot":"","sources":["../../src/extraction/llm-fallback.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,gBAAgB,EAAe,MAAM,gBAAgB,CAAC;AAKpE,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAED,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAeD,wBAAsB,cAAc,CAClC,KAAK,EAAE,gBAAgB,GACtB,OAAO,CAAC,iBAAiB,CAAC,CAsF5B"}
@@ -0,0 +1,129 @@
1
+ import { getConfig } from '../config.js';
2
+ import { callAnthropic } from './llm/anthropic.js';
3
+ import { callOpenAI } from './llm/openai.js';
4
+ import { callGemini } from './llm/gemini.js';
5
+ import { callGroq } from './llm/groq.js';
6
+ import { ensureLLMCacheTable, insertLLMCache, lookupLLMCache, } from './llm/cache.js';
7
+ import { hashPrompt, hashSchema } from './llm/hash.js';
8
+ import { allProviders, providerEnvVar, selectProvider } from './llm/select.js';
9
+ import { validateAgainstSchema } from './llm/validate.js';
10
// Cap applied by buildPrompt (through truncate) to the HTML embedded in the prompt.
+ const MAX_HTML_BYTES = 50_000;
11
// Provider name -> adapter function. extractWithLLM invokes the selected
// adapter as adapter({ prompt, jsonSchema, signal }, apiKey).
+ const ADAPTERS = {
12
+ anthropic: callAnthropic,
13
+ openai: callOpenAI,
14
+ gemini: callGemini,
15
+ groq: callGroq,
16
+ };
17
/**
 * Fill the still-missing fields of a partial extraction by asking an LLM.
 *
 * Steps, in order:
 *  1. Return a copy of `input.partial` untouched when nothing is missing,
 *     when the call budget is exhausted, or when no provider is selected
 *     from the environment (the latter two add an explanatory warning).
 *  2. Build the prompt and look it up in the LLM cache, keyed by model id,
 *     prompt hash and schema hash. A usable hit is returned without
 *     consuming budget.
 *  3. Call the provider adapter; the budget is decremented whether the call
 *     succeeds or throws.
 *  4. Validate the response against `input.jsonSchema`, cache it, and merge
 *     only the fields named in `input.missing` into a copy of the partial.
 *
 * Provider errors and schema-validation failures are reported through
 * `warnings` on the returned object rather than thrown. A cache row that
 * cannot be parsed into an object is ignored (treated as a miss) and noted
 * in `warnings`.
 *
 * @param input Object with `html`, `jsonSchema`, `partial`, `missing` and
 *   optional `signal` / `budget`.
 * @returns Promise of `{ values, provider, model, cached, latencyMs, warnings }`.
 */
export async function extractWithLLM(input) {
    if (input.missing.length === 0) {
        return emptyResult(input.partial, []);
    }
    const cfg = getConfig();
    const budget = input.budget ?? { remaining: cfg.llmMaxCallsPerRequest };
    if (budget.remaining <= 0) {
        return emptyResult(input.partial, [
            `LLM fallback skipped: per-request budget exhausted (cap ${cfg.llmMaxCallsPerRequest}). Override via WIGOLO_LLM_MAX_CALLS_PER_REQUEST.`,
        ]);
    }
    const provider = selectProvider(process.env);
    if (!provider) {
        const envList = allProviders()
            .map((p) => providerEnvVar(p))
            .join(', ');
        return emptyResult(input.partial, [
            `LLM fallback skipped: no provider key set (${envList}). ` +
                `${input.missing.length} required field(s) still missing: ${input.missing.join(', ')}.`,
        ]);
    }
    const apiKey = process.env[providerEnvVar(provider)];
    const prompt = buildPrompt(input);
    const promptHash = hashPrompt(prompt);
    const schemaHash = hashSchema(input.jsonSchema);
    const modelId = `${provider}:default`;

    // Warnings gathered before the provider call (currently only cache issues).
    const cacheWarnings = [];

    ensureLLMCacheTable();
    const cached = lookupLLMCache(modelId, promptHash, schemaHash);
    if (cached) {
        let cachedValues;
        try {
            cachedValues = JSON.parse(cached);
        }
        catch {
            // Corrupt row: fall through and treat it as a cache miss.
            cachedValues = undefined;
        }
        if (cachedValues !== null && typeof cachedValues === 'object') {
            return {
                values: mergeOnlyMissing(input.partial, cachedValues, input.missing),
                provider,
                model: modelId,
                cached: true,
                latencyMs: 0,
                warnings: [],
            };
        }
        cacheWarnings.push(`LLM cache entry for ${modelId} ignored: stored response is not a JSON object.`);
    }

    let result;
    try {
        result = await ADAPTERS[provider]({ prompt, jsonSchema: input.jsonSchema, signal: input.signal }, apiKey);
    }
    catch (e) {
        // Anything can be thrown; only Error instances carry a message.
        const reason = e instanceof Error ? e.message : String(e);
        return emptyResult(input.partial, [
            ...cacheWarnings,
            `LLM fallback (${provider}) failed: ${reason}`,
        ]);
    }
    finally {
        // An attempt counts against the budget even when it fails.
        budget.remaining = Math.max(0, budget.remaining - 1);
    }

    const errors = validateAgainstSchema(result.values, input.jsonSchema);
    if (errors.length > 0) {
        return emptyResult(input.partial, [
            ...cacheWarnings,
            `LLM fallback (${provider}) response failed schema validation: ${errors
                .map((e) => `${e.path} ${e.message}`)
                .join('; ')}`,
        ]);
    }

    const ttlMs = cfg.llmCacheTtlDays * 24 * 60 * 60 * 1000;
    const now = Date.now();
    insertLLMCache({
        modelId,
        promptHash,
        schemaHash,
        response: JSON.stringify(result.values),
        createdAt: now,
        expiresAt: now + ttlMs,
    });
    return {
        values: mergeOnlyMissing(input.partial, result.values, input.missing),
        provider,
        model: result.model,
        cached: false,
        latencyMs: result.latencyMs,
        warnings: [...cacheWarnings, ...(result.warnings ?? [])],
    };
}
95
/**
 * Build the "no LLM contribution" result: a shallow copy of the values
 * extracted so far, placeholder provider/model metadata, and the given
 * warnings.
 *
 * @param partial  Values already extracted; copied, not mutated.
 * @param warnings Messages to attach to the result (stored by reference).
 * @returns Result object with `cached: false` and `latencyMs: 0`.
 */
function emptyResult(partial, warnings) {
    const values = { ...partial };
    const result = {
        values,
        provider: 'anthropic',
        model: '',
        cached: false,
        latencyMs: 0,
        warnings,
    };
    return result;
}
105
/**
 * Return a copy of `partial` extended with values from `filled`, restricted
 * to the keys listed in `missing`. Keys whose filled value is `undefined`
 * are left as they are in `partial`; neither input object is mutated.
 *
 * @param partial Values already extracted.
 * @param filled  Candidate values (e.g. from the LLM or the cache).
 * @param missing Keys that may be taken from `filled`.
 * @returns New merged object.
 */
function mergeOnlyMissing(partial, filled, missing) {
    const merged = { ...partial };
    for (const field of missing) {
        const candidate = filled[field];
        if (candidate === undefined) {
            continue;
        }
        merged[field] = candidate;
    }
    return merged;
}
113
/**
 * Compose the text prompt sent to the provider: three instruction lines
 * (including the comma-separated missing field names), a blank line, an
 * "HTML:" marker, and the page HTML capped via truncate(…, MAX_HTML_BYTES).
 *
 * @param input Object providing `html` and `missing`.
 * @returns The newline-joined prompt string.
 */
function buildPrompt(input) {
    const pageHtml = truncate(input.html, MAX_HTML_BYTES);
    const fieldList = input.missing.join(', ');
    const preamble = [
        'Extract the following missing fields from the HTML below.',
        `Missing fields: ${fieldList}.`,
        'Return JSON matching the provided schema. Do not invent values; if a field is not present in the HTML, omit it.',
    ].join('\n');
    return `${preamble}\n\nHTML:\n${pageHtml}`;
}
124
/**
 * Limit a string to at most `maxBytes` bytes of UTF-8, cutting only on
 * code-point boundaries.
 *
 * Measuring `s.length` counts UTF-16 code units, so non-ASCII text could
 * exceed the byte cap, and a plain `slice` can split a surrogate pair.
 * Here each code point is charged its UTF-8 width (1-4 bytes) and the
 * string is cut before the first code point that would not fit. For pure
 * ASCII input the result equals `s.slice(0, maxBytes)`.
 *
 * @param s        Text to limit.
 * @param maxBytes Maximum encoded size in bytes; values <= 0 yield ''.
 * @returns `s` itself when it fits, otherwise its longest fitting prefix.
 */
function truncate(s, maxBytes) {
    // A UTF-16 code unit never needs more than 3 UTF-8 bytes, so short
    // strings are known to fit without scanning them.
    if (s.length * 3 <= maxBytes) {
        return s;
    }
    let usedBytes = 0;
    let end = 0;
    while (end < s.length) {
        const codePoint = s.codePointAt(end);
        let width = 4;
        if (codePoint <= 0x7f) {
            width = 1;
        }
        else if (codePoint <= 0x7ff) {
            width = 2;
        }
        else if (codePoint <= 0xffff) {
            width = 3;
        }
        if (usedBytes + width > maxBytes) {
            break;
        }
        usedBytes += width;
        // Astral code points occupy two UTF-16 units (a surrogate pair).
        end += codePoint > 0xffff ? 2 : 1;
    }
    return end === s.length ? s : s.slice(0, end);
}
129
+ //# sourceMappingURL=llm-fallback.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-fallback.js","sourceRoot":"","sources":["../../src/extraction/llm-fallback.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,cAAc,GACf,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AACvD,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAE/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAE1D,MAAM,cAAc,GAAG,MAAM,CAAC;AAmB9B,MAAM,QAAQ,GAMV;IACF,SAAS,EAAE,aAAa;IACxB,MAAM,EAAE,UAAU;IAClB,MAAM,EAAE,UAAU;IAClB,IAAI,EAAE,QAAQ;CACf,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAuB;IAEvB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;IACxB,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,EAAE,SAAS,EAAE,GAAG,CAAC,qBAAqB,EAAE,CAAC;IACxE,IAAI,MAAM,CAAC,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,2DAA2D,GAAG,CAAC,qBAAqB,mDAAmD;SACxI,CAAC,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,OAAO,GAAG,YAAY,EAAE;aAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;aAC7B,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,8CAA8C,OAAO,KAAK;gBACxD,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,qCAAqC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;SAC1F,CAAC,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAW,CAAC;IAC/D,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IAClC,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,GAAG,QAAQ,UAAU,CAAC;IAEtC,mBAAmB,EAAE,CAAC;IACtB,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;IAC/D,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAA4B,CAAC;QAC7D,OAAO;YACL,MAAM,EAAE,gBAAgB,CA
AC,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC;YAC9D,QAAQ;YACR,KAAK,EAAE,OAAO;YACd,MAAM,EAAE,IAAI;YACZ,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,EAAE;SACb,CAAC;IACJ,CAAC;IAED,IAAI,MAAwB,CAAC;IAC7B,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAC/B,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,EAC9D,MAAM,CACP,CAAC;IACJ,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,iBAAiB,QAAQ,aAAc,CAAW,CAAC,OAAO,EAAE;SAC7D,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IACtE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,iBAAiB,QAAQ,wCAAwC,MAAM;iBACpE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;iBACpC,IAAI,CAAC,IAAI,CAAC,EAAE;SAChB,CAAC,CAAC;IACL,CAAC;IAED,MAAM,KAAK,GAAG,GAAG,CAAC,eAAe,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,cAAc,CAAC;QACb,OAAO;QACP,UAAU;QACV,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC;QACvC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,GAAG,GAAG,KAAK;KACvB,CAAC,CAAC;IAEH,OAAO;QACL,MAAM,EAAE,gBAAgB,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC;QACrE,QAAQ;QACR,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;KAChC,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAClB,OAAgC,EAChC,QAAkB;IAElB,OAAO;QACL,MAAM,EAAE,EAAE,GAAG,OAAO,EAAE;QACtB,QAAQ,EAAE,WAAW;QACrB,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,CAAC;QACZ,QAAQ;KACT,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,OAAgC,EAChC,MAA+B,EAC/B,OAAiB;IAEjB,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAC3B,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,SAAS;YAAE,GAAG,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IACxD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,WAAW,CAAC,KAAuB;IAC1C,MAAM,IA
AI,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;IAClD,OAAO;QACL,2DAA2D;QAC3D,mBAAmB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;QAC9C,iHAAiH;QACjH,EAAE;QACF,OAAO;QACP,IAAI;KACL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS,EAAE,QAAgB;IAC3C,IAAI,CAAC,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,CAAC;IACnC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;AAC9B,CAAC"}
@@ -1,4 +1,13 @@
1
+ import TurndownService from 'turndown';
2
+ export declare function buildTurndown(): TurndownService;
1
3
  export declare function htmlToMarkdown(html: string): string;
4
/** One heading entry, as returned in the array produced by parseHeadings. */
+ export interface Heading {
5
/** Heading depth; presumably the number of leading '#' characters (1-6) — confirm in parseHeadings. */
+ level: number;
6
/** Heading text; presumably the part after the '#' markers — confirm in parseHeadings. */
+ text: string;
7
/** Index into the `lines` array of the line holding the heading; used as a section's start line. */
+ lineIndex: number;
8
+ }
9
+ export declare function parseHeadings(lines: string[]): Heading[];
10
+ export declare function lineStartCharOffsets(lines: string[]): number[];
2
11
  export declare function extractSection(markdown: string, section: string, sectionIndex?: number): {
3
12
  content: string;
4
13
  matched: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAkC7E"}
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAiBvC,wBAAgB,aAAa,IAAI,eAAe,CA2D/C;AAID,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,EAAE,CASxD;AAID,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQ9D;AAkBD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAyC7E"}
@@ -1,5 +1,21 @@
1
1
  import TurndownService from 'turndown';
2
- function buildTurndown() {
2
+ import { detectCodeLanguage } from './lang-hints.js';
3
/**
 * Length of the longest run of consecutive backtick characters in `s`
 * (0 when there are none). Used to size a code fence that is longer than
 * any backtick sequence inside the fenced text.
 *
 * @param s Text to scan.
 * @returns Maximum number of adjacent backticks.
 */
function longestBacktickRun(s) {
    const runs = s.match(/`+/g);
    if (runs === null) {
        return 0;
    }
    return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
}
18
+ export function buildTurndown() {
3
19
  const td = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
4
20
  // Remove script and style tags entirely
5
21
  td.remove(['script', 'style']);
@@ -35,6 +51,20 @@ function buildTurndown() {
35
51
  return content;
36
52
  },
37
53
  });
54
+ td.addRule('codeBlockLang', {
55
+ filter(node) {
56
+ return node.nodeName === 'PRE' && node.querySelector('code') !== null;
57
+ },
58
+ replacement(_content, node) {
59
+ const pre = node;
60
+ const code = pre.querySelector('code');
61
+ const cls = code?.getAttribute('class') ?? pre.getAttribute('class') ?? '';
62
+ const lang = detectCodeLanguage(cls);
63
+ const body = code?.textContent ?? pre.textContent ?? '';
64
+ const fence = '`'.repeat(Math.max(3, longestBacktickRun(body) + 1));
65
+ return `\n\n${fence}${lang ?? ''}\n${body.replace(/\n+$/, '')}\n${fence}\n\n`;
66
+ },
67
+ });
38
68
  return td;
39
69
  }
40
70
  const turndown = buildTurndown();
@@ -43,7 +73,7 @@ export function htmlToMarkdown(html) {
43
73
  return '';
44
74
  return turndown.turndown(html);
45
75
  }
46
- function parseHeadings(lines) {
76
+ export function parseHeadings(lines) {
47
77
  const headings = [];
48
78
  for (let i = 0; i < lines.length; i++) {
49
79
  const match = lines[i].match(/^(#{1,6})\s+(.+)/);
@@ -53,6 +83,17 @@ function parseHeadings(lines) {
53
83
  }
54
84
  return headings;
55
85
  }
86
/**
 * Compute, for every line, the character index at which it starts inside
 * `lines.join('\n')`.
 *
 * @param lines Array of line strings (without their newline separators).
 * @returns Array of the same length; element i is the start offset of lines[i].
 */
export function lineStartCharOffsets(lines) {
    const starts = [];
    let cursor = 0;
    for (const line of lines) {
        starts.push(cursor);
        // Advance past this line and the '\n' that joins it to the next one.
        cursor += line.length + 1;
    }
    return starts;
}
56
97
  function extractFromHeading(lines, headings, headingIdx) {
57
98
  const heading = headings[headingIdx];
58
99
  const start = heading.lineIndex;
@@ -151,8 +192,16 @@ export function resolveRelativeUrls(markdown, baseUrl) {
151
192
  const trimmed = path.trim();
152
193
  if (!trimmed)
153
194
  return path;
154
- if (/^(?:https?:|mailto:|tel:|javascript:|data:|#)/i.test(trimmed))
195
+ if (/^(?:https?:|mailto:|tel:|javascript:|data:)/i.test(trimmed))
155
196
  return path;
197
+ if (trimmed.startsWith('#')) {
198
+ try {
199
+ return new URL(trimmed, baseUrl).href;
200
+ }
201
+ catch {
202
+ return path;
203
+ }
204
+ }
156
205
  if (trimmed.startsWith('//')) {
157
206
  try {
158
207
  const base = new URL(baseUrl);