@staticn0va/wigolo 0.6.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -48
- package/SKILL.md +22 -22
- package/assets/skills/wigolo/rules/cache-first.md +1 -1
- package/assets/skills/wigolo/rules/synthesis.md +1 -1
- package/assets/skills/wigolo-fetch/SKILL.md +1 -1
- package/assets/skills/wigolo-find-similar/SKILL.md +2 -2
- package/assets/skills/wigolo-search/SKILL.md +3 -3
- package/dist/cache/store.d.ts +9 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +30 -4
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/doctor.d.ts +3 -3
- package/dist/cli/doctor.d.ts.map +1 -1
- package/dist/cli/doctor.js +67 -13
- package/dist/cli/doctor.js.map +1 -1
- package/dist/cli/health.js +1 -1
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/status.js +1 -1
- package/dist/cli/status.js.map +1 -1
- package/dist/cli/tui/hooks/useInstall.js +2 -2
- package/dist/cli/tui/hooks/useInstall.js.map +1 -1
- package/dist/cli/tui/hooks/useVerify.js +3 -3
- package/dist/cli/tui/hooks/useVerify.js.map +1 -1
- package/dist/cli/tui/status-format.d.ts +1 -1
- package/dist/cli/tui/status-format.d.ts.map +1 -1
- package/dist/cli/tui/status-format.js +5 -5
- package/dist/cli/tui/status-format.js.map +1 -1
- package/dist/cli/tui/status-python.d.ts +1 -1
- package/dist/cli/tui/status-python.d.ts.map +1 -1
- package/dist/cli/tui/status-python.js +17 -1
- package/dist/cli/tui/status-python.js.map +1 -1
- package/dist/cli/tui/verify-suggestions.d.ts +1 -1
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -1
- package/dist/cli/tui/verify-suggestions.js +5 -5
- package/dist/cli/tui/verify-suggestions.js.map +1 -1
- package/dist/cli/tui/verify.d.ts +2 -2
- package/dist/cli/tui/verify.d.ts.map +1 -1
- package/dist/cli/tui/verify.js +34 -8
- package/dist/cli/tui/verify.js.map +1 -1
- package/dist/cli/uninstall.js +2 -2
- package/dist/cli/uninstall.js.map +1 -1
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +29 -25
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +6 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +15 -2
- package/dist/config.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +47 -1
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +49 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +7 -3
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/jsonld.js +1 -1
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +28 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm/anthropic.d.ts +3 -0
- package/dist/extraction/llm/anthropic.d.ts.map +1 -0
- package/dist/extraction/llm/anthropic.js +33 -0
- package/dist/extraction/llm/anthropic.js.map +1 -0
- package/dist/extraction/llm/cache.d.ts +5 -0
- package/dist/extraction/llm/cache.d.ts.map +1 -0
- package/dist/extraction/llm/cache.js +35 -0
- package/dist/extraction/llm/cache.js.map +1 -0
- package/dist/extraction/llm/gemini.d.ts +3 -0
- package/dist/extraction/llm/gemini.d.ts.map +1 -0
- package/dist/extraction/llm/gemini.js +35 -0
- package/dist/extraction/llm/gemini.js.map +1 -0
- package/dist/extraction/llm/groq.d.ts +3 -0
- package/dist/extraction/llm/groq.d.ts.map +1 -0
- package/dist/extraction/llm/groq.js +63 -0
- package/dist/extraction/llm/groq.js.map +1 -0
- package/dist/extraction/llm/hash.d.ts +3 -0
- package/dist/extraction/llm/hash.d.ts.map +1 -0
- package/dist/extraction/llm/hash.js +22 -0
- package/dist/extraction/llm/hash.js.map +1 -0
- package/dist/extraction/llm/openai.d.ts +3 -0
- package/dist/extraction/llm/openai.d.ts.map +1 -0
- package/dist/extraction/llm/openai.js +38 -0
- package/dist/extraction/llm/openai.js.map +1 -0
- package/dist/extraction/llm/select.d.ts +5 -0
- package/dist/extraction/llm/select.d.ts.map +1 -0
- package/dist/extraction/llm/select.js +27 -0
- package/dist/extraction/llm/select.js.map +1 -0
- package/dist/extraction/llm/types.d.ts +24 -0
- package/dist/extraction/llm/types.d.ts.map +1 -0
- package/dist/extraction/llm/types.js +2 -0
- package/dist/extraction/llm/types.js.map +1 -0
- package/dist/extraction/llm/validate.d.ts +6 -0
- package/dist/extraction/llm/validate.d.ts.map +1 -0
- package/dist/extraction/llm/validate.js +63 -0
- package/dist/extraction/llm/validate.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +129 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown.d.ts +9 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +52 -3
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +17 -5
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +2 -3
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +81 -11
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +2 -3
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +4 -5
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +2 -3
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +3 -4
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +203 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/fetch/router.d.ts +2 -1
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +19 -1
- package/dist/fetch/router.js.map +1 -1
- package/dist/instructions.d.ts +8 -8
- package/dist/instructions.d.ts.map +1 -1
- package/dist/instructions.js +48 -41
- package/dist/instructions.js.map +1 -1
- package/dist/logger.d.ts +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/research/brief.js +1 -1
- package/dist/research/brief.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +260 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/highlights.d.ts +11 -2
- package/dist/search/highlights.d.ts.map +1 -1
- package/dist/search/highlights.js +131 -48
- package/dist/search/highlights.js.map +1 -1
- package/dist/search/multi-query.d.ts +1 -0
- package/dist/search/multi-query.d.ts.map +1 -1
- package/dist/search/multi-query.js +13 -0
- package/dist/search/multi-query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -2
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +16 -44
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/download.d.ts +9 -0
- package/dist/search/reranker/download.d.ts.map +1 -0
- package/dist/search/reranker/download.js +77 -0
- package/dist/search/reranker/download.js.map +1 -0
- package/dist/search/reranker/models.d.ts +14 -0
- package/dist/search/reranker/models.d.ts.map +1 -0
- package/dist/search/reranker/models.js +37 -0
- package/dist/search/reranker/models.js.map +1 -0
- package/dist/search/reranker/onnx.d.ts +13 -0
- package/dist/search/reranker/onnx.d.ts.map +1 -0
- package/dist/search/reranker/onnx.js +70 -0
- package/dist/search/reranker/onnx.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +12 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +26 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/tokenizer.d.ts +30 -0
- package/dist/search/reranker/tokenizer.d.ts.map +1 -0
- package/dist/search/reranker/tokenizer.js +49 -0
- package/dist/search/reranker/tokenizer.js.map +1 -0
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +38 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +4 -0
- package/dist/search/truncate.d.ts.map +1 -1
- package/dist/search/truncate.js +13 -0
- package/dist/search/truncate.js.map +1 -1
- package/dist/server/backend-status.js +2 -2
- package/dist/server/backend-status.js.map +1 -1
- package/dist/server/tool-schemas.d.ts +503 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +425 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +14 -339
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts.map +1 -1
- package/dist/tools/agent.js +36 -0
- package/dist/tools/agent.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +37 -2
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +19 -3
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +44 -7
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts.map +1 -1
- package/dist/tools/find-similar.js +32 -1
- package/dist/tools/find-similar.js.map +1 -1
- package/dist/tools/research.d.ts.map +1 -1
- package/dist/tools/research.js +34 -1
- package/dist/tools/research.js.map +1 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +98 -54
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +65 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +13 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +10 -4
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -64
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anthropic.js","sourceRoot":"","sources":["../../../src/extraction/llm/anthropic.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAI1C,MAAM,aAAa,GAAG,kBAAkB,CAAC;AACzC,MAAM,SAAS,GAAG,SAAS,CAAC;AAE5B,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAC3C;QACE,KAAK;QACL,UAAU,EAAE,IAAI;QAChB,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,SAAS;gBACf,WAAW,EAAE,iDAAiD;gBAC9D,YAAY,EAAE,IAAI,CAAC,UAA8B;aAClD;SACF;QACD,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE;QAC9C,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;KACnD,EACD,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CACxB,CAAC;IAEF,MAAM,KAAK,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,CACzC,CAAC,CAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CACmC,CAAC;IAEpF,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO;QACL,MAAM,EAAE,KAAK,CAAC,KAAK;QACnB,QAAQ,EAAE,WAAW;QACrB,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,KAAK;QAC9B,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { LLMCallRecord } from './types.js';
|
|
2
|
+
/**
 * Creates the `llm_cache` table and its `expires_at` index when they do not
 * already exist.
 */
export declare function ensureLLMCacheTable(): void;
/**
 * Looks up a cached response by its composite key.
 *
 * @param modelId - Value matched against the `model_id` column.
 * @param promptHash - Value matched against the `prompt_hash` column.
 * @param schemaHash - Value matched against the `schema_hash` column.
 * @returns The stored response string, or `null` when no row with a
 *   still-future `expires_at` matches.
 */
export declare function lookupLLMCache(
    modelId: string,
    promptHash: string,
    schemaHash: string,
): string | null;
/**
 * Writes a cache row; an existing row with the same
 * (model id, prompt hash, schema hash) key is overwritten.
 *
 * @param rec - The row to store.
 */
export declare function insertLLMCache(rec: LLMCallRecord): void;
|
|
5
|
+
//# sourceMappingURL=cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/cache.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,wBAAgB,mBAAmB,IAAI,IAAI,CAc1C;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,GACjB,MAAM,GAAG,IAAI,CAYf;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,aAAa,GAAG,IAAI,CAiBvD"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { getDatabase } from '../../cache/db.js';
|
|
2
|
+
/**
 * Ensures the LLM response cache schema exists.
 *
 * Runs idempotent DDL (`IF NOT EXISTS`) that creates the `llm_cache` table,
 * keyed by (model_id, prompt_hash, schema_hash), plus an index on
 * `expires_at`.
 */
export function ensureLLMCacheTable() {
    const schemaSql = `
    CREATE TABLE IF NOT EXISTS llm_cache (
      model_id TEXT NOT NULL,
      prompt_hash TEXT NOT NULL,
      schema_hash TEXT NOT NULL,
      response TEXT NOT NULL,
      created_at INTEGER NOT NULL,
      expires_at INTEGER NOT NULL,
      PRIMARY KEY (model_id, prompt_hash, schema_hash)
    );
    CREATE INDEX IF NOT EXISTS idx_llm_cache_expires ON llm_cache(expires_at);
  `;
    getDatabase().exec(schemaSql);
}
|
|
17
|
+
/**
 * Fetches a cached LLM response for the given key.
 *
 * Only rows whose `expires_at` is strictly greater than the current
 * `Date.now()` value are considered.
 *
 * @param modelId - Matched against `model_id`.
 * @param promptHash - Matched against `prompt_hash`.
 * @param schemaHash - Matched against `schema_hash`.
 * @returns The row's `response` value, or `null` when there is no live row.
 */
export function lookupLLMCache(modelId, promptHash, schemaHash) {
    const statement = getDatabase().prepare(`SELECT response FROM llm_cache
         WHERE model_id = ? AND prompt_hash = ? AND schema_hash = ?
           AND expires_at > ?`);
    const hit = statement.get(modelId, promptHash, schemaHash, Date.now());
    if (hit === undefined || hit === null) {
        return null;
    }
    return hit.response ?? null;
}
|
|
26
|
+
/**
 * Upserts one row into `llm_cache`.
 *
 * On a key conflict (model_id, prompt_hash, schema_hash) the existing row's
 * `response`, `created_at` and `expires_at` are replaced with the new values.
 *
 * @param rec - Record carrying `modelId`, `promptHash`, `schemaHash`,
 *   `response`, `createdAt` and `expiresAt`.
 */
export function insertLLMCache(rec) {
    const upsert = getDatabase().prepare(`INSERT INTO llm_cache (model_id, prompt_hash, schema_hash, response, created_at, expires_at)
       VALUES (?, ?, ?, ?, ?, ?)
       ON CONFLICT(model_id, prompt_hash, schema_hash) DO UPDATE SET
         response = excluded.response,
         created_at = excluded.created_at,
         expires_at = excluded.expires_at`);
    const params = [
        rec.modelId,
        rec.promptHash,
        rec.schemaHash,
        rec.response,
        rec.createdAt,
        rec.expiresAt,
    ];
    upsert.run(...params);
}
|
|
35
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../../src/extraction/llm/cache.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhD,MAAM,UAAU,mBAAmB;IACjC,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;GAWP,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,OAAe,EACf,UAAkB,EAClB,UAAkB;IAElB,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,GAAG,GAAG,EAAE;SACX,OAAO,CACN;;4BAEsB,CACvB;SACA,GAAG,CAAC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,CAErC,CAAC;IACd,OAAO,GAAG,EAAE,QAAQ,IAAI,IAAI,CAAC;AAC/B,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,GAAkB;IAC/C,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,EAAE,CAAC,OAAO,CACR;;;;;wCAKoC,CACrC,CAAC,GAAG,CACH,GAAG,CAAC,OAAO,EACX,GAAG,CAAC,UAAU,EACd,GAAG,CAAC,UAAU,EACd,GAAG,CAAC,QAAQ,EACZ,GAAG,CAAC,SAAS,EACb,GAAG,CAAC,SAAS,CACd,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gemini.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/gemini.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAIhE,wBAAsB,UAAU,CAC9B,IAAI,EAAE,WAAW,EACjB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,gBAAgB,CAAC,CAkC3B"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { GoogleGenAI } from '@google/genai';
|
|
2
|
+
/** Model used when the caller does not supply `opts.modelOverride`. */
const DEFAULT_MODEL = 'gemini-2.5-flash-lite';
/**
 * Requests a JSON response from Gemini and parses it.
 *
 * The request asks for `application/json` output constrained by
 * `opts.jsonSchema`, and forwards `opts.signal` as the abort signal.
 *
 * @param opts - Call options: `prompt`, `jsonSchema`, optional
 *   `modelOverride` and `signal`.
 * @param apiKey - API key handed to the `GoogleGenAI` client.
 * @returns The parsed values plus provider/model/latency metadata;
 *   `cached` is always `false` here.
 * @throws Error when the response text is empty or is not valid JSON.
 */
export async function callGemini(opts, apiKey) {
    const genai = new GoogleGenAI({ apiKey });
    const modelName = opts.modelOverride ?? DEFAULT_MODEL;
    const startedAt = Date.now();
    const request = {
        model: modelName,
        contents: opts.prompt,
        config: {
            responseMimeType: 'application/json',
            responseJsonSchema: opts.jsonSchema,
            abortSignal: opts.signal,
        },
    };
    const reply = await genai.models.generateContent(request);
    const body = reply.text;
    if (!body) {
        throw new Error('gemini: empty text in response');
    }
    let parsed;
    try {
        parsed = JSON.parse(body);
    }
    catch (err) {
        throw new Error(`gemini: invalid JSON in response: ${err.message}`);
    }
    // Latency covers client construction through JSON parsing.
    const elapsedMs = Date.now() - startedAt;
    return {
        values: parsed,
        provider: 'gemini',
        model: modelName,
        cached: false,
        latencyMs: elapsedMs,
    };
}
|
|
35
|
+
//# sourceMappingURL=gemini.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gemini.js","sourceRoot":"","sources":["../../../src/extraction/llm/gemini.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,MAAM,aAAa,GAAG,uBAAuB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC;QACnD,KAAK;QACL,QAAQ,EAAE,IAAI,CAAC,MAAM;QACrB,MAAM,EAAE;YACN,gBAAgB,EAAE,kBAAkB;YACpC,kBAAkB,EAAE,IAAI,CAAC,UAAU;YACnC,WAAW,EAAE,IAAI,CAAC,MAAM;SACzB;KACF,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;IAC3B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,qCAAsC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,OAAO;QACL,MAAM;QACN,QAAQ,EAAE,QAAQ;QAClB,KAAK;QACL,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"groq.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/groq.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAKhE,wBAAsB,QAAQ,CAC5B,IAAI,EAAE,WAAW,EACjB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,gBAAgB,CAAC,CA2B3B"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import Groq from 'groq-sdk';
|
|
2
|
+
import { validateAgainstSchema } from './validate.js';
|
|
3
|
+
/** Model used when the caller does not supply `opts.modelOverride`. */
const DEFAULT_MODEL = 'llama-3.3-70b-versatile';
/**
 * Requests JSON from Groq and validates it against `opts.jsonSchema`,
 * retrying once if validation fails.
 *
 * The schema is embedded in the prompt text. If the first reply fails
 * validation, that reply and the validation errors are appended to the
 * conversation and the model is asked again.
 *
 * @param opts - Call options: `prompt`, `jsonSchema`, optional
 *   `modelOverride` and `signal`.
 * @param apiKey - API key handed to the `Groq` client.
 * @returns The validated values plus provider/model/latency metadata.
 * @throws Error when the second reply still fails schema validation (errors
 *   thrown by the individual requests propagate unchanged).
 */
export async function callGroq(opts, apiKey) {
    const groq = new Groq({ apiKey });
    const requestedModel = opts.modelOverride ?? DEFAULT_MODEL;
    const startedAt = Date.now();
    const conversation = [
        { role: 'user', content: buildPrompt(opts.prompt, opts.jsonSchema) },
    ];

    const firstAttempt = await runOnce(groq, requestedModel, conversation, opts.signal);
    const firstErrors = validateAgainstSchema(firstAttempt.values, opts.jsonSchema);
    if (firstErrors.length === 0) {
        return done(firstAttempt.values, firstAttempt.responseModel ?? requestedModel, startedAt);
    }

    // Give the model its own answer and the validation errors, then ask once more.
    conversation.push({ role: 'assistant', content: firstAttempt.raw });
    conversation.push({ role: 'user', content: retryPrompt(firstErrors) });

    const secondAttempt = await runOnce(groq, requestedModel, conversation, opts.signal);
    const secondErrors = validateAgainstSchema(secondAttempt.values, opts.jsonSchema);
    if (secondErrors.length > 0) {
        throw new Error(`groq: response failed schema validation after retry: ${formatErrors(secondErrors)}`);
    }
    return done(secondAttempt.values, secondAttempt.responseModel ?? requestedModel, startedAt);
}
|
|
26
|
+
async function runOnce(client, model, messages, signal) {
|
|
27
|
+
const response = await client.chat.completions.create({
|
|
28
|
+
model,
|
|
29
|
+
messages,
|
|
30
|
+
response_format: { type: 'json_object' },
|
|
31
|
+
}, { signal });
|
|
32
|
+
const content = response.choices?.[0]?.message?.content;
|
|
33
|
+
if (!content) {
|
|
34
|
+
throw new Error('groq: empty content in response');
|
|
35
|
+
}
|
|
36
|
+
let values;
|
|
37
|
+
try {
|
|
38
|
+
values = JSON.parse(content);
|
|
39
|
+
}
|
|
40
|
+
catch (e) {
|
|
41
|
+
throw new Error(`groq: invalid JSON in response: ${e.message}`);
|
|
42
|
+
}
|
|
43
|
+
return { values, raw: content, responseModel: response.model };
|
|
44
|
+
}
|
|
45
|
+
function buildPrompt(prompt, schema) {
|
|
46
|
+
return `${prompt}\n\nReturn JSON matching this schema:\n${JSON.stringify(schema)}`;
|
|
47
|
+
}
|
|
48
|
+
function retryPrompt(errors) {
|
|
49
|
+
return `Your previous response failed schema validation:\n${formatErrors(errors)}\nReturn corrected JSON only.`;
|
|
50
|
+
}
|
|
51
|
+
function formatErrors(errors) {
|
|
52
|
+
return errors.map((e) => `${e.path}: ${e.message}`).join('; ');
|
|
53
|
+
}
|
|
54
|
+
function done(values, model, start) {
|
|
55
|
+
return {
|
|
56
|
+
values,
|
|
57
|
+
provider: 'groq',
|
|
58
|
+
model,
|
|
59
|
+
cached: false,
|
|
60
|
+
latencyMs: Date.now() - start,
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
//# sourceMappingURL=groq.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"groq.js","sourceRoot":"","sources":["../../../src/extraction/llm/groq.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,UAAU,CAAC;AAE5B,OAAO,EAAE,qBAAqB,EAAwB,MAAM,eAAe,CAAC;AAE5E,MAAM,aAAa,GAAG,yBAAyB,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACpC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAA2D;QACvE,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE;KACrE,CAAC;IAEF,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IAClE,IAAI,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAClE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,aAAa,IAAI,KAAK,EAAE,KAAK,CAAC,CAAC;IACjE,CAAC;IAED,2DAA2D;IAC3D,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;IACzD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAE9D,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACnE,MAAM,GAAG,qBAAqB,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAC/D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACb,wDAAwD,YAAY,CAAC,MAAM,CAAC,EAAE,CAC/E,CAAC;IACJ,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,aAAa,IAAI,KAAK,EAAE,KAAK,CAAC,CAAC;AACnE,CAAC;AAQD,KAAK,UAAU,OAAO,CACpB,MAAY,EACZ,KAAa,EACb,QAAgE,EAChE,MAA+B;IAE/B,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CACnD;QACE,KAAK;QACL,QAAQ;QACR,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;KACzC,EACD,EAAE,MAAM,EAAE,CACX,CAAC;IACF,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;IACxD,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;IACrD,CAAC;IACD,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,
mCAAoC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC;AACjE,CAAC;AAED,SAAS,WAAW,CAAC,MAAc,EAAE,MAA+B;IAClE,OAAO,GAAG,MAAM,0CAA0C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;AACrF,CAAC;AAED,SAAS,WAAW,CAAC,MAAyB;IAC5C,OAAO,qDAAqD,YAAY,CAAC,MAAM,CAAC,+BAA+B,CAAC;AAClH,CAAC;AAED,SAAS,YAAY,CAAC,MAAyB;IAC7C,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjE,CAAC;AAED,SAAS,IAAI,CACX,MAA+B,EAC/B,KAAa,EACb,KAAa;IAEb,OAAO;QACL,MAAM;QACN,QAAQ,EAAE,MAAM;QAChB,KAAK;QACL,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/hash.ts"],"names":[],"mappings":"AAEA,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAGjD;AAED,wBAAgB,UAAU,CAAC,MAAM,EAAE,OAAO,GAAG,MAAM,CAElD"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
export function hashPrompt(prompt) {
|
|
3
|
+
const normalized = prompt.replace(/\s+/g, ' ').trim();
|
|
4
|
+
return createHash('sha256').update(normalized).digest('hex');
|
|
5
|
+
}
|
|
6
|
+
export function hashSchema(schema) {
|
|
7
|
+
return createHash('sha256').update(stableStringify(schema)).digest('hex');
|
|
8
|
+
}
|
|
9
|
+
function stableStringify(value) {
|
|
10
|
+
if (value === null || typeof value !== 'object') {
|
|
11
|
+
return JSON.stringify(value);
|
|
12
|
+
}
|
|
13
|
+
if (Array.isArray(value)) {
|
|
14
|
+
return '[' + value.map(stableStringify).join(',') + ']';
|
|
15
|
+
}
|
|
16
|
+
const keys = Object.keys(value).sort();
|
|
17
|
+
const parts = keys.map((k) => JSON.stringify(k) +
|
|
18
|
+
':' +
|
|
19
|
+
stableStringify(value[k]));
|
|
20
|
+
return '{' + parts.join(',') + '}';
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=hash.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hash.js","sourceRoot":"","sources":["../../../src/extraction/llm/hash.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,MAAM,UAAU,UAAU,CAAC,MAAc;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC/D,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAe;IACxC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5E,CAAC;AAED,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAChD,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IACD,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;IAC1D,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,KAAgC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CACpB,CAAC,CAAC,EAAE,EAAE,CACJ,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QACjB,GAAG;QACH,eAAe,CAAE,KAAiC,CAAC,CAAC,CAAC,CAAC,CACzD,CAAC;IACF,OAAO,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;AACrC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/openai.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAIhE,wBAAsB,UAAU,CAC9B,IAAI,EAAE,WAAW,EACjB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,gBAAgB,CAAC,CAwC3B"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
/** Model used when the caller does not supply `opts.modelOverride`. */
const DEFAULT_MODEL = 'gpt-4o-mini';
/**
 * Requests schema-constrained JSON from OpenAI chat completions and parses it.
 *
 * The request uses a `json_schema` response format named `extract`, with
 * `strict: true`, built from `opts.jsonSchema`; `opts.signal` is forwarded in
 * the request options.
 *
 * @param opts - Call options: `prompt`, `jsonSchema`, optional
 *   `modelOverride` and `signal`.
 * @param apiKey - API key handed to the `OpenAI` client.
 * @returns The parsed values plus provider/model/latency metadata; the
 *   reported model is the response's `model` when present, otherwise the
 *   requested one.
 * @throws Error when the first choice has no content or it is not valid JSON.
 */
export async function callOpenAI(opts, apiKey) {
    const openai = new OpenAI({ apiKey });
    const requestedModel = opts.modelOverride ?? DEFAULT_MODEL;
    const startedAt = Date.now();
    const request = {
        model: requestedModel,
        messages: [{ role: 'user', content: opts.prompt }],
        response_format: {
            type: 'json_schema',
            json_schema: {
                name: 'extract',
                schema: opts.jsonSchema,
                strict: true,
            },
        },
    };
    const completion = await openai.chat.completions.create(request, { signal: opts.signal });
    const raw = completion.choices?.[0]?.message?.content;
    if (!raw) {
        throw new Error('openai: empty content in response');
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        throw new Error(`openai: invalid JSON in response: ${err.message}`);
    }
    return {
        values: parsed,
        provider: 'openai',
        model: completion.model ?? requestedModel,
        cached: false,
        latencyMs: Date.now() - startedAt,
    };
}
|
|
38
|
+
//# sourceMappingURL=openai.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai.js","sourceRoot":"","sources":["../../../src/extraction/llm/openai.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAG5B,MAAM,aAAa,GAAG,aAAa,CAAC;AAEpC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAiB,EACjB,MAAc;IAEd,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CACnD;QACE,KAAK;QACL,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;QAClD,eAAe,EAAE;YACf,IAAI,EAAE,aAAa;YACnB,WAAW,EAAE;gBACX,IAAI,EAAE,SAAS;gBACf,MAAM,EAAE,IAAI,CAAC,UAAU;gBACvB,MAAM,EAAE,IAAI;aACb;SACF;KACF,EACD,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CACxB,CAAC;IAEF,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;IACxD,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,MAA+B,CAAC;IACpC,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,qCAAsC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,OAAO;QACL,MAAM;QACN,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,KAAK;QAC9B,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { LLMProvider } from './types.js';
|
|
2
|
+
/**
 * Picks the LLM provider to use from an environment-variable map.
 *
 * @param env - Environment variables, e.g. `process.env`.
 * @returns The selected provider, or `null` when no provider's API-key
 *   variable is set.
 */
export declare function selectProvider(
    env: Record<string, string | undefined>,
): LLMProvider | null;
/**
 * Returns the name of the environment variable that holds a provider's API key.
 *
 * @param p - Provider to look up.
 */
export declare function providerEnvVar(p: LLMProvider): string;
/**
 * Returns every known provider, in auto-selection priority order.
 */
export declare function allProviders(): readonly LLMProvider[];
|
|
5
|
+
//# sourceMappingURL=select.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/select.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAW9C,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC,GACtC,WAAW,GAAG,IAAI,CAUpB;AAED,wBAAgB,cAAc,CAAC,CAAC,EAAE,WAAW,GAAG,MAAM,CAErD;AAED,wBAAgB,YAAY,IAAI,SAAS,WAAW,EAAE,CAErD"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
const PROVIDER_ORDER = ['anthropic', 'openai', 'gemini', 'groq'];
|
|
2
|
+
const PROVIDER_ENV = {
|
|
3
|
+
anthropic: 'ANTHROPIC_API_KEY',
|
|
4
|
+
openai: 'OPENAI_API_KEY',
|
|
5
|
+
gemini: 'GOOGLE_API_KEY',
|
|
6
|
+
groq: 'GROQ_API_KEY',
|
|
7
|
+
};
|
|
8
|
+
export function selectProvider(env) {
|
|
9
|
+
const override = env.WIGOLO_LLM_PROVIDER;
|
|
10
|
+
if (override && PROVIDER_ORDER.includes(override)) {
|
|
11
|
+
const p = override;
|
|
12
|
+
if (env[PROVIDER_ENV[p]])
|
|
13
|
+
return p;
|
|
14
|
+
}
|
|
15
|
+
for (const p of PROVIDER_ORDER) {
|
|
16
|
+
if (env[PROVIDER_ENV[p]])
|
|
17
|
+
return p;
|
|
18
|
+
}
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
export function providerEnvVar(p) {
|
|
22
|
+
return PROVIDER_ENV[p];
|
|
23
|
+
}
|
|
24
|
+
export function allProviders() {
|
|
25
|
+
return PROVIDER_ORDER;
|
|
26
|
+
}
|
|
27
|
+
//# sourceMappingURL=select.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"select.js","sourceRoot":"","sources":["../../../src/extraction/llm/select.ts"],"names":[],"mappings":"AAEA,MAAM,cAAc,GAAkB,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;AAEhF,MAAM,YAAY,GAAgC;IAChD,SAAS,EAAE,mBAAmB;IAC9B,MAAM,EAAE,gBAAgB;IACxB,MAAM,EAAE,gBAAgB;IACxB,IAAI,EAAE,cAAc;CACrB,CAAC;AAEF,MAAM,UAAU,cAAc,CAC5B,GAAuC;IAEvC,MAAM,QAAQ,GAAG,GAAG,CAAC,mBAAmB,CAAC;IACzC,IAAI,QAAQ,IAAK,cAA2B,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChE,MAAM,CAAC,GAAG,QAAuB,CAAC;QAClC,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,CAAC;IACrC,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,CAAC;IACrC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,CAAc;IAC3C,OAAO,YAAY,CAAC,CAAC,CAAC,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,YAAY;IAC1B,OAAO,cAAc,CAAC;AACxB,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** Identifier of a supported LLM backend. */
export type LLMProvider =
    | 'anthropic'
    | 'openai'
    | 'gemini'
    | 'groq';

/** Outcome of one LLM-backed extraction. */
export interface LLMExtractResult {
    /** Extracted field values keyed by field name. */
    values: Record<string, unknown>;
    /** Provider reported for this result. */
    provider: LLMProvider;
    /** Model identifier reported for this result. */
    model: string;
    /** Whether the values came from the response cache. */
    cached: boolean;
    /** Latency of the call in milliseconds. */
    latencyMs: number;
    /** Optional human-readable warnings. */
    warnings?: Array<string>;
}

/** One stored LLM response together with its lookup key and lifetime. */
export interface LLMCallRecord {
    modelId: string;
    promptHash: string;
    schemaHash: string;
    /** Serialized response payload. */
    response: string;
    /** Creation timestamp. */
    createdAt: number;
    /** Expiry timestamp. */
    expiresAt: number;
}

/** Options handed to a provider adapter for a single call. */
export interface LLMCallOpts {
    prompt: string;
    jsonSchema: Record<string, unknown>;
    /** NOTE(review): presumably replaces the adapter's default model; confirm in the adapters. */
    modelOverride?: string;
    /** Optional abort signal for the call. */
    signal?: AbortSignal;
}
|
|
24
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;AAErE,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/extraction/llm/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../../src/extraction/llm/validate.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AASD,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,OAAO,EACd,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC9B,eAAe,EAAE,CAInB"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// Minimal recursive JSON Schema validator: required + type checks.
|
|
2
|
+
// Sufficient for post-hoc validation of provider responses where the SDK
|
|
3
|
+
// does not enforce a schema natively (e.g. Groq json_object).
|
|
4
|
+
/**
 * Validate `value` against a (subset of) JSON Schema.
 *
 * Supported keywords: `type` (string or array of strings), `required`,
 * `properties` and `items`. Unknown type names are accepted as matching.
 *
 * @param value  The value to check (typically a parsed provider response).
 * @param schema The JSON Schema object to check against.
 * @returns A list of `{ path, message }` errors; empty when the value conforms.
 *          Paths are rooted at `$` (e.g. `$.items[2].name`).
 */
export function validateAgainstSchema(value, schema) {
    const errors = [];
    walk(value, schema, '$', errors);
    return errors;
}

/** True when `key` is an own (non-inherited) property of `obj`. */
function hasOwnKey(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Recursively validate `value` at `path`, appending problems to `errors`.
 * A type mismatch stops descent at that node: nested checks would only
 * produce noise once the container itself has the wrong type.
 */
function walk(value, schema, path, errors) {
    // Non-object sub-schemas (null, booleans, ...) carry no constraints we
    // understand; skip them instead of crashing on `schema.type`.
    if (schema === null || typeof schema !== 'object') {
        return;
    }
    let types = [];
    if (schema.type) {
        types = Array.isArray(schema.type) ? schema.type : [schema.type];
        if (!types.some((t) => matchesType(value, t))) {
            errors.push({
                path,
                message: `expected type ${types.join('|')} but got ${actualType(value)}`,
            });
            return;
        }
    }
    // `types.includes(...)` rather than `schema.type === ...` so that union
    // types such as ['object', 'null'] still get their nested checks.
    if (types.includes('object') && matchesType(value, 'object')) {
        const obj = value;
        for (const req of schema.required ?? []) {
            // Own-property check: inherited members such as `toString` must
            // not satisfy a `required` entry.
            if (!hasOwnKey(obj, req) || obj[req] === undefined) {
                errors.push({ path: `${path}.${req}`, message: 'required' });
            }
        }
        for (const [k, sub] of Object.entries(schema.properties ?? {})) {
            if (hasOwnKey(obj, k) && obj[k] !== undefined) {
                walk(obj[k], sub, `${path}.${k}`, errors);
            }
        }
    }
    if (types.includes('array') && Array.isArray(value) && schema.items) {
        value.forEach((item, i) => walk(item, schema.items, `${path}[${i}]`, errors));
    }
}

/**
 * Whether `value` satisfies the JSON Schema primitive type name `type`.
 * Unrecognised type names match everything.
 */
function matchesType(value, type) {
    switch (type) {
        case 'string':
            return typeof value === 'string';
        case 'number':
            return typeof value === 'number';
        case 'integer':
            // JSON Schema "integer" excludes fractional numbers (and NaN/Infinity).
            return Number.isInteger(value);
        case 'boolean':
            return typeof value === 'boolean';
        case 'null':
            return value === null;
        case 'array':
            return Array.isArray(value);
        case 'object':
            return value !== null && typeof value === 'object' && !Array.isArray(value);
        default:
            return true;
    }
}

/** Type name used in error messages; distinguishes `null` and `array` from `object`. */
function actualType(value) {
    if (value === null) {
        return 'null';
    }
    if (Array.isArray(value)) {
        return 'array';
    }
    return typeof value;
}
|
|
63
|
+
//# sourceMappingURL=validate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../../src/extraction/llm/validate.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,yEAAyE;AACzE,8DAA8D;AAc9D,MAAM,UAAU,qBAAqB,CACnC,KAAc,EACd,MAA+B;IAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,IAAI,CAAC,KAAK,EAAE,MAAqB,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;IAChD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,IAAI,CACX,KAAc,EACd,MAAmB,EACnB,IAAY,EACZ,MAAyB;IAEzB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACvE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9C,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI;gBACJ,OAAO,EAAE,iBAAiB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,UAAU,CAAC,KAAK,CAAC,EAAE;aACzE,CAAC,CAAC;YACH,OAAO;QACT,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACnE,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;YACxC,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,IAAI,IAAI,GAAG,EAAE,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/D,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,IAAI,IAAI,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACpE,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACxB,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,KAAoB,EAAE,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,MAAM,CAAC,CACjE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,KAAc,EAAE,IAAY;IAC/C,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC;QACnC,KAAK,QAAQ,CAAC;QACd,KAAK,SAAS;YACZ,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC;QACnC,KAAK,SAAS;YACZ,OAAO,OAAO,KAAK,KAAK,SAAS,CAAC;QACpC,KAAK,MAAM;YACT,OAA
O,KAAK,KAAK,IAAI,CAAC;QACxB,KAAK,OAAO;YACV,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC9B,KAAK,QAAQ;YACX,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC9E;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,MAAM,CAAC;IAClC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IACzC,OAAO,OAAO,KAAK,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { LLMExtractResult } from './llm/types.js';
|
|
2
|
+
/** Mutable counter of provider calls still allowed. */
export interface LLMFallbackBudget {
    /** Calls left; the fallback is skipped once this reaches zero. */
    remaining: number;
}

/** Everything `extractWithLLM` needs to fill in missing fields. */
export interface LLMFallbackInput {
    /** Page HTML the missing fields should be extracted from. */
    html: string;
    /** JSON Schema describing the expected extraction result. */
    jsonSchema: Record<string, unknown>;
    /** Values already extracted by other means. */
    partial: Record<string, unknown>;
    /** Names of the fields that are still missing. */
    missing: Array<string>;
    /** Optional abort signal. */
    signal?: AbortSignal;
    /** Optional call budget; a fresh one is created per call when omitted. */
    budget?: LLMFallbackBudget;
}

/** Same as `LLMExtractResult`, but `warnings` is always present. */
export interface LLMFallbackResult extends LLMExtractResult {
    warnings: Array<string>;
}

/**
 * Fill the fields listed in `input.missing` by asking an LLM provider.
 *
 * @param input HTML, schema, partial values and the fields still missing.
 * @returns A promise of the combined values plus provider metadata and warnings.
 */
export declare function extractWithLLM(input: LLMFallbackInput): Promise<LLMFallbackResult>;
|
|
17
|
+
//# sourceMappingURL=llm-fallback.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-fallback.d.ts","sourceRoot":"","sources":["../../src/extraction/llm-fallback.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,gBAAgB,EAAe,MAAM,gBAAgB,CAAC;AAKpE,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAED,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAeD,wBAAsB,cAAc,CAClC,KAAK,EAAE,gBAAgB,GACtB,OAAO,CAAC,iBAAiB,CAAC,CAsF5B"}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { getConfig } from '../config.js';
|
|
2
|
+
import { callAnthropic } from './llm/anthropic.js';
|
|
3
|
+
import { callOpenAI } from './llm/openai.js';
|
|
4
|
+
import { callGemini } from './llm/gemini.js';
|
|
5
|
+
import { callGroq } from './llm/groq.js';
|
|
6
|
+
import { ensureLLMCacheTable, insertLLMCache, lookupLLMCache, } from './llm/cache.js';
|
|
7
|
+
import { hashPrompt, hashSchema } from './llm/hash.js';
|
|
8
|
+
import { allProviders, providerEnvVar, selectProvider } from './llm/select.js';
|
|
9
|
+
import { validateAgainstSchema } from './llm/validate.js';
|
|
10
|
+
/**
 * Upper bound on how much of the page HTML is embedded in the prompt.
 * Note: `truncate` compares this against the string's `.length`.
 */
const MAX_HTML_BYTES = 50_000;

/**
 * Provider id -> adapter function. Each adapter is invoked as
 * `adapter({ prompt, jsonSchema, signal }, apiKey)`.
 */
const ADAPTERS = {
    anthropic: callAnthropic,
    openai: callOpenAI,
    gemini: callGemini,
    groq: callGroq,
};
|
|
17
|
+
/**
 * Try to fill the fields listed in `input.missing` by asking an LLM.
 *
 * Flow:
 *  1. Nothing missing -> return the partial values untouched.
 *  2. Per-request budget exhausted, or no provider key configured ->
 *     return the partial values with an explanatory warning.
 *  3. Look up the response cache keyed on (model id, prompt hash, schema hash).
 *     An unreadable cache entry is treated as a miss rather than an error.
 *  4. Call the selected provider's adapter; every attempt (successful or
 *     not) consumes one unit of budget.
 *  5. Validate the response against `input.jsonSchema`; on success cache it
 *     and merge only the missing fields into the partial values.
 *
 * Provider and validation failures never reject: they are reported through
 * `warnings` on a result that carries the original partial values.
 *
 * @param input HTML, schema, partial values, missing field names and
 *              optional abort signal / shared budget.
 * @returns The merged values plus provider metadata and warnings.
 */
export async function extractWithLLM(input) {
    if (input.missing.length === 0) {
        return emptyResult(input.partial, []);
    }
    const cfg = getConfig();
    const budget = input.budget ?? { remaining: cfg.llmMaxCallsPerRequest };
    if (budget.remaining <= 0) {
        return emptyResult(input.partial, [
            `LLM fallback skipped: per-request budget exhausted (cap ${cfg.llmMaxCallsPerRequest}). Override via WIGOLO_LLM_MAX_CALLS_PER_REQUEST.`,
        ]);
    }
    const provider = selectProvider(process.env);
    if (!provider) {
        const envList = allProviders()
            .map((p) => providerEnvVar(p))
            .join(', ');
        return emptyResult(input.partial, [
            `LLM fallback skipped: no provider key set (${envList}). ` +
                `${input.missing.length} required field(s) still missing: ${input.missing.join(', ')}.`,
        ]);
    }
    const apiKey = process.env[providerEnvVar(provider)];
    const prompt = buildPrompt(input);
    const promptHash = hashPrompt(prompt);
    const schemaHash = hashSchema(input.jsonSchema);
    const modelId = `${provider}:default`;
    ensureLLMCacheTable();
    const cached = lookupLLMCache(modelId, promptHash, schemaHash);
    if (cached) {
        // A corrupt or non-object payload must not reject the whole
        // extraction; fall through to a live call instead.
        // NOTE(review): assumes insertLLMCache replaces an existing row for
        // the same key, so the bad entry gets overwritten below; confirm.
        const cachedValues = parseCachedValues(cached);
        if (cachedValues) {
            return {
                values: mergeOnlyMissing(input.partial, cachedValues, input.missing),
                provider,
                model: modelId,
                cached: true,
                latencyMs: 0,
                warnings: [],
            };
        }
    }
    let result;
    try {
        result = await ADAPTERS[provider]({ prompt, jsonSchema: input.jsonSchema, signal: input.signal }, apiKey);
    }
    catch (e) {
        // Adapters may throw non-Error values; avoid reporting "undefined".
        const reason = e instanceof Error ? e.message : String(e);
        return emptyResult(input.partial, [
            `LLM fallback (${provider}) failed: ${reason}`,
        ]);
    }
    finally {
        // Failed attempts count against the budget too.
        budget.remaining = Math.max(0, budget.remaining - 1);
    }
    const errors = validateAgainstSchema(result.values, input.jsonSchema);
    if (errors.length > 0) {
        return emptyResult(input.partial, [
            `LLM fallback (${provider}) response failed schema validation: ${errors
                .map((e) => `${e.path} ${e.message}`)
                .join('; ')}`,
        ]);
    }
    const ttlMs = cfg.llmCacheTtlDays * 24 * 60 * 60 * 1000;
    const now = Date.now();
    insertLLMCache({
        modelId,
        promptHash,
        schemaHash,
        response: JSON.stringify(result.values),
        createdAt: now,
        expiresAt: now + ttlMs,
    });
    return {
        values: mergeOnlyMissing(input.partial, result.values, input.missing),
        provider,
        model: result.model,
        cached: false,
        latencyMs: result.latencyMs,
        warnings: result.warnings ?? [],
    };
}

/**
 * Decode a cached response payload.
 * @returns The parsed plain object, or `null` when the payload is not valid
 *          JSON or does not decode to a plain object.
 */
function parseCachedValues(raw) {
    try {
        const parsed = JSON.parse(raw);
        const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        return isPlainObject ? parsed : null;
    }
    catch {
        return null;
    }
}
|
|
95
|
+
/**
 * Build the "no LLM call happened / it did not help" result: a shallow copy
 * of the partial values with placeholder provider metadata and the given
 * warnings.
 *
 * @param partial  Values already extracted.
 * @param warnings Messages explaining why nothing was added.
 */
function emptyResult(partial, warnings) {
    const values = { ...partial };
    const placeholder = {
        values,
        provider: 'anthropic',
        model: '',
        cached: false,
        latencyMs: 0,
        warnings,
    };
    return placeholder;
}
|
|
105
|
+
/**
 * Copy `partial` and add values from `filled`, but only for keys listed in
 * `missing`. Values that were already extracted are never overwritten, and
 * `undefined` values in `filled` are ignored.
 *
 * Only own properties of `filled` are considered, so a missing field whose
 * name collides with an inherited member (`toString`, `constructor`, ...)
 * is not "filled" with a function from `Object.prototype`.
 *
 * @param partial Values already extracted.
 * @param filled  Values returned by the LLM (or cache).
 * @param missing Names of the fields that may be taken from `filled`.
 * @returns A new object; neither input is mutated.
 */
function mergeOnlyMissing(partial, filled, missing) {
    const out = { ...partial };
    for (const key of missing) {
        const provided = Object.prototype.hasOwnProperty.call(filled, key);
        if (provided && filled[key] !== undefined) {
            out[key] = filled[key];
        }
    }
    return out;
}
|
|
113
|
+
/**
 * Assemble the extraction prompt: a short instruction block naming the
 * missing fields, followed by the (length-capped) page HTML.
 *
 * @param input Fallback input; only `html` and `missing` are read here.
 * @returns The prompt text, with sections separated by newlines.
 */
function buildPrompt(input) {
    const cappedHtml = truncate(input.html, MAX_HTML_BYTES);
    const sections = [];
    sections.push('Extract the following missing fields from the HTML below.');
    sections.push(`Missing fields: ${input.missing.join(', ')}.`);
    sections.push('Return JSON matching the provided schema. Do not invent values; if a field is not present in the HTML, omit it.');
    sections.push('');
    sections.push('HTML:');
    sections.push(cappedHtml);
    return sections.join('\n');
}
|
|
124
|
+
/**
 * Cap `s` at `maxBytes` UTF-16 code units (despite the parameter name, the
 * limit is measured with `String.prototype.length`, not encoded bytes).
 *
 * If the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in
 * half of an astral character (e.g. an emoji).
 *
 * @param s        Text to cap.
 * @param maxBytes Maximum length of the result in UTF-16 code units.
 * @returns `s` itself when short enough, otherwise a prefix of it.
 */
function truncate(s, maxBytes) {
    if (s.length <= maxBytes) {
        return s;
    }
    let end = maxBytes;
    // charCodeAt yields NaN for out-of-range indexes, which fails both
    // comparisons, so end <= 0 needs no special casing.
    const last = s.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) {
        end -= 1;
    }
    return s.slice(0, end);
}
|
|
129
|
+
//# sourceMappingURL=llm-fallback.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-fallback.js","sourceRoot":"","sources":["../../src/extraction/llm-fallback.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,cAAc,GACf,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AACvD,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAE/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAE1D,MAAM,cAAc,GAAG,MAAM,CAAC;AAmB9B,MAAM,QAAQ,GAMV;IACF,SAAS,EAAE,aAAa;IACxB,MAAM,EAAE,UAAU;IAClB,MAAM,EAAE,UAAU;IAClB,IAAI,EAAE,QAAQ;CACf,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAuB;IAEvB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;IACxB,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,EAAE,SAAS,EAAE,GAAG,CAAC,qBAAqB,EAAE,CAAC;IACxE,IAAI,MAAM,CAAC,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,2DAA2D,GAAG,CAAC,qBAAqB,mDAAmD;SACxI,CAAC,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,OAAO,GAAG,YAAY,EAAE;aAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;aAC7B,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,8CAA8C,OAAO,KAAK;gBACxD,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,qCAAqC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;SAC1F,CAAC,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAW,CAAC;IAC/D,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IAClC,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,GAAG,QAAQ,UAAU,CAAC;IAEtC,mBAAmB,EAAE,CAAC;IACtB,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;IAC/D,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAA4B,CAAC;QAC7D,OAAO;YACL,MAAM,EAAE,gBAAgB,CAAC
,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC;YAC9D,QAAQ;YACR,KAAK,EAAE,OAAO;YACd,MAAM,EAAE,IAAI;YACZ,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,EAAE;SACb,CAAC;IACJ,CAAC;IAED,IAAI,MAAwB,CAAC;IAC7B,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAC/B,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,EAC9D,MAAM,CACP,CAAC;IACJ,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,iBAAiB,QAAQ,aAAc,CAAW,CAAC,OAAO,EAAE;SAC7D,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IACtE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE;YAChC,iBAAiB,QAAQ,wCAAwC,MAAM;iBACpE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;iBACpC,IAAI,CAAC,IAAI,CAAC,EAAE;SAChB,CAAC,CAAC;IACL,CAAC;IAED,MAAM,KAAK,GAAG,GAAG,CAAC,eAAe,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,cAAc,CAAC;QACb,OAAO;QACP,UAAU;QACV,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC;QACvC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,GAAG,GAAG,KAAK;KACvB,CAAC,CAAC;IAEH,OAAO;QACL,MAAM,EAAE,gBAAgB,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC;QACrE,QAAQ;QACR,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;KAChC,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAClB,OAAgC,EAChC,QAAkB;IAElB,OAAO;QACL,MAAM,EAAE,EAAE,GAAG,OAAO,EAAE;QACtB,QAAQ,EAAE,WAAW;QACrB,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,CAAC;QACZ,QAAQ;KACT,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CACvB,OAAgC,EAChC,MAA+B,EAC/B,OAAiB;IAEjB,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAC3B,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,SAAS;YAAE,GAAG,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IACxD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,WAAW,CAAC,KAAuB;IAC1C,MAAM,IAAI
,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;IAClD,OAAO;QACL,2DAA2D;QAC3D,mBAAmB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;QAC9C,iHAAiH;QACjH,EAAE;QACF,OAAO;QACP,IAAI;KACL,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS,EAAE,QAAgB;IAC3C,IAAI,CAAC,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,CAAC;IACnC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
export declare function buildTurndown(): TurndownService;
|
|
1
3
|
export declare function htmlToMarkdown(html: string): string;
|
|
4
|
+
export interface Heading {
|
|
5
|
+
level: number;
|
|
6
|
+
text: string;
|
|
7
|
+
lineIndex: number;
|
|
8
|
+
}
|
|
9
|
+
export declare function parseHeadings(lines: string[]): Heading[];
|
|
10
|
+
export declare function lineStartCharOffsets(lines: string[]): number[];
|
|
2
11
|
export declare function extractSection(markdown: string, section: string, sectionIndex?: number): {
|
|
3
12
|
content: string;
|
|
4
13
|
matched: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAiBvC,wBAAgB,aAAa,IAAI,eAAe,CA2D/C;AAID,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,EAAE,CASxD;AAID,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQ9D;AAkBD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAyC7E"}
|