@staticn0va/wigolo 0.6.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -48
- package/SKILL.md +22 -22
- package/assets/skills/wigolo/rules/cache-first.md +1 -1
- package/assets/skills/wigolo/rules/synthesis.md +1 -1
- package/assets/skills/wigolo-fetch/SKILL.md +1 -1
- package/assets/skills/wigolo-find-similar/SKILL.md +2 -2
- package/assets/skills/wigolo-search/SKILL.md +3 -3
- package/dist/cache/store.d.ts +9 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +30 -4
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/doctor.d.ts +3 -3
- package/dist/cli/doctor.d.ts.map +1 -1
- package/dist/cli/doctor.js +67 -13
- package/dist/cli/doctor.js.map +1 -1
- package/dist/cli/health.js +1 -1
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/status.js +1 -1
- package/dist/cli/status.js.map +1 -1
- package/dist/cli/tui/hooks/useInstall.js +2 -2
- package/dist/cli/tui/hooks/useInstall.js.map +1 -1
- package/dist/cli/tui/hooks/useVerify.js +3 -3
- package/dist/cli/tui/hooks/useVerify.js.map +1 -1
- package/dist/cli/tui/status-format.d.ts +1 -1
- package/dist/cli/tui/status-format.d.ts.map +1 -1
- package/dist/cli/tui/status-format.js +5 -5
- package/dist/cli/tui/status-format.js.map +1 -1
- package/dist/cli/tui/status-python.d.ts +1 -1
- package/dist/cli/tui/status-python.d.ts.map +1 -1
- package/dist/cli/tui/status-python.js +17 -1
- package/dist/cli/tui/status-python.js.map +1 -1
- package/dist/cli/tui/verify-suggestions.d.ts +1 -1
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -1
- package/dist/cli/tui/verify-suggestions.js +5 -5
- package/dist/cli/tui/verify-suggestions.js.map +1 -1
- package/dist/cli/tui/verify.d.ts +2 -2
- package/dist/cli/tui/verify.d.ts.map +1 -1
- package/dist/cli/tui/verify.js +34 -8
- package/dist/cli/tui/verify.js.map +1 -1
- package/dist/cli/uninstall.js +2 -2
- package/dist/cli/uninstall.js.map +1 -1
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +29 -25
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +6 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +15 -2
- package/dist/config.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +47 -1
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +49 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +7 -3
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/jsonld.js +1 -1
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +28 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm/anthropic.d.ts +3 -0
- package/dist/extraction/llm/anthropic.d.ts.map +1 -0
- package/dist/extraction/llm/anthropic.js +33 -0
- package/dist/extraction/llm/anthropic.js.map +1 -0
- package/dist/extraction/llm/cache.d.ts +5 -0
- package/dist/extraction/llm/cache.d.ts.map +1 -0
- package/dist/extraction/llm/cache.js +35 -0
- package/dist/extraction/llm/cache.js.map +1 -0
- package/dist/extraction/llm/gemini.d.ts +3 -0
- package/dist/extraction/llm/gemini.d.ts.map +1 -0
- package/dist/extraction/llm/gemini.js +35 -0
- package/dist/extraction/llm/gemini.js.map +1 -0
- package/dist/extraction/llm/groq.d.ts +3 -0
- package/dist/extraction/llm/groq.d.ts.map +1 -0
- package/dist/extraction/llm/groq.js +63 -0
- package/dist/extraction/llm/groq.js.map +1 -0
- package/dist/extraction/llm/hash.d.ts +3 -0
- package/dist/extraction/llm/hash.d.ts.map +1 -0
- package/dist/extraction/llm/hash.js +22 -0
- package/dist/extraction/llm/hash.js.map +1 -0
- package/dist/extraction/llm/openai.d.ts +3 -0
- package/dist/extraction/llm/openai.d.ts.map +1 -0
- package/dist/extraction/llm/openai.js +38 -0
- package/dist/extraction/llm/openai.js.map +1 -0
- package/dist/extraction/llm/select.d.ts +5 -0
- package/dist/extraction/llm/select.d.ts.map +1 -0
- package/dist/extraction/llm/select.js +27 -0
- package/dist/extraction/llm/select.js.map +1 -0
- package/dist/extraction/llm/types.d.ts +24 -0
- package/dist/extraction/llm/types.d.ts.map +1 -0
- package/dist/extraction/llm/types.js +2 -0
- package/dist/extraction/llm/types.js.map +1 -0
- package/dist/extraction/llm/validate.d.ts +6 -0
- package/dist/extraction/llm/validate.d.ts.map +1 -0
- package/dist/extraction/llm/validate.js +63 -0
- package/dist/extraction/llm/validate.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +129 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown.d.ts +9 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +52 -3
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +17 -5
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +2 -3
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +81 -11
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +2 -3
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +4 -5
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +2 -3
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +3 -4
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +203 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/fetch/router.d.ts +2 -1
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +19 -1
- package/dist/fetch/router.js.map +1 -1
- package/dist/instructions.d.ts +8 -8
- package/dist/instructions.d.ts.map +1 -1
- package/dist/instructions.js +48 -41
- package/dist/instructions.js.map +1 -1
- package/dist/logger.d.ts +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/research/brief.js +1 -1
- package/dist/research/brief.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +260 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/highlights.d.ts +11 -2
- package/dist/search/highlights.d.ts.map +1 -1
- package/dist/search/highlights.js +131 -48
- package/dist/search/highlights.js.map +1 -1
- package/dist/search/multi-query.d.ts +1 -0
- package/dist/search/multi-query.d.ts.map +1 -1
- package/dist/search/multi-query.js +13 -0
- package/dist/search/multi-query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -2
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +16 -44
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/download.d.ts +9 -0
- package/dist/search/reranker/download.d.ts.map +1 -0
- package/dist/search/reranker/download.js +77 -0
- package/dist/search/reranker/download.js.map +1 -0
- package/dist/search/reranker/models.d.ts +14 -0
- package/dist/search/reranker/models.d.ts.map +1 -0
- package/dist/search/reranker/models.js +37 -0
- package/dist/search/reranker/models.js.map +1 -0
- package/dist/search/reranker/onnx.d.ts +13 -0
- package/dist/search/reranker/onnx.d.ts.map +1 -0
- package/dist/search/reranker/onnx.js +70 -0
- package/dist/search/reranker/onnx.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +12 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +26 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/tokenizer.d.ts +30 -0
- package/dist/search/reranker/tokenizer.d.ts.map +1 -0
- package/dist/search/reranker/tokenizer.js +49 -0
- package/dist/search/reranker/tokenizer.js.map +1 -0
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +38 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +4 -0
- package/dist/search/truncate.d.ts.map +1 -1
- package/dist/search/truncate.js +13 -0
- package/dist/search/truncate.js.map +1 -1
- package/dist/server/backend-status.js +2 -2
- package/dist/server/backend-status.js.map +1 -1
- package/dist/server/tool-schemas.d.ts +503 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +425 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +14 -339
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts.map +1 -1
- package/dist/tools/agent.js +36 -0
- package/dist/tools/agent.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +37 -2
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +19 -3
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +44 -7
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts.map +1 -1
- package/dist/tools/find-similar.js +32 -1
- package/dist/tools/find-similar.js.map +1 -1
- package/dist/tools/research.d.ts.map +1 -1
- package/dist/tools/research.js +34 -1
- package/dist/tools/research.js.map +1 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +98 -54
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +65 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +13 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +10 -4
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -64
- package/dist/search/flashrank.js.map +0 -1
package/dist/research/brief.js
CHANGED
|
@@ -9,7 +9,7 @@ const MIN_PHRASE_LEN = 4;
|
|
|
9
9
|
// shape to produce the final report without needing to re-read raw sources.
|
|
10
10
|
export async function buildResearchBrief(question, sources, subQueries, perSourceCharCap, totalSourcesCharCap, queryType = 'general', comparisonEntities = []) {
|
|
11
11
|
const fetched = sources.filter((s) => s.fetched && s.markdown_content.length > 0);
|
|
12
|
-
// Highlights reuse the
|
|
12
|
+
// Highlights reuse the ONNX-reranker-or-paragraph scorer so briefs align with
|
|
13
13
|
// whatever format='highlights' produces for single-query searches.
|
|
14
14
|
const searchItems = fetched.map((s) => ({
|
|
15
15
|
title: s.title,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"brief.js","sourceRoot":"","sources":["../../src/research/brief.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,cAAc,GAAG,EAAE,CAAC;AAC1B,MAAM,mBAAmB,GAAG,GAAG,CAAC;AAChC,MAAM,UAAU,GAAG,CAAC,CAAC;AACrB,MAAM,cAAc,GAAG,EAAE,CAAC;AAC1B,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,uEAAuE;AACvE,0EAA0E;AAC1E,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,OAAyB,EACzB,UAAoB,EACpB,gBAAwB,EACxB,mBAA2B,EAC3B,YAAuB,SAAS,EAChC,qBAA+B,EAAE;IAEjC,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAElF,
|
|
1
|
+
{"version":3,"file":"brief.js","sourceRoot":"","sources":["../../src/research/brief.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAE5D,MAAM,cAAc,GAAG,EAAE,CAAC;AAC1B,MAAM,mBAAmB,GAAG,GAAG,CAAC;AAChC,MAAM,UAAU,GAAG,CAAC,CAAC;AACrB,MAAM,cAAc,GAAG,EAAE,CAAC;AAC1B,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,uEAAuE;AACvE,0EAA0E;AAC1E,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,OAAyB,EACzB,UAAoB,EACpB,gBAAwB,EACxB,mBAA2B,EAC3B,YAAuB,SAAS,EAChC,qBAA+B,EAAE;IAEjC,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAElF,8EAA8E;IAC9E,mEAAmE;IACnE,MAAM,WAAW,GAAuB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1D,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,OAAO,EAAE,CAAC,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;QACzC,gBAAgB,EAAE,CAAC,CAAC,gBAAgB;QACpC,eAAe,EAAE,CAAC,CAAC,eAAe;KACnC,CAAC,CAAC,CAAC;IAEJ,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,iBAAiB,CAAC,QAAQ,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC;IAEtF,MAAM,MAAM,GAAG,WAAW,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IAChD,MAAM,WAAW,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,eAAe,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;IACvD,MAAM,IAAI,GAAG,UAAU,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IAE7C,MAAM,UAAU,GAAG,SAAS,KAAK,YAAY,IAAI,kBAAkB,CAAC,MAAM,IAAI,CAAC;QAC7E,CAAC,CAAC,sBAAsB,CAAC,kBAAkB,EAAE,OAAO,CAAC;QACrD,CAAC,CAAC,SAAS,CAAC;IAEd,OAAO;QACL,MAAM;QACN,UAAU;QACV,YAAY,EAAE,WAAW;QACzB,mBAAmB,EAAE,gBAAgB;QACrC,sBAAsB,EAAE,mBAAmB;QAC3C,QAAQ,EAAE;YACR,QAAQ,EAAE;gBACR,YAAY,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;gBACrC,gBAAgB,EAAE,eAAe;aAClC;YACD,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrC,IAAI;SACL;QACD,UAAU,EAAE,SAAS;KACtB,CAAC;AACJ,CAAC;AAED,yEAAyE;AACzE,4DAA4D;AAC5D,SAAS,WAAW,CAAC,UAAoB,EAAE,OAAyB;IAClE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,MAAM,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACjD,CAAC;IACD,MAAM,MAAM,GAAG,OAAO;SACnB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,SA
AS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAC9C,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC;IACnD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;AAC7C,CAAC;AAED,4EAA4E;AAC5E,qEAAqE;AACrE,SAAS,gBAAgB,CAAC,OAAyB;IACjD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC;QACnF,MAAM,KAAK,GAAG,yBAAyB,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC;QAC5D,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,GAAG,mBAAmB;YAChD,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,GAAG,GAAG;YACzD,CAAC,CAAC,KAAK,CAAC;QACV,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpB,CAAC;IACD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAyB;IAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,yDAAyD;IACzD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAuB,CAAC;IAEjD,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC9C,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,gBAAgB,CAAC,WAAW,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,OAAO;aAClB,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;aAC5B,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,cAAc,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEnE,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;QACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/C,IAAI,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC;gBAAE,SAAS;YACxC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAE1B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC;gBAAE,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;YAC7D,SAAS,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,MAAM,UAAU,GAAqB,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,IAAI,SAAS,EAAE,CAAC;QAChD,IAAI,aAAa,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;Y
AC5B,UAAU,CAAC,IAAI,CAAC;gBACd,OAAO,EAAE,MAAM;gBACf,cAAc,EAAE,CAAC,GAAG,aAAa,CAAC,CAAC,IAAI,EAAE;gBACzC,UAAU,EAAE,aAAa,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ;aACxD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IAC7E,OAAO,sBAAsB,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;AACrE,CAAC;AAED,SAAS,sBAAsB,CAAC,IAAsB;IACpD,MAAM,IAAI,GAAqB,EAAE,CAAC;IAClC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IAEpC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACrC,wEAAwE;QACxE,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAClE,IAAI,YAAY,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAElE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACf,KAAK,MAAM,CAAC,IAAI,KAAK;YAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,UAAU,CAAC,UAAoB,EAAE,OAAyB;IACjE,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEpF,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,2CAA2C;QAC3C,MAAM,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE;aAC9B,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC;aAC5B,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,cAAc,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEnE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEjC,wDAAwD;QACxD,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACnE,MAAM,QAAQ,GAAG,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;QAEtC,IAAI,QAAQ,GAAG,GAAG,EAAE,CAAC;YACnB,IAAI,CAAC,IAAI,CAAC,0BAA0B,KAAK,GAAG,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,sBAAsB,CAC7B,QAAkB,EAClB,OAAyB;IAEzB,MAAM,gBAAgB,GAAa,EAAE,CAAC;IACtC,MAAM,YAAY,GAA
G,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAErF,oDAAoD;IACpD,MAAM,eAAe,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM;QAC5E,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,EAAE,aAAa,EAAE,OAAO;QAChE,aAAa,EAAE,aAAa,EAAE,WAAW,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;IAErE,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;QACnC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC;YAAE,SAAS;QAE3C,wCAAwC;QACxC,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE;YACrC,MAAM,WAAW,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;YACpC,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAC9C,IAAI,GAAG,KAAK,CAAC,CAAC;gBAAE,OAAO,KAAK,CAAC;YAC7B,2CAA2C;YAC3C,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;YACtF,OAAO,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QAEH,IAAI,UAAU,EAAE,CAAC;YACf,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,iBAAiB,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,gBAAgB,CAAC,CAAC,EAAE,CAAC;AACzE,CAAC;AAED,SAAS,yBAAyB,CAAC,QAAgB;IACjD,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAChE,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,CAAC,MAAM,GAAG,EAAE;YAAE,SAAS;QAC5B,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC;YAAE,SAAS;QAC5E,OAAO,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,MAAM,CAAC,IAAc;IAC5B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACzB,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS;IAC9D,MAAM,EAAE,OAA
O,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACjE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9D,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IAC/D,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACjE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS;IACpE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IAChE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;CACzC,CAAC,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Citation, EvidenceItem, SearchInput, SearchOutput, SearchResultItem, SourceSpan } from '../types.js';
|
|
2
|
+
export interface BuildEvidenceOptions {
|
|
3
|
+
maxTokensOut?: number;
|
|
4
|
+
maxItems?: number;
|
|
5
|
+
}
|
|
6
|
+
export declare function buildEvidenceFromMarkdown(query: string, title: string, url: string, markdown: string, opts?: BuildEvidenceOptions): Promise<EvidenceItem[]>;
|
|
7
|
+
export declare function applyAggregateMarkdownBudget<T>(items: T[], getBody: (item: T) => string, setBody: (item: T, body: string) => void, opts: {
|
|
8
|
+
maxTokensOut?: number;
|
|
9
|
+
maxChars?: number;
|
|
10
|
+
}): void;
|
|
11
|
+
export declare function applyTokenBudget(items: EvidenceItem[], maxTokensOut: number): EvidenceItem[];
|
|
12
|
+
export declare function stableCitationId(url: string, start: number): string;
|
|
13
|
+
export declare function buildEvidenceItem(input: {
|
|
14
|
+
title: string;
|
|
15
|
+
url: string;
|
|
16
|
+
sectionHeading: string | null;
|
|
17
|
+
excerpt: string;
|
|
18
|
+
score: number;
|
|
19
|
+
sourceSpan: SourceSpan;
|
|
20
|
+
}): EvidenceItem;
|
|
21
|
+
export declare function applyEvidenceDefault(input: SearchInput, output: SearchOutput, results: SearchResultItem[], query: string): Promise<void>;
|
|
22
|
+
export declare function buildCitationsFromEvidence(results: SearchResultItem[], evidence: EvidenceItem[], baseCitations: Citation[]): Citation[];
|
|
23
|
+
export declare function renderCitationsXml(citations: Citation[]): string;
|
|
24
|
+
export declare function escapeXml(s: string): string;
|
|
25
|
+
//# sourceMappingURL=evidence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evidence.d.ts","sourceRoot":"","sources":["../../src/search/evidence.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EAER,YAAY,EACZ,WAAW,EACX,YAAY,EACZ,gBAAgB,EAChB,UAAU,EACX,MAAM,aAAa,CAAC;AAYrB,MAAM,WAAW,oBAAoB;IACnC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAMD,wBAAsB,yBAAyB,CAC7C,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,MAAM,EAChB,IAAI,GAAE,oBAAyB,GAC9B,OAAO,CAAC,YAAY,EAAE,CAAC,CA+CzB;AAMD,wBAAgB,4BAA4B,CAAC,CAAC,EAC5C,KAAK,EAAE,CAAC,EAAE,EACV,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,EAC5B,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,KAAK,IAAI,EACxC,IAAI,EAAE;IAAE,YAAY,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACjD,IAAI,CAoBN;AAKD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,YAAY,EAAE,EAAE,YAAY,EAAE,MAAM,GAAG,YAAY,EAAE,CAa5F;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAEnE;AAED,wBAAgB,iBAAiB,CAAC,KAAK,EAAE;IACvC,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,UAAU,CAAC;CACxB,GAAG,YAAY,CAUf;AAED,wBAAsB,oBAAoB,CACxC,KAAK,EAAE,WAAW,EAClB,MAAM,EAAE,YAAY,EACpB,OAAO,EAAE,gBAAgB,EAAE,EAC3B,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAsFf;AAED,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,gBAAgB,EAAE,EAC3B,QAAQ,EAAE,YAAY,EAAE,EACxB,aAAa,EAAE,QAAQ,EAAE,GACxB,QAAQ,EAAE,CAkCZ;AAED,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,MAAM,CAQhE;AAED,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAO3C"}
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { extractHighlights } from './highlights.js';
|
|
3
|
+
import { countTokens, truncateByTokens } from './tokens.js';
|
|
4
|
+
import { applyOutputBudget } from './truncate.js';
|
|
5
|
+
import { createLogger } from '../logger.js';
|
|
6
|
+
// Module logger, created with the 'search' label.
const log = createLogger('search');
|
|
7
|
+
// Token cap applyEvidenceDefault falls back to when input.max_tokens_out is not set.
const DEFAULT_MAX_TOKENS_OUT = 4000;
|
|
8
|
+
// Number of highlights applyEvidenceDefault requests from extractHighlights.
const MAX_EVIDENCE_PASSAGES = 20;
|
|
9
|
+
// buildEvidenceFromMarkdown stops once a truncated excerpt consists of nothing
// but this text. NOTE(review): presumably the suffix truncateByTokens appends
// when it cuts text - confirm against tokens.js.
const TRUNCATION_MARKER = '[... content truncated]';
|
|
10
|
+
// Build evidence items from a single page's markdown. Used by per-page tools
// (fetch, crawl pages, find_similar results, agent/research sources). The
// returned list is already truncated to fit `maxTokensOut` if provided; pass
// `maxItems` to cap how many highlights are projected.
//
// Params:
//   query    - text handed to extractHighlights to score passages against.
//   title    - fallback title for highlights that carry no source_title.
//   url      - fallback URL for highlights that carry no source_url.
//   markdown - page body; a falsy value short-circuits to [].
//   opts     - { maxItems?: number (default 1), maxTokensOut?: number }.
// Resolves to evidence items ordered by descending relevance_score. When
// extractHighlights throws, the error is logged at debug level and the
// function resolves to [].
export async function buildEvidenceFromMarkdown(query, title, url, markdown, opts = {}) {
    if (!markdown)
        return [];
    // Clamp the cap at zero: a negative value would make `slice(0, maxItems)`
    // below mean "drop the last n highlights" instead of "keep none".
    const maxItems = Math.max(opts.maxItems ?? 1, 0);
    // extractHighlights works on result lists, so wrap the page as a
    // one-element list.
    const synthetic = [{
            title,
            url,
            snippet: '',
            markdown_content: markdown,
            relevance_score: 1,
        }];
    let result;
    try {
        // Always request at least one highlight, even when the cap is 0.
        result = await extractHighlights(query, synthetic, Math.max(maxItems, 1));
    }
    catch (err) {
        log.debug('buildEvidenceFromMarkdown: extractHighlights failed', { error: String(err) });
        return [];
    }
    // Sort a copy so the helper's own array is left untouched.
    const ranked = result.highlights
        .slice()
        .sort((a, b) => b.relevance_score - a.relevance_score)
        .slice(0, maxItems);
    const out = [];
    const budget = opts.maxTokensOut;
    let used = 0;
    for (const h of ranked) {
        let excerpt = h.text;
        if (budget !== undefined) {
            const remaining = budget - used;
            if (remaining <= 0)
                break;
            excerpt = truncateByTokens(h.text, remaining);
            // Nothing useful survived truncation (empty, or only the marker).
            if (!excerpt || excerpt.trim() === TRUNCATION_MARKER)
                break;
        }
        // Highlights without a span fall back to one covering the excerpt itself.
        const span = h.source_span ?? { start: 0, end: excerpt.length };
        out.push(buildEvidenceItem({
            title: h.source_title || title,
            url: h.source_url || url,
            sectionHeading: h.section_heading ?? null,
            excerpt,
            score: h.relevance_score,
            sourceSpan: span,
        }));
        if (budget !== undefined)
            used += countTokens(excerpt);
    }
    return out;
}
|
|
64
|
+
// Walk items in order, capping each item's body text against a shared token
// budget. Bodies past the budget are cleared (set to ''). Used by all
// multi-item tools (search markdown_content, find_similar, crawl, research,
// agent) so per-tool max_tokens_out is an aggregate cap, not per-item.
//
// Params:
//   items   - list to walk; mutated only through setBody.
//   getBody - reads an item's body; items with a falsy body are skipped.
//   setBody - writes the trimmed (or cleared) body back onto the item.
//   opts    - { maxTokensOut?: number, maxChars?: number }. Optional: when
//             omitted, bodies are passed through applyOutputBudget with no
//             limits, matching the `opts = {}` default used by
//             buildEvidenceFromMarkdown.
// Returns nothing.
export function applyAggregateMarkdownBudget(items, getBody, setBody, opts = {}) {
    const budget = opts.maxTokensOut;
    let used = 0;
    for (const item of items) {
        const body = getBody(item);
        if (!body)
            continue;
        if (budget === undefined) {
            // No token cap: only the per-body character cap (if any) applies.
            setBody(item, applyOutputBudget(body, { maxChars: opts.maxChars }));
            continue;
        }
        const remaining = budget - used;
        if (remaining <= 0) {
            // Shared budget already spent by earlier items.
            setBody(item, '');
            continue;
        }
        const trimmed = applyOutputBudget(body, { maxTokensOut: remaining, maxChars: opts.maxChars });
        setBody(item, trimmed);
        // Charge what was actually kept, not the original body size.
        used += countTokens(trimmed);
    }
}
|
|
91
|
+
// Apply an aggregate token budget across an already-built list of evidence
// items, truncating excerpts in order until the budget is exhausted. Items
// past the budget are dropped.
//
// Params:
//   items        - evidence items, processed in the order given.
//   maxTokensOut - total token allowance; a value <= 0 yields [].
// Returns a new array of shallow copies carrying the (possibly truncated)
// excerpt; the input items are not modified.
export function applyTokenBudget(items, maxTokensOut) {
    if (maxTokensOut <= 0)
        return [];
    const out = [];
    let used = 0;
    for (const item of items) {
        const remaining = maxTokensOut - used;
        if (remaining <= 0)
            break;
        const excerpt = truncateByTokens(item.excerpt, remaining);
        // Stop when truncation left nothing useful: an empty string, or only the
        // truncation marker (the same check buildEvidenceFromMarkdown applies),
        // so no content-free evidence item is emitted.
        if (!excerpt || excerpt.trim() === TRUNCATION_MARKER)
            break;
        out.push({ ...item, excerpt });
        used += countTokens(excerpt);
    }
    return out;
}
|
|
111
|
+
// Derive a deterministic citation id from a URL and a span start offset: the
// first 12 hex characters of the SHA-1 digest of `${url}#${start}`.
export function stableCitationId(url, start) {
    const key = `${url}#${start}`;
    const digest = createHash('sha1').update(key).digest('hex');
    return digest.slice(0, 12);
}
|
|
114
|
+
// Map the camelCase builder input onto the snake_case evidence record and
// attach a citation id computed by stableCitationId from the URL and the
// span's start offset.
export function buildEvidenceItem(input) {
    const { title, url, sectionHeading, excerpt, score, sourceSpan } = input;
    const citationId = stableCitationId(url, sourceSpan.start);
    return {
        title,
        url,
        section_heading: sectionHeading,
        excerpt,
        score,
        citation_id: citationId,
        source_span: sourceSpan,
    };
}
|
|
125
|
+
// Populate `output` with evidence and citations derived from `results`, then
// strip or budget each result's markdown_content.
//
// Params:
//   input   - reads include_full_markdown (default false), citation_format
//             (default 'numbered') and max_tokens_out.
//   output  - mutated in place: may gain evidence, citations, citations_xml
//             and warning.
//   results - search results; their markdown_content is mutated at the end.
//   query   - text handed to extractHighlights to score passages against.
// Resolves to undefined. A failure inside extractHighlights is logged at debug
// level and reported through output.warning; it does not reject.
export async function applyEvidenceDefault(input, output, results, query) {
    if (results.length === 0)
        return;
    const includeFullMarkdown = input.include_full_markdown ?? false;
    const citationFormat = input.citation_format ?? 'numbered';
    const maxTokensOut = input.max_tokens_out ?? DEFAULT_MAX_TOKENS_OUT;
    let highlightsResult;
    try {
        highlightsResult = await extractHighlights(query, results, MAX_EVIDENCE_PASSAGES);
    }
    catch (err) {
        log.debug('evidence extraction failed', { error: String(err) });
        const msg = 'evidence extraction failed; results returned without highlights';
        output.warning = output.warning ? `${output.warning}; ${msg}` : msg;
        highlightsResult = { highlights: [], citations: [], reranker_used: false };
    }
    // Sort a copy so the helper's own array is left untouched.
    const ranked = highlightsResult.highlights
        .slice()
        .sort((a, b) => b.relevance_score - a.relevance_score);
    // When the caller sets max_tokens_out explicitly, evidence shares the budget
    // with citations/results metadata. Reserve room for the structural overhead
    // so the total stringified output stays under the cap.
    // NOTE: this relies on JSON.stringify dropping `undefined` keys, and on
    // applyEvidenceDefault running before any post-evidence mutation that grows
    // the skeleton (e.g. output.warning); reserve overhead first, mutate later.
    let evidenceBudget = maxTokensOut;
    if (input.max_tokens_out !== undefined) {
        const skeleton = { ...output, citations: undefined, evidence: undefined, citations_xml: undefined };
        // Without include_full_markdown every result's markdown_content is
        // removed before returning (see the end of this function), so it must
        // not be billed as overhead. Counting full page bodies here could drive
        // the evidence budget to 0 and leave the caller with neither evidence
        // nor markdown. The replacer drops the field only on the result objects
        // themselves (`this` is the object that holds the key).
        const strippedLater = includeFullMarkdown ? null : new Set(results);
        const omitStrippedMarkdown = strippedLater === null
            ? undefined
            : function (key, value) {
                return key === 'markdown_content' && strippedLater.has(this) ? undefined : value;
            };
        const skeletonTokens = countTokens(JSON.stringify(skeleton, omitStrippedMarkdown));
        const resultsTokens = countTokens(JSON.stringify(results, omitStrippedMarkdown));
        const overhead = skeletonTokens + resultsTokens;
        evidenceBudget = Math.max(0, maxTokensOut - overhead);
    }
    const evidence = [];
    let usedTokens = 0;
    for (const h of ranked) {
        if (usedTokens >= evidenceBudget)
            break;
        const remaining = evidenceBudget - usedTokens;
        const excerpt = truncateByTokens(h.text, remaining);
        // Skip highlights that truncation reduced to nothing, or to only the
        // truncation marker (the same check buildEvidenceFromMarkdown applies).
        if (!excerpt || excerpt.trim() === TRUNCATION_MARKER)
            continue;
        // Highlights without a span fall back to one covering the excerpt itself.
        const span = h.source_span ?? { start: 0, end: excerpt.length };
        const item = buildEvidenceItem({
            title: h.source_title,
            url: h.source_url,
            sectionHeading: h.section_heading ?? null,
            excerpt,
            score: h.relevance_score,
            sourceSpan: span,
        });
        evidence.push(item);
        usedTokens += countTokens(excerpt);
    }
    if (evidence.length > 0) {
        output.evidence = evidence;
    }
    const citations = buildCitationsFromEvidence(results, evidence, highlightsResult.citations);
    if (citationFormat === 'numbered' || citationFormat === 'json') {
        if (citations.length > 0)
            output.citations = citations;
    }
    else if (citationFormat === 'anthropic_tags') {
        if (citations.length > 0) {
            output.citations = citations;
            output.citations_xml = renderCitationsXml(citations);
        }
    }
    // Terminal mutation: applyEvidenceDefault is the last step before return.
    if (!includeFullMarkdown) {
        for (const r of results) {
            if (r.markdown_content !== undefined)
                r.markdown_content = undefined;
        }
    }
    else if (input.max_tokens_out !== undefined) {
        // Aggregate cap across all results in score order — sum of markdown_content
        // tokens stays under max_tokens_out; bodies past the budget are dropped.
        applyAggregateMarkdownBudget(results, (r) => (typeof r.markdown_content === 'string' ? r.markdown_content : ''), (r, body) => { r.markdown_content = body; }, { maxTokensOut: input.max_tokens_out });
    }
}
|
|
206
|
+
/**
 * Build one citation per search result, linking each to its best surviving
 * evidence passage.
 *
 * - The passage id for a URL is the `citation_id` of the FIRST evidence item
 *   seen for that URL (later items for the same URL are ignored).
 * - If a base citation exists for the result's URL it is shallow-copied;
 *   otherwise a citation is synthesised from the result, using its 1-based
 *   position as `index` and an empty string when `snippet` is nullish.
 * - When no evidence passage exists for the URL, `citation_id` is removed
 *   from the copy so that a missing id signals a source-level citation.
 *
 * @param results       Search results, in output order.
 * @param evidence      Evidence items carrying `url` and `citation_id`.
 * @param baseCitations Previously built citations, matched to results by `url`.
 * @returns A new array of citations, one per result; inputs are not mutated.
 */
export function buildCitationsFromEvidence(results, evidence, baseCitations) {
    // First evidence item per URL wins.
    const passageIdByUrl = new Map();
    for (const item of evidence) {
        if (passageIdByUrl.has(item.url))
            continue;
        passageIdByUrl.set(item.url, item.citation_id);
    }

    // Last base citation per URL wins (plain overwrite).
    const upstreamByUrl = new Map();
    for (const upstream of baseCitations) {
        upstreamByUrl.set(upstream.url, upstream);
    }

    const citations = [];
    for (const [position, result] of results.entries()) {
        const upstream = upstreamByUrl.get(result.url);
        let citation;
        if (upstream) {
            // Copy so the caller's base citation is never modified.
            citation = { ...upstream };
        }
        else {
            citation = {
                index: position + 1,
                url: result.url,
                title: result.title,
                snippet: result.snippet ?? '',
            };
        }

        const passageId = passageIdByUrl.get(result.url);
        if (passageId === undefined) {
            // No surviving passage for this source: drop any inherited id.
            delete citation.citation_id;
        }
        else {
            citation.citation_id = passageId;
        }
        citations.push(citation);
    }
    return citations;
}
|
|
243
|
+
/**
 * Render citations as newline-separated `<source id="…">…</source>` tags.
 *
 * Each tag body is the XML-escaped text "title\nurl\nsnippet". The `id`
 * attribute is the citation's `citation_id`, or, when that is nullish, the
 * value of `stableCitationId(url, 0)`.
 *
 * @param citations Citations with `title`, `url`, `snippet` and optional `citation_id`.
 * @returns The tags joined with "\n" (empty string for an empty list).
 */
export function renderCitationsXml(citations) {
    const toSourceTag = ({ citation_id: passageId, url, title, snippet }) => {
        const sourceId = passageId ?? stableCitationId(url, 0);
        const payload = escapeXml(`${title}\n${url}\n${snippet}`);
        return `<source id="${sourceId}">${payload}</source>`;
    };
    const tags = citations.map((citation) => toSourceTag(citation));
    return tags.join('\n');
}
|
|
252
|
+
/**
 * Escape the five characters that have predefined XML entities so the text
 * can be embedded safely in element content or a quoted attribute value.
 *
 * @param {string} s Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by
 *   `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;` respectively.
 */
export function escapeXml(s) {
    return s
        // `&` must be handled first, otherwise the ampersands introduced by
        // the later replacements would themselves be escaped again.
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
|
|
260
|
+
//# sourceMappingURL=evidence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evidence.js","sourceRoot":"","sources":["../../src/search/evidence.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAUzC,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACpD,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,sBAAsB,GAAG,IAAI,CAAC;AACpC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AACjC,MAAM,iBAAiB,GAAG,yBAAyB,CAAC;AAOpD,6EAA6E;AAC7E,0EAA0E;AAC1E,6EAA6E;AAC7E,uDAAuD;AACvD,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,KAAa,EACb,KAAa,EACb,GAAW,EACX,QAAgB,EAChB,OAA6B,EAAE;IAE/B,IAAI,CAAC,QAAQ;QAAE,OAAO,EAAE,CAAC;IACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;IACpC,MAAM,SAAS,GAAuB,CAAC;YACrC,KAAK;YACL,GAAG;YACH,OAAO,EAAE,EAAE;YACX,gBAAgB,EAAE,QAAQ;YAC1B,eAAe,EAAE,CAAC;SACnB,CAAC,CAAC;IAEH,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC;IAC5E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,qDAAqD,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACzF,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU;SAC7B,KAAK,EAAE;SACP,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe,CAAC;SACrD,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAEtB,MAAM,GAAG,GAAmB,EAAE,CAAC;IAC/B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC;IACjC,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,IAAI,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC;QACrB,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,SAAS,GAAG,MAAM,GAAG,IAAI,CAAC;YAChC,IAAI,SAAS,IAAI,CAAC;gBAAE,MAAM;YAC1B,OAAO,GAAG,gBAAgB,CAAC,CAAC,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC9C,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,KAAK,iBAAiB;gBAAE,MAAM;QAC9D,CAAC;QACD,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC;QAChE,GAAG,CAAC,IAAI,CAAC,iBAAiB,CAAC;YACzB,KAAK,EAAE,CAAC,CAAC,YAAY,IAAI,KAAK;YAC9B,GAAG,EAAE,CAAC,CAAC,UAAU,IAAI,GAAG;YACxB,cAAc,EAAE,C
AAC,CAAC,eAAe,IAAI,IAAI;YACzC,OAAO;YACP,KAAK,EAAE,CAAC,CAAC,eAAe;YACxB,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC,CAAC;QACJ,IAAI,MAAM,KAAK,SAAS;YAAE,IAAI,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,4EAA4E;AAC5E,sEAAsE;AACtE,4EAA4E;AAC5E,uEAAuE;AACvE,MAAM,UAAU,4BAA4B,CAC1C,KAAU,EACV,OAA4B,EAC5B,OAAwC,EACxC,IAAkD;IAElD,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC;IACjC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3B,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,SAAS,GAAG,MAAM,GAAG,IAAI,CAAC;YAChC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;gBACnB,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBAClB,SAAS;YACX,CAAC;YACD,MAAM,OAAO,GAAG,iBAAiB,CAAC,IAAI,EAAE,EAAE,YAAY,EAAE,SAAS,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YAC9F,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACvB,IAAI,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;QAC/B,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,iBAAiB,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YACrE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;AACH,CAAC;AAED,2EAA2E;AAC3E,2EAA2E;AAC3E,+BAA+B;AAC/B,MAAM,UAAU,gBAAgB,CAAC,KAAqB,EAAE,YAAoB;IAC1E,IAAI,YAAY,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IACjC,MAAM,GAAG,GAAmB,EAAE,CAAC;IAC/B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,YAAY,GAAG,IAAI,CAAC;QACtC,IAAI,SAAS,IAAI,CAAC;YAAE,MAAM;QAC1B,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1D,IAAI,CAAC,OAAO;YAAE,MAAM;QACpB,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;QAC/B,IAAI,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,KAAa;IACzD,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACjF,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,KAOjC;IACC,OAAO;QACL,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,eAAe,EAAE,KAAK,CAAC,cAAc;QACrC,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,KAAK,EAAE,K
AAK,CAAC,KAAK;QAClB,WAAW,EAAE,gBAAgB,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC;QAChE,WAAW,EAAE,KAAK,CAAC,UAAU;KAC9B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,KAAkB,EAClB,MAAoB,EACpB,OAA2B,EAC3B,KAAa;IAEb,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAEjC,MAAM,mBAAmB,GAAG,KAAK,CAAC,qBAAqB,IAAI,KAAK,CAAC;IACjE,MAAM,cAAc,GAAmB,KAAK,CAAC,eAAe,IAAI,UAAU,CAAC;IAC3E,MAAM,YAAY,GAAG,KAAK,CAAC,cAAc,IAAI,sBAAsB,CAAC;IAEpE,IAAI,gBAAgB,CAAC;IACrB,IAAI,CAAC;QACH,gBAAgB,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,qBAAqB,CAAC,CAAC;IACpF,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,4BAA4B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,MAAM,GAAG,GAAG,iEAAiE,CAAC;QAC9E,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,OAAO,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACpE,gBAAgB,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC;IAC7E,CAAC;IAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,UAAU;SACvC,KAAK,EAAE;SACP,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC;IAEzD,6EAA6E;IAC7E,4EAA4E;IAC5E,uDAAuD;IACvD,wEAAwE;IACxE,4EAA4E;IAC5E,4EAA4E;IAC5E,IAAI,cAAc,GAAG,YAAY,CAAC;IAClC,IAAI,KAAK,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAiB,EAAE,GAAG,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,aAAa,EAAE,SAAS,EAAE,CAAC;QAClH,MAAM,cAAc,GAAG,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;QAC7D,MAAM,aAAa,GAAG,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,cAAc,GAAG,aAAa,CAAC;QAChD,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,GAAG,QAAQ,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,IAAI,UAAU,IAAI,cAAc;YAAE,MAAM;QACxC,MAAM,SAAS,GAAG,cAAc,GAAG,UAAU,CAAC;QAC9C,MAAM,OAAO,GAAG,gBAAgB,CAAC,CAAC,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACpD,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC;QAChE,MAAM,IAAI,GAAG,iBAAiB,CAAC;YAC
7B,KAAK,EAAE,CAAC,CAAC,YAAY;YACrB,GAAG,EAAE,CAAC,CAAC,UAAU;YACjB,cAAc,EAAE,CAAC,CAAC,eAAe,IAAI,IAAI;YACzC,OAAO;YACP,KAAK,EAAE,CAAC,CAAC,eAAe;YACxB,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,UAAU,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC7B,CAAC;IAED,MAAM,SAAS,GAAG,0BAA0B,CAAC,OAAO,EAAE,QAAQ,EAAE,gBAAgB,CAAC,SAAS,CAAC,CAAC;IAE5F,IAAI,cAAc,KAAK,UAAU,IAAI,cAAc,KAAK,MAAM,EAAE,CAAC;QAC/D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;YAAE,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;IACzD,CAAC;SAAM,IAAI,cAAc,KAAK,gBAAgB,EAAE,CAAC;QAC/C,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAC7B,MAAM,CAAC,aAAa,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACzB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,CAAC,gBAAgB,KAAK,SAAS;gBAAE,CAAC,CAAC,gBAAgB,GAAG,SAAS,CAAC;QACvE,CAAC;IACH,CAAC;SAAM,IAAI,KAAK,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;QAC9C,4EAA4E;QAC5E,yEAAyE;QACzE,4BAA4B,CAC1B,OAAO,EACP,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,gBAAgB,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,CAAC,EACzE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC,CAAC,EAC3C,EAAE,YAAY,EAAE,KAAK,CAAC,cAAc,EAAE,CACvC,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,OAA2B,EAC3B,QAAwB,EACxB,aAAyB;IAEzB,gFAAgF;IAChF,4EAA4E;IAC5E,8EAA8E;IAC9E,wBAAwB;IACxB,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC/C,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC;YAAE,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC;IAC1E,CAAC;IACD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC9C,KAAK,MAAM,CAAC,IAAI,aAAa;QAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IAEvD,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,
CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,QAAQ,GAAa,IAAI;YAC7B,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE;YACb,CAAC,CAAC;gBACE,KAAK,EAAE,CAAC,GAAG,CAAC;gBACZ,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;aACzB,CAAC;QACN,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACxC,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,QAAQ,CAAC,WAAW,GAAG,OAAO,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,4EAA4E;YAC5E,OAAO,QAAQ,CAAC,WAAW,CAAC;QAC9B,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,SAAqB;IACtD,OAAO,SAAS;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACT,MAAM,EAAE,GAAG,CAAC,CAAC,WAAW,IAAI,gBAAgB,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9D,OAAO,eAAe,EAAE,KAAK,KAAK,WAAW,CAAC;IAChD,CAAC,CAAC;SACD,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,CAAS;IACjC,OAAO,CAAC;SACL,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC7B,CAAC"}
|
|
@@ -2,9 +2,18 @@ import type { SearchResultItem, Citation, Highlight } from '../types.js';
|
|
|
2
2
|
export interface HighlightSynthesisResult {
|
|
3
3
|
highlights: Highlight[];
|
|
4
4
|
citations: Citation[];
|
|
5
|
-
|
|
5
|
+
reranker_used: boolean;
|
|
6
6
|
}
|
|
7
|
-
export
|
|
7
|
+
/**
 * A passage cut out of a markdown document by `splitIntoPassages`.
 * `text` is the whitespace-trimmed block (capped in length by the splitter);
 * `charStart` is the offset in the source markdown where `text` begins and
 * `charEnd` is `charStart + text.length`.
 */
export interface Passage {
|
|
8
|
+
text: string;
|
|
9
|
+
charStart: number;
|
|
10
|
+
charEnd: number;
|
|
11
|
+
}
|
|
12
|
+
export declare function splitIntoPassages(markdown: string): Passage[];
|
|
13
|
+
/**
 * A `Passage` plus the text of the heading that `mapPassageHeadings`
 * associated with it; `sectionHeading` is `null` when no heading starts at
 * or before the passage.
 */
export interface AnnotatedPassage extends Passage {
|
|
14
|
+
sectionHeading: string | null;
|
|
15
|
+
}
|
|
16
|
+
export declare function mapPassageHeadings(markdown: string, passages: Passage[]): AnnotatedPassage[];
|
|
8
17
|
export declare function extractHighlights(query: string, results: SearchResultItem[], maxHighlights?: number): Promise<HighlightSynthesisResult>;
|
|
9
18
|
export declare function fallbackHighlights(results: SearchResultItem[], maxHighlights: number): Highlight[];
|
|
10
19
|
//# sourceMappingURL=highlights.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"highlights.d.ts","sourceRoot":"","sources":["../../src/search/highlights.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"highlights.d.ts","sourceRoot":"","sources":["../../src/search/highlights.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAYzE,MAAM,WAAW,wBAAwB;IACvC,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,aAAa,EAAE,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAwBD,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,EAAE,CA6B7D;AAOD,MAAM,WAAW,gBAAiB,SAAQ,OAAO;IAC/C,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAKD,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,EAAE,GAClB,gBAAgB,EAAE,CAapB;AAKD,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,gBAAgB,EAAE,EAC3B,aAAa,GAAE,MAA+B,GAC7C,OAAO,CAAC,wBAAwB,CAAC,CAmEnC;AAKD,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,gBAAgB,EAAE,EAC3B,aAAa,EAAE,MAAM,GACpB,SAAS,EAAE,CAkCb"}
|
|
@@ -1,28 +1,89 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { onnxRerank } from './reranker/onnx.js';
|
|
2
|
+
import { getConfig } from '../config.js';
|
|
2
3
|
import { createLogger } from '../logger.js';
|
|
4
|
+
import { parseHeadings, lineStartCharOffsets } from '../extraction/markdown.js';
|
|
3
5
|
const log = createLogger('search');
|
|
4
6
|
const MAX_PASSAGE_LENGTH = 500;
|
|
5
7
|
const MIN_PASSAGE_LENGTH = 50;
|
|
6
8
|
const DEFAULT_MAX_HIGHLIGHTS = 10;
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
9
|
+
/**
 * Decide whether an already-trimmed markdown block is worth keeping as a
 * passage.
 *
 * Rejected: blocks shorter than MIN_PASSAGE_LENGTH, blocks that start with
 * `#`, `|` or a triple backtick, and blocks that start with "- " and are at
 * most 120 characters long.
 *
 * @param trimmed Block text with surrounding whitespace already removed.
 * @returns true when the block should be kept.
 */
function shouldKeep(trimmed) {
    if (trimmed.length < MIN_PASSAGE_LENGTH)
        return false;
    const rejectedPrefixes = ['#', '|', '```'];
    if (rejectedPrefixes.some((prefix) => trimmed.startsWith(prefix)))
        return false;
    const isShortBullet = trimmed.startsWith('- ') && trimmed.length <= 120;
    return !isShortBullet;
}
|
|
22
|
+
/**
 * Split markdown into passages, one per blank-line-separated block.
 *
 * Each block is trimmed of surrounding whitespace, filtered through
 * `shouldKeep`, and cut to at most MAX_PASSAGE_LENGTH characters. The
 * returned offsets point back into the ORIGINAL markdown string:
 * `charStart` is where the trimmed text begins and `charEnd` is
 * `charStart + text.length`.
 *
 * @param markdown Source markdown; a falsy value yields an empty list.
 * @returns Array of `{ text, charStart, charEnd }` in document order.
 */
export function splitIntoPassages(markdown) {
    if (!markdown)
        return [];
    const passages = [];
    // Examine the raw block markdown.slice(from, to) and record it if it survives.
    const collect = (from, to) => {
        const block = markdown.slice(from, to);
        // Width of leading whitespace, and end index once trailing whitespace is dropped.
        const lead = block.length - block.trimStart().length;
        const tail = block.trimEnd().length;
        if (tail <= lead)
            return; // empty or whitespace-only block
        const charStart = from + lead;
        const body = block.slice(lead, tail);
        if (!shouldKeep(body))
            return;
        const text = body.slice(0, MAX_PASSAGE_LENGTH);
        passages.push({ text, charStart, charEnd: charStart + text.length });
    };
    let cursor = 0;
    for (const gap of markdown.matchAll(/\n\n+/g)) {
        collect(cursor, gap.index);
        cursor = gap.index + gap[0].length;
    }
    // Whatever follows the final separator (or the whole input if there was none).
    collect(cursor, markdown.length);
    return passages;
}
|
|
23
|
-
//
|
|
24
|
-
|
|
25
|
-
|
|
61
|
+
/**
 * Text-only view of `splitIntoPassages`, for callers that do not need the
 * character offsets.
 *
 * @param markdown Source markdown.
 * @returns The `text` of every passage, in document order.
 */
function splitIntoPassageStrings(markdown) {
    const texts = [];
    for (const passage of splitIntoPassages(markdown)) {
        texts.push(passage.text);
    }
    return texts;
}
|
|
65
|
+
/**
 * Attach a `sectionHeading` to every passage.
 *
 * Headings come from `parseHeadings(lines)` and are converted to character
 * offsets through `lineStartCharOffsets(lines)`, both computed once per call.
 * For each passage the heading list is scanned from the start; the scan
 * stops at the first heading whose offset is not <= the passage's
 * `charStart`, and the last heading accepted before that becomes
 * `sectionHeading` (`null` if none was accepted).
 * NOTE(review): this presumes `parseHeadings` returns headings in document
 * order — confirm against its implementation.
 *
 * @param markdown Source markdown the passages were cut from.
 * @param passages Passages carrying a `charStart` offset into `markdown`.
 * @returns New passage objects with `sectionHeading` added.
 */
export function mapPassageHeadings(markdown, passages) {
    const lines = markdown.split('\n');
    const headings = parseHeadings(lines);
    const lineOffsets = lineStartCharOffsets(lines);
    const anchors = headings.map(({ text, lineIndex }) => ({
        text,
        charStart: lineOffsets[lineIndex],
    }));
    const headingFor = (passage) => {
        let current = null;
        for (const anchor of anchors) {
            // Negated `<=` (not `>`) so a non-numeric offset still ends the scan.
            if (!(anchor.charStart <= passage.charStart))
                break;
            current = anchor.text;
        }
        return current;
    };
    return passages.map((passage) => ({ ...passage, sectionHeading: headingFor(passage) }));
}
|
|
84
|
+
// Score passages across all results and return the top N using the in-process
|
|
85
|
+
// ONNX reranker, with a graceful first-paragraph fallback when reranking is
|
|
86
|
+
// disabled or fails. Each Highlight carries a source_index suitable for citing.
|
|
26
87
|
export async function extractHighlights(query, results, maxHighlights = DEFAULT_MAX_HIGHLIGHTS) {
|
|
27
88
|
const citations = [];
|
|
28
89
|
const candidates = [];
|
|
@@ -36,66 +97,88 @@ export async function extractHighlights(query, results, maxHighlights = DEFAULT_
|
|
|
36
97
|
});
|
|
37
98
|
const source = r.markdown_content ?? r.snippet ?? '';
|
|
38
99
|
const passages = splitIntoPassages(source);
|
|
39
|
-
|
|
100
|
+
const annotated = mapPassageHeadings(source, passages);
|
|
101
|
+
for (const p of annotated) {
|
|
40
102
|
candidates.push({
|
|
41
|
-
text,
|
|
103
|
+
text: p.text,
|
|
42
104
|
sourceIndex: i + 1,
|
|
43
105
|
sourceUrl: r.url,
|
|
44
106
|
sourceTitle: r.title,
|
|
107
|
+
charStart: p.charStart,
|
|
108
|
+
charEnd: p.charEnd,
|
|
109
|
+
sectionHeading: p.sectionHeading,
|
|
45
110
|
});
|
|
46
111
|
}
|
|
47
112
|
}
|
|
48
113
|
if (candidates.length === 0) {
|
|
49
|
-
// No passages survived the min-length filter (common with snippets-only
|
|
50
|
-
// results). Fall back to snippet-level highlights so host LLMs still get
|
|
51
|
-
// structured evidence rather than an empty array.
|
|
52
114
|
return {
|
|
53
115
|
highlights: fallbackHighlights(results, maxHighlights),
|
|
54
116
|
citations,
|
|
55
|
-
|
|
117
|
+
reranker_used: false,
|
|
56
118
|
};
|
|
57
119
|
}
|
|
58
|
-
const
|
|
59
|
-
if (
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
.slice()
|
|
64
|
-
.
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
120
|
+
const cfg = getConfig();
|
|
121
|
+
if (cfg.reranker === 'onnx') {
|
|
122
|
+
try {
|
|
123
|
+
const scored = await onnxRerank(query, candidates.map((c) => ({ text: c.text })), { modelId: cfg.rerankerModel });
|
|
124
|
+
if (scored.length > 0) {
|
|
125
|
+
const ranked = scored.slice(0, maxHighlights);
|
|
126
|
+
const highlights = ranked.map((s) => {
|
|
127
|
+
const cand = candidates[s.index];
|
|
128
|
+
return {
|
|
129
|
+
text: cand.text,
|
|
130
|
+
source_index: cand.sourceIndex,
|
|
131
|
+
relevance_score: s.score,
|
|
132
|
+
source_url: cand.sourceUrl,
|
|
133
|
+
source_title: cand.sourceTitle,
|
|
134
|
+
section_heading: cand.sectionHeading,
|
|
135
|
+
source_span: { start: cand.charStart, end: cand.charEnd },
|
|
136
|
+
};
|
|
137
|
+
});
|
|
138
|
+
return { highlights, citations, reranker_used: true };
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
catch (err) {
|
|
142
|
+
log.debug('onnx reranker failed, using fallback passages', { error: String(err) });
|
|
77
143
|
}
|
|
78
|
-
log.debug('flashrank returned null, using fallback passages');
|
|
79
144
|
}
|
|
80
|
-
return { highlights: fallbackHighlights(results, maxHighlights), citations,
|
|
145
|
+
return { highlights: fallbackHighlights(results, maxHighlights), citations, reranker_used: false };
|
|
81
146
|
}
|
|
82
|
-
// Fallback when
|
|
147
|
+
// Fallback when the ONNX reranker is unavailable: take the first substantive paragraph
|
|
83
148
|
// from each source (ordered by engine relevance). Preserves citation indices
|
|
84
149
|
// so host LLMs can still cite [N] correctly.
|
|
85
150
|
export function fallbackHighlights(results, maxHighlights) {
|
|
86
151
|
const out = [];
|
|
87
152
|
for (let i = 0; i < results.length && out.length < maxHighlights; i++) {
|
|
88
153
|
const r = results[i];
|
|
89
|
-
const source = r.markdown_content ??
|
|
90
|
-
const
|
|
91
|
-
if (
|
|
154
|
+
const source = r.markdown_content ?? '';
|
|
155
|
+
const passages = source ? splitIntoPassages(source) : [];
|
|
156
|
+
if (passages.length > 0) {
|
|
157
|
+
const annotated = mapPassageHeadings(source, [passages[0]])[0];
|
|
158
|
+
const text = annotated.text.slice(0, MAX_PASSAGE_LENGTH);
|
|
159
|
+
out.push({
|
|
160
|
+
text,
|
|
161
|
+
source_index: i + 1,
|
|
162
|
+
relevance_score: r.relevance_score,
|
|
163
|
+
source_url: r.url,
|
|
164
|
+
source_title: r.title,
|
|
165
|
+
section_heading: annotated.sectionHeading,
|
|
166
|
+
source_span: { start: annotated.charStart, end: annotated.charStart + text.length },
|
|
167
|
+
});
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
const snippet = r.snippet ?? '';
|
|
171
|
+
if (!snippet)
|
|
92
172
|
continue;
|
|
173
|
+
const text = snippet.slice(0, MAX_PASSAGE_LENGTH);
|
|
93
174
|
out.push({
|
|
94
|
-
text
|
|
175
|
+
text,
|
|
95
176
|
source_index: i + 1,
|
|
96
177
|
relevance_score: r.relevance_score,
|
|
97
178
|
source_url: r.url,
|
|
98
179
|
source_title: r.title,
|
|
180
|
+
section_heading: null,
|
|
181
|
+
source_span: { start: 0, end: text.length },
|
|
99
182
|
});
|
|
100
183
|
}
|
|
101
184
|
return out;
|