@proposit/proposit-core 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/parse.d.ts.map +1 -1
- package/dist/cli/commands/parse.js +27 -29
- package/dist/cli/commands/parse.js.map +1 -1
- package/dist/extensions/chat-completions/errors.d.ts +66 -0
- package/dist/extensions/chat-completions/errors.d.ts.map +1 -0
- package/dist/extensions/chat-completions/errors.js +139 -0
- package/dist/extensions/chat-completions/errors.js.map +1 -0
- package/dist/extensions/chat-completions/http.d.ts +10 -0
- package/dist/extensions/chat-completions/http.d.ts.map +1 -0
- package/dist/extensions/chat-completions/http.js +80 -0
- package/dist/extensions/chat-completions/http.js.map +1 -0
- package/dist/extensions/chat-completions/index.d.ts +7 -0
- package/dist/extensions/chat-completions/index.d.ts.map +1 -0
- package/dist/extensions/chat-completions/index.js +18 -0
- package/dist/extensions/chat-completions/index.js.map +1 -0
- package/dist/extensions/chat-completions/provider.d.ts +5 -0
- package/dist/extensions/chat-completions/provider.d.ts.map +1 -0
- package/dist/extensions/chat-completions/provider.js +192 -0
- package/dist/extensions/chat-completions/provider.js.map +1 -0
- package/dist/extensions/chat-completions/structured-output.d.ts +18 -0
- package/dist/extensions/chat-completions/structured-output.d.ts.map +1 -0
- package/dist/extensions/{ollama → chat-completions}/structured-output.js +14 -10
- package/dist/extensions/chat-completions/structured-output.js.map +1 -0
- package/dist/extensions/chat-completions/types.d.ts +65 -0
- package/dist/extensions/chat-completions/types.d.ts.map +1 -0
- package/dist/extensions/chat-completions/types.js +19 -0
- package/dist/extensions/chat-completions/types.js.map +1 -0
- package/dist/extensions/openai/errors.js +1 -1
- package/dist/extensions/openai/errors.js.map +1 -1
- package/dist/extensions/openai/provider.d.ts +2 -2
- package/dist/extensions/{argument-ingestion/shared → pipelines/base}/basics-extension.d.ts +1 -1
- package/dist/extensions/pipelines/base/basics-extension.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion/shared → pipelines/base}/basics-extension.js +5 -4
- package/dist/extensions/pipelines/base/basics-extension.js.map +1 -0
- package/dist/extensions/pipelines/base/finalize-response-v2.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion/shared → pipelines/base}/finalize-response-v2.js +1 -1
- package/dist/extensions/pipelines/base/finalize-response-v2.js.map +1 -0
- package/dist/extensions/pipelines/base/index.d.ts +9 -0
- package/dist/extensions/pipelines/base/index.d.ts.map +1 -0
- package/dist/extensions/pipelines/base/index.js +17 -0
- package/dist/extensions/pipelines/base/index.js.map +1 -0
- package/dist/extensions/pipelines/base/resolve-llm-stage-options.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion/shared → pipelines/base}/resolve-llm-stage-options.js +2 -2
- package/dist/extensions/pipelines/base/resolve-llm-stage-options.js.map +1 -0
- package/dist/extensions/pipelines/base/role-derivation.d.ts.map +1 -0
- package/dist/extensions/pipelines/base/role-derivation.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/axiom-indicator-detection.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/axiom-indicator-detection.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/axiom-indicator-detection.js +1 -1
- package/dist/extensions/pipelines/base/stages/axiom-indicator-detection.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/citation-source-detection.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/citation-source-detection.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/citation-source-detection.js +1 -1
- package/dist/extensions/pipelines/base/stages/citation-source-detection.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-canonicalization.d.ts +20 -3
- package/dist/extensions/pipelines/base/stages/claim-canonicalization.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-canonicalization.js +20 -6
- package/dist/extensions/pipelines/base/stages/claim-canonicalization.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-mention-extraction.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/claim-mention-extraction.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-mention-extraction.js +1 -1
- package/dist/extensions/pipelines/base/stages/claim-mention-extraction.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-reference-validation.d.ts +1 -1
- package/dist/extensions/pipelines/base/stages/claim-reference-validation.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-reference-validation.js +1 -1
- package/dist/extensions/pipelines/base/stages/claim-reference-validation.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-type-classification.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/claim-type-classification.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/claim-type-classification.js +2 -2
- package/dist/extensions/pipelines/base/stages/claim-type-classification.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/conclusion-selection.d.ts +21 -3
- package/dist/extensions/pipelines/base/stages/conclusion-selection.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/conclusion-selection.js +8 -2
- package/dist/extensions/pipelines/base/stages/conclusion-selection.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/formula-compilation.d.ts +1 -1
- package/dist/extensions/pipelines/base/stages/formula-compilation.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/formula-compilation.js +1 -1
- package/dist/extensions/pipelines/base/stages/formula-compilation.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/formula-validation.d.ts +1 -1
- package/dist/extensions/pipelines/base/stages/formula-validation.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/formula-validation.js +2 -2
- package/dist/extensions/pipelines/base/stages/formula-validation.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/index.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/index.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/index.js +5 -3
- package/dist/extensions/pipelines/base/stages/index.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/relation-extraction.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/relation-extraction.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/relation-extraction.js +1 -1
- package/dist/extensions/pipelines/base/stages/relation-extraction.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/schemas.d.ts +2 -0
- package/dist/extensions/pipelines/base/stages/schemas.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/schemas.js +8 -0
- package/dist/extensions/pipelines/base/stages/schemas.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/segmentation.d.ts +2 -2
- package/dist/extensions/pipelines/base/stages/segmentation.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/segmentation.js +2 -2
- package/dist/extensions/pipelines/base/stages/segmentation.js.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/variable-assignment.d.ts +1 -1
- package/dist/extensions/pipelines/base/stages/variable-assignment.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion → pipelines/base}/stages/variable-assignment.js +1 -1
- package/dist/extensions/pipelines/base/stages/variable-assignment.js.map +1 -0
- package/dist/extensions/{argument-ingestion/shared → pipelines/base}/types.d.ts +6 -5
- package/dist/extensions/pipelines/base/types.d.ts.map +1 -0
- package/dist/extensions/pipelines/base/types.js +15 -0
- package/dist/extensions/pipelines/base/types.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/index.d.ts +5 -0
- package/dist/extensions/pipelines/ingestion/index.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/index.js +5 -0
- package/dist/extensions/pipelines/ingestion/index.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scholar/index.d.ts +3 -0
- package/dist/extensions/pipelines/ingestion/scholar/index.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/scholar/index.js +3 -0
- package/dist/extensions/pipelines/ingestion/scholar/index.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scholar/scholar.d.ts +48 -0
- package/dist/extensions/pipelines/ingestion/scholar/scholar.d.ts.map +1 -0
- package/dist/extensions/{argument-ingestion/v2-multi-stage.js → pipelines/ingestion/scholar/scholar.js} +25 -18
- package/dist/extensions/pipelines/ingestion/scholar/scholar.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/extract-stage.d.ts +33 -0
- package/dist/extensions/pipelines/ingestion/scribe/extract-stage.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/extract-stage.js +103 -0
- package/dist/extensions/pipelines/ingestion/scribe/extract-stage.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/index.d.ts +3 -0
- package/dist/extensions/pipelines/ingestion/scribe/index.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/index.js +3 -0
- package/dist/extensions/pipelines/ingestion/scribe/index.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/schemas.d.ts +17 -0
- package/dist/extensions/pipelines/ingestion/scribe/schemas.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/schemas.js +26 -0
- package/dist/extensions/pipelines/ingestion/scribe/schemas.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/scribe.d.ts +20 -0
- package/dist/extensions/pipelines/ingestion/scribe/scribe.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/scribe.js +81 -0
- package/dist/extensions/pipelines/ingestion/scribe/scribe.js.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/structure-stage.d.ts +30 -0
- package/dist/extensions/pipelines/ingestion/scribe/structure-stage.d.ts.map +1 -0
- package/dist/extensions/pipelines/ingestion/scribe/structure-stage.js +142 -0
- package/dist/extensions/pipelines/ingestion/scribe/structure-stage.js.map +1 -0
- package/dist/lib/index.d.ts +0 -2
- package/dist/lib/index.d.ts.map +1 -1
- package/dist/lib/index.js +7 -5
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/pipelines/debug-log.js +1 -1
- package/dist/lib/pipelines/debug-log.js.map +1 -1
- package/dist/lib/pipelines/stage-helpers.d.ts.map +1 -1
- package/dist/lib/pipelines/stage-helpers.js +37 -1
- package/dist/lib/pipelines/stage-helpers.js.map +1 -1
- package/dist/lib/pipelines/types.d.ts +2 -2
- package/package.json +16 -16
- package/dist/extensions/argument-ingestion/index.d.ts +0 -15
- package/dist/extensions/argument-ingestion/index.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/index.js +0 -17
- package/dist/extensions/argument-ingestion/index.js.map +0 -1
- package/dist/extensions/argument-ingestion/shared/basics-extension.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/shared/basics-extension.js.map +0 -1
- package/dist/extensions/argument-ingestion/shared/finalize-response-v2.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/shared/finalize-response-v2.js.map +0 -1
- package/dist/extensions/argument-ingestion/shared/finalize-response.d.ts +0 -21
- package/dist/extensions/argument-ingestion/shared/finalize-response.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/shared/finalize-response.js +0 -33
- package/dist/extensions/argument-ingestion/shared/finalize-response.js.map +0 -1
- package/dist/extensions/argument-ingestion/shared/resolve-llm-stage-options.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/shared/resolve-llm-stage-options.js.map +0 -1
- package/dist/extensions/argument-ingestion/shared/role-derivation.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/shared/role-derivation.js.map +0 -1
- package/dist/extensions/argument-ingestion/shared/types.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/shared/types.js +0 -16
- package/dist/extensions/argument-ingestion/shared/types.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/axiom-indicator-detection.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/axiom-indicator-detection.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/citation-source-detection.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/citation-source-detection.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-canonicalization.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-canonicalization.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-mention-extraction.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-mention-extraction.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-reference-validation.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-reference-validation.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-type-classification.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/claim-type-classification.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/conclusion-selection.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/conclusion-selection.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/formula-compilation.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/formula-compilation.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/formula-validation.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/formula-validation.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/index.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/index.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/relation-extraction.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/relation-extraction.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/schemas.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/schemas.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/segmentation.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/segmentation.js.map +0 -1
- package/dist/extensions/argument-ingestion/stages/variable-assignment.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/stages/variable-assignment.js.map +0 -1
- package/dist/extensions/argument-ingestion/v1-single-shot.d.ts +0 -26
- package/dist/extensions/argument-ingestion/v1-single-shot.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/v1-single-shot.js +0 -113
- package/dist/extensions/argument-ingestion/v1-single-shot.js.map +0 -1
- package/dist/extensions/argument-ingestion/v2-multi-stage.d.ts +0 -38
- package/dist/extensions/argument-ingestion/v2-multi-stage.d.ts.map +0 -1
- package/dist/extensions/argument-ingestion/v2-multi-stage.js.map +0 -1
- package/dist/extensions/ollama/errors.d.ts +0 -73
- package/dist/extensions/ollama/errors.d.ts.map +0 -1
- package/dist/extensions/ollama/errors.js +0 -228
- package/dist/extensions/ollama/errors.js.map +0 -1
- package/dist/extensions/ollama/index.d.ts +0 -6
- package/dist/extensions/ollama/index.d.ts.map +0 -1
- package/dist/extensions/ollama/index.js +0 -17
- package/dist/extensions/ollama/index.js.map +0 -1
- package/dist/extensions/ollama/provider.d.ts +0 -22
- package/dist/extensions/ollama/provider.d.ts.map +0 -1
- package/dist/extensions/ollama/provider.js +0 -417
- package/dist/extensions/ollama/provider.js.map +0 -1
- package/dist/extensions/ollama/structured-output.d.ts +0 -18
- package/dist/extensions/ollama/structured-output.d.ts.map +0 -1
- package/dist/extensions/ollama/structured-output.js.map +0 -1
- package/dist/extensions/ollama/timeout-fetch.d.ts +0 -24
- package/dist/extensions/ollama/timeout-fetch.d.ts.map +0 -1
- package/dist/extensions/ollama/timeout-fetch.js +0 -76
- package/dist/extensions/ollama/timeout-fetch.js.map +0 -1
- package/dist/extensions/ollama/types.d.ts +0 -219
- package/dist/extensions/ollama/types.d.ts.map +0 -1
- package/dist/extensions/ollama/types.js +0 -7
- package/dist/extensions/ollama/types.js.map +0 -1
- /package/dist/extensions/{argument-ingestion/shared → pipelines/base}/finalize-response-v2.d.ts +0 -0
- /package/dist/extensions/{argument-ingestion/shared → pipelines/base}/resolve-llm-stage-options.d.ts +0 -0
- /package/dist/extensions/{argument-ingestion/shared → pipelines/base}/role-derivation.d.ts +0 -0
- /package/dist/extensions/{argument-ingestion/shared → pipelines/base}/role-derivation.js +0 -0
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
// stage prevents the larger downstream stages from burning tokens on
|
|
11
11
|
// the same job.
|
|
12
12
|
import { STAGE_IDS, SegmentationOutputSchema, } from "./schemas.js";
|
|
13
|
-
import { llmStage } from "
|
|
13
|
+
import { llmStage } from "../../../../lib/pipelines/stage-helpers.js";
|
|
14
14
|
export const SEGMENTATION_MODEL = "gpt-5.4-mini";
|
|
15
15
|
// **Output cap (v1.3.1 fix for the segmentation-truncation
|
|
16
16
|
// regression).** Segmentation emits an array of `{ segmentId, text,
|
|
@@ -28,7 +28,7 @@ export const SEGMENTATION_MODEL = "gpt-5.4-mini";
|
|
|
28
28
|
// burning tokens on an over-spec'd cap.
|
|
29
29
|
//
|
|
30
30
|
// Callers that ingest larger inputs can raise the cap further via
|
|
31
|
-
// `
|
|
31
|
+
// `createScholarPipeline(extension, { llm: { overrides: {
|
|
32
32
|
// segmentation: { maxOutputTokens: N } } } })` — see
|
|
33
33
|
// `shared/types.ts`.
|
|
34
34
|
export const SEGMENTATION_MAX_OUTPUT_TOKENS = 8192;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"segmentation.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/base/stages/segmentation.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,kEAAkE;AAClE,mEAAmE;AACnE,kCAAkC;AAClC,EAAE;AACF,+DAA+D;AAC/D,mEAAmE;AACnE,qEAAqE;AACrE,qEAAqE;AACrE,qEAAqE;AACrE,gBAAgB;AAEhB,OAAO,EACH,SAAS,EACT,wBAAwB,GAE3B,MAAM,cAAc,CAAA;AACrB,OAAO,EAAE,QAAQ,EAAE,MAAM,4CAA4C,CAAA;AAIrE,MAAM,CAAC,MAAM,kBAAkB,GAAG,cAAc,CAAA;AAEhD,2DAA2D;AAC3D,oEAAoE;AACpE,kEAAkE;AAClE,0DAA0D;AAC1D,oEAAoE;AACpE,+DAA+D;AAC/D,sDAAsD;AACtD,iEAAiE;AACjE,kEAAkE;AAClE,+DAA+D;AAC/D,+DAA+D;AAC/D,qEAAqE;AACrE,oEAAoE;AACpE,wCAAwC;AACxC,EAAE;AACF,kEAAkE;AAClE,0DAA0D;AAC1D,qDAAqD;AACrD,qBAAqB;AACrB,MAAM,CAAC,MAAM,8BAA8B,GAAG,IAAI,CAAA;AAElD,MAAM,CAAC,MAAM,0BAA0B,GAAG;;;;;;;;6KAQmI,CAAA;AAE7K,SAAS,uBAAuB,CAAC,GAAkB;IAI/C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAwB,CAAA;IAC1C,MAAM,YAAY,GAAG,kBAAkB,SAAS,CAAC,YAAY,SAAS,0BAA0B,EAAE,CAAA;IAClG,MAAM,IAAI,GAAG,kBAAkB,KAAK,CAAC,IAAI,EAAE,CAAA;IAC3C,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,CAAA;AACzC,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAA6B;IACjE,KAAK,EAAE,kBAAkB;IACzB,eAAe,EAAE,8BAA8B;CAClD,CAAA;AAED;;;;;GAKG;AACH,MAAM,UAAU,uBAAuB,CACnC,OAAkC;IAElC,OAAO,QAAQ,CAAsB;QACjC,EAAE,EAAE,SAAS,CAAC,YAAY;QAC1B,SAAS,EAAE,EAAE;QACb,YAAY,EAAE,wBAAwB;QACtC,KAAK,EAAE,OAAO,EAAE,KAAK,IAAI,kBAAkB;QAC3C,eAAe,EACX,OAAO,EAAE,eAAe,IAAI,8BAA8B;QAC9D,eAAe,EAAE,OAAO,EAAE,eAAe;QACzC,KAAK,EAAE,OAAO,EAAE,KAAK;QACrB,WAAW,EAAE,uBAAuB;KACvC,CAAC,CAAA;AACN,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAC1B,uBAAuB,EAAE,CAAA"}
|
package/dist/extensions/{argument-ingestion → pipelines/base}/stages/variable-assignment.d.ts
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { type TClaimCanonicalizationOutput, type TVariableAssignmentOutput } from "./schemas.js";
|
|
2
|
-
import type { TStage } from "
|
|
2
|
+
import type { TStage } from "../../../../lib/pipelines/types.js";
|
|
3
3
|
export declare function isValidVariableSymbol(symbol: string): boolean;
|
|
4
4
|
export type TAssignVariablesInput = {
|
|
5
5
|
canonicalClaims: TClaimCanonicalizationOutput["canonicalClaims"];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"variable-assignment.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/base/stages/variable-assignment.ts"],"names":[],"mappings":"AAwBA,OAAO,EAIH,KAAK,4BAA4B,EACjC,KAAK,yBAAyB,EACjC,MAAM,cAAc,CAAA;AAErB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oCAAoC,CAAA;AAIhE,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAI7D;AAED,MAAM,MAAM,qBAAqB,GAAG;IAChC,eAAe,EAAE,4BAA4B,CAAC,iBAAiB,CAAC,CAAA;IAChE,UAAU,EAAE,MAAM,MAAM,CAAA;CAC3B,CAAA;AAED;;;;GAIG;AACH,wBAAgB,eAAe,CAC3B,KAAK,EAAE,qBAAqB,GAC7B,yBAAyB,CA0B3B;AAED,eAAO,MAAM,uBAAuB,EAAE,MAAM,CAAC,yBAAyB,CAoBhE,CAAA"}
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
// Errors / failures: this stage never throws and emits no
|
|
23
23
|
// `ProcessingFailure`s. Empty input simply yields an empty array.
|
|
24
24
|
import { STAGE_IDS, VariableAssignmentOutputSchema, } from "./schemas.js";
|
|
25
|
-
import { deterministicStage } from "
|
|
25
|
+
import { deterministicStage } from "../../../../lib/pipelines/stage-helpers.js";
|
|
26
26
|
const SYMBOL_REGEX = /^[a-zA-Z_][a-zA-Z0-9_]{0,31}$/;
|
|
27
27
|
export function isValidVariableSymbol(symbol) {
|
|
28
28
|
if (symbol.length === 0)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"variable-assignment.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/base/stages/variable-assignment.ts"],"names":[],"mappings":"AAAA,6DAA6D;AAC7D,uDAAuD;AACvD,EAAE;AACF,8BAA8B;AAC9B,0EAA0E;AAC1E,eAAe;AACf,mEAAmE;AACnE,qEAAqE;AACrE,sEAAsE;AACtE,kCAAkC;AAClC,wEAAwE;AACxE,uEAAuE;AACvE,uEAAuE;AACvE,iEAAiE;AACjE,wEAAwE;AACxE,oEAAoE;AACpE,EAAE;AACF,uEAAuE;AACvE,+DAA+D;AAC/D,iCAAiC;AACjC,EAAE;AACF,0DAA0D;AAC1D,kEAAkE;AAElE,OAAO,EACH,SAAS,EACT,8BAA8B,GAIjC,MAAM,cAAc,CAAA;AACrB,OAAO,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAA;AAG/E,MAAM,YAAY,GAAG,+BAA+B,CAAA;AAEpD,MAAM,UAAU,qBAAqB,CAAC,MAAc;IAChD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACrC,IAAI,MAAM,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,KAAK,CAAA;IACpC,OAAO,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;AACpC,CAAC;AAOD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC3B,KAA4B;IAE5B,MAAM,GAAG,GAAwB,EAAE,CAAA;IACnC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAA;IACrC,IAAI,eAAe,GAAG,CAAC,CAAA;IAEvB,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;QACxC,IAAI,MAAc,CAAA;QAClB,MAAM,SAAS,GAAG,KAAK,CAAC,eAAe,CAAA;QACvC,IAAI,qBAAqB,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAClE,MAAM,GAAG,SAAS,CAAA;QACtB,CAAC;aAAM,CAAC;YACJ,mDAAmD;YACnD,OAAO,WAAW,CAAC,GAAG,CAAC,IAAI,eAAe,EAAE,CAAC,EAAE,CAAC;gBAC5C,eAAe,IAAI,CAAC,CAAA;YACxB,CAAC;YACD,MAAM,GAAG,IAAI,eAAe,EAAE,CAAA;YAC9B,eAAe,IAAI,CAAC,CAAA;QACxB,CAAC;QACD,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;QACvB,GAAG,CAAC,IAAI,CAAC;YACL,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE;YAC1B,MAAM;YACN,WAAW,EAAE,KAAK,CAAC,MAAM;SAC5B,CAAC,CAAA;IACN,CAAC;IACD,OAAO,GAAG,CAAA;AACd,CAAC;AAED,MAAM,CAAC,MAAM,uBAAuB,GAChC,kBAAkB,CAA4B;IAC1C,EAAE,EAAE,SAAS,CAAC,kBAAkB;IAChC,SAAS,EAAE,CAAC,SAAS,CAAC,qBAAqB,CAAC;IAC5C,YAAY,EAAE,8BAA8B;IAC5C,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE;QACR,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CACjB,SAAS,CAAC,qBAAqB,CAClC,CAAA;QACD,IAAI,CAAC,KAAK,EAAE,CAAC;YACT,qDAAqD;YACrD,mDAAmD;YACnD,WAAW;YACX,OAAO,EAAE,CAAA;QACb,CAAC;QACD,OAAO,eAAe,CAAC;YACnB,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,UAAU,EAAE,GAAG,CAAC,UAAU;SAC7B,
CAAC,CAAA;IACN,CAAC;CACJ,CAAC,CAAA"}
|
|
@@ -46,13 +46,14 @@ export type TIngestionInput = {
|
|
|
46
46
|
* `maxOutputTokens` (the output-budget cap; not setting one means the
|
|
47
47
|
* model's default applies, which is what caused the v1.3.0 segmentation
|
|
48
48
|
* truncation against the Singer fixture), `reasoningEffort` (effort
|
|
49
|
-
* budget for reasoning models — OpenAI-specific; ignored by the
|
|
50
|
-
* provider), and `model` (the provider model
|
|
49
|
+
* budget for reasoning models — OpenAI-specific; ignored by the
|
|
50
|
+
* chat-completions provider), and `model` (the provider model
|
|
51
|
+
* identifier).
|
|
51
52
|
*
|
|
52
53
|
* The `model` knob lets a caller retarget every LLM stage at a
|
|
53
54
|
* different backend without forking the stages — e.g. pointing the
|
|
54
|
-
* whole v2 pipeline at a local
|
|
55
|
-
* (`{ llm: { defaults: { model: "
|
|
55
|
+
* whole v2 pipeline at a local model
|
|
56
|
+
* (`{ llm: { defaults: { model: "local-coder" } } }`) for cost-free
|
|
56
57
|
* local development. Each stage keeps its own hard-coded `gpt-5.x`
|
|
57
58
|
* default when no override is supplied, so production behavior is
|
|
58
59
|
* unchanged.
|
|
@@ -63,7 +64,7 @@ export type TIngestionInput = {
|
|
|
63
64
|
* does NOT merge it — see `resolveLlmStageOptions`). Its primary
|
|
64
65
|
* consumer is the server's "no-auto-retry" toggle, which drops
|
|
65
66
|
* `"transient"` from `retryOn`. Note that dropping `"transient"`
|
|
66
|
-
* disables the retry for ALL transient causes — network
|
|
67
|
+
* disables the retry for ALL transient causes — network
|
|
67
68
|
* timeouts, 5xx, AND `incomplete/max_output_tokens` truncation — not
|
|
68
69
|
* timeouts alone, because every non-Abort transport error and the
|
|
69
70
|
* truncation case both classify as `"transient"`.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/extensions/pipelines/base/types.ts"],"names":[],"mappings":"AAcA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAA;AACjE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAA;AAEnE;;;;;;;;;;GAUG;AACH,MAAM,MAAM,mBAAmB,GAAG;IAC9B,8EAA8E;IAC9E,cAAc,EAAE,OAAO,CAAA;IACvB,uGAAuG;IACvG,WAAW,EAAE,OAAO,CAAA;IACpB,gDAAgD;IAChD,cAAc,EAAE,OAAO,CAAA;IACvB,+CAA+C;IAC/C,aAAa,EAAE,OAAO,CAAA;IACtB,yDAAyD;IACzD,cAAc,EAAE,OAAO,CAAA;CAC1B,CAAA;AAED;;;;;GAKG;AACH,MAAM,MAAM,eAAe,GAAG;IAC1B,IAAI,EAAE,MAAM,CAAA;CACf,CAAA;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,MAAM,MAAM,wBAAwB,GAAG;IACnC,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,eAAe,CAAC,EAAE,gBAAgB,CAAA;IAClC,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,CAAA;CAChC,CAAA;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,MAAM,oBAAoB,GAAG;IAC/B,QAAQ,CAAC,EAAE,wBAAwB,CAAA;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAA;CACvD,CAAA"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// Shared types for the ingestion pipelines.
|
|
2
|
+
//
|
|
3
|
+
// The scholar (thorough, multi-stage) and scribe (fast, two-call)
|
|
4
|
+
// pipelines both produce a `TParsedArgumentResponse`-shaped output that
|
|
5
|
+
// the existing `ArgumentParser.build()` consumes. They share an
|
|
6
|
+
// extension descriptor — `TIngestionExtension` — so callers can plug in
|
|
7
|
+
// custom per-entity field shapes (titles, bodies, URLs, axiom labels, …)
|
|
8
|
+
// without reimplementing the pipelines.
|
|
9
|
+
//
|
|
10
|
+
// The descriptor's per-entity slots (`claimSchema`/`variableSchema`/…)
|
|
11
|
+
// are composed by the stages (e.g. canonicalization builds its
|
|
12
|
+
// per-extension output schema from `claimSchema`); `responseSchema` is
|
|
13
|
+
// the pipeline's advertised output schema.
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/extensions/pipelines/base/types.ts"],"names":[],"mappings":"AAAA,4CAA4C;AAC5C,EAAE;AACF,kEAAkE;AAClE,wEAAwE;AACxE,gEAAgE;AAChE,wEAAwE;AACxE,yEAAyE;AACzE,wCAAwC;AACxC,EAAE;AACF,uEAAuE;AACvE,+DAA+D;AAC/D,uEAAuE;AACvE,2CAA2C"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { createScholarPipeline } from "./scholar/index.js";
|
|
2
|
+
export type { TCreateScholarPipelineOptions } from "./scholar/index.js";
|
|
3
|
+
export { createScribePipeline } from "./scribe/index.js";
|
|
4
|
+
export type { TCreateScribePipelineOptions } from "./scribe/index.js";
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/extensions/pipelines/ingestion/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAA;AAC1D,YAAY,EAAE,6BAA6B,EAAE,MAAM,oBAAoB,CAAA;AACvE,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAA;AACxD,YAAY,EAAE,4BAA4B,EAAE,MAAM,mBAAmB,CAAA"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
// Barrel for the ingestion pipeline family (scholar + scribe).
|
|
2
|
+
// Subpath: @proposit/proposit-core/pipelines/ingestion
|
|
3
|
+
export { createScholarPipeline } from "./scholar/index.js";
|
|
4
|
+
export { createScribePipeline } from "./scribe/index.js";
|
|
5
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/extensions/pipelines/ingestion/index.ts"],"names":[],"mappings":"AAAA,+DAA+D;AAC/D,uDAAuD;AACvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAA;AAE1D,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scholar/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAA;AAC5E,YAAY,EAAE,6BAA6B,EAAE,MAAM,cAAc,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scholar/index.ts"],"names":[],"mappings":"AAAA,qEAAqE;AACrE,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAA"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import Type from "typebox";
|
|
2
|
+
import type { TPipeline } from "../../../../lib/pipelines/index.js";
|
|
3
|
+
import type { TParsedArgumentResponse } from "../../../../lib/parsing/index.js";
|
|
4
|
+
import type { TIngestionExtension, TIngestionInput, TIngestionLlmOptions } from "../../base/types.js";
|
|
5
|
+
/**
|
|
6
|
+
* Input schema shared by every ingestion pipeline: a single non-empty
|
|
7
|
+
* raw argument text. Exported so sibling pipelines (e.g. the fast
|
|
8
|
+
* `scribe` pipeline) advertise the identical input contract.
|
|
9
|
+
*/
|
|
10
|
+
export declare const INGESTION_INPUT_SCHEMA: Type.TObject<{
|
|
11
|
+
text: Type.TString;
|
|
12
|
+
}>;
|
|
13
|
+
/**
|
|
14
|
+
* Options for `createScholarPipeline`.
|
|
15
|
+
*
|
|
16
|
+
* `llm.defaults` applies to every LLM stage that doesn't have a
|
|
17
|
+
* per-stage entry under `llm.overrides`; per-stage entries are keyed
|
|
18
|
+
* by stage id (`STAGE_IDS.segmentation`, etc.). The effective knobs
|
|
19
|
+
* compose stage-override > pipeline-default > internal stage default.
|
|
20
|
+
*
|
|
21
|
+
* @example
|
|
22
|
+
* ```ts
|
|
23
|
+
* createScholarPipeline(basicsExtension, {
|
|
24
|
+
* llm: {
|
|
25
|
+
* defaults: { maxOutputTokens: 16_384 },
|
|
26
|
+
* overrides: {
|
|
27
|
+
* [STAGE_IDS.segmentation]: { maxOutputTokens: 32_768 },
|
|
28
|
+
* },
|
|
29
|
+
* },
|
|
30
|
+
* })
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
export type TCreateScholarPipelineOptions = {
|
|
34
|
+
llm?: TIngestionLlmOptions;
|
|
35
|
+
};
|
|
36
|
+
/**
 * Build the scholar (thorough) ingestion pipeline for the supplied
 * extension. Returns a `TPipeline` whose 12-stage DAG segments,
 * extracts, canonicalizes, classifies, relates, and compiles the raw
 * text, and whose `finalize` assembles the `TParsedArgumentResponse`
 * shape `ArgumentParser.build()` consumes.
 *
 * The factory is pure: it constructs stage values + a pipeline
 * descriptor and returns immediately. Stage execution happens inside
 * `executePipeline`.
 *
 * @param extension - Ingestion extension the pipeline is built for.
 * @param options - Optional `{ llm }` knobs (see `TCreateScholarPipelineOptions`).
 * @returns A pipeline descriptor from `TIngestionInput` to `TParsedArgumentResponse`.
 */
export declare function createScholarPipeline(extension: TIngestionExtension, options?: TCreateScholarPipelineOptions): TPipeline<TIngestionInput, TParsedArgumentResponse>;
|
|
48
|
+
//# sourceMappingURL=scholar.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scholar.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scholar/scholar.ts"],"names":[],"mappings":"AAgCA,OAAO,IAAI,MAAM,SAAS,CAAA;AAyB1B,OAAO,KAAK,EAAE,SAAS,EAAU,MAAM,oCAAoC,CAAA;AAC3E,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,kCAAkC,CAAA;AAG/E,OAAO,KAAK,EACR,mBAAmB,EACnB,eAAe,EACf,oBAAoB,EACvB,MAAM,qBAAqB,CAAA;AAK5B;;;;GAIG;AACH,eAAO,MAAM,sBAAsB;;EAEjC,CAAA;AAEF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,MAAM,6BAA6B,GAAG;IACxC,GAAG,CAAC,EAAE,oBAAoB,CAAA;CAC7B,CAAA;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,qBAAqB,CACjC,SAAS,EAAE,mBAAmB,EAC9B,OAAO,CAAC,EAAE,6BAA6B,GACxC,SAAS,CAAC,eAAe,EAAE,uBAAuB,CAAC,CAkGrD"}
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
//
|
|
1
|
+
// scholar — the thorough, multi-stage ingestion pipeline.
|
|
2
2
|
//
|
|
3
|
-
// Composes the 12 stages defined under
|
|
3
|
+
// Composes the 12 stages defined under `../../base/stages/` into a single
|
|
4
4
|
// `TPipeline<TIngestionInput, TParsedArgumentResponse>` whose `finalize`
|
|
5
|
-
// assembles the
|
|
6
|
-
//
|
|
7
|
-
//
|
|
5
|
+
// assembles the parsed-argument response shape downstream parsing
|
|
6
|
+
// consumes. The sibling `scribe` pipeline emits the same output shape
|
|
7
|
+
// from a cheaper two-call front end, so consumers can swap the two
|
|
8
|
+
// factories without changing downstream parsing.
|
|
8
9
|
//
|
|
9
|
-
// DAG
|
|
10
|
+
// DAG:
|
|
10
11
|
//
|
|
11
12
|
// segmentation
|
|
12
13
|
// ├── claim-mention-extraction ─→ claim-canonicalization ─┐
|
|
@@ -29,26 +30,32 @@
|
|
|
29
30
|
// 4 deterministic stages (claim-reference-validation, variable-assignment,
|
|
30
31
|
// formula-compilation, formula-validation) + 8 LLM stages.
|
|
31
32
|
import Type from "typebox";
|
|
32
|
-
import { STAGE_IDS, claimReferenceValidationStage, createAxiomIndicatorDetectionStage, createCitationSourceDetectionStage, createClaimCanonicalizationStage, createClaimMentionExtractionStage, createClaimTypeClassificationStage, createConclusionSelectionStage, createRelationExtractionStage, createSegmentationStage, formulaCompilationStage, formulaValidationStage, variableAssignmentStage, SEGMENTATION_STAGE_DEFAULTS, CLAIM_MENTION_EXTRACTION_STAGE_DEFAULTS, CITATION_SOURCE_DETECTION_STAGE_DEFAULTS, AXIOM_INDICATOR_DETECTION_STAGE_DEFAULTS, CLAIM_CANONICALIZATION_STAGE_DEFAULTS, CLAIM_TYPE_CLASSIFICATION_STAGE_DEFAULTS, RELATION_EXTRACTION_STAGE_DEFAULTS, CONCLUSION_SELECTION_STAGE_DEFAULTS, } from "
|
|
33
|
-
import { optional } from "
|
|
34
|
-
import { finalizeResponseV2 } from "
|
|
35
|
-
import { resolveLlmStageOptions } from "
|
|
36
|
-
const PIPELINE_ID = "argument-ingestion-
|
|
33
|
+
import { STAGE_IDS, claimReferenceValidationStage, createAxiomIndicatorDetectionStage, createCitationSourceDetectionStage, createClaimCanonicalizationStage, createClaimMentionExtractionStage, createClaimTypeClassificationStage, createConclusionSelectionStage, createRelationExtractionStage, createSegmentationStage, formulaCompilationStage, formulaValidationStage, variableAssignmentStage, SEGMENTATION_STAGE_DEFAULTS, CLAIM_MENTION_EXTRACTION_STAGE_DEFAULTS, CITATION_SOURCE_DETECTION_STAGE_DEFAULTS, AXIOM_INDICATOR_DETECTION_STAGE_DEFAULTS, CLAIM_CANONICALIZATION_STAGE_DEFAULTS, CLAIM_TYPE_CLASSIFICATION_STAGE_DEFAULTS, RELATION_EXTRACTION_STAGE_DEFAULTS, CONCLUSION_SELECTION_STAGE_DEFAULTS, } from "../../base/stages/index.js";
|
|
34
|
+
import { optional } from "../../../../lib/pipelines/index.js";
|
|
35
|
+
import { finalizeResponseV2 } from "../../base/finalize-response-v2.js";
|
|
36
|
+
import { resolveLlmStageOptions } from "../../base/resolve-llm-stage-options.js";
|
|
37
|
+
const PIPELINE_ID = "argument-ingestion-scholar";
|
|
37
38
|
const PIPELINE_VERSION = "1.0.0";
|
|
38
|
-
|
|
39
|
+
/**
|
|
40
|
+
* Input schema shared by every ingestion pipeline: a single non-empty
|
|
41
|
+
* raw argument text. Exported so sibling pipelines (e.g. the fast
|
|
42
|
+
* `scribe` pipeline) advertise the identical input contract.
|
|
43
|
+
*/
|
|
44
|
+
export const INGESTION_INPUT_SCHEMA = Type.Object({
|
|
39
45
|
text: Type.String({ minLength: 1 }),
|
|
40
46
|
});
|
|
41
47
|
/**
|
|
42
|
-
* Build the
|
|
43
|
-
* extension. Returns a `TPipeline` whose
|
|
44
|
-
*
|
|
45
|
-
* `
|
|
48
|
+
* Build the scholar (thorough) ingestion pipeline for the supplied
|
|
49
|
+
* extension. Returns a `TPipeline` whose 12-stage DAG segments,
|
|
50
|
+
* extracts, canonicalizes, classifies, relates, and compiles the raw
|
|
51
|
+
* text, and whose `finalize` assembles the `TParsedArgumentResponse`
|
|
52
|
+
* shape `ArgumentParser.build()` consumes.
|
|
46
53
|
*
|
|
47
54
|
* The factory is pure: it constructs stage values + a pipeline
|
|
48
55
|
* descriptor and returns immediately. Stage execution happens inside
|
|
49
56
|
* `executePipeline`.
|
|
50
57
|
*/
|
|
51
|
-
export function
|
|
58
|
+
export function createScholarPipeline(extension, options) {
|
|
52
59
|
const llm = options?.llm;
|
|
53
60
|
const segmentationStage = createSegmentationStage(resolveLlmStageOptions(STAGE_IDS.segmentation, SEGMENTATION_STAGE_DEFAULTS, llm));
|
|
54
61
|
const claimMentionExtractionStage = createClaimMentionExtractionStage(resolveLlmStageOptions(STAGE_IDS.claimMentionExtraction, CLAIM_MENTION_EXTRACTION_STAGE_DEFAULTS, llm));
|
|
@@ -97,4 +104,4 @@ export function createIngestionV2Pipeline(extension, options) {
|
|
|
97
104
|
},
|
|
98
105
|
};
|
|
99
106
|
}
|
|
100
|
-
//# sourceMappingURL=
|
|
107
|
+
//# sourceMappingURL=scholar.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scholar.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scholar/scholar.ts"],"names":[],"mappings":"AAAA,0DAA0D;AAC1D,EAAE;AACF,0EAA0E;AAC1E,yEAAyE;AACzE,kEAAkE;AAClE,sEAAsE;AACtE,mEAAmE;AACnE,iDAAiD;AACjD,EAAE;AACF,OAAO;AACP,EAAE;AACF,iBAAiB;AACjB,gEAAgE;AAChE,+DAA+D;AAC/D,+DAA+D;AAC/D,+DAA+D;AAC/D,+DAA+D;AAC/D,+DAA+D;AAC/D,+DAA+D;AAC/D,+DAA+D;AAC/D,qEAAqE;AACrE,8DAA8D;AAC9D,sEAAsE;AACtE,8DAA8D;AAC9D,qEAAqE;AACrE,8DAA8D;AAC9D,oEAAoE;AACpE,iEAAiE;AACjE,EAAE;AACF,2EAA2E;AAC3E,2DAA2D;AAE3D,OAAO,IAAI,MAAM,SAAS,CAAA;AAC1B,OAAO,EACH,SAAS,EACT,6BAA6B,EAC7B,kCAAkC,EAClC,kCAAkC,EAClC,gCAAgC,EAChC,iCAAiC,EACjC,kCAAkC,EAClC,8BAA8B,EAC9B,6BAA6B,EAC7B,uBAAuB,EACvB,uBAAuB,EACvB,sBAAsB,EACtB,uBAAuB,EACvB,2BAA2B,EAC3B,uCAAuC,EACvC,wCAAwC,EACxC,wCAAwC,EACxC,qCAAqC,EACrC,wCAAwC,EACxC,kCAAkC,EAClC,mCAAmC,GACtC,MAAM,4BAA4B,CAAA;AACnC,OAAO,EAAE,QAAQ,EAAE,MAAM,oCAAoC,CAAA;AAG7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,oCAAoC,CAAA;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,yCAAyC,CAAA;AAOhF,MAAM,WAAW,GAAG,4BAA4B,CAAA;AAChD,MAAM,gBAAgB,GAAG,OAAO,CAAA;AAEhC;;;;GAIG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG,IAAI,CAAC,MAAM,CAAC;IAC9C,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;CACtC,CAAC,CAAA;AA0BF;;;;;;;;;;GAUG;AACH,MAAM,UAAU,qBAAqB,CACjC,SAA8B,EAC9B,OAAuC;IAEvC,MAAM,GAAG,GAAG,OAAO,EAAE,GAAG,CAAA;IACxB,MAAM,iBAAiB,GAAG,uBAAuB,CAC7C,sBAAsB,CAClB,SAAS,CAAC,YAAY,EACtB,2BAA2B,EAC3B,GAAG,CACN,CACJ,CAAA;IACD,MAAM,2BAA2B,GAAG,iCAAiC,CACjE,sBAAsB,CAClB,SAAS,CAAC,sBAAsB,EAChC,uCAAuC,EACvC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,4BAA4B,GAAG,kCAAkC,CACnE,sBAAsB,CAClB,SAAS,CAAC,uBAAuB,EACjC,wCAAwC,EACxC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,4BAA4B,GAAG,kCAAkC,CACnE,sBAAsB,CAClB,SAAS,CAAC,uBAAuB,EACjC,wCAAwC,EACxC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,qBAAqB,GAAG,gCAAgC,CAC1D,SAAS,EACT,sBAAsB,CAClB,SAAS,CAAC,qBAAqB,EAC/B,qCAAqC,EACrC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,4BAA4B,GAAG,kCAAkC,CACnE,sBAAsB,CAClB,SAAS,CAAC,uBAAuB,EACjC,wCAAwC,EACxC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,uBAAuB,GAAG,6BAA6B,CACzD,sB
AAsB,CAClB,SAAS,CAAC,kBAAkB,EAC5B,kCAAkC,EAClC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,wBAAwB,GAAG,8BAA8B,CAC3D,sBAAsB,CAClB,SAAS,CAAC,mBAAmB,EAC7B,mCAAmC,EACnC,GAAG,CACN,CACJ,CAAA;IACD,MAAM,MAAM,GAA+B;QACvC,iBAAiB;QACjB,2BAA2B;QAC3B,4BAA4B;QAC5B,4BAA4B;QAC5B,qBAAqB;QACrB,4BAA4B;QAC5B,6BAA6B;QAC7B,uBAAuB;QACvB,uBAAuB;QACvB,wBAAwB;QACxB,uBAAuB;QACvB,sBAAsB;KACK,CAAA;IAE/B,OAAO;QACH,EAAE,EAAE,WAAW;QACf,OAAO,EAAE,gBAAgB;QACzB,WAAW,EAAE,sBAAsB;QACnC,8DAA8D;QAC9D,2DAA2D;QAC3D,4DAA4D;QAC5D,mCAAmC;QACnC,YAAY,EAAE,SAAS,CAAC,cAAc;QACtC,MAAM;QACN,QAAQ,EAAE;YACN,SAAS,EAAE;gBACP,SAAS,CAAC,qBAAqB;gBAC/B,SAAS,CAAC,kBAAkB;gBAC5B,SAAS,CAAC,kBAAkB;gBAC5B,QAAQ,CAAC,SAAS,CAAC,uBAAuB,CAAC;gBAC3C,QAAQ,CAAC,SAAS,CAAC,kBAAkB,CAAC;gBACtC,QAAQ,CAAC,SAAS,CAAC,mBAAmB,CAAC;gBACvC,QAAQ,CAAC,SAAS,CAAC,iBAAiB,CAAC;gBACrC,QAAQ,CAAC,SAAS,CAAC,wBAAwB,CAAC;aAC/C;YACD,GAAG,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,kBAAkB,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;SACvD;KACJ,CAAA;AACL,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { TStage } from "../../../../lib/pipelines/types.js";
|
|
2
|
+
import { type TClaimCanonicalizationOutput, type TClaimTypeClassificationOutput } from "../../base/stages/index.js";
|
|
3
|
+
import type { TIngestionExtension, TLlmStageOptionsOverride } from "../../base/types.js";
|
|
4
|
+
/** Model identifier scribe's `extract` stage falls back to when no `model` override is supplied. */
export declare const EXTRACT_MODEL = "gpt-5.4-mini";
|
|
5
|
+
/** Internal default knobs for scribe's `extract` stage. */
export declare const EXTRACT_STAGE_DEFAULTS: TLlmStageOptionsOverride;
|
|
7
|
+
/**
 * System prompt for scribe's `extract` stage. The literal type below is
 * the exact prompt text: it instructs the model to emit one canonical
 * claim per distinct proposition plus the `mentionToClaim` entries, and
 * to output only the schema-shaped object.
 */
export declare const EXTRACT_SYSTEM_PROMPT = "You read a raw argument and emit its canonical claim set in one pass.\n\nFor each distinct proposition the author makes, emit one canonical claim. Two phrasings of the same proposition merge into a single claim.\n\nEach canonical claim carries:\n- `miniId` \u2014 assign in order: c1, c2, c3, ...\n- `mentionIds` \u2014 leave as a single synthetic id per claim (e.g. [\"c1-m\"]); scribe does not track sub-claim mentions.\n- `type` \u2014 \"normal\" (a primary proposition), \"citation\" (content is \"the cited source asserts X\"; populate `url` + `title`), or \"axiomatic\" (invoked as self-evident; populate `axiom`).\n- `suggestedSymbol` \u2014 a short PascalCase-or-snake_case identifier (letters/digits/underscores, starts with a letter or underscore, under 32 chars). Avoid single letters and generic names.\n- the extension fields your output schema requires (title, body, url, axiom \u2014 whichever apply to the claim's type).\n- `mentionToClaim` \u2014 one `{ \"mentionId\": \"...\", \"claimMiniId\": \"...\" }` entry per synthetic mention id you used.\n\nStyle:\n- Third-person, present-tense, active voice.\n- State the proposition itself \u2014 never \"The author claims that ...\". For a citation claim, the title summarizes what the source asserts.\n\nOutput ONLY the schema-shaped object. No prose.";
|
|
8
|
+
/**
 * Build scribe's `extract` LLM stage. Its `outputSchema` is the
 * per-extension canonicalization schema, so the cheap model is asked
 * for the same extension-shaped claim records (title/body/url/axiom)
 * scholar's canonicalizer produces — without which finalize would
 * assemble empty claims.
 *
 * @param extension - Ingestion extension whose canonicalization schema the stage emits.
 * @param options - Optional per-stage LLM knob overrides.
 * @returns The `extract` stage, typed by the canonicalization output.
 */
export declare function createExtractStage(extension: TIngestionExtension, options?: TLlmStageOptionsOverride): TStage<TClaimCanonicalizationOutput>;
|
|
16
|
+
/**
 * Adapter — republish `extract`'s canonical claims under the
 * canonicalization slot scholar's deterministic stages + finalize read.
 * `extract` already emits the canonicalization shape, so this is a
 * pass-through (with a defensive empty default). Built per-extension
 * because the canonicalization slot's schema carries the extension's
 * claim fields.
 *
 * @param extension - Ingestion extension whose canonicalization schema the slot uses.
 * @returns The adapter stage, typed by the canonicalization output.
 */
export declare function createExtractCanonicalizationAdapterStage(extension: TIngestionExtension): TStage<TClaimCanonicalizationOutput>;
|
|
25
|
+
/**
 * Adapter — derive the classification slot from `extract`'s claim
 * records: each canonical claim already carries its `type`, so the
 * classification entry is `{ miniId, type, sourceString }`.
 * `sourceString` is the claim's `url` when present (citation claims),
 * else null — mirroring what scholar's classification stage records.
 */
export declare const extractClassificationAdapterStage: TStage<TClaimTypeClassificationOutput>;
|
|
33
|
+
//# sourceMappingURL=extract-stage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract-stage.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/extract-stage.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,MAAM,EAAiB,MAAM,oCAAoC,CAAA;AAC/E,OAAO,EAIH,KAAK,4BAA4B,EACjC,KAAK,8BAA8B,EACtC,MAAM,4BAA4B,CAAA;AACnC,OAAO,KAAK,EACR,mBAAmB,EAEnB,wBAAwB,EAC3B,MAAM,qBAAqB,CAAA;AAE5B,eAAO,MAAM,aAAa,iBAAiB,CAAA;AAE3C,2DAA2D;AAC3D,eAAO,MAAM,sBAAsB,EAAE,wBAEpC,CAAA;AAED,eAAO,MAAM,qBAAqB,wyCAgBc,CAAA;AAYhD;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAC9B,SAAS,EAAE,mBAAmB,EAC9B,OAAO,CAAC,EAAE,wBAAwB,GACnC,MAAM,CAAC,4BAA4B,CAAC,CAWtC;AAED;;;;;;;GAOG;AACH,wBAAgB,yCAAyC,CACrD,SAAS,EAAE,mBAAmB,GAC/B,MAAM,CAAC,4BAA4B,CAAC,CAWtC;AAED;;;;;;GAMG;AACH,eAAO,MAAM,iCAAiC,EAAE,MAAM,CAAC,8BAA8B,CAmB/E,CAAA"}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
// scribe stage 1 — `extract`: one cheap LLM call that produces the
|
|
2
|
+
// canonical claim set (the same per-extension canonicalization shape
|
|
3
|
+
// scholar's `claim-canonicalization` stage emits), collapsing scholar's
|
|
4
|
+
// segmentation, claim-mention, citation-source, axiom-indicator,
|
|
5
|
+
// canonicalization, and type-classification stages.
|
|
6
|
+
//
|
|
7
|
+
// Because a stage writes exactly one output slot, `extract` is paired
|
|
8
|
+
// with two deterministic adapter stages that republish its parts under
|
|
9
|
+
// the canonicalization and classification slots scholar's deterministic
|
|
10
|
+
// backend + `finalizeResponseV2` read.
|
|
11
|
+
import { llmStage } from "../../../../lib/pipelines/stage-helpers.js";
|
|
12
|
+
import { deterministicStage } from "../../../../lib/pipelines/stage-helpers.js";
|
|
13
|
+
import { STAGE_IDS, buildResponseSchema, ClaimTypeClassificationOutputSchema, } from "../../base/stages/index.js";
|
|
14
|
+
export const EXTRACT_MODEL = "gpt-5.4-mini";
|
|
15
|
+
/**
 * Internal default knobs for scribe's `extract` stage.
 * Only the model is pinned here; every other knob is left unset.
 */
export const EXTRACT_STAGE_DEFAULTS = {
    model: EXTRACT_MODEL,
};
|
|
19
|
+
/**
 * System prompt for scribe's `extract` stage.
 *
 * The text is sent to the model verbatim (prefixed with a stage-id marker
 * by `buildExtractPrompt`), so every character — including the blank
 * separator lines — is part of the runtime contract.
 */
export const EXTRACT_SYSTEM_PROMPT = `You read a raw argument and emit its canonical claim set in one pass.

For each distinct proposition the author makes, emit one canonical claim. Two phrasings of the same proposition merge into a single claim.

Each canonical claim carries:
- \`miniId\` — assign in order: c1, c2, c3, ...
- \`mentionIds\` — leave as a single synthetic id per claim (e.g. ["c1-m"]); scribe does not track sub-claim mentions.
- \`type\` — "normal" (a primary proposition), "citation" (content is "the cited source asserts X"; populate \`url\` + \`title\`), or "axiomatic" (invoked as self-evident; populate \`axiom\`).
- \`suggestedSymbol\` — a short PascalCase-or-snake_case identifier (letters/digits/underscores, starts with a letter or underscore, under 32 chars). Avoid single letters and generic names.
- the extension fields your output schema requires (title, body, url, axiom — whichever apply to the claim's type).
- \`mentionToClaim\` — one \`{ "mentionId": "...", "claimMiniId": "..." }\` entry per synthetic mention id you used.

Style:
- Third-person, present-tense, active voice.
- State the proposition itself — never "The author claims that ...". For a citation claim, the title summarizes what the source asserts.

Output ONLY the schema-shaped object. No prose.`;
|
|
36
|
+
/**
 * Assemble the prompt pair for the `extract` stage.
 *
 * The system message is `EXTRACT_SYSTEM_PROMPT` prefixed with an HTML
 * comment carrying the stage id; the user message wraps the raw input
 * text and restates the expected output object.
 *
 * @param {object} ctx - Stage context; only `ctx.input.text` is read.
 * @returns {{ system: string, user: string }} The prompt pair.
 */
function buildExtractPrompt(ctx) {
    const input = ctx.input;
    const system = `<!-- stage-id: ${STAGE_IDS.extract} -->\n${EXTRACT_SYSTEM_PROMPT}`;
    const user = `Input text:\n\n${input.text}\n\nProduce the canonicalClaims + mentionToClaim object.`;
    return { system, user };
}
|
|
42
|
+
/**
 * Build scribe's `extract` LLM stage. Its `outputSchema` is the
 * per-extension canonicalization schema, so the cheap model is asked
 * for the same extension-shaped claim records (title/body/url/axiom)
 * scholar's canonicalizer produces — without which finalize would
 * assemble empty claims.
 *
 * @param {object} extension - Ingestion extension handed to `buildResponseSchema`.
 * @param {object} [options] - Optional knob overrides (`model`,
 *   `maxOutputTokens`, `reasoningEffort`, `retry`).
 * @returns {object} The stage value produced by `llmStage`.
 */
export function createExtractStage(extension, options) {
    return llmStage({
        id: STAGE_IDS.extract,
        // No upstream stages: `extract` reads only the pipeline input.
        dependsOn: [],
        outputSchema: buildResponseSchema(extension),
        // Only the model has a local fallback; the remaining knobs are
        // forwarded as-is and stay `undefined` when not supplied.
        model: options?.model ?? EXTRACT_MODEL,
        maxOutputTokens: options?.maxOutputTokens,
        reasoningEffort: options?.reasoningEffort,
        retry: options?.retry,
        buildPrompt: buildExtractPrompt,
    });
}
|
|
61
|
+
/**
 * Adapter — republish `extract`'s canonical claims under the
 * canonicalization slot scholar's deterministic stages + finalize read.
 * `extract` already emits the canonicalization shape, so this is a
 * pass-through (with a defensive empty default). Built per-extension
 * because the canonicalization slot's schema carries the extension's
 * claim fields.
 *
 * @param {object} extension - Ingestion extension handed to `buildResponseSchema`.
 * @returns {object} The stage value produced by `deterministicStage`.
 */
export function createExtractCanonicalizationAdapterStage(extension) {
    return deterministicStage({
        id: STAGE_IDS.claimCanonicalization,
        dependsOn: [STAGE_IDS.extract],
        outputSchema: buildResponseSchema(extension),
        // Pass the `extract` output through untouched; fall back to an
        // empty claim set only when the slot is null/undefined.
        fn: (ctx) => ctx.get(STAGE_IDS.extract) ?? {
            canonicalClaims: [],
            mentionToClaim: [],
        },
    });
}
|
|
80
|
+
/**
 * Adapter — derive the classification slot from `extract`'s claim
 * records: each canonical claim already carries its `type`, so the
 * classification entry is `{ miniId, type, sourceString }`.
 * `sourceString` is the claim's `url` when present (citation claims),
 * else null — mirroring what scholar's classification stage records.
 */
export const extractClassificationAdapterStage = deterministicStage({
    id: STAGE_IDS.claimTypeClassification,
    dependsOn: [STAGE_IDS.extract],
    outputSchema: ClaimTypeClassificationOutputSchema,
    fn: (ctx) => {
        const canon = ctx.get(STAGE_IDS.extract);
        // A missing `extract` output yields an empty classification list.
        const claims = canon?.canonicalClaims ?? [];
        return {
            classifications: claims.map((c) => {
                const url = c.url;
                // Only a non-empty string counts as a source; anything
                // else (absent, empty, non-string) is recorded as null.
                const sourceString = typeof url === "string" && url.length > 0 ? url : null;
                return { miniId: c.miniId, type: c.type, sourceString };
            }),
        };
    },
});
|
|
103
|
+
//# sourceMappingURL=extract-stage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extract-stage.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/extract-stage.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,qEAAqE;AACrE,wEAAwE;AACxE,iEAAiE;AACjE,oDAAoD;AACpD,EAAE;AACF,sEAAsE;AACtE,uEAAuE;AACvE,wEAAwE;AACxE,uCAAuC;AAEvC,OAAO,EAAE,QAAQ,EAAE,MAAM,4CAA4C,CAAA;AACrE,OAAO,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAA;AAE/E,OAAO,EACH,SAAS,EACT,mBAAmB,EACnB,mCAAmC,GAGtC,MAAM,4BAA4B,CAAA;AAOnC,MAAM,CAAC,MAAM,aAAa,GAAG,cAAc,CAAA;AAE3C,2DAA2D;AAC3D,MAAM,CAAC,MAAM,sBAAsB,GAA6B;IAC5D,KAAK,EAAE,aAAa;CACvB,CAAA;AAED,MAAM,CAAC,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;gDAgBW,CAAA;AAEhD,SAAS,kBAAkB,CAAC,GAAkB;IAI1C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAwB,CAAA;IAC1C,MAAM,MAAM,GAAG,kBAAkB,SAAS,CAAC,OAAO,SAAS,qBAAqB,EAAE,CAAA;IAClF,MAAM,IAAI,GAAG,kBAAkB,KAAK,CAAC,IAAI,0DAA0D,CAAA;IACnG,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC3B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAC9B,SAA8B,EAC9B,OAAkC;IAElC,OAAO,QAAQ,CAA+B;QAC1C,EAAE,EAAE,SAAS,CAAC,OAAO;QACrB,SAAS,EAAE,EAAE;QACb,YAAY,EAAE,mBAAmB,CAAC,SAAS,CAAC;QAC5C,KAAK,EAAE,OAAO,EAAE,KAAK,IAAI,aAAa;QACtC,eAAe,EAAE,OAAO,EAAE,eAAe;QACzC,eAAe,EAAE,OAAO,EAAE,eAAe;QACzC,KAAK,EAAE,OAAO,EAAE,KAAK;QACrB,WAAW,EAAE,kBAAkB;KAClC,CAAC,CAAA;AACN,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,yCAAyC,CACrD,SAA8B;IAE9B,OAAO,kBAAkB,CAA+B;QACpD,EAAE,EAAE,SAAS,CAAC,qBAAqB;QACnC,SAAS,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC;QAC9B,YAAY,EAAE,mBAAmB,CAAC,SAAS,CAAC;QAC5C,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE,CACR,GAAG,CAAC,GAAG,CAA+B,SAAS,CAAC,OAAO,CAAC,IAAI;YACxD,eAAe,EAAE,EAAE;YACnB,cAAc,EAAE,EAAE;SACrB;KACR,CAAC,CAAA;AACN,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,iCAAiC,GAC1C,kBAAkB,CAAiC;IAC/C,EAAE,EAAE,SAAS,CAAC,uBAAuB;IACrC,SAAS,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC;IAC9B,YAAY,EAAE,mCAAmC;IACjD,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE;QACR,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CACjB,SAAS,CAAC,OAAO,CACpB,CAAA;QACD,MAAM,MAAM,GAAG,KAAK,EAAE,eAAe,IAAI,EAAE,CAAA;QAC3C,OAAO;YACH,eAAe,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC9B,MAAM,GAAG,GAAI,CAA6B,CAAC,GAAG,CAAA;gBAC9C,M
AAM,YAAY,GACd,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAA;gBAC1D,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,YAAY,EAAE,CAAA;YAC3D,CAAC,CAAC;SACL,CAAA;IACL,CAAC;CACJ,CAAC,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAA;AAClD,YAAY,EAAE,4BAA4B,EAAE,MAAM,aAAa,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/index.ts"],"names":[],"mappings":"AAAA,6DAA6D;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAA"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import Type, { type Static } from "typebox";
|
|
2
|
+
/**
 * Output schema of scribe's `structure` stage: the relation graph, the
 * list of conclusion candidates, and a free-text rationale.
 */
export declare const ScribeStructureOutputSchema: Type.TObject<{
    relations: Type.TArray<Type.TObject<{
        relationId: Type.TString;
        type: Type.TUnion<[Type.TLiteral<"support">, Type.TLiteral<"joint-support">, Type.TLiteral<"derivation-support">]>;
        sources: Type.TArray<Type.TString>;
        target: Type.TString;
        evidence: Type.TObject<{
            segmentIds: Type.TArray<Type.TString>;
            quote: Type.TString;
        }>;
    }>>;
    conclusionCandidates: Type.TArray<Type.TString>;
    rationale: Type.TString;
}>;
|
|
16
|
+
/** Static TypeScript type derived from `ScribeStructureOutputSchema`. */
export type TScribeStructureOutput = Static<typeof ScribeStructureOutputSchema>;
|
|
17
|
+
//# sourceMappingURL=schemas.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/schemas.ts"],"names":[],"mappings":"AAeA,OAAO,IAAI,EAAE,EAAE,KAAK,MAAM,EAAE,MAAM,SAAS,CAAA;AAO3C,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;EAOvC,CAAA;AACD,MAAM,MAAM,sBAAsB,GAAG,MAAM,CAAC,OAAO,2BAA2B,CAAC,CAAA"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
// Output schemas for scribe's two cheap LLM stages.
|
|
2
|
+
//
|
|
3
|
+
// scribe collapses scholar's eight LLM stages into two combined calls:
|
|
4
|
+
// - `extract` → the canonical claim set (the same per-extension
|
|
5
|
+
// canonicalization shape scholar emits), so its deterministic
|
|
6
|
+
// adapters can republish the canonicalization + classification
|
|
7
|
+
// slots scholar's backend reads.
|
|
8
|
+
// - `structure` → the relation graph + a confidence-ranked list of
|
|
9
|
+
// conclusion candidates, so its adapters can republish the
|
|
10
|
+
// relation-extraction + conclusion-selection slots.
|
|
11
|
+
//
|
|
12
|
+
// `extract`'s output schema is the per-extension canonicalization
|
|
13
|
+
// schema itself (`buildResponseSchema(extension)`); only `structure`
|
|
14
|
+
// needs a bespoke schema, defined here.
|
|
15
|
+
import Type, {} from "typebox";
|
|
16
|
+
import { RelationExtractionOutputSchema } from "../../base/stages/index.js";
|
|
17
|
+
// `structure` emits the relation graph (same per-relation shape as
// scholar's `relation-extraction`) plus the conclusion candidates +
// rationale (same fields scholar's conclusion-selection LLM emits,
// before the deterministic resolution the adapter reproduces).
export const ScribeStructureOutputSchema = Type.Object({
    // Reuse scholar's relation array schema by reference so the two
    // pipelines cannot drift apart on the per-relation shape.
    relations: RelationExtractionOutputSchema.properties.relations,
    conclusionCandidates: Type.Array(Type.String()),
    rationale: Type.String(),
}, { additionalProperties: false });
|
|
26
|
+
//# sourceMappingURL=schemas.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemas.js","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/schemas.ts"],"names":[],"mappings":"AAAA,oDAAoD;AACpD,EAAE;AACF,uEAAuE;AACvE,mEAAmE;AACnE,kEAAkE;AAClE,mEAAmE;AACnE,qCAAqC;AACrC,qEAAqE;AACrE,+DAA+D;AAC/D,wDAAwD;AACxD,EAAE;AACF,kEAAkE;AAClE,qEAAqE;AACrE,wCAAwC;AAExC,OAAO,IAAI,EAAE,EAAe,MAAM,SAAS,CAAA;AAC3C,OAAO,EAAE,8BAA8B,EAAE,MAAM,4BAA4B,CAAA;AAE3E,mEAAmE;AACnE,oEAAoE;AACpE,mEAAmE;AACnE,+DAA+D;AAC/D,MAAM,CAAC,MAAM,2BAA2B,GAAG,IAAI,CAAC,MAAM,CAClD;IACI,SAAS,EAAE,8BAA8B,CAAC,UAAU,CAAC,SAAS;IAC9D,oBAAoB,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;IAC/C,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE;CAC3B,EACD,EAAE,oBAAoB,EAAE,KAAK,EAAE,CAClC,CAAA"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { TPipeline } from "../../../../lib/pipelines/index.js";
|
|
2
|
+
import type { TParsedArgumentResponse } from "../../../../lib/parsing/index.js";
|
|
3
|
+
import type { TIngestionExtension, TIngestionInput, TIngestionLlmOptions } from "../../base/types.js";
|
|
4
|
+
/**
 * Options for `createScribePipeline`. Same `{ llm }` surface as scholar:
 * `llm.defaults` applies to both cheap LLM stages; per-stage
 * `llm.overrides` are keyed by `STAGE_IDS.extract` / `.scribeStructure`.
 */
export type TCreateScribePipelineOptions = {
    /** Optional LLM knobs; omit to use each stage's internal defaults. */
    llm?: TIngestionLlmOptions;
};
|
|
12
|
+
/**
 * Build the scribe (fast) ingestion pipeline for the supplied
 * extension: `extract` + `structure` cheap LLM calls, their
 * deterministic adapters, then scholar's 4 deterministic stages +
 * `finalizeResponseV2`. Emits the same `TParsedArgumentResponse` as
 * scholar.
 *
 * @param extension - Ingestion extension the pipeline is built for.
 * @param options - Optional `{ llm }` knobs (see `TCreateScribePipelineOptions`).
 * @returns A pipeline descriptor from `TIngestionInput` to `TParsedArgumentResponse`.
 */
export declare function createScribePipeline(extension: TIngestionExtension, options?: TCreateScribePipelineOptions): TPipeline<TIngestionInput, TParsedArgumentResponse>;
|
|
20
|
+
//# sourceMappingURL=scribe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scribe.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/pipelines/ingestion/scribe/scribe.ts"],"names":[],"mappings":"AAyBA,OAAO,KAAK,EAAE,SAAS,EAAU,MAAM,oCAAoC,CAAA;AAC3E,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,kCAAkC,CAAA;AAU/E,OAAO,KAAK,EACR,mBAAmB,EACnB,eAAe,EACf,oBAAoB,EACvB,MAAM,qBAAqB,CAAA;AAkB5B;;;;GAIG;AACH,MAAM,MAAM,4BAA4B,GAAG;IACvC,GAAG,CAAC,EAAE,oBAAoB,CAAA;CAC7B,CAAA;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAChC,SAAS,EAAE,mBAAmB,EAC9B,OAAO,CAAC,EAAE,4BAA4B,GACvC,SAAS,CAAC,eAAe,EAAE,uBAAuB,CAAC,CAoDrD"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
// scribe — the fast, low-cost ingestion pipeline.
|
|
2
|
+
//
|
|
3
|
+
// Two cheap LLM calls (`extract` → `structure`) feed scholar's
|
|
4
|
+
// deterministic backend, producing the same `TParsedArgumentResponse`
|
|
5
|
+
// shape scholar emits. Each combined LLM stage is paired with
|
|
6
|
+
// deterministic adapter stages that republish its parts under the six
|
|
7
|
+
// standard stage-output slots scholar's 4 deterministic stages +
|
|
8
|
+
// `finalizeResponseV2` read — so that backend + finalize are reused
|
|
9
|
+
// verbatim (the deterministic stage consts are shared by reference).
|
|
10
|
+
//
|
|
11
|
+
// DAG:
|
|
12
|
+
//
|
|
13
|
+
// extract ──┬─ (adapter) claim-canonicalization ─┬─ claim-reference-validation
|
|
14
|
+
// └─ (adapter) claim-type-classification ┤
|
|
15
|
+
// └─ variable-assignment
|
|
16
|
+
// structure ─┬─ (adapter) relation-extraction
|
|
17
|
+
// └─ (adapter) conclusion-selection
|
|
18
|
+
// formula-compilation
|
|
19
|
+
// formula-validation
|
|
20
|
+
// finalize
|
|
21
|
+
//
|
|
22
|
+
// `structure` depends on the canonicalization + classification slots,
|
|
23
|
+
// so `extract` (+ its adapters) runs first.
|
|
24
|
+
import { optional } from "../../../../lib/pipelines/index.js";
|
|
25
|
+
import { STAGE_IDS, claimReferenceValidationStage, variableAssignmentStage, formulaCompilationStage, formulaValidationStage, } from "../../base/stages/index.js";
|
|
26
|
+
import { finalizeResponseV2 } from "../../base/finalize-response-v2.js";
|
|
27
|
+
import { resolveLlmStageOptions } from "../../base/resolve-llm-stage-options.js";
|
|
28
|
+
import { INGESTION_INPUT_SCHEMA } from "../scholar/scholar.js";
|
|
29
|
+
import { createExtractStage, createExtractCanonicalizationAdapterStage, extractClassificationAdapterStage, EXTRACT_STAGE_DEFAULTS, } from "./extract-stage.js";
|
|
30
|
+
import { createStructureStage, structureRelationAdapterStage, structureConclusionAdapterStage, STRUCTURE_STAGE_DEFAULTS, } from "./structure-stage.js";
|
|
31
|
+
// Identity of the scribe pipeline descriptor returned by `createScribePipeline`.
const PIPELINE_ID = "argument-ingestion-scribe";
const PIPELINE_VERSION = "1.0.0";
|
|
33
|
+
/**
 * Build the scribe (fast) ingestion pipeline for the supplied
 * extension: `extract` + `structure` cheap LLM calls, their
 * deterministic adapters, then scholar's 4 deterministic stages +
 * `finalizeResponseV2`. Emits the same `TParsedArgumentResponse` as
 * scholar.
 *
 * @param {object} extension - Ingestion extension; supplies the
 *   per-extension schemas and `responseSchema`.
 * @param {object} [options] - Optional `{ llm }` knobs; per-stage
 *   options are resolved through `resolveLlmStageOptions`.
 * @returns {object} Pipeline descriptor: `id`, `version`, `inputSchema`,
 *   `outputSchema`, `stages`, and `finalize`.
 */
export function createScribePipeline(extension, options) {
    const llm = options?.llm;
    const extractStage = createExtractStage(extension, resolveLlmStageOptions(STAGE_IDS.extract, EXTRACT_STAGE_DEFAULTS, llm));
    const extractCanonicalizationAdapterStage = createExtractCanonicalizationAdapterStage(extension);
    const structureStage = createStructureStage(resolveLlmStageOptions(STAGE_IDS.scribeStructure, STRUCTURE_STAGE_DEFAULTS, llm));
    const stages = [
        extractStage,
        extractCanonicalizationAdapterStage,
        extractClassificationAdapterStage,
        claimReferenceValidationStage,
        variableAssignmentStage,
        structureStage,
        structureRelationAdapterStage,
        structureConclusionAdapterStage,
        formulaCompilationStage,
        formulaValidationStage,
    ];
    return {
        id: PIPELINE_ID,
        version: PIPELINE_VERSION,
        inputSchema: INGESTION_INPUT_SCHEMA,
        // Mirrors scholar: the advertised outputSchema is the
        // extension's response schema; finalize attaches a
        // `processingFailures` slot at runtime.
        outputSchema: extension.responseSchema,
        stages,
        finalize: {
            // The first three slots are required; the `optional(...)`
            // ones are declared as optional dependencies of finalize.
            dependsOn: [
                STAGE_IDS.claimCanonicalization,
                STAGE_IDS.variableAssignment,
                STAGE_IDS.formulaCompilation,
                optional(STAGE_IDS.claimTypeClassification),
                optional(STAGE_IDS.relationExtraction),
                optional(STAGE_IDS.conclusionSelection),
                optional(STAGE_IDS.formulaValidation),
                optional(STAGE_IDS.claimReferenceValidation),
            ],
            run: (ctx) => finalizeResponseV2({ ctx, extension }),
        },
    };
}
|
|
81
|
+
//# sourceMappingURL=scribe.js.map
|