@ordis-dev/ordis 0.1.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/dist/cli.js +20 -4
- package/dist/cli.js.map +1 -1
- package/dist/core/error-formatter.d.ts +35 -0
- package/dist/core/error-formatter.d.ts.map +1 -0
- package/dist/core/error-formatter.js +319 -0
- package/dist/core/error-formatter.js.map +1 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -0
- package/dist/core/index.js.map +1 -1
- package/dist/core/pipeline.d.ts.map +1 -1
- package/dist/core/pipeline.js +17 -3
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/preprocessor.d.ts +35 -0
- package/dist/core/preprocessor.d.ts.map +1 -0
- package/dist/core/preprocessor.js +297 -0
- package/dist/core/preprocessor.js.map +1 -0
- package/dist/core/types.d.ts +24 -1
- package/dist/core/types.d.ts.map +1 -1
- package/dist/core/validator.d.ts +3 -1
- package/dist/core/validator.d.ts.map +1 -1
- package/dist/core/validator.js.map +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/client.d.ts.map +1 -1
- package/dist/llm/client.js +23 -3
- package/dist/llm/client.js.map +1 -1
- package/dist/llm/types.d.ts +2 -0
- package/dist/llm/types.d.ts.map +1 -1
- package/dist/schemas/types.d.ts +3 -1
- package/dist/schemas/types.d.ts.map +1 -1
- package/package.json +4 -1
- package/dist/__tests__/api.test.d.ts +0 -5
- package/dist/__tests__/api.test.d.ts.map +0 -1
- package/dist/__tests__/api.test.js +0 -95
- package/dist/__tests__/api.test.js.map +0 -1
- package/dist/__tests__/cli.test.d.ts +0 -6
- package/dist/__tests__/cli.test.d.ts.map +0 -1
- package/dist/__tests__/cli.test.js +0 -103
- package/dist/__tests__/cli.test.js.map +0 -1
- package/dist/cli/__tests__/cli.test.d.ts +0 -5
- package/dist/cli/__tests__/cli.test.d.ts.map +0 -1
- package/dist/cli/__tests__/cli.test.js +0 -13
- package/dist/cli/__tests__/cli.test.js.map +0 -1
- package/dist/core/__tests__/pipeline.test.d.ts +0 -5
- package/dist/core/__tests__/pipeline.test.d.ts.map +0 -1
- package/dist/core/__tests__/pipeline.test.js +0 -334
- package/dist/core/__tests__/pipeline.test.js.map +0 -1
- package/dist/core/__tests__/validator.test.d.ts +0 -5
- package/dist/core/__tests__/validator.test.d.ts.map +0 -1
- package/dist/core/__tests__/validator.test.js +0 -124
- package/dist/core/__tests__/validator.test.js.map +0 -1
- package/dist/llm/__tests__/client.test.d.ts +0 -5
- package/dist/llm/__tests__/client.test.d.ts.map +0 -1
- package/dist/llm/__tests__/client.test.js +0 -350
- package/dist/llm/__tests__/client.test.js.map +0 -1
- package/dist/llm/__tests__/prompt-builder.test.d.ts +0 -5
- package/dist/llm/__tests__/prompt-builder.test.d.ts.map +0 -1
- package/dist/llm/__tests__/prompt-builder.test.js +0 -171
- package/dist/llm/__tests__/prompt-builder.test.js.map +0 -1
- package/dist/llm/__tests__/retry.test.d.ts +0 -5
- package/dist/llm/__tests__/retry.test.d.ts.map +0 -1
- package/dist/llm/__tests__/retry.test.js +0 -350
- package/dist/llm/__tests__/retry.test.js.map +0 -1
- package/dist/llm/__tests__/token-counter.test.d.ts +0 -5
- package/dist/llm/__tests__/token-counter.test.d.ts.map +0 -1
- package/dist/llm/__tests__/token-counter.test.js +0 -166
- package/dist/llm/__tests__/token-counter.test.js.map +0 -1
- package/dist/schemas/__tests__/integration.test.d.ts +0 -5
- package/dist/schemas/__tests__/integration.test.d.ts.map +0 -1
- package/dist/schemas/__tests__/integration.test.js +0 -366
- package/dist/schemas/__tests__/integration.test.js.map +0 -1
- package/dist/schemas/__tests__/loader.test.d.ts +0 -5
- package/dist/schemas/__tests__/loader.test.d.ts.map +0 -1
- package/dist/schemas/__tests__/loader.test.js +0 -271
- package/dist/schemas/__tests__/loader.test.js.map +0 -1
- package/dist/schemas/__tests__/validator.test.d.ts +0 -5
- package/dist/schemas/__tests__/validator.test.d.ts.map +0 -1
- package/dist/schemas/__tests__/validator.test.js +0 -592
- package/dist/schemas/__tests__/validator.test.js.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAG1D;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACnB,KAAK,CAAU;IAEvB,YAAY,QAAiB,KAAK;QAC9B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,OAA0B;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC;YACD,2CAA2C;YAC3C,IAAI,cAAc,GAAG,OAAO,CAAC,KAAK,CAAC;YACnC,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;gBACxB,MAAM,cAAc,GAAG,IAAI,CAAC,UAAU,CAAC,YAAY,EAAE,GAAG,EAAE;oBACtD,OAAO,qBAAqB,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;gBACvE,CAAC,CAAC,CAAC;gBACH,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAE3B,IAAI,cAAc,CAAC,OAAO,IAAI,cAAc,CAAC,IAAI,EAAE,CAAC;oBAChD,MAAM,MAAM,GAAG,cAAc,CAAC,IAA+C,CAAC;oBAC9E,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC;gBACjC,CAAC;YACL,CAAC;YAED,4BAA4B;YAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE;gBACrD,OAAO,IAAI,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;YAC5C,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEvB,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;gBACtB,MAAM,IAAI,aAAa,CACnB,6BAA6B,EAC7B,kBAAkB,CAAC,SAAS,EAC5B,eAAe,CAClB,CAAC;YACN,CAAC;YAED,MAAM,MAAM,GAAG,UAAU,CAAC,IAAiB,CAAC;YAE5C,kCAAkC;YAClC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,KAAK,IAAI,EAAE;gBACrE,OAAO,MAAM,MAAM,CAAC,OAAO,CAAC;oBACxB,MAAM,EAAE,OAAO,CAAC,MAAM;oBACtB,KAAK,EAAE,cAAc;iBACxB,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAExB,IAAI,CAAC,WAAW,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;gBAC5C,MAAM,IAAI,aAAa,CACnB,uBAAuB,EACvB,kBAAkB,CAAC,SAAS,EAC5B,aAAa,EACb,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAC/B,CAAC;YACN,CAAC;YAED,MAAM,UAAU,GAAG,WAAW,CAAC,IAI9B,CAAC;YAEF,kCAAkC;YAClC,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE;gBACvD,OAAO,qBAAqB,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YACl
E,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAEzB,MAAM,UAAU,GAAG,YAAY,CAAC,IAA2F,CAAC;YAE5H,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;gBACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACxC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE,UAAU,CAAC,MAAM;oBACzB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,qCAAqC;YACrC,MAAM,cAAc,GAAG,IAAI,CAAC,UAAU,CAAC,kBAAkB,EAAE,GAAG,EAAE;gBAC5D,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;oBAC7B,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;gBACpC,CAAC;gBAED,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC;gBACrE,MAAM,cAAc,GAAG,UAAU,CAAC,UAAU,IAAI,SAAS,CAAC;gBAE1D,OAAO;oBACH,cAAc;oBACd,UAAU,EAAE,CAAC,cAAc,IAAI,mBAAmB;iBACrD,CAAC;YACN,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAE3B,MAAM,eAAe,GAAG,cAAc,CAAC,IAAyD,CAAC;YAEjG,IAAI,eAAe,CAAC,UAAU,EAAE,CAAC;gBAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACxC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,IAAI,EAAE,UAAU,CAAC,IAAI;oBACrB,UAAU,EAAE,UAAU,CAAC,UAAU;oBACjC,iBAAiB,EAAE,UAAU,CAAC,iBAAiB;oBAC/C,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE;wBACJ;4BACI,OAAO,EAAE,cAAc,UAAU,CAAC,UAAU,qBAAqB,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,SAAS,GAAG;4BACxG,IAAI,EAAE,kBAAkB,CAAC,gBAAgB;yBAC5C;qBACJ;oBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,WAAW;YACX,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,UAAU,CAAC,IAAI;gBACrB,UAAU,EAAE,UAAU,CAAC,UAAU;gBACjC,iBAAiB,EAAE,UAAU,CAAC,iBAAiB;gBAC/C,cAAc,EAAE,eAAe,CAAC,cAAc;gBAC9C,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACrC,QAAQ,EAAE;oBACN,QAAQ;oBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;oBA
C9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;iBAC5C;aACJ,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAExC,IAAI,KAAK,YAAY,aAAa,EAAE,CAAC;gBACjC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE;wBACJ;4BACI,OAAO,EAAE,KAAK,CAAC,OAAO;4BACtB,IAAI,EAAE,KAAK,CAAC,IAAI;4BAChB,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,iCAAiC;yBAC5D;qBACJ;oBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,OAAO;gBACH,OAAO,EAAE,KAAK;gBACd,cAAc,EAAE,KAAK;gBACrB,MAAM,EAAE;oBACJ;wBACI,OAAO,EAAG,KAAe,CAAC,OAAO;wBACjC,IAAI,EAAE,eAAe;qBACxB;iBACJ;gBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACrC,QAAQ,EAAE;oBACN,QAAQ;oBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;oBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;iBAC5C;aACJ,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,UAAU,CAAI,IAAY,EAAE,EAAW;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,EAAE,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM;gBACZ,QAAQ;aACX,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAG,4CAA4C;gBACpD,QAAQ;aACX,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAI,IAAY,EAAE,EAAoB;QAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,EAAE,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM;gBACZ,QAAQ;aACX,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAG,4CAA4C;gBACpD,QAAQ;aACX,CAAC;QACN,CAAC;IACL,CAAC;CACJ;AAED;;
GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAA0B;IACpD,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,OAAO,MAAM,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAC3C,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML preprocessing module
|
|
3
|
+
* Strips HTML tags and noise from input text before extraction
|
|
4
|
+
*/
|
|
5
|
+
import type { HtmlStripOptions, PreprocessingConfig } from './types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Resolves preprocessing options to concrete HtmlStripOptions
|
|
8
|
+
*/
|
|
9
|
+
export declare function resolveHtmlStripOptions(config: boolean | HtmlStripOptions | undefined): HtmlStripOptions | null;
|
|
10
|
+
/**
|
|
11
|
+
* Strips HTML from input text according to options
|
|
12
|
+
*/
|
|
13
|
+
export declare function stripHtml(input: string, options: HtmlStripOptions): string;
|
|
14
|
+
/**
|
|
15
|
+
* Preprocesses input text according to configuration
|
|
16
|
+
*/
|
|
17
|
+
export declare function preprocess(input: string, config: PreprocessingConfig): string;
|
|
18
|
+
/**
|
|
19
|
+
* Result of preprocessing
|
|
20
|
+
*/
|
|
21
|
+
export interface PreprocessResult {
|
|
22
|
+
/** The preprocessed text */
|
|
23
|
+
text: string;
|
|
24
|
+
/** Whether preprocessing was applied */
|
|
25
|
+
wasProcessed: boolean;
|
|
26
|
+
/** Original input length */
|
|
27
|
+
originalLength: number;
|
|
28
|
+
/** Processed text length */
|
|
29
|
+
processedLength: number;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Preprocesses input with detailed result information
|
|
33
|
+
*/
|
|
34
|
+
export declare function preprocessWithDetails(input: string, config: PreprocessingConfig | undefined): PreprocessResult;
|
|
35
|
+
//# sourceMappingURL=preprocessor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"preprocessor.d.ts","sourceRoot":"","sources":["../../src/core/preprocessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AA4CxE;;GAEG;AACH,wBAAgB,uBAAuB,CACnC,MAAM,EAAE,OAAO,GAAG,gBAAgB,GAAG,SAAS,GAC/C,gBAAgB,GAAG,IAAI,CAqBzB;AAuKD;;GAEG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,MAAM,CA2C1E;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,GAAG,MAAM,CAY7E;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,wCAAwC;IACxC,YAAY,EAAE,OAAO,CAAC;IACtB,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,eAAe,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACjC,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,mBAAmB,GAAG,SAAS,GACxC,gBAAgB,CAkBlB"}
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML preprocessing module
|
|
3
|
+
* Strips HTML tags and noise from input text before extraction
|
|
4
|
+
*/
|
|
5
|
+
import { parse, HTMLElement } from 'node-html-parser';
|
|
6
|
+
/**
 * Selectors that are always removed from the parsed document before text
 * extraction, in addition to any caller-supplied selectors.
 *
 * The "ad-" patterns are anchored to the start of a class token / id (or to a
 * hyphen-delimited segment) instead of using a bare substring match. A bare
 * `[class*="ad-"]` also matches ordinary content classes such as
 * `lead-paragraph`, `head-line`, `download-link` or `thread-list`, which
 * silently deleted real content.
 */
const DEFAULT_REMOVE_SELECTORS = [
    'script',
    'style',
    'nav',
    'footer',
    'header',
    'aside',
    'noscript',
    'iframe',
    'svg',
    'canvas',
    'form',
    // Common ad and tracking selectors
    '[class^="ad-"]', // class attribute starts with an "ad-" token
    '[class*=" ad-"]', // a later, space-separated class token starts with "ad-"
    '[class*="-ad-"]', // hyphen-delimited segment, e.g. "sidebar-ad-unit"
    '[class*="advertisement"]',
    '[class*="cookie"]',
    '[class*="subscribe"]',
    '[class*="newsletter"]',
    '[class*="popup"]',
    '[class*="modal"]',
    '[class*="banner"]',
    '[id^="ad-"]',
    '[id*="-ad-"]',
    '[id*="advertisement"]',
    '[id*="cookie"]',
];
/**
 * Tag names grouped by the role they play when preserveStructure is enabled.
 * Within this module only `headings`, `containers` and `blocks` are consulted
 * by the structured-text conversion.
 */
const SEMANTIC_ELEMENTS = {
    headings: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
    lists: ['ul', 'ol', 'li'],
    containers: ['article', 'main', 'section', 'div', 'body', 'html'],
    blocks: ['p', 'blockquote'],
    inline: ['strong', 'b', 'em', 'i', 'a', 'code'],
};
|
|
46
|
+
/**
 * Normalizes a `stripHtml` setting into a fully populated options object.
 *
 * @param {boolean | object | undefined} config - `true` for all defaults, an
 *   options object with optional overrides, or a falsy value to disable.
 * @returns {object | null} `null` when `config` is falsy; otherwise an object
 *   with `extractText` (default true), `preserveStructure` (default false),
 *   `removeSelectors` (default []) and `maxLength` (passed through as given).
 */
export function resolveHtmlStripOptions(config) {
    if (!config) {
        return null;
    }
    // `true` carries no overrides, so every field falls back to its default.
    const overrides = config === true ? {} : config;
    return {
        extractText: overrides.extractText ?? true,
        preserveStructure: overrides.preserveStructure ?? false,
        removeSelectors: overrides.removeSelectors ?? [],
        maxLength: overrides.maxLength,
    };
}
|
|
69
|
+
/**
 * Detaches every element matched by the default selectors or by the
 * caller-supplied `selectors` from the tree rooted at `root`.
 *
 * @param {object} root - Parsed node exposing `querySelectorAll`.
 * @param {string[]} selectors - Extra selectors applied after the defaults.
 * @returns {void}
 */
function removeElements(root, selectors) {
    const dropMatches = (selector) => {
        try {
            for (const match of root.querySelectorAll(selector)) {
                match.remove();
            }
        }
        catch {
            // Deliberately best-effort: a selector the parser rejects
            // (or any failure while removing) skips that selector only.
        }
    };
    [...DEFAULT_REMOVE_SELECTORS, ...selectors].forEach(dropMatches);
}
|
|
87
|
+
/**
 * Renders the tree under `root` as markdown-like text.
 *
 * Headings become `#`-prefixed lines, list items get a `- ` or `1. ` marker
 * depending on the parent list tag, blockquotes are `> `-prefixed, `pre`/`code`
 * are wrapped in triple-backtick fences, and block elements become standalone
 * paragraphs. Any other element is traversed and its direct text nodes are
 * emitted one per line.
 *
 * @param {object} root - Parsed root node.
 * @returns {string} The joined lines, with runs of 3+ newlines collapsed to
 *   two and surrounding whitespace trimmed.
 */
function convertToStructuredText(root) {
    const lines = [];
    // Emits rows as a block separated from its neighbours by blank lines.
    const pushBlock = (...rows) => {
        lines.push('', ...rows, '');
    };
    // Elements recurse; text nodes (nodeType 3) are emitted when non-empty.
    function walkChildren(node) {
        for (const child of node.childNodes) {
            if (child instanceof HTMLElement) {
                visit(child);
            }
            else if (child.nodeType === 3) {
                const content = child.text.trim();
                if (content) {
                    lines.push(content);
                }
            }
        }
    }
    function visit(node) {
        if (!node) {
            return;
        }
        const tag = node.tagName?.toLowerCase() || '';
        if (SEMANTIC_ELEMENTS.headings.includes(tag)) {
            // The digit after "h" is the heading level.
            const hashes = '#'.repeat(parseInt(tag[1], 10));
            const content = node.text.trim();
            if (content) {
                pushBlock(hashes + ' ' + content);
            }
            return;
        }
        switch (tag) {
            case 'li': {
                const marker = node.parentNode?.tagName?.toLowerCase() === 'ol' ? '1. ' : '- ';
                const content = node.text.trim();
                if (content) {
                    lines.push(marker + content);
                }
                return;
            }
            case 'ul':
            case 'ol': {
                // Only element children are visited; stray text between items is dropped.
                lines.push('');
                for (const child of node.childNodes) {
                    if (child instanceof HTMLElement) {
                        visit(child);
                    }
                }
                lines.push('');
                return;
            }
            case 'blockquote': {
                const content = node.text.trim();
                if (content) {
                    pushBlock('> ' + content.replace(/\n/g, '\n> '));
                }
                return;
            }
            case 'pre':
            case 'code': {
                const content = node.text.trim();
                if (content) {
                    pushBlock('```', content, '```');
                }
                return;
            }
            default:
                break;
        }
        if (SEMANTIC_ELEMENTS.blocks.includes(tag)) {
            const content = node.text.trim();
            if (content) {
                pushBlock(content);
            }
            return;
        }
        // Containers, the tagless root and any unrecognised element are all
        // handled the same way: descend into the children.
        walkChildren(node);
    }
    visit(root);
    const joined = lines.join('\n');
    return joined.replace(/\n{3,}/g, '\n\n').trim();
}
|
|
199
|
+
/**
 * Returns the text content of `root` with whitespace normalized while
 * keeping paragraph breaks.
 *
 * @param {object} root - Node exposing a `text` string property.
 * @returns {string} Normalized text.
 */
function extractPlainText(root) {
    // Runs of spaces/tabs shrink to one space.
    const singleSpaced = root.text.replace(/[ \t]+/g, ' ');
    // Newlines separated only by whitespace become one paragraph break.
    const withParagraphs = singleSpaced.replace(/\n\s*\n/g, '\n\n');
    // Strip leading/trailing whitespace on every line.
    const trimmedLines = withParagraphs
        .split('\n')
        .map((line) => line.trim())
        .join('\n');
    // Per-line trimming can expose new blank runs, so cap them again.
    return trimmedLines.replace(/\n{3,}/g, '\n\n').trim();
}
|
|
220
|
+
/**
 * Shortens `text` so the result, including the trailing "...", is never
 * longer than `maxLength`.
 *
 * The cut prefers the last space when that space lies in the final 20% of the
 * available budget; otherwise the text is cut mid-word. When `maxLength` is
 * too small to hold the ellipsis, a plain hard cut is returned.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Positive upper bound on the returned length.
 * @returns {string} `text` unchanged when it already fits, else the shortened text.
 */
function truncateToMaxLength(text, maxLength) {
    const ellipsis = '...';
    if (text.length <= maxLength) {
        return text;
    }
    if (maxLength <= ellipsis.length) {
        return text.slice(0, maxLength);
    }
    // Reserve room for the ellipsis so the final string honours maxLength.
    const budget = maxLength - ellipsis.length;
    const head = text.slice(0, budget);
    const lastSpace = head.lastIndexOf(' ');
    const body = lastSpace > budget * 0.8 ? head.slice(0, lastSpace) : head;
    return body + ellipsis;
}
/**
 * Strips HTML from input text according to options.
 *
 * Input that lacks either `<` or `>` is treated as plain text and returned
 * as-is (hard-cut to `maxLength` when a limit is set, without an ellipsis).
 * Otherwise the input is parsed, unwanted elements are removed, and text is
 * produced either in structured form (`options.preserveStructure`) or as
 * plain text. `options.extractText` is not consulted here.
 *
 * @param {string} input - Raw text or HTML.
 * @param {object} options - Resolved strip options (`preserveStructure`,
 *   `removeSelectors`, `maxLength`).
 * @returns {string} Extracted text, at most `maxLength` characters long when
 *   `maxLength` is a positive finite number; missing, zero, negative or
 *   non-finite limits mean "no limit".
 */
export function stripHtml(input, options) {
    // Only a positive, finite limit counts. A negative value used to reach
    // String.prototype.slice and chop characters off the END of the text.
    const limit = Number(options.maxLength);
    const hasLimit = Number.isFinite(limit) && limit > 0;
    // Quick check: if no HTML-like content, return as-is
    if (!input.includes('<') || !input.includes('>')) {
        return hasLimit ? input.slice(0, limit) : input;
    }
    // Parse HTML
    const root = parse(input, {
        lowerCaseTagName: true,
        comment: false, // Remove comments
        blockTextElements: {
            script: true,
            noscript: true,
            style: true,
            pre: true,
        },
    });
    // Remove unwanted elements
    removeElements(root, options.removeSelectors || []);
    // Extract text based on options
    const result = options.preserveStructure
        ? convertToStructuredText(root)
        : extractPlainText(root);
    // Apply max length if specified; the ellipsis is counted inside the limit.
    return hasLimit ? truncateToMaxLength(result, limit) : result;
}
|
|
263
|
+
/**
 * Applies the configured preprocessing steps to `input`.
 *
 * HTML stripping is the only step: when `config.stripHtml` is falsy the input
 * is returned unchanged.
 *
 * @param {string} input - Raw input text.
 * @param {object} config - Preprocessing configuration; `stripHtml` may be a
 *   boolean or an options object.
 * @returns {string} The preprocessed text.
 */
export function preprocess(input, config) {
    if (!config.stripHtml) {
        return input;
    }
    const stripOptions = resolveHtmlStripOptions(config.stripHtml);
    return stripOptions ? stripHtml(input, stripOptions) : input;
}
|
|
277
|
+
/**
 * Runs preprocessing and reports what happened alongside the text.
 *
 * @param {string} input - Raw input text.
 * @param {object | undefined} config - Preprocessing configuration; when it is
 *   missing or has a falsy `stripHtml`, the input passes through untouched.
 * @returns {{text: string, wasProcessed: boolean, originalLength: number, processedLength: number}}
 *   `wasProcessed` is true only when the resulting text differs from `input`.
 */
export function preprocessWithDetails(input, config) {
    const originalLength = input.length;
    const strippingRequested = Boolean(config && config.stripHtml);
    if (!strippingRequested) {
        return {
            text: input,
            wasProcessed: false,
            originalLength,
            processedLength: originalLength,
        };
    }
    const text = preprocess(input, config);
    return {
        text,
        wasProcessed: text !== input,
        originalLength,
        processedLength: text.length,
    };
}
|
|
297
|
+
//# sourceMappingURL=preprocessor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"preprocessor.js","sourceRoot":"","sources":["../../src/core/preprocessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,MAAM,wBAAwB,GAAG;IAC7B,QAAQ;IACR,OAAO;IACP,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,UAAU;IACV,QAAQ;IACR,KAAK;IACL,QAAQ;IACR,MAAM;IACN,mCAAmC;IACnC,gBAAgB;IAChB,0BAA0B;IAC1B,mBAAmB;IACnB,sBAAsB;IACtB,uBAAuB;IACvB,kBAAkB;IAClB,kBAAkB;IAClB,mBAAmB;IACnB,aAAa;IACb,uBAAuB;IACvB,gBAAgB;CACnB,CAAC;AAEF;;;GAGG;AACH,MAAM,iBAAiB,GAAG;IACtB,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;IAC9C,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;IACzB,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC;IACjE,MAAM,EAAE,CAAC,GAAG,EAAE,YAAY,CAAC;IAC3B,MAAM,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC;CAClD,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,uBAAuB,CACnC,MAA8C;IAE9C,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAClB,uCAAuC;QACvC,OAAO;YACH,WAAW,EAAE,IAAI;YACjB,iBAAiB,EAAE,KAAK;YACxB,eAAe,EAAE,EAAE;YACnB,SAAS,EAAE,SAAS;SACvB,CAAC;IACN,CAAC;IAED,OAAO;QACH,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI;QACvC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,KAAK;QACpD,eAAe,EAAE,MAAM,CAAC,eAAe,IAAI,EAAE;QAC7C,SAAS,EAAE,MAAM,CAAC,SAAS;KAC9B,CAAC;AACN,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAiB,EAAE,SAAmB;IAC1D,MAAM,YAAY,GAAG,CAAC,GAAG,wBAAwB,EAAE,GAAG,SAAS,CAAC,CAAC;IAEjE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QAClC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;YACjD,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;gBACxB,EAAE,CAAC,MAAM,EAAE,CAAC;YAChB,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACL,kCAAkC;YAClC,+EAA+E;QACnF,CAAC;IACL,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,IAAiB;IAC9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,SAAS,WAAW,CAAC,IAAwB,EAAE,QAAgB,CAAC;QAC5D,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAElD,kBAAkB;QAClB,IAAI,iBAAiB,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC
,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACvC,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC;YACvC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;gBAC1B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,IAAI,CAAC,UAAgC,CAAC;YACrD,MAAM,SAAS,GAAG,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,SAAS,KAAK,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;YACjD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YAC9B,CAAC;YACD,OAAO;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBAClC,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;oBAC/B,WAAW,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;gBAClC,CAAC;YACL,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,OAAO;QACX,CAAC;QAED,qBAAqB;QACrB,IAAI,OAAO,KAAK,YAAY,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBAC/C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,qBAAqB;QACrB,IAAI,OAAO,KAAK,KAAK,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAClB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,6CAA6C;QAC7C,IAAI,iBAAiB,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KA
AK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,oDAAoD;QACpD,IAAI,iBAAiB,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAC7D,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBAClC,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;oBAC/B,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;gBAC9B,CAAC;qBAAM,IAAI,KAAK,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC9B,YAAY;oBACZ,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;oBAC/B,IAAI,IAAI,EAAE,CAAC;wBACP,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;gBACL,CAAC;YACL,CAAC;YACD,OAAO;QACX,CAAC;QAED,sDAAsD;QACtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YAClC,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;gBAC/B,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAC9B,CAAC;iBAAM,IAAI,KAAK,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;gBAC9B,YAAY;gBACZ,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,IAAI,EAAE,CAAC;oBACP,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,WAAW,CAAC,IAAI,CAAC,CAAC;IAElB,gCAAgC;IAChC,OAAO,KAAK;SACP,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAiB;IACvC,eAAe;IACf,IAAI,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;IAErB,wDAAwD;IACxD,IAAI,GAAG,IAAI;QACP,4CAA4C;SAC3C,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;QACxB,kEAAkE;SACjE,OAAO,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5B,oDAAoD;SACnD,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACxB,IAAI,CAAC,IAAI,CAAC;QACX,4CAA4C;SAC3C,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;IAEZ,OAAO,IAAI,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,KAAa,EAAE,OAAyB;IAC9D,qDAAqD;IACrD,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/C,OAAO,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACzE,CAAC;IAED,aAAa;IACb,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE;QACtB,gBAAgB,EAAE,IAAI;QACtB,OAAO,EAAE,KAAK,EAAE,kBAAkB;QAClC,iBAAiB,EAAE;Y
ACf,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI;SACZ;KACJ,CAAC,CAAC;IAEH,2BAA2B;IAC3B,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC;IAEpD,gCAAgC;IAChC,IAAI,MAAc,CAAC;IAEnB,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;QAC5B,MAAM,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;SAAM,CAAC;QACJ,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,gCAAgC;IAChC,IAAI,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;QACzD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QAC5C,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,SAAS,GAAG,OAAO,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC;YACtC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,KAAK,CAAC;QAChD,CAAC;aAAM,CAAC;YACJ,MAAM,IAAI,KAAK,CAAC;QACpB,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,KAAa,EAAE,MAA2B;IACjE,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,wBAAwB;IACxB,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACnB,MAAM,OAAO,GAAG,uBAAuB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,OAAO,EAAE,CAAC;YACV,MAAM,GAAG,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACxC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAgBD;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACjC,KAAa,EACb,MAAuC;IAEvC,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,OAAO;YACH,IAAI,EAAE,KAAK;YACX,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,KAAK,CAAC,MAAM;YAC5B,eAAe,EAAE,KAAK,CAAC,MAAM;SAChC,CAAC;IACN,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAE5C,OAAO;QACH,IAAI,EAAE,SAAS;QACf,YAAY,EAAE,SAAS,KAAK,KAAK;QACjC,cAAc,EAAE,KAAK,CAAC,MAAM;QAC5B,eAAe,EAAE,SAAS,CAAC,MAAM;KACpC,CAAC;AACN,CAAC"}
|
package/dist/core/types.d.ts
CHANGED
|
@@ -3,6 +3,26 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { Schema } from '../schemas/types.js';
|
|
5
5
|
import type { LLMConfig } from '../llm/types.js';
|
|
6
|
+
/**
|
|
7
|
+
* HTML stripping options for preprocessing
|
|
8
|
+
*/
|
|
9
|
+
export interface HtmlStripOptions {
|
|
10
|
+
/** Keep text content only (default: true) */
|
|
11
|
+
extractText?: boolean;
|
|
12
|
+
/** Preserve semantic structure like headings, lists (converts to markdown-like format) */
|
|
13
|
+
preserveStructure?: boolean;
|
|
14
|
+
/** Remove specific CSS selectors (e.g., 'nav', 'footer', '.ad', '#sidebar') */
|
|
15
|
+
removeSelectors?: string[];
|
|
16
|
+
/** Max content length after stripping (truncates if exceeded) */
|
|
17
|
+
maxLength?: number;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Preprocessing configuration for input text
|
|
21
|
+
*/
|
|
22
|
+
export interface PreprocessingConfig {
|
|
23
|
+
/** Strip HTML tags from input. When true, uses default options. */
|
|
24
|
+
stripHtml?: boolean | HtmlStripOptions;
|
|
25
|
+
}
|
|
6
26
|
/**
|
|
7
27
|
* Pipeline configuration
|
|
8
28
|
*/
|
|
@@ -19,6 +39,8 @@ export interface ExtractionRequest {
|
|
|
19
39
|
input: string;
|
|
20
40
|
schema: Schema;
|
|
21
41
|
llmConfig: LLMConfig;
|
|
42
|
+
/** Optional preprocessing configuration */
|
|
43
|
+
preprocessing?: PreprocessingConfig;
|
|
22
44
|
debug?: boolean;
|
|
23
45
|
}
|
|
24
46
|
/**
|
|
@@ -28,7 +50,7 @@ export interface StepResult {
|
|
|
28
50
|
step: string;
|
|
29
51
|
success: boolean;
|
|
30
52
|
data?: unknown;
|
|
31
|
-
error?:
|
|
53
|
+
error?: unknown;
|
|
32
54
|
duration?: number;
|
|
33
55
|
}
|
|
34
56
|
/**
|
|
@@ -44,6 +66,7 @@ export interface PipelineResult {
|
|
|
44
66
|
field?: string;
|
|
45
67
|
message: string;
|
|
46
68
|
code: string;
|
|
69
|
+
details?: Record<string, unknown>;
|
|
47
70
|
}>;
|
|
48
71
|
steps?: StepResult[];
|
|
49
72
|
metadata: {
|
package/dist/core/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,6CAA6C;IAC7C,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,0FAA0F;IAC1F,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,+EAA+E;IAC/E,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,iEAAiE;IACjE,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,mEAAmE;IACnE,SAAS,CAAC,EAAE,OAAO,GAAG,gBAAgB,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,2CAA2C;IAC3C,aAAa,CAAC,EAAE,mBAAmB,CAAC;IACpC,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC3C,cAAc,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,KAAK,CAAC;QACV,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACrC,CAAC,CAAC;IACH,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;IACrB,QAAQ,EAAE;QACN,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC;KACvB,CAAC;CACL"}
|
package/dist/core/validator.d.ts
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { Schema } from '../schemas/types.js';
|
|
5
5
|
export interface ValidationError {
|
|
6
|
-
field
|
|
6
|
+
field?: string;
|
|
7
7
|
message: string;
|
|
8
8
|
code: string;
|
|
9
9
|
value?: unknown;
|
|
10
|
+
expected?: unknown;
|
|
11
|
+
actual?: unknown;
|
|
10
12
|
}
|
|
11
13
|
export interface ValidationResult {
|
|
12
14
|
valid: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/core/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAmB,MAAM,qBAAqB,CAAC;AAGnE,MAAM,WAAW,eAAe;IAC5B,KAAK,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/core/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAmB,MAAM,qBAAqB,CAAC;AAGnE,MAAM,WAAW,eAAe;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC7B,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,eAAe,EAAE,CAAC;CAC7B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACjC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC7B,MAAM,EAAE,MAAM,GACf,gBAAgB,CA4BlB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/core/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAiB,kBAAkB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/core/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAiB,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAgBhE;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACjC,IAA6B,EAC7B,MAAc;IAEd,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,wBAAwB;IACxB,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;QAChE,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;QAE9B,4BAA4B;QAC5B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,mBAAmB,SAAS,cAAc;oBACnD,IAAI,EAAE,kBAAkB,CAAC,aAAa;iBACzC,CAAC,CAAC;YACP,CAAC;YACD,SAAS;QACb,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,GAAG,aAAa,CAAC,SAAS,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;IAChC,CAAC;IAED,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;QAC1B,MAAM;KACT,CAAC;AACN,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAClB,SAAiB,EACjB,KAAc,EACd,QAAyB;IAEzB,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,QAAQ,QAAQ,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,QAAQ;YACT,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,2BAA2B,OAAO,KAAK,EAAE;oBACrE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,wBAAwB;gBACxB,IAAI,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,qBAAqB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,KAAK,EAAE;wBAC1F,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,2BAA2B;gBAC3B,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;oBACnB,IAAI,CAAC;wBACD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;wBAC3C,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;4BACrB,MAAM,CAAC,IAAI,CAAC;gCACR,KAAK,EAAE,SAAS;gCAChB,OAAO,EAAE,UAAU,SAAS,6BAA6B,QAAQ,CAAC,OAAO,EAAE;gCAC3E,IAAI,EAAE,kBAAkB,CAAC,aAAa;gCACtC,KAAK;6BACR,CAAC,CAAC;wBACP,CAAC;oBACL,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACT,kEAAkE;oBACtE,CAAC;g
BACL,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,QAAQ;YACT,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5C,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,2BAA2B,OAAO,KAAK,EAAE;oBACrE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,gBAAgB,QAAQ,CAAC,GAAG,SAAS,KAAK,EAAE;wBACxE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,gBAAgB,QAAQ,CAAC,GAAG,SAAS,KAAK,EAAE;wBACxE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,SAAS;YACV,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxD,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,sBAAsB;oBAClD,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,4BAA4B;gBAC5B,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,sBAAsB,QAAQ,CAAC,GAAG,EAAE;wBAChE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,qBAAqB,QAAQ,CAAC,GAAG,EAAE;wBAC/D,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,SAAS;YACV,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,4BAA4B,OAAO,KAAK,EAAE;oBACtE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;YACD,MAAM;IACd,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -5,7 +5,10 @@
|
|
|
5
5
|
export { ExtractionPipeline, extract } from './core/pipeline.js';
|
|
6
6
|
export { validateExtractedData } from './core/validator.js';
|
|
7
7
|
export { PipelineError, PipelineErrorCodes } from './core/errors.js';
|
|
8
|
-
export
|
|
8
|
+
export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './core/preprocessor.js';
|
|
9
|
+
export { formatError, formatValidationError, formatValidationErrors, formatLLMError, } from './core/error-formatter.js';
|
|
10
|
+
export type { PreprocessResult } from './core/preprocessor.js';
|
|
11
|
+
export type { PipelineConfig, ExtractionRequest, PipelineResult, StepResult, HtmlStripOptions, PreprocessingConfig, } from './core/types.js';
|
|
9
12
|
export { loadSchema, parseSchema, loadSchemaFromObject } from './schemas/loader.js';
|
|
10
13
|
export { validateSchema } from './schemas/validator.js';
|
|
11
14
|
export { SchemaValidationError, ErrorCodes as SchemaErrorCodes } from './schemas/errors.js';
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACrE,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACH,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,cAAc,GACjB,MAAM,2BAA2B,CAAC;AACnC,YAAY,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC/D,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,gBAAgB,EAChB,mBAAmB,GACtB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,qBAAqB,EAAE,UAAU,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5F,YAAY,EACR,MAAM,EACN,eAAe,EACf,SAAS,EACT,eAAe,EACf,gBAAgB,EACnB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACtE,YAAY,EACR,SAAS,EACT,WAAW,EACX,WAAW,EACX,UAAU,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EAClB,WAAW,GACd,MAAM,gBAAgB,CAAC;AACxB,YAAY,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
export { ExtractionPipeline, extract } from './core/pipeline.js';
|
|
7
7
|
export { validateExtractedData } from './core/validator.js';
|
|
8
8
|
export { PipelineError, PipelineErrorCodes } from './core/errors.js';
|
|
9
|
+
export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './core/preprocessor.js';
|
|
10
|
+
export { formatError, formatValidationError, formatValidationErrors, formatLLMError, } from './core/error-formatter.js';
|
|
9
11
|
// Schema exports
|
|
10
12
|
export { loadSchema, parseSchema, loadSchemaFromObject } from './schemas/loader.js';
|
|
11
13
|
export { validateSchema } from './schemas/validator.js';
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wBAAwB;AACxB,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wBAAwB;AACxB,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACH,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,cAAc,GACjB,MAAM,2BAA2B,CAAC;AAWnC,iBAAiB;AACjB,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,qBAAqB,EAAE,UAAU,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAS5F,cAAc;AACd,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC"}
|
package/dist/llm/client.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACR,SAAS,EACT,UAAU,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EACrB,MAAM,YAAY,CAAC;AAKpB,qBAAa,SAAS;IAClB,OAAO,CAAC,MAAM,CAIZ;IACF,OAAO,CAAC,WAAW,CAKjB;IACF,OAAO,CAAC,YAAY,CAAe;gBAEvB,MAAM,EAAE,SAAS;IAoB7B;;OAEG;IACG,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACR,SAAS,EACT,UAAU,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EACrB,MAAM,YAAY,CAAC;AAKpB,qBAAa,SAAS;IAClB,OAAO,CAAC,MAAM,CAIZ;IACF,OAAO,CAAC,WAAW,CAKjB;IACF,OAAO,CAAC,YAAY,CAAe;gBAEvB,MAAM,EAAE,SAAS;IAoB7B;;OAEG;IACG,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAyDtE;;OAEG;YACW,aAAa;IAiC3B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAWxB;;OAEG;IACH,OAAO,CAAC,cAAc;IAoBtB;;OAEG;IACH,OAAO,CAAC,KAAK;IAIb;;OAEG;IACG,IAAI,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IAgDrD;;OAEG;YACW,mBAAmB;IAyCjC;;OAEG;IACH,OAAO,CAAC,uBAAuB;CAsDlC;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,CAE5D;AAED;;GAEG;AACH,eAAO,MAAM,UAAU;qBACH,MAAM,KAAc,SAAS;uBAK3B,MAAM,KAAmB,SAAS;qBAKnC,MAAM,UAAS,MAAM,KAAmB,SAAS;yBAM7C,MAAM,UAAS,MAAM,KAA0B,SAAS;CAKhF,CAAC"}
|
package/dist/llm/client.js
CHANGED
|
@@ -38,8 +38,12 @@ export class LLMClient {
|
|
|
38
38
|
// Check token budget
|
|
39
39
|
const usage = this.tokenCounter.calculateUsage(system, user);
|
|
40
40
|
// Debug logging if enabled
|
|
41
|
-
if (this.config.debugTokens) {
|
|
42
|
-
console.error('[Token Usage
|
|
41
|
+
if (this.config.debug || this.config.debugTokens) {
|
|
42
|
+
console.error('[DEBUG] Token Usage:\n' + this.tokenCounter.formatUsage(usage));
|
|
43
|
+
}
|
|
44
|
+
if (this.config.debug) {
|
|
45
|
+
console.error('[DEBUG] System Prompt:\n' + system);
|
|
46
|
+
console.error('[DEBUG] User Prompt:\n' + user.substring(0, 500) + (user.length > 500 ? '...' : ''));
|
|
43
47
|
}
|
|
44
48
|
// Error if over limit
|
|
45
49
|
if (this.tokenCounter.exceedsLimit(usage)) {
|
|
@@ -61,6 +65,9 @@ export class LLMClient {
|
|
|
61
65
|
};
|
|
62
66
|
// Call API with retries
|
|
63
67
|
const response = await this.chatWithRetry(request);
|
|
68
|
+
if (this.config.debug) {
|
|
69
|
+
console.error('[DEBUG] LLM Response:\n' + JSON.stringify(response, null, 2));
|
|
70
|
+
}
|
|
64
71
|
// Parse response
|
|
65
72
|
return this.parseExtractionResponse(response);
|
|
66
73
|
}
|
|
@@ -175,7 +182,20 @@ export class LLMClient {
|
|
|
175
182
|
const details = {};
|
|
176
183
|
try {
|
|
177
184
|
const errorData = await response.json();
|
|
178
|
-
|
|
185
|
+
const rawMessage = errorData.error?.message || errorMessage;
|
|
186
|
+
errorMessage = rawMessage;
|
|
187
|
+
// Detect context window/token limit errors from various providers
|
|
188
|
+
const msgLower = rawMessage.toLowerCase();
|
|
189
|
+
if (msgLower.includes('context length') ||
|
|
190
|
+
msgLower.includes('maximum context') ||
|
|
191
|
+
msgLower.includes('context_length_exceeded') ||
|
|
192
|
+
msgLower.includes('token limit') ||
|
|
193
|
+
msgLower.includes('too many tokens') ||
|
|
194
|
+
msgLower.includes('prompt is too long') ||
|
|
195
|
+
(msgLower.includes('context') && msgLower.includes('exceed'))) {
|
|
196
|
+
errorCode = LLMErrorCodes.TOKEN_LIMIT_EXCEEDED;
|
|
197
|
+
details.originalMessage = rawMessage;
|
|
198
|
+
}
|
|
179
199
|
}
|
|
180
200
|
catch {
|
|
181
201
|
// If we can't parse error JSON, use status text
|
package/dist/llm/client.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAqB,MAAM,aAAa,CAAC;AACzE,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,MAAM,OAAO,SAAS;IACV,MAAM,CAIZ;IACM,WAAW,CAKjB;IACM,YAAY,CAAe;IAEnC,YAAY,MAAiB;QACzB,IAAI,CAAC,MAAM,GAAG;YACV,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,MAAM,EAAE,sCAAsC;YACvD,GAAG,MAAM;SACZ,CAAC;QACF,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,OAAO,IAAI;YACjC,UAAU,EAAE,CAAC;YACb,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,KAAK;YACf,aAAa,EAAE,CAAC;SACnB,CAAC;QACF,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC;YACjC,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;YACzC,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,aAAa,EAAE,MAAM,CAAC,aAAa;SACtC,CAAC,CAAC;IACP,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,OAA0B;QACpC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;QAEhD,gBAAgB;QAChB,MAAM,MAAM,GAAG,YAAY,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACzD,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAEpC,qBAAqB;QACrB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAE7D,2BAA2B;QAC3B,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/llm/client.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAqB,MAAM,aAAa,CAAC;AACzE,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,MAAM,OAAO,SAAS;IACV,MAAM,CAIZ;IACM,WAAW,CAKjB;IACM,YAAY,CAAe;IAEnC,YAAY,MAAiB;QACzB,IAAI,CAAC,MAAM,GAAG;YACV,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,MAAM,EAAE,sCAAsC;YACvD,GAAG,MAAM;SACZ,CAAC;QACF,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,OAAO,IAAI;YACjC,UAAU,EAAE,CAAC;YACb,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,KAAK;YACf,aAAa,EAAE,CAAC;SACnB,CAAC;QACF,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC;YACjC,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;YACzC,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,aAAa,EAAE,MAAM,CAAC,aAAa;SACtC,CAAC,CAAC;IACP,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,OAA0B;QACpC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;QAEhD,gBAAgB;QAChB,MAAM,MAAM,GAAG,YAAY,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACzD,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAEpC,qBAAqB;QACrB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAE7D,2BAA2B;QAC3B,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC/C,OAAO,CAAC,KAAK,CAAC,wBAAwB,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;QACnF,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,0BAA0B,GAAG,MAAM,CAAC,CAAC;YACnD,OAAO,CAAC,KAAK,CAAC,wBAAwB,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACxG,CAAC;QAED,sBAAsB;QACtB,IAAI,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;YACxC,MAAM,IAAI,QAAQ,CACd,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,KAAK,CAAC,EACxC,aAAa,CAAC,oBAAoB,EAClC,SAAS,EACT,EAAE,KAAK,EAAE,CACZ,CAAC;QACN,CAAC;QAED,4BAA4B;QAC5B,IAAI,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC,CAAC;QAC7D,CAAC;QAED,iBAAiB;QACjB,MAAM,OAAO,GAAe;YACxB,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAA
K;YACxB,QAAQ,EAAE;gBACN,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;gBACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;aAClC;YACD,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;SACpC,CAAC;QAEF,wBAAwB;QACxB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAEnD,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,yBAAyB,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACjF,CAAC;QAED,iBAAiB;QACjB,OAAO,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa,CAAC,OAAmB;QAC3C,IAAI,SAAS,GAAoB,IAAI,CAAC;QACtC,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,OAAO,OAAO,IAAI,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,CAAC;YAC5C,IAAI,CAAC;gBACD,OAAO,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,SAAS,GAAG,KAAiB,CAAC;gBAE9B,sCAAsC;gBACtC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,EAAE,CAAC;oBACpC,MAAM,SAAS,CAAC;gBACpB,CAAC;gBAED,4BAA4B;gBAC5B,IAAI,OAAO,KAAK,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,CAAC;oBAC1C,MAAM,SAAS,CAAC;gBACpB,CAAC;gBAED,sDAAsD;gBACtD,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;gBAEtD,oBAAoB;gBACpB,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;gBAExB,OAAO,EAAE,CAAC;YACd,CAAC;QACL,CAAC;QAED,MAAM,SAAU,CAAC;IACrB,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,KAAe;QACpC,MAAM,cAAc,GAAmB;YACnC,aAAa,CAAC,aAAa;YAC3B,aAAa,CAAC,OAAO;YACrB,aAAa,CAAC,UAAU;YACxB,aAAa,CAAC,SAAS;SAC1B,CAAC;QACF,iCAAiC;QACjC,OAAO,cAAc,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,OAAe,EAAE,KAAe;QACnD,oDAAoD;QACpD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,CAAC,UAAU,IAAI,KAAK,CAAC,OAAO,EAAE,UAAU,EAAE,CAAC;YACvE,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;YAC3D,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC3D,CAAC;QAED,gEAAgE;QAChE,MAAM,gBAAgB,GAClB,IAAI,CAAC,WAAW,CAAC,YAAY;YAC7B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAEtD,qCAAqC;QACrC,MAAM,MAAM,GAAG,gBAAgB,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvD,MAAM,eAAe,GAAG,
gBAAgB,GAAG,MAAM,CAAC;QAElD,kBAAkB;QAClB,OAAO,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;IAChE,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,EAAU;QACpB,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IAC3D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,OAAmB;QAC1B,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,mBAAmB,CAAC;QAEtD,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE5E,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC9B,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACL,cAAc,EAAE,kBAAkB;oBAClC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,eAAe,EAAE,UAAU,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;iBACjF;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;gBAC7B,MAAM,EAAE,UAAU,CAAC,MAAM;aAC5B,CAAC,CAAC;YAEH,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACf,MAAM,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;YAC7C,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,OAAO,IAAmB,CAAC;QAC/B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,YAAY,CAAC,SAAS,CAAC,CAAC;YAExB,IAAI,KAAK,YAAY,QAAQ,EAAE,CAAC;gBAC5B,MAAM,KAAK,CAAC;YAChB,CAAC;YAED,IAAK,KAAe,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBACzC,MAAM,IAAI,QAAQ,CACd,yBAAyB,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,EAChD,aAAa,CAAC,OAAO,CACxB,CAAC;YACN,CAAC;YAED,MAAM,IAAI,QAAQ,CACd,kBAAmB,KAAe,CAAC,OAAO,EAAE,EAC5C,aAAa,CAAC,aAAa,EAC3B,SAAS,EACT,EAAE,aAAa,EAAG,KAAe,CAAC,OAAO,EAAE,CAC9C,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,mBAAmB,CAAC,QAAkB;QAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC/B,IAAI,YAAY,GAAG,cAAc,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;QACjE,IAAI,SAAS,GAAiB,aAAa,CAAC,SAAS,CAAC;QACtD,MAAM,OAAO,GAA4B,EAAE,CAAC;QAE5C,IAAI,CAAC;YACD,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAqD,CAAC;YAC3F,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,EAAE,OAAO,IAAI,YAAY,CAAC;YAC5D,YAAY,GAAG,UAAU,CAAC;YAE1B,kEAAkE;YAClE,MAAM,QAAQ,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC;YAC1C,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC;gBACnC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC;gBACpC,QAA
Q,CAAC,QAAQ,CAAC,yBAAyB,CAAC;gBAC5C,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAChC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC;gBACpC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,SAAS,GAAG,aAAa,CAAC,oBAAoB,CAAC;gBAC/C,OAAO,CAAC,eAAe,GAAG,UAAU,CAAC;YACzC,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACL,gDAAgD;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACnC,SAAS,GAAG,aAAa,CAAC,oBAAoB,CAAC;QACnD,CAAC;aAAM,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACxB,SAAS,GAAG,aAAa,CAAC,UAAU,CAAC;YACrC,wCAAwC;YACxC,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;YACvD,IAAI,UAAU,EAAE,CAAC;gBACb,OAAO,CAAC,UAAU,GAAG,UAAU,CAAC;YACpC,CAAC;QACL,CAAC;QAED,MAAM,IAAI,QAAQ,CAAC,YAAY,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IACjE,CAAC;IAED;;OAEG;IACK,uBAAuB,CAAC,QAAqB;QACjD,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrD,MAAM,IAAI,QAAQ,CACd,4BAA4B,EAC5B,aAAa,CAAC,gBAAgB,CACjC,CAAC;QACN,CAAC;QAED,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;QAC5C,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YAC/B,MAAM,IAAI,QAAQ,CACd,4BAA4B,EAC5B,aAAa,CAAC,gBAAgB,CACjC,CAAC;QACN,CAAC;QAED,IAAI,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAErC,yCAAyC;QACzC,IAAI,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACxE,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACnC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAEnC,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAClD,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;YACpD,CAAC;YAED,IAAI,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;gBACxC,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YACzD,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,iBAAiB,IAAI,OAAO,MAAM,CAAC,iBAAiB,KAAK,QAAQ,EAAE,CAAC;gBAC5E,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YACjE,CAA
C;YAED,OAAO;gBACH,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;aAC9C,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,IAAI,QAAQ,CACd,iCAAkC,KAAe,CAAC,OAAO,EAAE,EAC3D,aAAa,CAAC,gBAAgB,EAC9B,SAAS,EACT,EAAE,OAAO,EAAE,KAAK,EAAG,KAAe,CAAC,OAAO,EAAE,CAC/C,CAAC;QACN,CAAC;IACL,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,MAAiB;IAC7C,OAAO,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC;AACjC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,UAAU,GAAG;IACtB,MAAM,EAAE,CAAC,QAAgB,QAAQ,EAAa,EAAE,CAAC,CAAC;QAC9C,OAAO,EAAE,2BAA2B;QACpC,KAAK;KACR,CAAC;IAEF,QAAQ,EAAE,CAAC,QAAgB,aAAa,EAAa,EAAE,CAAC,CAAC;QACrD,OAAO,EAAE,0BAA0B;QACnC,KAAK;KACR,CAAC;IAEF,MAAM,EAAE,CAAC,MAAc,EAAE,QAAgB,aAAa,EAAa,EAAE,CAAC,CAAC;QACnE,OAAO,EAAE,2BAA2B;QACpC,MAAM;QACN,KAAK;KACR,CAAC;IAEF,UAAU,EAAE,CAAC,MAAc,EAAE,QAAgB,oBAAoB,EAAa,EAAE,CAAC,CAAC;QAC9E,OAAO,EAAE,8BAA8B;QACvC,MAAM;QACN,KAAK;KACR,CAAC;CACL,CAAC"}
|
package/dist/llm/types.d.ts
CHANGED
|
@@ -41,6 +41,8 @@ export interface LLMConfig {
|
|
|
41
41
|
warnThreshold?: number;
|
|
42
42
|
/** Enable debug logging for token usage */
|
|
43
43
|
debugTokens?: boolean;
|
|
44
|
+
/** Enable verbose debug output (shows full request/response) */
|
|
45
|
+
debug?: boolean;
|
|
44
46
|
}
|
|
45
47
|
/**
|
|
46
48
|
* Message in chat completion format
|
package/dist/llm/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/llm/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAElD;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,6CAA6C;IAC7C,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,kEAAkE;IAClE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,2CAA2C;IAC3C,WAAW,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/llm/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAElD;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,6CAA6C;IAC7C,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,kEAAkE;IAClE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,2CAA2C;IAC3C,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,gEAAgE;IAChE,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,WAAW,CAAC;QACrB,aAAa,EAAE,MAAM,CAAC;KACzB,CAAC,CAAC;IACH,KAAK,CAAC,EAAE;QACJ,aAAa,EAAE,MAAM,CAAC;QACtB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,YAAY,EAAE,MAAM,CAAC;KACxB,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB"}
|