@ordis-dev/ordis 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -38,7 +38,7 @@ ordis extract \
38
38
  "invoice_id": { "type": "string" },
39
39
  "amount": { "type": "number" },
40
40
  "currency": { "type": "string", "enum": ["USD", "SGD", "EUR"] },
41
- "date": { "type": "date", "optional": true }
41
+ "date": { "type": "string", "format": "date-time", "optional": true }
42
42
  }
43
43
  }
44
44
  ```
@@ -97,6 +97,17 @@ ordis extract \
97
97
  --debug
98
98
  ```
99
99
 
100
+ **With API key** (for providers like OpenAI, Deepseek, etc.):
101
+
102
+ ```bash
103
+ ordis extract \
104
+ --schema examples/invoice.schema.json \
105
+ --input examples/invoice.txt \
106
+ --base https://api.deepseek.com/v1 \
107
+ --model deepseek-chat \
108
+ --api-key your-api-key-here
109
+ ```
110
+
100
111
  ### Programmatic Usage
101
112
 
102
113
  Use ordis as a library in your Node.js application:
@@ -179,9 +190,19 @@ npm run benchmark
179
190
  ```
180
191
 
181
192
  ## Roadmap
182
- Smart input truncation ([#40](https://github.com/ordis-dev/ordis/issues/40))
193
+
194
+ **Completed in v0.1.0:**
195
+ - ✅ Core extraction pipeline with schema validation
196
+ - ✅ Token budget awareness and management
197
+ - ✅ Confidence scoring for extracted data
198
+ - ✅ Programmatic API for library usage
199
+ - ✅ CLI tool with debug mode
200
+ - ✅ Comprehensive test suite and benchmarks
201
+ - ✅ Support for any OpenAI-compatible API
202
+
203
+ **Upcoming:**
204
+ - [ ] Smart input truncation ([#40](https://github.com/ordis-dev/ordis/issues/40))
183
205
  - [ ] Multi-pass extraction for large inputs ([#41](https://github.com/ordis-dev/ordis/issues/41))
184
- - [ ]
185
206
  - [ ] Config file support ([#16](https://github.com/ordis-dev/ordis/issues/16))
186
207
  - [ ] Output formatting options ([#14](https://github.com/ordis-dev/ordis/issues/14))
187
208
  - [ ] Batch extraction ([#19](https://github.com/ordis-dev/ordis/issues/19))
package/dist/cli.js CHANGED
@@ -3,8 +3,9 @@
3
3
  * Ordis - Schema-first extraction tool
4
4
  * CLI entrypoint
5
5
  */
6
- import * as fs from 'fs/promises';
7
- import * as path from 'path';
6
+ import process from 'node:process';
7
+ import * as fs from 'node:fs/promises';
8
+ import * as path from 'node:path';
8
9
  import { loadSchema } from './schemas/loader.js';
9
10
  import { extract } from './core/pipeline.js';
10
11
  import packageJson from '../package.json' with { type: 'json' };
@@ -36,6 +37,9 @@ function parseArgs(args) {
36
37
  else if (arg === '--model' && args[i + 1]) {
37
38
  parsed.model = args[++i];
38
39
  }
40
+ else if (arg === '--api-key' && args[i + 1]) {
41
+ parsed.apiKey = args[++i];
42
+ }
39
43
  else if (!arg.startsWith('--')) {
40
44
  parsed.command = arg;
41
45
  }
@@ -54,6 +58,7 @@ OPTIONS:
54
58
  --input <path> Path to input text file
55
59
  --base <url> Base URL for OpenAI-compatible API
56
60
  --model <name> Model name to use for extraction
61
+ --api-key <key> API key for the LLM provider (optional)
57
62
  --debug Enable verbose debug output
58
63
  --version, -v Show version number
59
64
  --help, -h Show this help message
@@ -124,6 +129,7 @@ async function runExtraction(args) {
124
129
  const llmConfig = {
125
130
  baseURL: args.base,
126
131
  model: args.model,
132
+ ...(args.apiKey && { apiKey: args.apiKey }),
127
133
  };
128
134
  if (args.debug) {
129
135
  console.log('[DEBUG] LLM config:', {
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AAClC,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAE7C,OAAO,WAAW,MAAM,iBAAiB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAWhE,SAAS,SAAS,CAAC,IAAc;IAC7B,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEpB,IAAI,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACnC,QAAQ,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,GAAG,KAAK,WAAW,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACtC,WAAW,EAAE,CAAC;YACd,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACpB,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC;YACpB,SAAS;QACb,CAAC;QAED,IAAI,GAAG,KAAK,UAAU,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACpC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9B,CAAC;aAAM,IAAI,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1C,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC7B,CAAC;aAAM,IAAI,GAAG,KAAK,QAAQ,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,CAAC;aAAM,IAAI,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1C,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC7B,CAAC;aAAM,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/B,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC;QACzB,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED,SAAS,QAAQ;IACb,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2Bf,CAAC,CAAC;AACH,CAAC;AAED,SAAS,WAAW;IAChB,OAAO,CAAC,GAAG,CAAC,UAAU,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC;AACjD,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,IAAa;IACtC,8BAA8B;IAC9B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QA
Cd,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC3C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACd,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC;QACD,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,gCAAgC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,CAAC;QAE5C,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,qCAAqC,EAAE;gBAC/C,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE,IAAI;gBAC3B,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClC,mBAAmB,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS;aACpD,CAAC,CAAC;QACP,CAAC;QAED,0BAA0B;QAC1B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAExD,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,yBAAyB,SAAS,CAAC,MAAM,aAAa,CAAC,CAAC;QACxE,CAAC;QAED,4BAA4B;QAC5B,MAAM,SAAS,GAAc;YACzB,OAAO,EAAE,IAAI,CAAC,IAAI;YAClB,KAAK,EAAE,IAAI,CAAC,KAAK;SACpB,CAAC;QAEF,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE;gBAC/B,OAAO,EAAE,SAAS,CAAC,OAAO;gBAC1B,KAAK,EAAE,SAAS,CAAC,KAAK;aACzB,CAAC,CAAC;QACP,CAAC;QAED,yBAAyB;QACzB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;YACzB,KAAK,EAAE,SAAS;YAChB,MAAM;YACN,SAAS;YACT,KAAK,EAAE,IAAI,CAAC,KAAK;SACpB,CAAC,CAAC;QAEH,yBAAyB;QACzB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE;gBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO
;gBACvB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM;aACnC,CAAC,CAAC;QACP,CAAC;QAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACjB,uCAAuC;YACvC,MAAM,MAAM,GAAG;gBACX,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;gBAC3C,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;aAC5B,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACJ,6BAA6B;YAC7B,MAAM,MAAM,GAAG;gBACX,OAAO,EAAE,KAAK;gBACd,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,iBAAiB;gBACpC,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;aAC5B,CAAC;YAEF,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,2BAA2B;QAC3B,IAAI,IAAI,CAAC,KAAK,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;QACvD,CAAC;QAED,MAAM,WAAW,GAAG;YAChB,OAAO,EAAE,KAAK;YACd,MAAM,EAAE;gBACJ;oBACI,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC/D,IAAI,EAAG,KAAa,CAAC,IAAI,IAAI,eAAe;iBAC/C;aACJ;SACJ,CAAC;QAEF,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACpD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC;AAED,KAAK,UAAU,IAAI;IACf,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAErC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,oEAAoE,CAAC,CAAC;QACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,wCAAwC,EAAE;gBAClD,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,KAAK,EAAE,IAAI,CAAC,KAAK;aACpB,CAAC,CAAC;QACP,CAAC;QAED,MAAM,aAAa,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;SAAM,CAAC
;QACJ,OAAO,CAAC,KAAK,CAAC,2BAA2B,IAAI,CAAC,OAAO,kCAAkC,CAAC,CAAC;QACzF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,OAAO,OAAO,MAAM,cAAc,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACvC,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAE7C,OAAO,WAAW,MAAM,iBAAiB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAYhE,SAAS,SAAS,CAAC,IAAc;IAC7B,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEpB,IAAI,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACnC,QAAQ,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,GAAG,KAAK,WAAW,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACtC,WAAW,EAAE,CAAC;YACd,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACpB,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC;YACpB,SAAS;QACb,CAAC;QAED,IAAI,GAAG,KAAK,UAAU,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACpC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9B,CAAC;aAAM,IAAI,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1C,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC7B,CAAC;aAAM,IAAI,GAAG,KAAK,QAAQ,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,CAAC;aAAM,IAAI,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1C,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC7B,CAAC;aAAM,IAAI,GAAG,KAAK,WAAW,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC5C,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9B,CAAC;aAAM,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/B,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC;QACzB,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED,SAAS,QAAQ;IACb,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4Bf,CAAC,CAAC;AACH,CAAC;AAED,SAAS,WAAW;IAChB,OAAO,CAAC,GAAG,CAAC,UAAU,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC;AACjD,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,IAAa;IACtC,8BAA8B;IAC9B,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QACf,OAA
O,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACd,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC3C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACd,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO,CAAC,KAAK,CAAC,iFAAiF,CAAC,CAAC;QACjG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,CAAC;QACD,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,gCAAgC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,CAAC;QAE5C,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,qCAAqC,EAAE;gBAC/C,IAAI,EAAE,MAAM,CAAC,QAAQ,EAAE,IAAI;gBAC3B,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;gBAClC,mBAAmB,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS;aACpD,CAAC,CAAC;QACP,CAAC;QAED,0BAA0B;QAC1B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAExD,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,yBAAyB,SAAS,CAAC,MAAM,aAAa,CAAC,CAAC;QACxE,CAAC;QAED,4BAA4B;QAC5B,MAAM,SAAS,GAAc;YACzB,OAAO,EAAE,IAAI,CAAC,IAAI;YAClB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;SAC9C,CAAC;QAEF,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE;gBAC/B,OAAO,EAAE,SAAS,CAAC,OAAO;gBAC1B,KAAK,EAAE,SAAS,CAAC,KAAK;aACzB,CAAC,CAAC;QACP,CAAC;QAED,yBAAyB;QACzB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;
YACzB,KAAK,EAAE,SAAS;YAChB,MAAM;YACN,SAAS;YACT,KAAK,EAAE,IAAI,CAAC,KAAK;SACpB,CAAC,CAAC;QAEH,yBAAyB;QACzB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE;gBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM;aACnC,CAAC,CAAC;QACP,CAAC;QAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACjB,uCAAuC;YACvC,MAAM,MAAM,GAAG;gBACX,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;gBAC3C,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;aAC5B,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACJ,6BAA6B;YAC7B,MAAM,MAAM,GAAG;gBACX,OAAO,EAAE,KAAK;gBACd,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,iBAAiB;gBACpC,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;aAC5B,CAAC;YAEF,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,2BAA2B;QAC3B,IAAI,IAAI,CAAC,KAAK,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YACvC,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;QACvD,CAAC;QAED,MAAM,WAAW,GAAG;YAChB,OAAO,EAAE,KAAK;YACd,MAAM,EAAE;gBACJ;oBACI,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC/D,IAAI,EAAG,KAAa,CAAC,IAAI,IAAI,eAAe;iBAC/C;aACJ;SACJ,CAAC;QAEF,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACpD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC;AAED,KAAK,UAAU,IAAI;IACf,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAErC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,oEAAoE,CAAC,CAAC;QACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,KAAK,EAAE,CAA
C;YACb,OAAO,CAAC,GAAG,CAAC,wCAAwC,EAAE;gBAClD,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,KAAK,EAAE,IAAI,CAAC,KAAK;aACpB,CAAC,CAAC;QACP,CAAC;QAED,MAAM,aAAa,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;SAAM,CAAC;QACJ,OAAO,CAAC,KAAK,CAAC,2BAA2B,IAAI,CAAC,OAAO,kCAAkC,CAAC,CAAC;QACzF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC"}
@@ -4,5 +4,7 @@
4
4
  export { ExtractionPipeline, extract } from './pipeline.js';
5
5
  export { validateExtractedData } from './validator.js';
6
6
  export { PipelineError, PipelineErrorCodes } from './errors.js';
7
- export type { PipelineConfig, ExtractionRequest, PipelineResult, StepResult, } from './types.js';
7
+ export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './preprocessor.js';
8
+ export type { PreprocessResult } from './preprocessor.js';
9
+ export type { PipelineConfig, ExtractionRequest, PipelineResult, StepResult, HtmlStripOptions, PreprocessingConfig, } from './types.js';
8
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,GACb,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,mBAAmB,CAAC;AAC3B,YAAY,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,gBAAgB,EAChB,mBAAmB,GACtB,MAAM,YAAY,CAAC"}
@@ -4,4 +4,5 @@
4
4
  export { ExtractionPipeline, extract } from './pipeline.js';
5
5
  export { validateExtractedData } from './validator.js';
6
6
  export { PipelineError, PipelineErrorCodes } from './errors.js';
7
+ export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './preprocessor.js';
7
8
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,mBAAmB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,KAAK,EAAE,iBAAiB,EAAE,cAAc,EAAc,MAAM,YAAY,CAAC;AAEhF;;GAEG;AACH,qBAAa,kBAAkB;IAC3B,OAAO,CAAC,KAAK,CAAU;gBAEX,KAAK,GAAE,OAAe;IAIlC;;OAEG;IACG,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IAsKlE;;OAEG;IACH,OAAO,CAAC,UAAU;IAsBlB;;OAEG;YACW,eAAe;CAqBhC;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC,CAGjF"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH,OAAO,KAAK,EAAE,iBAAiB,EAAE,cAAc,EAAc,MAAM,YAAY,CAAC;AAEhF;;GAEG;AACH,qBAAa,kBAAkB;IAC3B,OAAO,CAAC,KAAK,CAAU;gBAEX,KAAK,GAAE,OAAe;IAIlC;;OAEG;IACG,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IAoLlE;;OAEG;IACH,OAAO,CAAC,UAAU;IAsBlB;;OAEG;YACW,eAAe;CAqBhC;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC,CAGjF"}
@@ -4,6 +4,7 @@
4
4
  import { LLMClient } from '../llm/client.js';
5
5
  import { validateExtractedData } from './validator.js';
6
6
  import { PipelineError, PipelineErrorCodes } from './errors.js';
7
+ import { preprocessWithDetails } from './preprocessor.js';
7
8
  /**
8
9
  * Main extraction pipeline
9
10
  */
@@ -19,6 +20,18 @@ export class ExtractionPipeline {
19
20
  const startTime = Date.now();
20
21
  const steps = [];
21
22
  try {
23
+ // Step 0: Preprocess input (if configured)
24
+ let processedInput = request.input;
25
+ if (request.preprocessing) {
26
+ const preprocessStep = this.recordStep('preprocess', () => {
27
+ return preprocessWithDetails(request.input, request.preprocessing);
28
+ });
29
+ steps.push(preprocessStep);
30
+ if (preprocessStep.success && preprocessStep.data) {
31
+ const result = preprocessStep.data;
32
+ processedInput = result.text;
33
+ }
34
+ }
22
35
  // Step 1: Create LLM client
23
36
  const clientStep = this.recordStep('create_client', () => {
24
37
  return new LLMClient(request.llmConfig);
@@ -32,7 +45,7 @@ export class ExtractionPipeline {
32
45
  const extractStep = await this.recordStepAsync('llm_extract', async () => {
33
46
  return await client.extract({
34
47
  schema: request.schema,
35
- input: request.input,
48
+ input: processedInput,
36
49
  });
37
50
  });
38
51
  steps.push(extractStep);
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAGhE;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACnB,KAAK,CAAU;IAEvB,YAAY,QAAiB,KAAK;QAC9B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,OAA0B;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC;YACD,4BAA4B;YAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE;gBACrD,OAAO,IAAI,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;YAC5C,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEvB,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;gBACtB,MAAM,IAAI,aAAa,CACnB,6BAA6B,EAC7B,kBAAkB,CAAC,SAAS,EAC5B,eAAe,CAClB,CAAC;YACN,CAAC;YAED,MAAM,MAAM,GAAG,UAAU,CAAC,IAAiB,CAAC;YAE5C,kCAAkC;YAClC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,KAAK,IAAI,EAAE;gBACrE,OAAO,MAAM,MAAM,CAAC,OAAO,CAAC;oBACxB,MAAM,EAAE,OAAO,CAAC,MAAM;oBACtB,KAAK,EAAE,OAAO,CAAC,KAAK;iBACvB,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAExB,IAAI,CAAC,WAAW,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;gBAC5C,MAAM,IAAI,aAAa,CACnB,uBAAuB,EACvB,kBAAkB,CAAC,SAAS,EAC5B,aAAa,EACb,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAC/B,CAAC;YACN,CAAC;YAED,MAAM,UAAU,GAAG,WAAW,CAAC,IAI9B,CAAC;YAEF,kCAAkC;YAClC,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE;gBACvD,OAAO,qBAAqB,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YAClE,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAEzB,MAAM,UAAU,GAAG,YAAY,CAAC,IAA2F,CAAC;YAE5H,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;gBACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACxC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE,UAAU,CAAC,MAAM;oBACzB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI
;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,qCAAqC;YACrC,MAAM,cAAc,GAAG,IAAI,CAAC,UAAU,CAAC,kBAAkB,EAAE,GAAG,EAAE;gBAC5D,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;oBAC7B,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;gBACpC,CAAC;gBAED,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC;gBACrE,MAAM,cAAc,GAAG,UAAU,CAAC,UAAU,IAAI,SAAS,CAAC;gBAE1D,OAAO;oBACH,cAAc;oBACd,UAAU,EAAE,CAAC,cAAc,IAAI,mBAAmB;iBACrD,CAAC;YACN,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAE3B,MAAM,eAAe,GAAG,cAAc,CAAC,IAAyD,CAAC;YAEjG,IAAI,eAAe,CAAC,UAAU,EAAE,CAAC;gBAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACxC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,IAAI,EAAE,UAAU,CAAC,IAAI;oBACrB,UAAU,EAAE,UAAU,CAAC,UAAU;oBACjC,iBAAiB,EAAE,UAAU,CAAC,iBAAiB;oBAC/C,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE;wBACJ;4BACI,OAAO,EAAE,cAAc,UAAU,CAAC,UAAU,qBAAqB,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,SAAS,GAAG;4BACxG,IAAI,EAAE,kBAAkB,CAAC,gBAAgB;yBAC5C;qBACJ;oBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,WAAW;YACX,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,UAAU,CAAC,IAAI;gBACrB,UAAU,EAAE,UAAU,CAAC,UAAU;gBACjC,iBAAiB,EAAE,UAAU,CAAC,iBAAiB;gBAC/C,cAAc,EAAE,eAAe,CAAC,cAAc;gBAC9C,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACrC,QAAQ,EAAE;oBACN,QAAQ;oBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;oBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;iBAC5C;aACJ,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAExC,IAAI,KAAK,YAAY,aAAa,EAAE,CAAC;gBACjC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE;wBACJ;4BACI,OAAO,EAAE,KAAK,CAAC,OAAO;4BACtB,IAAI,EAAE,KAAK,CAAC,IAAI;yBACnB;qBACJ;oBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,
CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,OAAO;gBACH,OAAO,EAAE,KAAK;gBACd,cAAc,EAAE,KAAK;gBACrB,MAAM,EAAE;oBACJ;wBACI,OAAO,EAAG,KAAe,CAAC,OAAO;wBACjC,IAAI,EAAE,eAAe;qBACxB;iBACJ;gBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACrC,QAAQ,EAAE;oBACN,QAAQ;oBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;oBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;iBAC5C;aACJ,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,UAAU,CAAI,IAAY,EAAE,EAAW;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,EAAE,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM;gBACZ,QAAQ;aACX,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAG,KAAe,CAAC,OAAO;gBAC/B,QAAQ;aACX,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAI,IAAY,EAAE,EAAoB;QAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,EAAE,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM;gBACZ,QAAQ;aACX,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAG,KAAe,CAAC,OAAO;gBAC/B,QAAQ;aACX,CAAC;QACN,CAAC;IACL,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAA0B;IACpD,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,OAAO,MAAM,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAC3C,CAAC"}
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAG1D;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACnB,KAAK,CAAU;IAEvB,YAAY,QAAiB,KAAK;QAC9B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,OAA0B;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAiB,EAAE,CAAC;QAE/B,IAAI,CAAC;YACD,2CAA2C;YAC3C,IAAI,cAAc,GAAG,OAAO,CAAC,KAAK,CAAC;YACnC,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;gBACxB,MAAM,cAAc,GAAG,IAAI,CAAC,UAAU,CAAC,YAAY,EAAE,GAAG,EAAE;oBACtD,OAAO,qBAAqB,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;gBACvE,CAAC,CAAC,CAAC;gBACH,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAE3B,IAAI,cAAc,CAAC,OAAO,IAAI,cAAc,CAAC,IAAI,EAAE,CAAC;oBAChD,MAAM,MAAM,GAAG,cAAc,CAAC,IAA+C,CAAC;oBAC9E,cAAc,GAAG,MAAM,CAAC,IAAI,CAAC;gBACjC,CAAC;YACL,CAAC;YAED,4BAA4B;YAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE;gBACrD,OAAO,IAAI,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;YAC5C,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEvB,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;gBACtB,MAAM,IAAI,aAAa,CACnB,6BAA6B,EAC7B,kBAAkB,CAAC,SAAS,EAC5B,eAAe,CAClB,CAAC;YACN,CAAC;YAED,MAAM,MAAM,GAAG,UAAU,CAAC,IAAiB,CAAC;YAE5C,kCAAkC;YAClC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,KAAK,IAAI,EAAE;gBACrE,OAAO,MAAM,MAAM,CAAC,OAAO,CAAC;oBACxB,MAAM,EAAE,OAAO,CAAC,MAAM;oBACtB,KAAK,EAAE,cAAc;iBACxB,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAExB,IAAI,CAAC,WAAW,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;gBAC5C,MAAM,IAAI,aAAa,CACnB,uBAAuB,EACvB,kBAAkB,CAAC,SAAS,EAC5B,aAAa,EACb,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE,CAC/B,CAAC;YACN,CAAC;YAED,MAAM,UAAU,GAAG,WAAW,CAAC,IAI9B,CAAC;YAEF,kCAAkC;YAClC,MAAM,YAAY,GAAG,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE;gBACvD,OAAO,qBAAqB,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YA
ClE,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAEzB,MAAM,UAAU,GAAG,YAAY,CAAC,IAA2F,CAAC;YAE5H,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;gBACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACxC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE,UAAU,CAAC,MAAM;oBACzB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,qCAAqC;YACrC,MAAM,cAAc,GAAG,IAAI,CAAC,UAAU,CAAC,kBAAkB,EAAE,GAAG,EAAE;gBAC5D,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;oBAC7B,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;gBACpC,CAAC;gBAED,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC;gBACrE,MAAM,cAAc,GAAG,UAAU,CAAC,UAAU,IAAI,SAAS,CAAC;gBAE1D,OAAO;oBACH,cAAc;oBACd,UAAU,EAAE,CAAC,cAAc,IAAI,mBAAmB;iBACrD,CAAC;YACN,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAE3B,MAAM,eAAe,GAAG,cAAc,CAAC,IAAyD,CAAC;YAEjG,IAAI,eAAe,CAAC,UAAU,EAAE,CAAC;gBAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBACxC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,IAAI,EAAE,UAAU,CAAC,IAAI;oBACrB,UAAU,EAAE,UAAU,CAAC,UAAU;oBACjC,iBAAiB,EAAE,UAAU,CAAC,iBAAiB;oBAC/C,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE;wBACJ;4BACI,OAAO,EAAE,cAAc,UAAU,CAAC,UAAU,qBAAqB,OAAO,CAAC,MAAM,CAAC,UAAU,EAAE,SAAS,GAAG;4BACxG,IAAI,EAAE,kBAAkB,CAAC,gBAAgB;yBAC5C;qBACJ;oBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,WAAW;YACX,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,UAAU,CAAC,IAAI;gBACrB,UAAU,EAAE,UAAU,CAAC,UAAU;gBACjC,iBAAiB,EAAE,UAAU,CAAC,iBAAiB;gBAC/C,cAAc,EAAE,eAAe,CAAC,cAAc;gBAC9C,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACrC,QAAQ,EAAE;oBACN,QAAQ;oBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;o
BAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;iBAC5C;aACJ,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAExC,IAAI,KAAK,YAAY,aAAa,EAAE,CAAC;gBACjC,OAAO;oBACH,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,KAAK;oBACrB,MAAM,EAAE;wBACJ;4BACI,OAAO,EAAE,KAAK,CAAC,OAAO;4BACtB,IAAI,EAAE,KAAK,CAAC,IAAI;yBACnB;qBACJ;oBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBACrC,QAAQ,EAAE;wBACN,QAAQ;wBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;wBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;qBAC5C;iBACJ,CAAC;YACN,CAAC;YAED,OAAO;gBACH,OAAO,EAAE,KAAK;gBACd,cAAc,EAAE,KAAK;gBACrB,MAAM,EAAE;oBACJ;wBACI,OAAO,EAAG,KAAe,CAAC,OAAO;wBACjC,IAAI,EAAE,eAAe;qBACxB;iBACJ;gBACD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACrC,QAAQ,EAAE;oBACN,QAAQ;oBACR,KAAK,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK;oBAC9B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI;iBAC5C;aACJ,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,UAAU,CAAI,IAAY,EAAE,EAAW;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,EAAE,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM;gBACZ,QAAQ;aACX,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAG,KAAe,CAAC,OAAO;gBAC/B,QAAQ;aACX,CAAC;QACN,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAI,IAAY,EAAE,EAAoB;QAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,EAAE,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,IAAI;gBACb,IAAI,EAAE,MAAM;gBACZ,QAAQ;aACX,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACxC,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,KAAK;gBACd,KAAK,EAAG,KAAe,CAAC,OAAO;gBAC/B,QAAQ;aACX,CAAC;QACN,CAAC;IACL,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,U
AAU,OAAO,CAAC,OAA0B;IACpD,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACvD,OAAO,MAAM,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAC3C,CAAC"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * HTML preprocessing module
3
+ * Strips HTML tags and noise from input text before extraction
4
+ */
5
+ import type { HtmlStripOptions, PreprocessingConfig } from './types.js';
6
+ /**
7
+ * Resolves a stripHtml setting to concrete HtmlStripOptions: null for a falsy setting, the defaults for `true`, otherwise the given options with unset fields defaulted
8
+ */
9
+ export declare function resolveHtmlStripOptions(config: boolean | HtmlStripOptions | undefined): HtmlStripOptions | null;
10
+ /**
11
+ * Strips HTML from input text according to options; input that lacks '<' or '>' is not parsed and is returned as-is (cut to options.maxLength when that is set)
12
+ */
13
+ export declare function stripHtml(input: string, options: HtmlStripOptions): string;
14
+ /**
15
+ * Preprocesses input text according to configuration; HTML stripping is the only step and runs when config.stripHtml is truthy, otherwise the input is returned unchanged
16
+ */
17
+ export declare function preprocess(input: string, config: PreprocessingConfig): string;
18
+ /**
19
+ * Result of preprocessing, as returned by preprocessWithDetails
20
+ */
21
+ export interface PreprocessResult {
22
+ /** The preprocessed text (the input itself when no preprocessing ran) */
23
+ text: string;
24
+ /** Whether preprocessing changed the text: false when stripping is not configured or when the processed text equals the input */
25
+ wasProcessed: boolean;
26
+ /** Original input length (the input string's `length`) */
27
+ originalLength: number;
28
+ /** Processed text length (the `length` of `text`) */
29
+ processedLength: number;
30
+ }
31
+ /**
32
+ * Preprocesses input and reports the resulting text with its original and processed lengths; a missing config or a falsy stripHtml returns the input untouched with wasProcessed false
33
+ */
34
+ export declare function preprocessWithDetails(input: string, config: PreprocessingConfig | undefined): PreprocessResult;
35
+ //# sourceMappingURL=preprocessor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"preprocessor.d.ts","sourceRoot":"","sources":["../../src/core/preprocessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AA4CxE;;GAEG;AACH,wBAAgB,uBAAuB,CACnC,MAAM,EAAE,OAAO,GAAG,gBAAgB,GAAG,SAAS,GAC/C,gBAAgB,GAAG,IAAI,CAqBzB;AAuKD;;GAEG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,MAAM,CA2C1E;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,GAAG,MAAM,CAY7E;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,wCAAwC;IACxC,YAAY,EAAE,OAAO,CAAC;IACtB,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,eAAe,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACjC,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,mBAAmB,GAAG,SAAS,GACxC,gBAAgB,CAkBlB"}
@@ -0,0 +1,297 @@
1
+ /**
2
+ * HTML preprocessing module
3
+ * Strips HTML tags and noise from input text before extraction
4
+ */
5
+ import { parse, HTMLElement } from 'node-html-parser';
6
/**
 * Selectors that are always removed from the parsed HTML before text is
 * extracted. They target elements that typically hold non-content markup
 * (scripts, navigation, forms) plus common ad/tracking/overlay containers.
 *
 * The "ad-" selectors are anchored to the start of the attribute value or of
 * a class token. A bare substring match (`[class*="ad-"]`) also hits
 * unrelated names such as "thread-list", "load-more" or "head-line" and
 * would silently drop real content.
 */
const DEFAULT_REMOVE_SELECTORS = [
    'script',
    'style',
    'nav',
    'footer',
    'header',
    'aside',
    'noscript',
    'iframe',
    'svg',
    'canvas',
    'form',
    // Common ad and tracking selectors
    '[class^="ad-"]', // class attribute starts with "ad-"
    '[class*=" ad-"]', // a later class token starts with "ad-"
    '[class*="advertisement"]',
    '[class*="cookie"]',
    '[class*="subscribe"]',
    '[class*="newsletter"]',
    '[class*="popup"]',
    '[class*="modal"]',
    '[class*="banner"]',
    '[id^="ad-"]',
    '[id*="advertisement"]',
    '[id*="cookie"]',
];
35
+ /**
36
+ * Tag-name groups for the structured (markdown-like) conversion
37
+ * that runs when preserveStructure is enabled. NOTE(review): `lists` and `inline` are not looked up anywhere in this module (list tags are matched by literal name) - confirm whether they are still needed
38
+ */
39
+ const SEMANTIC_ELEMENTS = {
40
+ headings: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
41
+ lists: ['ul', 'ol', 'li'],
42
+ containers: ['article', 'main', 'section', 'div', 'body', 'html'],
43
+ blocks: ['p', 'blockquote'],
44
+ inline: ['strong', 'b', 'em', 'i', 'a', 'code'],
45
+ };
46
/**
 * Turns a `stripHtml` setting into a fully populated options object.
 *
 * @param {boolean|object|undefined} config - `true` for the defaults, an
 *   options object to override them, or a falsy value to disable stripping.
 * @returns {object|null} `null` when `config` is falsy; otherwise an object
 *   with `extractText` (default `true`), `preserveStructure` (default
 *   `false`), `removeSelectors` (default `[]`) and `maxLength` (passed
 *   through, possibly `undefined`).
 */
export function resolveHtmlStripOptions(config) {
    if (!config) {
        return null;
    }

    // `true` carries no overrides, so it resolves like an empty options object.
    const overrides = config === true ? {} : config;

    return {
        extractText: overrides.extractText ?? true,
        preserveStructure: overrides.preserveStructure ?? false,
        removeSelectors: overrides.removeSelectors ?? [],
        maxLength: overrides.maxLength,
    };
}
69
/**
 * Detaches every element matched by the built-in default selectors or by the
 * caller-supplied ones. Defaults are processed first, then `selectors`.
 *
 * @param {object} root - Parsed document; must provide `querySelectorAll`.
 * @param {string[]} selectors - Extra selectors to remove.
 */
function removeElements(root, selectors) {
    const detachMatches = (selector) => {
        try {
            for (const match of root.querySelectorAll(selector)) {
                match.remove();
            }
        }
        catch {
            // Best effort: a selector the parser cannot handle (e.g. complex
            // CSS not supported by node-html-parser) is skipped silently.
        }
    };

    [...DEFAULT_REMOVE_SELECTORS, ...selectors].forEach((selector) => detachMatches(selector));
}
87
/**
 * Converts semantic HTML elements to markdown-like text.
 *
 * Headings become `#`-prefixed lines, list items become `- ` bullets or
 * numbered entries, blockquotes are `> `-prefixed, `pre`/`code` become fenced
 * blocks and paragraphs become blank-line separated text. Any other element
 * is walked recursively, emitting the trimmed text of its text nodes.
 *
 * Items of an ordered list are numbered sequentially (1., 2., 3., ...);
 * items with no text produce no line and do not consume a number.
 *
 * @param {object} root - Parsed root node (exposes `childNodes`, `tagName`, `text`).
 * @returns {string} The structured text, with runs of blank lines collapsed
 *   to a single blank line and outer whitespace trimmed.
 */
function convertToStructuredText(root) {
    const lines = [];

    // Emits `content` as its own block: one blank line before and after.
    const pushBlock = (...content) => {
        lines.push('', ...content, '');
    };

    // Recurses into child elements and emits the trimmed text of text nodes.
    function processChildren(node) {
        for (const child of node.childNodes) {
            if (child instanceof HTMLElement) {
                processNode(child);
            }
            else if (child.nodeType === 3) {
                // Text node
                const text = child.text.trim();
                if (text) {
                    lines.push(text);
                }
            }
        }
    }

    // `ordinal` is the 1-based position used when `node` is an <li> inside an <ol>.
    function processNode(node, ordinal = 1) {
        if (!node) {
            return;
        }
        const tagName = node.tagName?.toLowerCase() || '';

        // Headings: "h3" -> "### text"
        if (SEMANTIC_ELEMENTS.headings.includes(tagName)) {
            const text = node.text.trim();
            if (text) {
                const level = Number.parseInt(tagName[1], 10);
                pushBlock(`${'#'.repeat(level)} ${text}`);
            }
            return;
        }

        // List items: numbered inside <ol>, bulleted everywhere else
        if (tagName === 'li') {
            const text = node.text.trim();
            if (text) {
                const parentTag = node.parentNode?.tagName?.toLowerCase();
                const prefix = parentTag === 'ol' ? `${ordinal}. ` : '- ';
                lines.push(prefix + text);
            }
            return;
        }

        // List containers: only element children are visited
        if (tagName === 'ul' || tagName === 'ol') {
            lines.push('');
            let nextOrdinal = 1;
            for (const child of node.childNodes) {
                if (!(child instanceof HTMLElement)) {
                    continue;
                }
                processNode(child, nextOrdinal);
                // Advance only for items that actually produced a line.
                if (child.tagName?.toLowerCase() === 'li' && child.text.trim()) {
                    nextOrdinal += 1;
                }
            }
            lines.push('');
            return;
        }

        // Blockquotes: prefix every line of the quote
        if (tagName === 'blockquote') {
            const text = node.text.trim();
            if (text) {
                pushBlock('> ' + text.replace(/\n/g, '\n> '));
            }
            return;
        }

        // Code blocks
        if (tagName === 'pre' || tagName === 'code') {
            const text = node.text.trim();
            if (text) {
                pushBlock('```', text, '```');
            }
            return;
        }

        // Paragraphs and other block elements
        if (SEMANTIC_ELEMENTS.blocks.includes(tagName)) {
            const text = node.text.trim();
            if (text) {
                pushBlock(text);
            }
            return;
        }

        // Containers, the tag-less root and every other element are handled
        // the same way: walk the children.
        processChildren(node);
    }

    processNode(root);

    // Clean up multiple blank lines
    return lines
        .join('\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
199
/**
 * Returns the document's text content with whitespace normalised while
 * keeping paragraph breaks.
 *
 * @param {object} root - Parsed root node; only its `text` property is read.
 * @returns {string} Cleaned text: single spaces, per-line trimming and at
 *   most one blank line between paragraphs.
 */
function extractPlainText(root) {
    // Runs of spaces/tabs collapse to one space.
    const singleSpaced = root.text.replace(/[ \t]+/g, ' ');

    // Newlines separated only by whitespace become one paragraph break.
    const withParagraphBreaks = singleSpaced.replace(/\n\s*\n/g, '\n\n');

    // Strip leading/trailing whitespace from every line.
    const trimmedLines = [];
    for (const line of withParagraphBreaks.split('\n')) {
        trimmedLines.push(line.trim());
    }

    // Trimming can leave extra empty lines behind; cap at one blank line.
    return trimmedLines
        .join('\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
220
/**
 * Shortens `text` to at most `maxLength` characters, marking the cut with a
 * trailing "...". Room for the ellipsis is reserved inside the limit, and the
 * cut moves back to the last space when that space lies in the final 20% of
 * the available room. Limits too small to hold any text plus the ellipsis
 * fall back to a plain slice.
 */
function truncateWithEllipsis(text, maxLength) {
    const ellipsis = '...';
    if (text.length <= maxLength) {
        return text;
    }
    if (maxLength <= ellipsis.length) {
        return text.slice(0, maxLength);
    }
    const budget = maxLength - ellipsis.length;
    const head = text.slice(0, budget);
    // Try to break at a word boundary
    const lastSpace = head.lastIndexOf(' ');
    const kept = lastSpace > budget * 0.8 ? head.slice(0, lastSpace) : head;
    return kept + ellipsis;
}

/**
 * Strips HTML from input text according to options.
 *
 * @param {string} input - Raw text, possibly containing HTML.
 * @param {object} options - Resolved strip options. `removeSelectors` adds to
 *   the default selectors, `preserveStructure` selects markdown-like output
 *   instead of plain text, `maxLength` caps the result length.
 * @returns {string} The extracted text. When `maxLength` is set the result
 *   never exceeds it; truncated HTML-derived output ends with "..." (the
 *   ellipsis is counted inside the limit).
 */
export function stripHtml(input, options) {
    // Quick check: if no HTML-like content, return as-is (hard cut, no ellipsis)
    if (!input.includes('<') || !input.includes('>')) {
        return options.maxLength ? input.slice(0, options.maxLength) : input;
    }

    // Parse HTML
    const root = parse(input, {
        lowerCaseTagName: true,
        comment: false, // Remove comments
        blockTextElements: {
            script: true,
            noscript: true,
            style: true,
            pre: true,
        },
    });

    // Remove unwanted elements
    removeElements(root, options.removeSelectors || []);

    // Extract text based on options
    const result = options.preserveStructure
        ? convertToStructuredText(root)
        : extractPlainText(root);

    // Apply max length if specified (a falsy maxLength means "no limit")
    return options.maxLength ? truncateWithEllipsis(result, options.maxLength) : result;
}
263
/**
 * Runs the configured preprocessing steps over the input text. HTML stripping
 * is the only step.
 *
 * @param {string} input - Text to preprocess.
 * @param {object} config - Preprocessing configuration; `config.stripHtml`
 *   enables stripping (`true` or an options object).
 * @returns {string} The stripped text, or `input` unchanged when stripping is
 *   not enabled or resolves to no options.
 */
export function preprocess(input, config) {
    // Handle HTML stripping
    if (!config.stripHtml) {
        return input;
    }

    const stripOptions = resolveHtmlStripOptions(config.stripHtml);
    if (!stripOptions) {
        return input;
    }

    return stripHtml(input, stripOptions);
}
277
/**
 * Preprocesses the input and reports what happened.
 *
 * @param {string} input - Text to preprocess.
 * @param {object|undefined} config - Preprocessing configuration; when it is
 *   missing or its `stripHtml` is falsy, nothing is processed.
 * @returns {{text: string, wasProcessed: boolean, originalLength: number, processedLength: number}}
 *   The resulting text, whether it differs from the input, and the `length`
 *   of the input and of the result.
 */
export function preprocessWithDetails(input, config) {
    const strippingRequested = Boolean(config && config.stripHtml);
    const text = strippingRequested ? preprocess(input, config) : input;

    return {
        text,
        // Only counts as processed when the text actually changed.
        wasProcessed: strippingRequested && text !== input,
        originalLength: input.length,
        processedLength: text.length,
    };
}
297
+ //# sourceMappingURL=preprocessor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"preprocessor.js","sourceRoot":"","sources":["../../src/core/preprocessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,MAAM,wBAAwB,GAAG;IAC7B,QAAQ;IACR,OAAO;IACP,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,UAAU;IACV,QAAQ;IACR,KAAK;IACL,QAAQ;IACR,MAAM;IACN,mCAAmC;IACnC,gBAAgB;IAChB,0BAA0B;IAC1B,mBAAmB;IACnB,sBAAsB;IACtB,uBAAuB;IACvB,kBAAkB;IAClB,kBAAkB;IAClB,mBAAmB;IACnB,aAAa;IACb,uBAAuB;IACvB,gBAAgB;CACnB,CAAC;AAEF;;;GAGG;AACH,MAAM,iBAAiB,GAAG;IACtB,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;IAC9C,KAAK,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;IACzB,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC;IACjE,MAAM,EAAE,CAAC,GAAG,EAAE,YAAY,CAAC;IAC3B,MAAM,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC;CAClD,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,uBAAuB,CACnC,MAA8C;IAE9C,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAClB,uCAAuC;QACvC,OAAO;YACH,WAAW,EAAE,IAAI;YACjB,iBAAiB,EAAE,KAAK;YACxB,eAAe,EAAE,EAAE;YACnB,SAAS,EAAE,SAAS;SACvB,CAAC;IACN,CAAC;IAED,OAAO;QACH,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI;QACvC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,KAAK;QACpD,eAAe,EAAE,MAAM,CAAC,eAAe,IAAI,EAAE;QAC7C,SAAS,EAAE,MAAM,CAAC,SAAS;KAC9B,CAAC;AACN,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAiB,EAAE,SAAmB;IAC1D,MAAM,YAAY,GAAG,CAAC,GAAG,wBAAwB,EAAE,GAAG,SAAS,CAAC,CAAC;IAEjE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QAClC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;YACjD,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;gBACxB,EAAE,CAAC,MAAM,EAAE,CAAC;YAChB,CAAC;QACL,CAAC;QAAC,MAAM,CAAC;YACL,kCAAkC;YAClC,+EAA+E;QACnF,CAAC;IACL,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,IAAiB;IAC9C,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,SAAS,WAAW,CAAC,IAAwB,EAAE,QAAgB,CAAC;QAC5D,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAElD,kBAAkB;QAClB,IAAI,iBAAiB,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CA
AC,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACvC,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC;YACvC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;gBAC1B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,IAAI,CAAC,UAAgC,CAAC;YACrD,MAAM,SAAS,GAAG,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,SAAS,KAAK,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;YACjD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YAC9B,CAAC;YACD,OAAO;QACX,CAAC;QAED,yBAAyB;QACzB,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACvC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBAClC,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;oBAC/B,WAAW,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;gBAClC,CAAC;YACL,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,OAAO;QACX,CAAC;QAED,qBAAqB;QACrB,IAAI,OAAO,KAAK,YAAY,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBAC/C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,qBAAqB;QACrB,IAAI,OAAO,KAAK,KAAK,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAClB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,6CAA6C;QAC7C,IAAI,iBAAiB,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,
KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,CAAC;YACD,OAAO;QACX,CAAC;QAED,oDAAoD;QACpD,IAAI,iBAAiB,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAC7D,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBAClC,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;oBAC/B,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;gBAC9B,CAAC;qBAAM,IAAI,KAAK,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC9B,YAAY;oBACZ,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;oBAC/B,IAAI,IAAI,EAAE,CAAC;wBACP,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;gBACL,CAAC;YACL,CAAC;YACD,OAAO;QACX,CAAC;QAED,sDAAsD;QACtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YAClC,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;gBAC/B,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAC9B,CAAC;iBAAM,IAAI,KAAK,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;gBAC9B,YAAY;gBACZ,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,IAAI,EAAE,CAAC;oBACP,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,WAAW,CAAC,IAAI,CAAC,CAAC;IAElB,gCAAgC;IAChC,OAAO,KAAK;SACP,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,IAAiB;IACvC,eAAe;IACf,IAAI,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;IAErB,wDAAwD;IACxD,IAAI,GAAG,IAAI;QACP,4CAA4C;SAC3C,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;QACxB,kEAAkE;SACjE,OAAO,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5B,oDAAoD;SACnD,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SACxB,IAAI,CAAC,IAAI,CAAC;QACX,4CAA4C;SAC3C,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;IAEZ,OAAO,IAAI,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,KAAa,EAAE,OAAyB;IAC9D,qDAAqD;IACrD,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/C,OAAO,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACzE,CAAC;IAED,aAAa;IACb,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE;QACtB,gBAAgB,EAAE,IAAI;QACtB,OAAO,EAAE,KAAK,EAAE,kBAAkB;QAClC,iBAAiB,EAAE
;YACf,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI;YACd,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI;SACZ;KACJ,CAAC,CAAC;IAEH,2BAA2B;IAC3B,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC;IAEpD,gCAAgC;IAChC,IAAI,MAAc,CAAC;IAEnB,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;QAC5B,MAAM,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;SAAM,CAAC;QACJ,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,gCAAgC;IAChC,IAAI,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;QACzD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QAC5C,kCAAkC;QAClC,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,SAAS,GAAG,OAAO,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC;YACtC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,KAAK,CAAC;QAChD,CAAC;aAAM,CAAC;YACJ,MAAM,IAAI,KAAK,CAAC;QACpB,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,KAAa,EAAE,MAA2B;IACjE,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,wBAAwB;IACxB,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACnB,MAAM,OAAO,GAAG,uBAAuB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,OAAO,EAAE,CAAC;YACV,MAAM,GAAG,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACxC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAgBD;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACjC,KAAa,EACb,MAAuC;IAEvC,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,OAAO;YACH,IAAI,EAAE,KAAK;YACX,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,KAAK,CAAC,MAAM;YAC5B,eAAe,EAAE,KAAK,CAAC,MAAM;SAChC,CAAC;IACN,CAAC;IAED,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAE5C,OAAO;QACH,IAAI,EAAE,SAAS;QACf,YAAY,EAAE,SAAS,KAAK,KAAK;QACjC,cAAc,EAAE,KAAK,CAAC,MAAM;QAC5B,eAAe,EAAE,SAAS,CAAC,MAAM;KACpC,CAAC;AACN,CAAC"}
@@ -3,6 +3,26 @@
3
3
  */
4
4
  import type { Schema } from '../schemas/types.js';
5
5
  import type { LLMConfig } from '../llm/types.js';
6
+ /**
7
+ * HTML stripping options for preprocessing; every field is optional
8
+ */
9
+ export interface HtmlStripOptions {
10
+ /** Keep text content only (default: true). NOTE(review): the stripHtml implementation shipped in this release does not appear to read this flag - confirm it has an effect */
11
+ extractText?: boolean;
12
+ /** Preserve semantic structure like headings, lists (converts to markdown-like format); default: false, which yields plain text */
13
+ preserveStructure?: boolean;
14
+ /** Additional CSS selectors to remove, on top of the built-in default selectors (e.g., 'nav', 'footer', '.ad', '#sidebar'); default: none */
15
+ removeSelectors?: string[];
16
+ /** Max content length after stripping (truncates if exceeded); no limit when omitted */
17
+ maxLength?: number;
18
+ }
19
+ /**
20
+ * Preprocessing configuration for input text
21
+ */
22
+ export interface PreprocessingConfig {
23
+ /** Strip HTML tags from input. When true, uses default options; an options object customises them; omitted or false leaves the input untouched. */
24
+ stripHtml?: boolean | HtmlStripOptions;
25
+ }
6
26
  /**
7
27
  * Pipeline configuration
8
28
  */
@@ -19,6 +39,8 @@ export interface ExtractionRequest {
19
39
  input: string;
20
40
  schema: Schema;
21
41
  llmConfig: LLMConfig;
42
+ /** Optional preprocessing configuration */
43
+ preprocessing?: PreprocessingConfig;
22
44
  debug?: boolean;
23
45
  }
24
46
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC3C,cAAc,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,KAAK,CAAC;QACV,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;KAChB,CAAC,CAAC;IACH,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;IACrB,QAAQ,EAAE;QACN,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC;KACvB,CAAC;CACL"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,6CAA6C;IAC7C,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,0FAA0F;IAC1F,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,+EAA+E;IAC/E,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,iEAAiE;IACjE,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,mEAAmE;IACnE,SAAS,CAAC,EAAE,OAAO,GAAG,gBAAgB,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,SAAS,CAAC;IACrB,2CAA2C;IAC3C,aAAa,CAAC,EAAE,mBAAmB,CAAC;IACpC,KAAK,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC3C,cAAc,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,KAAK,CAAC;QACV,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;KAChB,CAAC,CAAC;IACH,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;IACrB,QAAQ,EAAE;QACN,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,UAAU,CAAC,EAAE,MAAM,CAAC;KACvB,CAAC;CACL"}
@@ -45,20 +45,32 @@ function validateField(fieldName, value, fieldDef) {
45
45
  value,
46
46
  });
47
47
  }
48
- else if (fieldDef.pattern) {
49
- try {
50
- const regex = new RegExp(fieldDef.pattern);
51
- if (!regex.test(value)) {
52
- errors.push({
53
- field: fieldName,
54
- message: `Field '${fieldName}' does not match pattern: ${fieldDef.pattern}`,
55
- code: PipelineErrorCodes.FIELD_INVALID,
56
- value,
57
- });
58
- }
48
+ else {
49
+ // Check enum constraint
50
+ if (fieldDef.enum && !fieldDef.enum.includes(value)) {
51
+ errors.push({
52
+ field: fieldName,
53
+ message: `Field '${fieldName}' must be one of: ${fieldDef.enum.join(', ')}. Got: ${value}`,
54
+ code: PipelineErrorCodes.FIELD_INVALID,
55
+ value,
56
+ });
59
57
  }
60
- catch (e) {
61
- // Invalid regex in schema - should be caught by schema validation
58
+ // Check pattern constraint
59
+ if (fieldDef.pattern) {
60
+ try {
61
+ const regex = new RegExp(fieldDef.pattern);
62
+ if (!regex.test(value)) {
63
+ errors.push({
64
+ field: fieldName,
65
+ message: `Field '${fieldName}' does not match pattern: ${fieldDef.pattern}`,
66
+ code: PipelineErrorCodes.FIELD_INVALID,
67
+ value,
68
+ });
69
+ }
70
+ }
71
+ catch (e) {
72
+ // Invalid regex in schema - should be caught by schema validation
73
+ }
62
74
  }
63
75
  }
64
76
  break;
@@ -90,45 +102,33 @@ function validateField(fieldName, value, fieldDef) {
90
102
  }
91
103
  }
92
104
  break;
93
- case 'date':
94
- // Accept string or Date object
95
- if (typeof value !== 'string' && !(value instanceof Date)) {
105
+ case 'integer':
106
+ if (typeof value !== 'number' || !Number.isInteger(value)) {
96
107
  errors.push({
97
108
  field: fieldName,
98
- message: `Field '${fieldName}' must be a date string or Date object`,
109
+ message: `Field '${fieldName}' must be an integer`,
99
110
  code: PipelineErrorCodes.TYPE_MISMATCH,
100
111
  value,
101
112
  });
102
113
  }
103
114
  else {
104
- // Try to parse as date
105
- const dateValue = typeof value === 'string' ? new Date(value) : value;
106
- if (isNaN(dateValue.getTime())) {
115
+ // Check min/max constraints
116
+ if (fieldDef.min !== undefined && value < fieldDef.min) {
107
117
  errors.push({
108
118
  field: fieldName,
109
- message: `Field '${fieldName}' is not a valid date`,
119
+ message: `Field '${fieldName}' must be at least ${fieldDef.min}`,
120
+ code: PipelineErrorCodes.FIELD_INVALID,
121
+ value,
122
+ });
123
+ }
124
+ if (fieldDef.max !== undefined && value > fieldDef.max) {
125
+ errors.push({
126
+ field: fieldName,
127
+ message: `Field '${fieldName}' must be at most ${fieldDef.max}`,
110
128
  code: PipelineErrorCodes.FIELD_INVALID,
111
129
  value,
112
130
  });
113
131
  }
114
- }
115
- break;
116
- case 'enum':
117
- if (typeof value !== 'string') {
118
- errors.push({
119
- field: fieldName,
120
- message: `Field '${fieldName}' must be a string for enum type, got ${typeof value}`,
121
- code: PipelineErrorCodes.TYPE_MISMATCH,
122
- value,
123
- });
124
- }
125
- else if (fieldDef.enum && !fieldDef.enum.includes(value)) {
126
- errors.push({
127
- field: fieldName,
128
- message: `Field '${fieldName}' must be one of: ${fieldDef.enum.join(', ')}. Got: ${value}`,
129
- code: PipelineErrorCodes.FIELD_INVALID,
130
- value,
131
- });
132
132
  }
133
133
  break;
134
134
  case 'boolean':
@@ -1 +1 @@
1
- {"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/core/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAiB,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAchE;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACjC,IAA6B,EAC7B,MAAc;IAEd,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,wBAAwB;IACxB,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;QAChE,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;QAE9B,4BAA4B;QAC5B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,mBAAmB,SAAS,cAAc;oBACnD,IAAI,EAAE,kBAAkB,CAAC,aAAa;iBACzC,CAAC,CAAC;YACP,CAAC;YACD,SAAS;QACb,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,GAAG,aAAa,CAAC,SAAS,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;IAChC,CAAC;IAED,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;QAC1B,MAAM;KACT,CAAC;AACN,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAClB,SAAiB,EACjB,KAAc,EACd,QAAyB;IAEzB,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,QAAQ,QAAQ,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,QAAQ;YACT,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,2BAA2B,OAAO,KAAK,EAAE;oBACrE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;gBAC1B,IAAI,CAAC;oBACD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;oBAC3C,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;wBACrB,MAAM,CAAC,IAAI,CAAC;4BACR,KAAK,EAAE,SAAS;4BAChB,OAAO,EAAE,UAAU,SAAS,6BAA6B,QAAQ,CAAC,OAAO,EAAE;4BAC3E,IAAI,EAAE,kBAAkB,CAAC,aAAa;4BACtC,KAAK;yBACR,CAAC,CAAC;oBACP,CAAC;gBACL,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACT,kEAAkE;gBACtE,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,QAAQ;YACT,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5C,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,2BAA2B,OAAO,KAAK,EAAE;oBACrE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ
,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,gBAAgB,QAAQ,CAAC,GAAG,SAAS,KAAK,EAAE;wBACxE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,gBAAgB,QAAQ,CAAC,GAAG,SAAS,KAAK,EAAE;wBACxE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,MAAM;YACP,+BAA+B;YAC/B,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,CAAC,KAAK,YAAY,IAAI,CAAC,EAAE,CAAC;gBACxD,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,wCAAwC;oBACpE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,uBAAuB;gBACvB,MAAM,SAAS,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;gBACtE,IAAI,KAAK,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC;oBAC7B,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,uBAAuB;wBACnD,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,MAAM;YACP,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,yCAAyC,OAAO,KAAK,EAAE;oBACnF,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,IAAI,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzD,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,qBAAqB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,KAAK,EAAE;oBAC1F,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;YACD,MAAM;QAEV,KAAK,SAAS;YACV,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,4BAA4B,OAAO,KAAK,EAAE;oBACtE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;YACD,MAAM;IACd,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/core/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAiB,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAchE;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACjC,IAA6B,EAC7B,MAAc;IAEd,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,wBAAwB;IACxB,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;QAChE,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;QAE9B,4BAA4B;QAC5B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBACrB,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,mBAAmB,SAAS,cAAc;oBACnD,IAAI,EAAE,kBAAkB,CAAC,aAAa;iBACzC,CAAC,CAAC;YACP,CAAC;YACD,SAAS;QACb,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,GAAG,aAAa,CAAC,SAAS,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC9D,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;IAChC,CAAC;IAED,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;QAC1B,MAAM;KACT,CAAC;AACN,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAClB,SAAiB,EACjB,KAAc,EACd,QAAyB;IAEzB,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,QAAQ,QAAQ,CAAC,IAAI,EAAE,CAAC;QACpB,KAAK,QAAQ;YACT,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,2BAA2B,OAAO,KAAK,EAAE;oBACrE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,wBAAwB;gBACxB,IAAI,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;oBAClD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,qBAAqB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,KAAK,EAAE;wBAC1F,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,2BAA2B;gBAC3B,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;oBACnB,IAAI,CAAC;wBACD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;wBAC3C,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;4BACrB,MAAM,CAAC,IAAI,CAAC;gCACR,KAAK,EAAE,SAAS;gCAChB,OAAO,EAAE,UAAU,SAAS,6BAA6B,QAAQ,CAAC,OAAO,EAAE;gCAC3E,IAAI,EAAE,kBAAkB,CAAC,aAAa;gCACtC,KAAK;6BACR,CAAC,CAAC;wBACP,CAAC;oBACL,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACT,kEAAkE;oBACtE,CAAC;
gBACL,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,QAAQ;YACT,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5C,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,2BAA2B,OAAO,KAAK,EAAE;oBACrE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,gBAAgB,QAAQ,CAAC,GAAG,SAAS,KAAK,EAAE;wBACxE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,gBAAgB,QAAQ,CAAC,GAAG,SAAS,KAAK,EAAE;wBACxE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,SAAS;YACV,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxD,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,sBAAsB;oBAClD,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,4BAA4B;gBAC5B,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,sBAAsB,QAAQ,CAAC,GAAG,EAAE;wBAChE,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;gBACD,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;oBACrD,MAAM,CAAC,IAAI,CAAC;wBACR,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,UAAU,SAAS,qBAAqB,QAAQ,CAAC,GAAG,EAAE;wBAC/D,IAAI,EAAE,kBAAkB,CAAC,aAAa;wBACtC,KAAK;qBACR,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YACD,MAAM;QAEV,KAAK,SAAS;YACV,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC;oBACR,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,UAAU,SAAS,4BAA4B,OAAO,KAAK,EAAE;oBACtE,IAAI,EAAE,kBAAkB,CAAC,aAAa;oBACtC,KAAK;iBACR,CAAC,CAAC;YACP,CAAC;YACD,MAAM;IACd,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
package/dist/index.d.ts CHANGED
@@ -5,7 +5,9 @@
5
5
  export { ExtractionPipeline, extract } from './core/pipeline.js';
6
6
  export { validateExtractedData } from './core/validator.js';
7
7
  export { PipelineError, PipelineErrorCodes } from './core/errors.js';
8
- export type { PipelineConfig, ExtractionRequest, PipelineResult, StepResult, } from './core/types.js';
8
+ export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './core/preprocessor.js';
9
+ export type { PreprocessResult } from './core/preprocessor.js';
10
+ export type { PipelineConfig, ExtractionRequest, PipelineResult, StepResult, HtmlStripOptions, PreprocessingConfig, } from './core/types.js';
9
11
  export { loadSchema, parseSchema, loadSchemaFromObject } from './schemas/loader.js';
10
12
  export { validateSchema } from './schemas/validator.js';
11
13
  export { SchemaValidationError, ErrorCodes as SchemaErrorCodes } from './schemas/errors.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACrE,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,GACb,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,qBAAqB,EAAE,UAAU,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5F,YAAY,EACR,MAAM,EACN,eAAe,EACf,SAAS,EACT,eAAe,EACf,gBAAgB,EACnB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACtE,YAAY,EACR,SAAS,EACT,WAAW,EACX,WAAW,EACX,UAAU,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EAClB,WAAW,GACd,MAAM,gBAAgB,CAAC;AACxB,YAAY,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,wBAAwB,CAAC;AAChC,YAAY,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC/D,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,gBAAgB,EAChB,mBAAmB,GACtB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,qBAAqB,EAAE,UAAU,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5F,YAAY,EACR,MAAM,EACN,eAAe,EACf,SAAS,EACT,eAAe,EACf,gBAAgB,EACnB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACtE,YAAY,EACR,SAAS,EACT,WAAW,EACX,WAAW,EACX,UAAU,EACV,WAAW,EACX,iBAAiB,EACjB,kBAAkB,EAClB,WAAW,GACd,MAAM,gBAAgB,CAAC;AACxB,YAAY,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC"}
package/dist/index.js CHANGED
@@ -6,6 +6,7 @@
6
6
  export { ExtractionPipeline, extract } from './core/pipeline.js';
7
7
  export { validateExtractedData } from './core/validator.js';
8
8
  export { PipelineError, PipelineErrorCodes } from './core/errors.js';
9
+ export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './core/preprocessor.js';
9
10
  // Schema exports
10
11
  export { loadSchema, parseSchema, loadSchemaFromObject } from './schemas/loader.js';
11
12
  export { validateSchema } from './schemas/validator.js';
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wBAAwB;AACxB,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAQrE,iBAAiB;AACjB,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,qBAAqB,EAAE,UAAU,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAS5F,cAAc;AACd,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wBAAwB;AACxB,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,wBAAwB,CAAC;AAWhC,iBAAiB;AACjB,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,qBAAqB,EAAE,UAAU,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAS5F,cAAc;AACd,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC"}
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Schema loader - loads and parses schema files
3
3
  */
4
- import * as fs from 'fs/promises';
4
+ import * as fs from 'node:fs/promises';
5
5
  import { validateSchema } from './validator.js';
6
6
  import { SchemaValidationError, ErrorCodes } from './errors.js';
7
7
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/schemas/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AAGlC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,qBAAqB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEhE;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC7C,IAAI,CAAC;QACD,uCAAuC;QACvC,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACjD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,4BAA4B,QAAQ,EAAE,EACtC,UAAU,CAAC,YAAY,EACvB,SAAS,EACT,EAAE,QAAQ,EAAE,KAAK,EAAG,KAAe,CAAC,OAAO,EAAE,CAChD,CAAC;IACN,CAAC;IAED,oBAAoB;IACpB,IAAI,OAAe,CAAC;IACpB,IAAI,CAAC;QACD,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,+BAAgC,KAAe,CAAC,OAAO,EAAE,EACzD,UAAU,CAAC,YAAY,EACvB,SAAS,EACT,EAAE,QAAQ,EAAE,CACf,CAAC;IACN,CAAC;IAED,aAAa;IACb,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACD,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,gCAAiC,KAAe,CAAC,OAAO,EAAE,EAC1D,UAAU,CAAC,YAAY,EACvB,SAAS,EACT,EAAE,QAAQ,EAAE,CACf,CAAC;IACN,CAAC;IAED,4BAA4B;IAC5B,cAAc,CAAC,MAAM,CAAC,CAAC;IAEvB,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CAAC,UAAkB;IAC1C,IAAI,MAAe,CAAC;IAEpB,IAAI,CAAC;QACD,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,iBAAkB,KAAe,CAAC,OAAO,EAAE,EAC3C,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,cAAc,CAAC,MAAM,CAAC,CAAC;IACvB,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAY;IAC7C,cAAc,CAAC,GAAG,CAAC,CAAC;IACpB,OAAO,GAAG,CAAC;AACf,CAAC"}
1
+ {"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/schemas/loader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAGvC,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,qBAAqB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEhE;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC7C,IAAI,CAAC;QACD,uCAAuC;QACvC,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACjD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,4BAA4B,QAAQ,EAAE,EACtC,UAAU,CAAC,YAAY,EACvB,SAAS,EACT,EAAE,QAAQ,EAAE,KAAK,EAAG,KAAe,CAAC,OAAO,EAAE,CAChD,CAAC;IACN,CAAC;IAED,oBAAoB;IACpB,IAAI,OAAe,CAAC;IACpB,IAAI,CAAC;QACD,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,+BAAgC,KAAe,CAAC,OAAO,EAAE,EACzD,UAAU,CAAC,YAAY,EACvB,SAAS,EACT,EAAE,QAAQ,EAAE,CACf,CAAC;IACN,CAAC;IAED,aAAa;IACb,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACD,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,gCAAiC,KAAe,CAAC,OAAO,EAAE,EAC1D,UAAU,CAAC,YAAY,EACvB,SAAS,EACT,EAAE,QAAQ,EAAE,CACf,CAAC;IACN,CAAC;IAED,4BAA4B;IAC5B,cAAc,CAAC,MAAM,CAAC,CAAC;IAEvB,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CAAC,UAAkB;IAC1C,IAAI,MAAe,CAAC;IAEpB,IAAI,CAAC;QACD,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACb,MAAM,IAAI,qBAAqB,CAC3B,iBAAkB,KAAe,CAAC,OAAO,EAAE,EAC3C,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,cAAc,CAAC,MAAM,CAAC,CAAC;IACvB,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAY;IAC7C,cAAc,CAAC,GAAG,CAAC,CAAC;IACpB,OAAO,GAAG,CAAC;AACf,CAAC"}
@@ -5,8 +5,9 @@
5
5
  */
6
6
  /**
7
7
  * Supported field types in schema definitions
8
+ * Note: For dates, use type='string' with format='date-time'
8
9
  */
9
- export type FieldType = 'string' | 'number' | 'date' | 'enum' | 'boolean';
10
+ export type FieldType = 'string' | 'number' | 'integer' | 'boolean';
10
11
  /**
11
12
  * Field definition within a schema
12
13
  */
@@ -14,6 +15,7 @@ export interface FieldDefinition {
14
15
  type: FieldType;
15
16
  description?: string;
16
17
  optional?: boolean;
18
+ format?: string;
17
19
  enum?: string[];
18
20
  min?: number;
19
21
  max?: number;
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/schemas/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;AAE1E;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B,IAAI,EAAE,SAAS,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,MAAM;IACnB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IACxC,QAAQ,CAAC,EAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,UAAU,CAAC,EAAE;QACT,SAAS,EAAE,MAAM,CAAC;QAClB,mBAAmB,EAAE,OAAO,CAAC;KAChC,CAAC;IACF,MAAM,CAAC,EAAE;QACL,sBAAsB,CAAC,EAAE,OAAO,CAAC;KACpC,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,cAAc,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE;QACP,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACL"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/schemas/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;GAGG;AACH,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;AAEpE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B,IAAI,EAAE,SAAS,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,MAAM;IACnB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IACxC,QAAQ,CAAC,EAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,UAAU,CAAC,EAAE;QACT,SAAS,EAAE,MAAM,CAAC;QAClB,mBAAmB,EAAE,OAAO,CAAC;KAChC,CAAC;IACF,MAAM,CAAC,EAAE;QACL,sBAAsB,CAAC,EAAE,OAAO,CAAC;KACpC,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1C,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,cAAc,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE;QACP,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACL"}
@@ -2,7 +2,7 @@
2
2
  * Schema validator - validates schema definitions
3
3
  */
4
4
  import { SchemaValidationError, ErrorCodes } from './errors.js';
5
- const VALID_FIELD_TYPES = ['string', 'number', 'date', 'enum', 'boolean'];
5
+ const VALID_FIELD_TYPES = ['string', 'number', 'integer', 'boolean'];
6
6
  /**
7
7
  * Validates a schema definition
8
8
  *
@@ -82,23 +82,21 @@ function validateFieldDefinition(fieldName, fieldDef) {
82
82
  }
83
83
  // Type-specific validations
84
84
  const fieldType = def.type;
85
- if (fieldType === 'enum') {
86
- validateEnumField(fieldName, def);
87
- }
88
- if (fieldType === 'number') {
85
+ if (fieldType === 'number' || fieldType === 'integer') {
89
86
  validateNumberField(fieldName, def);
90
87
  }
91
88
  if (fieldType === 'string') {
92
89
  validateStringField(fieldName, def);
90
+ // Validate enum constraint if present
91
+ if (def.enum) {
92
+ validateEnumConstraint(fieldName, def);
93
+ }
93
94
  }
94
95
  }
95
96
  /**
96
- * Validates enum field constraints
97
+ * Validates enum constraint on string fields
97
98
  */
98
- function validateEnumField(fieldName, def) {
99
- if (!def.enum) {
100
- throw new SchemaValidationError(`Field '${fieldName}' with type 'enum' must have an 'enum' property`, ErrorCodes.MISSING_ENUM_VALUES, fieldName);
101
- }
99
+ function validateEnumConstraint(fieldName, def) {
102
100
  if (!Array.isArray(def.enum)) {
103
101
  throw new SchemaValidationError(`Field '${fieldName}' enum property must be an array`, ErrorCodes.INVALID_ENUM_VALUE, fieldName);
104
102
  }
@@ -1 +1 @@
1
- {"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/schemas/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,qBAAqB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEhE,MAAM,iBAAiB,GAAgB,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;AAEvF;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,MAAe;IAC1C,+BAA+B;IAC/B,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACxC,MAAM,IAAI,qBAAqB,CAC3B,0BAA0B,EAC1B,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,MAAM,SAAS,GAAG,MAAiC,CAAC;IAEpD,uCAAuC;IACvC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,IAAI,qBAAqB,CAC3B,yCAAyC,EACzC,UAAU,CAAC,cAAc,CAC5B,CAAC;IACN,CAAC;IAED,IAAI,OAAO,SAAS,CAAC,MAAM,KAAK,QAAQ,IAAI,SAAS,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QACpE,MAAM,IAAI,qBAAqB,CAC3B,4BAA4B,EAC5B,UAAU,CAAC,cAAc,CAC5B,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,SAAS,CAAC,MAAiC,CAAC;IAE3D,2BAA2B;IAC3B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,qBAAqB,CAC3B,wCAAwC,EACxC,UAAU,CAAC,cAAc,CAC5B,CAAC;IACN,CAAC;IAED,iCAAiC;IACjC,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACzD,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAC7B,uBAAuB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,+BAA+B;IAC/B,IAAI,SAAS,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnC,gBAAgB,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,+CAA+C;IAC/C,IAAI,SAAS,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QACrC,wBAAwB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IACnD,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,SAAiB;IACxC,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACxC,MAAM,IAAI,qBAAqB,CAC3B,4BAA4B,EAC5B,UAAU,CAAC,kBAAkB,CAChC,CAAC;IACN,CAAC;IAED,wEAAwE;IACxE,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,qBAAqB,CAC3B,uBAAuB,SAAS,gHAAgH,EAChJ,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,SAAiB,EAAE,QAAiB;IACjE,IAAI,CAAC,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC5C,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gCAAgC,EACnD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,MAAM,GAAG,GAAG,QAAm
C,CAAC;IAEhD,qCAAqC;IACrC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACZ,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uCAAuC,EAC1D,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,yBAAyB,EAC5C,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,sBAAsB;IACtB,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAiB,CAAC,EAAE,CAAC;QACrD,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uBAAuB,GAAG,CAAC,IAAI,uBAAuB,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EACvG,UAAU,CAAC,kBAAkB,EAC7B,SAAS,EACT,EAAE,UAAU,EAAE,iBAAiB,EAAE,YAAY,EAAE,GAAG,CAAC,IAAI,EAAE,CAC5D,CAAC;IACN,CAAC;IAED,wCAAwC;IACxC,IAAI,GAAG,CAAC,QAAQ,KAAK,SAAS,IAAI,OAAO,GAAG,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QAClE,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uCAAuC,EAC1D,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,kCAAkC;IAClC,IAAI,GAAG,CAAC,WAAW,KAAK,SAAS,IAAI,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;QACvE,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gCAAgC,EACnD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,4BAA4B;IAC5B,MAAM,SAAS,GAAG,GAAG,CAAC,IAAiB,CAAC;IAExC,IAAI,SAAS,KAAK,MAAM,EAAE,CAAC;QACvB,iBAAiB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IACtC,CAAC;IAED,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACzB,mBAAmB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACzB,mBAAmB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IACxC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,SAAiB,EAAE,GAA4B;IACtE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACZ,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,iDAAiD,EACpE,UAAU,CAAC,mBAAmB,EAC9B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,kCAAkC,EACrD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,8BAA8B,EACjD,UAAU,CAAC,iBAAiB,EAC5B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,oCAAoC;IACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,
CAAC;QAC1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,yBAAyB,CAAC,0BAA0B,OAAO,KAAK,EAAE,EACrF,UAAU,CAAC,kBAAkB,EAC7B,SAAS,EACT,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CACtB,CAAC;QACN,CAAC;IACL,CAAC;IAED,6BAA6B;IAC7B,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,YAAY,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QACxC,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,kCAAkC,EACrD,UAAU,CAAC,oBAAoB,EAC/B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,SAAiB,EAAE,GAA4B;IACxE,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACxB,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,mCAAmC,EACtD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;QACN,CAAC;IACL,CAAC;IAED,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACxB,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,mCAAmC,EACtD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;QACN,CAAC;IACL,CAAC;IAED,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACjD,IAAK,GAAG,CAAC,GAAc,GAAI,GAAG,CAAC,GAAc,EAAE,CAAC;YAC5C,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gBAAgB,GAAG,CAAC,GAAG,uCAAuC,GAAG,CAAC,GAAG,GAAG,EAC3F,UAAU,CAAC,mBAAmB,EAC9B,SAAS,EACT,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CACjC,CAAC;QACN,CAAC;IACL,CAAC;IAED,oCAAoC;IACpC,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,kDAAkD,EACrE,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,SAAiB,EAAE,GAA4B;IACxE,IAAI,GAAG,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QAC5B,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAClC,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uCAAuC,EAC1D,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;QACN,CAAC;QAED,yBAAyB;QACzB,IAAI,CAAC;YACD,IAAI,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gCAAiC,KAAe,CAAC,OAAO,EAAE,EAC7E,UAAU,CAAC,eAAe,EAC1B,SAAS,EACT,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAC3B,CAAC;QACN,CAAC;IACL,CAAC;IAED,sEAAsE;IACtE,
IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACjD,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,4DAA4D,EAC/E,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,QAAiB;IACvC,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACpD,MAAM,IAAI,qBAAqB,CAC3B,mCAAmC,EACnC,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,MAAM,IAAI,GAAG,QAAmC,CAAC;IAEjD,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC3D,MAAM,IAAI,qBAAqB,CAC3B,uCAAuC,EACvC,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACjE,MAAM,IAAI,qBAAqB,CAC3B,0CAA0C,EAC1C,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,IAAI,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;QACzE,MAAM,IAAI,qBAAqB,CAC3B,8CAA8C,EAC9C,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,UAAmB;IACjD,IAAI,OAAO,UAAU,KAAK,QAAQ,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QACxD,MAAM,IAAI,qBAAqB,CAC3B,4CAA4C,EAC5C,UAAU,CAAC,yBAAyB,CACvC,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,UAAqC,CAAC;IAErD,0CAA0C;IAC1C,IAAI,MAAM,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;QACjC,MAAM,IAAI,qBAAqB,CAC3B,yDAAyD,EACzD,UAAU,CAAC,yBAAyB,CACvC,CAAC;IACN,CAAC;IAED,IAAI,OAAO,MAAM,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvC,MAAM,IAAI,qBAAqB,CAC3B,uCAAuC,EACvC,UAAU,CAAC,yBAAyB,EACpC,SAAS,EACT,EAAE,QAAQ,EAAE,OAAO,MAAM,CAAC,SAAS,EAAE,CACxC,CAAC;IACN,CAAC;IAED,mCAAmC;IACnC,IAAI,MAAM,CAAC,SAAS,GAAG,CAAC,IAAI,MAAM,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC;QACjD,MAAM,IAAI,qBAAqB,CAC3B,uDAAuD,MAAM,CAAC,SAAS,EAAE,EACzE,UAAU,CAAC,yBAAyB,EACpC,SAAS,EACT,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAClC,CAAC;IACN,CAAC;IAED,oDAAoD;IACpD,IAAI,MAAM,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QAC3C,MAAM,IAAI,qBAAqB,CAC3B,mEAAmE,EACnE,UAAU,CAAC,yBAAyB,CACvC,CAAC;IACN,CAAC;IAED,IAAI,OAAO,MAAM,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QAClD,MAAM,IAAI,qBAAqB,CAC3B,uCAAuC,EACvC,UAAU,CAAC,yBAAyB,EACpC,SAAS,EACT,EAAE,QAAQ,EAAE,OAAO,MAAM,CAAC,mBAAmB,EAAE,CAClD,CAAC;IACN,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/schemas/validator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,qBAAqB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEhE,MAAM,iBAAiB,GAAgB,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;AAElF;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,MAAe;IAC1C,+BAA+B;IAC/B,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACxC,MAAM,IAAI,qBAAqB,CAC3B,0BAA0B,EAC1B,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,MAAM,SAAS,GAAG,MAAiC,CAAC;IAEpD,uCAAuC;IACvC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,IAAI,qBAAqB,CAC3B,yCAAyC,EACzC,UAAU,CAAC,cAAc,CAC5B,CAAC;IACN,CAAC;IAED,IAAI,OAAO,SAAS,CAAC,MAAM,KAAK,QAAQ,IAAI,SAAS,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QACpE,MAAM,IAAI,qBAAqB,CAC3B,4BAA4B,EAC5B,UAAU,CAAC,cAAc,CAC5B,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,SAAS,CAAC,MAAiC,CAAC;IAE3D,2BAA2B;IAC3B,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,qBAAqB,CAC3B,wCAAwC,EACxC,UAAU,CAAC,cAAc,CAC5B,CAAC;IACN,CAAC;IAED,iCAAiC;IACjC,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACzD,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAC7B,uBAAuB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,+BAA+B;IAC/B,IAAI,SAAS,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnC,gBAAgB,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,+CAA+C;IAC/C,IAAI,SAAS,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;QACrC,wBAAwB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IACnD,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,SAAiB;IACxC,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACxC,MAAM,IAAI,qBAAqB,CAC3B,4BAA4B,EAC5B,UAAU,CAAC,kBAAkB,CAChC,CAAC;IACN,CAAC;IAED,wEAAwE;IACxE,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,qBAAqB,CAC3B,uBAAuB,SAAS,gHAAgH,EAChJ,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,SAAiB,EAAE,QAAiB;IACjE,IAAI,CAAC,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC5C,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gCAAgC,EACnD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,MAAM,GAAG,GAAG,QAAmC,CAAC;IAE
hD,qCAAqC;IACrC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACZ,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uCAAuC,EAC1D,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,yBAAyB,EAC5C,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,sBAAsB;IACtB,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAiB,CAAC,EAAE,CAAC;QACrD,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uBAAuB,GAAG,CAAC,IAAI,uBAAuB,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EACvG,UAAU,CAAC,kBAAkB,EAC7B,SAAS,EACT,EAAE,UAAU,EAAE,iBAAiB,EAAE,YAAY,EAAE,GAAG,CAAC,IAAI,EAAE,CAC5D,CAAC;IACN,CAAC;IAED,wCAAwC;IACxC,IAAI,GAAG,CAAC,QAAQ,KAAK,SAAS,IAAI,OAAO,GAAG,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QAClE,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uCAAuC,EAC1D,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,kCAAkC;IAClC,IAAI,GAAG,CAAC,WAAW,KAAK,SAAS,IAAI,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;QACvE,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gCAAgC,EACnD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,4BAA4B;IAC5B,MAAM,SAAS,GAAG,GAAG,CAAC,IAAiB,CAAC;IAExC,IAAI,SAAS,KAAK,QAAQ,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QACpD,mBAAmB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;IACxC,CAAC;IAED,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACzB,mBAAmB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QACpC,sCAAsC;QACtC,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,sBAAsB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAC3C,CAAC;IACL,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,sBAAsB,CAAC,SAAiB,EAAE,GAA4B;IAC3E,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,kCAAkC,EACrD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,8BAA8B,EACjD,UAAU,CAAC,iBAAiB,EAC5B,SAAS,CACZ,CAAC;IACN,CAAC;IAED,oCAAoC;IACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,yBAAyB,CAAC,0BAA0B,OAAO,KAAK,EAA
E,EACrF,UAAU,CAAC,kBAAkB,EAC7B,SAAS,EACT,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CACtB,CAAC;QACN,CAAC;IACL,CAAC;IAED,6BAA6B;IAC7B,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,YAAY,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QACxC,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,kCAAkC,EACrD,UAAU,CAAC,oBAAoB,EAC/B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,SAAiB,EAAE,GAA4B;IACxE,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACxB,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,mCAAmC,EACtD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;QACN,CAAC;IACL,CAAC;IAED,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACxB,IAAI,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,mCAAmC,EACtD,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;QACN,CAAC;IACL,CAAC;IAED,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACjD,IAAK,GAAG,CAAC,GAAc,GAAI,GAAG,CAAC,GAAc,EAAE,CAAC;YAC5C,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gBAAgB,GAAG,CAAC,GAAG,uCAAuC,GAAG,CAAC,GAAG,GAAG,EAC3F,UAAU,CAAC,mBAAmB,EAC9B,SAAS,EACT,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CACjC,CAAC;QACN,CAAC;IACL,CAAC;IAED,oCAAoC;IACpC,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,kDAAkD,EACrE,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,SAAiB,EAAE,GAA4B;IACxE,IAAI,GAAG,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QAC5B,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAClC,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,uCAAuC,EAC1D,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;QACN,CAAC;QAED,yBAAyB;QACzB,IAAI,CAAC;YACD,IAAI,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,gCAAiC,KAAe,CAAC,OAAO,EAAE,EAC7E,UAAU,CAAC,eAAe,EAC1B,SAAS,EACT,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAC3B,CAAC;QACN,CAAC;IACL,CAAC;IAED,sEAAsE;IACtE,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,IAAI,GAAG,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACjD,MAAM,IAAI,qBAAqB,CAC3B,UAAU,SAAS,4DAA4D,E
AC/E,UAAU,CAAC,kBAAkB,EAC7B,SAAS,CACZ,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,QAAiB;IACvC,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACpD,MAAM,IAAI,qBAAqB,CAC3B,mCAAmC,EACnC,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,MAAM,IAAI,GAAG,QAAmC,CAAC;IAEjD,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC3D,MAAM,IAAI,qBAAqB,CAC3B,uCAAuC,EACvC,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACjE,MAAM,IAAI,qBAAqB,CAC3B,0CAA0C,EAC1C,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,IAAI,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;QACzE,MAAM,IAAI,qBAAqB,CAC3B,8CAA8C,EAC9C,UAAU,CAAC,YAAY,CAC1B,CAAC;IACN,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,UAAmB;IACjD,IAAI,OAAO,UAAU,KAAK,QAAQ,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QACxD,MAAM,IAAI,qBAAqB,CAC3B,4CAA4C,EAC5C,UAAU,CAAC,yBAAyB,CACvC,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,UAAqC,CAAC;IAErD,0CAA0C;IAC1C,IAAI,MAAM,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;QACjC,MAAM,IAAI,qBAAqB,CAC3B,yDAAyD,EACzD,UAAU,CAAC,yBAAyB,CACvC,CAAC;IACN,CAAC;IAED,IAAI,OAAO,MAAM,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvC,MAAM,IAAI,qBAAqB,CAC3B,uCAAuC,EACvC,UAAU,CAAC,yBAAyB,EACpC,SAAS,EACT,EAAE,QAAQ,EAAE,OAAO,MAAM,CAAC,SAAS,EAAE,CACxC,CAAC;IACN,CAAC;IAED,mCAAmC;IACnC,IAAI,MAAM,CAAC,SAAS,GAAG,CAAC,IAAI,MAAM,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC;QACjD,MAAM,IAAI,qBAAqB,CAC3B,uDAAuD,MAAM,CAAC,SAAS,EAAE,EACzE,UAAU,CAAC,yBAAyB,EACpC,SAAS,EACT,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAClC,CAAC;IACN,CAAC;IAED,oDAAoD;IACpD,IAAI,MAAM,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QAC3C,MAAM,IAAI,qBAAqB,CAC3B,mEAAmE,EACnE,UAAU,CAAC,yBAAyB,CACvC,CAAC;IACN,CAAC;IAED,IAAI,OAAO,MAAM,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QAClD,MAAM,IAAI,qBAAqB,CAC3B,uCAAuC,EACvC,UAAU,CAAC,yBAAyB,EACpC,SAAS,EACT,EAAE,QAAQ,EAAE,OAAO,MAAM,CAAC,mBAAmB,EAAE,CAClD,CAAC;IACN,CAAC;AACL,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ordis-dev/ordis",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "type": "module",
5
5
  "description": "Schema-first LLM extraction tool that turns unstructured text into validated structured data",
6
6
  "main": "dist/index.js",
@@ -34,7 +34,15 @@
34
34
  "validation",
35
35
  "cli",
36
36
  "openai",
37
- "ollama"
37
+ "ollama",
38
+ "cross-platform",
39
+ "deno",
40
+ "bun",
41
+ "nodejs",
42
+ "webstandards",
43
+ "typescript",
44
+ "javascript",
45
+ "jsonschema"
38
46
  ],
39
47
  "author": "Ordis",
40
48
  "license": "MIT",
@@ -56,5 +64,8 @@
56
64
  "tsx": "^4.21.0",
57
65
  "typescript": "^5.9.3",
58
66
  "vitest": "^4.0.15"
67
+ },
68
+ "dependencies": {
69
+ "node-html-parser": "^7.0.2"
59
70
  }
60
- }
71
+ }