@ordis-dev/ordis 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/coercion.d.ts +73 -0
- package/dist/core/coercion.d.ts.map +1 -0
- package/dist/core/coercion.js +461 -0
- package/dist/core/coercion.js.map +1 -0
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -0
- package/dist/core/index.js.map +1 -1
- package/dist/core/pipeline.d.ts.map +1 -1
- package/dist/core/pipeline.js +12 -3
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/types.d.ts +8 -0
- package/dist/core/types.d.ts.map +1 -1
- package/dist/core/validator.d.ts +3 -0
- package/dist/core/validator.d.ts.map +1 -1
- package/dist/core/validator.js +86 -2
- package/dist/core/validator.js.map +1 -1
- package/dist/llm/client.d.ts.map +1 -1
- package/dist/llm/client.js +4 -0
- package/dist/llm/client.js.map +1 -1
- package/dist/llm/prompt-builder.d.ts.map +1 -1
- package/dist/llm/prompt-builder.js +63 -19
- package/dist/llm/prompt-builder.js.map +1 -1
- package/dist/llm/types.d.ts +15 -0
- package/dist/llm/types.d.ts.map +1 -1
- package/dist/schemas/types.d.ts +18 -1
- package/dist/schemas/types.d.ts.map +1 -1
- package/dist/schemas/validator.js +61 -1
- package/dist/schemas/validator.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type coercion module - converts LLM output to expected types
|
|
3
|
+
*
|
|
4
|
+
* Handles common LLM quirks like:
|
|
5
|
+
* - String "null" instead of null
|
|
6
|
+
* - String numbers like "123" instead of 123
|
|
7
|
+
* - String booleans like "true" instead of true
|
|
8
|
+
* - Enum case mismatch like "Series A" instead of "series_a"
|
|
9
|
+
* - Date format variations like "11/20/24" instead of "2024-11-20"
|
|
10
|
+
*/
|
|
11
|
+
import type { FieldType, FieldDefinition } from '../schemas/types.js';
|
|
12
|
+
/**
 * Record of a single value that was changed during coercion.
 */
export interface CoercionWarning {
    /** Field name (or dotted / indexed path for nested values) that was changed. */
    field: string;
    /** Human-readable description of the change. */
    message: string;
    /** The value as it was received, before coercion. */
    originalValue: unknown;
    /** The value that replaced it. */
    coercedValue: unknown;
}
|
|
21
|
+
/**
 * Outcome of attempting to coerce one value.
 */
export interface CoercionResult {
    /** The resulting value: either the coerced value or the input, unchanged. */
    value: unknown;
    /** Whether the coercer reports having changed the value. */
    coerced: boolean;
    /** Details of the change; present when a coercion was reported. */
    warning?: CoercionWarning;
}
|
|
29
|
+
/**
 * Map a string onto one of the allowed enum values.
 *
 * An exact match is returned untouched. Otherwise both sides are compared
 * after trimming, lower-casing and replacing runs of whitespace/hyphens with
 * underscores, so "Series A" matches "series_a".
 *
 * @param value - The string value to coerce
 * @param enumValues - Allowed enum values
 * @param fieldName - Field name used in the warning
 * @returns The matching enum value, or the original value when nothing matches
 */
export declare function coerceEnumValue(
    value: string,
    enumValues: string[],
    fieldName: string
): CoercionResult;
|
|
38
|
+
/**
 * Normalise a date string to ISO `YYYY-MM-DD`.
 *
 * Recognised inputs are ISO dates (optionally followed by a `T...` time part),
 * US `MM/DD/YYYY` or `MM/DD/YY`, European `DD-MM-YYYY` / `DD.MM.YYYY`, and
 * written English forms such as "January 15, 2024" or "15 Jan 2024".
 *
 * @param value - The date string to coerce
 * @param fieldName - Field name used in the warning
 * @returns The ISO date, or the original string when it cannot be parsed
 */
export declare function coerceDateValue(
    value: string,
    fieldName: string
): CoercionResult;
|
|
46
|
+
/**
 * Coerce a single value towards the type a schema field expects.
 *
 * `null` and `undefined` pass through unchanged. Types other than number,
 * integer, boolean and string are returned as-is.
 *
 * @param value - The value to coerce
 * @param targetType - The field type the value should have
 * @param fieldName - Field name used in the warning
 * @param isOptional - When true, null-like strings (for example "null",
 *   "n/a" or an empty string) are coerced to `null`; defaults to false
 * @returns The coerced value together with a warning when a change was made
 */
export declare function coerceValue(
    value: unknown,
    targetType: FieldType,
    fieldName: string,
    isOptional?: boolean
): CoercionResult;
|
|
56
|
+
/**
 * Coerce every schema-described value of an extracted data object.
 *
 * Covers:
 * - top-level fields
 * - enum normalisation for string fields that declare `enum`
 * - date normalisation for string fields with a date / date-time format
 * - nested objects and object items inside arrays (recursively)
 *
 * Keys of `data` that have no entry in `fields` are copied through untouched.
 *
 * @param data - The extracted data object
 * @param fields - Field definitions from the schema
 * @returns A coerced copy of the data plus every warning that was generated
 */
export declare function coerceExtractedData(
    data: Record<string, unknown>,
    fields: Record<string, FieldDefinition>
): {
    data: Record<string, unknown>;
    warnings: CoercionWarning[];
};
|
|
73
|
+
//# sourceMappingURL=coercion.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"coercion.d.ts","sourceRoot":"","sources":["../../src/core/coercion.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAuB,MAAM,qBAAqB,CAAC;AAE3F;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,OAAO,CAAC;IACvB,YAAY,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B,KAAK,EAAE,OAAO,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,eAAe,CAAC;CAC7B;AAyBD;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC3B,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,MAAM,EAAE,EACpB,SAAS,EAAE,MAAM,GAClB,cAAc,CAyBhB;AA6DD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC3B,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,GAClB,cAAc,CA2ChB;AAED;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CACvB,KAAK,EAAE,OAAO,EACd,UAAU,EAAE,SAAS,EACrB,SAAS,EAAE,MAAM,EACjB,UAAU,GAAE,OAAe,GAC5B,cAAc,CAuChB;AAyKD;;;;;;;;;;;;GAYG;AACH,wBAAgB,mBAAmB,CAC/B,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC7B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,GACxC;IAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAAC,QAAQ,EAAE,eAAe,EAAE,CAAA;CAAE,CA8EhE"}
|
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type coercion module - converts LLM output to expected types
|
|
3
|
+
*
|
|
4
|
+
* Handles common LLM quirks like:
|
|
5
|
+
* - String "null" instead of null
|
|
6
|
+
* - String numbers like "123" instead of 123
|
|
7
|
+
* - String booleans like "true" instead of true
|
|
8
|
+
* - Enum case mismatch like "Series A" instead of "series_a"
|
|
9
|
+
* - Date format variations like "11/20/24" instead of "2024-11-20"
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Lower-cased tokens that stand for "no value" and may be coerced to null.
 * Callers are expected to trim and lower-case a string before looking it up.
 */
const NULL_STRINGS = new Set([
    'null',
    'none',
    'n/a',
    'na',
    'undefined',
    '',
]);

/**
 * Lower-cased tokens read as boolean `true`.
 */
const TRUE_STRINGS = new Set([
    'true',
    'yes',
    '1',
]);

/**
 * Lower-cased tokens read as boolean `false`.
 */
const FALSE_STRINGS = new Set([
    'false',
    'no',
    '0',
]);
|
|
23
|
+
/**
 * Canonicalise a string for enum comparison.
 *
 * Trims the value, lower-cases it and replaces every run of whitespace or
 * hyphens with a single underscore, so "Series A" and "series-a" both become
 * "series_a".
 */
function normalizeEnumValue(value) {
    return value.trim().toLowerCase().replace(/[\s-]+/g, '_');
}

/**
 * Coerce an enum value to match one of the expected enum values.
 *
 * An exact match is returned untouched. Otherwise the value and each allowed
 * value are compared in normalised form (see normalizeEnumValue) and the first
 * allowed value that matches is returned together with a warning.
 *
 * @param value - The string value to coerce
 * @param enumValues - Array of allowed enum values
 * @param fieldName - Field name for warning messages
 * @returns CoercionResult with matched enum value or original if no match
 */
export function coerceEnumValue(value, enumValues, fieldName) {
    // Exact match - no coercion needed
    if (enumValues.includes(value)) {
        return { value, coerced: false };
    }

    const normalized = normalizeEnumValue(value);
    const match = enumValues.find((candidate) => normalizeEnumValue(candidate) === normalized);

    // Compare against undefined rather than testing truthiness: an empty-string
    // enum member is a legitimate match but is falsy.
    if (match === undefined) {
        // No match found - return original (validation will catch it)
        return { value, coerced: false };
    }

    return {
        value: match,
        coerced: true,
        warning: {
            field: fieldName,
            message: `Coerced enum value '${value}' to '${match}'`,
            originalValue: value,
            coercedValue: match,
        },
    };
}
|
|
61
|
+
/**
 * Lower-cased English month names and abbreviations mapped to month numbers
 * (1-12). Shared by both written-date patterns. A Map is used so that words
 * such as "constructor" cannot resolve to inherited object properties.
 */
const MONTH_NUMBERS = new Map([
    ['january', 1], ['jan', 1], ['february', 2], ['feb', 2], ['march', 3], ['mar', 3],
    ['april', 4], ['apr', 4], ['may', 5], ['june', 6], ['jun', 6],
    ['july', 7], ['jul', 7], ['august', 8], ['aug', 8], ['september', 9], ['sep', 9], ['sept', 9],
    ['october', 10], ['oct', 10], ['november', 11], ['nov', 11], ['december', 12], ['dec', 12],
]);

/**
 * Build date parts from a written date; returns null for an unknown month name.
 */
function parseWrittenDate(monthName, dayText, yearText) {
    const month = MONTH_NUMBERS.get(monthName.toLowerCase());
    if (!month) {
        return null;
    }
    return { year: Number.parseInt(yearText, 10), month, day: Number.parseInt(dayText, 10) };
}

/**
 * Common date format patterns and their parsing logic
 * Supports: MM/DD/YY, MM/DD/YYYY, DD-MM-YYYY, YYYY-MM-DD, etc.
 *
 * Patterns are tried in order. Each `parse` receives the regex match and
 * returns `{ year, month, day }`, or null when the match is not a date.
 */
const DATE_PATTERNS = [
    // ISO format: YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS
    {
        regex: /^(\d{4})-(\d{1,2})-(\d{1,2})(?:T[\d:]+)?$/,
        parse: (m) => ({
            year: Number.parseInt(m[1], 10),
            month: Number.parseInt(m[2], 10),
            day: Number.parseInt(m[3], 10),
        }),
    },
    // US format: MM/DD/YYYY or MM/DD/YY
    {
        regex: /^(\d{1,2})\/(\d{1,2})\/(\d{2,4})$/,
        parse: (m) => {
            let year = Number.parseInt(m[3], 10);
            // Two-digit years pivot at 50: 00-49 -> 2000s, 50-99 -> 1900s.
            if (year < 100) {
                year += year < 50 ? 2000 : 1900;
            }
            return { year, month: Number.parseInt(m[1], 10), day: Number.parseInt(m[2], 10) };
        },
    },
    // European format: DD-MM-YYYY or DD.MM.YYYY
    {
        regex: /^(\d{1,2})[-.](\d{1,2})[-.](\d{4})$/,
        parse: (m) => ({
            year: Number.parseInt(m[3], 10),
            month: Number.parseInt(m[2], 10),
            day: Number.parseInt(m[1], 10),
        }),
    },
    // Written format: January 15, 2024 or Jan 15, 2024
    {
        regex: /^([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4})$/,
        parse: (m) => parseWrittenDate(m[1], m[2], m[3]),
    },
    // Written format: 15 January 2024 or 15 Jan 2024
    {
        regex: /^(\d{1,2})\s+([A-Za-z]+)\s+(\d{4})$/,
        parse: (m) => parseWrittenDate(m[2], m[1], m[3]),
    },
];

/**
 * Check that the parts name a real calendar day between 1900 and 2100.
 *
 * Beyond the simple range checks, the day is verified against the actual
 * month length (including leap years) by round-tripping through Date.UTC,
 * which would otherwise silently roll "Feb 30" over into March.
 */
function isCalendarDate({ year, month, day }) {
    if (year < 1900 || year > 2100 || month < 1 || month > 12 || day < 1 || day > 31) {
        return false;
    }
    const probe = new Date(Date.UTC(year, month - 1, day));
    return probe.getUTCFullYear() === year
        && probe.getUTCMonth() === month - 1
        && probe.getUTCDate() === day;
}

/**
 * Coerce a date string to ISO format (YYYY-MM-DD)
 *
 * A string already in strict `YYYY-MM-DD` form is returned (trimmed) without
 * being reported as coerced. Any other recognised form is rewritten and
 * reported with a warning; this includes ISO timestamps, whose time part is
 * dropped. Strings that match no pattern, or that do not describe a real
 * calendar day, are returned unchanged so validation can reject them.
 *
 * @param value - The date string to coerce
 * @param fieldName - Field name for warning messages
 * @returns CoercionResult with ISO date or original if not parseable
 */
export function coerceDateValue(value, fieldName) {
    const trimmed = value.trim();

    // Already in ISO format without time - no coercion needed
    if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) {
        return { value: trimmed, coerced: false };
    }

    for (const { regex, parse } of DATE_PATTERNS) {
        const match = regex.exec(trimmed);
        if (!match) {
            continue;
        }
        const parts = parse(match);
        if (!parts || !isCalendarDate(parts)) {
            continue;
        }
        const isoDate = `${parts.year}-${String(parts.month).padStart(2, '0')}-${String(parts.day).padStart(2, '0')}`;
        // The strict-ISO fast path above has already returned, so reaching this
        // point always means the text was rewritten (reformatted, zero-padded,
        // or stripped of its time part) and must be reported.
        return {
            value: isoDate,
            coerced: true,
            warning: {
                field: fieldName,
                message: `Coerced date '${value}' to ISO format '${isoDate}'`,
                originalValue: value,
                coercedValue: isoDate,
            },
        };
    }

    // No pattern matched - return original
    return { value, coerced: false };
}
|
|
164
|
+
/**
 * Coerce a single value towards the type its schema field expects.
 *
 * `null` / `undefined` are passed through. For optional fields a null-like
 * string (see NULL_STRINGS) becomes `null`; for required fields such strings
 * fall through to the type-specific coercer. Target types without a dedicated
 * coercer are returned unchanged.
 *
 * @param value - The value to coerce
 * @param targetType - The expected field type
 * @param fieldName - Field name for warning messages
 * @param isOptional - Whether the field is optional (affects null coercion)
 * @returns CoercionResult with the coerced value and any warnings
 */
export function coerceValue(value, targetType, fieldName, isOptional = false) {
    // Nothing to do for values that are already absent.
    if (value == null) {
        return { value, coerced: false };
    }

    // Null-like strings only turn into null when the field may be null.
    const isNullLikeString = typeof value === 'string'
        && NULL_STRINGS.has(value.trim().toLowerCase());
    if (isNullLikeString && isOptional) {
        return {
            value: null,
            coerced: true,
            warning: {
                field: fieldName,
                message: `Coerced '${value}' string to null`,
                originalValue: value,
                coercedValue: null,
            },
        };
    }

    if (targetType === 'number') {
        return coerceToNumber(value, fieldName);
    }
    if (targetType === 'integer') {
        return coerceToInteger(value, fieldName);
    }
    if (targetType === 'boolean') {
        return coerceToBoolean(value, fieldName);
    }
    if (targetType === 'string') {
        return coerceToString(value, fieldName);
    }
    return { value, coerced: false };
}
|
|
211
|
+
/**
 * Strict decimal number syntax: optional sign, digits with an optional
 * fraction (or a bare fraction), and an optional exponent. The whole string
 * must match, so partial numbers such as "12abc" or "1,234" are rejected.
 */
const NUMERIC_STRING_PATTERN = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;

/**
 * Coerce value to number
 *
 * - Numbers are returned unchanged.
 * - Strings are converted only when the entire trimmed text is a finite
 *   decimal number. Anything else (including null-like strings, dates,
 *   "Infinity" and text with trailing characters) is returned unchanged so
 *   that validation can report it, rather than being cut down to a numeric
 *   prefix.
 * - Booleans become 1 / 0.
 *
 * @param value - The value to coerce
 * @param fieldName - Field name for warning messages
 * @returns CoercionResult with the number, or the original value if it could not be converted
 */
function coerceToNumber(value, fieldName) {
    // Already a number
    if (typeof value === 'number') {
        return { value, coerced: false };
    }

    // String to number
    if (typeof value === 'string') {
        const trimmed = value.trim();
        if (NUMERIC_STRING_PATTERN.test(trimmed)) {
            const parsed = Number(trimmed);
            // Guards against overflow such as "1e999", which evaluates to Infinity.
            if (Number.isFinite(parsed)) {
                return {
                    value: parsed,
                    coerced: true,
                    warning: {
                        field: fieldName,
                        message: `Coerced string '${value}' to number ${parsed}`,
                        originalValue: value,
                        coercedValue: parsed,
                    },
                };
            }
        }
        return { value, coerced: false }; // Let validation handle it
    }

    // Boolean to number
    if (typeof value === 'boolean') {
        const num = value ? 1 : 0;
        return {
            value: num,
            coerced: true,
            warning: {
                field: fieldName,
                message: `Coerced boolean ${value} to number ${num}`,
                originalValue: value,
                coercedValue: num,
            },
        };
    }

    return { value, coerced: false };
}
|
|
257
|
+
/**
 * Coerce value to integer
 *
 * The value is first coerced to a number. An integer result is returned as
 * produced by that step; a finite non-integer is truncated towards zero and
 * reported with a warning. Values that do not become a finite number (NaN and
 * ±Infinity included) are returned unchanged, since truncating them cannot
 * produce an integer.
 *
 * @param value - The value to coerce
 * @param fieldName - Field name for warning messages
 * @returns CoercionResult with the integer, or the original value if it could not be converted
 */
function coerceToInteger(value, fieldName) {
    // First coerce to number
    const numberResult = coerceToNumber(value, fieldName);
    const candidate = numberResult.value;

    // Not a usable number - let validation handle the original value
    if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
        return { value, coerced: false };
    }

    // If it's already an integer, keep the number-coercion result as-is
    if (Number.isInteger(candidate)) {
        return numberResult;
    }

    // Truncate to integer
    const intValue = Math.trunc(candidate);
    return {
        value: intValue,
        coerced: true,
        warning: {
            field: fieldName,
            message: `Coerced ${value} to integer ${intValue}`,
            originalValue: value,
            coercedValue: intValue,
        },
    };
}
|
|
283
|
+
/**
 * Coerce value to boolean
 *
 * Booleans pass through. Strings are trimmed and lower-cased, then looked up
 * in TRUE_STRINGS / FALSE_STRINGS. Numbers map to `false` for 0 and `true`
 * for anything else. Every other input is returned unchanged.
 *
 * @param value - The value to coerce
 * @param fieldName - Field name for warning messages
 * @returns CoercionResult with the boolean, or the original value if it could not be converted
 */
function coerceToBoolean(value, fieldName) {
    switch (typeof value) {
        case 'boolean':
            return { value, coerced: false };

        case 'string': {
            const token = value.trim().toLowerCase();
            let parsed;
            if (TRUE_STRINGS.has(token)) {
                parsed = true;
            }
            else if (FALSE_STRINGS.has(token)) {
                parsed = false;
            }
            else {
                // Unrecognised text is left for validation to report.
                break;
            }
            return {
                value: parsed,
                coerced: true,
                warning: {
                    field: fieldName,
                    message: `Coerced string '${value}' to boolean ${parsed}`,
                    originalValue: value,
                    coercedValue: parsed,
                },
            };
        }

        case 'number': {
            const boolValue = value !== 0;
            return {
                value: boolValue,
                coerced: true,
                warning: {
                    field: fieldName,
                    message: `Coerced number ${value} to boolean ${boolValue}`,
                    originalValue: value,
                    coercedValue: boolValue,
                },
            };
        }

        default:
            break;
    }
    return { value, coerced: false };
}
|
|
335
|
+
/**
 * Coerce value to string
 *
 * Only numbers and booleans are converted (via String()); strings and all
 * other values are returned unchanged.
 *
 * @param value - The value to coerce
 * @param fieldName - Field name for warning messages
 * @returns CoercionResult with the string, or the original value if it was not converted
 */
function coerceToString(value, fieldName) {
    const kind = typeof value;

    // Strings need no work; objects, arrays, etc. are left for validation.
    if (kind !== 'number' && kind !== 'boolean') {
        return { value, coerced: false };
    }

    const text = String(value);
    const warning = {
        field: fieldName,
        message: `Coerced ${kind} ${value} to string '${text}'`,
        originalValue: value,
        coercedValue: text,
    };
    return { value: text, coerced: true, warning };
}
|
|
359
|
+
/**
 * Coerce all values in an extracted data object (recursive)
 *
 * Handles:
 * - Top-level field coercion
 * - Enum value normalization (case-insensitive)
 * - Date normalization for string fields with a date / date-time format
 * - Null-like strings ("null", "n/a", ...) on optional fields, including
 *   optional enum and date fields
 * - Recursive coercion for array items
 * - Recursive coercion for nested objects
 *
 * Only keys that are own properties of `data` and described in `fields` are
 * coerced; every other key of `data` is copied through unchanged. The input
 * object is not modified.
 *
 * @param data - The extracted data object
 * @param fields - Field definitions from schema
 * @returns Coerced data and any warnings generated
 */
export function coerceExtractedData(data, fields) {
    const coercedData = { ...data };
    const warnings = [];

    for (const [fieldName, fieldDef] of Object.entries(fields)) {
        // Own-property check: the `in` operator would also match inherited
        // keys (e.g. a schema field called "constructor" or "toString") and
        // copy prototype members into the output.
        if (!Object.prototype.hasOwnProperty.call(data, fieldName)) {
            continue;
        }
        const value = data[fieldName];

        // Handle null/undefined - no coercion needed
        if (value === null || value === undefined) {
            coercedData[fieldName] = value;
            continue;
        }

        // Handle array type - recursively coerce items
        if (fieldDef.type === 'array' && Array.isArray(value) && fieldDef.items) {
            const { array: coercedArray, warnings: arrayWarnings } = coerceArrayItems(value, fieldDef.items, fieldName);
            coercedData[fieldName] = coercedArray;
            warnings.push(...arrayWarnings);
            continue;
        }

        // Handle object type - recursively coerce properties
        if (fieldDef.type === 'object' && typeof value === 'object' && !Array.isArray(value) && fieldDef.properties) {
            const { data: coercedObj, warnings: objWarnings } = coerceExtractedData(value, fieldDef.properties);
            // Prefix nested warning paths with this field's name
            for (const w of objWarnings) {
                w.field = `${fieldName}.${w.field}`;
            }
            coercedData[fieldName] = coercedObj;
            warnings.push(...objWarnings);
            continue;
        }

        // Handle strings with an enum or a date/date-time format
        if (fieldDef.type === 'string' && typeof value === 'string') {
            const wantsDate = fieldDef.format === 'date' || fieldDef.format === 'date-time';
            if (fieldDef.enum || wantsDate) {
                // Enum coercion takes precedence when a field declares both.
                const specialised = fieldDef.enum
                    ? coerceEnumValue(value, fieldDef.enum, fieldName)
                    : coerceDateValue(value, fieldName);

                // The value was "recognised" if it was rewritten, or if it is
                // already an exact enum member.
                const recognised = specialised.coerced
                    || (fieldDef.enum ? fieldDef.enum.includes(value) : false);

                let chosen = specialised;
                if (!recognised) {
                    // Unrecognised text may still be a null-like string such as
                    // "N/A"; optional fields get the same null coercion as any
                    // other optional field. Real dates are never null-like, so
                    // valid ISO dates are unaffected by this check.
                    const nullCheck = coerceValue(value, fieldDef.type, fieldName, fieldDef.optional ?? false);
                    if (nullCheck.value === null) {
                        chosen = nullCheck;
                    }
                }

                coercedData[fieldName] = chosen.value;
                if (chosen.warning) {
                    warnings.push(chosen.warning);
                }
                continue;
            }
        }

        // Standard type coercion
        const result = coerceValue(value, fieldDef.type, fieldName, fieldDef.optional ?? false);
        coercedData[fieldName] = result.value;
        if (result.warning) {
            warnings.push(result.warning);
        }
    }

    return { data: coercedData, warnings };
}

/**
 * Coerce array items recursively
 *
 * Object items are coerced against `itemDef.properties` when the item
 * definition is an object type that declares properties; warnings from those
 * items are re-rooted under `fieldName[index]`. All other items are copied
 * through unchanged.
 *
 * @param array - The array to coerce
 * @param itemDef - Definition of array items
 * @param fieldName - Parent field name for error paths
 * @returns Coerced array and any warnings
 */
function coerceArrayItems(array, itemDef, fieldName) {
    const coercedArray = [];
    const warnings = [];

    for (let i = 0; i < array.length; i++) {
        const item = array[i];
        const itemPath = `${fieldName}[${i}]`;

        // Only handle object items for now (as per ArrayItemDefinition)
        const isCoercibleObject = itemDef.type === 'object'
            && typeof item === 'object'
            && item !== null
            && !Array.isArray(item)
            && itemDef.properties;

        if (!isCoercibleObject) {
            // Non-object items are passed through as-is
            coercedArray.push(item);
            continue;
        }

        const { data: coercedItem, warnings: itemWarnings } = coerceExtractedData(item, itemDef.properties);
        // Prefix nested warning paths with this item's path
        for (const w of itemWarnings) {
            w.field = `${itemPath}.${w.field}`;
        }
        coercedArray.push(coercedItem);
        warnings.push(...itemWarnings);
    }

    return { array: coercedArray, warnings };
}
|
|
461
|
+
//# sourceMappingURL=coercion.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"coercion.js","sourceRoot":"","sources":["../../src/core/coercion.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAuBH;;GAEG;AACH,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC;AAE7E;;GAEG;AACH,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;AAEnD;;GAEG;AACH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC;AAEpD;;;GAGG;AACH,SAAS,kBAAkB,CAAC,KAAa;IACrC,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC9D,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC3B,KAAa,EACb,UAAoB,EACpB,SAAiB;IAEjB,mCAAmC;IACnC,IAAI,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACrC,CAAC;IAED,uBAAuB;IACvB,MAAM,UAAU,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,kBAAkB,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,CAAC;IAEzE,IAAI,KAAK,EAAE,CAAC;QACR,OAAO;YACH,KAAK,EAAE,KAAK;YACZ,OAAO,EAAE,IAAI;YACb,OAAO,EAAE;gBACL,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,uBAAuB,KAAK,SAAS,KAAK,GAAG;gBACtD,aAAa,EAAE,KAAK;gBACpB,YAAY,EAAE,KAAK;aACtB;SACJ,CAAC;IACN,CAAC;IAED,8DAA8D;IAC9D,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,MAAM,aAAa,GAGd;IACD,gDAAgD;IAChD;QACI,KAAK,EAAE,2CAA2C;QAClD,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;KACvF;IACD,oCAAoC;IACpC;QACI,KAAK,EAAE,mCAAmC;QAC1C,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE;YACT,IAAI,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,IAAI,GAAG,GAAG;gBAAE,IAAI,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;YAChD,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,CAAC;KACJ;IACD,4CAA4C;IAC5C;QACI,KAAK,EAAE,qCAAqC;QAC5C,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,E
AAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;KACvF;IACD,mDAAmD;IACnD;QACI,KAAK,EAAE,uCAAuC;QAC9C,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE;YACT,MAAM,UAAU,GAA2B;gBACvC,OAAO,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;gBACzD,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;gBACzC,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;gBACjE,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE;aACrE,CAAC;YACF,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YAC7C,IAAI,CAAC,KAAK;gBAAE,OAAO,IAAI,CAAC;YACxB,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,CAAC;KACJ;IACD,iDAAiD;IACjD;QACI,KAAK,EAAE,qCAAqC;QAC5C,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE;YACT,MAAM,UAAU,GAA2B;gBACvC,OAAO,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;gBACzD,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;gBACzC,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;gBACjE,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE;aACrE,CAAC;YACF,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YAC7C,IAAI,CAAC,KAAK;gBAAE,OAAO,IAAI,CAAC;YACxB,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,CAAC;KACJ;CACJ,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAC3B,KAAa,EACb,SAAiB;IAEjB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;IAE7B,0DAA0D
;IAC1D,IAAI,qBAAqB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC9C,CAAC;IAED,mBAAmB;IACnB,KAAK,MAAM,OAAO,IAAI,aAAa,EAAE,CAAC;QAClC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC3C,IAAI,KAAK,EAAE,CAAC;YACR,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,MAAM,EAAE,CAAC;gBACT,+BAA+B;gBAC/B,IAAI,MAAM,CAAC,KAAK,IAAI,CAAC,IAAI,MAAM,CAAC,KAAK,IAAI,EAAE;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,MAAM,CAAC,GAAG,IAAI,EAAE;oBACnC,MAAM,CAAC,IAAI,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;oBAE7C,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;oBAEjH,kEAAkE;oBAClE,IAAI,OAAO,KAAK,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;wBACtD,OAAO;4BACH,KAAK,EAAE,OAAO;4BACd,OAAO,EAAE,IAAI;4BACb,OAAO,EAAE;gCACL,KAAK,EAAE,SAAS;gCAChB,OAAO,EAAE,iBAAiB,KAAK,oBAAoB,OAAO,GAAG;gCAC7D,aAAa,EAAE,KAAK;gCACpB,YAAY,EAAE,OAAO;6BACxB;yBACJ,CAAC;oBACN,CAAC;oBAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;gBAC9C,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IAED,uCAAuC;IACvC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,WAAW,CACvB,KAAc,EACd,UAAqB,EACrB,SAAiB,EACjB,aAAsB,KAAK;IAE3B,8CAA8C;IAC9C,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACxC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACrC,CAAC;IAED,2DAA2D;IAC3D,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC9C,IAAI,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,2CAA2C;YAC3C,IAAI,UAAU,EAAE,CAAC;gBACb,OAAO;oBACH,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,IAAI;oBACb,OAAO,EAAE;wBACL,KAAK,EAAE,SAAS;wBAChB,OAAO,EAAE,YAAY,KAAK,kBAAkB;wBAC5C,aAAa,EAAE,KAAK;wBACpB,YAAY,EAAE,IAAI;qBACrB;iBACJ,CAAC;YACN,CAAC;YACD,wDAAwD;QAC5D,CAAC;IACL,CAAC;IAED,QAAQ,UAAU,EAAE,CAAC;QACjB,KAAK,QAAQ;YACT,OAAO,cAAc,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAC5C,KAAK,SAAS;YACV,OAAO,
eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAC7C,KAAK,SAAS;YACV,OAAO,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAC7C,KAAK,QAAQ;YACT,OAAO,cAAc,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAC5C;YACI,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACzC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,KAAc,EAAE,SAAiB;IACrD,mBAAmB;IACnB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACrC,CAAC;IAED,mBAAmB;IACnB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAE7B,oCAAoC;QACpC,IAAI,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC1C,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,2BAA2B;QACjE,CAAC;QAED,yBAAyB;QACzB,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;YACjB,OAAO;gBACH,KAAK,EAAE,MAAM;gBACb,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE;oBACL,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,mBAAmB,KAAK,eAAe,MAAM,EAAE;oBACxD,aAAa,EAAE,KAAK;oBACpB,YAAY,EAAE,MAAM;iBACvB;aACJ,CAAC;QACN,CAAC;IACL,CAAC;IAED,oBAAoB;IACpB,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,OAAO;YACH,KAAK,EAAE,GAAG;YACV,OAAO,EAAE,IAAI;YACb,OAAO,EAAE;gBACL,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,mBAAmB,KAAK,cAAc,GAAG,EAAE;gBACpD,aAAa,EAAE,KAAK;gBACpB,YAAY,EAAE,GAAG;aACpB;SACJ,CAAC;IACN,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAc,EAAE,SAAiB;IACtD,yBAAyB;IACzB,MAAM,YAAY,GAAG,cAAc,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAEtD,IAAI,OAAO,YAAY,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QACzC,yCAAyC;QACzC,IAAI,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,YAAY,CAAC;QACxB,CAAC;QAED,sBAAsB;QACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAChD,OAAO;YACH,KAAK,EAAE,QAAQ;YACf,OAAO,EAAE,IAAI;YACb,OAAO,EAAE;gBACL,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,WAAW,KAAK,eAAe,QAAQ,EAAE;gBAClD,aAAa,EAAE,KAAK;gBACpB,YAAY,EAAE,QAAQ;aACzB;SACJ,CAAC;IACN,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAc
,EAAE,SAAiB;IACtD,oBAAoB;IACpB,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;QAC7B,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACrC,CAAC;IAED,oBAAoB;IACpB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAE9C,IAAI,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,OAAO;gBACH,KAAK,EAAE,IAAI;gBACX,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE;oBACL,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,mBAAmB,KAAK,mBAAmB;oBACpD,aAAa,EAAE,KAAK;oBACpB,YAAY,EAAE,IAAI;iBACrB;aACJ,CAAC;QACN,CAAC;QAED,IAAI,aAAa,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YAChC,OAAO;gBACH,KAAK,EAAE,KAAK;gBACZ,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE;oBACL,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,mBAAmB,KAAK,oBAAoB;oBACrD,aAAa,EAAE,KAAK;oBACpB,YAAY,EAAE,KAAK;iBACtB;aACJ,CAAC;QACN,CAAC;IACL,CAAC;IAED,oBAAoB;IACpB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,KAAK,KAAK,CAAC,CAAC;QAC9B,OAAO;YACH,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,IAAI;YACb,OAAO,EAAE;gBACL,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,kBAAkB,KAAK,eAAe,SAAS,EAAE;gBAC1D,aAAa,EAAE,KAAK;gBACpB,YAAY,EAAE,SAAS;aAC1B;SACJ,CAAC;IACN,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,KAAc,EAAE,SAAiB;IACrD,mBAAmB;IACnB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACrC,CAAC;IAED,8BAA8B;IAC9B,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;QAC1D,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;QAC/B,OAAO;YACH,KAAK,EAAE,QAAQ;YACf,OAAO,EAAE,IAAI;YACb,OAAO,EAAE;gBACL,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,WAAW,OAAO,KAAK,IAAI,KAAK,eAAe,QAAQ,GAAG;gBACnE,aAAa,EAAE,KAAK;gBACpB,YAAY,EAAE,QAAQ;aACzB;SACJ,CAAC;IACN,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,mBAAmB,CAC/B,IAA6B,EAC7B,MAAuC;IAEvC,MAAM,WAAW,GAA4B,EAAE,GAAG,IAAI,EAAE,CAAC;IACzD,MAAM,QAAQ,GAAsB,EAAE,CAAC;IAEvC,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACzD,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC;YAAE,SAAS;QAEnC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;QAE9B
,6CAA6C;QAC7C,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxC,WAAW,CAAC,SAAS,CAAC,GAAG,KAAK,CAAC;YAC/B,SAAS;QACb,CAAC;QAED,+CAA+C;QAC/C,IAAI,QAAQ,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YACtE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,gBAAgB,CACrE,KAAK,EACL,QAAQ,CAAC,KAAK,EACd,SAAS,CACZ,CAAC;YACF,WAAW,CAAC,SAAS,CAAC,GAAG,YAAY,CAAC;YACtC,QAAQ,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;YAChC,SAAS;QACb,CAAC;QAED,qDAAqD;QACrD,IAAI,QAAQ,CAAC,IAAI,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;YAC1G,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,mBAAmB,CACnE,KAAgC,EAChC,QAAQ,CAAC,UAAU,CACtB,CAAC;YACF,6BAA6B;YAC7B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;gBAC1B,CAAC,CAAC,KAAK,GAAG,GAAG,SAAS,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YACxC,CAAC;YACD,WAAW,CAAC,SAAS,CAAC,GAAG,UAAU,CAAC;YACpC,QAAQ,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;YAC9B,SAAS;QACb,CAAC;QAED,oDAAoD;QACpD,IAAI,QAAQ,CAAC,IAAI,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC3E,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YACpE,WAAW,CAAC,SAAS,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC;YAC1C,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACtC,CAAC;YACD,SAAS;QACb,CAAC;QAED,8DAA8D;QAC9D,IAAI,QAAQ,CAAC,IAAI,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,QAAQ;YACvD,CAAC,QAAQ,CAAC,MAAM,KAAK,MAAM,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW,CAAC,EAAE,CAAC;YAClE,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YACrD,WAAW,CAAC,SAAS,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC;YAC1C,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACtC,CAAC;YACD,SAAS;QACb,CAAC;QAED,yBAAyB;QACzB,MAAM,MAAM,GAAG,WAAW,CACtB,KAAK,EACL,QAAQ,CAAC,IAAI,EACb,SAAS,EACT,QAAQ,CAAC,QAAQ,IAAI,KAAK,CAC7B,CAAC;QAEF,WAAW,CAAC,SAAS,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC;QACtC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAClC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,
QAAQ,EAAE,CAAC;AAC3C,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,gBAAgB,CACrB,KAAgB,EAChB,OAA4B,EAC5B,SAAiB;IAEjB,MAAM,YAAY,GAAc,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAsB,EAAE,CAAC;IAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,QAAQ,GAAG,GAAG,SAAS,IAAI,CAAC,GAAG,CAAC;QAEtC,gEAAgE;QAChE,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvH,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,mBAAmB,CACrE,IAA+B,EAC/B,OAAO,CAAC,UAAU,CACrB,CAAC;YACF,6BAA6B;YAC7B,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;gBAC3B,CAAC,CAAC,KAAK,GAAG,GAAG,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YACvC,CAAC;YACD,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;QACnC,CAAC;aAAM,CAAC;YACJ,4CAA4C;YAC5C,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;IACL,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC;AAC7C,CAAC"}
|
package/dist/core/index.d.ts
CHANGED
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export { ExtractionPipeline, extract } from './pipeline.js';
|
|
5
5
|
export { validateExtractedData } from './validator.js';
|
|
6
|
+
export { coerceValue, coerceExtractedData, coerceEnumValue, coerceDateValue } from './coercion.js';
|
|
6
7
|
export { PipelineError, PipelineErrorCodes } from './errors.js';
|
|
7
8
|
export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './preprocessor.js';
|
|
8
9
|
export type { PreprocessResult } from './preprocessor.js';
|
|
9
10
|
export type { PipelineConfig, ExtractionRequest, PipelineResult, StepResult, HtmlStripOptions, PreprocessingConfig, } from './types.js';
|
|
11
|
+
export type { CoercionWarning, CoercionResult } from './coercion.js';
|
|
10
12
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/core/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,mBAAmB,CAAC;AAC3B,YAAY,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,gBAAgB,EAChB,mBAAmB,GACtB,MAAM,YAAY,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACnG,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,mBAAmB,CAAC;AAC3B,YAAY,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,YAAY,EACR,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,gBAAgB,EAChB,mBAAmB,GACtB,MAAM,YAAY,CAAC;AACpB,YAAY,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC"}
|
package/dist/core/index.js
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export { ExtractionPipeline, extract } from './pipeline.js';
|
|
5
5
|
export { validateExtractedData } from './validator.js';
|
|
6
|
+
export { coerceValue, coerceExtractedData, coerceEnumValue, coerceDateValue } from './coercion.js';
|
|
6
7
|
export { PipelineError, PipelineErrorCodes } from './errors.js';
|
|
7
8
|
export { stripHtml, preprocess, preprocessWithDetails, resolveHtmlStripOptions, } from './preprocessor.js';
|
|
8
9
|
//# sourceMappingURL=index.js.map
|
package/dist/core/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,mBAAmB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACnG,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAChE,OAAO,EACH,SAAS,EACT,UAAU,EACV,qBAAqB,EACrB,uBAAuB,GAC1B,MAAM,mBAAmB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,OAAO,KAAK,EAAE,iBAAiB,EAAE,cAAc,EAAc,MAAM,YAAY,CAAC;AAEhF;;GAEG;AACH,qBAAa,kBAAkB;IAC3B,OAAO,CAAC,KAAK,CAAU;gBAEX,KAAK,GAAE,OAAe;IAIlC;;OAEG;IACG,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IAqMlE;;OAEG;IACH,OAAO,CAAC,UAAU;IAsBlB;;OAEG;YACW,eAAe;CAqBhC;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC,CAGjF"}
|
package/dist/core/pipeline.js
CHANGED
|
@@ -33,8 +33,12 @@ export class ExtractionPipeline {
|
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
35
|
// Step 1: Create LLM client
|
|
36
|
+
// Merge top-level maxContextTokens into llmConfig (top-level takes precedence)
|
|
37
|
+
const mergedLLMConfig = request.maxContextTokens
|
|
38
|
+
? { ...request.llmConfig, maxContextTokens: request.maxContextTokens }
|
|
39
|
+
: request.llmConfig;
|
|
36
40
|
const clientStep = this.recordStep('create_client', () => {
|
|
37
|
-
return new LLMClient(
|
|
41
|
+
return new LLMClient(mergedLLMConfig);
|
|
38
42
|
});
|
|
39
43
|
steps.push(clientStep);
|
|
40
44
|
if (!clientStep.success) {
|
|
@@ -59,12 +63,15 @@ export class ExtractionPipeline {
|
|
|
59
63
|
});
|
|
60
64
|
steps.push(validateStep);
|
|
61
65
|
const validation = validateStep.data;
|
|
66
|
+
// Use coerced data for output
|
|
67
|
+
const outputData = validation.coercedData ?? extraction.data;
|
|
62
68
|
if (!validation.valid) {
|
|
63
69
|
const duration = Date.now() - startTime;
|
|
64
70
|
return {
|
|
65
71
|
success: false,
|
|
66
72
|
meetsThreshold: false,
|
|
67
73
|
errors: validation.errors,
|
|
74
|
+
warnings: validation.warnings,
|
|
68
75
|
steps: this.debug ? steps : undefined,
|
|
69
76
|
metadata: {
|
|
70
77
|
duration,
|
|
@@ -91,7 +98,7 @@ export class ExtractionPipeline {
|
|
|
91
98
|
const duration = Date.now() - startTime;
|
|
92
99
|
return {
|
|
93
100
|
success: false,
|
|
94
|
-
data:
|
|
101
|
+
data: outputData,
|
|
95
102
|
confidence: extraction.confidence,
|
|
96
103
|
confidenceByField: extraction.confidenceByField,
|
|
97
104
|
meetsThreshold: false,
|
|
@@ -101,6 +108,7 @@ export class ExtractionPipeline {
|
|
|
101
108
|
code: PipelineErrorCodes.CONFIDENCE_ERROR,
|
|
102
109
|
},
|
|
103
110
|
],
|
|
111
|
+
warnings: validation.warnings,
|
|
104
112
|
steps: this.debug ? steps : undefined,
|
|
105
113
|
metadata: {
|
|
106
114
|
duration,
|
|
@@ -113,11 +121,12 @@ export class ExtractionPipeline {
|
|
|
113
121
|
const duration = Date.now() - startTime;
|
|
114
122
|
return {
|
|
115
123
|
success: true,
|
|
116
|
-
data:
|
|
124
|
+
data: outputData,
|
|
117
125
|
confidence: extraction.confidence,
|
|
118
126
|
confidenceByField: extraction.confidenceByField,
|
|
119
127
|
meetsThreshold: confidenceCheck.meetsThreshold,
|
|
120
128
|
errors: [],
|
|
129
|
+
warnings: validation.warnings,
|
|
121
130
|
steps: this.debug ? steps : undefined,
|
|
122
131
|
metadata: {
|
|
123
132
|
duration,
|