@ordis-dev/ordis 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +192 -0
  3. package/dist/__tests__/api.test.d.ts +5 -0
  4. package/dist/__tests__/api.test.d.ts.map +1 -0
  5. package/dist/__tests__/api.test.js +95 -0
  6. package/dist/__tests__/api.test.js.map +1 -0
  7. package/dist/__tests__/cli.test.d.ts +6 -0
  8. package/dist/__tests__/cli.test.d.ts.map +1 -0
  9. package/dist/__tests__/cli.test.js +103 -0
  10. package/dist/__tests__/cli.test.js.map +1 -0
  11. package/dist/cli/__tests__/cli.test.d.ts +5 -0
  12. package/dist/cli/__tests__/cli.test.d.ts.map +1 -0
  13. package/dist/cli/__tests__/cli.test.js +13 -0
  14. package/dist/cli/__tests__/cli.test.js.map +1 -0
  15. package/dist/cli.d.ts +7 -0
  16. package/dist/cli.d.ts.map +1 -0
  17. package/dist/cli.js +225 -0
  18. package/dist/cli.js.map +1 -0
  19. package/dist/core/__tests__/pipeline.test.d.ts +5 -0
  20. package/dist/core/__tests__/pipeline.test.d.ts.map +1 -0
  21. package/dist/core/__tests__/pipeline.test.js +334 -0
  22. package/dist/core/__tests__/pipeline.test.js.map +1 -0
  23. package/dist/core/__tests__/validator.test.d.ts +5 -0
  24. package/dist/core/__tests__/validator.test.d.ts.map +1 -0
  25. package/dist/core/__tests__/validator.test.js +124 -0
  26. package/dist/core/__tests__/validator.test.js.map +1 -0
  27. package/dist/core/errors.d.ts +27 -0
  28. package/dist/core/errors.d.ts.map +1 -0
  29. package/dist/core/errors.js +35 -0
  30. package/dist/core/errors.js.map +1 -0
  31. package/dist/core/index.d.ts +8 -0
  32. package/dist/core/index.d.ts.map +1 -0
  33. package/dist/core/index.js +7 -0
  34. package/dist/core/index.js.map +1 -0
  35. package/dist/core/pipeline.d.ts +28 -0
  36. package/dist/core/pipeline.d.ts.map +1 -0
  37. package/dist/core/pipeline.js +212 -0
  38. package/dist/core/pipeline.js.map +1 -0
  39. package/dist/core/types.d.ts +55 -0
  40. package/dist/core/types.d.ts.map +1 -0
  41. package/dist/core/types.js +5 -0
  42. package/dist/core/types.js.map +1 -0
  43. package/dist/core/validator.d.ts +19 -0
  44. package/dist/core/validator.d.ts.map +1 -0
  45. package/dist/core/validator.js +147 -0
  46. package/dist/core/validator.js.map +1 -0
  47. package/dist/index.d.ts +19 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +18 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/llm/__tests__/client.test.d.ts +5 -0
  52. package/dist/llm/__tests__/client.test.d.ts.map +1 -0
  53. package/dist/llm/__tests__/client.test.js +350 -0
  54. package/dist/llm/__tests__/client.test.js.map +1 -0
  55. package/dist/llm/__tests__/prompt-builder.test.d.ts +5 -0
  56. package/dist/llm/__tests__/prompt-builder.test.d.ts.map +1 -0
  57. package/dist/llm/__tests__/prompt-builder.test.js +171 -0
  58. package/dist/llm/__tests__/prompt-builder.test.js.map +1 -0
  59. package/dist/llm/__tests__/retry.test.d.ts +5 -0
  60. package/dist/llm/__tests__/retry.test.d.ts.map +1 -0
  61. package/dist/llm/__tests__/retry.test.js +350 -0
  62. package/dist/llm/__tests__/retry.test.js.map +1 -0
  63. package/dist/llm/__tests__/token-counter.test.d.ts +5 -0
  64. package/dist/llm/__tests__/token-counter.test.d.ts.map +1 -0
  65. package/dist/llm/__tests__/token-counter.test.js +166 -0
  66. package/dist/llm/__tests__/token-counter.test.js.map +1 -0
  67. package/dist/llm/client.d.ts +56 -0
  68. package/dist/llm/client.d.ts.map +1 -0
  69. package/dist/llm/client.js +266 -0
  70. package/dist/llm/client.js.map +1 -0
  71. package/dist/llm/errors.d.ts +27 -0
  72. package/dist/llm/errors.d.ts.map +1 -0
  73. package/dist/llm/errors.js +35 -0
  74. package/dist/llm/errors.js.map +1 -0
  75. package/dist/llm/index.d.ts +10 -0
  76. package/dist/llm/index.d.ts.map +1 -0
  77. package/dist/llm/index.js +8 -0
  78. package/dist/llm/index.js.map +1 -0
  79. package/dist/llm/prompt-builder.d.ts +13 -0
  80. package/dist/llm/prompt-builder.d.ts.map +1 -0
  81. package/dist/llm/prompt-builder.js +107 -0
  82. package/dist/llm/prompt-builder.js.map +1 -0
  83. package/dist/llm/token-counter.d.ts +82 -0
  84. package/dist/llm/token-counter.d.ts.map +1 -0
  85. package/dist/llm/token-counter.js +109 -0
  86. package/dist/llm/token-counter.js.map +1 -0
  87. package/dist/llm/types.d.ts +96 -0
  88. package/dist/llm/types.d.ts.map +1 -0
  89. package/dist/llm/types.js +5 -0
  90. package/dist/llm/types.js.map +1 -0
  91. package/dist/schemas/__tests__/integration.test.d.ts +5 -0
  92. package/dist/schemas/__tests__/integration.test.d.ts.map +1 -0
  93. package/dist/schemas/__tests__/integration.test.js +366 -0
  94. package/dist/schemas/__tests__/integration.test.js.map +1 -0
  95. package/dist/schemas/__tests__/loader.test.d.ts +5 -0
  96. package/dist/schemas/__tests__/loader.test.d.ts.map +1 -0
  97. package/dist/schemas/__tests__/loader.test.js +271 -0
  98. package/dist/schemas/__tests__/loader.test.js.map +1 -0
  99. package/dist/schemas/__tests__/validator.test.d.ts +5 -0
  100. package/dist/schemas/__tests__/validator.test.d.ts.map +1 -0
  101. package/dist/schemas/__tests__/validator.test.js +592 -0
  102. package/dist/schemas/__tests__/validator.test.js.map +1 -0
  103. package/dist/schemas/errors.d.ts +38 -0
  104. package/dist/schemas/errors.d.ts.map +1 -0
  105. package/dist/schemas/errors.js +49 -0
  106. package/dist/schemas/errors.js.map +1 -0
  107. package/dist/schemas/index.d.ts +8 -0
  108. package/dist/schemas/index.d.ts.map +1 -0
  109. package/dist/schemas/index.js +7 -0
  110. package/dist/schemas/index.js.map +1 -0
  111. package/dist/schemas/loader.d.ts +29 -0
  112. package/dist/schemas/loader.d.ts.map +1 -0
  113. package/dist/schemas/loader.js +71 -0
  114. package/dist/schemas/loader.js.map +1 -0
  115. package/dist/schemas/types.d.ts +75 -0
  116. package/dist/schemas/types.d.ts.map +1 -0
  117. package/dist/schemas/types.js +7 -0
  118. package/dist/schemas/types.js.map +1 -0
  119. package/dist/schemas/validator.d.ts +12 -0
  120. package/dist/schemas/validator.d.ts.map +1 -0
  121. package/dist/schemas/validator.js +211 -0
  122. package/dist/schemas/validator.js.map +1 -0
  123. package/package.json +60 -0
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Prompt builder - generates prompts from schemas
3
+ */
4
/**
 * Builds the system prompt that tells the model how to extract data for a schema.
 *
 * The prompt is assembled in this order: fixed extraction rules, one line per
 * schema field, the confidence threshold (only when `schema.confidence` is
 * set), optional few-shot examples, and a reminder of the required response
 * structure.
 *
 * @param {object} schema - Schema to describe.
 * @param {Object<string, object>} schema.fields - Field definitions keyed by
 *   field name. Each definition may carry `type`, `optional`, `description`,
 *   `enum`, `min`, `max` and `pattern`.
 * @param {{threshold: number}} [schema.confidence] - When present, its
 *   `threshold` is reported in the prompt as a percentage.
 * @param {{includeFewShotExamples?: boolean}} [schema.prompt] - Few-shot
 *   examples are appended only when `includeFewShotExamples` is exactly `true`.
 * @returns {string} The assembled system prompt.
 */
export function buildSystemPrompt(schema) {
  const { fields, confidence, prompt } = schema;
  let promptText = `You are a structured data extraction system. Extract information from text according to the schema below.

Rules:
- Return only valid JSON (no markdown, no explanation)
- Include a confidence score (0-100) for each field
- Use null for missing or uncertain values
- Include all fields in the response, even if null

Schema:
`;

  // Add field definitions
  for (const [fieldName, fieldDef] of Object.entries(fields)) {
    promptText += describeField(fieldName, fieldDef);
  }

  // Add confidence requirements
  if (confidence) {
    promptText += `\n\nConfidence threshold: ${confidence.threshold}% (extractions below this may be rejected)`;
  }

  // Add few-shot examples only if explicitly enabled (default: false).
  // Benchmarks show examples hurt performance for most models.
  if (prompt?.includeFewShotExamples === true) {
    promptText += `\n\nExample extraction:

Input text: "Invoice #INV-2024-0042 dated December 15, 2024 for $1,250.00 USD"

Output:
{
"data": {
"invoice_id": "INV-2024-0042",
"date": "2024-12-15",
"amount": 1250.00,
"currency": "USD"
},
"confidence": 95,
"confidenceByField": {
"invoice_id": 98,
"date": 95,
"amount": 92,
"currency": 95
}
}

Note: Use null for missing or uncertain values. Example with missing data:

Input text: "Order reference A-123. Contact: john@example.com"

Output:
{
"data": {
"order_id": "A-123",
"email": "john@example.com",
"phone": null,
"address": null
},
"confidence": 70,
"confidenceByField": {
"order_id": 95,
"email": 98,
"phone": 0,
"address": 0
}
}`;
  }

  // Add response format reminder
  promptText += `\n\nYour response must follow this exact structure:
{
"data": { ... all fields from schema ... },
"confidence": <number 0-100>,
"confidenceByField": { ... confidence for each field ... }
}`;
  return promptText;
}

/**
 * Renders the single prompt line that describes one schema field.
 *
 * @param {string} fieldName - Name of the field as it appears in the schema.
 * @param {object} fieldDef - Field definition (type plus optional constraints).
 * @returns {string} A line starting with a newline, e.g. "\n- age: number (optional)".
 */
function describeField(fieldName, fieldDef) {
  let line = `\n- ${fieldName}: ${fieldDef.type}`;
  if (fieldDef.optional) {
    line += ' (optional)';
  }
  if (fieldDef.description) {
    line += ` - ${fieldDef.description}`;
  }
  if (fieldDef.enum) {
    line += ` - allowed values: ${fieldDef.enum.join(', ')}`;
  }
  if (fieldDef.min !== undefined || fieldDef.max !== undefined) {
    // A missing bound is rendered as the literal text "null".
    const min = fieldDef.min ?? 'null';
    const max = fieldDef.max ?? 'null';
    line += ` - range: ${min} to ${max}`;
  }
  if (fieldDef.pattern) {
    line += ` - pattern: ${fieldDef.pattern}`;
  }
  return line;
}
101
/**
 * Wraps the raw input text in the user-facing extraction instruction.
 *
 * @param {string} input - Text the model should extract data from.
 * @returns {string} The instruction line, a blank line, then the input text.
 */
export function buildUserPrompt(input) {
  const instruction = 'Extract data from the following text:\n\n';
  // String.prototype.concat stringifies its argument the same way a template literal does.
  return instruction.concat(input);
}
107
+ //# sourceMappingURL=prompt-builder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt-builder.js","sourceRoot":"","sources":["../../src/llm/prompt-builder.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAc;IAC5C,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAExD,IAAI,UAAU,GAAG;;;;;;;;;CASpB,CAAC;IAEE,wBAAwB;IACxB,KAAK,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACzD,UAAU,IAAI,OAAO,SAAS,KAAK,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnD,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACpB,UAAU,IAAI,aAAa,CAAC;QAChC,CAAC;QACD,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;YACvB,UAAU,IAAI,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC/C,CAAC;QACD,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;YAChB,UAAU,IAAI,sBAAsB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACnE,CAAC;QACD,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,IAAI,QAAQ,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;YAC3D,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;YAC/D,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;YAC/D,UAAU,IAAI,aAAa,GAAG,OAAO,GAAG,EAAE,CAAC;QAC/C,CAAC;QACD,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACnB,UAAU,IAAI,eAAe,QAAQ,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC;IACL,CAAC;IAED,8BAA8B;IAC9B,IAAI,UAAU,EAAE,CAAC;QACb,UAAU,IAAI,6BAA6B,UAAU,CAAC,SAAS,4CAA4C,CAAC;IAChH,CAAC;IAED,oEAAoE;IACpE,4DAA4D;IAC5D,IAAI,MAAM,EAAE,sBAAsB,KAAK,IAAI,EAAE,CAAC;QAC1C,UAAU,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwCpB,CAAC;IACC,CAAC;IAED,+BAA+B;IAC/B,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC;IAC5C,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC;IAE5C,UAAU,IAAI;;;;;EAKhB,CAAC;IAEC,OAAO,UAAU,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,KAAa;IACzC,OAAO,4CAA4C,KAAK,EAAE,CAAC;AAC/D,CAAC"}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Token counting and budget management
3
+ */
4
+ /**
5
+ * Token budget breakdown: token allowances for the parts of one request
6
+ */
7
+ export interface TokenBudget {
8
+ /** Maximum tokens for system prompt (NOTE(review): not read by the TokenCounter class in token-counter.js; confirm where it is enforced) */
9
+ system: number;
10
+ /** Maximum tokens for user input (NOTE(review): not read by the TokenCounter class in token-counter.js; confirm where it is enforced) */
11
+ input: number;
12
+ /** Reserved tokens for model output; TokenCounter.calculateUsage adds this amount to every usage total */
13
+ output: number;
14
+ }
15
+ /**
16
+ * Token usage information, as returned by TokenCounter.calculateUsage
17
+ */
18
+ export interface TokenUsage {
19
+ /** Estimated system prompt tokens */
20
+ systemTokens: number;
21
+ /** Estimated input tokens */
22
+ inputTokens: number;
23
+ /** Reserved output tokens (taken from the configured tokenBudget.output) */
24
+ outputTokens: number;
25
+ /** Total estimated tokens (systemTokens + inputTokens + outputTokens) */
26
+ totalTokens: number;
27
+ /** Maximum context tokens allowed */
28
+ maxContextTokens: number;
29
+ /** Percentage of budget used (totalTokens / maxContextTokens * 100; can exceed 100) */
30
+ usagePercent: number;
31
+ }
32
+ /**
33
+ * Token counter configuration; every option is optional and has a default
34
+ */
35
+ export interface TokenCounterConfig {
36
+ /** Maximum context tokens (default: 4096) */
37
+ maxContextTokens?: number;
38
+ /** Token budget breakdown (default: system 1000, input 2000, output 1000) */
39
+ tokenBudget?: TokenBudget;
40
+ /** Warn when usage reaches or exceeds this percentage (default: 90) */
41
+ warnThreshold?: number;
42
+ }
43
+ /**
44
+ * Estimates the token count of a text using a rough approximation
45
+ * Uses a ~4 characters per token heuristic (described as a conservative estimate; actual tokenization varies by model)
46
+ *
47
+ * @param text Text to estimate tokens for
48
+ * @returns Estimated token count: 0 for empty text, otherwise text.length / 4 rounded up
49
+ */
50
+ export declare function estimateTokens(text: string): number;
51
+ /**
52
+ * Token counter for managing LLM context budgets: estimates usage and formats limit/warning messages
53
+ */
54
+ export declare class TokenCounter {
55
+ private config;
56
+ constructor(config?: TokenCounterConfig);
57
+ /**
58
+ * Calculate token usage for system prompt and input; the total also includes the reserved output budget
59
+ */
60
+ calculateUsage(systemPrompt: string, input: string): TokenUsage;
61
+ /**
62
+ * Check if usage exceeds maximum context (totalTokens > maxContextTokens)
63
+ */
64
+ exceedsLimit(usage: TokenUsage): boolean;
65
+ /**
66
+ * Check if usage reaches or exceeds the warning threshold (usagePercent >= warnThreshold)
67
+ */
68
+ shouldWarn(usage: TokenUsage): boolean;
69
+ /**
70
+ * Get human-readable, multi-line usage summary (total plus per-part breakdown)
71
+ */
72
+ formatUsage(usage: TokenUsage): string;
73
+ /**
74
+ * Get multi-line error message for when the limit is exceeded (overflow amount, breakdown, suggestions)
75
+ */
76
+ getErrorMessage(usage: TokenUsage): string;
77
+ /**
78
+ * Get two-line warning message for when usage approaches the limit
79
+ */
80
+ getWarningMessage(usage: TokenUsage): string;
81
+ }
82
+ //# sourceMappingURL=token-counter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token-counter.d.ts","sourceRoot":"","sources":["../../src/llm/token-counter.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,qCAAqC;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,6BAA6B;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,6BAA6B;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,6BAA6B;IAC7B,WAAW,EAAE,MAAM,CAAC;IACpB,qCAAqC;IACrC,gBAAgB,EAAE,MAAM,CAAC;IACzB,gCAAgC;IAChC,YAAY,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B,6CAA6C;IAC7C,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,4DAA4D;IAC5D,aAAa,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CASnD;AAED;;GAEG;AACH,qBAAa,YAAY;IACrB,OAAO,CAAC,MAAM,CAA+B;gBAEjC,MAAM,GAAE,kBAAuB;IAc3C;;OAEG;IACH,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,UAAU;IAiB/D;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO;IAIxC;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO;IAItC;;OAEG;IACH,WAAW,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM;IAUtC;;OAEG;IACH,eAAe,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM;IAkB1C;;OAEG;IACH,iBAAiB,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM;CAM/C"}
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Token counting and budget management
3
+ */
4
/**
 * Approximates how many tokens a piece of text will consume.
 *
 * Applies a flat heuristic of roughly four characters per token and rounds
 * up. Real tokenizers differ from model to model, so treat the result as an
 * estimate only.
 *
 * @param {string} text - Text to size.
 * @returns {number} Estimated token count; 0 when the text is empty or missing.
 */
export function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  // Falsy input (empty string, null, undefined) costs nothing.
  return text ? Math.ceil(text.length / CHARS_PER_TOKEN) : 0;
}
20
/**
 * Token counter for managing LLM context budgets.
 *
 * Estimates how many tokens a system prompt plus input will use, compares the
 * total (including the reserved output budget) with the context limit, and
 * formats human-readable summaries, errors and warnings.
 */
export class TokenCounter {
  config;

  /**
   * @param {object} [config] - Optional overrides.
   * @param {number} [config.maxContextTokens] - Context limit (default: 4096).
   * @param {{system?: number, input?: number, output?: number}} [config.tokenBudget]
   *   Budget breakdown; missing entries fall back to 1000 / 2000 / 1000.
   * @param {number} [config.warnThreshold] - Usage percentage at which
   *   `shouldWarn` starts returning true (default: 90). A value of 0 is honoured.
   */
  constructor(config = {}) {
    const defaultBudget = {
      system: 1000,
      input: 2000,
      output: 1000,
    };
    this.config = {
      // `||` is intentional: a limit of 0 would make usagePercent divide by zero.
      maxContextTokens: config.maxContextTokens || 4096,
      // Merge into a fresh object so a partial budget still has every entry
      // and the caller's object is not aliased.
      tokenBudget: { ...defaultBudget, ...config.tokenBudget },
      // `??` keeps an explicit threshold of 0 ("always warn") instead of replacing it with 90.
      warnThreshold: config.warnThreshold ?? 90,
    };
  }

  /**
   * Calculate token usage for system prompt and input.
   *
   * @param {string} systemPrompt - System prompt text.
   * @param {string} input - User input text.
   * @returns {object} Usage with `systemTokens`, `inputTokens`, `outputTokens`
   *   (the reserved output budget), `totalTokens` (sum of the three),
   *   `maxContextTokens` and `usagePercent` (total / limit * 100).
   */
  calculateUsage(systemPrompt, input) {
    const systemTokens = estimateTokens(systemPrompt);
    const inputTokens = estimateTokens(input);
    const outputTokens = this.config.tokenBudget.output;
    const totalTokens = systemTokens + inputTokens + outputTokens;
    const usagePercent = (totalTokens / this.config.maxContextTokens) * 100;
    return {
      systemTokens,
      inputTokens,
      outputTokens,
      totalTokens,
      maxContextTokens: this.config.maxContextTokens,
      usagePercent,
    };
  }

  /**
   * Check if usage exceeds maximum context.
   *
   * @param {object} usage - Usage as produced by `calculateUsage`.
   * @returns {boolean} True when `totalTokens` is strictly greater than `maxContextTokens`.
   */
  exceedsLimit(usage) {
    return usage.totalTokens > usage.maxContextTokens;
  }

  /**
   * Check if usage has reached the warning threshold.
   *
   * @param {object} usage - Usage as produced by `calculateUsage`.
   * @returns {boolean} True when `usagePercent` is at or above the configured threshold.
   */
  shouldWarn(usage) {
    return usage.usagePercent >= this.config.warnThreshold;
  }

  /**
   * Get human-readable usage summary.
   *
   * @param {object} usage - Usage as produced by `calculateUsage`.
   * @returns {string} Four lines: the total followed by the per-part breakdown.
   */
  formatUsage(usage) {
    const lines = [
      `Token usage: ${usage.totalTokens}/${usage.maxContextTokens} (${usage.usagePercent.toFixed(1)}%)`,
      ` System prompt: ${usage.systemTokens} tokens`,
      ` Input: ${usage.inputTokens} tokens`,
      ` Reserved for output: ${usage.outputTokens} tokens`,
    ];
    return lines.join('\n');
  }

  /**
   * Get error message when limit exceeded.
   *
   * @param {object} usage - Usage as produced by `calculateUsage`.
   * @returns {string} Multi-line message with the overflow amount, a breakdown and suggestions.
   */
  getErrorMessage(usage) {
    const overflow = usage.totalTokens - usage.maxContextTokens;
    return [
      `Token budget exceeded: ${usage.totalTokens}/${usage.maxContextTokens} tokens (${overflow} over limit)`,
      ``,
      `Breakdown:`,
      ` System prompt: ${usage.systemTokens} tokens`,
      ` Input: ${usage.inputTokens} tokens`,
      ` Reserved for output: ${usage.outputTokens} tokens`,
      ``,
      `Suggestions:`,
      ` - Reduce input size (currently ${usage.inputTokens} tokens)`,
      ` - Simplify schema to reduce system prompt (currently ${usage.systemTokens} tokens)`,
      ` - Use a model with larger context window`,
      ` - Consider chunking large inputs (future feature)`,
    ].join('\n');
  }

  /**
   * Get warning message when approaching limit.
   *
   * @param {object} usage - Usage as produced by `calculateUsage`.
   * @returns {string} Two lines: the headline and a compact per-part breakdown.
   */
  getWarningMessage(usage) {
    return [
      `⚠ Approaching token budget limit: ${usage.totalTokens}/${usage.maxContextTokens} tokens (${usage.usagePercent.toFixed(1)}%)`,
      ` System: ${usage.systemTokens} | Input: ${usage.inputTokens} | Output: ${usage.outputTokens}`,
    ].join('\n');
  }
}
109
+ //# sourceMappingURL=token-counter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token-counter.js","sourceRoot":"","sources":["../../src/llm/token-counter.ts"],"names":[],"mappings":"AAAA;;GAEG;AA4CH;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACvC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,CAAC;IACb,CAAC;IAED,yCAAyC;IACzC,6DAA6D;IAC7D,MAAM,aAAa,GAAG,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,MAAM,OAAO,YAAY;IACb,MAAM,CAA+B;IAE7C,YAAY,SAA6B,EAAE;QACvC,MAAM,aAAa,GAAgB;YAC/B,MAAM,EAAE,IAAI;YACZ,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,IAAI;SACf,CAAC;QAEF,IAAI,CAAC,MAAM,GAAG;YACV,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI;YACjD,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,aAAa;YAChD,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;SAC5C,CAAC;IACN,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,YAAoB,EAAE,KAAa;QAC9C,MAAM,YAAY,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAClD,MAAM,WAAW,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC;QACpD,MAAM,WAAW,GAAG,YAAY,GAAG,WAAW,GAAG,YAAY,CAAC;QAC9D,MAAM,YAAY,GAAG,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,GAAG,GAAG,CAAC;QAExE,OAAO;YACH,YAAY;YACZ,WAAW;YACX,YAAY;YACZ,WAAW;YACX,gBAAgB,EAAE,IAAI,CAAC,MAAM,CAAC,gBAAgB;YAC9C,YAAY;SACf,CAAC;IACN,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,KAAiB;QAC1B,OAAO,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,gBAAgB,CAAC;IACtD,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,KAAiB;QACxB,OAAO,KAAK,CAAC,YAAY,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;IAC3D,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,KAAiB;QACzB,MAAM,KAAK,GAAa;YACpB,gBAAgB,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,gBAAgB,KAAK,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YACjG,oBAAoB,KAAK,CAAC,YAAY,SAAS;YAC/C,YAAY,KAAK,CAAC,WAAW,SAAS;YACtC,0BAA0B,KAAK,CAAC,YAAY,SAAS;SACxD,CAAC;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,KAAiB;QAC7B,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,gBAAgB,CAAC;QAC5D,OAAO;YACH,0BAA0B,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,gBAAgB,YAAY,QAAQ,cAAc;YACvG,EAAE;YACF,YAAY;YACZ,oBAAoB,KAAK,CAAC,YAAY,SAAS;YAC/C,YAAY,KAAK,CAAC,WAAW,SAAS;YACtC,0BAA0B,KAAK,CAAC,YAAY,SAAS;
YACrD,EAAE;YACF,cAAc;YACd,oCAAoC,KAAK,CAAC,WAAW,UAAU;YAC/D,0DAA0D,KAAK,CAAC,YAAY,UAAU;YACtF,4CAA4C;YAC5C,qDAAqD;SACxD,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,KAAiB;QAC/B,OAAO;YACH,qCAAqC,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,gBAAgB,YAAY,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YAC7H,aAAa,KAAK,CAAC,YAAY,aAAa,KAAK,CAAC,WAAW,cAAc,KAAK,CAAC,YAAY,EAAE;SAClG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;CACJ"}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * LLM client type definitions
3
+ */
4
+ import type { Schema } from '../schemas/types.js';
5
+ /**
6
+ * Retry configuration (NOTE(review): delay units are not stated in this file; presumably milliseconds, confirm against the retry implementation)
7
+ */
8
+ export interface RetryConfig {
9
+ maxRetries: number;
10
+ initialDelay: number;
11
+ maxDelay: number;
12
+ backoffFactor: number;
13
+ }
14
+ /**
15
+ * Token budget breakdown (same fields as the TokenBudget interface declared in token-counter.d.ts)
16
+ */
17
+ export interface TokenBudget {
18
+ /** Maximum tokens for system prompt */
19
+ system: number;
20
+ /** Maximum tokens for user input */
21
+ input: number;
22
+ /** Reserved tokens for model output */
23
+ output: number;
24
+ }
25
+ /**
26
+ * Configuration for LLM client; only baseURL and model are required
27
+ */
28
+ export interface LLMConfig {
29
+ baseURL: string;
30
+ apiKey?: string;
31
+ model: string;
32
+ temperature?: number;
33
+ maxTokens?: number;
34
+ timeout?: number;
35
+ retries?: RetryConfig;
36
+ /** Maximum context tokens (default: 4096) */
37
+ maxContextTokens?: number;
38
+ /** Token budget breakdown (system / input / output allowances) */
39
+ tokenBudget?: TokenBudget;
40
+ /** Warn when token usage exceeds this percentage (default: 90); NOTE(review): presumably forwarded to TokenCounter, whose check is usagePercent >= threshold; confirm in the client */
41
+ warnThreshold?: number;
42
+ /** Enable debug logging for token usage */
43
+ debugTokens?: boolean;
44
+ }
45
+ /**
46
+ * Message in chat completion format: a role ('system', 'user' or 'assistant') plus its text content
47
+ */
48
+ export interface ChatMessage {
49
+ role: 'system' | 'user' | 'assistant';
50
+ content: string;
51
+ }
52
+ /**
53
+ * Request to LLM API: model name and chat messages, with optional temperature and max_tokens
54
+ */
55
+ export interface LLMRequest {
56
+ model: string;
57
+ messages: ChatMessage[];
58
+ temperature?: number;
59
+ max_tokens?: number;
60
+ }
61
+ /**
62
+ * Response from LLM API (OpenAI-compatible): response metadata, a list of choices, and optional token usage counts
63
+ */
64
+ export interface LLMResponse {
65
+ id: string;
66
+ object: string;
67
+ created: number;
68
+ model: string;
69
+ choices: Array<{
70
+ index: number;
71
+ message: ChatMessage;
72
+ finish_reason: string;
73
+ }>;
74
+ usage?: {
75
+ prompt_tokens: number;
76
+ completion_tokens: number;
77
+ total_tokens: number;
78
+ };
79
+ }
80
+ /**
81
+ * Extracted data with confidence scores: the same data / confidence / confidenceByField structure that buildSystemPrompt asks the model to return (confidence values 0-100)
82
+ */
83
+ export interface ExtractionResponse {
84
+ data: Record<string, unknown>;
85
+ confidence: number;
86
+ confidenceByField: Record<string, number>;
87
+ }
88
+ /**
89
+ * Options for extraction: the schema, the input text, and an optional systemPrompt (NOTE(review): presumably replaces the generated system prompt; confirm in the client)
90
+ */
91
+ export interface ExtractionOptions {
92
+ schema: Schema;
93
+ input: string;
94
+ systemPrompt?: string;
95
+ }
96
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/llm/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAElD;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,uCAAuC;IACvC,MAAM,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,6CAA6C;IAC7C,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,6BAA6B;IAC7B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,kEAAkE;IAClE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,2CAA2C;IAC3C,WAAW,CAAC,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,WAAW,CAAC;QACrB,aAAa,EAAE,MAAM,CAAC;KACzB,CAAC,CAAC;IACH,KAAK,CAAC,EAAE;QACJ,aAAa,EAAE,MAAM,CAAC;QACtB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,YAAY,EAAE,MAAM,CAAC;KACxB,CAAC;CACL;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * LLM client type definitions
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/llm/types.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Integration tests for schema system
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=integration.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"integration.test.d.ts","sourceRoot":"","sources":["../../../src/schemas/__tests__/integration.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}