@retab/node 0.0.48 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/README.md +8 -215
  2. package/dist/api/client.d.ts +2 -2
  3. package/dist/api/client.d.ts.map +1 -1
  4. package/dist/api/client.js +2 -2
  5. package/dist/api/documents/client.d.ts +3 -3
  6. package/dist/api/documents/client.d.ts.map +1 -1
  7. package/dist/api/documents/client.js +3 -3
  8. package/dist/api/projects/client.d.ts +15 -0
  9. package/dist/api/projects/client.d.ts.map +1 -0
  10. package/dist/api/projects/client.js +43 -0
  11. package/dist/api/projects/documents/client.d.ts +12 -0
  12. package/dist/api/projects/documents/client.d.ts.map +1 -0
  13. package/dist/api/projects/documents/client.js +39 -0
  14. package/dist/api/projects/iterations/client.d.ts +17 -0
  15. package/dist/api/projects/iterations/client.d.ts.map +1 -0
  16. package/dist/api/projects/iterations/client.js +64 -0
  17. package/dist/client.d.ts +1 -0
  18. package/dist/client.d.ts.map +1 -1
  19. package/dist/client.js +6 -1
  20. package/dist/generated_types.d.ts +17837 -40090
  21. package/dist/generated_types.d.ts.map +1 -1
  22. package/dist/generated_types.js +309 -979
  23. package/dist/index.d.ts +7 -2
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +2 -2
  26. package/dist/types.d.ts +188 -80
  27. package/dist/types.d.ts.map +1 -1
  28. package/dist/types.js +22 -1
  29. package/package.json +6 -9
  30. package/dist/api/consensus/client.d.ts +0 -7
  31. package/dist/api/consensus/client.d.ts.map +0 -1
  32. package/dist/api/consensus/client.js +0 -14
  33. package/dist/errors.d.ts +0 -34
  34. package/dist/errors.d.ts.map +0 -1
  35. package/dist/errors.js +0 -53
  36. package/dist/resource.d.ts +0 -12
  37. package/dist/resource.d.ts.map +0 -1
  38. package/dist/resource.js +0 -19
  39. package/dist/resources/consensus/completions.d.ts +0 -66
  40. package/dist/resources/consensus/completions.d.ts.map +0 -1
  41. package/dist/resources/consensus/completions.js +0 -84
  42. package/dist/resources/consensus/index.d.ts +0 -72
  43. package/dist/resources/consensus/index.d.ts.map +0 -1
  44. package/dist/resources/consensus/index.js +0 -76
  45. package/dist/resources/consensus/responses.d.ts +0 -69
  46. package/dist/resources/consensus/responses.d.ts.map +0 -1
  47. package/dist/resources/consensus/responses.js +0 -99
  48. package/dist/resources/documents/extractions.d.ts +0 -74
  49. package/dist/resources/documents/extractions.d.ts.map +0 -1
  50. package/dist/resources/documents/extractions.js +0 -196
  51. package/dist/resources/documents/index.d.ts +0 -21
  52. package/dist/resources/documents/index.d.ts.map +0 -1
  53. package/dist/resources/documents/index.js +0 -55
  54. package/dist/resources/evaluations/documents.d.ts +0 -40
  55. package/dist/resources/evaluations/documents.d.ts.map +0 -1
  56. package/dist/resources/evaluations/documents.js +0 -123
  57. package/dist/resources/evaluations/index.d.ts +0 -14
  58. package/dist/resources/evaluations/index.d.ts.map +0 -1
  59. package/dist/resources/evaluations/index.js +0 -17
  60. package/dist/resources/evaluations/iterations.d.ts +0 -50
  61. package/dist/resources/evaluations/iterations.d.ts.map +0 -1
  62. package/dist/resources/evaluations/iterations.js +0 -156
  63. package/dist/resources/files.d.ts +0 -82
  64. package/dist/resources/files.d.ts.map +0 -1
  65. package/dist/resources/files.js +0 -150
  66. package/dist/resources/finetuning.d.ts +0 -105
  67. package/dist/resources/finetuning.d.ts.map +0 -1
  68. package/dist/resources/finetuning.js +0 -181
  69. package/dist/resources/index.d.ts +0 -11
  70. package/dist/resources/index.d.ts.map +0 -1
  71. package/dist/resources/index.js +0 -10
  72. package/dist/resources/models.d.ts +0 -57
  73. package/dist/resources/models.d.ts.map +0 -1
  74. package/dist/resources/models.js +0 -72
  75. package/dist/resources/processors/automations/endpoints.d.ts +0 -90
  76. package/dist/resources/processors/automations/endpoints.d.ts.map +0 -1
  77. package/dist/resources/processors/automations/endpoints.js +0 -145
  78. package/dist/resources/processors/automations/index.d.ts +0 -7
  79. package/dist/resources/processors/automations/index.d.ts.map +0 -1
  80. package/dist/resources/processors/automations/index.js +0 -6
  81. package/dist/resources/processors/automations/links.d.ts +0 -90
  82. package/dist/resources/processors/automations/links.d.ts.map +0 -1
  83. package/dist/resources/processors/automations/links.js +0 -149
  84. package/dist/resources/processors/automations/logs.d.ts +0 -35
  85. package/dist/resources/processors/automations/logs.d.ts.map +0 -1
  86. package/dist/resources/processors/automations/logs.js +0 -60
  87. package/dist/resources/processors/automations/mailboxes.d.ts +0 -102
  88. package/dist/resources/processors/automations/mailboxes.d.ts.map +0 -1
  89. package/dist/resources/processors/automations/mailboxes.js +0 -157
  90. package/dist/resources/processors/automations/outlook.d.ts +0 -114
  91. package/dist/resources/processors/automations/outlook.d.ts.map +0 -1
  92. package/dist/resources/processors/automations/outlook.js +0 -170
  93. package/dist/resources/processors/automations/tests.d.ts +0 -58
  94. package/dist/resources/processors/automations/tests.d.ts.map +0 -1
  95. package/dist/resources/processors/automations/tests.js +0 -90
  96. package/dist/resources/processors/index.d.ts +0 -303
  97. package/dist/resources/processors/index.d.ts.map +0 -1
  98. package/dist/resources/processors/index.js +0 -261
  99. package/dist/resources/schemas.d.ts +0 -63
  100. package/dist/resources/schemas.d.ts.map +0 -1
  101. package/dist/resources/schemas.js +0 -183
  102. package/dist/resources/secrets/external_api_keys.d.ts +0 -61
  103. package/dist/resources/secrets/external_api_keys.d.ts.map +0 -1
  104. package/dist/resources/secrets/external_api_keys.js +0 -120
  105. package/dist/resources/secrets/index.d.ts +0 -14
  106. package/dist/resources/secrets/index.d.ts.map +0 -1
  107. package/dist/resources/secrets/index.js +0 -17
  108. package/dist/resources/secrets/webhooks.d.ts +0 -73
  109. package/dist/resources/secrets/webhooks.d.ts.map +0 -1
  110. package/dist/resources/secrets/webhooks.js +0 -145
  111. package/dist/resources/usage.d.ts +0 -223
  112. package/dist/resources/usage.d.ts.map +0 -1
  113. package/dist/resources/usage.js +0 -310
  114. package/dist/types/ai_models.d.ts +0 -389
  115. package/dist/types/ai_models.d.ts.map +0 -1
  116. package/dist/types/ai_models.js +0 -145
  117. package/dist/types/automations/cron.d.ts +0 -28
  118. package/dist/types/automations/cron.d.ts.map +0 -1
  119. package/dist/types/automations/cron.js +0 -1
  120. package/dist/types/automations/endpoints.d.ts +0 -13
  121. package/dist/types/automations/endpoints.d.ts.map +0 -1
  122. package/dist/types/automations/endpoints.js +0 -1
  123. package/dist/types/automations/index.d.ts +0 -7
  124. package/dist/types/automations/index.d.ts.map +0 -1
  125. package/dist/types/automations/index.js +0 -6
  126. package/dist/types/automations/links.d.ts +0 -15
  127. package/dist/types/automations/links.d.ts.map +0 -1
  128. package/dist/types/automations/links.js +0 -1
  129. package/dist/types/automations/mailboxes.d.ts +0 -18
  130. package/dist/types/automations/mailboxes.d.ts.map +0 -1
  131. package/dist/types/automations/mailboxes.js +0 -1
  132. package/dist/types/automations/outlook.d.ts +0 -37
  133. package/dist/types/automations/outlook.d.ts.map +0 -1
  134. package/dist/types/automations/outlook.js +0 -1
  135. package/dist/types/automations/webhooks.d.ts +0 -13
  136. package/dist/types/automations/webhooks.d.ts.map +0 -1
  137. package/dist/types/automations/webhooks.js +0 -1
  138. package/dist/types/browser_canvas.d.ts +0 -4
  139. package/dist/types/browser_canvas.d.ts.map +0 -1
  140. package/dist/types/browser_canvas.js +0 -2
  141. package/dist/types/chat.d.ts +0 -99
  142. package/dist/types/chat.d.ts.map +0 -1
  143. package/dist/types/chat.js +0 -20
  144. package/dist/types/consensus.d.ts +0 -10
  145. package/dist/types/consensus.d.ts.map +0 -1
  146. package/dist/types/consensus.js +0 -1
  147. package/dist/types/db/annotations.d.ts +0 -108
  148. package/dist/types/db/annotations.d.ts.map +0 -1
  149. package/dist/types/db/annotations.js +0 -6
  150. package/dist/types/db/files.d.ts +0 -133
  151. package/dist/types/db/files.d.ts.map +0 -1
  152. package/dist/types/db/files.js +0 -5
  153. package/dist/types/documents/extractions.d.ts +0 -1849
  154. package/dist/types/documents/extractions.d.ts.map +0 -1
  155. package/dist/types/documents/extractions.js +0 -211
  156. package/dist/types/documents/processing.d.ts +0 -249
  157. package/dist/types/documents/processing.d.ts.map +0 -1
  158. package/dist/types/documents/processing.js +0 -6
  159. package/dist/types/evaluations/iterations.d.ts +0 -41
  160. package/dist/types/evaluations/iterations.d.ts.map +0 -1
  161. package/dist/types/evaluations/iterations.js +0 -1
  162. package/dist/types/jobs/base.d.ts +0 -162
  163. package/dist/types/jobs/base.d.ts.map +0 -1
  164. package/dist/types/jobs/base.js +0 -6
  165. package/dist/types/jobs/specialized.d.ts +0 -200
  166. package/dist/types/jobs/specialized.d.ts.map +0 -1
  167. package/dist/types/jobs/specialized.js +0 -37
  168. package/dist/types/logs.d.ts +0 -92
  169. package/dist/types/logs.d.ts.map +0 -1
  170. package/dist/types/logs.js +0 -1
  171. package/dist/types/mime.d.ts +0 -426
  172. package/dist/types/mime.d.ts.map +0 -1
  173. package/dist/types/mime.js +0 -48
  174. package/dist/types/modalities.d.ts +0 -31
  175. package/dist/types/modalities.d.ts.map +0 -1
  176. package/dist/types/modalities.js +0 -109
  177. package/dist/types/pagination.d.ts +0 -5
  178. package/dist/types/pagination.d.ts.map +0 -1
  179. package/dist/types/pagination.js +0 -1
  180. package/dist/types/schemas/enhancement.d.ts +0 -250
  181. package/dist/types/schemas/enhancement.d.ts.map +0 -1
  182. package/dist/types/schemas/enhancement.js +0 -6
  183. package/dist/types/schemas/generate.d.ts +0 -160
  184. package/dist/types/schemas/generate.d.ts.map +0 -1
  185. package/dist/types/schemas/generate.js +0 -19
  186. package/dist/types/schemas/object.d.ts +0 -116
  187. package/dist/types/schemas/object.d.ts.map +0 -1
  188. package/dist/types/schemas/object.js +0 -861
  189. package/dist/types/secrets/external_api_keys.d.ts +0 -27
  190. package/dist/types/secrets/external_api_keys.d.ts.map +0 -1
  191. package/dist/types/secrets/external_api_keys.js +0 -11
  192. package/dist/types/secrets/index.d.ts +0 -2
  193. package/dist/types/secrets/index.d.ts.map +0 -1
  194. package/dist/types/secrets/index.js +0 -1
  195. package/dist/types/standards.d.ts +0 -37
  196. package/dist/types/standards.d.ts.map +0 -1
  197. package/dist/types/standards.js +0 -1
  198. package/dist/utils/ai_models.d.ts +0 -10
  199. package/dist/utils/ai_models.d.ts.map +0 -1
  200. package/dist/utils/ai_models.js +0 -183
  201. package/dist/utils/batch_processing.d.ts +0 -227
  202. package/dist/utils/batch_processing.d.ts.map +0 -1
  203. package/dist/utils/batch_processing.js +0 -268
  204. package/dist/utils/benchmarking.d.ts +0 -115
  205. package/dist/utils/benchmarking.d.ts.map +0 -1
  206. package/dist/utils/benchmarking.js +0 -355
  207. package/dist/utils/chat.d.ts +0 -70
  208. package/dist/utils/chat.d.ts.map +0 -1
  209. package/dist/utils/chat.js +0 -79
  210. package/dist/utils/cost_calculation.d.ts +0 -26
  211. package/dist/utils/cost_calculation.d.ts.map +0 -1
  212. package/dist/utils/cost_calculation.js +0 -99
  213. package/dist/utils/datasets.d.ts +0 -135
  214. package/dist/utils/datasets.d.ts.map +0 -1
  215. package/dist/utils/datasets.js +0 -359
  216. package/dist/utils/display.d.ts +0 -108
  217. package/dist/utils/display.d.ts.map +0 -1
  218. package/dist/utils/display.js +0 -244
  219. package/dist/utils/hash.d.ts +0 -18
  220. package/dist/utils/hash.d.ts.map +0 -1
  221. package/dist/utils/hash.js +0 -31
  222. package/dist/utils/hashing.d.ts +0 -18
  223. package/dist/utils/hashing.d.ts.map +0 -1
  224. package/dist/utils/hashing.js +0 -28
  225. package/dist/utils/index.d.ts +0 -8
  226. package/dist/utils/index.d.ts.map +0 -1
  227. package/dist/utils/index.js +0 -10
  228. package/dist/utils/json_schema.d.ts +0 -18
  229. package/dist/utils/json_schema.d.ts.map +0 -1
  230. package/dist/utils/json_schema.js +0 -334
  231. package/dist/utils/json_schema_utils.d.ts +0 -42
  232. package/dist/utils/json_schema_utils.d.ts.map +0 -1
  233. package/dist/utils/json_schema_utils.js +0 -212
  234. package/dist/utils/jsonl.d.ts +0 -60
  235. package/dist/utils/jsonl.d.ts.map +0 -1
  236. package/dist/utils/jsonl.js +0 -259
  237. package/dist/utils/mime.d.ts +0 -6
  238. package/dist/utils/mime.d.ts.map +0 -1
  239. package/dist/utils/mime.js +0 -129
  240. package/dist/utils/model_cards.d.ts +0 -219
  241. package/dist/utils/model_cards.d.ts.map +0 -1
  242. package/dist/utils/model_cards.js +0 -462
  243. package/dist/utils/prompt_optimization.d.ts +0 -96
  244. package/dist/utils/prompt_optimization.d.ts.map +0 -1
  245. package/dist/utils/prompt_optimization.js +0 -275
  246. package/dist/utils/responses.d.ts +0 -35
  247. package/dist/utils/responses.d.ts.map +0 -1
  248. package/dist/utils/responses.js +0 -37
  249. package/dist/utils/stream.d.ts +0 -13
  250. package/dist/utils/stream.d.ts.map +0 -1
  251. package/dist/utils/stream.js +0 -64
  252. package/dist/utils/stream_context_managers.d.ts +0 -147
  253. package/dist/utils/stream_context_managers.d.ts.map +0 -1
  254. package/dist/utils/stream_context_managers.js +0 -380
  255. package/dist/utils/usage.d.ts +0 -57
  256. package/dist/utils/usage.d.ts.map +0 -1
  257. package/dist/utils/usage.js +0 -97
  258. package/dist/utils/webhook_secrets.d.ts +0 -59
  259. package/dist/utils/webhook_secrets.d.ts.map +0 -1
  260. package/dist/utils/webhook_secrets.js +0 -107
  261. package/dist/utils/zod_to_json_schema.d.ts +0 -11
  262. package/dist/utils/zod_to_json_schema.d.ts.map +0 -1
  263. package/dist/utils/zod_to_json_schema.js +0 -123
@@ -1,861 +0,0 @@
1
- import { z } from 'zod';
2
- import { generateSchemaDataId, generateSchemaId, loadJsonSchema } from '../../utils/json_schema_utils.js';
3
- import { zodToJsonSchema } from '../../utils/zod_to_json_schema.js';
4
- export const PartialSchemaSchema = z.object({
5
- object: z.literal('schema'),
6
- created_at: z.string().datetime(),
7
- json_schema: z.record(z.any()).default({}),
8
- strict: z.boolean().default(true),
9
- });
10
- export const PartialSchemaChunkSchema = z.object({
11
- object: z.literal('schema.chunk'),
12
- created_at: z.string().datetime(),
13
- delta_json_schema_flat: z.record(z.any()).default({}),
14
- streaming_error: z.custom().nullable().optional(),
15
- });
16
- export const SchemaSchema = PartialSchemaSchema.extend({}).transform((data) => new Schema(data));
17
- export class Schema {
18
- constructor(data) {
19
- this.object = 'schema';
20
- this.json_schema = {};
21
- this.strict = true;
22
- this.created_at = new Date().toISOString();
23
- // Validate input like Python version
24
- if (data.json_schema && data.pydanticModel) {
25
- throw new Error('Cannot provide both json_schema and pydanticModel');
26
- }
27
- if (!data.json_schema && !data.pydanticModel && !data.zod_model) {
28
- throw new Error('Must provide either json_schema, pydanticModel, or zod_model');
29
- }
30
- if (data.json_schema) {
31
- this.json_schema = loadJsonSchema(data.json_schema);
32
- }
33
- else if (data.pydanticModel) {
34
- // For pydantic models, we expect the model to have a model_json_schema() method
35
- // In Node.js context, this would be a pre-serialized schema from Python
36
- if (typeof data.pydanticModel === 'object' && data.pydanticModel.model_json_schema) {
37
- this.json_schema = data.pydanticModel.model_json_schema();
38
- }
39
- else if (typeof data.pydanticModel === 'object' && data.pydanticModel.schema) {
40
- this.json_schema = data.pydanticModel.schema;
41
- }
42
- else {
43
- throw new Error('pydanticModel must have a model_json_schema() method or schema property');
44
- }
45
- }
46
- else if (data.zod_model) {
47
- this._zodModel = data.zod_model;
48
- // Convert Zod to JSON Schema using proper converter
49
- this.json_schema = zodToJsonSchema(data.zod_model);
50
- // Add system prompt if provided
51
- if (data.system_prompt) {
52
- this.json_schema['X-SystemPrompt'] = data.system_prompt;
53
- }
54
- // Add reasoning prompts if provided
55
- if (data.reasoning_prompts) {
56
- for (const [field, prompt] of Object.entries(data.reasoning_prompts)) {
57
- if (this.json_schema.properties && this.json_schema.properties[field]) {
58
- this.json_schema.properties[field]['X-ReasoningPrompt'] = prompt;
59
- }
60
- }
61
- }
62
- }
63
- }
64
- get dataId() {
65
- return generateSchemaDataId(this.json_schema);
66
- }
67
- get id() {
68
- return generateSchemaId(this.json_schema);
69
- }
70
- get inference_json_schema() {
71
- // Returns the schema formatted for structured output with OpenAI requirements
72
- if (this.strict) {
73
- // For strict schemas, convert to OpenAI-compatible format
74
- const inferenceSchema = this.jsonSchemaToStrictOpenaiSchema(JSON.parse(JSON.stringify(this._reasoningObjectSchema)));
75
- if (typeof inferenceSchema !== 'object' || inferenceSchema === null) {
76
- throw new Error('Validation Error: The inference_json_schema is not a dict');
77
- }
78
- return inferenceSchema;
79
- }
80
- else {
81
- // For non-strict schemas, return a deep copy of the reasoning schema without strict modifications
82
- return JSON.parse(JSON.stringify(this._reasoningObjectSchema));
83
- }
84
- }
85
- get inferenceJsonSchema() {
86
- // Alias for backwards compatibility
87
- return this.inference_json_schema;
88
- }
89
- get openaiMessages() {
90
- // Returns messages formatted for OpenAI's API
91
- return this.messages.map(msg => ({
92
- role: msg.role,
93
- content: msg.content
94
- }));
95
- }
96
- get anthropicSystemPrompt() {
97
- return 'Return your response as a JSON object following the provided schema.' + this.systemPrompt;
98
- }
99
- get anthropicMessages() {
100
- // Returns messages in Anthropic's Claude format
101
- return this.messages.slice(1); // Skip system message
102
- }
103
- get geminiSystemPrompt() {
104
- return this.systemPrompt;
105
- }
106
- get geminiMessages() {
107
- // Returns messages formatted for Google's Gemini API
108
- return this.messages.slice(1);
109
- }
110
- get inferenceGeminiJsonSchema() {
111
- // Convert schema for Gemini compatibility (no anyOf, etc.)
112
- const schema = { ...this._reasoningObjectSchema };
113
- // Remove unsupported Gemini fields recursively
114
- const removeUnsupportedFields = (obj) => {
115
- if (typeof obj !== 'object' || obj === null)
116
- return obj;
117
- if (Array.isArray(obj)) {
118
- return obj.map(removeUnsupportedFields);
119
- }
120
- const result = { ...obj };
121
- // Remove Gemini-unsupported fields
122
- delete result.anyOf;
123
- delete result.oneOf;
124
- delete result.allOf;
125
- delete result.not;
126
- delete result.if;
127
- delete result.then;
128
- delete result.else;
129
- // Recursively process nested objects
130
- Object.keys(result).forEach(key => {
131
- if (typeof result[key] === 'object' && result[key] !== null) {
132
- result[key] = removeUnsupportedFields(result[key]);
133
- }
134
- });
135
- return result;
136
- };
137
- return removeUnsupportedFields(schema);
138
- }
139
- get inferenceTypescriptInterface() {
140
- // Returns TypeScript interface representation of the inference schema
141
- return this.jsonSchemaToTypescriptInterface(this._reasoningObjectSchema);
142
- }
143
- get inferenceNlpDataStructure() {
144
- // Returns NLP data structure representation of the inference schema
145
- return this.jsonSchemaToNlpDataStructure(this._reasoningObjectSchema);
146
- }
147
- get developerSystemPrompt() {
148
- return `
149
- # General Instructions
150
-
151
- You are an expert in data extraction and structured data outputs.
152
-
153
- When provided with a **JSON schema** and a **document**, you must:
154
-
155
- 1. Carefully extract all relevant data from the provided document according to the given schema.
156
- 2. Return extracted data strictly formatted according to the provided schema.
157
- 3. Make sure that the extracted values are **UTF-8** encodable strings.
158
- 4. Avoid generating bytes, binary data, base64 encoded data, or other non-UTF-8 encodable data.
159
-
160
- ---
161
-
162
- ## Date and Time Formatting
163
-
164
- When extracting date, time, or datetime values:
165
-
166
- - **Always use ISO format** for dates and times (e.g., "2023-12-25", "14:30:00", "2023-12-25T14:30:00Z")
167
- - **Include timezone information** when available (e.g., "2023-12-25T14:30:00+02:00")
168
- - **Use UTC timezone** when timezone is not specified or unclear (e.g., "2023-12-25T14:30:00Z")
169
- - **Maintain precision** as found in the source document (seconds, milliseconds if present)
170
-
171
- ---
172
-
173
- ## Handling Missing and Nullable Fields
174
-
175
- ### Nullable Leaf Attributes
176
-
177
- - If valid data is missing or not explicitly present, set leaf attributes explicitly to \`null\`.
178
- - **Do NOT** use empty strings (\`""\`), placeholder values, or fabricated data.
179
-
180
- ### Nullable Nested Objects
181
-
182
- - If an entire nested object's data is missing or incomplete, **do NOT** set the object itself to \`null\`.
183
- - Keep the object structure fully intact, explicitly setting each leaf attribute within to \`null\`.
184
- - This preserves overall structure and explicitly communicates exactly which fields lack data.
185
-
186
- ---
187
-
188
- ## Reasoning Fields
189
-
190
- Your schema includes special reasoning fields (\`reasoning___*\`) used exclusively to document your extraction logic. These fields are for detailed explanations and will not appear in final outputs.
191
-
192
- You MUST include these details explicitly in your reasoning fields:
193
-
194
- - **Explicit Evidence**: Quote specific lines or phrases from the document confirming your extraction.
195
- - **Decision Justification**: Clearly justify why specific data was chosen or rejected.
196
- - **Calculations/Transformations**: Document explicitly any computations, unit conversions, or normalizations.
197
- - **Alternative Interpretations**: Explicitly describe any alternative data interpretations considered and why you rejected them.
198
- - **Confidence and Assumptions**: Clearly state your confidence level and explicitly articulate any assumptions.
199
-
200
- ---
201
-
202
- ## Source Fields
203
-
204
- Some leaf fields require you to explicitly provide the source of the data (verbatim from the document).
205
- The idea is to simply provide a verbatim quote from the document, without any additional formatting or commentary, keeping it as close as possible to the original text.
206
- Make sure to reasonably include some surrounding text to provide context about the quote.
207
-
208
- You can easily identify the fields that require a source by the \`quote___[attributename]\` naming pattern.
209
-
210
- ---
211
-
212
- # User Defined System Prompt
213
-
214
- `;
215
- }
216
- get userSystemPrompt() {
217
- return this.json_schema['X-SystemPrompt'] || '';
218
- }
219
- get schemaSystemPrompt() {
220
- return (this.inferenceNlpDataStructure +
221
- '\n---\n' +
222
- '## Expected output schema as a TypeScript interface for better readability:\n\n' +
223
- this.inferenceTypescriptInterface);
224
- }
225
- get systemPrompt() {
226
- return this.developerSystemPrompt + '\n\n' + this.userSystemPrompt + '\n\n' + this.schemaSystemPrompt;
227
- }
228
- get title() {
229
- return this.json_schema.title || 'NoTitle';
230
- }
231
- get _expandedObjectSchema() {
232
- // Returns schema with all references expanded inline
233
- return this.expandRefs(JSON.parse(JSON.stringify(this.json_schema)));
234
- }
235
- get _reasoningObjectSchema() {
236
- // Returns schema with inference-specific modifications (reasoning fields added)
237
- return this.createReasoningSchema(JSON.parse(JSON.stringify(this._expandedObjectSchema)));
238
- }
239
- get messages() {
240
- return [{ role: 'developer', content: this.systemPrompt }];
241
- }
242
- get openai_messages() {
243
- return [{ role: 'developer', content: this.systemPrompt }];
244
- }
245
- get zod_model() {
246
- if (this._zodModel) {
247
- return this._zodModel;
248
- }
249
- // Convert JSON schema to basic Zod schema for validation
250
- return z.object({}).passthrough();
251
- }
252
- getPatternAttribute(pattern, attribute) {
253
- return this._getPatternAttribute(pattern, attribute);
254
- }
255
- setPatternAttribute(pattern, attribute, value) {
256
- this._setPatternAttribute(pattern, attribute, value);
257
- }
258
- save(path) {
259
- // Save JSON schema to file
260
- try {
261
- const fs = require('fs');
262
- fs.writeFileSync(path, JSON.stringify(this.json_schema, null, 2), 'utf8');
263
- }
264
- catch (error) {
265
- if (error instanceof Error && error.message.includes('Cannot find module')) {
266
- throw new Error('save() method not available in browser environment');
267
- }
268
- throw error;
269
- }
270
- }
271
- static validate(data) {
272
- return new Schema(data);
273
- }
274
- createReasoningSchema(schema) {
275
- // Add reasoning fields to the schema structure, matching Python implementation
276
- const processedSchema = this.insertReasoningFieldsInner(JSON.parse(JSON.stringify(schema)));
277
- // Add root reasoning if schema has X-ReasoningPrompt
278
- const rootReasoning = processedSchema.rootReasoning;
279
- if (rootReasoning && processedSchema.updatedSchema.type === 'object') {
280
- if (!processedSchema.updatedSchema.properties) {
281
- processedSchema.updatedSchema.properties = {};
282
- }
283
- // Add reasoning___root field
284
- processedSchema.updatedSchema.properties.reasoning___root = {
285
- type: 'string',
286
- description: rootReasoning
287
- };
288
- // Add to required fields if needed
289
- if (processedSchema.updatedSchema.required) {
290
- processedSchema.updatedSchema.required.push('reasoning___root');
291
- }
292
- }
293
- // Clean custom fields like Python implementation
294
- return this.cleanSchema(processedSchema.updatedSchema, { removeCustomFields: true });
295
- }
296
- insertReasoningFieldsInner(schema) {
297
- // Extract X-ReasoningPrompt from this node
298
- const reasoningDesc = schema['X-ReasoningPrompt'] || null;
299
- delete schema['X-ReasoningPrompt'];
300
- const nodeType = schema.type;
301
- // Process children recursively
302
- if (nodeType === 'object' && schema.properties) {
303
- const newProps = {};
304
- for (const [propertyKey, propertyValue] of Object.entries(schema.properties)) {
305
- const { updatedSchema: updatedProp, rootReasoning: childReasoning } = this.insertReasoningFieldsInner(propertyValue);
306
- newProps[propertyKey] = updatedProp;
307
- // ALWAYS add reasoning field for every property (Python behavior)
308
- const reasoningDescription = childReasoning || `Reasoning for ${propertyKey}`;
309
- newProps[`reasoning___${propertyKey}`] = {
310
- type: 'string',
311
- description: reasoningDescription
312
- };
313
- // Add to required if property is required
314
- if (schema.required && schema.required.includes(propertyKey)) {
315
- if (!schema.required.includes(`reasoning___${propertyKey}`)) {
316
- schema.required.push(`reasoning___${propertyKey}`);
317
- }
318
- }
319
- }
320
- schema.properties = newProps;
321
- }
322
- else if (nodeType === 'array' && schema.items) {
323
- // Process array items
324
- const { updatedSchema: updatedItems, rootReasoning: itemReasoning } = this.insertReasoningFieldsInner(schema.items);
325
- schema.items = updatedItems;
326
- // Always add reasoning___item if items are objects (Python behavior)
327
- if (updatedItems.type === 'object') {
328
- if (!updatedItems.properties) {
329
- updatedItems.properties = {};
330
- }
331
- // Add reasoning___item as first property
332
- const reasoningKey = 'reasoning___item';
333
- const reasoningDescription = itemReasoning || 'Reasoning for this item';
334
- const newProperties = {
335
- [reasoningKey]: {
336
- type: 'string',
337
- description: reasoningDescription
338
- }
339
- };
340
- // Add existing properties
341
- Object.assign(newProperties, updatedItems.properties);
342
- updatedItems.properties = newProperties;
343
- }
344
- }
345
- return {
346
- updatedSchema: schema,
347
- rootReasoning: reasoningDesc
348
- };
349
- }
350
- cleanSchema(schema, options = {}) {
351
- const { removeCustomFields = false } = options;
352
- function cleanObject(obj) {
353
- if (typeof obj !== 'object' || obj === null)
354
- return obj;
355
- if (Array.isArray(obj))
356
- return obj.map(cleanObject);
357
- const result = {};
358
- for (const [key, value] of Object.entries(obj)) {
359
- // Remove custom fields if requested
360
- if (removeCustomFields && key.startsWith('X-')) {
361
- continue;
362
- }
363
- result[key] = cleanObject(value);
364
- }
365
- return result;
366
- }
367
- return cleanObject(schema);
368
- }
369
- jsonSchemaToStrictOpenaiSchema(schema) {
370
- // Convert schema to OpenAI strict format, matching Python implementation exactly
371
- function makeStrict(obj) {
372
- if (typeof obj !== 'object' || obj === null)
373
- return obj;
374
- if (Array.isArray(obj))
375
- return obj.map(makeStrict);
376
- const result = { ...obj };
377
- // Remove unsupported fields (matching Python implementation)
378
- for (const key of ['default', 'format', 'X-FieldTranslation', 'X-EnumTranslation']) {
379
- delete result[key];
380
- }
381
- // Convert integer to number (Python requirement)
382
- if (result.type === 'integer') {
383
- result.type = 'number';
384
- }
385
- else if (Array.isArray(result.type)) {
386
- result.type = result.type.map((t) => t === 'integer' ? 'number' : t);
387
- }
388
- // Handle allOf (merge all schemas)
389
- if (result.allOf) {
390
- const subschemas = result.allOf;
391
- delete result.allOf;
392
- const merged = {};
393
- for (const subschema of subschemas) {
394
- if (subschema.$ref) {
395
- merged.$ref = subschema.$ref;
396
- }
397
- else {
398
- Object.assign(merged, makeStrict(subschema));
399
- }
400
- }
401
- Object.assign(result, merged);
402
- }
403
- // Handle anyOf
404
- if (result.anyOf) {
405
- result.anyOf = result.anyOf.map(makeStrict);
406
- }
407
- // Handle enum (force to string)
408
- if (result.enum) {
409
- result.enum = result.enum.map((e) => String(e));
410
- result.type = 'string';
411
- }
412
- // Handle object type - make all properties required and set additionalProperties: false
413
- if (result.type === 'object' && result.properties) {
414
- result.required = Object.keys(result.properties); // All properties required in strict mode
415
- result.additionalProperties = false;
416
- const newProperties = {};
417
- for (const [key, prop] of Object.entries(result.properties)) {
418
- newProperties[key] = makeStrict(prop);
419
- }
420
- result.properties = newProperties;
421
- }
422
- // Handle array items
423
- if (result.type === 'array' && result.items) {
424
- result.items = makeStrict(result.items);
425
- }
426
- // Handle $defs
427
- if (result.$defs) {
428
- const newDefs = {};
429
- for (const [key, def] of Object.entries(result.$defs)) {
430
- newDefs[key] = makeStrict(def);
431
- }
432
- result.$defs = newDefs;
433
- }
434
- return result;
435
- }
436
- return makeStrict(schema);
437
- }
438
- expandRefs(schema) {
439
- // Check for cyclic references first
440
- if (this.hasCyclicRefs(schema)) {
441
- // Cyclic references detected, keeping schema unchanged
442
- return schema;
443
- }
444
- const definitions = schema.$defs ? { ...schema.$defs } : {};
445
- delete schema.$defs; // Remove $defs from the schema copy
446
- // Handle allOf at root level - merge all schemas
447
- if (schema.allOf) {
448
- const merged = this.mergeAllOfSchemas(schema.allOf);
449
- // Merge the allOf result into the current schema
450
- Object.assign(schema, merged);
451
- delete schema.allOf;
452
- }
453
- return this.expandRefsRecursive(schema, definitions);
454
- }
455
- hasCyclicRefs(schema) {
456
- const definitions = schema.$defs || {};
457
- if (!definitions || Object.keys(definitions).length === 0) {
458
- return false;
459
- }
460
- const memo = {};
461
- const dfs = (defName, stack) => {
462
- if (stack.has(defName)) {
463
- return true; // Cycle detected
464
- }
465
- if (defName in memo) {
466
- return memo[defName];
467
- }
468
- stack.add(defName);
469
- const node = definitions[defName];
470
- if (!node) {
471
- stack.delete(defName);
472
- memo[defName] = false;
473
- return false;
474
- }
475
- const result = this.traverseForCycles(node, stack, definitions);
476
- stack.delete(defName);
477
- memo[defName] = result;
478
- return result;
479
- };
480
- // Check each definition for cycles
481
- for (const defName of Object.keys(definitions)) {
482
- if (dfs(defName, new Set())) {
483
- return true;
484
- }
485
- }
486
- return false;
487
- }
488
- traverseForCycles(node, stack, definitions) {
489
- if (typeof node !== 'object' || node === null) {
490
- return false;
491
- }
492
- if (Array.isArray(node)) {
493
- return node.some(item => this.traverseForCycles(item, stack, definitions));
494
- }
495
- // Check for $ref
496
- if (node.$ref) {
497
- const refPath = node.$ref;
498
- if (refPath.startsWith('#/$defs/')) {
499
- const targetDef = refPath.substring('#/$defs/'.length);
500
- if (stack.has(targetDef)) {
501
- return true; // Cycle detected
502
- }
503
- if (definitions[targetDef]) {
504
- const newStack = new Set(stack);
505
- newStack.add(targetDef);
506
- return this.traverseForCycles(definitions[targetDef], newStack, definitions);
507
- }
508
- }
509
- }
510
- // Traverse all properties except $ref
511
- for (const [key, value] of Object.entries(node)) {
512
- if (key === '$ref')
513
- continue;
514
- if (this.traverseForCycles(value, stack, definitions)) {
515
- return true;
516
- }
517
- }
518
- return false;
519
- }
520
- expandRefsRecursive(obj, definitions) {
521
- if (typeof obj !== 'object' || obj === null) {
522
- return obj;
523
- }
524
- if (Array.isArray(obj)) {
525
- return obj.map(item => this.expandRefsRecursive(item, definitions));
526
- }
527
- if (obj.$ref) {
528
- const refPath = obj.$ref;
529
- if (refPath.startsWith('#/$defs/')) {
530
- const defName = refPath.substring('#/$defs/'.length);
531
- if (definitions[defName]) {
532
- const target = definitions[defName];
533
- // Merge descriptions if present
534
- const merged = this.mergeDescriptions(obj, target);
535
- delete merged.$ref;
536
- return this.expandRefsRecursive(merged, definitions);
537
- }
538
- }
539
- return obj;
540
- }
541
- const result = {};
542
- for (const [key, value] of Object.entries(obj)) {
543
- if (key === 'properties' && typeof value === 'object' && value !== null) {
544
- const newProps = {};
545
- for (const [propKey, propValue] of Object.entries(value)) {
546
- newProps[propKey] = this.expandRefsRecursive(propValue, definitions);
547
- }
548
- result[key] = newProps;
549
- }
550
- else if (key === 'items') {
551
- result[key] = this.expandRefsRecursive(value, definitions);
552
- }
553
- else if (key === '$defs' && typeof value === 'object' && value !== null) {
554
- const newDefs = {};
555
- for (const [defKey, defValue] of Object.entries(value)) {
556
- newDefs[defKey] = this.expandRefsRecursive(defValue, definitions);
557
- }
558
- result[key] = newDefs;
559
- }
560
- else {
561
- result[key] = this.expandRefsRecursive(value, definitions);
562
- }
563
- }
564
- return result;
565
- }
566
- mergeDescriptions(source, target) {
567
- const merged = { ...target };
568
- // If source has description and target doesn't, use source's description
569
- if (source.description && !target.description) {
570
- merged.description = source.description;
571
- }
572
- return merged;
573
- }
574
- jsonSchemaToTypescriptInterface(schema) {
575
- // Convert JSON schema to TypeScript interface
576
- function convertType(obj, depth = 0) {
577
- const indent = ' '.repeat(depth);
578
- if (!obj || typeof obj !== 'object') {
579
- return 'any';
580
- }
581
- if (obj.enum) {
582
- return obj.enum.map((e) => typeof e === 'string' ? `"${e}"` : String(e)).join(' | ');
583
- }
584
- if (obj.type === 'string')
585
- return 'string';
586
- if (obj.type === 'number' || obj.type === 'integer')
587
- return 'number';
588
- if (obj.type === 'boolean')
589
- return 'boolean';
590
- if (obj.type === 'null')
591
- return 'null';
592
- if (obj.type === 'array') {
593
- const itemType = obj.items ? convertType(obj.items, depth) : 'any';
594
- return `${itemType}[]`;
595
- }
596
- if (obj.type === 'object' && obj.properties) {
597
- const props = Object.entries(obj.properties)
598
- .map(([key, prop]) => {
599
- const optional = !obj.required?.includes(key) ? '?' : '';
600
- const type = convertType(prop, depth + 1);
601
- const desc = prop.description ? ` // ${prop.description}` : '';
602
- return `${indent} ${key}${optional}: ${type};${desc}`;
603
- })
604
- .join('\n');
605
- return `{\n${props}\n${indent}}`;
606
- }
607
- if (obj.anyOf) {
608
- return obj.anyOf.map((subSchema) => convertType(subSchema, depth)).join(' | ');
609
- }
610
- return 'any';
611
- }
612
- const interfaceName = schema.title || 'Schema';
613
- const interfaceBody = convertType(schema, 0);
614
- return `interface ${interfaceName} ${interfaceBody}`;
615
- }
616
- jsonSchemaToNlpDataStructure(schema) {
617
- // Convert JSON schema to natural language data structure description
618
- function describe(obj, depth = 0) {
619
- const indent = ' '.repeat(depth);
620
- if (!obj || typeof obj !== 'object') {
621
- return 'any value';
622
- }
623
- if (obj.description) {
624
- return obj.description;
625
- }
626
- if (obj.type === 'string')
627
- return 'text string';
628
- if (obj.type === 'number' || obj.type === 'integer')
629
- return 'number';
630
- if (obj.type === 'boolean')
631
- return 'true/false value';
632
- if (obj.type === 'null')
633
- return 'null value';
634
- if (obj.type === 'array') {
635
- const itemDesc = obj.items ? describe(obj.items, depth) : 'any item';
636
- return `array of ${itemDesc}`;
637
- }
638
- if (obj.type === 'object' && obj.properties) {
639
- const props = Object.entries(obj.properties)
640
- .map(([key, prop]) => {
641
- const optional = !obj.required?.includes(key) ? ' (optional)' : '';
642
- const desc = describe(prop, depth + 1);
643
- return `${indent}- ${key}${optional}: ${desc}`;
644
- })
645
- .join('\n');
646
- return `object containing:\n${props}`;
647
- }
648
- if (obj.anyOf) {
649
- return `one of: ${obj.anyOf.map((subSchema) => describe(subSchema, depth)).join(', ')}`;
650
- }
651
- return 'value';
652
- }
653
- return describe(schema, 0);
654
- }
655
- _getPatternAttribute(pattern, attribute) {
656
- // Navigate schema using pattern and return specified attribute
657
- const currentSchema = this._expandedObjectSchema;
658
- // Special case: "*" means the root schema itself
659
- if (pattern.trim() === '*') {
660
- if (attribute === 'X-FieldPrompt') {
661
- return currentSchema[attribute] || currentSchema.description || null;
662
- }
663
- if (attribute === 'type') {
664
- return this.schemaToTsType(currentSchema);
665
- }
666
- return currentSchema[attribute] || null;
667
- }
668
- const parts = pattern.split('.');
669
- let current = currentSchema;
670
- let index = 0;
671
- while (index < parts.length) {
672
- const part = parts[index];
673
- if (part === '*' || /^\d+$/.test(part)) {
674
- // Handle wildcard case for arrays
675
- if (current.items) {
676
- current = current.items;
677
- index++;
678
- }
679
- else {
680
- // Invalid use of "*" for the current schema
681
- return null;
682
- }
683
- }
684
- else if (current.properties && part in current.properties) {
685
- // Handle normal property navigation
686
- current = current.properties[part];
687
- index++;
688
- }
689
- else {
690
- // Cannot navigate further; invalid pattern
691
- return null;
692
- }
693
- }
694
- // At this point, we've navigated to the target node
695
- if (attribute === 'X-FieldPrompt') {
696
- return current[attribute] || current.description || null;
697
- }
698
- else if (attribute === 'type') {
699
- return this.schemaToTsType(current);
700
- }
701
- return current[attribute] || null;
702
- }
703
- _setPatternAttribute(pattern, attribute, value) {
704
- // Navigate schema using pattern and set attribute at target location
705
- let current = this.json_schema;
706
- const definitions = this.json_schema.$defs || {};
707
- const parts = pattern.split('.');
708
- if (pattern.trim() === '*') {
709
- // Special case: "*" means the root schema itself
710
- current[attribute] = value;
711
- return;
712
- }
713
- if (attribute === 'X-SystemPrompt') {
714
- throw new Error('Cannot set the X-SystemPrompt attribute other than at the root schema.');
715
- }
716
- let index = 0;
717
- while (index < parts.length) {
718
- const part = parts[index];
719
- if (part === '*' || /^\d+$/.test(part)) {
720
- // Handle the array case
721
- if (current.items) {
722
- current = current.items;
723
- index++;
724
- }
725
- else {
726
- return; // Invalid pattern for the current schema
727
- }
728
- }
729
- else if (current.properties && part in current.properties) {
730
- // Handle the properties case
731
- current = current.properties[part];
732
- index++;
733
- }
734
- else if (current.$ref) {
735
- // Handle the $ref case
736
- const ref = current.$ref;
737
- if (!ref.startsWith('#/$defs/')) {
738
- return;
739
- }
740
- const refName = ref.substring('#/$defs/'.length);
741
- if (!definitions[refName]) {
742
- return;
743
- }
744
- // Count how many times this ref is used in the entire schema
745
- const refCount = JSON.stringify(this.json_schema).split(`"${ref}"`).length - 1;
746
- if (refCount > 1) {
747
- // Create a unique copy name by appending a number
748
- let copyNum = 1;
749
- let nextCopyName = `${refName}Copy${copyNum}`;
750
- while (definitions[nextCopyName]) {
751
- copyNum++;
752
- nextCopyName = `${refName}Copy${copyNum}`;
753
- }
754
- // Create a copy of the definition
755
- const defCopy = JSON.parse(JSON.stringify(definitions[refName]));
756
- // Change the title and name of the definition
757
- if (defCopy.title) {
758
- defCopy.title = `${defCopy.title} Copy ${copyNum}`;
759
- }
760
- if (defCopy.name) {
761
- defCopy.name = nextCopyName;
762
- }
763
- // Add the new copy to definitions
764
- definitions[nextCopyName] = defCopy;
765
- // Update the reference
766
- current.$ref = `#/$defs/${nextCopyName}`;
767
- current = definitions[nextCopyName];
768
- }
769
- else {
770
- // Reference is used only once; directly navigate to the definition
771
- current = definitions[refName];
772
- }
773
- }
774
- else {
775
- // Cannot navigate further; invalid pattern
776
- return;
777
- }
778
- }
779
- // Once we have navigated to the correct node, set the attribute
780
- current[attribute] = value;
781
- }
782
- schemaToTsType(schema) {
783
- // Convert JSON schema type to TypeScript type representation
784
- if (!schema || typeof schema !== 'object') {
785
- return 'any';
786
- }
787
- if (schema.type === 'string')
788
- return 'string';
789
- if (schema.type === 'number' || schema.type === 'integer')
790
- return 'number';
791
- if (schema.type === 'boolean')
792
- return 'boolean';
793
- if (schema.type === 'null')
794
- return 'null';
795
- if (schema.type === 'array') {
796
- const itemType = schema.items ? this.schemaToTsType(schema.items) : 'any';
797
- return `${itemType}[]`;
798
- }
799
- if (schema.type === 'object') {
800
- if (schema.properties) {
801
- const props = Object.entries(schema.properties)
802
- .map(([key, prop]) => {
803
- const optional = !schema.required?.includes(key) ? '?' : '';
804
- const type = this.schemaToTsType(prop);
805
- return `${key}${optional}: ${type}`;
806
- })
807
- .join('; ');
808
- return `{ ${props} }`;
809
- }
810
- return 'object';
811
- }
812
- if (schema.anyOf) {
813
- return schema.anyOf.map((subSchema) => this.schemaToTsType(subSchema)).join(' | ');
814
- }
815
- if (schema.enum) {
816
- return schema.enum.map((e) => typeof e === 'string' ? `"${e}"` : String(e)).join(' | ');
817
- }
818
- return 'any';
819
- }
820
- mergeAllOfSchemas(allOfSchemas) {
821
- // Merge multiple schemas from allOf into a single schema
822
- const merged = {};
823
- for (const subschema of allOfSchemas) {
824
- if (subschema.$ref) {
825
- // Handle $ref within allOf - this would need to be resolved first
826
- // For now, we'll include the $ref as-is
827
- Object.assign(merged, subschema);
828
- }
829
- else {
830
- // Merge properties, required fields, etc.
831
- if (subschema.type && !merged.type) {
832
- merged.type = subschema.type;
833
- }
834
- if (subschema.properties) {
835
- if (!merged.properties) {
836
- merged.properties = {};
837
- }
838
- Object.assign(merged.properties, subschema.properties);
839
- }
840
- if (subschema.required) {
841
- if (!merged.required) {
842
- merged.required = [];
843
- }
844
- // Merge required arrays, avoiding duplicates
845
- for (const field of subschema.required) {
846
- if (!merged.required.includes(field)) {
847
- merged.required.push(field);
848
- }
849
- }
850
- }
851
- // Copy other schema properties
852
- for (const [key, value] of Object.entries(subschema)) {
853
- if (!['type', 'properties', 'required'].includes(key) && !merged[key]) {
854
- merged[key] = value;
855
- }
856
- }
857
- }
858
- }
859
- return merged;
860
- }
861
- }