@retab/node 0.0.48 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -215
- package/dist/api/client.d.ts +2 -2
- package/dist/api/client.d.ts.map +1 -1
- package/dist/api/client.js +2 -2
- package/dist/api/documents/client.d.ts +3 -3
- package/dist/api/documents/client.d.ts.map +1 -1
- package/dist/api/documents/client.js +3 -3
- package/dist/api/projects/client.d.ts +15 -0
- package/dist/api/projects/client.d.ts.map +1 -0
- package/dist/api/projects/client.js +43 -0
- package/dist/api/projects/documents/client.d.ts +12 -0
- package/dist/api/projects/documents/client.d.ts.map +1 -0
- package/dist/api/projects/documents/client.js +39 -0
- package/dist/api/projects/iterations/client.d.ts +17 -0
- package/dist/api/projects/iterations/client.d.ts.map +1 -0
- package/dist/api/projects/iterations/client.js +64 -0
- package/dist/client.d.ts +1 -0
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +6 -1
- package/dist/generated_types.d.ts +17837 -40090
- package/dist/generated_types.d.ts.map +1 -1
- package/dist/generated_types.js +309 -979
- package/dist/index.d.ts +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/types.d.ts +188 -80
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +22 -1
- package/package.json +6 -9
- package/dist/api/consensus/client.d.ts +0 -7
- package/dist/api/consensus/client.d.ts.map +0 -1
- package/dist/api/consensus/client.js +0 -14
- package/dist/errors.d.ts +0 -34
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -53
- package/dist/resource.d.ts +0 -12
- package/dist/resource.d.ts.map +0 -1
- package/dist/resource.js +0 -19
- package/dist/resources/consensus/completions.d.ts +0 -66
- package/dist/resources/consensus/completions.d.ts.map +0 -1
- package/dist/resources/consensus/completions.js +0 -84
- package/dist/resources/consensus/index.d.ts +0 -72
- package/dist/resources/consensus/index.d.ts.map +0 -1
- package/dist/resources/consensus/index.js +0 -76
- package/dist/resources/consensus/responses.d.ts +0 -69
- package/dist/resources/consensus/responses.d.ts.map +0 -1
- package/dist/resources/consensus/responses.js +0 -99
- package/dist/resources/documents/extractions.d.ts +0 -74
- package/dist/resources/documents/extractions.d.ts.map +0 -1
- package/dist/resources/documents/extractions.js +0 -196
- package/dist/resources/documents/index.d.ts +0 -21
- package/dist/resources/documents/index.d.ts.map +0 -1
- package/dist/resources/documents/index.js +0 -55
- package/dist/resources/evaluations/documents.d.ts +0 -40
- package/dist/resources/evaluations/documents.d.ts.map +0 -1
- package/dist/resources/evaluations/documents.js +0 -123
- package/dist/resources/evaluations/index.d.ts +0 -14
- package/dist/resources/evaluations/index.d.ts.map +0 -1
- package/dist/resources/evaluations/index.js +0 -17
- package/dist/resources/evaluations/iterations.d.ts +0 -50
- package/dist/resources/evaluations/iterations.d.ts.map +0 -1
- package/dist/resources/evaluations/iterations.js +0 -156
- package/dist/resources/files.d.ts +0 -82
- package/dist/resources/files.d.ts.map +0 -1
- package/dist/resources/files.js +0 -150
- package/dist/resources/finetuning.d.ts +0 -105
- package/dist/resources/finetuning.d.ts.map +0 -1
- package/dist/resources/finetuning.js +0 -181
- package/dist/resources/index.d.ts +0 -11
- package/dist/resources/index.d.ts.map +0 -1
- package/dist/resources/index.js +0 -10
- package/dist/resources/models.d.ts +0 -57
- package/dist/resources/models.d.ts.map +0 -1
- package/dist/resources/models.js +0 -72
- package/dist/resources/processors/automations/endpoints.d.ts +0 -90
- package/dist/resources/processors/automations/endpoints.d.ts.map +0 -1
- package/dist/resources/processors/automations/endpoints.js +0 -145
- package/dist/resources/processors/automations/index.d.ts +0 -7
- package/dist/resources/processors/automations/index.d.ts.map +0 -1
- package/dist/resources/processors/automations/index.js +0 -6
- package/dist/resources/processors/automations/links.d.ts +0 -90
- package/dist/resources/processors/automations/links.d.ts.map +0 -1
- package/dist/resources/processors/automations/links.js +0 -149
- package/dist/resources/processors/automations/logs.d.ts +0 -35
- package/dist/resources/processors/automations/logs.d.ts.map +0 -1
- package/dist/resources/processors/automations/logs.js +0 -60
- package/dist/resources/processors/automations/mailboxes.d.ts +0 -102
- package/dist/resources/processors/automations/mailboxes.d.ts.map +0 -1
- package/dist/resources/processors/automations/mailboxes.js +0 -157
- package/dist/resources/processors/automations/outlook.d.ts +0 -114
- package/dist/resources/processors/automations/outlook.d.ts.map +0 -1
- package/dist/resources/processors/automations/outlook.js +0 -170
- package/dist/resources/processors/automations/tests.d.ts +0 -58
- package/dist/resources/processors/automations/tests.d.ts.map +0 -1
- package/dist/resources/processors/automations/tests.js +0 -90
- package/dist/resources/processors/index.d.ts +0 -303
- package/dist/resources/processors/index.d.ts.map +0 -1
- package/dist/resources/processors/index.js +0 -261
- package/dist/resources/schemas.d.ts +0 -63
- package/dist/resources/schemas.d.ts.map +0 -1
- package/dist/resources/schemas.js +0 -183
- package/dist/resources/secrets/external_api_keys.d.ts +0 -61
- package/dist/resources/secrets/external_api_keys.d.ts.map +0 -1
- package/dist/resources/secrets/external_api_keys.js +0 -120
- package/dist/resources/secrets/index.d.ts +0 -14
- package/dist/resources/secrets/index.d.ts.map +0 -1
- package/dist/resources/secrets/index.js +0 -17
- package/dist/resources/secrets/webhooks.d.ts +0 -73
- package/dist/resources/secrets/webhooks.d.ts.map +0 -1
- package/dist/resources/secrets/webhooks.js +0 -145
- package/dist/resources/usage.d.ts +0 -223
- package/dist/resources/usage.d.ts.map +0 -1
- package/dist/resources/usage.js +0 -310
- package/dist/types/ai_models.d.ts +0 -389
- package/dist/types/ai_models.d.ts.map +0 -1
- package/dist/types/ai_models.js +0 -145
- package/dist/types/automations/cron.d.ts +0 -28
- package/dist/types/automations/cron.d.ts.map +0 -1
- package/dist/types/automations/cron.js +0 -1
- package/dist/types/automations/endpoints.d.ts +0 -13
- package/dist/types/automations/endpoints.d.ts.map +0 -1
- package/dist/types/automations/endpoints.js +0 -1
- package/dist/types/automations/index.d.ts +0 -7
- package/dist/types/automations/index.d.ts.map +0 -1
- package/dist/types/automations/index.js +0 -6
- package/dist/types/automations/links.d.ts +0 -15
- package/dist/types/automations/links.d.ts.map +0 -1
- package/dist/types/automations/links.js +0 -1
- package/dist/types/automations/mailboxes.d.ts +0 -18
- package/dist/types/automations/mailboxes.d.ts.map +0 -1
- package/dist/types/automations/mailboxes.js +0 -1
- package/dist/types/automations/outlook.d.ts +0 -37
- package/dist/types/automations/outlook.d.ts.map +0 -1
- package/dist/types/automations/outlook.js +0 -1
- package/dist/types/automations/webhooks.d.ts +0 -13
- package/dist/types/automations/webhooks.d.ts.map +0 -1
- package/dist/types/automations/webhooks.js +0 -1
- package/dist/types/browser_canvas.d.ts +0 -4
- package/dist/types/browser_canvas.d.ts.map +0 -1
- package/dist/types/browser_canvas.js +0 -2
- package/dist/types/chat.d.ts +0 -99
- package/dist/types/chat.d.ts.map +0 -1
- package/dist/types/chat.js +0 -20
- package/dist/types/consensus.d.ts +0 -10
- package/dist/types/consensus.d.ts.map +0 -1
- package/dist/types/consensus.js +0 -1
- package/dist/types/db/annotations.d.ts +0 -108
- package/dist/types/db/annotations.d.ts.map +0 -1
- package/dist/types/db/annotations.js +0 -6
- package/dist/types/db/files.d.ts +0 -133
- package/dist/types/db/files.d.ts.map +0 -1
- package/dist/types/db/files.js +0 -5
- package/dist/types/documents/extractions.d.ts +0 -1849
- package/dist/types/documents/extractions.d.ts.map +0 -1
- package/dist/types/documents/extractions.js +0 -211
- package/dist/types/documents/processing.d.ts +0 -249
- package/dist/types/documents/processing.d.ts.map +0 -1
- package/dist/types/documents/processing.js +0 -6
- package/dist/types/evaluations/iterations.d.ts +0 -41
- package/dist/types/evaluations/iterations.d.ts.map +0 -1
- package/dist/types/evaluations/iterations.js +0 -1
- package/dist/types/jobs/base.d.ts +0 -162
- package/dist/types/jobs/base.d.ts.map +0 -1
- package/dist/types/jobs/base.js +0 -6
- package/dist/types/jobs/specialized.d.ts +0 -200
- package/dist/types/jobs/specialized.d.ts.map +0 -1
- package/dist/types/jobs/specialized.js +0 -37
- package/dist/types/logs.d.ts +0 -92
- package/dist/types/logs.d.ts.map +0 -1
- package/dist/types/logs.js +0 -1
- package/dist/types/mime.d.ts +0 -426
- package/dist/types/mime.d.ts.map +0 -1
- package/dist/types/mime.js +0 -48
- package/dist/types/modalities.d.ts +0 -31
- package/dist/types/modalities.d.ts.map +0 -1
- package/dist/types/modalities.js +0 -109
- package/dist/types/pagination.d.ts +0 -5
- package/dist/types/pagination.d.ts.map +0 -1
- package/dist/types/pagination.js +0 -1
- package/dist/types/schemas/enhancement.d.ts +0 -250
- package/dist/types/schemas/enhancement.d.ts.map +0 -1
- package/dist/types/schemas/enhancement.js +0 -6
- package/dist/types/schemas/generate.d.ts +0 -160
- package/dist/types/schemas/generate.d.ts.map +0 -1
- package/dist/types/schemas/generate.js +0 -19
- package/dist/types/schemas/object.d.ts +0 -116
- package/dist/types/schemas/object.d.ts.map +0 -1
- package/dist/types/schemas/object.js +0 -861
- package/dist/types/secrets/external_api_keys.d.ts +0 -27
- package/dist/types/secrets/external_api_keys.d.ts.map +0 -1
- package/dist/types/secrets/external_api_keys.js +0 -11
- package/dist/types/secrets/index.d.ts +0 -2
- package/dist/types/secrets/index.d.ts.map +0 -1
- package/dist/types/secrets/index.js +0 -1
- package/dist/types/standards.d.ts +0 -37
- package/dist/types/standards.d.ts.map +0 -1
- package/dist/types/standards.js +0 -1
- package/dist/utils/ai_models.d.ts +0 -10
- package/dist/utils/ai_models.d.ts.map +0 -1
- package/dist/utils/ai_models.js +0 -183
- package/dist/utils/batch_processing.d.ts +0 -227
- package/dist/utils/batch_processing.d.ts.map +0 -1
- package/dist/utils/batch_processing.js +0 -268
- package/dist/utils/benchmarking.d.ts +0 -115
- package/dist/utils/benchmarking.d.ts.map +0 -1
- package/dist/utils/benchmarking.js +0 -355
- package/dist/utils/chat.d.ts +0 -70
- package/dist/utils/chat.d.ts.map +0 -1
- package/dist/utils/chat.js +0 -79
- package/dist/utils/cost_calculation.d.ts +0 -26
- package/dist/utils/cost_calculation.d.ts.map +0 -1
- package/dist/utils/cost_calculation.js +0 -99
- package/dist/utils/datasets.d.ts +0 -135
- package/dist/utils/datasets.d.ts.map +0 -1
- package/dist/utils/datasets.js +0 -359
- package/dist/utils/display.d.ts +0 -108
- package/dist/utils/display.d.ts.map +0 -1
- package/dist/utils/display.js +0 -244
- package/dist/utils/hash.d.ts +0 -18
- package/dist/utils/hash.d.ts.map +0 -1
- package/dist/utils/hash.js +0 -31
- package/dist/utils/hashing.d.ts +0 -18
- package/dist/utils/hashing.d.ts.map +0 -1
- package/dist/utils/hashing.js +0 -28
- package/dist/utils/index.d.ts +0 -8
- package/dist/utils/index.d.ts.map +0 -1
- package/dist/utils/index.js +0 -10
- package/dist/utils/json_schema.d.ts +0 -18
- package/dist/utils/json_schema.d.ts.map +0 -1
- package/dist/utils/json_schema.js +0 -334
- package/dist/utils/json_schema_utils.d.ts +0 -42
- package/dist/utils/json_schema_utils.d.ts.map +0 -1
- package/dist/utils/json_schema_utils.js +0 -212
- package/dist/utils/jsonl.d.ts +0 -60
- package/dist/utils/jsonl.d.ts.map +0 -1
- package/dist/utils/jsonl.js +0 -259
- package/dist/utils/mime.d.ts +0 -6
- package/dist/utils/mime.d.ts.map +0 -1
- package/dist/utils/mime.js +0 -129
- package/dist/utils/model_cards.d.ts +0 -219
- package/dist/utils/model_cards.d.ts.map +0 -1
- package/dist/utils/model_cards.js +0 -462
- package/dist/utils/prompt_optimization.d.ts +0 -96
- package/dist/utils/prompt_optimization.d.ts.map +0 -1
- package/dist/utils/prompt_optimization.js +0 -275
- package/dist/utils/responses.d.ts +0 -35
- package/dist/utils/responses.d.ts.map +0 -1
- package/dist/utils/responses.js +0 -37
- package/dist/utils/stream.d.ts +0 -13
- package/dist/utils/stream.d.ts.map +0 -1
- package/dist/utils/stream.js +0 -64
- package/dist/utils/stream_context_managers.d.ts +0 -147
- package/dist/utils/stream_context_managers.d.ts.map +0 -1
- package/dist/utils/stream_context_managers.js +0 -380
- package/dist/utils/usage.d.ts +0 -57
- package/dist/utils/usage.d.ts.map +0 -1
- package/dist/utils/usage.js +0 -97
- package/dist/utils/webhook_secrets.d.ts +0 -59
- package/dist/utils/webhook_secrets.d.ts.map +0 -1
- package/dist/utils/webhook_secrets.js +0 -107
- package/dist/utils/zod_to_json_schema.d.ts +0 -11
- package/dist/utils/zod_to_json_schema.d.ts.map +0 -1
- package/dist/utils/zod_to_json_schema.js +0 -123
|
@@ -1,861 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
import { generateSchemaDataId, generateSchemaId, loadJsonSchema } from '../../utils/json_schema_utils.js';
|
|
3
|
-
import { zodToJsonSchema } from '../../utils/zod_to_json_schema.js';
|
|
4
|
-
/**
 * Zod validator for a serialized schema payload.
 *
 * Fields:
 * - `object`: must be the literal string 'schema'.
 * - `created_at`: datetime string (validated by `z.string().datetime()`).
 * - `json_schema`: record of arbitrary values; defaults to `{}` when omitted.
 * - `strict`: boolean; defaults to `true` when omitted.
 */
export const PartialSchemaSchema = z.object({
    object: z.literal('schema'),
    created_at: z
        .string()
        .datetime(),
    json_schema: z
        .record(z.any())
        .default({}),
    strict: z
        .boolean()
        .default(true),
});
|
|
10
|
-
/**
 * Zod validator for one streamed schema chunk.
 *
 * Fields:
 * - `object`: must be the literal string 'schema.chunk'.
 * - `created_at`: datetime string (validated by `z.string().datetime()`).
 * - `delta_json_schema_flat`: record of arbitrary values; defaults to `{}`.
 * - `streaming_error`: unvalidated custom value; may be null or omitted.
 */
export const PartialSchemaChunkSchema = z.object({
    object: z.literal('schema.chunk'),
    created_at: z
        .string()
        .datetime(),
    delta_json_schema_flat: z
        .record(z.any())
        .default({}),
    streaming_error: z
        .custom()
        .nullable()
        .optional(),
});
|
|
16
|
-
/**
 * Zod validator that parses a payload with the PartialSchemaSchema shape and
 * turns the parsed result into a `Schema` instance.
 *
 * The transform callback only runs at parse time, so referencing the `Schema`
 * class declared further down in this module is safe.
 */
export const SchemaSchema = PartialSchemaSchema
    .extend({})
    .transform(function toSchemaInstance(parsed) {
        return new Schema(parsed);
    });
|
|
17
|
-
export class Schema {
|
|
18
|
-
constructor(data) {
|
|
19
|
-
this.object = 'schema';
|
|
20
|
-
this.json_schema = {};
|
|
21
|
-
this.strict = true;
|
|
22
|
-
this.created_at = new Date().toISOString();
|
|
23
|
-
// Validate input like Python version
|
|
24
|
-
if (data.json_schema && data.pydanticModel) {
|
|
25
|
-
throw new Error('Cannot provide both json_schema and pydanticModel');
|
|
26
|
-
}
|
|
27
|
-
if (!data.json_schema && !data.pydanticModel && !data.zod_model) {
|
|
28
|
-
throw new Error('Must provide either json_schema, pydanticModel, or zod_model');
|
|
29
|
-
}
|
|
30
|
-
if (data.json_schema) {
|
|
31
|
-
this.json_schema = loadJsonSchema(data.json_schema);
|
|
32
|
-
}
|
|
33
|
-
else if (data.pydanticModel) {
|
|
34
|
-
// For pydantic models, we expect the model to have a model_json_schema() method
|
|
35
|
-
// In Node.js context, this would be a pre-serialized schema from Python
|
|
36
|
-
if (typeof data.pydanticModel === 'object' && data.pydanticModel.model_json_schema) {
|
|
37
|
-
this.json_schema = data.pydanticModel.model_json_schema();
|
|
38
|
-
}
|
|
39
|
-
else if (typeof data.pydanticModel === 'object' && data.pydanticModel.schema) {
|
|
40
|
-
this.json_schema = data.pydanticModel.schema;
|
|
41
|
-
}
|
|
42
|
-
else {
|
|
43
|
-
throw new Error('pydanticModel must have a model_json_schema() method or schema property');
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
else if (data.zod_model) {
|
|
47
|
-
this._zodModel = data.zod_model;
|
|
48
|
-
// Convert Zod to JSON Schema using proper converter
|
|
49
|
-
this.json_schema = zodToJsonSchema(data.zod_model);
|
|
50
|
-
// Add system prompt if provided
|
|
51
|
-
if (data.system_prompt) {
|
|
52
|
-
this.json_schema['X-SystemPrompt'] = data.system_prompt;
|
|
53
|
-
}
|
|
54
|
-
// Add reasoning prompts if provided
|
|
55
|
-
if (data.reasoning_prompts) {
|
|
56
|
-
for (const [field, prompt] of Object.entries(data.reasoning_prompts)) {
|
|
57
|
-
if (this.json_schema.properties && this.json_schema.properties[field]) {
|
|
58
|
-
this.json_schema.properties[field]['X-ReasoningPrompt'] = prompt;
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
get dataId() {
|
|
65
|
-
return generateSchemaDataId(this.json_schema);
|
|
66
|
-
}
|
|
67
|
-
get id() {
|
|
68
|
-
return generateSchemaId(this.json_schema);
|
|
69
|
-
}
|
|
70
|
-
get inference_json_schema() {
|
|
71
|
-
// Returns the schema formatted for structured output with OpenAI requirements
|
|
72
|
-
if (this.strict) {
|
|
73
|
-
// For strict schemas, convert to OpenAI-compatible format
|
|
74
|
-
const inferenceSchema = this.jsonSchemaToStrictOpenaiSchema(JSON.parse(JSON.stringify(this._reasoningObjectSchema)));
|
|
75
|
-
if (typeof inferenceSchema !== 'object' || inferenceSchema === null) {
|
|
76
|
-
throw new Error('Validation Error: The inference_json_schema is not a dict');
|
|
77
|
-
}
|
|
78
|
-
return inferenceSchema;
|
|
79
|
-
}
|
|
80
|
-
else {
|
|
81
|
-
// For non-strict schemas, return a deep copy of the reasoning schema without strict modifications
|
|
82
|
-
return JSON.parse(JSON.stringify(this._reasoningObjectSchema));
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
get inferenceJsonSchema() {
|
|
86
|
-
// Alias for backwards compatibility
|
|
87
|
-
return this.inference_json_schema;
|
|
88
|
-
}
|
|
89
|
-
get openaiMessages() {
|
|
90
|
-
// Returns messages formatted for OpenAI's API
|
|
91
|
-
return this.messages.map(msg => ({
|
|
92
|
-
role: msg.role,
|
|
93
|
-
content: msg.content
|
|
94
|
-
}));
|
|
95
|
-
}
|
|
96
|
-
get anthropicSystemPrompt() {
|
|
97
|
-
return 'Return your response as a JSON object following the provided schema.' + this.systemPrompt;
|
|
98
|
-
}
|
|
99
|
-
get anthropicMessages() {
|
|
100
|
-
// Returns messages in Anthropic's Claude format
|
|
101
|
-
return this.messages.slice(1); // Skip system message
|
|
102
|
-
}
|
|
103
|
-
get geminiSystemPrompt() {
|
|
104
|
-
return this.systemPrompt;
|
|
105
|
-
}
|
|
106
|
-
get geminiMessages() {
|
|
107
|
-
// Returns messages formatted for Google's Gemini API
|
|
108
|
-
return this.messages.slice(1);
|
|
109
|
-
}
|
|
110
|
-
get inferenceGeminiJsonSchema() {
|
|
111
|
-
// Convert schema for Gemini compatibility (no anyOf, etc.)
|
|
112
|
-
const schema = { ...this._reasoningObjectSchema };
|
|
113
|
-
// Remove unsupported Gemini fields recursively
|
|
114
|
-
const removeUnsupportedFields = (obj) => {
|
|
115
|
-
if (typeof obj !== 'object' || obj === null)
|
|
116
|
-
return obj;
|
|
117
|
-
if (Array.isArray(obj)) {
|
|
118
|
-
return obj.map(removeUnsupportedFields);
|
|
119
|
-
}
|
|
120
|
-
const result = { ...obj };
|
|
121
|
-
// Remove Gemini-unsupported fields
|
|
122
|
-
delete result.anyOf;
|
|
123
|
-
delete result.oneOf;
|
|
124
|
-
delete result.allOf;
|
|
125
|
-
delete result.not;
|
|
126
|
-
delete result.if;
|
|
127
|
-
delete result.then;
|
|
128
|
-
delete result.else;
|
|
129
|
-
// Recursively process nested objects
|
|
130
|
-
Object.keys(result).forEach(key => {
|
|
131
|
-
if (typeof result[key] === 'object' && result[key] !== null) {
|
|
132
|
-
result[key] = removeUnsupportedFields(result[key]);
|
|
133
|
-
}
|
|
134
|
-
});
|
|
135
|
-
return result;
|
|
136
|
-
};
|
|
137
|
-
return removeUnsupportedFields(schema);
|
|
138
|
-
}
|
|
139
|
-
get inferenceTypescriptInterface() {
|
|
140
|
-
// Returns TypeScript interface representation of the inference schema
|
|
141
|
-
return this.jsonSchemaToTypescriptInterface(this._reasoningObjectSchema);
|
|
142
|
-
}
|
|
143
|
-
get inferenceNlpDataStructure() {
|
|
144
|
-
// Returns NLP data structure representation of the inference schema
|
|
145
|
-
return this.jsonSchemaToNlpDataStructure(this._reasoningObjectSchema);
|
|
146
|
-
}
|
|
147
|
-
get developerSystemPrompt() {
|
|
148
|
-
return `
|
|
149
|
-
# General Instructions
|
|
150
|
-
|
|
151
|
-
You are an expert in data extraction and structured data outputs.
|
|
152
|
-
|
|
153
|
-
When provided with a **JSON schema** and a **document**, you must:
|
|
154
|
-
|
|
155
|
-
1. Carefully extract all relevant data from the provided document according to the given schema.
|
|
156
|
-
2. Return extracted data strictly formatted according to the provided schema.
|
|
157
|
-
3. Make sure that the extracted values are **UTF-8** encodable strings.
|
|
158
|
-
4. Avoid generating bytes, binary data, base64 encoded data, or other non-UTF-8 encodable data.
|
|
159
|
-
|
|
160
|
-
---
|
|
161
|
-
|
|
162
|
-
## Date and Time Formatting
|
|
163
|
-
|
|
164
|
-
When extracting date, time, or datetime values:
|
|
165
|
-
|
|
166
|
-
- **Always use ISO format** for dates and times (e.g., "2023-12-25", "14:30:00", "2023-12-25T14:30:00Z")
|
|
167
|
-
- **Include timezone information** when available (e.g., "2023-12-25T14:30:00+02:00")
|
|
168
|
-
- **Use UTC timezone** when timezone is not specified or unclear (e.g., "2023-12-25T14:30:00Z")
|
|
169
|
-
- **Maintain precision** as found in the source document (seconds, milliseconds if present)
|
|
170
|
-
|
|
171
|
-
---
|
|
172
|
-
|
|
173
|
-
## Handling Missing and Nullable Fields
|
|
174
|
-
|
|
175
|
-
### Nullable Leaf Attributes
|
|
176
|
-
|
|
177
|
-
- If valid data is missing or not explicitly present, set leaf attributes explicitly to \`null\`.
|
|
178
|
-
- **Do NOT** use empty strings (\`""\`), placeholder values, or fabricated data.
|
|
179
|
-
|
|
180
|
-
### Nullable Nested Objects
|
|
181
|
-
|
|
182
|
-
- If an entire nested object's data is missing or incomplete, **do NOT** set the object itself to \`null\`.
|
|
183
|
-
- Keep the object structure fully intact, explicitly setting each leaf attribute within to \`null\`.
|
|
184
|
-
- This preserves overall structure and explicitly communicates exactly which fields lack data.
|
|
185
|
-
|
|
186
|
-
---
|
|
187
|
-
|
|
188
|
-
## Reasoning Fields
|
|
189
|
-
|
|
190
|
-
Your schema includes special reasoning fields (\`reasoning___*\`) used exclusively to document your extraction logic. These fields are for detailed explanations and will not appear in final outputs.
|
|
191
|
-
|
|
192
|
-
You MUST include these details explicitly in your reasoning fields:
|
|
193
|
-
|
|
194
|
-
- **Explicit Evidence**: Quote specific lines or phrases from the document confirming your extraction.
|
|
195
|
-
- **Decision Justification**: Clearly justify why specific data was chosen or rejected.
|
|
196
|
-
- **Calculations/Transformations**: Document explicitly any computations, unit conversions, or normalizations.
|
|
197
|
-
- **Alternative Interpretations**: Explicitly describe any alternative data interpretations considered and why you rejected them.
|
|
198
|
-
- **Confidence and Assumptions**: Clearly state your confidence level and explicitly articulate any assumptions.
|
|
199
|
-
|
|
200
|
-
---
|
|
201
|
-
|
|
202
|
-
## Source Fields
|
|
203
|
-
|
|
204
|
-
Some leaf fields require you to explicitly provide the source of the data (verbatim from the document).
|
|
205
|
-
The idea is to simply provide a verbatim quote from the document, without any additional formatting or commentary, keeping it as close as possible to the original text.
|
|
206
|
-
Make sure to reasonably include some surrounding text to provide context about the quote.
|
|
207
|
-
|
|
208
|
-
You can easily identify the fields that require a source by the \`quote___[attributename]\` naming pattern.
|
|
209
|
-
|
|
210
|
-
---
|
|
211
|
-
|
|
212
|
-
# User Defined System Prompt
|
|
213
|
-
|
|
214
|
-
`;
|
|
215
|
-
}
|
|
216
|
-
get userSystemPrompt() {
|
|
217
|
-
return this.json_schema['X-SystemPrompt'] || '';
|
|
218
|
-
}
|
|
219
|
-
get schemaSystemPrompt() {
|
|
220
|
-
return (this.inferenceNlpDataStructure +
|
|
221
|
-
'\n---\n' +
|
|
222
|
-
'## Expected output schema as a TypeScript interface for better readability:\n\n' +
|
|
223
|
-
this.inferenceTypescriptInterface);
|
|
224
|
-
}
|
|
225
|
-
get systemPrompt() {
|
|
226
|
-
return this.developerSystemPrompt + '\n\n' + this.userSystemPrompt + '\n\n' + this.schemaSystemPrompt;
|
|
227
|
-
}
|
|
228
|
-
get title() {
|
|
229
|
-
return this.json_schema.title || 'NoTitle';
|
|
230
|
-
}
|
|
231
|
-
get _expandedObjectSchema() {
|
|
232
|
-
// Returns schema with all references expanded inline
|
|
233
|
-
return this.expandRefs(JSON.parse(JSON.stringify(this.json_schema)));
|
|
234
|
-
}
|
|
235
|
-
get _reasoningObjectSchema() {
|
|
236
|
-
// Returns schema with inference-specific modifications (reasoning fields added)
|
|
237
|
-
return this.createReasoningSchema(JSON.parse(JSON.stringify(this._expandedObjectSchema)));
|
|
238
|
-
}
|
|
239
|
-
get messages() {
|
|
240
|
-
return [{ role: 'developer', content: this.systemPrompt }];
|
|
241
|
-
}
|
|
242
|
-
get openai_messages() {
|
|
243
|
-
return [{ role: 'developer', content: this.systemPrompt }];
|
|
244
|
-
}
|
|
245
|
-
get zod_model() {
|
|
246
|
-
if (this._zodModel) {
|
|
247
|
-
return this._zodModel;
|
|
248
|
-
}
|
|
249
|
-
// Convert JSON schema to basic Zod schema for validation
|
|
250
|
-
return z.object({}).passthrough();
|
|
251
|
-
}
|
|
252
|
-
getPatternAttribute(pattern, attribute) {
|
|
253
|
-
return this._getPatternAttribute(pattern, attribute);
|
|
254
|
-
}
|
|
255
|
-
setPatternAttribute(pattern, attribute, value) {
|
|
256
|
-
this._setPatternAttribute(pattern, attribute, value);
|
|
257
|
-
}
|
|
258
|
-
save(path) {
|
|
259
|
-
// Save JSON schema to file
|
|
260
|
-
try {
|
|
261
|
-
const fs = require('fs');
|
|
262
|
-
fs.writeFileSync(path, JSON.stringify(this.json_schema, null, 2), 'utf8');
|
|
263
|
-
}
|
|
264
|
-
catch (error) {
|
|
265
|
-
if (error instanceof Error && error.message.includes('Cannot find module')) {
|
|
266
|
-
throw new Error('save() method not available in browser environment');
|
|
267
|
-
}
|
|
268
|
-
throw error;
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
static validate(data) {
|
|
272
|
-
return new Schema(data);
|
|
273
|
-
}
|
|
274
|
-
createReasoningSchema(schema) {
|
|
275
|
-
// Add reasoning fields to the schema structure, matching Python implementation
|
|
276
|
-
const processedSchema = this.insertReasoningFieldsInner(JSON.parse(JSON.stringify(schema)));
|
|
277
|
-
// Add root reasoning if schema has X-ReasoningPrompt
|
|
278
|
-
const rootReasoning = processedSchema.rootReasoning;
|
|
279
|
-
if (rootReasoning && processedSchema.updatedSchema.type === 'object') {
|
|
280
|
-
if (!processedSchema.updatedSchema.properties) {
|
|
281
|
-
processedSchema.updatedSchema.properties = {};
|
|
282
|
-
}
|
|
283
|
-
// Add reasoning___root field
|
|
284
|
-
processedSchema.updatedSchema.properties.reasoning___root = {
|
|
285
|
-
type: 'string',
|
|
286
|
-
description: rootReasoning
|
|
287
|
-
};
|
|
288
|
-
// Add to required fields if needed
|
|
289
|
-
if (processedSchema.updatedSchema.required) {
|
|
290
|
-
processedSchema.updatedSchema.required.push('reasoning___root');
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
// Clean custom fields like Python implementation
|
|
294
|
-
return this.cleanSchema(processedSchema.updatedSchema, { removeCustomFields: true });
|
|
295
|
-
}
|
|
296
|
-
insertReasoningFieldsInner(schema) {
|
|
297
|
-
// Extract X-ReasoningPrompt from this node
|
|
298
|
-
const reasoningDesc = schema['X-ReasoningPrompt'] || null;
|
|
299
|
-
delete schema['X-ReasoningPrompt'];
|
|
300
|
-
const nodeType = schema.type;
|
|
301
|
-
// Process children recursively
|
|
302
|
-
if (nodeType === 'object' && schema.properties) {
|
|
303
|
-
const newProps = {};
|
|
304
|
-
for (const [propertyKey, propertyValue] of Object.entries(schema.properties)) {
|
|
305
|
-
const { updatedSchema: updatedProp, rootReasoning: childReasoning } = this.insertReasoningFieldsInner(propertyValue);
|
|
306
|
-
newProps[propertyKey] = updatedProp;
|
|
307
|
-
// ALWAYS add reasoning field for every property (Python behavior)
|
|
308
|
-
const reasoningDescription = childReasoning || `Reasoning for ${propertyKey}`;
|
|
309
|
-
newProps[`reasoning___${propertyKey}`] = {
|
|
310
|
-
type: 'string',
|
|
311
|
-
description: reasoningDescription
|
|
312
|
-
};
|
|
313
|
-
// Add to required if property is required
|
|
314
|
-
if (schema.required && schema.required.includes(propertyKey)) {
|
|
315
|
-
if (!schema.required.includes(`reasoning___${propertyKey}`)) {
|
|
316
|
-
schema.required.push(`reasoning___${propertyKey}`);
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
schema.properties = newProps;
|
|
321
|
-
}
|
|
322
|
-
else if (nodeType === 'array' && schema.items) {
|
|
323
|
-
// Process array items
|
|
324
|
-
const { updatedSchema: updatedItems, rootReasoning: itemReasoning } = this.insertReasoningFieldsInner(schema.items);
|
|
325
|
-
schema.items = updatedItems;
|
|
326
|
-
// Always add reasoning___item if items are objects (Python behavior)
|
|
327
|
-
if (updatedItems.type === 'object') {
|
|
328
|
-
if (!updatedItems.properties) {
|
|
329
|
-
updatedItems.properties = {};
|
|
330
|
-
}
|
|
331
|
-
// Add reasoning___item as first property
|
|
332
|
-
const reasoningKey = 'reasoning___item';
|
|
333
|
-
const reasoningDescription = itemReasoning || 'Reasoning for this item';
|
|
334
|
-
const newProperties = {
|
|
335
|
-
[reasoningKey]: {
|
|
336
|
-
type: 'string',
|
|
337
|
-
description: reasoningDescription
|
|
338
|
-
}
|
|
339
|
-
};
|
|
340
|
-
// Add existing properties
|
|
341
|
-
Object.assign(newProperties, updatedItems.properties);
|
|
342
|
-
updatedItems.properties = newProperties;
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
return {
|
|
346
|
-
updatedSchema: schema,
|
|
347
|
-
rootReasoning: reasoningDesc
|
|
348
|
-
};
|
|
349
|
-
}
|
|
350
|
-
cleanSchema(schema, options = {}) {
|
|
351
|
-
const { removeCustomFields = false } = options;
|
|
352
|
-
function cleanObject(obj) {
|
|
353
|
-
if (typeof obj !== 'object' || obj === null)
|
|
354
|
-
return obj;
|
|
355
|
-
if (Array.isArray(obj))
|
|
356
|
-
return obj.map(cleanObject);
|
|
357
|
-
const result = {};
|
|
358
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
359
|
-
// Remove custom fields if requested
|
|
360
|
-
if (removeCustomFields && key.startsWith('X-')) {
|
|
361
|
-
continue;
|
|
362
|
-
}
|
|
363
|
-
result[key] = cleanObject(value);
|
|
364
|
-
}
|
|
365
|
-
return result;
|
|
366
|
-
}
|
|
367
|
-
return cleanObject(schema);
|
|
368
|
-
}
|
|
369
|
-
jsonSchemaToStrictOpenaiSchema(schema) {
|
|
370
|
-
// Convert schema to OpenAI strict format, matching Python implementation exactly
|
|
371
|
-
function makeStrict(obj) {
|
|
372
|
-
if (typeof obj !== 'object' || obj === null)
|
|
373
|
-
return obj;
|
|
374
|
-
if (Array.isArray(obj))
|
|
375
|
-
return obj.map(makeStrict);
|
|
376
|
-
const result = { ...obj };
|
|
377
|
-
// Remove unsupported fields (matching Python implementation)
|
|
378
|
-
for (const key of ['default', 'format', 'X-FieldTranslation', 'X-EnumTranslation']) {
|
|
379
|
-
delete result[key];
|
|
380
|
-
}
|
|
381
|
-
// Convert integer to number (Python requirement)
|
|
382
|
-
if (result.type === 'integer') {
|
|
383
|
-
result.type = 'number';
|
|
384
|
-
}
|
|
385
|
-
else if (Array.isArray(result.type)) {
|
|
386
|
-
result.type = result.type.map((t) => t === 'integer' ? 'number' : t);
|
|
387
|
-
}
|
|
388
|
-
// Handle allOf (merge all schemas)
|
|
389
|
-
if (result.allOf) {
|
|
390
|
-
const subschemas = result.allOf;
|
|
391
|
-
delete result.allOf;
|
|
392
|
-
const merged = {};
|
|
393
|
-
for (const subschema of subschemas) {
|
|
394
|
-
if (subschema.$ref) {
|
|
395
|
-
merged.$ref = subschema.$ref;
|
|
396
|
-
}
|
|
397
|
-
else {
|
|
398
|
-
Object.assign(merged, makeStrict(subschema));
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
Object.assign(result, merged);
|
|
402
|
-
}
|
|
403
|
-
// Handle anyOf
|
|
404
|
-
if (result.anyOf) {
|
|
405
|
-
result.anyOf = result.anyOf.map(makeStrict);
|
|
406
|
-
}
|
|
407
|
-
// Handle enum (force to string)
|
|
408
|
-
if (result.enum) {
|
|
409
|
-
result.enum = result.enum.map((e) => String(e));
|
|
410
|
-
result.type = 'string';
|
|
411
|
-
}
|
|
412
|
-
// Handle object type - make all properties required and set additionalProperties: false
|
|
413
|
-
if (result.type === 'object' && result.properties) {
|
|
414
|
-
result.required = Object.keys(result.properties); // All properties required in strict mode
|
|
415
|
-
result.additionalProperties = false;
|
|
416
|
-
const newProperties = {};
|
|
417
|
-
for (const [key, prop] of Object.entries(result.properties)) {
|
|
418
|
-
newProperties[key] = makeStrict(prop);
|
|
419
|
-
}
|
|
420
|
-
result.properties = newProperties;
|
|
421
|
-
}
|
|
422
|
-
// Handle array items
|
|
423
|
-
if (result.type === 'array' && result.items) {
|
|
424
|
-
result.items = makeStrict(result.items);
|
|
425
|
-
}
|
|
426
|
-
// Handle $defs
|
|
427
|
-
if (result.$defs) {
|
|
428
|
-
const newDefs = {};
|
|
429
|
-
for (const [key, def] of Object.entries(result.$defs)) {
|
|
430
|
-
newDefs[key] = makeStrict(def);
|
|
431
|
-
}
|
|
432
|
-
result.$defs = newDefs;
|
|
433
|
-
}
|
|
434
|
-
return result;
|
|
435
|
-
}
|
|
436
|
-
return makeStrict(schema);
|
|
437
|
-
}
|
|
438
|
-
expandRefs(schema) {
|
|
439
|
-
// Check for cyclic references first
|
|
440
|
-
if (this.hasCyclicRefs(schema)) {
|
|
441
|
-
// Cyclic references detected, keeping schema unchanged
|
|
442
|
-
return schema;
|
|
443
|
-
}
|
|
444
|
-
const definitions = schema.$defs ? { ...schema.$defs } : {};
|
|
445
|
-
delete schema.$defs; // Remove $defs from the schema copy
|
|
446
|
-
// Handle allOf at root level - merge all schemas
|
|
447
|
-
if (schema.allOf) {
|
|
448
|
-
const merged = this.mergeAllOfSchemas(schema.allOf);
|
|
449
|
-
// Merge the allOf result into the current schema
|
|
450
|
-
Object.assign(schema, merged);
|
|
451
|
-
delete schema.allOf;
|
|
452
|
-
}
|
|
453
|
-
return this.expandRefsRecursive(schema, definitions);
|
|
454
|
-
}
|
|
455
|
-
hasCyclicRefs(schema) {
|
|
456
|
-
const definitions = schema.$defs || {};
|
|
457
|
-
if (!definitions || Object.keys(definitions).length === 0) {
|
|
458
|
-
return false;
|
|
459
|
-
}
|
|
460
|
-
const memo = {};
|
|
461
|
-
const dfs = (defName, stack) => {
|
|
462
|
-
if (stack.has(defName)) {
|
|
463
|
-
return true; // Cycle detected
|
|
464
|
-
}
|
|
465
|
-
if (defName in memo) {
|
|
466
|
-
return memo[defName];
|
|
467
|
-
}
|
|
468
|
-
stack.add(defName);
|
|
469
|
-
const node = definitions[defName];
|
|
470
|
-
if (!node) {
|
|
471
|
-
stack.delete(defName);
|
|
472
|
-
memo[defName] = false;
|
|
473
|
-
return false;
|
|
474
|
-
}
|
|
475
|
-
const result = this.traverseForCycles(node, stack, definitions);
|
|
476
|
-
stack.delete(defName);
|
|
477
|
-
memo[defName] = result;
|
|
478
|
-
return result;
|
|
479
|
-
};
|
|
480
|
-
// Check each definition for cycles
|
|
481
|
-
for (const defName of Object.keys(definitions)) {
|
|
482
|
-
if (dfs(defName, new Set())) {
|
|
483
|
-
return true;
|
|
484
|
-
}
|
|
485
|
-
}
|
|
486
|
-
return false;
|
|
487
|
-
}
|
|
488
|
-
traverseForCycles(node, stack, definitions) {
|
|
489
|
-
if (typeof node !== 'object' || node === null) {
|
|
490
|
-
return false;
|
|
491
|
-
}
|
|
492
|
-
if (Array.isArray(node)) {
|
|
493
|
-
return node.some(item => this.traverseForCycles(item, stack, definitions));
|
|
494
|
-
}
|
|
495
|
-
// Check for $ref
|
|
496
|
-
if (node.$ref) {
|
|
497
|
-
const refPath = node.$ref;
|
|
498
|
-
if (refPath.startsWith('#/$defs/')) {
|
|
499
|
-
const targetDef = refPath.substring('#/$defs/'.length);
|
|
500
|
-
if (stack.has(targetDef)) {
|
|
501
|
-
return true; // Cycle detected
|
|
502
|
-
}
|
|
503
|
-
if (definitions[targetDef]) {
|
|
504
|
-
const newStack = new Set(stack);
|
|
505
|
-
newStack.add(targetDef);
|
|
506
|
-
return this.traverseForCycles(definitions[targetDef], newStack, definitions);
|
|
507
|
-
}
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
// Traverse all properties except $ref
|
|
511
|
-
for (const [key, value] of Object.entries(node)) {
|
|
512
|
-
if (key === '$ref')
|
|
513
|
-
continue;
|
|
514
|
-
if (this.traverseForCycles(value, stack, definitions)) {
|
|
515
|
-
return true;
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
|
-
return false;
|
|
519
|
-
}
|
|
520
|
-
expandRefsRecursive(obj, definitions) {
|
|
521
|
-
if (typeof obj !== 'object' || obj === null) {
|
|
522
|
-
return obj;
|
|
523
|
-
}
|
|
524
|
-
if (Array.isArray(obj)) {
|
|
525
|
-
return obj.map(item => this.expandRefsRecursive(item, definitions));
|
|
526
|
-
}
|
|
527
|
-
if (obj.$ref) {
|
|
528
|
-
const refPath = obj.$ref;
|
|
529
|
-
if (refPath.startsWith('#/$defs/')) {
|
|
530
|
-
const defName = refPath.substring('#/$defs/'.length);
|
|
531
|
-
if (definitions[defName]) {
|
|
532
|
-
const target = definitions[defName];
|
|
533
|
-
// Merge descriptions if present
|
|
534
|
-
const merged = this.mergeDescriptions(obj, target);
|
|
535
|
-
delete merged.$ref;
|
|
536
|
-
return this.expandRefsRecursive(merged, definitions);
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
return obj;
|
|
540
|
-
}
|
|
541
|
-
const result = {};
|
|
542
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
543
|
-
if (key === 'properties' && typeof value === 'object' && value !== null) {
|
|
544
|
-
const newProps = {};
|
|
545
|
-
for (const [propKey, propValue] of Object.entries(value)) {
|
|
546
|
-
newProps[propKey] = this.expandRefsRecursive(propValue, definitions);
|
|
547
|
-
}
|
|
548
|
-
result[key] = newProps;
|
|
549
|
-
}
|
|
550
|
-
else if (key === 'items') {
|
|
551
|
-
result[key] = this.expandRefsRecursive(value, definitions);
|
|
552
|
-
}
|
|
553
|
-
else if (key === '$defs' && typeof value === 'object' && value !== null) {
|
|
554
|
-
const newDefs = {};
|
|
555
|
-
for (const [defKey, defValue] of Object.entries(value)) {
|
|
556
|
-
newDefs[defKey] = this.expandRefsRecursive(defValue, definitions);
|
|
557
|
-
}
|
|
558
|
-
result[key] = newDefs;
|
|
559
|
-
}
|
|
560
|
-
else {
|
|
561
|
-
result[key] = this.expandRefsRecursive(value, definitions);
|
|
562
|
-
}
|
|
563
|
-
}
|
|
564
|
-
return result;
|
|
565
|
-
}
|
|
566
|
-
mergeDescriptions(source, target) {
|
|
567
|
-
const merged = { ...target };
|
|
568
|
-
// If source has description and target doesn't, use source's description
|
|
569
|
-
if (source.description && !target.description) {
|
|
570
|
-
merged.description = source.description;
|
|
571
|
-
}
|
|
572
|
-
return merged;
|
|
573
|
-
}
|
|
574
|
-
jsonSchemaToTypescriptInterface(schema) {
|
|
575
|
-
// Convert JSON schema to TypeScript interface
|
|
576
|
-
function convertType(obj, depth = 0) {
|
|
577
|
-
const indent = ' '.repeat(depth);
|
|
578
|
-
if (!obj || typeof obj !== 'object') {
|
|
579
|
-
return 'any';
|
|
580
|
-
}
|
|
581
|
-
if (obj.enum) {
|
|
582
|
-
return obj.enum.map((e) => typeof e === 'string' ? `"${e}"` : String(e)).join(' | ');
|
|
583
|
-
}
|
|
584
|
-
if (obj.type === 'string')
|
|
585
|
-
return 'string';
|
|
586
|
-
if (obj.type === 'number' || obj.type === 'integer')
|
|
587
|
-
return 'number';
|
|
588
|
-
if (obj.type === 'boolean')
|
|
589
|
-
return 'boolean';
|
|
590
|
-
if (obj.type === 'null')
|
|
591
|
-
return 'null';
|
|
592
|
-
if (obj.type === 'array') {
|
|
593
|
-
const itemType = obj.items ? convertType(obj.items, depth) : 'any';
|
|
594
|
-
return `${itemType}[]`;
|
|
595
|
-
}
|
|
596
|
-
if (obj.type === 'object' && obj.properties) {
|
|
597
|
-
const props = Object.entries(obj.properties)
|
|
598
|
-
.map(([key, prop]) => {
|
|
599
|
-
const optional = !obj.required?.includes(key) ? '?' : '';
|
|
600
|
-
const type = convertType(prop, depth + 1);
|
|
601
|
-
const desc = prop.description ? ` // ${prop.description}` : '';
|
|
602
|
-
return `${indent} ${key}${optional}: ${type};${desc}`;
|
|
603
|
-
})
|
|
604
|
-
.join('\n');
|
|
605
|
-
return `{\n${props}\n${indent}}`;
|
|
606
|
-
}
|
|
607
|
-
if (obj.anyOf) {
|
|
608
|
-
return obj.anyOf.map((subSchema) => convertType(subSchema, depth)).join(' | ');
|
|
609
|
-
}
|
|
610
|
-
return 'any';
|
|
611
|
-
}
|
|
612
|
-
const interfaceName = schema.title || 'Schema';
|
|
613
|
-
const interfaceBody = convertType(schema, 0);
|
|
614
|
-
return `interface ${interfaceName} ${interfaceBody}`;
|
|
615
|
-
}
|
|
616
|
-
jsonSchemaToNlpDataStructure(schema) {
|
|
617
|
-
// Convert JSON schema to natural language data structure description
|
|
618
|
-
function describe(obj, depth = 0) {
|
|
619
|
-
const indent = ' '.repeat(depth);
|
|
620
|
-
if (!obj || typeof obj !== 'object') {
|
|
621
|
-
return 'any value';
|
|
622
|
-
}
|
|
623
|
-
if (obj.description) {
|
|
624
|
-
return obj.description;
|
|
625
|
-
}
|
|
626
|
-
if (obj.type === 'string')
|
|
627
|
-
return 'text string';
|
|
628
|
-
if (obj.type === 'number' || obj.type === 'integer')
|
|
629
|
-
return 'number';
|
|
630
|
-
if (obj.type === 'boolean')
|
|
631
|
-
return 'true/false value';
|
|
632
|
-
if (obj.type === 'null')
|
|
633
|
-
return 'null value';
|
|
634
|
-
if (obj.type === 'array') {
|
|
635
|
-
const itemDesc = obj.items ? describe(obj.items, depth) : 'any item';
|
|
636
|
-
return `array of ${itemDesc}`;
|
|
637
|
-
}
|
|
638
|
-
if (obj.type === 'object' && obj.properties) {
|
|
639
|
-
const props = Object.entries(obj.properties)
|
|
640
|
-
.map(([key, prop]) => {
|
|
641
|
-
const optional = !obj.required?.includes(key) ? ' (optional)' : '';
|
|
642
|
-
const desc = describe(prop, depth + 1);
|
|
643
|
-
return `${indent}- ${key}${optional}: ${desc}`;
|
|
644
|
-
})
|
|
645
|
-
.join('\n');
|
|
646
|
-
return `object containing:\n${props}`;
|
|
647
|
-
}
|
|
648
|
-
if (obj.anyOf) {
|
|
649
|
-
return `one of: ${obj.anyOf.map((subSchema) => describe(subSchema, depth)).join(', ')}`;
|
|
650
|
-
}
|
|
651
|
-
return 'value';
|
|
652
|
-
}
|
|
653
|
-
return describe(schema, 0);
|
|
654
|
-
}
|
|
655
|
-
_getPatternAttribute(pattern, attribute) {
|
|
656
|
-
// Navigate schema using pattern and return specified attribute
|
|
657
|
-
const currentSchema = this._expandedObjectSchema;
|
|
658
|
-
// Special case: "*" means the root schema itself
|
|
659
|
-
if (pattern.trim() === '*') {
|
|
660
|
-
if (attribute === 'X-FieldPrompt') {
|
|
661
|
-
return currentSchema[attribute] || currentSchema.description || null;
|
|
662
|
-
}
|
|
663
|
-
if (attribute === 'type') {
|
|
664
|
-
return this.schemaToTsType(currentSchema);
|
|
665
|
-
}
|
|
666
|
-
return currentSchema[attribute] || null;
|
|
667
|
-
}
|
|
668
|
-
const parts = pattern.split('.');
|
|
669
|
-
let current = currentSchema;
|
|
670
|
-
let index = 0;
|
|
671
|
-
while (index < parts.length) {
|
|
672
|
-
const part = parts[index];
|
|
673
|
-
if (part === '*' || /^\d+$/.test(part)) {
|
|
674
|
-
// Handle wildcard case for arrays
|
|
675
|
-
if (current.items) {
|
|
676
|
-
current = current.items;
|
|
677
|
-
index++;
|
|
678
|
-
}
|
|
679
|
-
else {
|
|
680
|
-
// Invalid use of "*" for the current schema
|
|
681
|
-
return null;
|
|
682
|
-
}
|
|
683
|
-
}
|
|
684
|
-
else if (current.properties && part in current.properties) {
|
|
685
|
-
// Handle normal property navigation
|
|
686
|
-
current = current.properties[part];
|
|
687
|
-
index++;
|
|
688
|
-
}
|
|
689
|
-
else {
|
|
690
|
-
// Cannot navigate further; invalid pattern
|
|
691
|
-
return null;
|
|
692
|
-
}
|
|
693
|
-
}
|
|
694
|
-
// At this point, we've navigated to the target node
|
|
695
|
-
if (attribute === 'X-FieldPrompt') {
|
|
696
|
-
return current[attribute] || current.description || null;
|
|
697
|
-
}
|
|
698
|
-
else if (attribute === 'type') {
|
|
699
|
-
return this.schemaToTsType(current);
|
|
700
|
-
}
|
|
701
|
-
return current[attribute] || null;
|
|
702
|
-
}
|
|
703
|
-
_setPatternAttribute(pattern, attribute, value) {
|
|
704
|
-
// Navigate schema using pattern and set attribute at target location
|
|
705
|
-
let current = this.json_schema;
|
|
706
|
-
const definitions = this.json_schema.$defs || {};
|
|
707
|
-
const parts = pattern.split('.');
|
|
708
|
-
if (pattern.trim() === '*') {
|
|
709
|
-
// Special case: "*" means the root schema itself
|
|
710
|
-
current[attribute] = value;
|
|
711
|
-
return;
|
|
712
|
-
}
|
|
713
|
-
if (attribute === 'X-SystemPrompt') {
|
|
714
|
-
throw new Error('Cannot set the X-SystemPrompt attribute other than at the root schema.');
|
|
715
|
-
}
|
|
716
|
-
let index = 0;
|
|
717
|
-
while (index < parts.length) {
|
|
718
|
-
const part = parts[index];
|
|
719
|
-
if (part === '*' || /^\d+$/.test(part)) {
|
|
720
|
-
// Handle the array case
|
|
721
|
-
if (current.items) {
|
|
722
|
-
current = current.items;
|
|
723
|
-
index++;
|
|
724
|
-
}
|
|
725
|
-
else {
|
|
726
|
-
return; // Invalid pattern for the current schema
|
|
727
|
-
}
|
|
728
|
-
}
|
|
729
|
-
else if (current.properties && part in current.properties) {
|
|
730
|
-
// Handle the properties case
|
|
731
|
-
current = current.properties[part];
|
|
732
|
-
index++;
|
|
733
|
-
}
|
|
734
|
-
else if (current.$ref) {
|
|
735
|
-
// Handle the $ref case
|
|
736
|
-
const ref = current.$ref;
|
|
737
|
-
if (!ref.startsWith('#/$defs/')) {
|
|
738
|
-
return;
|
|
739
|
-
}
|
|
740
|
-
const refName = ref.substring('#/$defs/'.length);
|
|
741
|
-
if (!definitions[refName]) {
|
|
742
|
-
return;
|
|
743
|
-
}
|
|
744
|
-
// Count how many times this ref is used in the entire schema
|
|
745
|
-
const refCount = JSON.stringify(this.json_schema).split(`"${ref}"`).length - 1;
|
|
746
|
-
if (refCount > 1) {
|
|
747
|
-
// Create a unique copy name by appending a number
|
|
748
|
-
let copyNum = 1;
|
|
749
|
-
let nextCopyName = `${refName}Copy${copyNum}`;
|
|
750
|
-
while (definitions[nextCopyName]) {
|
|
751
|
-
copyNum++;
|
|
752
|
-
nextCopyName = `${refName}Copy${copyNum}`;
|
|
753
|
-
}
|
|
754
|
-
// Create a copy of the definition
|
|
755
|
-
const defCopy = JSON.parse(JSON.stringify(definitions[refName]));
|
|
756
|
-
// Change the title and name of the definition
|
|
757
|
-
if (defCopy.title) {
|
|
758
|
-
defCopy.title = `${defCopy.title} Copy ${copyNum}`;
|
|
759
|
-
}
|
|
760
|
-
if (defCopy.name) {
|
|
761
|
-
defCopy.name = nextCopyName;
|
|
762
|
-
}
|
|
763
|
-
// Add the new copy to definitions
|
|
764
|
-
definitions[nextCopyName] = defCopy;
|
|
765
|
-
// Update the reference
|
|
766
|
-
current.$ref = `#/$defs/${nextCopyName}`;
|
|
767
|
-
current = definitions[nextCopyName];
|
|
768
|
-
}
|
|
769
|
-
else {
|
|
770
|
-
// Reference is used only once; directly navigate to the definition
|
|
771
|
-
current = definitions[refName];
|
|
772
|
-
}
|
|
773
|
-
}
|
|
774
|
-
else {
|
|
775
|
-
// Cannot navigate further; invalid pattern
|
|
776
|
-
return;
|
|
777
|
-
}
|
|
778
|
-
}
|
|
779
|
-
// Once we have navigated to the correct node, set the attribute
|
|
780
|
-
current[attribute] = value;
|
|
781
|
-
}
|
|
782
|
-
schemaToTsType(schema) {
|
|
783
|
-
// Convert JSON schema type to TypeScript type representation
|
|
784
|
-
if (!schema || typeof schema !== 'object') {
|
|
785
|
-
return 'any';
|
|
786
|
-
}
|
|
787
|
-
if (schema.type === 'string')
|
|
788
|
-
return 'string';
|
|
789
|
-
if (schema.type === 'number' || schema.type === 'integer')
|
|
790
|
-
return 'number';
|
|
791
|
-
if (schema.type === 'boolean')
|
|
792
|
-
return 'boolean';
|
|
793
|
-
if (schema.type === 'null')
|
|
794
|
-
return 'null';
|
|
795
|
-
if (schema.type === 'array') {
|
|
796
|
-
const itemType = schema.items ? this.schemaToTsType(schema.items) : 'any';
|
|
797
|
-
return `${itemType}[]`;
|
|
798
|
-
}
|
|
799
|
-
if (schema.type === 'object') {
|
|
800
|
-
if (schema.properties) {
|
|
801
|
-
const props = Object.entries(schema.properties)
|
|
802
|
-
.map(([key, prop]) => {
|
|
803
|
-
const optional = !schema.required?.includes(key) ? '?' : '';
|
|
804
|
-
const type = this.schemaToTsType(prop);
|
|
805
|
-
return `${key}${optional}: ${type}`;
|
|
806
|
-
})
|
|
807
|
-
.join('; ');
|
|
808
|
-
return `{ ${props} }`;
|
|
809
|
-
}
|
|
810
|
-
return 'object';
|
|
811
|
-
}
|
|
812
|
-
if (schema.anyOf) {
|
|
813
|
-
return schema.anyOf.map((subSchema) => this.schemaToTsType(subSchema)).join(' | ');
|
|
814
|
-
}
|
|
815
|
-
if (schema.enum) {
|
|
816
|
-
return schema.enum.map((e) => typeof e === 'string' ? `"${e}"` : String(e)).join(' | ');
|
|
817
|
-
}
|
|
818
|
-
return 'any';
|
|
819
|
-
}
|
|
820
|
-
mergeAllOfSchemas(allOfSchemas) {
|
|
821
|
-
// Merge multiple schemas from allOf into a single schema
|
|
822
|
-
const merged = {};
|
|
823
|
-
for (const subschema of allOfSchemas) {
|
|
824
|
-
if (subschema.$ref) {
|
|
825
|
-
// Handle $ref within allOf - this would need to be resolved first
|
|
826
|
-
// For now, we'll include the $ref as-is
|
|
827
|
-
Object.assign(merged, subschema);
|
|
828
|
-
}
|
|
829
|
-
else {
|
|
830
|
-
// Merge properties, required fields, etc.
|
|
831
|
-
if (subschema.type && !merged.type) {
|
|
832
|
-
merged.type = subschema.type;
|
|
833
|
-
}
|
|
834
|
-
if (subschema.properties) {
|
|
835
|
-
if (!merged.properties) {
|
|
836
|
-
merged.properties = {};
|
|
837
|
-
}
|
|
838
|
-
Object.assign(merged.properties, subschema.properties);
|
|
839
|
-
}
|
|
840
|
-
if (subschema.required) {
|
|
841
|
-
if (!merged.required) {
|
|
842
|
-
merged.required = [];
|
|
843
|
-
}
|
|
844
|
-
// Merge required arrays, avoiding duplicates
|
|
845
|
-
for (const field of subschema.required) {
|
|
846
|
-
if (!merged.required.includes(field)) {
|
|
847
|
-
merged.required.push(field);
|
|
848
|
-
}
|
|
849
|
-
}
|
|
850
|
-
}
|
|
851
|
-
// Copy other schema properties
|
|
852
|
-
for (const [key, value] of Object.entries(subschema)) {
|
|
853
|
-
if (!['type', 'properties', 'required'].includes(key) && !merged[key]) {
|
|
854
|
-
merged[key] = value;
|
|
855
|
-
}
|
|
856
|
-
}
|
|
857
|
-
}
|
|
858
|
-
}
|
|
859
|
-
return merged;
|
|
860
|
-
}
|
|
861
|
-
}
|