@retab/node 0.0.48 → 0.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/README.md +8 -215
  2. package/dist/api/client.d.ts +2 -2
  3. package/dist/api/client.d.ts.map +1 -1
  4. package/dist/api/client.js +2 -2
  5. package/dist/api/documents/client.d.ts +3 -3
  6. package/dist/api/documents/client.d.ts.map +1 -1
  7. package/dist/api/documents/client.js +3 -3
  8. package/dist/api/projects/client.d.ts +15 -0
  9. package/dist/api/projects/client.d.ts.map +1 -0
  10. package/dist/api/projects/client.js +43 -0
  11. package/dist/api/projects/documents/client.d.ts +12 -0
  12. package/dist/api/projects/documents/client.d.ts.map +1 -0
  13. package/dist/api/projects/documents/client.js +39 -0
  14. package/dist/api/projects/iterations/client.d.ts +17 -0
  15. package/dist/api/projects/iterations/client.d.ts.map +1 -0
  16. package/dist/api/projects/iterations/client.js +64 -0
  17. package/dist/client.d.ts +1 -0
  18. package/dist/client.d.ts.map +1 -1
  19. package/dist/client.js +6 -1
  20. package/dist/generated_types.d.ts +17837 -40090
  21. package/dist/generated_types.d.ts.map +1 -1
  22. package/dist/generated_types.js +309 -979
  23. package/dist/index.d.ts +7 -2
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +2 -2
  26. package/dist/types.d.ts +188 -80
  27. package/dist/types.d.ts.map +1 -1
  28. package/dist/types.js +22 -1
  29. package/package.json +6 -9
  30. package/dist/api/consensus/client.d.ts +0 -7
  31. package/dist/api/consensus/client.d.ts.map +0 -1
  32. package/dist/api/consensus/client.js +0 -14
  33. package/dist/errors.d.ts +0 -34
  34. package/dist/errors.d.ts.map +0 -1
  35. package/dist/errors.js +0 -53
  36. package/dist/resource.d.ts +0 -12
  37. package/dist/resource.d.ts.map +0 -1
  38. package/dist/resource.js +0 -19
  39. package/dist/resources/consensus/completions.d.ts +0 -66
  40. package/dist/resources/consensus/completions.d.ts.map +0 -1
  41. package/dist/resources/consensus/completions.js +0 -84
  42. package/dist/resources/consensus/index.d.ts +0 -72
  43. package/dist/resources/consensus/index.d.ts.map +0 -1
  44. package/dist/resources/consensus/index.js +0 -76
  45. package/dist/resources/consensus/responses.d.ts +0 -69
  46. package/dist/resources/consensus/responses.d.ts.map +0 -1
  47. package/dist/resources/consensus/responses.js +0 -99
  48. package/dist/resources/documents/extractions.d.ts +0 -74
  49. package/dist/resources/documents/extractions.d.ts.map +0 -1
  50. package/dist/resources/documents/extractions.js +0 -196
  51. package/dist/resources/documents/index.d.ts +0 -21
  52. package/dist/resources/documents/index.d.ts.map +0 -1
  53. package/dist/resources/documents/index.js +0 -55
  54. package/dist/resources/evaluations/documents.d.ts +0 -40
  55. package/dist/resources/evaluations/documents.d.ts.map +0 -1
  56. package/dist/resources/evaluations/documents.js +0 -123
  57. package/dist/resources/evaluations/index.d.ts +0 -14
  58. package/dist/resources/evaluations/index.d.ts.map +0 -1
  59. package/dist/resources/evaluations/index.js +0 -17
  60. package/dist/resources/evaluations/iterations.d.ts +0 -50
  61. package/dist/resources/evaluations/iterations.d.ts.map +0 -1
  62. package/dist/resources/evaluations/iterations.js +0 -156
  63. package/dist/resources/files.d.ts +0 -82
  64. package/dist/resources/files.d.ts.map +0 -1
  65. package/dist/resources/files.js +0 -150
  66. package/dist/resources/finetuning.d.ts +0 -105
  67. package/dist/resources/finetuning.d.ts.map +0 -1
  68. package/dist/resources/finetuning.js +0 -181
  69. package/dist/resources/index.d.ts +0 -11
  70. package/dist/resources/index.d.ts.map +0 -1
  71. package/dist/resources/index.js +0 -10
  72. package/dist/resources/models.d.ts +0 -57
  73. package/dist/resources/models.d.ts.map +0 -1
  74. package/dist/resources/models.js +0 -72
  75. package/dist/resources/processors/automations/endpoints.d.ts +0 -90
  76. package/dist/resources/processors/automations/endpoints.d.ts.map +0 -1
  77. package/dist/resources/processors/automations/endpoints.js +0 -145
  78. package/dist/resources/processors/automations/index.d.ts +0 -7
  79. package/dist/resources/processors/automations/index.d.ts.map +0 -1
  80. package/dist/resources/processors/automations/index.js +0 -6
  81. package/dist/resources/processors/automations/links.d.ts +0 -90
  82. package/dist/resources/processors/automations/links.d.ts.map +0 -1
  83. package/dist/resources/processors/automations/links.js +0 -149
  84. package/dist/resources/processors/automations/logs.d.ts +0 -35
  85. package/dist/resources/processors/automations/logs.d.ts.map +0 -1
  86. package/dist/resources/processors/automations/logs.js +0 -60
  87. package/dist/resources/processors/automations/mailboxes.d.ts +0 -102
  88. package/dist/resources/processors/automations/mailboxes.d.ts.map +0 -1
  89. package/dist/resources/processors/automations/mailboxes.js +0 -157
  90. package/dist/resources/processors/automations/outlook.d.ts +0 -114
  91. package/dist/resources/processors/automations/outlook.d.ts.map +0 -1
  92. package/dist/resources/processors/automations/outlook.js +0 -170
  93. package/dist/resources/processors/automations/tests.d.ts +0 -58
  94. package/dist/resources/processors/automations/tests.d.ts.map +0 -1
  95. package/dist/resources/processors/automations/tests.js +0 -90
  96. package/dist/resources/processors/index.d.ts +0 -303
  97. package/dist/resources/processors/index.d.ts.map +0 -1
  98. package/dist/resources/processors/index.js +0 -261
  99. package/dist/resources/schemas.d.ts +0 -63
  100. package/dist/resources/schemas.d.ts.map +0 -1
  101. package/dist/resources/schemas.js +0 -183
  102. package/dist/resources/secrets/external_api_keys.d.ts +0 -61
  103. package/dist/resources/secrets/external_api_keys.d.ts.map +0 -1
  104. package/dist/resources/secrets/external_api_keys.js +0 -120
  105. package/dist/resources/secrets/index.d.ts +0 -14
  106. package/dist/resources/secrets/index.d.ts.map +0 -1
  107. package/dist/resources/secrets/index.js +0 -17
  108. package/dist/resources/secrets/webhooks.d.ts +0 -73
  109. package/dist/resources/secrets/webhooks.d.ts.map +0 -1
  110. package/dist/resources/secrets/webhooks.js +0 -145
  111. package/dist/resources/usage.d.ts +0 -223
  112. package/dist/resources/usage.d.ts.map +0 -1
  113. package/dist/resources/usage.js +0 -310
  114. package/dist/types/ai_models.d.ts +0 -389
  115. package/dist/types/ai_models.d.ts.map +0 -1
  116. package/dist/types/ai_models.js +0 -145
  117. package/dist/types/automations/cron.d.ts +0 -28
  118. package/dist/types/automations/cron.d.ts.map +0 -1
  119. package/dist/types/automations/cron.js +0 -1
  120. package/dist/types/automations/endpoints.d.ts +0 -13
  121. package/dist/types/automations/endpoints.d.ts.map +0 -1
  122. package/dist/types/automations/endpoints.js +0 -1
  123. package/dist/types/automations/index.d.ts +0 -7
  124. package/dist/types/automations/index.d.ts.map +0 -1
  125. package/dist/types/automations/index.js +0 -6
  126. package/dist/types/automations/links.d.ts +0 -15
  127. package/dist/types/automations/links.d.ts.map +0 -1
  128. package/dist/types/automations/links.js +0 -1
  129. package/dist/types/automations/mailboxes.d.ts +0 -18
  130. package/dist/types/automations/mailboxes.d.ts.map +0 -1
  131. package/dist/types/automations/mailboxes.js +0 -1
  132. package/dist/types/automations/outlook.d.ts +0 -37
  133. package/dist/types/automations/outlook.d.ts.map +0 -1
  134. package/dist/types/automations/outlook.js +0 -1
  135. package/dist/types/automations/webhooks.d.ts +0 -13
  136. package/dist/types/automations/webhooks.d.ts.map +0 -1
  137. package/dist/types/automations/webhooks.js +0 -1
  138. package/dist/types/browser_canvas.d.ts +0 -4
  139. package/dist/types/browser_canvas.d.ts.map +0 -1
  140. package/dist/types/browser_canvas.js +0 -2
  141. package/dist/types/chat.d.ts +0 -99
  142. package/dist/types/chat.d.ts.map +0 -1
  143. package/dist/types/chat.js +0 -20
  144. package/dist/types/consensus.d.ts +0 -10
  145. package/dist/types/consensus.d.ts.map +0 -1
  146. package/dist/types/consensus.js +0 -1
  147. package/dist/types/db/annotations.d.ts +0 -108
  148. package/dist/types/db/annotations.d.ts.map +0 -1
  149. package/dist/types/db/annotations.js +0 -6
  150. package/dist/types/db/files.d.ts +0 -133
  151. package/dist/types/db/files.d.ts.map +0 -1
  152. package/dist/types/db/files.js +0 -5
  153. package/dist/types/documents/extractions.d.ts +0 -1849
  154. package/dist/types/documents/extractions.d.ts.map +0 -1
  155. package/dist/types/documents/extractions.js +0 -211
  156. package/dist/types/documents/processing.d.ts +0 -249
  157. package/dist/types/documents/processing.d.ts.map +0 -1
  158. package/dist/types/documents/processing.js +0 -6
  159. package/dist/types/evaluations/iterations.d.ts +0 -41
  160. package/dist/types/evaluations/iterations.d.ts.map +0 -1
  161. package/dist/types/evaluations/iterations.js +0 -1
  162. package/dist/types/jobs/base.d.ts +0 -162
  163. package/dist/types/jobs/base.d.ts.map +0 -1
  164. package/dist/types/jobs/base.js +0 -6
  165. package/dist/types/jobs/specialized.d.ts +0 -200
  166. package/dist/types/jobs/specialized.d.ts.map +0 -1
  167. package/dist/types/jobs/specialized.js +0 -37
  168. package/dist/types/logs.d.ts +0 -92
  169. package/dist/types/logs.d.ts.map +0 -1
  170. package/dist/types/logs.js +0 -1
  171. package/dist/types/mime.d.ts +0 -426
  172. package/dist/types/mime.d.ts.map +0 -1
  173. package/dist/types/mime.js +0 -48
  174. package/dist/types/modalities.d.ts +0 -31
  175. package/dist/types/modalities.d.ts.map +0 -1
  176. package/dist/types/modalities.js +0 -109
  177. package/dist/types/pagination.d.ts +0 -5
  178. package/dist/types/pagination.d.ts.map +0 -1
  179. package/dist/types/pagination.js +0 -1
  180. package/dist/types/schemas/enhancement.d.ts +0 -250
  181. package/dist/types/schemas/enhancement.d.ts.map +0 -1
  182. package/dist/types/schemas/enhancement.js +0 -6
  183. package/dist/types/schemas/generate.d.ts +0 -160
  184. package/dist/types/schemas/generate.d.ts.map +0 -1
  185. package/dist/types/schemas/generate.js +0 -19
  186. package/dist/types/schemas/object.d.ts +0 -116
  187. package/dist/types/schemas/object.d.ts.map +0 -1
  188. package/dist/types/schemas/object.js +0 -861
  189. package/dist/types/secrets/external_api_keys.d.ts +0 -27
  190. package/dist/types/secrets/external_api_keys.d.ts.map +0 -1
  191. package/dist/types/secrets/external_api_keys.js +0 -11
  192. package/dist/types/secrets/index.d.ts +0 -2
  193. package/dist/types/secrets/index.d.ts.map +0 -1
  194. package/dist/types/secrets/index.js +0 -1
  195. package/dist/types/standards.d.ts +0 -37
  196. package/dist/types/standards.d.ts.map +0 -1
  197. package/dist/types/standards.js +0 -1
  198. package/dist/utils/ai_models.d.ts +0 -10
  199. package/dist/utils/ai_models.d.ts.map +0 -1
  200. package/dist/utils/ai_models.js +0 -183
  201. package/dist/utils/batch_processing.d.ts +0 -227
  202. package/dist/utils/batch_processing.d.ts.map +0 -1
  203. package/dist/utils/batch_processing.js +0 -268
  204. package/dist/utils/benchmarking.d.ts +0 -115
  205. package/dist/utils/benchmarking.d.ts.map +0 -1
  206. package/dist/utils/benchmarking.js +0 -355
  207. package/dist/utils/chat.d.ts +0 -70
  208. package/dist/utils/chat.d.ts.map +0 -1
  209. package/dist/utils/chat.js +0 -79
  210. package/dist/utils/cost_calculation.d.ts +0 -26
  211. package/dist/utils/cost_calculation.d.ts.map +0 -1
  212. package/dist/utils/cost_calculation.js +0 -99
  213. package/dist/utils/datasets.d.ts +0 -135
  214. package/dist/utils/datasets.d.ts.map +0 -1
  215. package/dist/utils/datasets.js +0 -359
  216. package/dist/utils/display.d.ts +0 -108
  217. package/dist/utils/display.d.ts.map +0 -1
  218. package/dist/utils/display.js +0 -244
  219. package/dist/utils/hash.d.ts +0 -18
  220. package/dist/utils/hash.d.ts.map +0 -1
  221. package/dist/utils/hash.js +0 -31
  222. package/dist/utils/hashing.d.ts +0 -18
  223. package/dist/utils/hashing.d.ts.map +0 -1
  224. package/dist/utils/hashing.js +0 -28
  225. package/dist/utils/index.d.ts +0 -8
  226. package/dist/utils/index.d.ts.map +0 -1
  227. package/dist/utils/index.js +0 -10
  228. package/dist/utils/json_schema.d.ts +0 -18
  229. package/dist/utils/json_schema.d.ts.map +0 -1
  230. package/dist/utils/json_schema.js +0 -334
  231. package/dist/utils/json_schema_utils.d.ts +0 -42
  232. package/dist/utils/json_schema_utils.d.ts.map +0 -1
  233. package/dist/utils/json_schema_utils.js +0 -212
  234. package/dist/utils/jsonl.d.ts +0 -60
  235. package/dist/utils/jsonl.d.ts.map +0 -1
  236. package/dist/utils/jsonl.js +0 -259
  237. package/dist/utils/mime.d.ts +0 -6
  238. package/dist/utils/mime.d.ts.map +0 -1
  239. package/dist/utils/mime.js +0 -129
  240. package/dist/utils/model_cards.d.ts +0 -219
  241. package/dist/utils/model_cards.d.ts.map +0 -1
  242. package/dist/utils/model_cards.js +0 -462
  243. package/dist/utils/prompt_optimization.d.ts +0 -96
  244. package/dist/utils/prompt_optimization.d.ts.map +0 -1
  245. package/dist/utils/prompt_optimization.js +0 -275
  246. package/dist/utils/responses.d.ts +0 -35
  247. package/dist/utils/responses.d.ts.map +0 -1
  248. package/dist/utils/responses.js +0 -37
  249. package/dist/utils/stream.d.ts +0 -13
  250. package/dist/utils/stream.d.ts.map +0 -1
  251. package/dist/utils/stream.js +0 -64
  252. package/dist/utils/stream_context_managers.d.ts +0 -147
  253. package/dist/utils/stream_context_managers.d.ts.map +0 -1
  254. package/dist/utils/stream_context_managers.js +0 -380
  255. package/dist/utils/usage.d.ts +0 -57
  256. package/dist/utils/usage.d.ts.map +0 -1
  257. package/dist/utils/usage.js +0 -97
  258. package/dist/utils/webhook_secrets.d.ts +0 -59
  259. package/dist/utils/webhook_secrets.d.ts.map +0 -1
  260. package/dist/utils/webhook_secrets.js +0 -107
  261. package/dist/utils/zod_to_json_schema.d.ts +0 -11
  262. package/dist/utils/zod_to_json_schema.d.ts.map +0 -1
  263. package/dist/utils/zod_to_json_schema.js +0 -123
@@ -1,135 +0,0 @@
1
- import { SyncAPIResource, AsyncAPIResource } from '../resource.js';
2
- import { DatasetMetrics } from './display.js';
3
- /**
4
- * Advanced Dataset management utilities for ML training workflows
5
- * Equivalent to Python's jsonlUtils.py
6
- */
7
- export interface FinetuningJSON {
8
- messages: Array<{
9
- role: 'system' | 'user' | 'assistant';
10
- content: string;
11
- }>;
12
- }
13
- export interface DocumentAnnotationPair {
14
- document: string | Buffer;
15
- annotation: Record<string, any>;
16
- }
17
- export interface BatchJSONLRequest {
18
- custom_id: string;
19
- method: 'POST';
20
- url: string;
21
- body: Record<string, any>;
22
- }
23
- export interface BatchJSONLResponse {
24
- id: string;
25
- custom_id: string;
26
- response: {
27
- status_code: number;
28
- request_id: string;
29
- body: Record<string, any>;
30
- };
31
- error?: {
32
- code: string;
33
- message: string;
34
- };
35
- }
36
- export interface AnnotationOptions {
37
- model?: string;
38
- temperature?: number;
39
- modality?: 'native' | 'text';
40
- maxConcurrency?: number;
41
- reasoning_effort?: 'low' | 'medium' | 'high';
42
- provider?: 'openai' | 'anthropic' | 'xai' | 'gemini';
43
- idempotencyKey?: string;
44
- }
45
- export interface SaveOptions {
46
- modality?: 'native' | 'text';
47
- imageResolutionDpi?: number;
48
- browserCanvas?: 'A3' | 'A4' | 'A5';
49
- }
50
- export declare class BaseDatasetsMixin {
51
- /**
52
- * Process dataset and compute comprehensive metrics
53
- */
54
- pprint(datasetPath: string, inputTokenPrice?: number, outputTokenPrice?: number): Promise<DatasetMetrics>;
55
- /**
56
- * Save document-annotation pairs as JSONL training dataset
57
- */
58
- save(jsonSchema: Record<string, any> | string, documentAnnotationPairsPaths: Array<{
59
- document: string;
60
- annotation: string;
61
- }>, datasetPath: string, options?: SaveOptions): Promise<void>;
62
- /**
63
- * Change schema in existing dataset
64
- */
65
- changeSchema(inputDatasetPath: string, jsonSchema: Record<string, any> | string, outputDatasetPath?: string, inplace?: boolean): Promise<void>;
66
- /**
67
- * Stitch multiple documents and save as dataset
68
- */
69
- stitchAndSave(jsonSchema: Record<string, any> | string, pairsPaths: Array<{
70
- documents: string[];
71
- annotation: string;
72
- }>, datasetPath: string, modality?: 'native' | 'text'): Promise<void>;
73
- /**
74
- * Generate annotations for documents using AI models
75
- */
76
- annotate(jsonSchema: Record<string, any> | string, documents: string[], datasetPath: string, options?: AnnotationOptions): Promise<void>;
77
- /**
78
- * Update existing annotations with new model/schema
79
- */
80
- updateAnnotations(jsonSchema: Record<string, any> | string, oldDatasetPath: string, newDatasetPath: string, options?: AnnotationOptions): Promise<void>;
81
- /**
82
- * Save batch annotation requests for OpenAI Batch API
83
- */
84
- saveBatchAnnotateRequests(jsonSchema: Record<string, any> | string, documents: string[], batchRequestsPath: string, options?: AnnotationOptions): Promise<void>;
85
- /**
86
- * Build dataset from batch API results
87
- */
88
- buildDatasetFromBatchResults(jsonSchema: Record<string, any> | string, batchResultsPath: string, datasetPath: string, modality?: 'native' | 'text'): Promise<void>;
89
- private createSystemMessage;
90
- private createUserMessage;
91
- private createMultiDocumentUserMessage;
92
- private generateAnnotation;
93
- private generateAnnotationFromUserMessage;
94
- }
95
- export declare class Datasets extends SyncAPIResource {
96
- private mixin;
97
- pprint(datasetPath: string, inputTokenPrice?: number, outputTokenPrice?: number): Promise<DatasetMetrics>;
98
- save(jsonSchema: Record<string, any> | string, documentAnnotationPairsPaths: Array<{
99
- document: string;
100
- annotation: string;
101
- }>, datasetPath: string, options?: SaveOptions): Promise<void>;
102
- changeSchema(inputDatasetPath: string, jsonSchema: Record<string, any> | string, outputDatasetPath?: string, inplace?: boolean): Promise<void>;
103
- stitchAndSave(jsonSchema: Record<string, any> | string, pairsPaths: Array<{
104
- documents: string[];
105
- annotation: string;
106
- }>, datasetPath: string, modality?: 'native' | 'text'): Promise<void>;
107
- annotate(jsonSchema: Record<string, any> | string, documents: string[], datasetPath: string, options?: AnnotationOptions): Promise<void>;
108
- updateAnnotations(jsonSchema: Record<string, any> | string, oldDatasetPath: string, newDatasetPath: string, options?: AnnotationOptions): Promise<void>;
109
- saveBatchAnnotateRequests(jsonSchema: Record<string, any> | string, documents: string[], batchRequestsPath: string, options?: AnnotationOptions): Promise<void>;
110
- buildDatasetFromBatchResults(jsonSchema: Record<string, any> | string, batchResultsPath: string, datasetPath: string, modality?: 'native' | 'text'): Promise<void>;
111
- }
112
- export declare class AsyncDatasets extends AsyncAPIResource {
113
- private mixin;
114
- pprint(datasetPath: string, inputTokenPrice?: number, outputTokenPrice?: number): Promise<DatasetMetrics>;
115
- save(jsonSchema: Record<string, any> | string, documentAnnotationPairsPaths: Array<{
116
- document: string;
117
- annotation: string;
118
- }>, datasetPath: string, options?: SaveOptions): Promise<void>;
119
- changeSchema(inputDatasetPath: string, jsonSchema: Record<string, any> | string, outputDatasetPath?: string, inplace?: boolean): Promise<void>;
120
- stitchAndSave(jsonSchema: Record<string, any> | string, pairsPaths: Array<{
121
- documents: string[];
122
- annotation: string;
123
- }>, datasetPath: string, modality?: 'native' | 'text'): Promise<void>;
124
- annotate(jsonSchema: Record<string, any> | string, documents: string[], datasetPath: string, options?: AnnotationOptions): Promise<void>;
125
- updateAnnotations(jsonSchema: Record<string, any> | string, oldDatasetPath: string, newDatasetPath: string, options?: AnnotationOptions): Promise<void>;
126
- saveBatchAnnotateRequests(jsonSchema: Record<string, any> | string, documents: string[], batchRequestsPath: string, options?: AnnotationOptions): Promise<void>;
127
- buildDatasetFromBatchResults(jsonSchema: Record<string, any> | string, batchResultsPath: string, datasetPath: string, modality?: 'native' | 'text'): Promise<void>;
128
- }
129
- declare const _default: {
130
- Datasets: typeof Datasets;
131
- AsyncDatasets: typeof AsyncDatasets;
132
- BaseDatasetsMixin: typeof BaseDatasetsMixin;
133
- };
134
- export default _default;
135
- //# sourceMappingURL=datasets.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"datasets.d.ts","sourceRoot":"","sources":["../../src/utils/datasets.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAEnE,OAAO,EAAmD,cAAc,EAAE,MAAM,cAAc,CAAC;AAE/F;;;GAGG;AAEH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;QACtC,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE;QACR,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAC3B,CAAC;IACF,KAAK,CAAC,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;IAC7B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC7C,QAAQ,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,KAAK,GAAG,QAAQ,CAAC;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC;IAC7B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,aAAa,CAAC,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;CACpC;AAED,qBAAa,iBAAiB;IAC5B;;OAEG;IACG,MAAM,CACV,WAAW,EAAE,MAAM,EACnB,eAAe,GAAE,MAAgB,EACjC,gBAAgB,GAAE,MAAe,GAChC,OAAO,CAAC,cAAc,CAAC;IAe1B;;OAEG;IACG,IAAI,CACR,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,4BAA4B,EAAE,KAAK,CAAC;QAClC,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,EACF,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,WAAgB,GACxB,OAAO,CAAC,IAAI,CAAC;IAkChB;;OAEG;IACG,YAAY,CAChB,gBAAgB,EAAE,MAAM,EACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,OAAO,GAAE,OAAe,GACvB,OAAO,CAAC,IAAI,CAAC;IAkChB;;OAEG;IACG,aAAa,CACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,UAAU,EAAE,KAAK,CAAC;QAChB,SAAS,EAAE,MAAM,EAAE,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,EACF,WAAW,EAAE,MAAM,EACnB,QAAQ,GAAE,QAAQ,GAAG,MAAiB,GACrC,OAAO,CAAC,IAAI,CAAC;IAkChB;;OAEG;IACG,QAAQ,CACZ,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,SAAS,EAAE,MAAM,EAAE,EACnB,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,IAAI,CAAC;IAyDhB;;OAEG;IACG,iBAAiB,CACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,cAAc,EAAE,MAAM,EACtB,cAAc,EAAE,MAAM,EACtB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,IAAI,CAAC;IAoDhB;;OAEG;IACG,yBAAyB,CAC7B,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,SAAS,EAAE,MAAM,EAAE,EACnB,iBAAiB,EAAE,MAAM,EACzB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,IAAI,CAAC;IA+BhB;;OAEG;IACG,4BAA4B,CAChC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,gBAAgB,EAAE,MAAM,EACxB,WAAW,EAAE,MAAM,EACnB,QAAQ,GAAE,QAAQ,GAAG,MAAiB,GACrC,OAAO,CAAC,IAAI,CAAC;IAoDhB,OAAO,CAAC,mBAAmB;YAab,iBAAiB;YAejB,8BAA8B;YAe9B,kBAAkB;YAclB,iCAAiC;CAQhD;AAED,qBAAa,QAAS,SAAQ,eAAe;IAC3C,OAAO,CAAC,KAAK,CAA2B;IAElC,MAAM,CACV,WAAW,EAAE,MAAM,EACnB,eAAe,CAAC,EAAE,MAAM,EACxB,gBAAgB,CAAC,EAAE,MAAM,GACxB,OAAO,CAAC,cAAc,CAAC;IAIpB,IAAI,CACR,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,4BAA4B,EAAE,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,EAC7E,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,WAAW,GACpB,OAAO,CAAC,IAAI,CAAC;IAIV,YAAY,CAChB,gBAAgB,EAAE,MAAM,EACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,OAAO,CAAC,EAAE,OAAO,GAChB,OAAO,CAAC,IAAI,CAAC;IAIV,aAAa,CACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,UAAU,EAAE,KAAK,CAAC;QAAE,SAAS,EAAE,MAAM,EAAE,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,EAC9D,WAAW,EAAE,MAAM,EACnB,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,GAC3B,OAAO,CAAC,IAAI,CAAC;IAIV,QAAQ,CACZ,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,SAAS,EAAE,MAAM,EAAE,EACnB,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,IAAI,CAAC;IAIV,iBAAiB,CACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,cAAc,EAAE,MAAM,EACtB,cAAc,EAAE,MAAM,EACtB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,IAAI,CAAC;IAIV,yBAAyB,CAC7B,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,SAAS,EAAE,MAAM,EAAE,EACnB,iBAAiB,EAAE,MAAM,EACzB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,IAAI,CAAC;IAIV,4BAA4B,CAChC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,gBAAgB,EAAE,MAAM,EACxB,WAAW,EAAE,MAAM,EACnB,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,GAC3B,OAAO,CAAC,IAAI,CAAC;CAGjB;AAED,qBAAa,aAAc,SAAQ,gBAAgB;IACjD,OAAO,CAAC,KAAK,CAA2B;IAElC,MAAM,CACV,WAAW,EAAE,MAAM,EACnB,eAAe,CAAC,EAAE,MAAM,EACxB,gBAAgB,CAAC,EAAE,MAAM,GACxB,OAAO,CAAC,cAAc,CAAC;IAIpB,IAAI,CACR,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,4BAA4B,EAAE,KAAK,CAAC;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,EAC7E,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,WAAW,GACpB,OAAO,CAAC,IAAI,CAAC;IAIV,YAAY,CAChB,gBAAgB,EAAE,MAAM,EACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,OAAO,CAAC,EAAE,OAAO,GAChB,OAAO,CAAC,IAAI,CAAC;IAIV,aAAa,CACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,UAAU,EAAE,KAAK,CAAC;QAAE,SAAS,EAAE,MAAM,EAAE,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,EAC9D,WAAW,EAAE,MAAM,EACnB,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,GAC3B,OAAO,CAAC,IAAI,CAAC;IAIV,QAAQ,CACZ,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,SAAS,EAAE,MAAM,EAAE,EACnB,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,IAAI,CAAC;IAIV,iBAAiB,CACrB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,cAAc,EAAE,MAAM,EACtB,cAAc,EAAE,MAAM,EACtB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,IAAI,CAAC;IAIV,yBAAyB,CAC7B,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,SAAS,EAAE,MAAM,EAAE,EACnB,iBAAiB,EAAE,MAAM,EACzB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,IAAI,CAAC;IAIV,4BAA4B,CAChC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,EACxC,gBAAgB,EAAE,MAAM,EACxB,WAAW,EAAE,MAAM,EACnB,QAAQ,CAAC,EAAE,QAAQ,GAAG,MAAM,GAC3B,OAAO,CAAC,IAAI,CAAC;CAGjB;;;;;;AAED,wBAIE"}
@@ -1,359 +0,0 @@
1
- import fs from 'fs';
2
- import path from 'path';
3
- import { SyncAPIResource, AsyncAPIResource } from '../resource.js';
4
- import { readJSONL, writeJSONL } from './jsonl.js';
5
- import { displayMetrics, processDatasetAndComputeMetrics } from './display.js';
6
- export class BaseDatasetsMixin {
7
- /**
8
- * Process dataset and compute comprehensive metrics
9
- */
10
- async pprint(datasetPath, inputTokenPrice = 0.00015, outputTokenPrice = 0.0006) {
11
- if (!fs.existsSync(datasetPath)) {
12
- throw new Error(`Dataset file not found: ${datasetPath}`);
13
- }
14
- const metrics = await processDatasetAndComputeMetrics(datasetPath, inputTokenPrice, outputTokenPrice);
15
- displayMetrics(metrics);
16
- return metrics;
17
- }
18
- /**
19
- * Save document-annotation pairs as JSONL training dataset
20
- */
21
- async save(jsonSchema, documentAnnotationPairsPaths, datasetPath, options = {}) {
22
- const { modality = 'native' } = options;
23
- const finetuningData = [];
24
- for (const { document: docPath, annotation: annPath } of documentAnnotationPairsPaths) {
25
- // Read document and annotation
26
- if (!fs.existsSync(docPath) || !fs.existsSync(annPath)) {
27
- throw new Error(`Document or annotation file not found: ${docPath}, ${annPath}`);
28
- }
29
- const annotation = JSON.parse(fs.readFileSync(annPath, 'utf-8'));
30
- // Create system message with schema
31
- const systemMessage = this.createSystemMessage(jsonSchema, modality);
32
- // Create user message with document
33
- const userMessage = await this.createUserMessage(docPath, modality, options);
34
- // Create assistant message with annotation
35
- const assistantMessage = {
36
- role: 'assistant',
37
- content: JSON.stringify(annotation),
38
- };
39
- finetuningData.push({
40
- messages: [systemMessage, userMessage, assistantMessage],
41
- });
42
- }
43
- // Write to JSONL file
44
- await writeJSONL(datasetPath, finetuningData);
45
- console.log(`✅ Dataset saved to ${datasetPath} with ${finetuningData.length} examples`);
46
- }
47
- /**
48
- * Change schema in existing dataset
49
- */
50
- async changeSchema(inputDatasetPath, jsonSchema, outputDatasetPath, inplace = false) {
51
- if (!fs.existsSync(inputDatasetPath)) {
52
- throw new Error(`Input dataset not found: ${inputDatasetPath}`);
53
- }
54
- const outputPath = inplace ? inputDatasetPath : (outputDatasetPath || inputDatasetPath);
55
- const tempPath = `${outputPath}.tmp`;
56
- try {
57
- const dataset = await readJSONL(inputDatasetPath);
58
- const newSystemMessage = this.createSystemMessage(jsonSchema, 'native');
59
- const updatedDataset = dataset.map((item) => ({
60
- ...item,
61
- messages: [
62
- newSystemMessage,
63
- ...item.messages.slice(1), // Keep user and assistant messages
64
- ],
65
- }));
66
- await writeJSONL(tempPath, updatedDataset);
67
- // Atomic move
68
- fs.renameSync(tempPath, outputPath);
69
- console.log(`✅ Schema updated in ${outputPath}`);
70
- }
71
- catch (error) {
72
- // Cleanup temp file on error
73
- if (fs.existsSync(tempPath)) {
74
- fs.unlinkSync(tempPath);
75
- }
76
- throw error;
77
- }
78
- }
79
- /**
80
- * Stitch multiple documents and save as dataset
81
- */
82
- async stitchAndSave(jsonSchema, pairsPaths, datasetPath, modality = 'native') {
83
- const finetuningData = [];
84
- for (const { documents: docPaths, annotation: annPath } of pairsPaths) {
85
- if (!fs.existsSync(annPath)) {
86
- throw new Error(`Annotation file not found: ${annPath}`);
87
- }
88
- // Verify all document files exist
89
- for (const docPath of docPaths) {
90
- if (!fs.existsSync(docPath)) {
91
- throw new Error(`Document file not found: ${docPath}`);
92
- }
93
- }
94
- const annotation = JSON.parse(fs.readFileSync(annPath, 'utf-8'));
95
- const systemMessage = this.createSystemMessage(jsonSchema, modality);
96
- const userMessage = await this.createMultiDocumentUserMessage(docPaths, modality);
97
- const assistantMessage = {
98
- role: 'assistant',
99
- content: JSON.stringify(annotation),
100
- };
101
- finetuningData.push({
102
- messages: [systemMessage, userMessage, assistantMessage],
103
- });
104
- }
105
- await writeJSONL(datasetPath, finetuningData);
106
- console.log(`✅ Stitched dataset saved to ${datasetPath} with ${finetuningData.length} examples`);
107
- }
108
- /**
109
- * Generate annotations for documents using AI models
110
- */
111
- async annotate(jsonSchema, documents, datasetPath, options = {}) {
112
- const { model = 'gpt-4o-mini', temperature = 0.0, modality = 'native', maxConcurrency = 5, reasoning_effort = 'medium', provider = 'openai', } = options;
113
- console.log(`🚀 Starting annotation of ${documents.length} documents...`);
114
- const finetuningData = [];
115
- const concurrencyLimit = Math.min(maxConcurrency, documents.length);
116
- // Process documents in batches
117
- for (let i = 0; i < documents.length; i += concurrencyLimit) {
118
- const batch = documents.slice(i, i + concurrencyLimit);
119
- const batchPromises = batch.map(async (docPath, index) => {
120
- const globalIndex = i + index;
121
- console.log(`📝 Processing document ${globalIndex + 1}/${documents.length}: ${path.basename(docPath)}`);
122
- try {
123
- const annotation = await this.generateAnnotation(jsonSchema, docPath, model, temperature, modality, reasoning_effort, provider);
124
- const systemMessage = this.createSystemMessage(jsonSchema, modality);
125
- const userMessage = await this.createUserMessage(docPath, modality);
126
- const assistantMessage = {
127
- role: 'assistant',
128
- content: JSON.stringify(annotation),
129
- };
130
- return {
131
- messages: [systemMessage, userMessage, assistantMessage],
132
- };
133
- }
134
- catch (error) {
135
- console.error(`❌ Failed to process ${docPath}:`, error);
136
- return null;
137
- }
138
- });
139
- const batchResults = await Promise.all(batchPromises);
140
- finetuningData.push(...batchResults.filter(result => result !== null));
141
- }
142
- await writeJSONL(datasetPath, finetuningData);
143
- console.log(`✅ Annotation complete! Generated ${finetuningData.length}/${documents.length} annotations`);
144
- }
145
- /**
146
- * Update existing annotations with new model/schema
147
- */
148
- async updateAnnotations(jsonSchema, oldDatasetPath, newDatasetPath, options = {}) {
149
- if (!fs.existsSync(oldDatasetPath)) {
150
- throw new Error(`Old dataset not found: ${oldDatasetPath}`);
151
- }
152
- console.log(`🔄 Updating annotations from ${oldDatasetPath}...`);
153
- const oldDataset = await readJSONL(oldDatasetPath);
154
- const updatedDataset = [];
155
- for (let i = 0; i < oldDataset.length; i++) {
156
- const item = oldDataset[i];
157
- console.log(`🔄 Updating annotation ${i + 1}/${oldDataset.length}`);
158
- try {
159
- // Extract document path from user message (this is simplified)
160
- const userContent = item.messages.find(m => m.role === 'user')?.content;
161
- if (!userContent) {
162
- console.warn(`⚠️ No user message found in item ${i + 1}, skipping`);
163
- continue;
164
- }
165
- // For this implementation, we assume the document is referenced in the user message
166
- // In practice, you'd need to store document paths or reconstruct them
167
- const newAnnotation = await this.generateAnnotationFromUserMessage(jsonSchema, userContent, options);
168
- const systemMessage = this.createSystemMessage(jsonSchema, options.modality || 'native');
169
- const assistantMessage = {
170
- role: 'assistant',
171
- content: JSON.stringify(newAnnotation),
172
- };
173
- updatedDataset.push({
174
- messages: [
175
- systemMessage,
176
- item.messages.find(m => m.role === 'user'),
177
- assistantMessage,
178
- ],
179
- });
180
- }
181
- catch (error) {
182
- console.error(`❌ Failed to update annotation ${i + 1}:`, error);
183
- }
184
- }
185
- await writeJSONL(newDatasetPath, updatedDataset);
186
- console.log(`✅ Updated ${updatedDataset.length}/${oldDataset.length} annotations`);
187
- }
188
- /**
189
- * Save batch annotation requests for OpenAI Batch API
190
- */
191
- async saveBatchAnnotateRequests(jsonSchema, documents, batchRequestsPath, options = {}) {
192
- const { model = 'gpt-4o-mini', temperature = 0.0, modality = 'native', } = options;
193
- const batchRequests = [];
194
- for (let i = 0; i < documents.length; i++) {
195
- const docPath = documents[i];
196
- const systemMessage = this.createSystemMessage(jsonSchema, modality);
197
- const userMessage = await this.createUserMessage(docPath, modality);
198
- batchRequests.push({
199
- custom_id: `doc_${i}_${path.basename(docPath, path.extname(docPath))}`,
200
- method: 'POST',
201
- url: '/v1/chat/completions',
202
- body: {
203
- model,
204
- messages: [systemMessage, userMessage],
205
- temperature,
206
- response_format: { type: 'json_object' },
207
- },
208
- });
209
- }
210
- await writeJSONL(batchRequestsPath, batchRequests);
211
- console.log(`✅ Saved ${batchRequests.length} batch requests to ${batchRequestsPath}`);
212
- }
213
- /**
214
- * Build dataset from batch API results
215
- */
216
- async buildDatasetFromBatchResults(jsonSchema, batchResultsPath, datasetPath, modality = 'native') {
217
- if (!fs.existsSync(batchResultsPath)) {
218
- throw new Error(`Batch results file not found: ${batchResultsPath}`);
219
- }
220
- const batchResults = await readJSONL(batchResultsPath);
221
- const finetuningData = [];
222
- for (const result of batchResults) {
223
- if (result.error) {
224
- console.warn(`⚠️ Skipping failed request ${result.custom_id}: ${result.error.message}`);
225
- continue;
226
- }
227
- const response = result.response.body;
228
- const content = response.choices?.[0]?.message?.content;
229
- if (!content) {
230
- console.warn(`⚠️ No content in response for ${result.custom_id}`);
231
- continue;
232
- }
233
- try {
234
- const annotation = JSON.parse(content);
235
- // Reconstruct messages (this is simplified)
236
- const systemMessage = this.createSystemMessage(jsonSchema, modality);
237
- // Extract user message from original request (would need to be stored)
238
- const userMessage = {
239
- role: 'user',
240
- content: `Document content for ${result.custom_id}`,
241
- };
242
- const assistantMessage = {
243
- role: 'assistant',
244
- content: JSON.stringify(annotation),
245
- };
246
- finetuningData.push({
247
- messages: [systemMessage, userMessage, assistantMessage],
248
- });
249
- }
250
- catch (error) {
251
- console.warn(`⚠️ Failed to parse annotation for ${result.custom_id}:`, error);
252
- }
253
- }
254
- await writeJSONL(datasetPath, finetuningData);
255
- console.log(`✅ Built dataset with ${finetuningData.length} examples from batch results`);
256
- }
257
- // Helper methods
258
- createSystemMessage(jsonSchema, _modality) {
259
- const schemaObj = typeof jsonSchema === 'string' ? JSON.parse(jsonSchema) : jsonSchema;
260
- const schemaStr = JSON.stringify(schemaObj, null, 2);
261
- return {
262
- role: 'system',
263
- content: `You are an expert data extraction assistant. Extract information from the provided document according to the following JSON schema:\n\n${schemaStr}\n\nReturn only valid JSON that matches the schema exactly.`,
264
- };
265
- }
266
- async createUserMessage(docPath, _modality, _options = {}) {
267
- // This is a simplified implementation
268
- // In practice, you'd handle different file types, base64 encoding, etc.
269
- const content = fs.readFileSync(docPath, 'utf-8');
270
- return {
271
- role: 'user',
272
- content: `Please extract data from this document:\n\n${content}`,
273
- };
274
- }
275
- async createMultiDocumentUserMessage(docPaths, _modality) {
276
- const contents = docPaths.map((docPath, index) => {
277
- const content = fs.readFileSync(docPath, 'utf-8');
278
- return `Document ${index + 1} (${path.basename(docPath)}):\n${content}`;
279
- }).join('\n\n---\n\n');
280
- return {
281
- role: 'user',
282
- content: `Please extract data from these documents:\n\n${contents}`,
283
- };
284
- }
285
- async generateAnnotation(_jsonSchema, _docPath, _model, _temperature, _modality, _reasoningEffort, _provider) {
286
- // This would integrate with the actual AI providers
287
- // For now, return a placeholder implementation
288
- throw new Error('AI provider integration not implemented in this version');
289
- }
290
- async generateAnnotationFromUserMessage(_jsonSchema, _userContent, _options) {
291
- // This would re-generate annotation from existing user message
292
- throw new Error('Annotation update not implemented in this version');
293
- }
294
- }
295
- export class Datasets extends SyncAPIResource {
296
- constructor() {
297
- super(...arguments);
298
- this.mixin = new BaseDatasetsMixin();
299
- }
300
- async pprint(datasetPath, inputTokenPrice, outputTokenPrice) {
301
- return this.mixin.pprint(datasetPath, inputTokenPrice, outputTokenPrice);
302
- }
303
- async save(jsonSchema, documentAnnotationPairsPaths, datasetPath, options) {
304
- return this.mixin.save(jsonSchema, documentAnnotationPairsPaths, datasetPath, options);
305
- }
306
- async changeSchema(inputDatasetPath, jsonSchema, outputDatasetPath, inplace) {
307
- return this.mixin.changeSchema(inputDatasetPath, jsonSchema, outputDatasetPath, inplace);
308
- }
309
- async stitchAndSave(jsonSchema, pairsPaths, datasetPath, modality) {
310
- return this.mixin.stitchAndSave(jsonSchema, pairsPaths, datasetPath, modality);
311
- }
312
- async annotate(jsonSchema, documents, datasetPath, options) {
313
- return this.mixin.annotate(jsonSchema, documents, datasetPath, options);
314
- }
315
- async updateAnnotations(jsonSchema, oldDatasetPath, newDatasetPath, options) {
316
- return this.mixin.updateAnnotations(jsonSchema, oldDatasetPath, newDatasetPath, options);
317
- }
318
- async saveBatchAnnotateRequests(jsonSchema, documents, batchRequestsPath, options) {
319
- return this.mixin.saveBatchAnnotateRequests(jsonSchema, documents, batchRequestsPath, options);
320
- }
321
- async buildDatasetFromBatchResults(jsonSchema, batchResultsPath, datasetPath, modality) {
322
- return this.mixin.buildDatasetFromBatchResults(jsonSchema, batchResultsPath, datasetPath, modality);
323
- }
324
- }
325
- export class AsyncDatasets extends AsyncAPIResource {
326
- constructor() {
327
- super(...arguments);
328
- this.mixin = new BaseDatasetsMixin();
329
- }
330
- async pprint(datasetPath, inputTokenPrice, outputTokenPrice) {
331
- return this.mixin.pprint(datasetPath, inputTokenPrice, outputTokenPrice);
332
- }
333
- async save(jsonSchema, documentAnnotationPairsPaths, datasetPath, options) {
334
- return this.mixin.save(jsonSchema, documentAnnotationPairsPaths, datasetPath, options);
335
- }
336
- async changeSchema(inputDatasetPath, jsonSchema, outputDatasetPath, inplace) {
337
- return this.mixin.changeSchema(inputDatasetPath, jsonSchema, outputDatasetPath, inplace);
338
- }
339
- async stitchAndSave(jsonSchema, pairsPaths, datasetPath, modality) {
340
- return this.mixin.stitchAndSave(jsonSchema, pairsPaths, datasetPath, modality);
341
- }
342
- async annotate(jsonSchema, documents, datasetPath, options) {
343
- return this.mixin.annotate(jsonSchema, documents, datasetPath, options);
344
- }
345
- async updateAnnotations(jsonSchema, oldDatasetPath, newDatasetPath, options) {
346
- return this.mixin.updateAnnotations(jsonSchema, oldDatasetPath, newDatasetPath, options);
347
- }
348
- async saveBatchAnnotateRequests(jsonSchema, documents, batchRequestsPath, options) {
349
- return this.mixin.saveBatchAnnotateRequests(jsonSchema, documents, batchRequestsPath, options);
350
- }
351
- async buildDatasetFromBatchResults(jsonSchema, batchResultsPath, datasetPath, modality) {
352
- return this.mixin.buildDatasetFromBatchResults(jsonSchema, batchResultsPath, datasetPath, modality);
353
- }
354
- }
355
- export default {
356
- Datasets,
357
- AsyncDatasets,
358
- BaseDatasetsMixin,
359
- };
@@ -1,108 +0,0 @@
1
- /**
2
- * Rich display and visualization utilities for datasets and metrics
3
- * Equivalent to Python's display.py
4
- */
5
- export interface DatasetMetrics {
6
- totalExamples: number;
7
- inputTokens: {
8
- total: number;
9
- min: number;
10
- max: number;
11
- mean: number;
12
- median: number;
13
- p95: number;
14
- p99: number;
15
- };
16
- outputTokens: {
17
- total: number;
18
- min: number;
19
- max: number;
20
- mean: number;
21
- median: number;
22
- p95: number;
23
- p99: number;
24
- };
25
- totalTokens: {
26
- total: number;
27
- min: number;
28
- max: number;
29
- mean: number;
30
- median: number;
31
- };
32
- estimatedCost: {
33
- input: number;
34
- output: number;
35
- total: number;
36
- };
37
- messageStats: {
38
- systemMessages: number;
39
- userMessages: number;
40
- assistantMessages: number;
41
- avgMessagesPerExample: number;
42
- };
43
- contentAnalysis: {
44
- avgSystemLength: number;
45
- avgUserLength: number;
46
- avgAssistantLength: number;
47
- hasImages: boolean;
48
- imageCount: number;
49
- };
50
- }
51
- export interface TokenCountResult {
52
- textTokens: number;
53
- imageTokens: number;
54
- totalTokens: number;
55
- }
56
- /**
57
- * Count tokens in text using a simple approximation
58
- * In production, you'd want to use tiktoken equivalent for JavaScript
59
- */
60
- export declare function countTokens(text: string, _model?: string): number;
61
- /**
62
- * Count tokens in content (text + images)
63
- */
64
- export declare function countContentTokens(content: string, _model?: string): TokenCountResult;
65
- /**
66
- * Calculate statistical metrics for an array of numbers
67
- */
68
- export declare function calculateStats(values: number[]): {
69
- min: number;
70
- max: number;
71
- mean: number;
72
- median: number;
73
- p95: number;
74
- p99: number;
75
- total: number;
76
- };
77
- /**
78
- * Process dataset and compute comprehensive metrics
79
- */
80
- export declare function processDatasetAndComputeMetrics(datasetPath: string, inputTokenPrice?: number, outputTokenPrice?: number, model?: string): Promise<DatasetMetrics>;
81
- /**
82
- * Display metrics in a formatted table
83
- */
84
- export declare function displayMetrics(metrics: DatasetMetrics): void;
85
- /**
86
- * Format large numbers with appropriate units
87
- */
88
- export declare function formatNumber(num: number): string;
89
- /**
90
- * Create a simple ASCII progress bar
91
- */
92
- export declare function createProgressBar(current: number, total: number, width?: number): string;
93
- /**
94
- * Display progress with a progress bar
95
- */
96
- export declare function displayProgress(current: number, total: number, message?: string): void;
97
- declare const _default: {
98
- processDatasetAndComputeMetrics: typeof processDatasetAndComputeMetrics;
99
- displayMetrics: typeof displayMetrics;
100
- countTokens: typeof countTokens;
101
- countContentTokens: typeof countContentTokens;
102
- calculateStats: typeof calculateStats;
103
- formatNumber: typeof formatNumber;
104
- createProgressBar: typeof createProgressBar;
105
- displayProgress: typeof displayProgress;
106
- };
107
- export default _default;
108
- //# sourceMappingURL=display.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"display.d.ts","sourceRoot":"","sources":["../../src/utils/display.ts"],"names":[],"mappings":"AAGA;;;GAGG;AAEH,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;QACf,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;IACF,YAAY,EAAE;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;QACf,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;IACF,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,GAAG,EAAE,MAAM,CAAC;QACZ,GAAG,EAAE,MAAM,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,YAAY,EAAE;QACZ,cAAc,EAAE,MAAM,CAAC;QACvB,YAAY,EAAE,MAAM,CAAC;QACrB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,qBAAqB,EAAE,MAAM,CAAC;KAC/B,CAAC;IACF,eAAe,EAAE;QACf,eAAe,EAAE,MAAM,CAAC;QACxB,aAAa,EAAE,MAAM,CAAC;QACtB,kBAAkB,EAAE,MAAM,CAAC;QAC3B,SAAS,EAAE,OAAO,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,GAAE,MAAsB,GAAG,MAAM,CAKhF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,GAAE,MAAsB,GAAG,gBAAgB,CA+BpG;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG;IAChD,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf,CAuBA;AAED;;GAEG;AACH,wBAAsB,+BAA+B,CACnD,WAAW,EAAE,MAAM,EACnB,eAAe,GAAE,MAAgB,EACjC,gBAAgB,GAAE,MAAe,EACjC,KAAK,GAAE,MAAsB,GAC5B,OAAO,CAAC,cAAc,CAAC,CAiGzB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,cAAc,GAAG,IAAI,CAoD5D;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAOhD;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,GAAE,MAAW,GAAG,MAAM,CAM5F;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAUtF;;;;;;;;;;;AAED,wBASE"}