@retab/node 0.0.48 → 0.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -215
- package/dist/api/client.d.ts +2 -2
- package/dist/api/client.d.ts.map +1 -1
- package/dist/api/client.js +2 -2
- package/dist/api/documents/client.d.ts +3 -3
- package/dist/api/documents/client.d.ts.map +1 -1
- package/dist/api/documents/client.js +3 -3
- package/dist/api/projects/client.d.ts +15 -0
- package/dist/api/projects/client.d.ts.map +1 -0
- package/dist/api/projects/client.js +43 -0
- package/dist/api/projects/documents/client.d.ts +12 -0
- package/dist/api/projects/documents/client.d.ts.map +1 -0
- package/dist/api/projects/documents/client.js +39 -0
- package/dist/api/projects/iterations/client.d.ts +17 -0
- package/dist/api/projects/iterations/client.d.ts.map +1 -0
- package/dist/api/projects/iterations/client.js +64 -0
- package/dist/client.d.ts +1 -0
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +6 -1
- package/dist/generated_types.d.ts +17837 -40090
- package/dist/generated_types.d.ts.map +1 -1
- package/dist/generated_types.js +309 -979
- package/dist/index.d.ts +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/types.d.ts +188 -80
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +22 -1
- package/package.json +6 -9
- package/dist/api/consensus/client.d.ts +0 -7
- package/dist/api/consensus/client.d.ts.map +0 -1
- package/dist/api/consensus/client.js +0 -14
- package/dist/errors.d.ts +0 -34
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -53
- package/dist/resource.d.ts +0 -12
- package/dist/resource.d.ts.map +0 -1
- package/dist/resource.js +0 -19
- package/dist/resources/consensus/completions.d.ts +0 -66
- package/dist/resources/consensus/completions.d.ts.map +0 -1
- package/dist/resources/consensus/completions.js +0 -84
- package/dist/resources/consensus/index.d.ts +0 -72
- package/dist/resources/consensus/index.d.ts.map +0 -1
- package/dist/resources/consensus/index.js +0 -76
- package/dist/resources/consensus/responses.d.ts +0 -69
- package/dist/resources/consensus/responses.d.ts.map +0 -1
- package/dist/resources/consensus/responses.js +0 -99
- package/dist/resources/documents/extractions.d.ts +0 -74
- package/dist/resources/documents/extractions.d.ts.map +0 -1
- package/dist/resources/documents/extractions.js +0 -196
- package/dist/resources/documents/index.d.ts +0 -21
- package/dist/resources/documents/index.d.ts.map +0 -1
- package/dist/resources/documents/index.js +0 -55
- package/dist/resources/evaluations/documents.d.ts +0 -40
- package/dist/resources/evaluations/documents.d.ts.map +0 -1
- package/dist/resources/evaluations/documents.js +0 -123
- package/dist/resources/evaluations/index.d.ts +0 -14
- package/dist/resources/evaluations/index.d.ts.map +0 -1
- package/dist/resources/evaluations/index.js +0 -17
- package/dist/resources/evaluations/iterations.d.ts +0 -50
- package/dist/resources/evaluations/iterations.d.ts.map +0 -1
- package/dist/resources/evaluations/iterations.js +0 -156
- package/dist/resources/files.d.ts +0 -82
- package/dist/resources/files.d.ts.map +0 -1
- package/dist/resources/files.js +0 -150
- package/dist/resources/finetuning.d.ts +0 -105
- package/dist/resources/finetuning.d.ts.map +0 -1
- package/dist/resources/finetuning.js +0 -181
- package/dist/resources/index.d.ts +0 -11
- package/dist/resources/index.d.ts.map +0 -1
- package/dist/resources/index.js +0 -10
- package/dist/resources/models.d.ts +0 -57
- package/dist/resources/models.d.ts.map +0 -1
- package/dist/resources/models.js +0 -72
- package/dist/resources/processors/automations/endpoints.d.ts +0 -90
- package/dist/resources/processors/automations/endpoints.d.ts.map +0 -1
- package/dist/resources/processors/automations/endpoints.js +0 -145
- package/dist/resources/processors/automations/index.d.ts +0 -7
- package/dist/resources/processors/automations/index.d.ts.map +0 -1
- package/dist/resources/processors/automations/index.js +0 -6
- package/dist/resources/processors/automations/links.d.ts +0 -90
- package/dist/resources/processors/automations/links.d.ts.map +0 -1
- package/dist/resources/processors/automations/links.js +0 -149
- package/dist/resources/processors/automations/logs.d.ts +0 -35
- package/dist/resources/processors/automations/logs.d.ts.map +0 -1
- package/dist/resources/processors/automations/logs.js +0 -60
- package/dist/resources/processors/automations/mailboxes.d.ts +0 -102
- package/dist/resources/processors/automations/mailboxes.d.ts.map +0 -1
- package/dist/resources/processors/automations/mailboxes.js +0 -157
- package/dist/resources/processors/automations/outlook.d.ts +0 -114
- package/dist/resources/processors/automations/outlook.d.ts.map +0 -1
- package/dist/resources/processors/automations/outlook.js +0 -170
- package/dist/resources/processors/automations/tests.d.ts +0 -58
- package/dist/resources/processors/automations/tests.d.ts.map +0 -1
- package/dist/resources/processors/automations/tests.js +0 -90
- package/dist/resources/processors/index.d.ts +0 -303
- package/dist/resources/processors/index.d.ts.map +0 -1
- package/dist/resources/processors/index.js +0 -261
- package/dist/resources/schemas.d.ts +0 -63
- package/dist/resources/schemas.d.ts.map +0 -1
- package/dist/resources/schemas.js +0 -183
- package/dist/resources/secrets/external_api_keys.d.ts +0 -61
- package/dist/resources/secrets/external_api_keys.d.ts.map +0 -1
- package/dist/resources/secrets/external_api_keys.js +0 -120
- package/dist/resources/secrets/index.d.ts +0 -14
- package/dist/resources/secrets/index.d.ts.map +0 -1
- package/dist/resources/secrets/index.js +0 -17
- package/dist/resources/secrets/webhooks.d.ts +0 -73
- package/dist/resources/secrets/webhooks.d.ts.map +0 -1
- package/dist/resources/secrets/webhooks.js +0 -145
- package/dist/resources/usage.d.ts +0 -223
- package/dist/resources/usage.d.ts.map +0 -1
- package/dist/resources/usage.js +0 -310
- package/dist/types/ai_models.d.ts +0 -389
- package/dist/types/ai_models.d.ts.map +0 -1
- package/dist/types/ai_models.js +0 -145
- package/dist/types/automations/cron.d.ts +0 -28
- package/dist/types/automations/cron.d.ts.map +0 -1
- package/dist/types/automations/cron.js +0 -1
- package/dist/types/automations/endpoints.d.ts +0 -13
- package/dist/types/automations/endpoints.d.ts.map +0 -1
- package/dist/types/automations/endpoints.js +0 -1
- package/dist/types/automations/index.d.ts +0 -7
- package/dist/types/automations/index.d.ts.map +0 -1
- package/dist/types/automations/index.js +0 -6
- package/dist/types/automations/links.d.ts +0 -15
- package/dist/types/automations/links.d.ts.map +0 -1
- package/dist/types/automations/links.js +0 -1
- package/dist/types/automations/mailboxes.d.ts +0 -18
- package/dist/types/automations/mailboxes.d.ts.map +0 -1
- package/dist/types/automations/mailboxes.js +0 -1
- package/dist/types/automations/outlook.d.ts +0 -37
- package/dist/types/automations/outlook.d.ts.map +0 -1
- package/dist/types/automations/outlook.js +0 -1
- package/dist/types/automations/webhooks.d.ts +0 -13
- package/dist/types/automations/webhooks.d.ts.map +0 -1
- package/dist/types/automations/webhooks.js +0 -1
- package/dist/types/browser_canvas.d.ts +0 -4
- package/dist/types/browser_canvas.d.ts.map +0 -1
- package/dist/types/browser_canvas.js +0 -2
- package/dist/types/chat.d.ts +0 -99
- package/dist/types/chat.d.ts.map +0 -1
- package/dist/types/chat.js +0 -20
- package/dist/types/consensus.d.ts +0 -10
- package/dist/types/consensus.d.ts.map +0 -1
- package/dist/types/consensus.js +0 -1
- package/dist/types/db/annotations.d.ts +0 -108
- package/dist/types/db/annotations.d.ts.map +0 -1
- package/dist/types/db/annotations.js +0 -6
- package/dist/types/db/files.d.ts +0 -133
- package/dist/types/db/files.d.ts.map +0 -1
- package/dist/types/db/files.js +0 -5
- package/dist/types/documents/extractions.d.ts +0 -1849
- package/dist/types/documents/extractions.d.ts.map +0 -1
- package/dist/types/documents/extractions.js +0 -211
- package/dist/types/documents/processing.d.ts +0 -249
- package/dist/types/documents/processing.d.ts.map +0 -1
- package/dist/types/documents/processing.js +0 -6
- package/dist/types/evaluations/iterations.d.ts +0 -41
- package/dist/types/evaluations/iterations.d.ts.map +0 -1
- package/dist/types/evaluations/iterations.js +0 -1
- package/dist/types/jobs/base.d.ts +0 -162
- package/dist/types/jobs/base.d.ts.map +0 -1
- package/dist/types/jobs/base.js +0 -6
- package/dist/types/jobs/specialized.d.ts +0 -200
- package/dist/types/jobs/specialized.d.ts.map +0 -1
- package/dist/types/jobs/specialized.js +0 -37
- package/dist/types/logs.d.ts +0 -92
- package/dist/types/logs.d.ts.map +0 -1
- package/dist/types/logs.js +0 -1
- package/dist/types/mime.d.ts +0 -426
- package/dist/types/mime.d.ts.map +0 -1
- package/dist/types/mime.js +0 -48
- package/dist/types/modalities.d.ts +0 -31
- package/dist/types/modalities.d.ts.map +0 -1
- package/dist/types/modalities.js +0 -109
- package/dist/types/pagination.d.ts +0 -5
- package/dist/types/pagination.d.ts.map +0 -1
- package/dist/types/pagination.js +0 -1
- package/dist/types/schemas/enhancement.d.ts +0 -250
- package/dist/types/schemas/enhancement.d.ts.map +0 -1
- package/dist/types/schemas/enhancement.js +0 -6
- package/dist/types/schemas/generate.d.ts +0 -160
- package/dist/types/schemas/generate.d.ts.map +0 -1
- package/dist/types/schemas/generate.js +0 -19
- package/dist/types/schemas/object.d.ts +0 -116
- package/dist/types/schemas/object.d.ts.map +0 -1
- package/dist/types/schemas/object.js +0 -861
- package/dist/types/secrets/external_api_keys.d.ts +0 -27
- package/dist/types/secrets/external_api_keys.d.ts.map +0 -1
- package/dist/types/secrets/external_api_keys.js +0 -11
- package/dist/types/secrets/index.d.ts +0 -2
- package/dist/types/secrets/index.d.ts.map +0 -1
- package/dist/types/secrets/index.js +0 -1
- package/dist/types/standards.d.ts +0 -37
- package/dist/types/standards.d.ts.map +0 -1
- package/dist/types/standards.js +0 -1
- package/dist/utils/ai_models.d.ts +0 -10
- package/dist/utils/ai_models.d.ts.map +0 -1
- package/dist/utils/ai_models.js +0 -183
- package/dist/utils/batch_processing.d.ts +0 -227
- package/dist/utils/batch_processing.d.ts.map +0 -1
- package/dist/utils/batch_processing.js +0 -268
- package/dist/utils/benchmarking.d.ts +0 -115
- package/dist/utils/benchmarking.d.ts.map +0 -1
- package/dist/utils/benchmarking.js +0 -355
- package/dist/utils/chat.d.ts +0 -70
- package/dist/utils/chat.d.ts.map +0 -1
- package/dist/utils/chat.js +0 -79
- package/dist/utils/cost_calculation.d.ts +0 -26
- package/dist/utils/cost_calculation.d.ts.map +0 -1
- package/dist/utils/cost_calculation.js +0 -99
- package/dist/utils/datasets.d.ts +0 -135
- package/dist/utils/datasets.d.ts.map +0 -1
- package/dist/utils/datasets.js +0 -359
- package/dist/utils/display.d.ts +0 -108
- package/dist/utils/display.d.ts.map +0 -1
- package/dist/utils/display.js +0 -244
- package/dist/utils/hash.d.ts +0 -18
- package/dist/utils/hash.d.ts.map +0 -1
- package/dist/utils/hash.js +0 -31
- package/dist/utils/hashing.d.ts +0 -18
- package/dist/utils/hashing.d.ts.map +0 -1
- package/dist/utils/hashing.js +0 -28
- package/dist/utils/index.d.ts +0 -8
- package/dist/utils/index.d.ts.map +0 -1
- package/dist/utils/index.js +0 -10
- package/dist/utils/json_schema.d.ts +0 -18
- package/dist/utils/json_schema.d.ts.map +0 -1
- package/dist/utils/json_schema.js +0 -334
- package/dist/utils/json_schema_utils.d.ts +0 -42
- package/dist/utils/json_schema_utils.d.ts.map +0 -1
- package/dist/utils/json_schema_utils.js +0 -212
- package/dist/utils/jsonl.d.ts +0 -60
- package/dist/utils/jsonl.d.ts.map +0 -1
- package/dist/utils/jsonl.js +0 -259
- package/dist/utils/mime.d.ts +0 -6
- package/dist/utils/mime.d.ts.map +0 -1
- package/dist/utils/mime.js +0 -129
- package/dist/utils/model_cards.d.ts +0 -219
- package/dist/utils/model_cards.d.ts.map +0 -1
- package/dist/utils/model_cards.js +0 -462
- package/dist/utils/prompt_optimization.d.ts +0 -96
- package/dist/utils/prompt_optimization.d.ts.map +0 -1
- package/dist/utils/prompt_optimization.js +0 -275
- package/dist/utils/responses.d.ts +0 -35
- package/dist/utils/responses.d.ts.map +0 -1
- package/dist/utils/responses.js +0 -37
- package/dist/utils/stream.d.ts +0 -13
- package/dist/utils/stream.d.ts.map +0 -1
- package/dist/utils/stream.js +0 -64
- package/dist/utils/stream_context_managers.d.ts +0 -147
- package/dist/utils/stream_context_managers.d.ts.map +0 -1
- package/dist/utils/stream_context_managers.js +0 -380
- package/dist/utils/usage.d.ts +0 -57
- package/dist/utils/usage.d.ts.map +0 -1
- package/dist/utils/usage.js +0 -97
- package/dist/utils/webhook_secrets.d.ts +0 -59
- package/dist/utils/webhook_secrets.d.ts.map +0 -1
- package/dist/utils/webhook_secrets.js +0 -107
- package/dist/utils/zod_to_json_schema.d.ts +0 -11
- package/dist/utils/zod_to_json_schema.d.ts.map +0 -1
- package/dist/utils/zod_to_json_schema.js +0 -123
package/dist/utils/display.js
DELETED
|
@@ -1,244 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import { readJSONL } from './jsonl.js';
|
|
3
|
-
/**
 * Estimate how many tokens a piece of text occupies.
 *
 * This is a length-based heuristic, not a real tokenizer: every four
 * characters (UTF-16 code units, as reported by `String.prototype.length`)
 * are assumed to make up one token, rounded up.
 *
 * @param {string} text - Text to measure.
 * @param {string} [_model='gpt-4o-mini'] - Accepted for signature
 *   compatibility; the estimate does not depend on it.
 * @returns {number} Estimated token count (0 for an empty string).
 */
export function countTokens(text, _model = 'gpt-4o-mini') {
    // Rough rule of thumb for English prose; swap in a real tokenizer
    // when exact counts matter.
    const CHARS_PER_TOKEN = 4;
    const rawEstimate = text.length / CHARS_PER_TOKEN;
    return Math.ceil(rawEstimate);
}
|
|
13
|
-
/**
 * Estimate token usage for content that may embed image references.
 *
 * Each detected image is charged a flat 85 tokens and is replaced by the
 * placeholder `[IMAGE]` before the remaining text is measured with
 * `countTokens`.
 *
 * Patterns are applied one after another to the already-stripped text, with
 * the wrapping forms (Markdown, HTML) first. An image such as
 * `![alt](data:image/png;base64,...)` is therefore counted once, and the
 * base64 payload of a bare data URI is removed together with its prefix
 * instead of being billed as text.
 *
 * @param {string} content - Message content; `null`/`undefined` is treated
 *   as empty and other non-string values are coerced with `String()`.
 * @param {string} [_model='gpt-4o-mini'] - Forwarded to `countTokens`.
 * @returns {{textTokens: number, imageTokens: number, totalTokens: number}}
 */
export function countContentTokens(content, _model = 'gpt-4o-mini') {
    // Flat per-image charge used by this estimate.
    const TOKENS_PER_IMAGE = 85;
    // Order matters: wrappers first so an embedded data URI is not seen twice.
    const imagePatterns = [
        /!\[.*?\]\(.*?\)/g, // Markdown images
        /<img[^>]*>/g, // HTML images
        /data:image\/[^;]+;base64,[A-Za-z0-9+\/=]*/g, // bare data URIs, payload included
    ];
    let textContent = content == null ? '' : String(content);
    let imageTokens = 0;
    for (const pattern of imagePatterns) {
        // Match against the remaining text, not the original, to avoid double counting.
        const matches = textContent.match(pattern);
        if (matches) {
            imageTokens += matches.length * TOKENS_PER_IMAGE;
            textContent = textContent.replace(pattern, '[IMAGE]');
        }
    }
    const textTokens = countTokens(textContent, _model);
    return {
        textTokens,
        imageTokens,
        totalTokens: textTokens + imageTokens,
    };
}
|
|
43
|
-
/**
 * Summarise a list of numbers.
 *
 * @param {number[]} values - Samples; the array is not modified.
 * @returns {{min: number, max: number, mean: number, median: number,
 *   p95: number, p99: number, total: number}} Summary statistics. Every
 *   field is 0 when `values` is empty. `mean` is rounded to two decimals.
 *   `median` is the middle element, or the average of the two middle
 *   elements for an even-length list. Percentiles use the nearest-rank
 *   method.
 */
export function calculateStats(values) {
    if (values.length === 0) {
        return { min: 0, max: 0, mean: 0, median: 0, p95: 0, p99: 0, total: 0 };
    }
    // Sort a copy numerically; the default sort would compare as strings.
    const sorted = [...values].sort((a, b) => a - b);
    const count = sorted.length;
    const total = values.reduce((sum, val) => sum + val, 0);
    const mean = total / count;
    const middle = Math.floor(count / 2);
    // Even-length lists have no single middle element: average the two central ones.
    const median = count % 2 === 1
        ? sorted[middle]
        : (sorted[middle - 1] + sorted[middle]) / 2;
    // Nearest-rank percentile, clamped so tiny lists never index below 0.
    const getPercentile = (p) => {
        const index = Math.ceil((p / 100) * count) - 1;
        return sorted[Math.max(0, index)];
    };
    return {
        min: sorted[0],
        max: sorted[count - 1],
        mean: Math.round(mean * 100) / 100,
        median,
        p95: getPercentile(95),
        p99: getPercentile(99),
        total,
    };
}
|
|
67
|
-
/**
 * Read a JSONL chat dataset and compute token, cost and message metrics.
 *
 * Rows without a `messages` array are skipped for token and message
 * accounting but still count towards `totalExamples`. `system` and `user`
 * messages are billed as input, `assistant` messages as output; messages
 * with any other role only increase the overall message count.
 *
 * @param {string} datasetPath - Path of the JSONL file.
 * @param {number} [inputTokenPrice=0.00015] - Price per 1000 input tokens.
 * @param {number} [outputTokenPrice=0.0006] - Price per 1000 output tokens.
 * @param {string} [model='gpt-4o-mini'] - Forwarded to `countContentTokens`.
 * @returns {Promise<object>} Metrics with `totalExamples`, `inputTokens`,
 *   `outputTokens`, `totalTokens`, `estimatedCost`, `messageStats` and
 *   `contentAnalysis`.
 * @throws {Error} When `datasetPath` does not exist.
 */
export async function processDatasetAndComputeMetrics(datasetPath, inputTokenPrice = 0.00015, outputTokenPrice = 0.0006, model = 'gpt-4o-mini') {
    if (!fs.existsSync(datasetPath)) {
        throw new Error(`Dataset file not found: ${datasetPath}`);
    }
    const dataset = await readJSONL(datasetPath);
    // Two-decimal average that yields 0 (not NaN) when there is nothing to average.
    const roundedAverage = (sum, count) => (count > 0 ? Math.round((sum / count) * 100) / 100 : 0);
    const sumOf = (numbers) => numbers.reduce((a, b) => a + b, 0);
    const inputTokenCounts = [];
    const outputTokenCounts = [];
    const totalTokenCounts = [];
    const systemLengths = [];
    const userLengths = [];
    const assistantLengths = [];
    let systemMessages = 0;
    let userMessages = 0;
    let assistantMessages = 0;
    let totalMessages = 0;
    let imageCount = 0;
    let hasImages = false;
    for (const example of dataset) {
        if (!example.messages || !Array.isArray(example.messages)) {
            continue;
        }
        let exampleInputTokens = 0;
        let exampleOutputTokens = 0;
        for (const message of example.messages) {
            totalMessages++;
            const content = message.content || '';
            const tokenCount = countContentTokens(content, model);
            // Track content lengths per role.
            const contentLength = content.length;
            switch (message.role) {
                case 'system':
                    systemMessages++;
                    exampleInputTokens += tokenCount.totalTokens;
                    systemLengths.push(contentLength);
                    break;
                case 'user':
                    userMessages++;
                    exampleInputTokens += tokenCount.totalTokens;
                    userLengths.push(contentLength);
                    break;
                case 'assistant':
                    assistantMessages++;
                    exampleOutputTokens += tokenCount.totalTokens;
                    assistantLengths.push(contentLength);
                    break;
            }
            if (tokenCount.imageTokens > 0) {
                hasImages = true;
                // Recover the image count from the flat 85-token-per-image charge.
                imageCount += tokenCount.imageTokens / 85;
            }
        }
        inputTokenCounts.push(exampleInputTokens);
        outputTokenCounts.push(exampleOutputTokens);
        totalTokenCounts.push(exampleInputTokens + exampleOutputTokens);
    }
    const inputStats = calculateStats(inputTokenCounts);
    const outputStats = calculateStats(outputTokenCounts);
    const totalStats = calculateStats(totalTokenCounts);
    return {
        totalExamples: dataset.length,
        inputTokens: inputStats,
        outputTokens: outputStats,
        totalTokens: totalStats,
        // Prices are quoted per 1000 tokens.
        estimatedCost: {
            input: (inputStats.total * inputTokenPrice) / 1000,
            output: (outputStats.total * outputTokenPrice) / 1000,
            total: ((inputStats.total * inputTokenPrice) + (outputStats.total * outputTokenPrice)) / 1000,
        },
        messageStats: {
            systemMessages,
            userMessages,
            assistantMessages,
            // Guarded: an empty dataset previously produced NaN here.
            avgMessagesPerExample: roundedAverage(totalMessages, dataset.length),
        },
        contentAnalysis: {
            avgSystemLength: roundedAverage(sumOf(systemLengths), systemLengths.length),
            avgUserLength: roundedAverage(sumOf(userLengths), userLengths.length),
            avgAssistantLength: roundedAverage(sumOf(assistantLengths), assistantLengths.length),
            hasImages,
            imageCount: Math.round(imageCount),
        },
    };
}
|
|
154
|
-
/**
 * Print a dataset metrics report to the console.
 *
 * Sections are written in order with `console.log`: basic statistics,
 * message distribution, token statistics (input, then output), cost
 * estimation and content analysis. The image line is only printed when
 * `metrics.contentAnalysis.hasImages` is truthy.
 *
 * @param {object} metrics - Object with `totalExamples`, `messageStats`,
 *   `inputTokens`, `outputTokens`, `estimatedCost` and `contentAnalysis`.
 * @returns {void}
 */
export function displayMetrics(metrics) {
    const rule = () => '═'.repeat(50);
    const say = (line) => {
        console.log(line);
    };
    // Label/field pairs shared by the input and output token sections.
    const tokenRows = [
        ['Total', 'total'],
        ['Mean', 'mean'],
        ['Median', 'median'],
        ['Min', 'min'],
        ['Max', 'max'],
        ['95th percentile', 'p95'],
        ['99th percentile', 'p99'],
    ];
    const sayTokenSection = (heading, pickStats) => {
        say(heading);
        for (const [label, field] of tokenRows) {
            say(` ${label}: ${pickStats()[field].toLocaleString()}`);
        }
    };

    say('\n📊 Dataset Analysis Report');
    say(rule());

    // Basic Stats
    say(`\n📈 Basic Statistics:`);
    say(` Total Examples: ${metrics.totalExamples.toLocaleString()}`);
    say(` Avg Messages/Example: ${metrics.messageStats.avgMessagesPerExample}`);

    // Message Distribution
    say(`\n💬 Message Distribution:`);
    say(` System Messages: ${metrics.messageStats.systemMessages.toLocaleString()}`);
    say(` User Messages: ${metrics.messageStats.userMessages.toLocaleString()}`);
    say(` Assistant Messages: ${metrics.messageStats.assistantMessages.toLocaleString()}`);

    // Token Statistics
    say(`\n🔢 Token Statistics:`);
    sayTokenSection(` Input Tokens:`, () => metrics.inputTokens);
    sayTokenSection(`\n Output Tokens:`, () => metrics.outputTokens);

    // Cost Estimation
    say(`\n💰 Cost Estimation:`);
    say(` Input Cost: $${metrics.estimatedCost.input.toFixed(4)}`);
    say(` Output Cost: $${metrics.estimatedCost.output.toFixed(4)}`);
    say(` Total Cost: $${metrics.estimatedCost.total.toFixed(4)}`);

    // Content Analysis
    say(`\n📝 Content Analysis:`);
    say(` Avg System Message Length: ${metrics.contentAnalysis.avgSystemLength.toLocaleString()} chars`);
    say(` Avg User Message Length: ${metrics.contentAnalysis.avgUserLength.toLocaleString()} chars`);
    say(` Avg Assistant Message Length: ${metrics.contentAnalysis.avgAssistantLength.toLocaleString()} chars`);
    if (metrics.contentAnalysis.hasImages) {
        say(` Images Detected: ${metrics.contentAnalysis.imageCount.toLocaleString()}`);
    }

    say('\n' + rule());
}
|
|
202
|
-
/**
 * Format a number compactly: millions as `M`, thousands as `K` (one decimal
 * each), anything smaller via `toLocaleString()`.
 *
 * Values that would round up to `1000.0K` (999,950 and above) are promoted
 * to `1.0M` so the output never shows a four-digit thousands figure.
 *
 * @param {number} num - Value to format.
 * @returns {string} Formatted value, e.g. `1.5K`, `2.3M`, `42`.
 */
export function formatNumber(num) {
    if (num >= 1000000) {
        return `${(num / 1000000).toFixed(1)}M`;
    }
    if (num >= 1000) {
        const thousands = (num / 1000).toFixed(1);
        // toFixed rounds, so values just under a million come out as "1000.0".
        return thousands === '1000.0' ? '1.0M' : `${thousands}K`;
    }
    return num.toLocaleString();
}
|
|
214
|
-
/**
 * Render a text progress bar such as `[█████     ] 50.0% (5/10)`.
 *
 * The completed fraction is clamped to the range 0–1, so a negative
 * `current` draws an empty bar and an overshoot draws a full one. A
 * non-positive `total` is rendered as complete (there is nothing left to
 * do), and a fraction that cannot be computed (NaN) is rendered as empty.
 *
 * @param {number} current - Units completed so far.
 * @param {number} total - Units expected overall.
 * @param {number} [width=40] - Number of character cells inside the brackets;
 *   negative widths are treated as 0.
 * @returns {string} Bar, percentage with one decimal, and the raw
 *   `current/total` pair.
 */
export function createProgressBar(current, total, width = 40) {
    const rawFraction = total > 0 ? current / total : 1;
    // Clamp so String.prototype.repeat never receives a negative count.
    const percentage = Number.isNaN(rawFraction) ? 0 : Math.min(Math.max(rawFraction, 0), 1);
    const barWidth = Math.max(0, width);
    const filled = Math.floor(percentage * barWidth);
    const empty = barWidth - filled;
    return `[${'█'.repeat(filled)}${' '.repeat(empty)}] ${(percentage * 100).toFixed(1)}% (${current}/${total})`;
}
|
|
223
|
-
/**
 * Write a progress bar to stdout, redrawing over the current line.
 *
 * The line is prefixed with a carriage return so successive calls overwrite
 * each other in a terminal; a newline is emitted once `current` reaches
 * `total`.
 *
 * @param {number} current - Units completed so far.
 * @param {number} total - Units expected overall.
 * @param {string} [message] - Optional label placed before the bar.
 * @returns {void}
 */
export function displayProgress(current, total, message) {
    const bar = createProgressBar(current, total);
    let line = bar;
    if (message) {
        line = `${message} ${bar}`;
    }
    // Carriage return moves the cursor back to column 0 before redrawing.
    process.stdout.write(`\r${line}`);
    const finished = current >= total;
    if (finished) {
        process.stdout.write('\n');
    }
}
|
|
235
|
-
export default {
|
|
236
|
-
processDatasetAndComputeMetrics,
|
|
237
|
-
displayMetrics,
|
|
238
|
-
countTokens,
|
|
239
|
-
countContentTokens,
|
|
240
|
-
calculateStats,
|
|
241
|
-
formatNumber,
|
|
242
|
-
createProgressBar,
|
|
243
|
-
displayProgress,
|
|
244
|
-
};
|
package/dist/utils/hash.d.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Generate a BLAKE2b hash from bytes.
|
|
3
|
-
* Uses the blake2 package to match Python's hashlib.blake2b with 8-byte digest.
|
|
4
|
-
*/
|
|
5
|
-
export declare function generateBlake2bHashFromBytes(bytes: Buffer): string;
|
|
6
|
-
/**
|
|
7
|
-
* Generate a BLAKE2b hash from a base64 string.
|
|
8
|
-
*/
|
|
9
|
-
export declare function generateBlake2bHashFromBase64(base64String: string): string;
|
|
10
|
-
/**
|
|
11
|
-
* Generate a BLAKE2b hash from a UTF-8 string.
|
|
12
|
-
*/
|
|
13
|
-
export declare function generateBlake2bHashFromString(inputString: string): string;
|
|
14
|
-
/**
|
|
15
|
-
* Generate a BLAKE2b hash from a dictionary/object.
|
|
16
|
-
*/
|
|
17
|
-
export declare function generateBlake2bHashFromDict(inputDict: Record<string, any>): string;
|
|
18
|
-
//# sourceMappingURL=hash.d.ts.map
|
package/dist/utils/hash.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../../src/utils/hash.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,wBAAgB,4BAA4B,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAIlE;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,CAG1E;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAGzE;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAGlF"}
|
package/dist/utils/hash.js
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import * as blake2 from 'blake2';
|
|
2
|
-
/**
 * Hash raw bytes with BLAKE2b and return the digest as hex.
 *
 * The digest length is fixed at 8 bytes. Per the original note, this is
 * meant to line up with Python's `hashlib.blake2b` using an 8-byte digest.
 *
 * @param {Buffer} bytes - Data to hash.
 * @returns {string} Hex-encoded digest.
 */
export function generateBlake2bHashFromBytes(bytes) {
    const DIGEST_BYTES = 8;
    const hasher = blake2.createHash('blake2b', { digestLength: DIGEST_BYTES });
    hasher.update(bytes);
    const hexDigest = hasher.digest('hex');
    return hexDigest;
}
|
|
11
|
-
/**
 * Decode a base64 string and hash the resulting bytes with
 * `generateBlake2bHashFromBytes`.
 *
 * @param {string} base64String - Base64-encoded payload.
 * @returns {string} Hash returned by `generateBlake2bHashFromBytes`.
 */
export function generateBlake2bHashFromBase64(base64String) {
    const decoded = Buffer.from(base64String, 'base64');
    return generateBlake2bHashFromBytes(decoded);
}
|
|
18
|
-
/**
 * Encode a string as UTF-8 and hash the bytes with
 * `generateBlake2bHashFromBytes`.
 *
 * @param {string} inputString - Text to hash.
 * @returns {string} Hash returned by `generateBlake2bHashFromBytes`.
 */
export function generateBlake2bHashFromString(inputString) {
    const utf8Bytes = Buffer.from(inputString, 'utf-8');
    return generateBlake2bHashFromBytes(utf8Bytes);
}
|
|
25
|
-
/**
 * Hash an object via its JSON form with keys emitted in sorted order.
 *
 * `JSON.stringify` treats an array replacer as an allow-list that applies at
 * every nesting depth. Passing only the top-level keys therefore dropped any
 * nested property whose name did not also appear at the top level, so
 * objects differing only in nested content produced the same hash. The
 * allow-list is now built from the keys found at all depths. Objects whose
 * values are all primitives serialise exactly as before.
 *
 * @param {Record<string, any>} inputDict - Object to hash.
 * @returns {string} Hash of the key-sorted JSON text, as returned by
 *   `generateBlake2bHashFromString`.
 */
export function generateBlake2bHashFromDict(inputDict) {
    // First pass: walk the structure only to record every property name.
    const seenKeys = new Set();
    JSON.stringify(inputDict, (key, value) => {
        seenKeys.add(key);
        return value;
    });
    // Second pass: the sorted allow-list fixes the key order at each level.
    const jsonString = JSON.stringify(inputDict, [...seenKeys].sort());
    return generateBlake2bHashFromString(jsonString.trim());
}
|
package/dist/utils/hashing.d.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Cryptographic hashing utilities
|
|
3
|
-
* Equivalent to Python's utils/hashing.py
|
|
4
|
-
*/
|
|
5
|
-
export declare function md5(data: string | Buffer): string;
|
|
6
|
-
export declare function sha256(data: string | Buffer): string;
|
|
7
|
-
export declare function sha512(data: string | Buffer): string;
|
|
8
|
-
export declare function hmacSha256(data: string | Buffer, secret: string): string;
|
|
9
|
-
export declare function contentHash(content: any): string;
|
|
10
|
-
declare const _default: {
|
|
11
|
-
md5: typeof md5;
|
|
12
|
-
sha256: typeof sha256;
|
|
13
|
-
sha512: typeof sha512;
|
|
14
|
-
hmacSha256: typeof hmacSha256;
|
|
15
|
-
contentHash: typeof contentHash;
|
|
16
|
-
};
|
|
17
|
-
export default _default;
|
|
18
|
-
//# sourceMappingURL=hashing.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"hashing.d.ts","sourceRoot":"","sources":["../../src/utils/hashing.ts"],"names":[],"mappings":"AAEA;;;GAGG;AAEH,wBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAEjD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAEpD;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAExE;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,GAAG,GAAG,MAAM,CAGhD;;;;;;;;AAED,wBAME"}
|
package/dist/utils/hashing.js
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { createHash, createHmac } from 'crypto';
|
|
2
|
-
/**
|
|
3
|
-
* Cryptographic hashing utilities
|
|
4
|
-
* Equivalent to Python's utils/hashing.py
|
|
5
|
-
*/
|
|
6
|
-
/**
 * Compute the MD5 digest of `data` as a hex string.
 *
 * @param {string|Buffer} data - Input to hash.
 * @returns {string} Hex-encoded digest.
 */
export function md5(data) {
    const hasher = createHash('md5');
    hasher.update(data);
    return hasher.digest('hex');
}
|
|
9
|
-
/**
 * Compute the SHA-256 digest of `data` as a hex string.
 *
 * @param {string|Buffer} data - Input to hash.
 * @returns {string} Hex-encoded digest.
 */
export function sha256(data) {
    const hasher = createHash('sha256');
    hasher.update(data);
    return hasher.digest('hex');
}
|
|
12
|
-
/**
 * Compute the SHA-512 digest of `data` as a hex string.
 *
 * @param {string|Buffer} data - Input to hash.
 * @returns {string} Hex-encoded digest.
 */
export function sha512(data) {
    const hasher = createHash('sha512');
    hasher.update(data);
    return hasher.digest('hex');
}
|
|
15
|
-
/**
 * Compute the HMAC-SHA256 of `data` keyed with `secret`, as a hex string.
 *
 * @param {string|Buffer} data - Message to authenticate.
 * @param {string} secret - HMAC key.
 * @returns {string} Hex-encoded MAC.
 */
export function hmacSha256(data, secret) {
    const mac = createHmac('sha256', secret);
    mac.update(data);
    return mac.digest('hex');
}
|
|
18
|
-
/**
 * Hash arbitrary content with `sha256`.
 *
 * Strings are hashed as-is; any other value is serialised with
 * `JSON.stringify` first, so the result depends on the property order of
 * the object passed in.
 *
 * @param {*} content - String or JSON-serialisable value.
 * @returns {string} Value returned by `sha256` for the normalised text.
 */
export function contentHash(content) {
    let normalized;
    if (typeof content === 'string') {
        normalized = content;
    } else {
        normalized = JSON.stringify(content);
    }
    return sha256(normalized);
}
|
|
22
|
-
export default {
|
|
23
|
-
md5,
|
|
24
|
-
sha256,
|
|
25
|
-
sha512,
|
|
26
|
-
hmacSha256,
|
|
27
|
-
contentHash,
|
|
28
|
-
};
|
package/dist/utils/index.d.ts
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
export * from './stream.js';
|
|
2
|
-
export * from './ai_models.js';
|
|
3
|
-
export * from './json_schema_utils.js';
|
|
4
|
-
export * from './jsonl.js';
|
|
5
|
-
export * from './prompt_optimization.js';
|
|
6
|
-
export { default as jsonlUtils } from './jsonl.js';
|
|
7
|
-
export { default as promptOptimization } from './prompt_optimization.js';
|
|
8
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AACA,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,wBAAwB,CAAC;AAGvC,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AAGzC,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,YAAY,CAAC;AACnD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
|
package/dist/utils/index.js
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
// Core utilities
|
|
2
|
-
export * from './stream.js';
|
|
3
|
-
export * from './ai_models.js';
|
|
4
|
-
export * from './json_schema_utils.js';
|
|
5
|
-
// New utilities for 100% feature parity
|
|
6
|
-
export * from './jsonl.js';
|
|
7
|
-
export * from './prompt_optimization.js';
|
|
8
|
-
// Re-export commonly used utilities
|
|
9
|
-
export { default as jsonlUtils } from './jsonl.js';
|
|
10
|
-
export { default as promptOptimization } from './prompt_optimization.js';
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
export declare function generateBlake2bHashFromString(input: string): string;
|
|
2
|
-
export declare function generateSchemaDataId(jsonSchema: Record<string, any>): string;
|
|
3
|
-
export declare function generateSchemaId(jsonSchema: Record<string, any>): string;
|
|
4
|
-
export declare function cleanSchema(schema: Record<string, any>, removeCustomFields?: boolean, fieldsToRemove?: string[]): Record<string, any>;
|
|
5
|
-
export declare function loadJsonSchema(jsonSchema: Record<string, any> | string): Record<string, any>;
|
|
6
|
-
export declare function expandRefs(schema: Record<string, any>): Record<string, any>;
|
|
7
|
-
export declare function createReasoningSchema(schema: Record<string, any>): Record<string, any>;
|
|
8
|
-
export declare function validateCurrency(currencyCode: any): string | null;
|
|
9
|
-
export declare function validateCountryCode(value: any): string | null;
|
|
10
|
-
export declare function validateEmailRegex(value: any): string | null;
|
|
11
|
-
export declare function validatePhoneNumber(value: any): string | null;
|
|
12
|
-
export declare function filterAuxiliaryFieldsJson(jsonData: any): any;
|
|
13
|
-
export declare function jsonSchemaToTypescriptInterface(schema: Record<string, any>, addFieldDescription?: boolean): string;
|
|
14
|
-
export declare function jsonSchemaToNlpDataStructure(schema: Record<string, any>): string;
|
|
15
|
-
export declare function jsonSchemaToStrictOpenaiSchema(schema: Record<string, any>): Record<string, any>;
|
|
16
|
-
export declare function unflattenDict(flatDict: Record<string, any>, separator?: string): Record<string, any>;
|
|
17
|
-
export declare function schemaToTsType(schema: Record<string, any>, definitions?: Record<string, any>, visited?: Set<string>, depth?: number, maxDepth?: number, addFieldDescription?: boolean): string;
|
|
18
|
-
//# sourceMappingURL=json_schema.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"json_schema.d.ts","sourceRoot":"","sources":["../../src/utils/json_schema.ts"],"names":[],"mappings":"AAIA,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAGnE;AAGD,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAS5E;AAED,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAIxE;AAGD,wBAAgB,WAAW,CACzB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC3B,kBAAkB,GAAE,OAAc,EAClC,cAAc,GAAE,MAAM,EAAO,GAC5B,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAiCrB;AAGD,wBAAgB,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAQ5F;AAGD,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CA+B3E;AAGD,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAmCtF;AAGD,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQjE;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQ7D;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAO5D;AAED,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,GAAG,GAAG,MAAM,GAAG,IAAI,CAQ7D;AAGD,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,GAAG,GAAG,GAAG,CA4B5D;AAGD,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC3B,mBAAmB,GAAE,OAAc,GAClC,MAAM,CAoCR;AAGD,wBAAgB,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAkChF;AAGD,wBAAgB,8BAA8B,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAqB/F;AAGD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,SAAS,GAAE,MAAY,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAmBzG;AAGD,wBAAgB,cAAc,CAC5B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC3B,WAAW,GAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAM,EACrC,OAAO,GAAE,GAAG,CAAC,MAAM,CAAa,EAChC,KAAK,GAAE,MAAU,EACjB,QAAQ,GAAE,MAAW,EACrB,mBAAmB,GAAE,OAAc,GAClC,MAAM,CAwCR"}
|