@jtml/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +370 -0
- package/dist/chunk-SHDXMADE.mjs +675 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +684 -0
- package/dist/cli.mjs +168 -0
- package/dist/index.d.mts +262 -0
- package/dist/index.d.ts +262 -0
- package/dist/index.js +726 -0
- package/dist/index.mjs +58 -0
- package/package.json +62 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// CommonJS interop helpers: thin aliases over Object reflection APIs plus the
// three functions that build the getter-based export object.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Install every entry of `all` on `target` as an enumerable getter, so the
 * exported value is read lazily at access time.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key named by `except`, are skipped.
 * Returns `to` (unchanged when `from` is neither an object nor a function).
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Properties without a descriptor are treated as enumerable.
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/** Wrap a module namespace object in a fresh object flagged `__esModule`. */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
JTMLDecoder: () => JTMLDecoder,
|
|
24
|
+
JTMLEncoder: () => JTMLEncoder,
|
|
25
|
+
JTMLError: () => JTMLError,
|
|
26
|
+
SchemaManager: () => SchemaManager,
|
|
27
|
+
TYPE_MAP: () => TYPE_MAP,
|
|
28
|
+
analyzeTokens: () => analyzeTokens,
|
|
29
|
+
calculateEfficiency: () => calculateEfficiency,
|
|
30
|
+
compareTokens: () => compareTokens,
|
|
31
|
+
convertJsonString: () => convertJsonString,
|
|
32
|
+
decode: () => decode,
|
|
33
|
+
decoder: () => decoder,
|
|
34
|
+
default: () => index_default,
|
|
35
|
+
encode: () => encode,
|
|
36
|
+
encodeBatch: () => encodeBatch,
|
|
37
|
+
encoder: () => encoder,
|
|
38
|
+
estimateCostSavings: () => estimateCostSavings,
|
|
39
|
+
estimateTokens: () => estimateTokens,
|
|
40
|
+
formatTokenStats: () => formatTokenStats,
|
|
41
|
+
inferSchema: () => inferSchema,
|
|
42
|
+
inferType: () => inferType,
|
|
43
|
+
jsonToJtml: () => jsonToJtml,
|
|
44
|
+
jtmlToJson: () => jtmlToJson,
|
|
45
|
+
parseSchema: () => parseSchema,
|
|
46
|
+
roundTrip: () => roundTrip,
|
|
47
|
+
schemaManager: () => schemaManager,
|
|
48
|
+
serializeSchema: () => serializeSchema,
|
|
49
|
+
validateAgainstSchema: () => validateAgainstSchema
|
|
50
|
+
});
|
|
51
|
+
module.exports = __toCommonJS(index_exports);
|
|
52
|
+
|
|
53
|
+
// src/core/types.ts
|
|
54
|
+
// Lookup from the compact type codes used in schema text to their long names.
var TYPE_MAP = Object.fromEntries([
  ["i", "integer"],
  ["f", "float"],
  ["s", "string"],
  ["b", "boolean"],
  ["t", "timestamp"],
  ["n", "null"],
  ["o", "object"],
  ["a", "array"],
  ["e", "enum"],
  ["ref", "reference"]
]);
|
|
66
|
+
/**
 * Error type thrown by this module. Besides the message it carries a
 * machine-readable `code` string (for example "SCHEMA_PARSE_ERROR").
 */
var JTMLError = class extends Error {
  /**
   * @param {string} message Human-readable description.
   * @param {string} code Short identifier for the failure category.
   */
  constructor(message, code) {
    super(message);
    Object.assign(this, { code, name: "JTMLError" });
  }
};
|
|
73
|
+
|
|
74
|
+
// src/core/schema.ts
|
|
75
|
+
/**
 * In-memory registry of schemas keyed by their `id`.
 */
var SchemaManager = class {
  constructor() {
    this.schemas = new Map();
  }

  /** Store `schema` under `schema.id`, replacing any previous entry. */
  register(schema) {
    const { id } = schema;
    this.schemas.set(id, schema);
  }

  /** Return the schema registered under `id`, or `undefined`. */
  get(id) {
    const found = this.schemas.get(id);
    return found;
  }

  /** Report whether a schema is registered under `id`. */
  has(id) {
    const isKnown = this.schemas.has(id);
    return isKnown;
  }

  /** Remove every registered schema. */
  clear() {
    this.schemas.clear();
  }

  /** Return all registered schemas as a new array, in registration order. */
  export() {
    return [...this.schemas.values()];
  }

  /** Register each schema yielded by `schemas.forEach`. */
  import(schemas) {
    const add = (schema) => this.register(schema);
    schemas.forEach(add);
  }
};
|
|
116
|
+
/**
 * Map a runtime value to a type descriptor `{ type }` using the short codes:
 * n (null/undefined), b, i, f, t (ISO-like timestamp string), s, a, o.
 * Non-empty arrays also report `arrayOf`, taken from their first element only.
 *
 * @throws {JTMLError} TYPE_INFERENCE_ERROR for values of any other kind.
 */
function inferType(value) {
  if (value == null) {
    return { type: "n" };
  }
  switch (typeof value) {
    case "boolean":
      return { type: "b" };
    case "number":
      return { type: Number.isInteger(value) ? "i" : "f" };
    case "string": {
      // Strings that start like "YYYY-MM-DDTHH:MM:SS" count as timestamps.
      const looksLikeTimestamp = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/.test(value);
      return { type: looksLikeTimestamp ? "t" : "s" };
    }
    case "object": {
      if (!Array.isArray(value)) {
        return { type: "o" };
      }
      if (value.length === 0) {
        return { type: "a" };
      }
      const { type: elementType } = inferType(value[0]);
      return { type: "a", arrayOf: elementType };
    }
    default:
      throw new JTMLError(`Cannot infer type for value: ${value}`, "TYPE_INFERENCE_ERROR");
  }
}
|
|
144
|
+
/**
 * Derive a schema from an object, or from an array of objects.
 *
 * The field list (names and order) comes from the first element / the object
 * itself. For arrays, a field is marked optional when any row lacks a value
 * for it, and a field that is null in the first row is typed from the first
 * row that does carry a value (instead of being frozen as "n").
 *
 * @param {object|object[]} data Sample data.
 * @param {string} schemaId Identifier stored on the returned schema.
 * @returns {{id: string, fields: Array, version: string}}
 * @throws {JTMLError} INVALID_DATA when no object sample is available.
 */
function inferSchema(data, schemaId) {
  const isList = Array.isArray(data);
  if (!isList && typeof data !== "object") {
    throw new JTMLError("Schema inference requires array or object data", "INVALID_DATA");
  }
  const sample = isList ? data[0] : data;
  if (!sample || typeof sample !== "object") {
    throw new JTMLError("Cannot infer schema from empty or non-object data", "INVALID_DATA");
  }
  // `== null` matches both null and undefined; a null row counts as "missing"
  // rather than crashing on the property read.
  const lacksValue = (item, key) => item == null || item[key] == null;
  const fields = Object.entries(sample).map(([key, value]) => {
    let optional = false;
    let typeSource = value;
    if (isList) {
      optional = data.some((item) => lacksValue(item, key));
      if (value == null) {
        const rowWithValue = data.find((item) => !lacksValue(item, key));
        if (rowWithValue !== void 0) {
          typeSource = rowWithValue[key];
        }
      }
    }
    return {
      name: key,
      typeInfo: { ...inferType(typeSource), optional }
    };
  });
  return {
    id: schemaId,
    fields,
    version: "1.0"
  };
}
|
|
172
|
+
/**
 * Render a schema as two lines of text: an `@schema <id>` header followed by
 * space-separated `name:type` field definitions. Suffixes are appended in
 * this order: `[]` (array), `[v1,v2]` (enum values), `[schema]` (reference),
 * `?` (optional).
 */
function serializeSchema(schema) {
  const renderField = (field) => {
    const info = field.typeInfo;
    const suffixes = [];
    if (info.arrayOf) {
      suffixes.push(`[]`);
    }
    if (info.enumValues) {
      suffixes.push(`[${info.enumValues.join(",")}]`);
    }
    if (info.refSchema) {
      suffixes.push(`[${info.refSchema}]`);
    }
    if (info.optional) {
      suffixes.push("?");
    }
    return `${field.name}:${info.type}${suffixes.join("")}`;
  };
  const header = `@schema ${schema.id}`;
  const fieldLine = schema.fields.map((field) => renderField(field)).join(" ");
  return [header, fieldLine].join("\n");
}
|
|
192
|
+
/**
 * Parse the two-line text form (`@schema <id>` + field definitions) back into
 * a schema object.
 *
 * Each field definition is `name:type`, optionally followed by `[]`, a
 * bracketed enum/reference payload, and `?`. The type must be exactly one of
 * the single-letter codes or `ref`. Field names may contain any characters
 * except whitespace and `:`. Lines may end in CRLF and the field line may be
 * indented.
 *
 * Note: for `[]` fields `arrayOf` is set to the declared type code, because
 * the text form carries no separate element type.
 *
 * @param {string} schemaStr Schema text.
 * @returns {{id: string, fields: Array, version: string}}
 * @throws {JTMLError} SCHEMA_PARSE_ERROR on a malformed header or field.
 */
function parseSchema(schemaStr) {
  const lines = schemaStr.trim().split(/\r?\n/);
  const headerMatch = lines[0].match(/^@schema\s+(\S+)/);
  if (!headerMatch) {
    throw new JTMLError("Invalid schema format", "SCHEMA_PARSE_ERROR");
  }
  const schemaId = headerMatch[1];
  // Trim so leading indentation cannot produce an empty first token.
  const fieldLine = (lines[1] ?? "").trim();
  if (!fieldLine) {
    throw new JTMLError("Schema is missing field definitions", "SCHEMA_PARSE_ERROR");
  }
  const fieldPattern = /^([^\s:]+):(ref|[ifsbtnoae])(\[\])?(\[([^\]]+)\])?(\?)?$/;
  const fields = fieldLine.split(/\s+/).map((fieldDef) => {
    const match = fieldDef.match(fieldPattern);
    if (!match) {
      throw new JTMLError(`Invalid field definition: ${fieldDef}`, "SCHEMA_PARSE_ERROR");
    }
    const [, name, type, isArray, , enumOrRef, isOptional] = match;
    const typeInfo = {
      type,
      optional: Boolean(isOptional)
    };
    if (isArray) {
      typeInfo.arrayOf = type;
    }
    if (enumOrRef) {
      if (type === "e") {
        typeInfo.enumValues = enumOrRef.split(",");
      } else if (type === "ref") {
        typeInfo.refSchema = enumOrRef;
      }
    }
    return { name, typeInfo };
  });
  return {
    id: schemaId,
    fields,
    version: "1.0"
  };
}
|
|
233
|
+
/**
 * Check data against a schema. Arrays pass when every element passes
 * `validateItem`; any other value is checked as a single item.
 *
 * @returns {boolean}
 */
function validateAgainstSchema(data, schema) {
  const conforms = (item) => validateItem(item, schema);
  return Array.isArray(data) ? data.every(conforms) : conforms(data);
}
|
|
239
|
+
/**
 * Check one object against a schema.
 *
 * Missing (null/undefined) values are allowed only for optional fields.
 * Present values must be compatible with the declared type code; see
 * `fieldTypeMatches` for the compatibility rules.
 *
 * @returns {boolean} false for non-objects or on the first failing field.
 */
function validateItem(item, schema) {
  if (typeof item !== "object" || item === null) {
    return false;
  }
  return schema.fields.every(({ name, typeInfo }) => {
    const value = item[name];
    if (value == null) {
      return Boolean(typeInfo.optional);
    }
    return fieldTypeMatches(value, typeInfo);
  });
}

/**
 * Decide whether a present value satisfies a declared field type.
 * `inferType` only ever reports n/b/i/f/s/t/a/o, so declared types it cannot
 * produce need explicit rules:
 *  - "f" also accepts integers (2 is a valid float),
 *  - "s" also accepts timestamp-looking strings,
 *  - "e" accepts strings, restricted to `enumValues` when those are listed,
 *  - "ref" accepts plain objects.
 */
function fieldTypeMatches(value, typeInfo) {
  const actual = inferType(value).type;
  switch (typeInfo.type) {
    case "f":
      return actual === "f" || actual === "i";
    case "s":
      return actual === "s" || actual === "t";
    case "e":
      if (typeof value !== "string") {
        return false;
      }
      return !typeInfo.enumValues || typeInfo.enumValues.includes(value);
    case "ref":
      return actual === "o";
    default:
      return actual === typeInfo.type;
  }
}
|
|
258
|
+
var schemaManager = new SchemaManager();
|
|
259
|
+
|
|
260
|
+
// src/core/encoder.ts
|
|
261
|
+
/**
 * Turns JSON-compatible data into JTML text: a schema (or a reference to a
 * registered one), an `@data` marker, and one pipe-delimited row per object.
 */
var JTMLEncoder = class {
  /**
   * Encode data to JTML.
   *
   * With `schemaRef`, the named schema is looked up in the shared registry and
   * only an `@ref` line is emitted. Otherwise, when `autoInferTypes` is on, a
   * schema is inferred, registered under `schemaId`, and embedded if
   * `includeSchema` is true. With inference off, the JSON fallback is used.
   *
   * @throws {JTMLError} SCHEMA_NOT_FOUND when `schemaRef` is not registered.
   */
  encode(data, { schemaId = "default", schemaRef, autoInferTypes = true, includeSchema = true } = {}) {
    if (schemaRef) {
      const registered = schemaManager.get(schemaRef);
      if (!registered) {
        throw new JTMLError(`Schema not found: ${schemaRef}`, "SCHEMA_NOT_FOUND");
      }
      return this.encodeWithSchema(data, registered, false);
    }
    if (!autoInferTypes) {
      return this.encodeSimple(data);
    }
    const inferred = inferSchema(data, schemaId);
    schemaManager.register(inferred);
    return this.encodeWithSchema(data, inferred, includeSchema);
  }

  /**
   * Encode using a given schema. Arrays are prefixed with an `@array` marker;
   * a single object becomes one row; any other data yields the header only.
   */
  encodeWithSchema(data, schema, includeSchema) {
    const output = includeSchema
      ? [serializeSchema(schema), "", "@data"]
      : [`@ref ${schema.id}`, "@data"];
    if (Array.isArray(data)) {
      output.push("@array");
      for (const row of data) {
        output.push(this.encodeRow(row, schema));
      }
    } else if (data !== null && typeof data === "object") {
      output.push(this.encodeRow(data, schema));
    }
    return output.join("\n");
  }

  /**
   * Encode one object as a pipe-delimited row, in schema field order.
   *
   * @throws {JTMLError} INVALID_DATA when `item` is not an object.
   */
  encodeRow(item, schema) {
    if (item === null || typeof item !== "object") {
      throw new JTMLError("Cannot encode non-object item", "INVALID_DATA");
    }
    const cells = schema.fields.map((field) => this.encodeValue(item[field.name]));
    return cells.join("|");
  }

  /**
   * Encode a single value: null/undefined as the empty string, booleans as
   * "1"/"0", strings with backslash, pipe and newline escaped, arrays as
   * `[a,b]` and objects as `{k:v}` (both recursively).
   */
  encodeValue(value) {
    if (value == null) {
      return "";
    }
    switch (typeof value) {
      case "boolean":
        return value ? "1" : "0";
      case "number":
        return String(value);
      case "string":
        // Backslashes first, so the escapes added afterwards are not doubled.
        return value.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\n/g, "\\n");
      case "object": {
        if (Array.isArray(value)) {
          const items = value.map((v) => this.encodeValue(v));
          return `[${items.join(",")}]`;
        }
        const pairs = Object.entries(value).map(([k, v]) => `${k}:${this.encodeValue(v)}`);
        return `{${pairs.join(",")}}`;
      }
      default:
        return String(value);
    }
  }

  /** Schema-less fallback: one JSON document per line for arrays. */
  encodeSimple(data) {
    if (!Array.isArray(data)) {
      return JSON.stringify(data);
    }
    return data.map((item) => JSON.stringify(item)).join("\n");
  }

  /**
   * Encode data, then append a blank line, an `@meta` marker and one
   * `key:value` line per metadata entry.
   */
  encodeWithMetadata(data, metadata, options = {}) {
    const dataEncoded = this.encode(data, options);
    const metaLines = Object.entries(metadata).map(
      ([key, value]) => `${key}:${this.encodeValue(value)}`
    );
    const metaBlock = ["", "@meta", ...metaLines].join("\n");
    return `${dataEncoded}\n${metaBlock}`;
  }
};
|
|
372
|
+
var encoder = new JTMLEncoder();
|
|
373
|
+
/**
 * Encode data to JTML using the shared module-level encoder instance.
 * `options` is passed through to `JTMLEncoder#encode` untouched.
 */
function encode(data, options) {
  const jtml = encoder.encode(data, options);
  return jtml;
}
|
|
376
|
+
/**
 * Encode several datasets that share one schema.
 *
 * The schema is inferred from the first dataset only, registered under
 * `schemaId`, and written once at the top. Each dataset then follows as an
 * `@batch <index>` line, its `@ref`-encoded body and a blank line.
 *
 * @throws {JTMLError} INVALID_DATA when `datasets` is empty.
 */
function encodeBatch(datasets, schemaId = "batch") {
  if (datasets.length === 0) {
    throw new JTMLError("Cannot encode empty batch", "INVALID_DATA");
  }
  const sharedSchema = inferSchema(datasets[0], schemaId);
  schemaManager.register(sharedSchema);
  const output = [serializeSchema(sharedSchema), ""];
  const batchOptions = () => ({ schemaRef: schemaId, includeSchema: false });
  datasets.forEach((dataset, position) => {
    const body = encoder.encode(dataset, batchOptions());
    output.push(`@batch ${position}`, body, "");
  });
  return output.join("\n");
}
|
|
396
|
+
|
|
397
|
+
// src/core/decoder.ts
|
|
398
|
+
/**
 * Parses JTML text (schema or schema reference, `@data` rows, optional
 * `@meta` block) back into JSON-compatible values.
 */
var JTMLDecoder = class {
  /**
   * Decode JTML text.
   *
   * @param {string} jtml JTML document.
   * @param {object} [options]
   * @param {{forEach: Function}} [options.schemaCache] Schemas to register in
   *   the shared registry before decoding.
   * @param {boolean} [options.strict=true] When true, a document without a
   *   schema is rejected; when false, data lines are parsed as JSON.
   * @returns The single decoded object, or an array of rows when the data is
   *   marked `@array` or has several rows. When metadata is present the
   *   result is `{ data, metadata }`.
   * @throws {JTMLError} SCHEMA_PARSE_ERROR, SCHEMA_NOT_FOUND, SCHEMA_REQUIRED,
   *   SCHEMA_MISMATCH or INVALID_VALUE.
   */
  decode(jtml, options = {}) {
    const { schemaCache, strict = true } = options;
    if (schemaCache) {
      schemaCache.forEach((cachedSchema) => {
        schemaManager.register(cachedSchema);
      });
    }
    const lines = jtml.trim().split("\n");
    let schema;
    let dataStartIndex = 0;
    const metadata = {};
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();
      if (line.startsWith("@schema")) {
        if (i + 1 >= lines.length) {
          throw new JTMLError("Incomplete schema definition: missing field definitions", "SCHEMA_PARSE_ERROR");
        }
        schema = parseSchema([line, lines[i + 1]].join("\n"));
        schemaManager.register(schema);
        i++; // the field-definition line has been consumed
      } else if (line.startsWith("@ref")) {
        const schemaId = line.split(/\s+/)[1];
        if (!schemaId) {
          throw new JTMLError("Missing schema ID in @ref directive", "SCHEMA_PARSE_ERROR");
        }
        schema = schemaManager.get(schemaId);
        if (!schema) {
          throw new JTMLError(`Schema not found: ${schemaId}`, "SCHEMA_NOT_FOUND");
        }
      } else if (line === "@data") {
        dataStartIndex = i + 1;
        break;
      }
    }
    if (!schema && strict) {
      throw new JTMLError("No schema found in JTML data", "SCHEMA_REQUIRED");
    }
    let metaStartIndex = -1;
    for (let i = dataStartIndex; i < lines.length; i++) {
      if (lines[i].trim() === "@meta") {
        metaStartIndex = i + 1;
        break;
      }
    }
    const dataEndIndex = metaStartIndex > 0 ? metaStartIndex - 1 : lines.length;
    const rawDataLines = lines.slice(dataStartIndex, dataEndIndex).filter((l) => l.trim());
    const isArrayEncoding = rawDataLines[0]?.trim() === "@array";
    const dataLines = isArrayEncoding ? rawDataLines.slice(1) : rawDataLines;
    const results = dataLines.map(
      (row) => schema ? this.decodeRow(row, schema) : JSON.parse(row)
    );
    if (metaStartIndex > 0) {
      for (let i = metaStartIndex; i < lines.length; i++) {
        const line = lines[i].trim();
        if (!line || line.startsWith("@")) continue;
        const colonIdx = line.indexOf(":");
        if (colonIdx === -1) continue;
        const key = line.slice(0, colonIdx);
        // Never let document text name prototype-related properties.
        if (key === "__proto__" || key === "constructor" || key === "prototype") continue;
        // Metadata values are written with the same escaping as row cells,
        // so undo it before interpreting the value.
        const value = this.unescapeText(line.slice(colonIdx + 1));
        metadata[key] = this.decodeValue(value);
      }
    }
    // Only an un-marked single row is unwrapped; an `@array` of one element
    // stays an array, with or without metadata.
    const payload = !isArrayEncoding && results.length === 1 ? results[0] : results;
    if (Object.keys(metadata).length > 0) {
      return { data: payload, metadata };
    }
    return payload;
  }

  /**
   * Decode one pipe-delimited row into an object keyed by schema field name.
   * Empty cells become null.
   *
   * @throws {JTMLError} SCHEMA_MISMATCH when the cell count differs from the
   *   number of schema fields.
   */
  decodeRow(line, schema) {
    const values = this.splitRow(line);
    if (values.length !== schema.fields.length) {
      throw new JTMLError(
        `Row has ${values.length} values but schema expects ${schema.fields.length}`,
        "SCHEMA_MISMATCH"
      );
    }
    const result = {};
    schema.fields.forEach((field, index) => {
      const rawValue = values[index];
      if (rawValue === "" || rawValue === null) {
        result[field.name] = null;
      } else {
        const { type, arrayOf } = field.typeInfo;
        result[field.name] = this.decodeValue(rawValue, type, arrayOf);
      }
    });
    return result;
  }

  /**
   * Split a row on unescaped pipes, resolving the escapes `\n`, `\\` and `\|`
   * (any other escaped character is kept as-is).
   */
  splitRow(line) {
    const parts = [];
    let current = "";
    let escaped = false;
    for (const char of line.split("")) {
      if (escaped) {
        current += char === "n" ? "\n" : char;
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === "|") {
        parts.push(current);
        current = "";
      } else {
        current += char;
      }
    }
    parts.push(current);
    return parts;
  }

  /**
   * Resolve row-cell escapes in a standalone piece of text. Implemented via
   * `splitRow`; re-joining with "|" restores any pipe that was not escaped.
   */
  unescapeText(text) {
    return this.splitRow(text).join("|");
  }

  /**
   * Split container content on commas that are not nested inside `[]`/`{}`.
   * When the brackets do not balance (e.g. a plain string containing "["),
   * fall back to a flat comma split.
   */
  splitTopLevel(text) {
    const parts = [];
    let depth = 0;
    let start = 0;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === "[" || ch === "{") {
        depth++;
      } else if (ch === "]" || ch === "}") {
        if (depth === 0) {
          return text.split(",");
        }
        depth--;
      } else if (ch === "," && depth === 0) {
        parts.push(text.slice(start, i));
        start = i + 1;
      }
    }
    if (depth !== 0) {
      return text.split(",");
    }
    parts.push(text.slice(start));
    return parts;
  }

  /**
   * Decode one already-unescaped value.
   *
   * @param {string} value Raw text.
   * @param {string} [type] Declared type code; without it the kind is guessed
   *   from the text (plain decimal numbers, `[..]`, `{..}`, else string).
   * @param {string} [elementType] Declared element type for arrays. It is
   *   used only when it is a scalar code; otherwise elements are guessed.
   *   Guessed elements cannot distinguish booleans ("1"/"0") from numbers.
   * @throws {JTMLError} INVALID_VALUE for non-numeric text in a numeric field.
   */
  decodeValue(value, type, elementType) {
    if (value === void 0 || value === "" || value === null) {
      return null;
    }
    if (type === "b") {
      return value === "1" || value === "true";
    }
    if (type === "i" || type === "f") {
      // parseFloat for both: parseInt would truncate "2.5" and read "1e+21"
      // as 1 when a later row holds a value the inferred "i" did not expect.
      const num = parseFloat(value);
      if (!Number.isFinite(num)) {
        throw new JTMLError(`Invalid numeric value: ${value}`, "INVALID_VALUE");
      }
      return num;
    }
    if (type === "s" || type === "t" || type === "e") {
      // Declared text stays text, even if it happens to look like "[..]".
      return value;
    }
    if (!type && /^-?\d+(\.\d+)?$/.test(value)) {
      return value.includes(".") ? parseFloat(value) : parseInt(value, 10);
    }
    if (value.startsWith("[") && value.endsWith("]")) {
      const inner = value.slice(1, -1);
      if (!inner) return [];
      const scalarCodes = ["b", "i", "f", "s", "t", "e"];
      const itemType = scalarCodes.includes(elementType) ? elementType : void 0;
      return this.splitTopLevel(inner).map((v) => this.decodeValue(v.trim(), itemType));
    }
    if (value.startsWith("{") && value.endsWith("}")) {
      const inner = value.slice(1, -1);
      // Null-prototype object: keys from the document cannot reach Object.prototype.
      const obj = Object.create(null);
      if (!inner) return obj;
      for (const pair of this.splitTopLevel(inner)) {
        const colonIdx = pair.indexOf(":");
        if (colonIdx === -1) continue;
        const k = pair.slice(0, colonIdx).trim();
        const v = pair.slice(colonIdx + 1).trim();
        if (k === "__proto__" || k === "constructor" || k === "prototype") continue;
        obj[k] = this.decodeValue(v);
      }
      return obj;
    }
    return value;
  }
};
|
|
587
|
+
var decoder = new JTMLDecoder();
|
|
588
|
+
/**
 * Decode JTML text using the shared module-level decoder instance.
 * `options` is passed through to `JTMLDecoder#decode` untouched.
 */
function decode(jtml, options) {
  const decoded = decoder.decode(jtml, options);
  return decoded;
}
|
|
591
|
+
|
|
592
|
+
// src/utils/tokenizer.ts
|
|
593
|
+
/**
 * Rough token-count estimate for a piece of text.
 *
 * Whitespace runs are collapsed, the length is divided by a per-tokenizer
 * characters-per-token ratio, and small surcharges are added for structural
 * characters and for runs of digits. The result is rounded up.
 *
 * @param {string} text Text to measure.
 * @param {"gpt"|"claude"|"llama"} [tokenizer="claude"] Ratio table entry;
 *   null is treated like the default.
 * @returns {number} Estimated token count (0 for empty text).
 * @throws {JTMLError} INVALID_TOKENIZER for an unknown tokenizer name
 *   (previously this silently produced NaN).
 */
function estimateTokens(text, tokenizer = "claude") {
  const charsPerToken = {
    gpt: 4,
    claude: 3.8,
    llama: 4.2
  };
  const tokenizerName = tokenizer ?? "claude";
  // Own-property check so names like "constructor" are rejected too.
  if (!Object.prototype.hasOwnProperty.call(charsPerToken, tokenizerName)) {
    throw new JTMLError(`Unknown tokenizer: ${tokenizerName}`, "INVALID_TOKENIZER");
  }
  const normalized = text.trim().replace(/\s+/g, " ");
  const countMatches = (pattern) => (normalized.match(pattern) || []).length;
  let estimate = normalized.length / charsPerToken[tokenizerName];
  estimate += countMatches(/[{}[\]":,]/g) * 0.3;
  estimate += countMatches(/\d+/g) * 0.2;
  return Math.ceil(estimate);
}
|
|
608
|
+
/**
 * Estimate tokens for the JSON and JTML renderings of the same data and
 * report the difference, absolute and as a percentage of the JSON estimate
 * (0 when the JSON estimate is not positive).
 */
function compareTokens(jsonText, jtmlText, tokenizer = "claude") {
  const measure = (text) => estimateTokens(text, tokenizer);
  const jsonTokens = measure(jsonText);
  const jtmlTokens = measure(jtmlText);
  const savings = jsonTokens - jtmlTokens;
  let savingsPercent = 0;
  if (jsonTokens > 0) {
    savingsPercent = savings / jsonTokens * 100;
  }
  return { jsonTokens, jtmlTokens, savings, savingsPercent };
}
|
|
620
|
+
/**
 * Character-level size reduction of the JTML text relative to the JSON text,
 * as a percentage (negative when the JTML text is longer).
 *
 * @returns {number} 0 when `jsonText` is empty, since there is no baseline to
 *   compare against (avoids the NaN / -Infinity of a division by zero).
 */
function calculateEfficiency(jsonText, jtmlText) {
  const jsonLen = jsonText.length;
  if (jsonLen === 0) {
    return 0;
  }
  return (jsonLen - jtmlText.length) / jsonLen * 100;
}
|
|
625
|
+
/**
 * Break the token estimates for a JSON text and its JTML rendering into
 * rough categories.
 *
 * JSON: total estimate, weighted counts of structural characters and quoted
 * keys, and whatever remains as "values". JTML: total estimate, estimates for
 * the part before and the part after the first `@data` marker, and a weighted
 * count of `|` / `:` delimiters. Also includes the `compareTokens` summary.
 *
 * The data part is everything after the first `@data`, so a later occurrence
 * of that marker (or of the same text inside a value) no longer cuts it short.
 */
function analyzeTokens(jsonText, jtmlText, tokenizer = "claude") {
  const countMatches = (text, pattern) => (text.match(pattern) || []).length;
  const structural = Math.ceil(countMatches(jsonText, /[{}[\]":,]/g) * 0.8);
  // Keys are weighted higher because each one carries its quotes.
  const keys = Math.ceil(countMatches(jsonText, /"(\w+)":/g) * 1.5);
  const jsonTotal = estimateTokens(jsonText, tokenizer);

  const marker = "@data";
  const markerAt = jtmlText.indexOf(marker);
  const jtmlSchema = markerAt === -1 ? jtmlText : jtmlText.slice(0, markerAt);
  const jtmlData = markerAt === -1 ? "" : jtmlText.slice(markerAt + marker.length);

  return {
    json: {
      total: jsonTotal,
      structural,
      keys,
      values: jsonTotal - structural - keys
    },
    jtml: {
      total: estimateTokens(jtmlText, tokenizer),
      schema: estimateTokens(jtmlSchema, tokenizer),
      data: estimateTokens(jtmlData, tokenizer),
      delimiters: Math.ceil(countMatches(jtmlText, /[|:]/g) * 0.5)
    },
    comparison: compareTokens(jsonText, jtmlText, tokenizer)
  };
}
|
|
648
|
+
/**
 * Render a `compareTokens` result as a four-line, human-readable report.
 * The savings percentage is shown with two decimals.
 */
function formatTokenStats(stats) {
  // NOTE(review): this view of the file has lost all indentation, so any
  // leading spaces the label lines may have had upstream cannot be confirmed.
  const report = [
    "Token Comparison:",
    `JSON: ${stats.jsonTokens} tokens`,
    `JTML: ${stats.jtmlTokens} tokens`,
    `Savings: ${stats.savings} tokens (${stats.savingsPercent.toFixed(2)}%)`
  ];
  return report.join("\n");
}
|
|
656
|
+
/**
 * Convert a token saving into money using a price per million tokens.
 *
 * @param {{savings: number}} stats Result of a token comparison.
 * @param {number} [pricePerMillion=3] Price for one million tokens.
 * @returns {{tokensSaved: number, costSavedPer1M: number, costSavedPer1K: number}}
 */
function estimateCostSavings(stats, pricePerMillion = 3) {
  const tokensSaved = stats.savings;
  // NOTE(review): both expressions below reduce to savings * price / 1e6, so
  // the two fields differ at most by floating-point rounding — confirm what
  // "per 1K" is meant to express.
  const perMillion = tokensSaved / 1e6 * pricePerMillion;
  const perThousand = tokensSaved / 1e3 * (pricePerMillion / 1e3);
  return {
    tokensSaved,
    costSavedPer1M: perMillion,
    costSavedPer1K: perThousand
  };
}
|
|
665
|
+
|
|
666
|
+
// src/index.ts
|
|
667
|
+
/**
 * Encode a JSON value to JTML with schema inference switched on.
 * `schemaId` names the inferred schema; when omitted the encoder's default
 * id applies.
 */
function jsonToJtml(json, schemaId) {
  const options = { schemaId, autoInferTypes: true };
  return encode(json, options);
}
|
|
670
|
+
/** Decode JTML text back to JSON-compatible data with default options. */
function jtmlToJson(jtml) {
  const json = decode(jtml);
  return json;
}
|
|
673
|
+
/**
 * Parse a JSON string and encode the result to JTML.
 * Invalid JSON surfaces as the SyntaxError thrown by `JSON.parse`.
 */
function convertJsonString(jsonStr, schemaId) {
  return jsonToJtml(JSON.parse(jsonStr), schemaId);
}
|
|
677
|
+
/**
 * Encode data to JTML, decode it again, and report whether the result
 * matches. Equality is judged by comparing the `JSON.stringify` output of the
 * input and of the recovered value.
 *
 * @returns {{success: boolean, original: *, recovered: *, jtml: string}}
 */
function roundTrip(data) {
  const jtml = encode(data);
  const recovered = decode(jtml);
  const [before, after] = [data, recovered].map((value) => JSON.stringify(value));
  return {
    success: before === after,
    original: data,
    recovered,
    jtml
  };
}
|
|
687
|
+
// Default export: a convenience bundle of the most commonly used entry points.
var index_default = {
  encode: encode,
  decode: decode,
  jsonToJtml: jsonToJtml,
  jtmlToJson: jtmlToJson,
  convertJsonString: convertJsonString,
  roundTrip: roundTrip,
  schemaManager: schemaManager,
  estimateTokens: estimateTokens,
  compareTokens: compareTokens
};
|
|
698
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
699
|
+
0 && (module.exports = {
|
|
700
|
+
JTMLDecoder,
|
|
701
|
+
JTMLEncoder,
|
|
702
|
+
JTMLError,
|
|
703
|
+
SchemaManager,
|
|
704
|
+
TYPE_MAP,
|
|
705
|
+
analyzeTokens,
|
|
706
|
+
calculateEfficiency,
|
|
707
|
+
compareTokens,
|
|
708
|
+
convertJsonString,
|
|
709
|
+
decode,
|
|
710
|
+
decoder,
|
|
711
|
+
encode,
|
|
712
|
+
encodeBatch,
|
|
713
|
+
encoder,
|
|
714
|
+
estimateCostSavings,
|
|
715
|
+
estimateTokens,
|
|
716
|
+
formatTokenStats,
|
|
717
|
+
inferSchema,
|
|
718
|
+
inferType,
|
|
719
|
+
jsonToJtml,
|
|
720
|
+
jtmlToJson,
|
|
721
|
+
parseSchema,
|
|
722
|
+
roundTrip,
|
|
723
|
+
schemaManager,
|
|
724
|
+
serializeSchema,
|
|
725
|
+
validateAgainstSchema
|
|
726
|
+
});
|