@creationix/jot 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +112 -23
- package/dist/jot.cjs +446 -0
- package/dist/jot.d.ts +6 -0
- package/dist/jot.js +442 -0
- package/package.json +34 -4
- package/SUMMARY.md +0 -151
- package/TOKEN_COUNTS.md +0 -97
- package/bun.lock +0 -19
- package/jot.test.ts +0 -133
- package/jot.ts +0 -650
- package/samples/chat.jot +0 -1
- package/samples/chat.json +0 -1
- package/samples/chat.pretty.jot +0 -6
- package/samples/chat.pretty.json +0 -16
- package/samples/firewall.jot +0 -1
- package/samples/firewall.json +0 -1
- package/samples/firewall.pretty.jot +0 -235
- package/samples/firewall.pretty.json +0 -344
- package/samples/github-issue.jot +0 -1
- package/samples/github-issue.json +0 -1
- package/samples/github-issue.pretty.jot +0 -15
- package/samples/github-issue.pretty.json +0 -20
- package/samples/hikes.jot +0 -1
- package/samples/hikes.json +0 -1
- package/samples/hikes.pretty.jot +0 -14
- package/samples/hikes.pretty.json +0 -38
- package/samples/irregular.jot +0 -1
- package/samples/irregular.json +0 -1
- package/samples/irregular.pretty.jot +0 -13
- package/samples/irregular.pretty.json +0 -23
- package/samples/json-counts-cache.jot +0 -1
- package/samples/json-counts-cache.json +0 -1
- package/samples/json-counts-cache.pretty.jot +0 -26
- package/samples/json-counts-cache.pretty.json +0 -26
- package/samples/key-folding-basic.jot +0 -1
- package/samples/key-folding-basic.json +0 -1
- package/samples/key-folding-basic.pretty.jot +0 -7
- package/samples/key-folding-basic.pretty.json +0 -25
- package/samples/key-folding-mixed.jot +0 -1
- package/samples/key-folding-mixed.json +0 -1
- package/samples/key-folding-mixed.pretty.jot +0 -16
- package/samples/key-folding-mixed.pretty.json +0 -24
- package/samples/key-folding-with-array.jot +0 -1
- package/samples/key-folding-with-array.json +0 -1
- package/samples/key-folding-with-array.pretty.jot +0 -6
- package/samples/key-folding-with-array.pretty.json +0 -29
- package/samples/large.jot +0 -1
- package/samples/large.json +0 -1
- package/samples/large.pretty.jot +0 -72
- package/samples/large.pretty.json +0 -93
- package/samples/logs.jot +0 -1
- package/samples/logs.json +0 -1
- package/samples/logs.pretty.jot +0 -96
- package/samples/logs.pretty.json +0 -350
- package/samples/medium.jot +0 -1
- package/samples/medium.json +0 -1
- package/samples/medium.pretty.jot +0 -13
- package/samples/medium.pretty.json +0 -30
- package/samples/metrics.jot +0 -1
- package/samples/metrics.json +0 -1
- package/samples/metrics.pretty.jot +0 -11
- package/samples/metrics.pretty.json +0 -25
- package/samples/package.jot +0 -1
- package/samples/package.json +0 -1
- package/samples/package.pretty.jot +0 -18
- package/samples/package.pretty.json +0 -18
- package/samples/products.jot +0 -1
- package/samples/products.json +0 -1
- package/samples/products.pretty.jot +0 -69
- package/samples/products.pretty.json +0 -235
- package/samples/routes.jot +0 -1
- package/samples/routes.json +0 -1
- package/samples/routes.pretty.jot +0 -142
- package/samples/routes.pretty.json +0 -354
- package/samples/small.jot +0 -1
- package/samples/small.json +0 -1
- package/samples/small.pretty.jot +0 -8
- package/samples/small.pretty.json +0 -12
- package/samples/users-50.jot +0 -1
- package/samples/users-50.json +0 -1
- package/samples/users-50.pretty.jot +0 -53
- package/samples/users-50.pretty.json +0 -354
package/dist/jot.js
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
// Bare words the parser's tokenToValue() turns into literals; a string equal to one of them must be quoted.
const RESERVED = new Set(["true", "false", "null"])
|
|
2
|
+
// Characters whose presence anywhere in a string makes needsQuotes() return true.
const UNSAFE = [":", ",", "{", "}", "[", "]", '"', ";", "\\"]
|
|
3
|
+
// Matches one whitespace character; JotParser.ws() tests a single character at a time against it.
const WS_RE = /\s/
|
|
4
|
+
// Characters that end an unquoted object key in JotParser.parseKey(): structural punctuation or any whitespace.
const KEY_TERM_RE = /[:\,{}\[\];]|\s/
|
|
5
|
+
/**
 * Decide whether a string has to be written as a quoted literal.
 *
 * A string needs quotes when it is empty, has leading/trailing whitespace,
 * equals a reserved word, converts to a number via `Number()`, contains one of
 * the UNSAFE characters (or one of `extra`), or contains a character whose
 * code is below 32.
 *
 * @param {string} s - Candidate string.
 * @param {string[]} [extra] - Additional characters to treat as unsafe.
 * @returns {boolean} `true` when the string cannot be emitted bare.
 */
function needsQuotes(s, extra = []) {
  const forbidden = [...UNSAFE, ...extra]
  if (s === "" || s !== s.trim()) {
    return true
  }
  if (RESERVED.has(s)) {
    return true
  }
  // Anything Number() accepts would be read back as a number.
  if (!Number.isNaN(Number(s))) {
    return true
  }
  for (const unsafe of forbidden) {
    if (s.includes(unsafe)) {
      return true
    }
  }
  for (const ch of s) {
    if (ch.charCodeAt(0) < 32) {
      return true
    }
  }
  return false
}
|
|
16
|
+
// Serialize a string value: JSON-quoted when needsQuotes() says so, otherwise emitted as-is.
const quote = (s) => (needsQuotes(s) ? JSON.stringify(s) : s)
|
|
17
|
+
// Serialize a key: like quote(), but "." also forces quoting because the parser splits unquoted keys on "." (see unfold()).
const quoteKey = (s) => (needsQuotes(s, ["."]) ? JSON.stringify(s) : s)
|
|
18
|
+
/**
 * Walk down a chain of single-key plain objects and collect the keys, so the
 * caller can emit `a.b.c:leaf` instead of nested braces.
 *
 * Folding stops at the first object that does not have exactly one key, or
 * whose key could not be read back from an unquoted dotted path: a key
 * containing "." (it would be split on parse) or any character that ends an
 * unquoted key in the parser (`: , { } [ ] ;` or whitespace).
 *
 * @param {*} value - Value to inspect; non-objects and arrays never fold.
 * @returns {{path: string[], leaf: *} | null} Collected keys and the value
 *   they lead to, or `null` when nothing could be folded.
 */
function getFoldPath(value) {
  const path = []
  let current = value
  while (current !== null && typeof current === "object" && !Array.isArray(current)) {
    const keys = Object.keys(current)
    if (keys.length !== 1) {
      break
    }
    const [key] = keys
    // A folded segment is written bare, so it must survive parseKey() + split(".").
    if (key.includes(".") || KEY_TERM_RE.test(key)) {
      break
    }
    path.push(key)
    current = current[key]
  }
  return path.length > 0 ? { path, leaf: current } : null
}
|
|
31
|
+
/**
 * Split an array of objects into runs of consecutive objects that share the
 * same key list (same keys, same order).
 *
 * Key lists are compared element by element. Comparing the comma-joined
 * strings would treat `["a,b"]` and `["a", "b"]` as the same schema.
 *
 * @param {object[]} arr - Objects to group; each must be a non-null object.
 * @returns {{keys: string[], objects: object[]}[]} Runs in input order.
 */
function groupBySchema(arr) {
  const groups = []
  for (const obj of arr) {
    const keys = Object.keys(obj)
    const last = groups.at(-1)
    const sameSchema =
      last !== undefined && last.keys.length === keys.length && last.keys.every((k, i) => k === keys[i])
    if (sameSchema) {
      last.objects.push(obj)
    } else {
      groups.push({ keys, objects: [obj] })
    }
  }
  return groups
}
|
|
44
|
+
// Options of the stringify() call in progress; reassigned at the start of every stringify().
let opts = {}
|
|
45
|
+
// Current nesting level for pretty output; reset to 0 by stringify() and read by ind().
let depth = 0
|
|
46
|
+
// Indentation for the current depth: opts.indent repeated `depth` times in pretty mode, otherwise the empty string.
const ind = () => (opts.pretty ? (opts.indent ?? " ").repeat(depth) : "")
|
|
47
|
+
/**
 * Serialize any value by dispatching on its type.
 *
 * @param {*} value - Value to serialize.
 * @param {boolean} [atLineStart] - Forwarded to stringifyObject(); ignored
 *   for every other kind of value.
 * @returns {string} Jot text for the value.
 */
function stringifyValue(value, atLineStart = false) {
  // typeof null is "object", so handle it before the switch.
  if (value === null) {
    return "null"
  }
  switch (typeof value) {
    case "string":
      return quote(value)
    case "object":
      return Array.isArray(value) ? stringifyArray(value) : stringifyObject(value, atLineStart)
    default:
      // Booleans, numbers and anything else fall back to String().
      return String(value)
  }
}
|
|
68
|
+
/**
 * Serialize an array.
 *
 * Arrays of two or more plain objects become a table when at least one run of
 * consecutive objects shares a schema. Otherwise the array is written as a
 * bracketed list: one item per line in pretty mode when it contains objects
 * or arrays, on a single line in every other case.
 *
 * @param {Array} arr - Array to serialize.
 * @returns {string} Jot text for the array.
 */
function stringifyArray(arr) {
  const isObjectLike = (item) => item !== null && typeof item === "object"
  const isRecord = (item) => isObjectLike(item) && !Array.isArray(item)

  if (arr.length >= 2 && arr.every((item) => isRecord(item))) {
    const groups = groupBySchema(arr)
    const worthATable = groups.some(({ objects }) => objects.length >= 2)
    if (worthATable) {
      return stringifyTable(groups)
    }
  }

  if (arr.length === 1) {
    return `[${stringifyValue(arr[0])}]`
  }

  const pretty = opts.pretty
  const containsNested = arr.some((item) => isObjectLike(item))
  if (pretty && arr.length > 0 && containsNested) {
    depth++
    const lines = arr.map((item) => `${ind()}${stringifyValue(item, true)}`)
    depth--
    return `[\n${lines.join(",\n")}\n${ind()}]`
  }

  const body = arr.map((item) => stringifyValue(item)).join(pretty ? ", " : ",")
  return pretty ? `[ ${body} ]` : `[${body}]`
}
|
|
90
|
+
/**
 * Serialize schema groups as a `{{ ... }}` table.
 *
 * Every group contributes one schema row (`:` followed by its keys) and one
 * data row per object. Compact output joins rows with ";"; pretty output puts
 * each row on its own line, schema rows indented one level and data rows two.
 *
 * @param {{keys: string[], objects: object[]}[]} groups - Output of groupBySchema().
 * @returns {string} Jot text for the table.
 */
function stringifyTable(groups) {
  const pretty = Boolean(opts.pretty)
  const sep = pretty ? ", " : ","

  let schemaInd = ""
  let dataInd = ""
  if (pretty) {
    // Cell values are serialized at data-row depth; both levels are undone below.
    depth++
    schemaInd = ind()
    depth++
    dataInd = ind()
  }

  const rows = groups.flatMap(({ keys, objects }) => [
    `${schemaInd}:${keys.map((k) => quoteKey(k)).join(sep)}`,
    ...objects.map((obj) => `${dataInd}${keys.map((k) => stringifyValue(obj[k])).join(sep)}`),
  ])

  if (!pretty) {
    return `{{${rows.join(";")}}}`
  }
  depth -= 2
  return `{{\n${rows.join("\n")}\n${ind()}}}`
}
|
|
114
|
+
/**
 * Serialize a plain object.
 *
 * Keys are written bare when that is safe and JSON-quoted otherwise. When a
 * bare key maps to a chain of single-key objects, the chain is folded into a
 * dotted key (`a.b.c:leaf`).
 *
 * Layout: compact mode yields `{k:v,...}`. Pretty mode yields `{ k: v }` for
 * one key and one pair per line for several keys. With `atLineStart` set, and
 * unless the last pair ends in "}" or "]", the first pair stays on the line of
 * the opening brace.
 *
 * @param {object} obj - Object to serialize.
 * @param {boolean} [atLineStart] - Whether the caller already emitted the
 *   indentation for this object (array items in pretty mode).
 * @returns {string} Jot text for the object.
 */
function stringifyObject(obj, atLineStart = false) {
  const keys = Object.keys(obj)
  const pair = (k, pretty) => {
    const val = obj[k]
    const gap = pretty ? ": " : ":"
    // The parser ends an unquoted key at the first whitespace character, so a
    // key with inner whitespace must be quoted even though needsQuotes() only
    // rejects leading/trailing whitespace.
    const bare = !needsQuotes(k, ["."]) && !WS_RE.test(k)
    if (bare && val !== null && typeof val === "object" && !Array.isArray(val)) {
      const fold = getFoldPath(val)
      if (fold) {
        return `${k}.${fold.path.join(".")}${gap}${stringifyValue(fold.leaf)}`
      }
    }
    const qk = bare ? k : JSON.stringify(k)
    return `${qk}${gap}${stringifyValue(val)}`
  }
  if (opts.pretty && keys.length > 1) {
    depth++
    const rawPairs = keys.map((k) => pair(k, true))
    // A last pair that closes a nested block reads better with the braces on their own lines.
    const lastMulti = rawPairs.at(-1)?.endsWith("}") || rawPairs.at(-1)?.endsWith("]")
    const compact = atLineStart && !lastMulti
    const pairs = rawPairs.map((p, i) => (i === 0 && compact ? p : `${ind()}${p}`))
    depth--
    return compact ? `{ ${pairs.join(",\n")} }` : `{\n${pairs.join(",\n")}\n${ind()}}`
  }
  if (opts.pretty && keys.length === 1) {
    return `{ ${pair(keys[0], true)} }`
  }
  return `{${keys.map((k) => pair(k, false)).join(",")}}`
}
|
|
142
|
+
/**
 * Serialize a value to Jot text.
 *
 * Stores the merged options and resets the nesting depth in module-level
 * state before serializing.
 *
 * @param {*} data - Value to serialize.
 * @param {{pretty?: boolean, indent?: string}} [options] - `pretty` switches
 *   on multi-line output; `indent` is the string repeated per nesting level.
 * @returns {string} Jot text.
 */
export function stringify(data, options = {}) {
  const defaults = { pretty: false, indent: " " }
  opts = { ...defaults, ...options }
  depth = 0
  const text = stringifyValue(data)
  return text
}
|
|
147
|
+
// Parser
|
|
148
|
+
/**
 * Recursive-descent parser for Jot text.
 *
 * The parser walks `input` with a single cursor (`pos`). Each method consumes
 * the construct it is named after and leaves the cursor just past it.
 */
class JotParser {
  /** Single-character escapes accepted after a backslash inside quoted strings. */
  static ESCAPES = {
    '"': '"',
    "\\": "\\",
    "/": "/",
    b: "\b",
    f: "\f",
    n: "\n",
    r: "\r",
    t: "\t",
  }

  input
  pos = 0

  /** @param {string} input - Jot text to parse. */
  constructor(input) {
    this.input = input
  }

  /**
   * Parse the whole input as one value.
   *
   * @returns {*} The parsed value.
   * @throws {Error} When non-whitespace input remains after the value.
   */
  parse() {
    this.ws()
    const result = this.value("")
    this.ws()
    if (this.pos < this.input.length) {
      throw new Error(`Unexpected character at position ${this.pos}: '${this.input[this.pos]}'`)
    }
    return result
  }

  /** Advance the cursor past any whitespace. */
  ws() {
    while (this.pos < this.input.length && WS_RE.test(this.input[this.pos])) this.pos++
  }

  /** Character under the cursor, or "" at end of input. */
  peek = () => this.input[this.pos] || ""

  /**
   * Parse one value of any kind.
   *
   * @param {string} [terminators] - Characters that end a bare token; "" means
   *   the token runs to the end of the input.
   * @returns {*} The parsed value.
   */
  value(terminators = "") {
    this.ws()
    const ch = this.peek()
    if (ch === "{") {
      return this.input[this.pos + 1] === "{" ? this.table() : this.object()
    }
    if (ch === "[") {
      return this.array()
    }
    if (ch === '"') {
      return this.quoted()
    }
    return this.atom(terminators)
  }

  /**
   * Parse a double-quoted string; the cursor must be on the opening quote.
   *
   * @returns {string} The unescaped string content.
   * @throws {Error} On an unknown escape, a `\u` escape that is not followed
   *   by four hex digits, or a missing closing quote.
   */
  quoted() {
    this.pos++ // skip the opening quote
    let result = ""
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (ch === '"') {
        this.pos++
        return result
      }
      if (ch === "\\") {
        this.pos++
        const esc = this.input[this.pos]
        if (Object.hasOwn(JotParser.ESCAPES, esc)) {
          result += JotParser.ESCAPES[esc]
        } else if (esc === "u") {
          const hex = this.input.slice(this.pos + 1, this.pos + 5)
          // parseInt() would turn bad digits into NaN, which fromCharCode() maps to U+0000.
          if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
            throw new Error(`Invalid unicode escape '\\u${hex}' at position ${this.pos - 1}`)
          }
          result += String.fromCharCode(Number.parseInt(hex, 16))
          this.pos += 4
        } else {
          throw new Error(`Invalid escape sequence '\\${esc}'`)
        }
      } else {
        result += ch
      }
      this.pos++
    }
    throw new Error("Unterminated string")
  }

  /**
   * Read a bare token up to (not including) the first terminator character.
   *
   * @param {string} terminators - Terminator characters; "" reads to end of input.
   * @returns {string} The trimmed token.
   * @throws {Error} When the token is empty.
   */
  parseToken(terminators) {
    const start = this.pos
    if (terminators === "") {
      const token = this.input.slice(start).trim()
      this.pos = this.input.length
      if (token === "") {
        throw new Error(`Unexpected end of input at position ${start}`)
      }
      return token
    }
    while (this.pos < this.input.length && !terminators.includes(this.input[this.pos])) {
      this.pos++
    }
    const token = this.input.slice(start, this.pos).trim()
    if (token === "") {
      throw new Error(`Unexpected character at position ${this.pos}: '${this.peek()}'`)
    }
    return token
  }

  /**
   * Convert a bare token to a value: `null`, `true`, `false`, a number when
   * `Number()` accepts it, otherwise the token itself as a string.
   *
   * @param {string} token - Trimmed bare token.
   * @returns {null | boolean | number | string}
   */
  tokenToValue(token) {
    if (token === "null") {
      return null
    }
    if (token === "true") {
      return true
    }
    if (token === "false") {
      return false
    }
    const num = Number(token)
    if (!Number.isNaN(num) && token !== "") {
      return num
    }
    return token
  }

  /** Parse a bare token and convert it to a value. */
  atom(terminators) {
    return this.tokenToValue(this.parseToken(terminators))
  }

  /**
   * Parse a `[ ... ]` array; the cursor must be on "[".
   *
   * @returns {Array}
   * @throws {Error} When the closing "]" is missing.
   */
  array() {
    this.pos++
    const result = []
    this.ws()
    while (this.peek() !== "]") {
      if (this.pos >= this.input.length) {
        throw new Error("Unterminated array")
      }
      result.push(this.value(",]"))
      this.ws()
      if (this.peek() === ",") {
        this.pos++
        this.ws()
      }
    }
    this.pos++
    return result
  }

  /**
   * Parse a `{{ ... }}` table into an array of objects; the cursor must be on
   * the first "{". Rows starting with ":" set the current schema, every other
   * row becomes an object keyed by that schema.
   *
   * @returns {object[]}
   * @throws {Error} On a data row before any schema row, on a missing "}}",
   *   or on input that no row can consume (for example a stray single "}").
   */
  table() {
    this.pos += 2
    const result = []
    let schema = []
    this.ws()
    while (this.input.slice(this.pos, this.pos + 2) !== "}}") {
      if (this.pos >= this.input.length) {
        throw new Error("Unterminated table")
      }
      const iterationStart = this.pos
      this.ws()
      if (this.peek() === ":") {
        this.pos++
        schema = this.schemaRow()
      } else {
        if (schema.length === 0) {
          throw new Error(`Data row without schema at position ${this.pos}`)
        }
        const values = this.dataRow(schema.length)
        const obj = {}
        for (let i = 0; i < schema.length; i++) {
          this.assign(obj, schema[i], values[i])
        }
        result.push(obj)
      }
      this.ws()
      if (this.peek() === ";") {
        this.pos++
        this.ws()
      }
      // An empty cell is read without consuming input, so an iteration that
      // did not move the cursor would repeat forever.
      if (this.pos === iterationStart) {
        throw new Error(`Unexpected character at position ${this.pos}: '${this.peek()}'`)
      }
    }
    this.pos += 2
    return result
  }

  /**
   * Parse the column names of a schema row (the cursor is just past ":").
   * Stops, without consuming it, at ";", a newline or "}}". Names may be bare
   * (trimmed, empty ones skipped) or double-quoted as the serializer writes
   * names that are not safe to emit bare.
   *
   * @returns {string[]} Column names in order.
   * @throws {Error} When non-whitespace text follows a quoted name.
   */
  schemaRow() {
    const cols = []
    let col = ""
    let quotedName = null
    const flush = () => {
      if (quotedName !== null) {
        cols.push(quotedName)
      } else if (col.trim()) {
        cols.push(col.trim())
      }
      col = ""
      quotedName = null
    }
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if ((ch === "}" && this.input[this.pos + 1] === "}") || ch === ";" || ch === "\n") {
        flush()
        break
      }
      if (ch === ",") {
        flush()
        this.pos++
        continue
      }
      if (quotedName !== null) {
        if (!WS_RE.test(ch)) {
          throw new Error(`Expected ',' after column name at position ${this.pos}`)
        }
        this.pos++
        continue
      }
      if (ch === '"' && col.trim() === "") {
        // quoted() leaves the cursor just past the closing quote.
        quotedName = this.quoted()
        continue
      }
      col += ch
      this.pos++
    }
    return cols
  }

  /**
   * Parse one data row of `colCount` cells separated by ",".
   *
   * @param {number} colCount - Number of columns in the current schema.
   * @returns {Array} Cell values; missing cells are `null`.
   */
  dataRow(colCount) {
    const values = []
    for (let i = 0; i < colCount; i++) {
      this.ws()
      // The last cell does not stop at ",", so extra commas become part of its token.
      values.push(this.tableValue(i < colCount - 1 ? ",;}\n" : ";}\n"))
      this.ws()
      if (this.peek() === ",") {
        this.pos++
      }
    }
    return values
  }

  /**
   * Parse one table cell: a quoted string, nested object/table/array, or a
   * bare token ending at a terminator or "}}".
   *
   * @param {string} terminators - Characters that end a bare cell.
   * @returns {*} The cell value; an empty cell is `null`.
   */
  tableValue(terminators) {
    this.ws()
    const ch = this.peek()
    if (ch === '"') {
      return this.quoted()
    }
    if (ch === "{") {
      return this.input[this.pos + 1] === "{" ? this.table() : this.object()
    }
    if (ch === "[") {
      return this.array()
    }
    const start = this.pos
    while (this.pos < this.input.length) {
      const c = this.input[this.pos]
      if ((c === "}" && this.input[this.pos + 1] === "}") || terminators.includes(c)) {
        break
      }
      this.pos++
    }
    const token = this.input.slice(start, this.pos).trim()
    return token === "" ? null : this.tokenToValue(token)
  }

  /**
   * Parse a `{ ... }` object; the cursor must be on "{". Unquoted keys are
   * split on "." and merged into the result as nested objects; quoted keys
   * are used verbatim.
   *
   * @returns {object}
   * @throws {Error} On a missing key, a missing ":" or a missing "}".
   */
  object() {
    this.pos++
    const result = {}
    this.ws()
    while (this.peek() !== "}") {
      if (this.pos >= this.input.length) {
        throw new Error("Unterminated object")
      }
      const { key, quoted } = this.parseKey()
      this.ws()
      if (this.peek() !== ":") {
        throw new Error(`Expected ':' after key '${key}' at position ${this.pos}`)
      }
      this.pos++
      const value = this.value(",}")
      if (quoted) {
        this.assign(result, key, value)
      } else {
        this.merge(result, this.unfold(key, value))
      }
      this.ws()
      if (this.peek() === ",") {
        this.pos++
        this.ws()
      }
    }
    this.pos++
    return result
  }

  /**
   * Parse an object key, quoted or bare. A bare key ends at the first
   * character matched by KEY_TERM_RE.
   *
   * @returns {{key: string, quoted: boolean}}
   * @throws {Error} When a bare key is empty.
   */
  parseKey() {
    this.ws()
    if (this.peek() === '"') {
      return { key: this.quoted(), quoted: true }
    }
    const start = this.pos
    while (this.pos < this.input.length && !KEY_TERM_RE.test(this.input[this.pos])) this.pos++
    const key = this.input.slice(start, this.pos)
    if (key === "") {
      throw new Error(`Expected key at position ${this.pos}`)
    }
    return { key, quoted: false }
  }

  /**
   * Expand a dotted key path into nested objects: `("a.b", 1)` gives `{a: {b: 1}}`.
   *
   * @param {string} keyPath - Bare key, possibly containing ".".
   * @param {*} value - Value stored at the innermost key.
   * @returns {object}
   */
  unfold(keyPath, value) {
    const parts = keyPath.split(".")
    const result = {}
    let current = result
    for (let i = 0; i < parts.length - 1; i++) {
      const nested = {}
      this.assign(current, parts[i], nested)
      current = nested
    }
    this.assign(current, parts.at(-1), value)
    return result
  }

  /**
   * Recursively merge `src` into `target` in place. Plain objects present on
   * both sides are merged key by key; anything else from `src` overwrites.
   *
   * @param {object} target - Object to merge into.
   * @param {object} src - Object whose own enumerable keys are merged in.
   */
  merge(target, src) {
    for (const key of Object.keys(src)) {
      // Only own properties count, so inherited members are never merged into.
      const tv = Object.hasOwn(target, key) ? target[key] : undefined
      const sv = src[key]
      if (
        typeof tv === "object" &&
        tv !== null &&
        !Array.isArray(tv) &&
        typeof sv === "object" &&
        sv !== null &&
        !Array.isArray(sv)
      ) {
        this.merge(tv, sv)
      } else {
        this.assign(target, key, sv)
      }
    }
  }

  /**
   * Store `value` under `key` as an own data property. A plain assignment to
   * "__proto__" would replace the object's prototype instead of storing data.
   *
   * @param {object} target - Object to write to.
   * @param {string} key - Property name.
   * @param {*} value - Value to store.
   */
  assign(target, key, value) {
    if (key === "__proto__") {
      Object.defineProperty(target, key, { value, writable: true, enumerable: true, configurable: true })
    } else {
      target[key] = value
    }
  }
}
|
|
440
|
+
/**
 * Parse Jot text into a JavaScript value.
 *
 * @param {string} input - Jot text.
 * @returns {*} The parsed value.
 * @throws {Error} Whatever JotParser#parse throws on malformed input.
 */
export function parse(input) {
  const parser = new JotParser(input)
  return parser.parse()
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,40 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@creationix/jot",
|
|
3
|
-
"
|
|
3
|
+
"repository": {
|
|
4
|
+
"type": "git",
|
|
5
|
+
"url": "https://github.com/creationix/jot"
|
|
6
|
+
},
|
|
7
|
+
"version": "0.1.0",
|
|
8
|
+
"description": "LLM and human friendly JSON alternative",
|
|
9
|
+
"type": "module",
|
|
10
|
+
"main": "./dist/jot.cjs",
|
|
11
|
+
"module": "./dist/jot.js",
|
|
12
|
+
"types": "./dist/jot.d.ts",
|
|
13
|
+
"exports": {
|
|
14
|
+
"import": "./dist/jot.js",
|
|
15
|
+
"require": "./dist/jot.cjs"
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist/*",
|
|
19
|
+
"README.md"
|
|
20
|
+
],
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build:clean": "rm -rf dist/*",
|
|
23
|
+
"build:cjs": "tsc --project tsconfig.cjs.json && mv dist/jot.js dist/jot.cjs",
|
|
24
|
+
"build:esm": "tsc --project tsconfig.json",
|
|
25
|
+
"build:tidy": "biome format --fix dist && biome lint --fix dist",
|
|
26
|
+
"build": "npm run build:clean && npm run build:cjs && npm run build:esm && npm run build:tidy",
|
|
27
|
+
"format": "biome format --fix src && biome lint --fix src",
|
|
28
|
+
"lint": "biome lint src",
|
|
29
|
+
"test": "bun test",
|
|
30
|
+
"prepublish": "npm run format && npm run lint && npm run build && bun samples/measure-tokens.ts"
|
|
31
|
+
},
|
|
4
32
|
"author": "Tim Caswell <tim@creationix.com>",
|
|
5
33
|
"license": "MIT",
|
|
6
|
-
"dependencies": {},
|
|
7
34
|
"devDependencies": {
|
|
8
|
-
"@
|
|
35
|
+
"@anthropic-ai/tokenizer": "^0.0.4",
|
|
36
|
+
"@biomejs/biome": "^1.9.4",
|
|
37
|
+
"@types/bun": "^1.3.5",
|
|
38
|
+
"typescript": "^5.7.3"
|
|
9
39
|
}
|
|
10
|
-
}
|
|
40
|
+
}
|
package/SUMMARY.md
DELETED
|
@@ -1,151 +0,0 @@
|
|
|
1
|
-
# Encoding Format Comparison
|
|
2
|
-
|
|
3
|
-
Token counts for 18 test documents across three tokenizers. For LLM systems, **tokens matter more than bytes**.
|
|
4
|
-
|
|
5
|
-
## Recommendation
|
|
6
|
-
|
|
7
|
-
**Use Jot** for LLM contexts — saves 16-17% tokens vs JSON.
|
|
8
|
-
|
|
9
|
-
## Token Efficiency
|
|
10
|
-
|
|
11
|
-
<!-- CHART_START -->
|
|
12
|
-
```mermaid
|
|
13
|
-
xychart-beta
|
|
14
|
-
title "Token Counts by Format"
|
|
15
|
-
x-axis ["Jot", "JSON-m", "JSONito", "Jot-P", "D2", "TOON", "YAML", "TOML", "JSON-s", "JSON-p"]
|
|
16
|
-
y-axis "Tokens" 0 --> 16000
|
|
17
|
-
line "Qwen" [6525, 7748, 7757, 8239, 8292, 8315, 9543, 10180, 11799, 12656]
|
|
18
|
-
line "Legacy" [6420, 7377, 7794, 7204, 7582, 7079, 7661, 11204, 10966, 11937]
|
|
19
|
-
line "Claude" [6747, 8132, 8327, 8500, 7928, 8405, 9456, 11485, 12687, 14403]
|
|
20
|
-
```
|
|
21
|
-
<!-- CHART_END -->
|
|
22
|
-
|
|
23
|
-
### Compact Formats
|
|
24
|
-
|
|
25
|
-
For machine-to-machine or LLM contexts where readability isn't required.
|
|
26
|
-
|
|
27
|
-
<!-- COMPACT_START -->
|
|
28
|
-
| Format | Qwen | Legacy | Claude | Bytes |
|
|
29
|
-
|-----------------------------------------------------|---------------:|---------------:|---------------:|---------------:|
|
|
30
|
-
| **[Jot](jot/)** | 6,525 (-16%) | 6,420 (-13%) | 6,747 (-17%) | 16,621 (-28%) |
|
|
31
|
-
| [JSON](https://www.json.org/) (mini) | 7,748 | 7,377 | 8,132 | 23,119 |
|
|
32
|
-
| [JSONito](https://github.com/creationix/jsonito) | 7,757 (+0%) | 7,794 (+6%) | 8,327 (+2%) | 14,059 (-39%) |
|
|
33
|
-
| [D2](https://github.com/creationix/d2) | 8,292 (+7%) | 7,582 (+3%) | 7,928 (-3%) | 17,328 (-25%) |
|
|
34
|
-
<!-- COMPACT_END -->
|
|
35
|
-
|
|
36
|
-
### Pretty-Printed Formats
|
|
37
|
-
|
|
38
|
-
For human-readable output or when LLMs need to read/write structured data.
|
|
39
|
-
|
|
40
|
-
<!-- PRETTY_START -->
|
|
41
|
-
| Format | Qwen | Legacy | Claude | Bytes |
|
|
42
|
-
|-----------------------------------------------------|---------------:|---------------:|---------------:|---------------:|
|
|
43
|
-
| **[Jot](jot/) (pretty)** | 8,239 (-35%) | 7,204 (-40%) | 8,500 (-41%) | 23,676 (-41%) |
|
|
44
|
-
| [TOON](toon/) | 8,315 (-34%) | 7,079 (-41%) | 8,405 (-42%) | 22,780 (-43%) |
|
|
45
|
-
| [YAML](https://yaml.org/) | 9,543 (-25%) | 7,661 (-36%) | 9,456 (-34%) | 26,757 (-33%) |
|
|
46
|
-
| [TOML](https://toml.io/) | 10,180 (-20%) | 11,204 (-6%) | 11,485 (-20%) | 28,930 (-27%) |
|
|
47
|
-
| [JSON](json/smart-json.ts) (smart) | 11,799 (-7%) | 10,966 (-8%) | 12,687 (-12%) | 32,657 (-18%) |
|
|
48
|
-
| [JSON](https://www.json.org/) (pretty) | 12,656 | 11,937 | 14,403 | 39,884 |
|
|
49
|
-
<!-- PRETTY_END -->
|
|
50
|
-
|
|
51
|
-
## Format Descriptions
|
|
52
|
-
|
|
53
|
-
### [Jot](jot/)
|
|
54
|
-
|
|
55
|
-
JSON with three optimizations:
|
|
56
|
-
|
|
57
|
-
1. **Unquoted strings** — omit quotes unless value contains `: ; , { } [ ] "` or parses as number/boolean/null
|
|
58
|
-
2. **Key folding** — `{a:{b:1}}` → `{a.b:1}` for single-key nested objects
|
|
59
|
-
3. **Tables** — `[{a:1},{a:2}]` → `{{:a;1;2}}` for repeating object schemas
|
|
60
|
-
|
|
61
|
-
```jot
|
|
62
|
-
{config.host:localhost,users:{{:id,name;1,Alice;2,Bob}}}
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
It also has a pretty-printed variant that adds indentation and newlines for readability.
|
|
66
|
-
|
|
67
|
-
```jot
|
|
68
|
-
{
|
|
69
|
-
config.host: localhost,
|
|
70
|
-
users: {{
|
|
71
|
-
:id, name;
|
|
72
|
-
1, Alice;
|
|
73
|
-
2, Bob
|
|
74
|
-
}}
|
|
75
|
-
}
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
### [TOON](toon/)
|
|
79
|
-
|
|
80
|
-
YAML-like indentation with optional table syntax and count guards.
|
|
81
|
-
|
|
82
|
-
```toon
|
|
83
|
-
users[2]{id,name}:
|
|
84
|
-
1,Alice
|
|
85
|
-
2,Bob
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
### [JSONito](https://github.com/creationix/jsonito)
|
|
89
|
-
|
|
90
|
-
Byte-optimized JSON with string deduplication via preamble dictionary.
|
|
91
|
-
|
|
92
|
-
```jito
|
|
93
|
-
{name'config'version'5~1.0.0enabled'!a~maxRetries6.timeout'eFw.tags'[a~productionapi'v1']}
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
### [D2](https://github.com/creationix/d2)
|
|
97
|
-
|
|
98
|
-
Declarative data format using `=` assignment and shell-like quoting.
|
|
99
|
-
|
|
100
|
-
## Why Not Byte-Optimized Formats?
|
|
101
|
-
|
|
102
|
-
Formats like JSONito achieve excellent byte compression (-39%) but:
|
|
103
|
-
|
|
104
|
-
- Token savings are inconsistent (small docs often cost more than JSON)
|
|
105
|
-
- Deduplication preambles add overhead that doesn't scale down
|
|
106
|
-
- LLMs cannot reliably generate formats requiring state tracking
|
|
107
|
-
|
|
108
|
-
## LLM Encoding Ability
|
|
109
|
-
|
|
110
|
-
Tested Qwen3-30b's ability to encode JSON → Jot (3 runs per document, 17 docs):
|
|
111
|
-
|
|
112
|
-
| Document Type | Semantic Accuracy |
|
|
113
|
-
|----------------------------------------------|------------------:|
|
|
114
|
-
| Simple configs (small, metrics, package) | 100% |
|
|
115
|
-
| Key folding test cases | 100% |
|
|
116
|
-
| Table-friendly (users-50) | 100% |
|
|
117
|
-
| Text-heavy (chat) | 100% |
|
|
118
|
-
| Complex/nested (large, firewall, routes) | 0% |
|
|
119
|
-
| Irregular schemas (medium, hikes, irregular) | 0% |
|
|
120
|
-
| **Overall** | **47%** |
|
|
121
|
-
|
|
122
|
-
Small models struggle with Jot's advanced features on complex documents. For LLM-generated output, consider using simpler Jot (unquoted strings only) or providing FORMAT.md as context.
|
|
123
|
-
|
|
124
|
-
## Tokenizers
|
|
125
|
-
|
|
126
|
-
- **Qwen**: Qwen3-Coder-30b via LM Studio API
|
|
127
|
-
- **Legacy**: Anthropic legacy tokenizer (`@anthropic-ai/tokenizer`)
|
|
128
|
-
- **Claude**: Claude API token counting endpoint (Sonnet/Opus/Haiku share tokenizer)
|
|
129
|
-
|
|
130
|
-
## Test Data
|
|
131
|
-
|
|
132
|
-
18 documents covering diverse structures:
|
|
133
|
-
|
|
134
|
-
| Document | Description |
|
|
135
|
-
|-------------------|----------------------------------|
|
|
136
|
-
| small | Config object (6 fields) |
|
|
137
|
-
| medium | User list with metadata |
|
|
138
|
-
| large | Kubernetes deployment spec |
|
|
139
|
-
| hikes | Tabular records (uniform schema) |
|
|
140
|
-
| chat | LLM conversation (text-heavy) |
|
|
141
|
-
| metrics | Time series (numeric-heavy) |
|
|
142
|
-
| package | npm manifest (nested deps) |
|
|
143
|
-
| github-issue | Mixed nesting with labels |
|
|
144
|
-
| irregular | Event log (varying keys) |
|
|
145
|
-
| users-50 | 50 user records (table-friendly) |
|
|
146
|
-
| logs | 50 log entries (semi-uniform) |
|
|
147
|
-
| firewall | WAF rules (deeply nested) |
|
|
148
|
-
| products | E-commerce catalog (variants) |
|
|
149
|
-
| routes | API routing config (large tables)|
|
|
150
|
-
| key-folding-* | Key folding test cases |
|
|
151
|
-
| json-counts-cache | Cached token counts |
|