@creationix/jot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +92 -0
  2. package/SUMMARY.md +151 -0
  3. package/TOKEN_COUNTS.md +97 -0
  4. package/bun.lock +19 -0
  5. package/jot.test.ts +133 -0
  6. package/jot.ts +650 -0
  7. package/package.json +10 -0
  8. package/samples/chat.jot +1 -0
  9. package/samples/chat.json +1 -0
  10. package/samples/chat.pretty.jot +6 -0
  11. package/samples/chat.pretty.json +16 -0
  12. package/samples/firewall.jot +1 -0
  13. package/samples/firewall.json +1 -0
  14. package/samples/firewall.pretty.jot +235 -0
  15. package/samples/firewall.pretty.json +344 -0
  16. package/samples/github-issue.jot +1 -0
  17. package/samples/github-issue.json +1 -0
  18. package/samples/github-issue.pretty.jot +15 -0
  19. package/samples/github-issue.pretty.json +20 -0
  20. package/samples/hikes.jot +1 -0
  21. package/samples/hikes.json +1 -0
  22. package/samples/hikes.pretty.jot +14 -0
  23. package/samples/hikes.pretty.json +38 -0
  24. package/samples/irregular.jot +1 -0
  25. package/samples/irregular.json +1 -0
  26. package/samples/irregular.pretty.jot +13 -0
  27. package/samples/irregular.pretty.json +23 -0
  28. package/samples/json-counts-cache.jot +1 -0
  29. package/samples/json-counts-cache.json +1 -0
  30. package/samples/json-counts-cache.pretty.jot +26 -0
  31. package/samples/json-counts-cache.pretty.json +26 -0
  32. package/samples/key-folding-basic.jot +1 -0
  33. package/samples/key-folding-basic.json +1 -0
  34. package/samples/key-folding-basic.pretty.jot +7 -0
  35. package/samples/key-folding-basic.pretty.json +25 -0
  36. package/samples/key-folding-mixed.jot +1 -0
  37. package/samples/key-folding-mixed.json +1 -0
  38. package/samples/key-folding-mixed.pretty.jot +16 -0
  39. package/samples/key-folding-mixed.pretty.json +24 -0
  40. package/samples/key-folding-with-array.jot +1 -0
  41. package/samples/key-folding-with-array.json +1 -0
  42. package/samples/key-folding-with-array.pretty.jot +6 -0
  43. package/samples/key-folding-with-array.pretty.json +29 -0
  44. package/samples/large.jot +1 -0
  45. package/samples/large.json +1 -0
  46. package/samples/large.pretty.jot +72 -0
  47. package/samples/large.pretty.json +93 -0
  48. package/samples/logs.jot +1 -0
  49. package/samples/logs.json +1 -0
  50. package/samples/logs.pretty.jot +96 -0
  51. package/samples/logs.pretty.json +350 -0
  52. package/samples/medium.jot +1 -0
  53. package/samples/medium.json +1 -0
  54. package/samples/medium.pretty.jot +13 -0
  55. package/samples/medium.pretty.json +30 -0
  56. package/samples/metrics.jot +1 -0
  57. package/samples/metrics.json +1 -0
  58. package/samples/metrics.pretty.jot +11 -0
  59. package/samples/metrics.pretty.json +25 -0
  60. package/samples/package.jot +1 -0
  61. package/samples/package.json +1 -0
  62. package/samples/package.pretty.jot +18 -0
  63. package/samples/package.pretty.json +18 -0
  64. package/samples/products.jot +1 -0
  65. package/samples/products.json +1 -0
  66. package/samples/products.pretty.jot +69 -0
  67. package/samples/products.pretty.json +235 -0
  68. package/samples/routes.jot +1 -0
  69. package/samples/routes.json +1 -0
  70. package/samples/routes.pretty.jot +142 -0
  71. package/samples/routes.pretty.json +354 -0
  72. package/samples/small.jot +1 -0
  73. package/samples/small.json +1 -0
  74. package/samples/small.pretty.jot +8 -0
  75. package/samples/small.pretty.json +12 -0
  76. package/samples/users-50.jot +1 -0
  77. package/samples/users-50.json +1 -0
  78. package/samples/users-50.pretty.jot +53 -0
  79. package/samples/users-50.pretty.json +354 -0
package/README.md ADDED
@@ -0,0 +1,92 @@
1
+ # Jot Format
2
+
3
+ Jot is a compact, LLM-friendly JSON variant designed to use fewer tokens while remaining easy to read and write.
4
+
5
+ ```jot
6
+ {
7
+ context: {
8
+ task: Our favorite hikes together,
9
+ location: Boulder,
10
+ season: spring_2025
11
+ },
12
+ friends: [ ana, luis, sam ],
13
+ hikes: {{
14
+ :id, name, distanceKm, elevationGain, companion, wasSunny
15
+ 1, Blue Lake Trail, 7.5, 320, ana, true
16
+ 2, Ridge Overlook, 9.2, 540, luis, false
17
+ 3, Wildflower Loop, 5.1, 180, sam, true
18
+ }}
19
+ }
20
+ ```
21
+
22
+ It is JSON with three optimizations:
23
+
24
+ 1. **Unquoted strings** — Strings are only quoted if necessary.
25
+ 2. **Key folding** — Single-key nested objects collapse: `{a:{b:1}}` → `{a.b:1}`
26
+ If a literal key itself contains a dot, keep it quoted so it is not unfolded: `{"a.b":1}`
27
+ 3. **Tables** — Object arrays with repeating schemas use `{{:cols;row;row}}` syntax
28
+
29
+ ## Unquoted Strings
30
+
31
+ The only times that you need to quote a string are:
32
+
33
+ - It would otherwise be read as a non-string JSON value (`true`, `false`, `null`, or a number like `42`, `3.14`, `-0.5`, or `1e10`)
34
+ - It contains special characters: `: ; , { } [ ] "` or control characters (newline, tab, etc)
35
+ - It is empty or has leading or trailing whitespace
36
+ - It is used as a key in an object and contains `.` (to distinguish it from a folded key)
37
+
38
+ ```json
39
+ {"name":"Alice","city":"New York","count":"42"}
40
+ ```
41
+
42
+ ```jot
43
+ {name:Alice,city:New York,count:"42"}
44
+ ```
45
+
46
+ ## Key Folding
47
+
48
+ When a nested object has exactly ONE key, fold it:
49
+
50
+ ```json
51
+ {"server":{"host":"localhost"}}
52
+ ```
53
+
54
+ ```jot
55
+ {server.host:localhost}
56
+ ```
57
+
58
+ If normal keys contain dots, keep quotes to avoid confusion:
59
+
60
+ ```json
61
+ {"data.point":{"x":10,"y":20}}
62
+ ```
63
+
64
+ ```jot
65
+ {"data.point":{x:10,y:20}}
66
+ ```
67
+
68
+ ## Tables
69
+
70
+ One common shape in data is a table — an array of multiple objects with the same schema.
71
+
72
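+ Object arrays use `{{:schema;row;row;...}}` when schemas repeat. Start with `:` followed by the comma-separated column names, then add one row of comma-separated values per object, separating the schema and each row with `;`.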
73
+
74
+ Don't use tables when there's no schema reuse (each object unique) — regular arrays are more compact.
75
+
76
+ ```json
77
+ [{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]
78
+ ```
79
+
80
+ ```jot
81
+ {{:id,name;1,Alice;2,Bob}}
82
+ ```
83
+
84
+ To change schema mid-table, add another `:schema;` row:
85
+
86
+ ```json
87
+ [{"id":1,"name":"Alice"},{"id":2,"name":"Bob"},{"x":10,"y":20},{"x":30,"y":40}]
88
+ ```
89
+
90
+ ```jot
91
+ {{:id,name;1,Alice;2,Bob;:x,y;10,20;30,40}}
92
+ ```
package/SUMMARY.md ADDED
@@ -0,0 +1,151 @@
1
+ # Encoding Format Comparison
2
+
3
+ Token counts for 18 test documents across three tokenizers. For LLM systems, **tokens matter more than bytes**.
4
+
5
+ ## Recommendation
6
+
7
+ **Use Jot** for LLM contexts — compact Jot saves 13-17% tokens vs minified JSON, depending on the tokenizer (see the Compact Formats table below).
8
+
9
+ ## Token Efficiency
10
+
11
+ <!-- CHART_START -->
12
+ ```mermaid
13
+ xychart-beta
14
+ title "Token Counts by Format"
15
+ x-axis ["Jot", "JSON-m", "JSONito", "Jot-P", "D2", "TOON", "YAML", "TOML", "JSON-s", "JSON-p"]
16
+ y-axis "Tokens" 0 --> 16000
17
+ line "Qwen" [6525, 7748, 7757, 8239, 8292, 8315, 9543, 10180, 11799, 12656]
18
+ line "Legacy" [6420, 7377, 7794, 7204, 7582, 7079, 7661, 11204, 10966, 11937]
19
+ line "Claude" [6747, 8132, 8327, 8500, 7928, 8405, 9456, 11485, 12687, 14403]
20
+ ```
21
+ <!-- CHART_END -->
22
+
23
+ ### Compact Formats
24
+
25
+ For machine-to-machine or LLM contexts where readability isn't required.
26
+
27
+ <!-- COMPACT_START -->
28
+ | Format | Qwen | Legacy | Claude | Bytes |
29
+ |-----------------------------------------------------|---------------:|---------------:|---------------:|---------------:|
30
+ | **[Jot](jot/)** | 6,525 (-16%) | 6,420 (-13%) | 6,747 (-17%) | 16,621 (-28%) |
31
+ | [JSON](https://www.json.org/) (mini) | 7,748 | 7,377 | 8,132 | 23,119 |
32
+ | [JSONito](https://github.com/creationix/jsonito) | 7,757 (+0%) | 7,794 (+6%) | 8,327 (+2%) | 14,059 (-39%) |
33
+ | [D2](https://github.com/creationix/d2) | 8,292 (+7%) | 7,582 (+3%) | 7,928 (-3%) | 17,328 (-25%) |
34
+ <!-- COMPACT_END -->
35
+
36
+ ### Pretty-Printed Formats
37
+
38
+ For human-readable output or when LLMs need to read/write structured data.
39
+
40
+ <!-- PRETTY_START -->
41
+ | Format | Qwen | Legacy | Claude | Bytes |
42
+ |-----------------------------------------------------|---------------:|---------------:|---------------:|---------------:|
43
+ | **[Jot](jot/) (pretty)** | 8,239 (-35%) | 7,204 (-40%) | 8,500 (-41%) | 23,676 (-41%) |
44
+ | [TOON](toon/) | 8,315 (-34%) | 7,079 (-41%) | 8,405 (-42%) | 22,780 (-43%) |
45
+ | [YAML](https://yaml.org/) | 9,543 (-25%) | 7,661 (-36%) | 9,456 (-34%) | 26,757 (-33%) |
46
+ | [TOML](https://toml.io/) | 10,180 (-20%) | 11,204 (-6%) | 11,485 (-20%) | 28,930 (-27%) |
47
+ | [JSON](json/smart-json.ts) (smart) | 11,799 (-7%) | 10,966 (-8%) | 12,687 (-12%) | 32,657 (-18%) |
48
+ | [JSON](https://www.json.org/) (pretty) | 12,656 | 11,937 | 14,403 | 39,884 |
49
+ <!-- PRETTY_END -->
50
+
51
+ ## Format Descriptions
52
+
53
+ ### [Jot](jot/)
54
+
55
+ JSON with three optimizations:
56
+
57
+ 1. **Unquoted strings** — omit quotes unless value contains `: ; , { } [ ] "` or parses as number/boolean/null
58
+ 2. **Key folding** — `{a:{b:1}}` → `{a.b:1}` for single-key nested objects
59
+ 3. **Tables** — `[{a:1},{a:2}]` → `{{:a;1;2}}` for repeating object schemas
60
+
61
+ ```jot
62
+ {config.host:localhost,users:{{:id,name;1,Alice;2,Bob}}}
63
+ ```
64
+
65
+ It also has a pretty-printed variant that adds indentation and newlines for readability.
66
+
67
+ ```jot
68
+ {
69
+ config.host: localhost,
70
+ users: {{
71
+ :id, name;
72
+ 1, Alice;
73
+ 2, Bob
74
+ }}
75
+ }
76
+ ```
77
+
78
+ ### [TOON](toon/)
79
+
80
+ YAML-like indentation with optional table syntax and count guards.
81
+
82
+ ```toon
83
+ users[2]{id,name}:
84
+ 1,Alice
85
+ 2,Bob
86
+ ```
87
+
88
+ ### [JSONito](https://github.com/creationix/jsonito)
89
+
90
+ Byte-optimized JSON with string deduplication via preamble dictionary.
91
+
92
+ ```jito
93
+ {name'config'version'5~1.0.0enabled'!a~maxRetries6.timeout'eFw.tags'[a~productionapi'v1']}
94
+ ```
95
+
96
+ ### [D2](https://github.com/creationix/d2)
97
+
98
+ Declarative data format using `=` assignment and shell-like quoting.
99
+
100
+ ## Why Not Byte-Optimized Formats?
101
+
102
+ Formats like JSONito achieve excellent byte compression (-39%) but:
103
+
104
+ - Token savings are inconsistent (small docs often cost more than JSON)
105
+ - Deduplication preambles add overhead that doesn't scale down
106
+ - LLMs cannot reliably generate formats requiring state tracking
107
+
108
+ ## LLM Encoding Ability
109
+
110
+ Tested Qwen3-30b's ability to encode JSON → Jot (3 runs per document, 17 docs):
111
+
112
+ | Document Type | Semantic Accuracy |
113
+ |----------------------------------------------|------------------:|
114
+ | Simple configs (small, metrics, package) | 100% |
115
+ | Key folding test cases | 100% |
116
+ | Table-friendly (users-50) | 100% |
117
+ | Text-heavy (chat) | 100% |
118
+ | Complex/nested (large, firewall, routes) | 0% |
119
+ | Irregular schemas (medium, hikes, irregular) | 0% |
120
+ | **Overall** | **47%** |
121
+
122
+ Small models struggle with Jot's advanced features on complex documents. For LLM-generated output, consider using simpler Jot (unquoted strings only) or providing FORMAT.md as context.
123
+
124
+ ## Tokenizers
125
+
126
+ - **Qwen**: Qwen3-Coder-30b via LM Studio API
127
+ - **Legacy**: Anthropic legacy tokenizer (`@anthropic-ai/tokenizer`)
128
+ - **Claude**: Claude API token counting endpoint (Sonnet/Opus/Haiku share tokenizer)
129
+
130
+ ## Test Data
131
+
132
+ 18 documents covering diverse structures:
133
+
134
+ | Document | Description |
135
+ |-------------------|----------------------------------|
136
+ | small | Config object (6 fields) |
137
+ | medium | User list with metadata |
138
+ | large | Kubernetes deployment spec |
139
+ | hikes | Tabular records (uniform schema) |
140
+ | chat | LLM conversation (text-heavy) |
141
+ | metrics | Time series (numeric-heavy) |
142
+ | package | npm manifest (nested deps) |
143
+ | github-issue | Mixed nesting with labels |
144
+ | irregular | Event log (varying keys) |
145
+ | users-50 | 50 user records (table-friendly) |
146
+ | logs | 50 log entries (semi-uniform) |
147
+ | firewall | WAF rules (deeply nested) |
148
+ | products | E-commerce catalog (variants) |
149
+ | routes | API routing config (large tables)|
150
+ | key-folding-* | Key folding test cases |
151
+ | json-counts-cache | Cached token counts |
package/TOKEN_COUNTS.md ADDED
@@ -0,0 +1,97 @@
1
+ # Token Counts by Format
2
+
3
+ ## Qwen Tokenizer (LM Studio)
4
+
5
+ Tokens measured using **Qwen3-Coder-30b** via LM Studio API. This is the primary tokenizer used for testing local model efficiency.
6
+
7
+ <!-- QWEN_CHART_START -->
8
+ ```mermaid
9
+ xychart-beta
10
+ title "Token Savings vs JSON (negative = better)"
11
+ x-axis ["Users50", "Hikes", "Medium", "KF-arr", "Firewall", "KF-basic", "Products", "Routes", "Metrics", "Package", "Chat", "KF-mix", "Issue", "Small", "Large", "Irregular", "Logs"]
12
+ y-axis "% vs JSON" -60 --> 40
13
+ line "JSON" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
14
+ line "Jot" [-48, -29, -27, -26, -19, -19, -19, -15, -14, -12, -12, -12, -10, -8, -8, -7, -3]
15
+ line "YAML" [25, 18, 27, 19, 24, 10, 26, 16, 20, 7, 8, 14, 11, 17, 23, 28, 18]
16
+ line "TOON" [-40, -23, -14, 9, 30, 9, 10, 8, -6, 7, -11, 13, 2, 4, 18, 29, 18]
17
+ ```
18
+ <!-- QWEN_CHART_END -->
19
+
20
+ ### Per-File Breakdown (Qwen)
21
+
22
+ <!-- QWEN_TABLE_START -->
23
+ | Format | Chat | Firewall | Issue | Hikes | Irregular | KF-basic | KF-mix | KF-arr | Large | Logs | Medium | Metrics | Package | Products | Routes | Small | Users50 | Total |
24
+ |--------|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|
25
+ | Jot | [67](jot/chat.jot) | [669](jot/firewall.jot) | [79](jot/github-issue.jot) | [112](jot/hikes.jot) | [63](jot/irregular.jot) | [47](jot/key-folding-basic.jot) | [69](jot/key-folding-mixed.jot) | [43](jot/key-folding-with-array.jot) | [246](jot/large.jot) | [2044](jot/logs.jot) | [71](jot/medium.jot) | [101](jot/metrics.jot) | [85](jot/package.jot) | [702](jot/products.jot) | [1246](jot/routes.jot) | [44](jot/small.jot) | [663](jot/users-50.jot) | 6,351 |
26
+ | JSONito | [86](jsonito/chat.jito) | [919](jsonito/firewall.jito) | [89](jsonito/github-issue.jito) | [158](jsonito/hikes.jito) | [59](jsonito/irregular.jito) | [62](jsonito/key-folding-basic.jito) | [82](jsonito/key-folding-mixed.jito) | [63](jsonito/key-folding-with-array.jito) | [312](jsonito/large.jito) | [1941](jsonito/logs.jito) | [103](jsonito/medium.jito) | [89](jsonito/metrics.jito) | [101](jsonito/package.jito) | [846](jsonito/products.jito) | [1426](jsonito/routes.jito) | [45](jsonito/small.jito) | [1234](jsonito/users-50.jito) | 7,615 |
27
+ | JSON-m | [76](json/chat.json) | [827](json/firewall.json) | [88](json/github-issue.json) | [158](json/hikes.json) | [68](json/irregular.json) | [58](json/key-folding-basic.json) | [78](json/key-folding-mixed.json) | [58](json/key-folding-with-array.json) | [266](json/large.json) | [2108](json/logs.json) | [97](json/medium.json) | [117](json/metrics.json) | [97](json/package.json) | [866](json/products.json) | [1459](json/routes.json) | [48](json/small.json) | [1279](json/users-50.json) | 7,748 |
28
+ | Jot-P | [73](jot/chat.pretty.jot) | [1119](jot/firewall.pretty.jot) | [105](jot/github-issue.pretty.jot) | [142](jot/hikes.pretty.jot) | [91](jot/irregular.pretty.jot) | [54](jot/key-folding-basic.pretty.jot) | [96](jot/key-folding-mixed.pretty.jot) | [52](jot/key-folding-with-array.pretty.jot) | [365](jot/large.pretty.jot) | [2304](jot/logs.pretty.jot) | [100](jot/medium.pretty.jot) | [123](jot/metrics.pretty.jot) | [120](jot/package.pretty.jot) | [828](jot/products.pretty.jot) | [1521](jot/routes.pretty.jot) | [59](jot/small.pretty.jot) | [863](jot/users-50.pretty.jot) | 8,015 |
29
+ | D2 | [80](d2/chat.d2) | [894](d2/firewall.d2) | [97](d2/github-issue.d2) | [173](d2/hikes.d2) | [81](d2/irregular.d2) | [60](d2/key-folding-basic.d2) | [80](d2/key-folding-mixed.d2) | [67](d2/key-folding-with-array.d2) | [316](d2/large.d2) | [2092](d2/logs.d2) | [104](d2/medium.d2) | [138](d2/metrics.d2) | [90](d2/package.d2) | [994](d2/products.d2) | [1536](d2/routes.d2) | [55](d2/small.d2) | [1202](d2/users-50.d2) | 8,059 |
30
+ | TOON | [68](toon/chat.toon) | [1073](toon/firewall.toon) | [90](toon/github-issue.toon) | [122](toon/hikes.toon) | [88](toon/irregular.toon) | [63](toon/key-folding-basic.toon) | [88](toon/key-folding-mixed.toon) | [63](toon/key-folding-with-array.toon) | [313](toon/large.toon) | [2492](toon/logs.toon) | [83](toon/medium.toon) | [110](toon/metrics.toon) | [104](toon/package.toon) | [954](toon/products.toon) | [1574](toon/routes.toon) | [50](toon/small.toon) | [763](toon/users-50.toon) | 8,098 |
31
+ | YAML | [82](yaml/chat.yaml) | [1029](yaml/firewall.yaml) | [98](yaml/github-issue.yaml) | [187](yaml/hikes.yaml) | [87](yaml/irregular.yaml) | [64](yaml/key-folding-basic.yaml) | [89](yaml/key-folding-mixed.yaml) | [69](yaml/key-folding-with-array.yaml) | [327](yaml/large.yaml) | [2487](yaml/logs.yaml) | [123](yaml/medium.yaml) | [140](yaml/metrics.yaml) | [104](yaml/package.yaml) | [1095](yaml/products.yaml) | [1696](yaml/routes.yaml) | [56](yaml/small.yaml) | [1597](yaml/users-50.yaml) | 9,330 |
32
+ | TOML | [84](toml/chat.toml) | [1495](toml/firewall.toml) | [99](toml/github-issue.toml) | [189](toml/hikes.toml) | [86](toml/irregular.toml) | [60](toml/key-folding-basic.toml) | [85](toml/key-folding-mixed.toml) | [61](toml/key-folding-with-array.toml) | [377](toml/large.toml) | [2498](toml/logs.toml) | [118](toml/medium.toml) | [139](toml/metrics.toml) | [104](toml/package.toml) | [1114](toml/products.toml) | [1790](toml/routes.toml) | [56](toml/small.toml) | [1625](toml/users-50.toml) | 9,980 |
33
+ <!-- QWEN_TABLE_END -->
34
+
35
+ ---
36
+
37
+ ## Legacy Claude Tokenizer
38
+
39
+ Tokens measured using **@anthropic-ai/tokenizer** (Claude's legacy tokenizer). This is the older tokenizer used by earlier Claude models.
40
+
41
+ <!-- LEGACY_CHART_START -->
42
+ ```mermaid
43
+ xychart-beta
44
+ title "Token Savings vs JSON (negative = better)"
45
+ x-axis ["Users50", "Hikes", "Products", "Metrics", "Medium", "Routes", "Chat", "KF-arr", "KF-basic", "Large", "KF-mix", "Issue", "Firewall", "Package", "Logs", "Small", "Irregular"]
46
+ y-axis "% vs JSON" -50 --> 30
47
+ line "JSON" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
48
+ line "Jot" [-37, -26, -20, -20, -20, -11, -10, -10, -8, -8, -6, -6, -2, -2, -1, 0, 2]
49
+ line "YAML" [3, 3, 4, 5, 4, 4, 4, 15, 8, 9, 4, 7, 8, 0, 2, 11, 8]
50
+ line "TOON" [-37, -24, -7, -21, -20, 3, -11, 12, 6, 4, 6, 0, 13, 0, 8, 3, 10]
51
+ ```
52
+ <!-- LEGACY_CHART_END -->
53
+
54
+ ### Per-File Breakdown (Legacy)
55
+
56
+ <!-- LEGACY_TABLE_START -->
57
+ | Format | Chat | Metrics | Large | KF-mix | Logs | Firewall | Small | Issue | Users50 | Medium | Hikes | Package | KF-basic | Irregular | KF-arr | Products | Routes | Total |
58
+ |--------|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|
59
+ | Jot | [63](jot/chat.jot) | [60](jot/metrics.jot) | [221](jot/large.jot) | [65](jot/key-folding-mixed.jot) | [1737](jot/logs.jot) | [825](jot/firewall.jot) | [36](jot/small.jot) | [68](jot/github-issue.jot) | [837](jot/users-50.jot) | [76](jot/medium.jot) | [103](jot/hikes.jot) | [91](jot/package.jot) | [45](jot/key-folding-basic.jot) | [49](jot/irregular.jot) | [47](jot/key-folding-with-array.jot) | [613](jot/products.jot) | [1352](jot/routes.jot) | 6,288 |
60
+ | JSON-m | [70](json/chat.json) | [75](json/metrics.json) | [239](json/large.json) | [69](json/key-folding-mixed.json) | [1750](json/logs.json) | [845](json/firewall.json) | [36](json/small.json) | [72](json/github-issue.json) | [1326](json/users-50.json) | [95](json/medium.json) | [139](json/hikes.json) | [93](json/package.json) | [49](json/key-folding-basic.json) | [48](json/irregular.json) | [52](json/key-folding-with-array.json) | [771](json/products.json) | [1516](json/routes.json) | 7,245 |
61
+ | YAML | [73](yaml/chat.yaml) | [79](yaml/metrics.yaml) | [260](yaml/large.yaml) | [72](yaml/key-folding-mixed.yaml) | [1780](yaml/logs.yaml) | [911](yaml/firewall.yaml) | [40](yaml/small.yaml) | [77](yaml/github-issue.yaml) | [1365](yaml/users-50.yaml) | [99](yaml/medium.yaml) | [143](yaml/hikes.yaml) | [93](yaml/package.yaml) | [53](yaml/key-folding-basic.yaml) | [52](yaml/irregular.yaml) | [60](yaml/key-folding-with-array.yaml) | [799](yaml/products.yaml) | [1572](yaml/routes.yaml) | 7,528 |
62
+ | TOON | [62](toon/chat.toon) | [59](toon/metrics.toon) | [249](toon/large.toon) | [73](toon/key-folding-mixed.toon) | [1885](toon/logs.toon) | [955](toon/firewall.toon) | [37](toon/small.toon) | [72](toon/github-issue.toon) | [836](toon/users-50.toon) | [76](toon/medium.toon) | [105](toon/hikes.toon) | [93](toon/package.toon) | [52](toon/key-folding-basic.toon) | [53](toon/irregular.toon) | [58](toon/key-folding-with-array.toon) | [720](toon/products.toon) | [1557](toon/routes.toon) | 6,942 |
63
+ <!-- LEGACY_TABLE_END -->
64
+
65
+ Note: The legacy tokenizer may produce different results than modern Claude models, but is useful for comparison and runs locally without API calls.
66
+
67
+ ---
68
+
69
+ ## Modern Claude Tokenizer
70
+
71
+ Tokens measured using **Claude API** token counting endpoint (claude-sonnet-4). This represents the actual token usage for modern Claude models. Token counting is free via the API.
72
+
73
+ <!-- CLAUDE_CHART_START -->
74
+ ```mermaid
75
+ xychart-beta
76
+ title "Token Savings vs JSON (negative = better)"
77
+ x-axis ["Users50", "Hikes", "Products", "KF-basic", "KF-arr", "Metrics", "Medium", "Large", "Firewall", "KF-mix", "Routes", "Chat", "Issue", "Irregular", "Package", "Small", "Logs"]
78
+ y-axis "% vs JSON" -50 --> 40
79
+ line "JSON" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
80
+ line "Jot" [-39, -29, -24, -24, -24, -23, -22, -16, -15, -15, -14, -12, -9, -6, -6, -4, -2]
81
+ line "YAML" [22, 19, 27, 1, 14, 19, 19, 16, 13, 6, 9, 6, 8, 21, 4, 13, 18]
82
+ line "TOON" [-36, -25, 8, 1, 6, -18, -14, 12, 17, 8, 7, -10, 1, 24, 5, 2, 21]
83
+ ```
84
+ <!-- CLAUDE_CHART_END -->
85
+
86
+ ### Per-File Breakdown (Claude)
87
+
88
+ <!-- CLAUDE_TABLE_START -->
89
+ | Format | Chat | Metrics | Large | KF-mix | Logs | Firewall | Small | Issue | Users50 | Medium | Hikes | Package | KF-basic | Irregular | KF-arr | Products | Routes | Total |
90
+ |--------|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|
91
+ | Jot | [71](jot/chat.jot) | [68](jot/metrics.jot) | [241](jot/large.jot) | [76](jot/key-folding-mixed.jot) | [1762](jot/logs.jot) | [867](jot/firewall.jot) | [44](jot/small.jot) | [78](jot/github-issue.jot) | [830](jot/users-50.jot) | [83](jot/medium.jot) | [115](jot/hikes.jot) | [99](jot/package.jot) | [53](jot/key-folding-basic.jot) | [59](jot/irregular.jot) | [53](jot/key-folding-with-array.jot) | [634](jot/products.jot) | [1472](jot/routes.jot) | 6,605 |
92
+ | JSON-m | [81](json/chat.json) | [88](json/metrics.json) | [286](json/large.json) | [89](json/key-folding-mixed.json) | [1798](json/logs.json) | [1022](json/firewall.json) | [46](json/small.json) | [86](json/github-issue.json) | [1369](json/users-50.json) | [107](json/medium.json) | [163](json/hikes.json) | [105](json/package.json) | [70](json/key-folding-basic.json) | [63](json/irregular.json) | [70](json/key-folding-with-array.json) | [839](json/products.json) | [1707](json/routes.json) | 7,989 |
93
+ | YAML | [86](yaml/chat.yaml) | [105](yaml/metrics.yaml) | [333](yaml/large.yaml) | [94](yaml/key-folding-mixed.yaml) | [2118](yaml/logs.yaml) | [1152](yaml/firewall.yaml) | [52](yaml/small.yaml) | [93](yaml/github-issue.yaml) | [1666](yaml/users-50.yaml) | [127](yaml/medium.yaml) | [194](yaml/hikes.yaml) | [109](yaml/package.yaml) | [71](yaml/key-folding-basic.yaml) | [76](yaml/irregular.yaml) | [80](yaml/key-folding-with-array.yaml) | [1063](yaml/products.yaml) | [1857](yaml/routes.yaml) | 9,276 |
94
+ | TOON | [73](toon/chat.toon) | [72](toon/metrics.toon) | [319](toon/large.toon) | [96](toon/key-folding-mixed.toon) | [2175](toon/logs.toon) | [1195](toon/firewall.toon) | [47](toon/small.toon) | [87](toon/github-issue.toon) | [879](toon/users-50.toon) | [92](toon/medium.toon) | [123](toon/hikes.toon) | [110](toon/package.toon) | [71](toon/key-folding-basic.toon) | [78](toon/irregular.toon) | [74](toon/key-folding-with-array.toon) | [907](toon/products.toon) | [1822](toon/routes.toon) | 8,220 |
95
+ <!-- CLAUDE_TABLE_END -->
96
+
97
+ Note: Run `ANTHROPIC_API_KEY=... bun scripts/count-claude-tokens.ts` to regenerate Claude counts.
package/bun.lock ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "lockfileVersion": 1,
3
+ "workspaces": {
4
+ "": {
5
+ "devDependencies": {
6
+ "@types/bun": "^1.3.5",
7
+ },
8
+ },
9
+ },
10
+ "packages": {
11
+ "@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="],
12
+
13
+ "@types/node": ["@types/node@25.0.6", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-NNu0sjyNxpoiW3YuVFfNz7mxSQ+S4X2G28uqg2s+CzoqoQjLPsWSbsFFyztIAqt2vb8kfEAsJNepMGPTxFDx3Q=="],
14
+
15
+ "bun-types": ["bun-types@1.3.5", "", { "dependencies": { "@types/node": "*" } }, "sha512-inmAYe2PFLs0SUbFOWSVD24sg1jFlMPxOjOSSCYqUgn4Hsc3rDc7dFvfVYjFPNHtov6kgUeulV4SxbuIV/stPw=="],
16
+
17
+ "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
18
+ }
19
+ }
package/jot.test.ts ADDED
@@ -0,0 +1,133 @@
1
+ import { describe, test, expect } from "bun:test"
2
+ import { stringify, parse } from "./jot"
3
+
4
+ describe("stringify", () => { // Encoder suite: every case pins the exact Jot text that stringify() returns for a given JS value.
5
+ test("null", () => expect(stringify(null)).toBe("null")) // Scalars below are expected to come out identical to their JSON literal text.
6
+ test("true", () => expect(stringify(true)).toBe("true"))
7
+ test("false", () => expect(stringify(false)).toBe("false"))
8
+ test("number", () => expect(stringify(42)).toBe("42"))
9
+ test("float", () => expect(stringify(3.14)).toBe("3.14"))
10
+
11
+ describe("strings", () => { // Strings are expected bare unless they would be ambiguous, in which case they are double-quoted.
12
+ test("simple string", () => expect(stringify("hello")).toBe("hello"))
13
+ test("string with space", () => expect(stringify("hello world")).toBe("hello world")) // An interior space alone does not force quoting.
14
+ test("empty string", () => expect(stringify("")).toBe('""'))
15
+ test("numeric string", () => expect(stringify("123")).toBe('"123"')) // Quoted so it is not confused with the number 123.
16
+ test("reserved word", () => expect(stringify("true")).toBe('"true"')) // Quoted so it is not confused with the boolean true.
17
+ test("contains colon", () => expect(stringify("a:b")).toBe('"a:b"'))
18
+ test("contains semicolon", () => expect(stringify("a;b")).toBe('"a;b"'))
19
+ })
20
+
21
+ describe("arrays", () => { // Plain arrays keep JSON brackets and commas, with no spaces in the compact output.
22
+ test("empty array", () => expect(stringify([])).toBe("[]"))
23
+ test("simple array", () => expect(stringify([1, 2, 3])).toBe("[1,2,3]"))
24
+ test("string array", () => expect(stringify(["a", "b"])).toBe("[a,b]"))
25
+ })
26
+
27
+ describe("objects", () => { // Object keys and string values are both expected unquoted when unambiguous.
28
+ test("empty object", () => expect(stringify({})).toBe("{}"))
29
+ test("simple object", () => expect(stringify({ name: "Alice", age: 30 })).toBe("{name:Alice,age:30}"))
30
+ })
31
+
32
+ describe("key folding", () => { // Chains of single-key objects are expected to collapse into one dotted key.
33
+ test("fold 1 level", () => expect(stringify({ a: { b: 1 } })).toBe("{a.b:1}"))
34
+ test("fold 2 levels", () => expect(stringify({ a: { b: { c: 1 } } })).toBe("{a.b.c:1}"))
35
+ test("no fold multi-key", () => expect(stringify({ a: { b: 1, c: 2 } })).toBe("{a:{b:1,c:2}}")) // A nested object with two keys stays nested.
36
+ test("key with dot", () => expect(stringify({ "a.b": 1 })).toBe('{"a.b":1}')) // A literal dot in a key forces quotes.
37
+ test("key with dot nested", () => expect(stringify({ "a.b": { c: 1 } })).toBe('{"a.b":{c:1}}')) // Expected output keeps the quoted dotted key unfolded even though its value has a single key.
38
+ })
39
+
40
+ describe("tables", () => { // Arrays of objects are expected as {{:cols;row;...}} tables only when some schema repeats.
41
+ test("uniform table", () => expect(stringify([{ a: 1, b: 2 }, { a: 3, b: 4 }])).toBe("{{:a,b;1,2;3,4}}"))
42
+ test("3-row table", () => expect(stringify([{ x: 1 }, { x: 2 }, { x: 3 }])).toBe("{{:x;1;2;3}}"))
43
+ test("mixed schema with reuse", () => expect(stringify([{ a: 1 }, { a: 2 }, { b: 3 }])).toBe("{{:a;1;2;:b;3}}")) // A second ":schema" row switches columns mid-table.
44
+ test("no reuse", () => expect(stringify([{ a: 1 }, { b: 2 }])).toBe("[{a:1},{b:2}]")) // No schema appears twice, so a regular array is expected.
45
+ test("single obj", () => expect(stringify([{ a: 1 }])).toBe("[{a:1}]")) // A single object is expected as a regular array, not a table.
46
+ })
47
+ })
48
+
49
+ describe("parse", () => { // Decoder suite: every case pins the JS value that parse() returns for a given Jot text.
50
+ test("null", () => expect(parse("null")).toBe(null))
51
+ test("true", () => expect(parse("true")).toBe(true))
52
+ test("number", () => expect(parse("42")).toBe(42))
53
+ test("string", () => expect(parse("hello")).toBe("hello")) // A bare word that is not a literal or number is expected back as a string.
54
+ test("quoted", () => expect(parse('"hello world"')).toBe("hello world")) // Surrounding double quotes are not part of the value.
55
+
56
+ test("array", () => expect(parse("[1,2,3]")).toEqual([1, 2, 3]))
57
+ test("object", () => expect(parse("{name:Alice,age:30}")).toEqual({ name: "Alice", age: 30 }))
58
+
59
+ describe("key unfolding", () => { // Inverse of key folding: an unquoted dotted key expands into nested single-key objects.
60
+ test("fold", () => expect(parse("{a.b:1}")).toEqual({ a: { b: 1 } }))
61
+ test("fold deep", () => expect(parse("{a.b.c:1}")).toEqual({ a: { b: { c: 1 } } }))
62
+ test("quoted dot key", () => expect(parse('{"a.b":1}')).toEqual({ "a.b": 1 })) // A quoted key keeps its dot and is not expanded.
63
+ })
64
+
65
+ describe("tables", () => { // Table syntax is expected to decode to an array with one object per data row.
66
+ test("table", () => expect(parse("{{:a,b;1,2;3,4}}")).toEqual([{ a: 1, b: 2 }, { a: 3, b: 4 }]))
67
+ test("schema change", () => expect(parse("{{:a;1;:b;2}}")).toEqual([{ a: 1 }, { b: 2 }])) // Each ":schema" row sets the columns for the rows after it.
68
+ })
69
+ })
70
+
71
+ describe("round-trip", () => { // Property-style suite: parse(stringify(value)) must deep-equal the original value.
72
+ const testCases: [string, unknown][] = [ // Each entry is a [test name, value to round-trip] pair.
73
+ ["null", null],
74
+ ["true", true],
75
+ ["false", false],
76
+ ["number", 42],
77
+ ["string", "hello"],
78
+ ["empty array", []],
79
+ ["number array", [1, 2, 3]],
80
+ ["empty object", {}],
81
+ ["simple object", { a: 1, b: 2 }],
82
+ ["nested 1 level", { a: { b: 1 } }],
83
+ ["nested 2 levels", { a: { b: { c: 1 } } }],
84
+ ["uniform table", [{ a: 1, b: 2 }, { a: 3, b: 4 }]],
85
+ ["mixed schema", [{ a: 1 }, { b: 2 }]],
86
+ ["nested table", { users: [{ id: 1, name: "Alice" }, { id: 2, name: "Bob" }] }],
87
+ ["key with dot", { "a.b": 1 }],
88
+ ["key with semicolon", { "x;y": "test" }],
89
+ ["string with semicolon", "a;b"],
90
+ ]
91
+
92
+ for (const [name, original] of testCases) { // Registers one test per table entry, named after the entry.
93
+ test(name, () => {
94
+ const encoded = stringify(original)
95
+ const decoded = parse(encoded)
96
+ expect(decoded).toEqual(original) // Deep equality; the encoded text itself is not asserted in this suite.
97
+ })
98
+ }
99
+ })
100
+
101
+ describe("samples", () => { // Fixture suite: checks stringify/parse against the paired .json/.jot files in ./samples.
102
+ const { readdirSync, readFileSync } = require("fs") // NOTE(review): CommonJS require inside a module that also uses import; presumably relies on the Bun runtime allowing both — confirm.
103
+ const { join } = require("path")
104
+
105
+ const samplesDir = join(__dirname, "samples") // Fixtures are resolved relative to this test file's directory.
106
+ const jsonFiles = readdirSync(samplesDir)
107
+ .filter((f: string) => f.endsWith(".json") && !f.includes(".pretty.")) // Keep only compact JSON inputs; *.pretty.json files are skipped.
108
+
109
+ for (const jsonFile of jsonFiles) {
110
+ const baseName = jsonFile.replace(".json", "") // String pattern, so only the first ".json" occurrence in the name is removed.
111
+ const jotFile = `${baseName}.jot` // Expected encoding lives beside the input under the same base name.
112
+
113
+ describe(baseName, () => {
114
+ const jsonPath = join(samplesDir, jsonFile)
115
+ const jotPath = join(samplesDir, jotFile)
116
+
117
+ const jsonContent = readFileSync(jsonPath, "utf-8").trim() // Files are read while the suite is being defined, so a missing .jot file throws here rather than inside a test.
118
+ const expectedJot = readFileSync(jotPath, "utf-8").trim() // trim() drops surrounding whitespace such as a trailing newline before the exact comparison.
119
+ const originalData = JSON.parse(jsonContent)
120
+
121
+ test("encoding matches jot", () => { // The encoder output must equal the stored .jot fixture exactly.
122
+ const encoded = stringify(originalData)
123
+ expect(encoded).toBe(expectedJot)
124
+ })
125
+
126
+ test("round-trip matches json semantically", () => { // Decoding the encoder output must deep-equal the parsed JSON input.
127
+ const encoded = stringify(originalData)
128
+ const decoded = parse(encoded)
129
+ expect(decoded).toEqual(originalData)
130
+ })
131
+ })
132
+ }
133
+ })