@creationix/jot 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -0
- package/SUMMARY.md +151 -0
- package/TOKEN_COUNTS.md +97 -0
- package/bun.lock +19 -0
- package/jot.test.ts +133 -0
- package/jot.ts +650 -0
- package/package.json +10 -0
- package/samples/chat.jot +1 -0
- package/samples/chat.json +1 -0
- package/samples/chat.pretty.jot +6 -0
- package/samples/chat.pretty.json +16 -0
- package/samples/firewall.jot +1 -0
- package/samples/firewall.json +1 -0
- package/samples/firewall.pretty.jot +235 -0
- package/samples/firewall.pretty.json +344 -0
- package/samples/github-issue.jot +1 -0
- package/samples/github-issue.json +1 -0
- package/samples/github-issue.pretty.jot +15 -0
- package/samples/github-issue.pretty.json +20 -0
- package/samples/hikes.jot +1 -0
- package/samples/hikes.json +1 -0
- package/samples/hikes.pretty.jot +14 -0
- package/samples/hikes.pretty.json +38 -0
- package/samples/irregular.jot +1 -0
- package/samples/irregular.json +1 -0
- package/samples/irregular.pretty.jot +13 -0
- package/samples/irregular.pretty.json +23 -0
- package/samples/json-counts-cache.jot +1 -0
- package/samples/json-counts-cache.json +1 -0
- package/samples/json-counts-cache.pretty.jot +26 -0
- package/samples/json-counts-cache.pretty.json +26 -0
- package/samples/key-folding-basic.jot +1 -0
- package/samples/key-folding-basic.json +1 -0
- package/samples/key-folding-basic.pretty.jot +7 -0
- package/samples/key-folding-basic.pretty.json +25 -0
- package/samples/key-folding-mixed.jot +1 -0
- package/samples/key-folding-mixed.json +1 -0
- package/samples/key-folding-mixed.pretty.jot +16 -0
- package/samples/key-folding-mixed.pretty.json +24 -0
- package/samples/key-folding-with-array.jot +1 -0
- package/samples/key-folding-with-array.json +1 -0
- package/samples/key-folding-with-array.pretty.jot +6 -0
- package/samples/key-folding-with-array.pretty.json +29 -0
- package/samples/large.jot +1 -0
- package/samples/large.json +1 -0
- package/samples/large.pretty.jot +72 -0
- package/samples/large.pretty.json +93 -0
- package/samples/logs.jot +1 -0
- package/samples/logs.json +1 -0
- package/samples/logs.pretty.jot +96 -0
- package/samples/logs.pretty.json +350 -0
- package/samples/medium.jot +1 -0
- package/samples/medium.json +1 -0
- package/samples/medium.pretty.jot +13 -0
- package/samples/medium.pretty.json +30 -0
- package/samples/metrics.jot +1 -0
- package/samples/metrics.json +1 -0
- package/samples/metrics.pretty.jot +11 -0
- package/samples/metrics.pretty.json +25 -0
- package/samples/package.jot +1 -0
- package/samples/package.json +1 -0
- package/samples/package.pretty.jot +18 -0
- package/samples/package.pretty.json +18 -0
- package/samples/products.jot +1 -0
- package/samples/products.json +1 -0
- package/samples/products.pretty.jot +69 -0
- package/samples/products.pretty.json +235 -0
- package/samples/routes.jot +1 -0
- package/samples/routes.json +1 -0
- package/samples/routes.pretty.jot +142 -0
- package/samples/routes.pretty.json +354 -0
- package/samples/small.jot +1 -0
- package/samples/small.json +1 -0
- package/samples/small.pretty.jot +8 -0
- package/samples/small.pretty.json +12 -0
- package/samples/users-50.jot +1 -0
- package/samples/users-50.json +1 -0
- package/samples/users-50.pretty.jot +53 -0
- package/samples/users-50.pretty.json +354 -0
package/README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Jot Format
|
|
2
|
+
|
|
3
|
+
Jot is a compact, LLM-friendly JSON variant designed to use fewer tokens while remaining easy to read and write.
|
|
4
|
+
|
|
5
|
+
```jot
|
|
6
|
+
{
|
|
7
|
+
context: {
|
|
8
|
+
task: Our favorite hikes together,
|
|
9
|
+
location: Boulder,
|
|
10
|
+
season: spring_2025
|
|
11
|
+
},
|
|
12
|
+
friends: [ ana, luis, sam ],
|
|
13
|
+
hikes: {{
|
|
14
|
+
:id, name, distanceKm, elevationGain, companion, wasSunny
|
|
15
|
+
1, Blue Lake Trail, 7.5, 320, ana, true
|
|
16
|
+
2, Ridge Overlook, 9.2, 540, luis, false
|
|
17
|
+
3, Wildflower Loop, 5.1, 180, sam, true
|
|
18
|
+
}}
|
|
19
|
+
}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
It is JSON with three optimizations:
|
|
23
|
+
|
|
24
|
+
1. **Unquoted strings** — Strings are only quoted if necessary.
|
|
25
|
+
2. **Key folding** — Single-key nested objects collapse: `{a:{b:1}}` → `{a.b:1}`
|
|
26
|
+
If a key itself contains a dot, keep it quoted: `{"a.b":1}`
|
|
27
|
+
3. **Tables** — Object arrays with repeating schemas use `{{:cols;row;row}}` syntax
|
|
28
|
+
|
|
29
|
+
## Unquoted Strings
|
|
30
|
+
|
|
31
|
+
The only times that you need to quote a string are:
|
|
32
|
+
|
|
33
|
+
- It is a valid JSON value (`true`, `false`, `null`, or a number like `42`, `3.14`, `-0.5`, or `1e10`)
|
|
34
|
+
- It contains special characters: `: ; , { } [ ] "` or control characters (newline, tab, etc.)
|
|
35
|
+
- It is empty or has leading or trailing whitespace
|
|
36
|
+
- It is being used as a key in an object and contains `.` (to distinguish it from folded keys)
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{"name":"Alice","city":"New York","count":"42"}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
```jot
|
|
43
|
+
{name:Alice,city:New York,count:"42"}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Key Folding
|
|
47
|
+
|
|
48
|
+
When a nested object has exactly ONE key, fold it:
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
{"server":{"host":"localhost"}}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
```jot
|
|
55
|
+
{server.host:localhost}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
If a key itself contains a literal dot, keep it quoted so it is not mistaken for a folded key:
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
{"data.point":{"x":10,"y":20}}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
```jot
|
|
65
|
+
{"data.point":{x:10,y:20}}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Tables
|
|
69
|
+
|
|
70
|
+
One common shape in data is a table — an array of multiple objects with the same schema.
|
|
71
|
+
|
|
72
|
+
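Arrays of objects use `{{:schema;row;row;...}}` when schemas repeat. The schema row starts with `:` followed by the comma-separated column names; each following row lists the values in the same column order.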
|
|
73
|
+
|
|
74
|
+
Don't use tables when there's no schema reuse (each object unique) — regular arrays are more compact.
|
|
75
|
+
|
|
76
|
+
```json
|
|
77
|
+
[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
```jot
|
|
81
|
+
{{:id,name;1,Alice;2,Bob}}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
To change schema mid-table, add another `:schema;` row:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"},{"x":10,"y":20},{"x":30,"y":40}]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
```jot
|
|
91
|
+
{{:id,name;1,Alice;2,Bob;:x,y;10,20;30,40}}
|
|
92
|
+
```
|
package/SUMMARY.md
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Encoding Format Comparison
|
|
2
|
+
|
|
3
|
+
Token counts for 18 test documents across three tokenizers. For LLM systems, **tokens matter more than bytes**.
|
|
4
|
+
|
|
5
|
+
## Recommendation
|
|
6
|
+
|
|
7
|
+
**Use Jot** for LLM contexts — saves 13–17% tokens vs minified JSON, depending on the tokenizer.
|
|
8
|
+
|
|
9
|
+
## Token Efficiency
|
|
10
|
+
|
|
11
|
+
<!-- CHART_START -->
|
|
12
|
+
```mermaid
|
|
13
|
+
xychart-beta
|
|
14
|
+
title "Token Counts by Format"
|
|
15
|
+
x-axis ["Jot", "JSON-m", "JSONito", "Jot-P", "D2", "TOON", "YAML", "TOML", "JSON-s", "JSON-p"]
|
|
16
|
+
y-axis "Tokens" 0 --> 16000
|
|
17
|
+
line "Qwen" [6525, 7748, 7757, 8239, 8292, 8315, 9543, 10180, 11799, 12656]
|
|
18
|
+
line "Legacy" [6420, 7377, 7794, 7204, 7582, 7079, 7661, 11204, 10966, 11937]
|
|
19
|
+
line "Claude" [6747, 8132, 8327, 8500, 7928, 8405, 9456, 11485, 12687, 14403]
|
|
20
|
+
```
|
|
21
|
+
<!-- CHART_END -->
|
|
22
|
+
|
|
23
|
+
### Compact Formats
|
|
24
|
+
|
|
25
|
+
For machine-to-machine or LLM contexts where readability isn't required.
|
|
26
|
+
|
|
27
|
+
<!-- COMPACT_START -->
|
|
28
|
+
| Format | Qwen | Legacy | Claude | Bytes |
|
|
29
|
+
|-----------------------------------------------------|---------------:|---------------:|---------------:|---------------:|
|
|
30
|
+
| **[Jot](jot/)** | 6,525 (-16%) | 6,420 (-13%) | 6,747 (-17%) | 16,621 (-28%) |
|
|
31
|
+
| [JSON](https://www.json.org/) (mini) | 7,748 | 7,377 | 8,132 | 23,119 |
|
|
32
|
+
| [JSONito](https://github.com/creationix/jsonito) | 7,757 (+0%) | 7,794 (+6%) | 8,327 (+2%) | 14,059 (-39%) |
|
|
33
|
+
| [D2](https://github.com/creationix/d2) | 8,292 (+7%) | 7,582 (+3%) | 7,928 (-3%) | 17,328 (-25%) |
|
|
34
|
+
<!-- COMPACT_END -->
|
|
35
|
+
|
|
36
|
+
### Pretty-Printed Formats
|
|
37
|
+
|
|
38
|
+
For human-readable output or when LLMs need to read/write structured data.
|
|
39
|
+
|
|
40
|
+
<!-- PRETTY_START -->
|
|
41
|
+
| Format | Qwen | Legacy | Claude | Bytes |
|
|
42
|
+
|-----------------------------------------------------|---------------:|---------------:|---------------:|---------------:|
|
|
43
|
+
| **[Jot](jot/) (pretty)** | 8,239 (-35%) | 7,204 (-40%) | 8,500 (-41%) | 23,676 (-41%) |
|
|
44
|
+
| [TOON](toon/) | 8,315 (-34%) | 7,079 (-41%) | 8,405 (-42%) | 22,780 (-43%) |
|
|
45
|
+
| [YAML](https://yaml.org/) | 9,543 (-25%) | 7,661 (-36%) | 9,456 (-34%) | 26,757 (-33%) |
|
|
46
|
+
| [TOML](https://toml.io/) | 10,180 (-20%) | 11,204 (-6%) | 11,485 (-20%) | 28,930 (-27%) |
|
|
47
|
+
| [JSON](json/smart-json.ts) (smart) | 11,799 (-7%) | 10,966 (-8%) | 12,687 (-12%) | 32,657 (-18%) |
|
|
48
|
+
| [JSON](https://www.json.org/) (pretty) | 12,656 | 11,937 | 14,403 | 39,884 |
|
|
49
|
+
<!-- PRETTY_END -->
|
|
50
|
+
|
|
51
|
+
## Format Descriptions
|
|
52
|
+
|
|
53
|
+
### [Jot](jot/)
|
|
54
|
+
|
|
55
|
+
JSON with three optimizations:
|
|
56
|
+
|
|
57
|
+
1. **Unquoted strings** — omit quotes unless value contains `: ; , { } [ ] "` or parses as number/boolean/null
|
|
58
|
+
2. **Key folding** — `{a:{b:1}}` → `{a.b:1}` for single-key nested objects
|
|
59
|
+
3. **Tables** — `[{a:1},{a:2}]` → `{{:a;1;2}}` for repeating object schemas
|
|
60
|
+
|
|
61
|
+
```jot
|
|
62
|
+
{config.host:localhost,users:{{:id,name;1,Alice;2,Bob}}}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
It also has a pretty-printed variant that adds indentation and newlines for readability.
|
|
66
|
+
|
|
67
|
+
```jot
|
|
68
|
+
{
|
|
69
|
+
config.host: localhost,
|
|
70
|
+
users: {{
|
|
71
|
+
:id, name;
|
|
72
|
+
1, Alice;
|
|
73
|
+
2, Bob
|
|
74
|
+
}}
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### [TOON](toon/)
|
|
79
|
+
|
|
80
|
+
YAML-like indentation with optional table syntax and count guards.
|
|
81
|
+
|
|
82
|
+
```toon
|
|
83
|
+
users[2]{id,name}:
|
|
84
|
+
1,Alice
|
|
85
|
+
2,Bob
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### [JSONito](https://github.com/creationix/jsonito)
|
|
89
|
+
|
|
90
|
+
Byte-optimized JSON with string deduplication via preamble dictionary.
|
|
91
|
+
|
|
92
|
+
```jito
|
|
93
|
+
{name'config'version'5~1.0.0enabled'!a~maxRetries6.timeout'eFw.tags'[a~productionapi'v1']}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### [D2](https://github.com/creationix/d2)
|
|
97
|
+
|
|
98
|
+
Declarative data format using `=` assignment and shell-like quoting.
|
|
99
|
+
|
|
100
|
+
## Why Not Byte-Optimized Formats?
|
|
101
|
+
|
|
102
|
+
Formats like JSONito achieve excellent byte compression (-39%) but:
|
|
103
|
+
|
|
104
|
+
- Token savings are inconsistent (small docs often cost more than JSON)
|
|
105
|
+
- Deduplication preambles add overhead that doesn't scale down
|
|
106
|
+
- LLMs cannot reliably generate formats requiring state tracking
|
|
107
|
+
|
|
108
|
+
## LLM Encoding Ability
|
|
109
|
+
|
|
110
|
+
Tested Qwen3-30b's ability to encode JSON → Jot (3 runs per document, 17 docs):
|
|
111
|
+
|
|
112
|
+
| Document Type | Semantic Accuracy |
|
|
113
|
+
|----------------------------------------------|------------------:|
|
|
114
|
+
| Simple configs (small, metrics, package) | 100% |
|
|
115
|
+
| Key folding test cases | 100% |
|
|
116
|
+
| Table-friendly (users-50) | 100% |
|
|
117
|
+
| Text-heavy (chat) | 100% |
|
|
118
|
+
| Complex/nested (large, firewall, routes) | 0% |
|
|
119
|
+
| Irregular schemas (medium, hikes, irregular) | 0% |
|
|
120
|
+
| **Overall** | **47%** |
|
|
121
|
+
|
|
122
|
+
Small models struggle with Jot's advanced features on complex documents. For LLM-generated output, consider using simpler Jot (unquoted strings only) or providing FORMAT.md as context.
|
|
123
|
+
|
|
124
|
+
## Tokenizers
|
|
125
|
+
|
|
126
|
+
- **Qwen**: Qwen3-Coder-30b via LM Studio API
|
|
127
|
+
- **Legacy**: Anthropic legacy tokenizer (`@anthropic-ai/tokenizer`)
|
|
128
|
+
- **Claude**: Claude API token counting endpoint (Sonnet/Opus/Haiku share tokenizer)
|
|
129
|
+
|
|
130
|
+
## Test Data
|
|
131
|
+
|
|
132
|
+
18 documents covering diverse structures:
|
|
133
|
+
|
|
134
|
+
| Document | Description |
|
|
135
|
+
|-------------------|----------------------------------|
|
|
136
|
+
| small | Config object (6 fields) |
|
|
137
|
+
| medium | User list with metadata |
|
|
138
|
+
| large | Kubernetes deployment spec |
|
|
139
|
+
| hikes | Tabular records (uniform schema) |
|
|
140
|
+
| chat | LLM conversation (text-heavy) |
|
|
141
|
+
| metrics | Time series (numeric-heavy) |
|
|
142
|
+
| package | npm manifest (nested deps) |
|
|
143
|
+
| github-issue | Mixed nesting with labels |
|
|
144
|
+
| irregular | Event log (varying keys) |
|
|
145
|
+
| users-50 | 50 user records (table-friendly) |
|
|
146
|
+
| logs | 50 log entries (semi-uniform) |
|
|
147
|
+
| firewall | WAF rules (deeply nested) |
|
|
148
|
+
| products | E-commerce catalog (variants) |
|
|
149
|
+
| routes | API routing config (large tables)|
|
|
150
|
+
| key-folding-* | Key folding test cases |
|
|
151
|
+
| json-counts-cache | Cached token counts |
|
package/TOKEN_COUNTS.md
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Token Counts by Format
|
|
2
|
+
|
|
3
|
+
## Qwen Tokenizer (LM Studio)
|
|
4
|
+
|
|
5
|
+
Tokens measured using **Qwen3-Coder-30b** via LM Studio API. This is the primary tokenizer used for testing local model efficiency.
|
|
6
|
+
|
|
7
|
+
<!-- QWEN_CHART_START -->
|
|
8
|
+
```mermaid
|
|
9
|
+
xychart-beta
|
|
10
|
+
title "Token Savings vs JSON (negative = better)"
|
|
11
|
+
x-axis ["Users50", "Hikes", "Medium", "KF-arr", "Firewall", "KF-basic", "Products", "Routes", "Metrics", "Package", "Chat", "KF-mix", "Issue", "Small", "Large", "Irregular", "Logs"]
|
|
12
|
+
y-axis "% vs JSON" -60 --> 40
|
|
13
|
+
line "JSON" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
|
14
|
+
line "Jot" [-48, -29, -27, -26, -19, -19, -19, -15, -14, -12, -12, -12, -10, -8, -8, -7, -3]
|
|
15
|
+
line "YAML" [25, 18, 27, 19, 24, 10, 26, 16, 20, 7, 8, 14, 11, 17, 23, 28, 18]
|
|
16
|
+
line "TOON" [-40, -23, -14, 9, 30, 9, 10, 8, -6, 7, -11, 13, 2, 4, 18, 29, 18]
|
|
17
|
+
```
|
|
18
|
+
<!-- QWEN_CHART_END -->
|
|
19
|
+
|
|
20
|
+
### Per-File Breakdown (Qwen)
|
|
21
|
+
|
|
22
|
+
<!-- QWEN_TABLE_START -->
|
|
23
|
+
| Format | Chat | Firewall | Issue | Hikes | Irregular | KF-basic | KF-mix | KF-arr | Large | Logs | Medium | Metrics | Package | Products | Routes | Small | Users50 | Total |
|
|
24
|
+
|--------|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|
|
|
25
|
+
| Jot | [67](jot/chat.jot) | [669](jot/firewall.jot) | [79](jot/github-issue.jot) | [112](jot/hikes.jot) | [63](jot/irregular.jot) | [47](jot/key-folding-basic.jot) | [69](jot/key-folding-mixed.jot) | [43](jot/key-folding-with-array.jot) | [246](jot/large.jot) | [2044](jot/logs.jot) | [71](jot/medium.jot) | [101](jot/metrics.jot) | [85](jot/package.jot) | [702](jot/products.jot) | [1246](jot/routes.jot) | [44](jot/small.jot) | [663](jot/users-50.jot) | 6,351 |
|
|
26
|
+
| JSONito | [86](jsonito/chat.jito) | [919](jsonito/firewall.jito) | [89](jsonito/github-issue.jito) | [158](jsonito/hikes.jito) | [59](jsonito/irregular.jito) | [62](jsonito/key-folding-basic.jito) | [82](jsonito/key-folding-mixed.jito) | [63](jsonito/key-folding-with-array.jito) | [312](jsonito/large.jito) | [1941](jsonito/logs.jito) | [103](jsonito/medium.jito) | [89](jsonito/metrics.jito) | [101](jsonito/package.jito) | [846](jsonito/products.jito) | [1426](jsonito/routes.jito) | [45](jsonito/small.jito) | [1234](jsonito/users-50.jito) | 7,615 |
|
|
27
|
+
| JSON-m | [76](json/chat.json) | [827](json/firewall.json) | [88](json/github-issue.json) | [158](json/hikes.json) | [68](json/irregular.json) | [58](json/key-folding-basic.json) | [78](json/key-folding-mixed.json) | [58](json/key-folding-with-array.json) | [266](json/large.json) | [2108](json/logs.json) | [97](json/medium.json) | [117](json/metrics.json) | [97](json/package.json) | [866](json/products.json) | [1459](json/routes.json) | [48](json/small.json) | [1279](json/users-50.json) | 7,748 |
|
|
28
|
+
| Jot-P | [73](jot/chat.pretty.jot) | [1119](jot/firewall.pretty.jot) | [105](jot/github-issue.pretty.jot) | [142](jot/hikes.pretty.jot) | [91](jot/irregular.pretty.jot) | [54](jot/key-folding-basic.pretty.jot) | [96](jot/key-folding-mixed.pretty.jot) | [52](jot/key-folding-with-array.pretty.jot) | [365](jot/large.pretty.jot) | [2304](jot/logs.pretty.jot) | [100](jot/medium.pretty.jot) | [123](jot/metrics.pretty.jot) | [120](jot/package.pretty.jot) | [828](jot/products.pretty.jot) | [1521](jot/routes.pretty.jot) | [59](jot/small.pretty.jot) | [863](jot/users-50.pretty.jot) | 8,015 |
|
|
29
|
+
| D2 | [80](d2/chat.d2) | [894](d2/firewall.d2) | [97](d2/github-issue.d2) | [173](d2/hikes.d2) | [81](d2/irregular.d2) | [60](d2/key-folding-basic.d2) | [80](d2/key-folding-mixed.d2) | [67](d2/key-folding-with-array.d2) | [316](d2/large.d2) | [2092](d2/logs.d2) | [104](d2/medium.d2) | [138](d2/metrics.d2) | [90](d2/package.d2) | [994](d2/products.d2) | [1536](d2/routes.d2) | [55](d2/small.d2) | [1202](d2/users-50.d2) | 8,059 |
|
|
30
|
+
| TOON | [68](toon/chat.toon) | [1073](toon/firewall.toon) | [90](toon/github-issue.toon) | [122](toon/hikes.toon) | [88](toon/irregular.toon) | [63](toon/key-folding-basic.toon) | [88](toon/key-folding-mixed.toon) | [63](toon/key-folding-with-array.toon) | [313](toon/large.toon) | [2492](toon/logs.toon) | [83](toon/medium.toon) | [110](toon/metrics.toon) | [104](toon/package.toon) | [954](toon/products.toon) | [1574](toon/routes.toon) | [50](toon/small.toon) | [763](toon/users-50.toon) | 8,098 |
|
|
31
|
+
| YAML | [82](yaml/chat.yaml) | [1029](yaml/firewall.yaml) | [98](yaml/github-issue.yaml) | [187](yaml/hikes.yaml) | [87](yaml/irregular.yaml) | [64](yaml/key-folding-basic.yaml) | [89](yaml/key-folding-mixed.yaml) | [69](yaml/key-folding-with-array.yaml) | [327](yaml/large.yaml) | [2487](yaml/logs.yaml) | [123](yaml/medium.yaml) | [140](yaml/metrics.yaml) | [104](yaml/package.yaml) | [1095](yaml/products.yaml) | [1696](yaml/routes.yaml) | [56](yaml/small.yaml) | [1597](yaml/users-50.yaml) | 9,330 |
|
|
32
|
+
| TOML | [84](toml/chat.toml) | [1495](toml/firewall.toml) | [99](toml/github-issue.toml) | [189](toml/hikes.toml) | [86](toml/irregular.toml) | [60](toml/key-folding-basic.toml) | [85](toml/key-folding-mixed.toml) | [61](toml/key-folding-with-array.toml) | [377](toml/large.toml) | [2498](toml/logs.toml) | [118](toml/medium.toml) | [139](toml/metrics.toml) | [104](toml/package.toml) | [1114](toml/products.toml) | [1790](toml/routes.toml) | [56](toml/small.toml) | [1625](toml/users-50.toml) | 9,980 |
|
|
33
|
+
<!-- QWEN_TABLE_END -->
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Legacy Claude Tokenizer
|
|
38
|
+
|
|
39
|
+
Tokens measured using **@anthropic-ai/tokenizer** (Claude's legacy tokenizer). This is the older tokenizer used by earlier Claude models.
|
|
40
|
+
|
|
41
|
+
<!-- LEGACY_CHART_START -->
|
|
42
|
+
```mermaid
|
|
43
|
+
xychart-beta
|
|
44
|
+
title "Token Savings vs JSON (negative = better)"
|
|
45
|
+
x-axis ["Users50", "Hikes", "Products", "Metrics", "Medium", "Routes", "Chat", "KF-arr", "KF-basic", "Large", "KF-mix", "Issue", "Firewall", "Package", "Logs", "Small", "Irregular"]
|
|
46
|
+
y-axis "% vs JSON" -50 --> 30
|
|
47
|
+
line "JSON" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
|
48
|
+
line "Jot" [-37, -26, -20, -20, -20, -11, -10, -10, -8, -8, -6, -6, -2, -2, -1, 0, 2]
|
|
49
|
+
line "YAML" [3, 3, 4, 5, 4, 4, 4, 15, 8, 9, 4, 7, 8, 0, 2, 11, 8]
|
|
50
|
+
line "TOON" [-37, -24, -7, -21, -20, 3, -11, 12, 6, 4, 6, 0, 13, 0, 8, 3, 10]
|
|
51
|
+
```
|
|
52
|
+
<!-- LEGACY_CHART_END -->
|
|
53
|
+
|
|
54
|
+
### Per-File Breakdown (Legacy)
|
|
55
|
+
|
|
56
|
+
<!-- LEGACY_TABLE_START -->
|
|
57
|
+
| Format | Chat | Metrics | Large | KF-mix | Logs | Firewall | Small | Issue | Users50 | Medium | Hikes | Package | KF-basic | Irregular | KF-arr | Products | Routes | Total |
|
|
58
|
+
|--------|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|
|
|
59
|
+
| Jot | [63](jot/chat.jot) | [60](jot/metrics.jot) | [221](jot/large.jot) | [65](jot/key-folding-mixed.jot) | [1737](jot/logs.jot) | [825](jot/firewall.jot) | [36](jot/small.jot) | [68](jot/github-issue.jot) | [837](jot/users-50.jot) | [76](jot/medium.jot) | [103](jot/hikes.jot) | [91](jot/package.jot) | [45](jot/key-folding-basic.jot) | [49](jot/irregular.jot) | [47](jot/key-folding-with-array.jot) | [613](jot/products.jot) | [1352](jot/routes.jot) | 6,288 |
|
|
60
|
+
| JSON-m | [70](json/chat.json) | [75](json/metrics.json) | [239](json/large.json) | [69](json/key-folding-mixed.json) | [1750](json/logs.json) | [845](json/firewall.json) | [36](json/small.json) | [72](json/github-issue.json) | [1326](json/users-50.json) | [95](json/medium.json) | [139](json/hikes.json) | [93](json/package.json) | [49](json/key-folding-basic.json) | [48](json/irregular.json) | [52](json/key-folding-with-array.json) | [771](json/products.json) | [1516](json/routes.json) | 7,245 |
|
|
61
|
+
| YAML | [73](yaml/chat.yaml) | [79](yaml/metrics.yaml) | [260](yaml/large.yaml) | [72](yaml/key-folding-mixed.yaml) | [1780](yaml/logs.yaml) | [911](yaml/firewall.yaml) | [40](yaml/small.yaml) | [77](yaml/github-issue.yaml) | [1365](yaml/users-50.yaml) | [99](yaml/medium.yaml) | [143](yaml/hikes.yaml) | [93](yaml/package.yaml) | [53](yaml/key-folding-basic.yaml) | [52](yaml/irregular.yaml) | [60](yaml/key-folding-with-array.yaml) | [799](yaml/products.yaml) | [1572](yaml/routes.yaml) | 7,528 |
|
|
62
|
+
| TOON | [62](toon/chat.toon) | [59](toon/metrics.toon) | [249](toon/large.toon) | [73](toon/key-folding-mixed.toon) | [1885](toon/logs.toon) | [955](toon/firewall.toon) | [37](toon/small.toon) | [72](toon/github-issue.toon) | [836](toon/users-50.toon) | [76](toon/medium.toon) | [105](toon/hikes.toon) | [93](toon/package.toon) | [52](toon/key-folding-basic.toon) | [53](toon/irregular.toon) | [58](toon/key-folding-with-array.toon) | [720](toon/products.toon) | [1557](toon/routes.toon) | 6,942 |
|
|
63
|
+
<!-- LEGACY_TABLE_END -->
|
|
64
|
+
|
|
65
|
+
Note: The legacy tokenizer may produce different results than modern Claude models, but is useful for comparison and runs locally without API calls.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Modern Claude Tokenizer
|
|
70
|
+
|
|
71
|
+
Tokens measured using **Claude API** token counting endpoint (claude-sonnet-4). This represents the actual token usage for modern Claude models. Token counting is free via the API.
|
|
72
|
+
|
|
73
|
+
<!-- CLAUDE_CHART_START -->
|
|
74
|
+
```mermaid
|
|
75
|
+
xychart-beta
|
|
76
|
+
title "Token Savings vs JSON (negative = better)"
|
|
77
|
+
x-axis ["Users50", "Hikes", "Products", "KF-basic", "KF-arr", "Metrics", "Medium", "Large", "Firewall", "KF-mix", "Routes", "Chat", "Issue", "Irregular", "Package", "Small", "Logs"]
|
|
78
|
+
y-axis "% vs JSON" -50 --> 40
|
|
79
|
+
line "JSON" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
|
80
|
+
line "Jot" [-39, -29, -24, -24, -24, -23, -22, -16, -15, -15, -14, -12, -9, -6, -6, -4, -2]
|
|
81
|
+
line "YAML" [22, 19, 27, 1, 14, 19, 19, 16, 13, 6, 9, 6, 8, 21, 4, 13, 18]
|
|
82
|
+
line "TOON" [-36, -25, 8, 1, 6, -18, -14, 12, 17, 8, 7, -10, 1, 24, 5, 2, 21]
|
|
83
|
+
```
|
|
84
|
+
<!-- CLAUDE_CHART_END -->
|
|
85
|
+
|
|
86
|
+
### Per-File Breakdown (Claude)
|
|
87
|
+
|
|
88
|
+
<!-- CLAUDE_TABLE_START -->
|
|
89
|
+
| Format | Chat | Metrics | Large | KF-mix | Logs | Firewall | Small | Issue | Users50 | Medium | Hikes | Package | KF-basic | Irregular | KF-arr | Products | Routes | Total |
|
|
90
|
+
|--------|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|
|
|
91
|
+
| Jot | [71](jot/chat.jot) | [68](jot/metrics.jot) | [241](jot/large.jot) | [76](jot/key-folding-mixed.jot) | [1762](jot/logs.jot) | [867](jot/firewall.jot) | [44](jot/small.jot) | [78](jot/github-issue.jot) | [830](jot/users-50.jot) | [83](jot/medium.jot) | [115](jot/hikes.jot) | [99](jot/package.jot) | [53](jot/key-folding-basic.jot) | [59](jot/irregular.jot) | [53](jot/key-folding-with-array.jot) | [634](jot/products.jot) | [1472](jot/routes.jot) | 6,605 |
|
|
92
|
+
| JSON-m | [81](json/chat.json) | [88](json/metrics.json) | [286](json/large.json) | [89](json/key-folding-mixed.json) | [1798](json/logs.json) | [1022](json/firewall.json) | [46](json/small.json) | [86](json/github-issue.json) | [1369](json/users-50.json) | [107](json/medium.json) | [163](json/hikes.json) | [105](json/package.json) | [70](json/key-folding-basic.json) | [63](json/irregular.json) | [70](json/key-folding-with-array.json) | [839](json/products.json) | [1707](json/routes.json) | 7,989 |
|
|
93
|
+
| YAML | [86](yaml/chat.yaml) | [105](yaml/metrics.yaml) | [333](yaml/large.yaml) | [94](yaml/key-folding-mixed.yaml) | [2118](yaml/logs.yaml) | [1152](yaml/firewall.yaml) | [52](yaml/small.yaml) | [93](yaml/github-issue.yaml) | [1666](yaml/users-50.yaml) | [127](yaml/medium.yaml) | [194](yaml/hikes.yaml) | [109](yaml/package.yaml) | [71](yaml/key-folding-basic.yaml) | [76](yaml/irregular.yaml) | [80](yaml/key-folding-with-array.yaml) | [1063](yaml/products.yaml) | [1857](yaml/routes.yaml) | 9,276 |
|
|
94
|
+
| TOON | [73](toon/chat.toon) | [72](toon/metrics.toon) | [319](toon/large.toon) | [96](toon/key-folding-mixed.toon) | [2175](toon/logs.toon) | [1195](toon/firewall.toon) | [47](toon/small.toon) | [87](toon/github-issue.toon) | [879](toon/users-50.toon) | [92](toon/medium.toon) | [123](toon/hikes.toon) | [110](toon/package.toon) | [71](toon/key-folding-basic.toon) | [78](toon/irregular.toon) | [74](toon/key-folding-with-array.toon) | [907](toon/products.toon) | [1822](toon/routes.toon) | 8,220 |
|
|
95
|
+
<!-- CLAUDE_TABLE_END -->
|
|
96
|
+
|
|
97
|
+
Note: Run `ANTHROPIC_API_KEY=... bun scripts/count-claude-tokens.ts` to regenerate Claude counts.
|
package/bun.lock
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"lockfileVersion": 1,
|
|
3
|
+
"workspaces": {
|
|
4
|
+
"": {
|
|
5
|
+
"devDependencies": {
|
|
6
|
+
"@types/bun": "^1.3.5",
|
|
7
|
+
},
|
|
8
|
+
},
|
|
9
|
+
},
|
|
10
|
+
"packages": {
|
|
11
|
+
"@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="],
|
|
12
|
+
|
|
13
|
+
"@types/node": ["@types/node@25.0.6", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-NNu0sjyNxpoiW3YuVFfNz7mxSQ+S4X2G28uqg2s+CzoqoQjLPsWSbsFFyztIAqt2vb8kfEAsJNepMGPTxFDx3Q=="],
|
|
14
|
+
|
|
15
|
+
"bun-types": ["bun-types@1.3.5", "", { "dependencies": { "@types/node": "*" } }, "sha512-inmAYe2PFLs0SUbFOWSVD24sg1jFlMPxOjOSSCYqUgn4Hsc3rDc7dFvfVYjFPNHtov6kgUeulV4SxbuIV/stPw=="],
|
|
16
|
+
|
|
17
|
+
"undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
|
|
18
|
+
}
|
|
19
|
+
}
|
package/jot.test.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test"
|
|
2
|
+
import { stringify, parse } from "./jot"
|
|
3
|
+
|
|
4
|
+
describe("stringify", () => {
|
|
5
|
+
test("null", () => expect(stringify(null)).toBe("null"))
|
|
6
|
+
test("true", () => expect(stringify(true)).toBe("true"))
|
|
7
|
+
test("false", () => expect(stringify(false)).toBe("false"))
|
|
8
|
+
test("number", () => expect(stringify(42)).toBe("42"))
|
|
9
|
+
test("float", () => expect(stringify(3.14)).toBe("3.14"))
|
|
10
|
+
|
|
11
|
+
describe("strings", () => {
|
|
12
|
+
test("simple string", () => expect(stringify("hello")).toBe("hello"))
|
|
13
|
+
test("string with space", () => expect(stringify("hello world")).toBe("hello world"))
|
|
14
|
+
test("empty string", () => expect(stringify("")).toBe('""'))
|
|
15
|
+
test("numeric string", () => expect(stringify("123")).toBe('"123"'))
|
|
16
|
+
test("reserved word", () => expect(stringify("true")).toBe('"true"'))
|
|
17
|
+
test("contains colon", () => expect(stringify("a:b")).toBe('"a:b"'))
|
|
18
|
+
test("contains semicolon", () => expect(stringify("a;b")).toBe('"a;b"'))
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
describe("arrays", () => {
|
|
22
|
+
test("empty array", () => expect(stringify([])).toBe("[]"))
|
|
23
|
+
test("simple array", () => expect(stringify([1, 2, 3])).toBe("[1,2,3]"))
|
|
24
|
+
test("string array", () => expect(stringify(["a", "b"])).toBe("[a,b]"))
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
describe("objects", () => {
|
|
28
|
+
test("empty object", () => expect(stringify({})).toBe("{}"))
|
|
29
|
+
test("simple object", () => expect(stringify({ name: "Alice", age: 30 })).toBe("{name:Alice,age:30}"))
|
|
30
|
+
})
|
|
31
|
+
|
|
32
|
+
describe("key folding", () => {
|
|
33
|
+
test("fold 1 level", () => expect(stringify({ a: { b: 1 } })).toBe("{a.b:1}"))
|
|
34
|
+
test("fold 2 levels", () => expect(stringify({ a: { b: { c: 1 } } })).toBe("{a.b.c:1}"))
|
|
35
|
+
test("no fold multi-key", () => expect(stringify({ a: { b: 1, c: 2 } })).toBe("{a:{b:1,c:2}}"))
|
|
36
|
+
test("key with dot", () => expect(stringify({ "a.b": 1 })).toBe('{"a.b":1}'))
|
|
37
|
+
test("key with dot nested", () => expect(stringify({ "a.b": { c: 1 } })).toBe('{"a.b":{c:1}}'))
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
describe("tables", () => {
|
|
41
|
+
test("uniform table", () => expect(stringify([{ a: 1, b: 2 }, { a: 3, b: 4 }])).toBe("{{:a,b;1,2;3,4}}"))
|
|
42
|
+
test("3-row table", () => expect(stringify([{ x: 1 }, { x: 2 }, { x: 3 }])).toBe("{{:x;1;2;3}}"))
|
|
43
|
+
test("mixed schema with reuse", () => expect(stringify([{ a: 1 }, { a: 2 }, { b: 3 }])).toBe("{{:a;1;2;:b;3}}"))
|
|
44
|
+
test("no reuse", () => expect(stringify([{ a: 1 }, { b: 2 }])).toBe("[{a:1},{b:2}]"))
|
|
45
|
+
test("single obj", () => expect(stringify([{ a: 1 }])).toBe("[{a:1}]"))
|
|
46
|
+
})
|
|
47
|
+
})
|
|
48
|
+
|
|
49
|
+
describe("parse", () => {
|
|
50
|
+
test("null", () => expect(parse("null")).toBe(null))
|
|
51
|
+
test("true", () => expect(parse("true")).toBe(true))
|
|
52
|
+
test("number", () => expect(parse("42")).toBe(42))
|
|
53
|
+
test("string", () => expect(parse("hello")).toBe("hello"))
|
|
54
|
+
test("quoted", () => expect(parse('"hello world"')).toBe("hello world"))
|
|
55
|
+
|
|
56
|
+
test("array", () => expect(parse("[1,2,3]")).toEqual([1, 2, 3]))
|
|
57
|
+
test("object", () => expect(parse("{name:Alice,age:30}")).toEqual({ name: "Alice", age: 30 }))
|
|
58
|
+
|
|
59
|
+
describe("key unfolding", () => {
|
|
60
|
+
test("fold", () => expect(parse("{a.b:1}")).toEqual({ a: { b: 1 } }))
|
|
61
|
+
test("fold deep", () => expect(parse("{a.b.c:1}")).toEqual({ a: { b: { c: 1 } } }))
|
|
62
|
+
test("quoted dot key", () => expect(parse('{"a.b":1}')).toEqual({ "a.b": 1 }))
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
describe("tables", () => {
|
|
66
|
+
test("table", () => expect(parse("{{:a,b;1,2;3,4}}")).toEqual([{ a: 1, b: 2 }, { a: 3, b: 4 }]))
|
|
67
|
+
test("schema change", () => expect(parse("{{:a;1;:b;2}}")).toEqual([{ a: 1 }, { b: 2 }]))
|
|
68
|
+
})
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
describe("round-trip", () => {
|
|
72
|
+
const testCases: [string, unknown][] = [
|
|
73
|
+
["null", null],
|
|
74
|
+
["true", true],
|
|
75
|
+
["false", false],
|
|
76
|
+
["number", 42],
|
|
77
|
+
["string", "hello"],
|
|
78
|
+
["empty array", []],
|
|
79
|
+
["number array", [1, 2, 3]],
|
|
80
|
+
["empty object", {}],
|
|
81
|
+
["simple object", { a: 1, b: 2 }],
|
|
82
|
+
["nested 1 level", { a: { b: 1 } }],
|
|
83
|
+
["nested 2 levels", { a: { b: { c: 1 } } }],
|
|
84
|
+
["uniform table", [{ a: 1, b: 2 }, { a: 3, b: 4 }]],
|
|
85
|
+
["mixed schema", [{ a: 1 }, { b: 2 }]],
|
|
86
|
+
["nested table", { users: [{ id: 1, name: "Alice" }, { id: 2, name: "Bob" }] }],
|
|
87
|
+
["key with dot", { "a.b": 1 }],
|
|
88
|
+
["key with semicolon", { "x;y": "test" }],
|
|
89
|
+
["string with semicolon", "a;b"],
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
for (const [name, original] of testCases) {
|
|
93
|
+
test(name, () => {
|
|
94
|
+
const encoded = stringify(original)
|
|
95
|
+
const decoded = parse(encoded)
|
|
96
|
+
expect(decoded).toEqual(original)
|
|
97
|
+
})
|
|
98
|
+
}
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
describe("samples", () => {
|
|
102
|
+
const { readdirSync, readFileSync } = require("fs")
|
|
103
|
+
const { join } = require("path")
|
|
104
|
+
|
|
105
|
+
const samplesDir = join(__dirname, "samples")
|
|
106
|
+
const jsonFiles = readdirSync(samplesDir)
|
|
107
|
+
.filter((f: string) => f.endsWith(".json") && !f.includes(".pretty."))
|
|
108
|
+
|
|
109
|
+
for (const jsonFile of jsonFiles) {
|
|
110
|
+
const baseName = jsonFile.replace(".json", "")
|
|
111
|
+
const jotFile = `${baseName}.jot`
|
|
112
|
+
|
|
113
|
+
describe(baseName, () => {
|
|
114
|
+
const jsonPath = join(samplesDir, jsonFile)
|
|
115
|
+
const jotPath = join(samplesDir, jotFile)
|
|
116
|
+
|
|
117
|
+
const jsonContent = readFileSync(jsonPath, "utf-8").trim()
|
|
118
|
+
const expectedJot = readFileSync(jotPath, "utf-8").trim()
|
|
119
|
+
const originalData = JSON.parse(jsonContent)
|
|
120
|
+
|
|
121
|
+
test("encoding matches jot", () => {
|
|
122
|
+
const encoded = stringify(originalData)
|
|
123
|
+
expect(encoded).toBe(expectedJot)
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
test("round-trip matches json semantically", () => {
|
|
127
|
+
const encoded = stringify(originalData)
|
|
128
|
+
const decoded = parse(encoded)
|
|
129
|
+
expect(decoded).toEqual(originalData)
|
|
130
|
+
})
|
|
131
|
+
})
|
|
132
|
+
}
|
|
133
|
+
})
|