jtoken 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ *.so
7
+ *.egg
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ .eggs/
12
+ *.whl
13
+ .claude/
14
+ pg.py
15
+ .env
16
+
17
+ # Virtual environments
18
+ .venv/
19
+ venv/
20
+ env/
21
+ ENV/
22
+
23
+ # Testing
24
+ .pytest_cache/
25
+ .coverage
26
+ htmlcov/
27
+ .tox/
28
+
29
+ # Type checkers
30
+ .mypy_cache/
31
+ .pyright/
32
+
33
+ # IDEs
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+
39
+ # macOS
40
+ .DS_Store
jtoken-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hermann Samimi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
jtoken-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,264 @@
1
+ Metadata-Version: 2.4
2
+ Name: jtoken
3
+ Version: 0.1.0
4
+ Summary: A lightweight, human-readable key-value serialization format
5
+ Project-URL: Homepage, https://github.com/hermannsamimi/jtoken
6
+ Project-URL: Repository, https://github.com/hermannsamimi/jtoken
7
+ Project-URL: Issues, https://github.com/hermannsamimi/jtoken/issues
8
+ Author-email: Hermann Samimi <hermannsamimi@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: encoding,format,key-value,llm,serialization,text,tokens
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Topic :: Text Processing :: General
23
+ Requires-Python: >=3.8
24
+ Provides-Extra: dev
25
+ Requires-Dist: build>=1.0; extra == 'dev'
26
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
27
+ Requires-Dist: pytest>=7.0; extra == 'dev'
28
+ Requires-Dist: tiktoken>=0.5; extra == 'dev'
29
+ Provides-Extra: tiktoken
30
+ Requires-Dist: tiktoken>=0.5; extra == 'tiktoken'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # jtoken
34
+
35
+ Compress JSON for LLM prompts — same data, fewer tokens.
36
+
37
+ ## What it does
38
+
39
+ jtoken strips the syntactic noise from JSON (`"`, `{}`, `,`) and collapses all
40
+ `null`, `true`, and `false` fields into one summary line per value. Nested dicts
41
+ are flattened with dot notation so the same collapse applies at every level.
42
+ The result is a compact format an LLM reads just as well as JSON.
43
+
44
+ **JSON (30 tokens):**
45
+ ```json
46
+ {"name": "Alice", "age": 30, "active": true, "verified": false, "ref": null}
47
+ ```
48
+
49
+ **jtoken (21 tokens):**
50
+ ```
51
+ name: Alice
52
+ age: 30
53
+ trues: active
54
+ falses: verified
55
+ nulls: ref
56
+ ```
57
+
58
+ The round-trip is lossless: `decode(encode(data)) == data` for all supported types.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ # Core — no external dependencies
64
+ pip install jtoken
65
+
66
+ # With accurate LLM token counting
67
+ pip install "jtoken[tiktoken]"
68
+ ```
69
+
70
+ ## Quick start
71
+
72
+ ```python
73
+ import jtoken
74
+
75
+ data = {
76
+ "user": "alice",
77
+ "age": 30,
78
+ "premium": True,
79
+ "verified": True,
80
+ "is_remote": False,
81
+ "trial": False,
82
+ "score": 9.5,
83
+ "referral": None,
84
+ "last_login": None,
85
+ }
86
+
87
+ text = jtoken.encode(data)
88
+ # user: alice
89
+ # age: 30
90
+ # score: 9.5
91
+ # trues: premium,verified
92
+ # falses: is_remote,trial
93
+ # nulls: referral,last_login
94
+
95
+ original = jtoken.decode(text)
96
+ assert original == data
97
+ ```
98
+
99
+ `dumps` / `loads` are available as `json`-style aliases.
100
+
101
+ ## CLI
102
+
103
+ ```bash
104
+ echo '{"name": "Alice", "active": true}' | jtoken encode
105
+ printf 'name: Alice\ntrues: active\n' | jtoken decode
106
+ echo '{"name": "Alice", "active": true}' | jtoken stats
107
+ echo '{"name": "Alice", "active": true}' | jtoken count
108
+ ```
109
+
110
+ Use `-f/--file` to read from a file instead of stdin. `stats` and `count` accept
111
+ `--model` and `--backend` (`auto`, `tiktoken`, `estimate`).
112
+
113
+ ## Nested documents
114
+
115
+ Nested dicts are flattened with dot notation. Booleans and nulls at any depth
116
+ are collapsed into the same summary lines.
117
+
118
+ ```python
119
+ data = {
120
+ "title": "Engineer",
121
+ "metadata": {
122
+ "verified": True,
123
+ "sponsored": False,
124
+ "score": None,
125
+ "source": {
126
+ "crawled": True,
127
+ "enriched": None,
128
+ },
129
+ },
130
+ }
131
+
132
+ print(jtoken.encode(data))
133
+ # title: Engineer
134
+ # trues: metadata.verified,metadata.source.crawled
135
+ # falses: metadata.sponsored
136
+ # nulls: metadata.score,metadata.source.enriched
137
+ ```
138
+
139
+ Decode reconstructs the full nested structure:
140
+
141
+ ```python
142
+ assert jtoken.decode(jtoken.encode(data)) == data # ✓
143
+ ```
144
+
145
+ **Limitation:** keys cannot contain `.` (reserved for nesting) or `": "`.
146
+ Arrays are not supported.
147
+
148
+ ## Token savings
149
+
150
+ ```python
151
+ import jtoken
152
+
153
+ stats = jtoken.token_savings(data)
154
+ print(stats)
155
+ # jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)
156
+
157
+ n = jtoken.count_tokens(data) # count jtoken tokens only
158
+ ```
159
+
160
+ Savings are compared against `json.dumps(data)` — the standard representation
161
+ you'd paste into a prompt. Savings are highest when a document has many `null`
162
+ or boolean fields.
163
+
164
+ ```python
165
+ # Specify model or encoding
166
+ stats = jtoken.token_savings(data, model="gpt-4o")
167
+ stats = jtoken.token_savings(data, model="o200k_base")
168
+
169
+ # No tiktoken dependency
170
+ stats = jtoken.token_savings(data, backend="estimate")
171
+ ```
172
+
173
+ ## API
174
+
175
+ ### `encode(data: dict) -> str`
176
+
177
+ Compresses a dict into jtoken. Supported value types: `str`, `int`, `float`,
178
+ `bool`, `None`, nested `dict`.
179
+
180
+ **Summary lines (always at the end):**
181
+
182
+ | line | contains |
183
+ |---|---|
184
+ | `trues: k1,k2,...` | all keys whose value is `True` |
185
+ | `falses: k1,k2,...` | all keys whose value is `False` |
186
+ | `nulls: k1,k2,...` | all keys whose value is `None` |
187
+
188
+ String values that would decode ambiguously (look like a number or boolean)
189
+ keep their quotes:
190
+
191
+ ```python
192
+ jtoken.encode({"zip": "90210"}) # → 'zip: "90210"' (string, quotes kept)
193
+ jtoken.encode({"zip": 90210}) # → 'zip: 90210' (int, no quotes)
194
+ jtoken.encode({"ok": "true"}) # → 'ok: "true"' (string, quotes kept)
195
+ jtoken.encode({"ok": True}) # → 'trues: ok' (bool, collapsed)
196
+ ```
197
+
198
+ Raises `JPackEncodeError` for unsupported types, dots or `": "` in keys, or
199
+ reserved key names (`nulls`, `trues`, `falses`).
200
+
201
+ ### `decode(text: str) -> dict`
202
+
203
+ Reconstructs the original dict, including nested structure from dot-notation
204
+ keys. Type inference for scalar values:
205
+
206
+ | value | decoded as |
207
+ |---|---|
208
+ | `"quoted"` | `str` (always) |
209
+ | key in `trues:` line | `True` |
210
+ | key in `falses:` line | `False` |
211
+ | key in `nulls:` line | `None` |
212
+ | integer literal, e.g. `42` | `int` |
213
+ | float literal, e.g. `3.14` | `float` |
214
+ | anything else | `str` |
215
+
216
+ Raises `JPackDecodeError` for invalid input.
217
+
218
+ ### `token_savings(data, *, model, backend) -> TokenSavings`
219
+
220
+ Compares jtoken vs `json.dumps` token usage.
221
+
222
+ ```python
223
+ stats.jtoken_tokens # int
224
+ stats.json_tokens # int
225
+ stats.saved # int
226
+ stats.percent # float
227
+ str(stats) # "jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)"
228
+ ```
229
+
230
+ ### `count_tokens(data, *, model, backend) -> int`
231
+
232
+ Counts LLM tokens in the jtoken representation. Accepts a dict or an
233
+ already-encoded jtoken string.
234
+
235
+ **`backend` options:**
236
+
237
+ | value | behaviour |
238
+ |---|---|
239
+ | `"auto"` (default) | tiktoken if installed, otherwise estimates |
240
+ | `"tiktoken"` | requires tiktoken; raises `TokenCountError` if absent |
241
+ | `"estimate"` | ~4 chars/token heuristic, no extra dependency |
242
+
243
+ ## Exceptions
244
+
245
+ ```
246
+ JPackError
247
+ ├── JPackEncodeError
248
+ ├── JPackDecodeError
249
+ └── TokenCountError
250
+ ```
251
+
252
+ ## Development
253
+
254
+ ```bash
255
+ git clone https://github.com/hermannsamimi/jtoken
256
+ cd jtoken
257
+ pip install -e ".[dev]"
258
+ pytest
259
+ pytest --cov=jtoken --cov-report=term-missing
260
+ ```
261
+
262
+ ## License
263
+
264
+ MIT — © 2026 Hermann Samimi
jtoken-0.1.0/README.md ADDED
@@ -0,0 +1,232 @@
1
+ # jtoken
2
+
3
+ Compress JSON for LLM prompts — same data, fewer tokens.
4
+
5
+ ## What it does
6
+
7
+ jtoken strips the syntactic noise from JSON (`"`, `{}`, `,`) and collapses all
8
+ `null`, `true`, and `false` fields into one summary line per value. Nested dicts
9
+ are flattened with dot notation so the same collapse applies at every level.
10
+ The result is a compact format an LLM reads just as well as JSON.
11
+
12
+ **JSON (30 tokens):**
13
+ ```json
14
+ {"name": "Alice", "age": 30, "active": true, "verified": false, "ref": null}
15
+ ```
16
+
17
+ **jtoken (21 tokens):**
18
+ ```
19
+ name: Alice
20
+ age: 30
21
+ trues: active
22
+ falses: verified
23
+ nulls: ref
24
+ ```
25
+
26
+ The round-trip is lossless: `decode(encode(data)) == data` for all supported types.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ # Core — no external dependencies
32
+ pip install jtoken
33
+
34
+ # With accurate LLM token counting
35
+ pip install "jtoken[tiktoken]"
36
+ ```
37
+
38
+ ## Quick start
39
+
40
+ ```python
41
+ import jtoken
42
+
43
+ data = {
44
+ "user": "alice",
45
+ "age": 30,
46
+ "premium": True,
47
+ "verified": True,
48
+ "is_remote": False,
49
+ "trial": False,
50
+ "score": 9.5,
51
+ "referral": None,
52
+ "last_login": None,
53
+ }
54
+
55
+ text = jtoken.encode(data)
56
+ # user: alice
57
+ # age: 30
58
+ # score: 9.5
59
+ # trues: premium,verified
60
+ # falses: is_remote,trial
61
+ # nulls: referral,last_login
62
+
63
+ original = jtoken.decode(text)
64
+ assert original == data
65
+ ```
66
+
67
+ `dumps` / `loads` are available as `json`-style aliases.
68
+
69
+ ## CLI
70
+
71
+ ```bash
72
+ echo '{"name": "Alice", "active": true}' | jtoken encode
73
+ printf 'name: Alice\ntrues: active\n' | jtoken decode
74
+ echo '{"name": "Alice", "active": true}' | jtoken stats
75
+ echo '{"name": "Alice", "active": true}' | jtoken count
76
+ ```
77
+
78
+ Use `-f/--file` to read from a file instead of stdin. `stats` and `count` accept
79
+ `--model` and `--backend` (`auto`, `tiktoken`, `estimate`).
80
+
81
+ ## Nested documents
82
+
83
+ Nested dicts are flattened with dot notation. Booleans and nulls at any depth
84
+ are collapsed into the same summary lines.
85
+
86
+ ```python
87
+ data = {
88
+ "title": "Engineer",
89
+ "metadata": {
90
+ "verified": True,
91
+ "sponsored": False,
92
+ "score": None,
93
+ "source": {
94
+ "crawled": True,
95
+ "enriched": None,
96
+ },
97
+ },
98
+ }
99
+
100
+ print(jtoken.encode(data))
101
+ # title: Engineer
102
+ # trues: metadata.verified,metadata.source.crawled
103
+ # falses: metadata.sponsored
104
+ # nulls: metadata.score,metadata.source.enriched
105
+ ```
106
+
107
+ Decode reconstructs the full nested structure:
108
+
109
+ ```python
110
+ assert jtoken.decode(jtoken.encode(data)) == data # ✓
111
+ ```
112
+
113
+ **Limitation:** keys cannot contain `.` (reserved for nesting) or `": "`.
114
+ Arrays are not supported.
115
+
116
+ ## Token savings
117
+
118
+ ```python
119
+ import jtoken
120
+
121
+ stats = jtoken.token_savings(data)
122
+ print(stats)
123
+ # jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)
124
+
125
+ n = jtoken.count_tokens(data) # count jtoken tokens only
126
+ ```
127
+
128
+ Savings are compared against `json.dumps(data)` — the standard representation
129
+ you'd paste into a prompt. Savings are highest when a document has many `null`
130
+ or boolean fields.
131
+
132
+ ```python
133
+ # Specify model or encoding
134
+ stats = jtoken.token_savings(data, model="gpt-4o")
135
+ stats = jtoken.token_savings(data, model="o200k_base")
136
+
137
+ # No tiktoken dependency
138
+ stats = jtoken.token_savings(data, backend="estimate")
139
+ ```
140
+
141
+ ## API
142
+
143
+ ### `encode(data: dict) -> str`
144
+
145
+ Compresses a dict into jtoken. Supported value types: `str`, `int`, `float`,
146
+ `bool`, `None`, nested `dict`.
147
+
148
+ **Summary lines (always at the end):**
149
+
150
+ | line | contains |
151
+ |---|---|
152
+ | `trues: k1,k2,...` | all keys whose value is `True` |
153
+ | `falses: k1,k2,...` | all keys whose value is `False` |
154
+ | `nulls: k1,k2,...` | all keys whose value is `None` |
155
+
156
+ String values that would decode ambiguously (look like a number or boolean)
157
+ keep their quotes:
158
+
159
+ ```python
160
+ jtoken.encode({"zip": "90210"}) # → 'zip: "90210"' (string, quotes kept)
161
+ jtoken.encode({"zip": 90210}) # → 'zip: 90210' (int, no quotes)
162
+ jtoken.encode({"ok": "true"}) # → 'ok: "true"' (string, quotes kept)
163
+ jtoken.encode({"ok": True}) # → 'trues: ok' (bool, collapsed)
164
+ ```
165
+
166
+ Raises `JPackEncodeError` for unsupported types, dots or `": "` in keys, or
167
+ reserved key names (`nulls`, `trues`, `falses`).
168
+
169
+ ### `decode(text: str) -> dict`
170
+
171
+ Reconstructs the original dict, including nested structure from dot-notation
172
+ keys. Type inference for scalar values:
173
+
174
+ | value | decoded as |
175
+ |---|---|
176
+ | `"quoted"` | `str` (always) |
177
+ | key in `trues:` line | `True` |
178
+ | key in `falses:` line | `False` |
179
+ | key in `nulls:` line | `None` |
180
+ | integer literal, e.g. `42` | `int` |
181
+ | float literal, e.g. `3.14` | `float` |
182
+ | anything else | `str` |
183
+
184
+ Raises `JPackDecodeError` for invalid input.
185
+
186
+ ### `token_savings(data, *, model, backend) -> TokenSavings`
187
+
188
+ Compares jtoken vs `json.dumps` token usage.
189
+
190
+ ```python
191
+ stats.jtoken_tokens # int
192
+ stats.json_tokens # int
193
+ stats.saved # int
194
+ stats.percent # float
195
+ str(stats) # "jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)"
196
+ ```
197
+
198
+ ### `count_tokens(data, *, model, backend) -> int`
199
+
200
+ Counts LLM tokens in the jtoken representation. Accepts a dict or an
201
+ already-encoded jtoken string.
202
+
203
+ **`backend` options:**
204
+
205
+ | value | behaviour |
206
+ |---|---|
207
+ | `"auto"` (default) | tiktoken if installed, otherwise estimates |
208
+ | `"tiktoken"` | requires tiktoken; raises `TokenCountError` if absent |
209
+ | `"estimate"` | ~4 chars/token heuristic, no extra dependency |
210
+
211
+ ## Exceptions
212
+
213
+ ```
214
+ JPackError
215
+ ├── JPackEncodeError
216
+ ├── JPackDecodeError
217
+ └── TokenCountError
218
+ ```
219
+
220
+ ## Development
221
+
222
+ ```bash
223
+ git clone https://github.com/hermannsamimi/jtoken
224
+ cd jtoken
225
+ pip install -e ".[dev]"
226
+ pytest
227
+ pytest --cov=jtoken --cov-report=term-missing
228
+ ```
229
+
230
+ ## License
231
+
232
+ MIT — © 2026 Hermann Samimi
@@ -0,0 +1,28 @@
1
+ """jtoken — Compress JSON for LLM prompts with ~30% fewer tokens."""
2
+
3
+ from ._codec import decode, encode
4
+ from .exceptions import JPackDecodeError, JPackEncodeError, JPackError
5
+ from .tokens import TokenCountError, TokenSavings, count_tokens, token_savings
6
+
7
+ __version__ = "0.1.0"
8
+ __author__ = "Hermann Samimi"
9
+
10
+ # json-style aliases
11
+ dumps = encode
12
+ loads = decode
13
+
14
+ __all__ = [
15
+ "encode",
16
+ "decode",
17
+ "dumps",
18
+ "loads",
19
+ "count_tokens",
20
+ "token_savings",
21
+ "TokenSavings",
22
+ "JPackError",
23
+ "JPackEncodeError",
24
+ "JPackDecodeError",
25
+ "TokenCountError",
26
+ "__version__",
27
+ "__author__",
28
+ ]
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()