jtoken 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jtoken/__init__.py ADDED
@@ -0,0 +1,28 @@
1
+ """jtoken — Compress JSON for LLM prompts with ~30% fewer tokens."""
2
+
3
+ from ._codec import decode, encode
4
+ from .exceptions import JPackDecodeError, JPackEncodeError, JPackError
5
+ from .tokens import TokenCountError, TokenSavings, count_tokens, token_savings
6
+
7
+ __version__ = "0.1.0"
8
+ __author__ = "Hermann Samimi"
9
+
10
+ # json-style aliases
11
+ dumps = encode
12
+ loads = decode
13
+
14
+ __all__ = [
15
+ "encode",
16
+ "decode",
17
+ "dumps",
18
+ "loads",
19
+ "count_tokens",
20
+ "token_savings",
21
+ "TokenSavings",
22
+ "JPackError",
23
+ "JPackEncodeError",
24
+ "JPackDecodeError",
25
+ "TokenCountError",
26
+ "__version__",
27
+ "__author__",
28
+ ]
jtoken/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
jtoken/_codec.py ADDED
@@ -0,0 +1,160 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .exceptions import JPackDecodeError, JPackEncodeError
6
+
7
+ _SEP = ": "
8
+ _NULLS_KEY = "nulls"
9
+ _TRUES_KEY = "trues"
10
+ _FALSES_KEY = "falses"
11
+ _RESERVED = {_NULLS_KEY, _TRUES_KEY, _FALSES_KEY}
12
+
13
+
14
def encode(data: dict[str, Any]) -> str:
    """Serialize a dict into the jtoken text format.

    Nested dicts are flattened to dot-notation paths first. Numbers and
    strings then become one ``key: value`` line each, in iteration order,
    and every path holding ``True``, ``False`` or ``None`` is collected into
    a single ``trues`` / ``falses`` / ``nulls`` summary line appended at the
    end (in that order, each only when non-empty).

    Args:
        data: The mapping to encode.

    Returns:
        The jtoken text, lines joined by ``"\\n"`` (empty string for ``{}``).

    Raises:
        JPackEncodeError: If *data* is not a dict, if a leaf value is not
            str/int/float/bool/None, or if flattening rejects a key.
    """
    if not isinstance(data, dict):
        raise JPackEncodeError(f"Expected dict, got {type(data).__name__}")

    scalar_lines: list[str] = []
    # Insertion order here is the order the summary lines are emitted in.
    collapsed: dict[str, list[str]] = {
        _TRUES_KEY: [],
        _FALSES_KEY: [],
        _NULLS_KEY: [],
    }

    for path, value in _flatten(data).items():
        # Identity checks come first so bools never reach the int branch.
        if value is None:
            collapsed[_NULLS_KEY].append(path)
        elif value is True:
            collapsed[_TRUES_KEY].append(path)
        elif value is False:
            collapsed[_FALSES_KEY].append(path)
        elif isinstance(value, (int, float)):
            scalar_lines.append(f"{path}{_SEP}{value}")
        elif isinstance(value, str):
            # Quote only the strings that would otherwise decode as another type.
            rendered = f'"{value}"' if _is_ambiguous(value) else value
            scalar_lines.append(f"{path}{_SEP}{rendered}")
        else:
            raise JPackEncodeError(
                f"Unsupported value type for key {path!r}: {type(value).__name__}. "
                "Supported types: str, int, float, bool, None."
            )

    summary_lines = [
        f"{label}{_SEP}{','.join(paths)}"
        for label, paths in collapsed.items()
        if paths
    ]
    return "\n".join(scalar_lines + summary_lines)
57
+
58
+
59
def decode(text: str) -> dict[str, Any]:
    """Parse jtoken text back into a (possibly nested) dict.

    The text is stripped and split into lines; blank lines are skipped.
    Each remaining line is split at the first separator. ``nulls`` /
    ``trues`` / ``falses`` lines assign ``None`` / ``True`` / ``False`` to
    every comma-separated key they list. For any other line the value is,
    in order: unquoted if wrapped in double quotes, a bool if it spells
    ``true``/``false`` (case-insensitive), an ``int``, a ``float``, or else
    kept as the raw string.

    Args:
        text: The jtoken-encoded document.

    Returns:
        The reconstructed dict, with dot-notation keys expanded into nesting.

    Raises:
        JPackDecodeError: If *text* is not a str or a non-blank line lacks
            the separator.
    """
    if not isinstance(text, str):
        raise JPackDecodeError(f"Expected str, got {type(text).__name__}")

    summary_values: dict[str, Any] = {
        _NULLS_KEY: None,
        _TRUES_KEY: True,
        _FALSES_KEY: False,
    }
    flat: dict[str, Any] = {}

    for lineno, line in enumerate(text.strip().splitlines(), 1):
        if not line.strip():
            continue

        key, found_sep, value = line.partition(_SEP)
        if not found_sep:
            raise JPackDecodeError(
                f"Invalid format on line {lineno}: missing {_SEP!r} separator"
            )

        if key in summary_values:
            for name in value.split(","):
                flat[name.strip()] = summary_values[key]
            continue

        if _is_quoted(value):
            flat[key] = value[1:-1]
            continue

        lowered = value.lower()
        if lowered == "true":
            flat[key] = True  # backward-compat with inline key: true
        elif lowered == "false":
            flat[key] = False  # backward-compat with inline key: false
        else:
            # Try the narrowest numeric type first; fall back to the raw text.
            for cast in (int, float):
                try:
                    flat[key] = cast(value)
                    break
                except ValueError:
                    continue
            else:
                flat[key] = value

    return _unflatten(flat)
101
+
102
+
103
+ # ── helpers ───────────────────────────────────────────────────────────────────
104
+
105
def _flatten(data: dict[str, Any], prefix: str = "") -> dict[str, Any]:
    """Collapse nested dicts into a single level keyed by dotted paths.

    Args:
        data: The mapping to flatten; keys are converted with ``str()``.
        prefix: Dotted path of the enclosing dict ("" at the top level).

    Returns:
        A flat dict mapping each dotted path to its non-dict leaf value.

    Raises:
        JPackEncodeError: If a key contains ``.`` or the separator, or if a
            top-level key is one of the reserved summary names.
    """
    flattened: dict[str, Any] = {}
    for raw_key, value in data.items():
        name = str(raw_key)
        if "." in name:
            raise JPackEncodeError(
                f"Key {name!r} contains '.' which is reserved for nested paths"
            )
        if _SEP in name:
            raise JPackEncodeError(f"Key cannot contain {_SEP!r}: {name!r}")
        # Reserved names only clash at the top level; nested ones get a prefix.
        if name in _RESERVED and not prefix:
            raise JPackEncodeError(f"Key '{name}' is reserved")

        path = name if not prefix else f"{prefix}.{name}"
        if not isinstance(value, dict):
            flattened[path] = value
            continue
        flattened.update(_flatten(value, path))
    return flattened
124
+
125
+
126
def _unflatten(flat: dict[str, Any]) -> dict[str, Any]:
    """Expand dotted-path keys back into nested dicts.

    Paths are processed in iteration order. An intermediate segment that is
    missing, or currently holds a non-dict value, is replaced by a new dict;
    the final segment is always overwritten with the leaf value.

    Args:
        flat: Mapping of dotted paths to leaf values.

    Returns:
        The nested dict built from those paths.
    """
    tree: dict[str, Any] = {}
    for path, leaf in flat.items():
        *parents, last = path.split(".")
        node = tree
        for name in parents:
            child = node.get(name)
            if not isinstance(child, dict):
                child = node[name] = {}
            node = child
        node[last] = leaf
    return tree
138
+
139
+
140
def _is_ambiguous(v: str) -> bool:
    """Return True if *v* must be wrapped in quotes to survive a round trip.

    A bare string is unsafe when decode would read it back as something else:

    * the empty string;
    * text with leading or trailing whitespace (decode strips the whole
      document, so a trailing-whitespace value on the last line is lost);
    * text that already starts and ends with a double quote (decode removes
      one pair of surrounding quotes, so an extra pair must be added);
    * ``true`` / ``false`` in any letter case;
    * anything ``int()`` or ``float()`` accepts.

    Args:
        v: The string value about to be written.

    Returns:
        True when the value needs quoting, False when it can be written bare.
    """
    if not v:
        return True
    if v != v.strip():
        return True
    if len(v) >= 2 and v[0] == '"' and v[-1] == '"':
        return True
    if v.lower() in ("true", "false"):
        return True
    for cast in (int, float):
        try:
            cast(v)
        except ValueError:
            continue
        return True
    return False
157
+
158
+
159
def _is_quoted(v: str) -> bool:
    """Return True if *v* has at least two characters and is wrapped in double quotes."""
    if len(v) < 2:
        return False
    return v.startswith('"') and v.endswith('"')
jtoken/cli.py ADDED
@@ -0,0 +1,141 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from typing import Any
7
+
8
+ from . import decode, encode, count_tokens, token_savings
9
+ from .exceptions import JPackDecodeError, JPackEncodeError, JPackError
10
+ from .tokens import TokenCountError
11
+
12
+
13
def _read_input(path: str | None) -> str:
    """Return the full input text.

    Args:
        path: File to read as UTF-8; a falsy value means read stdin instead.

    Returns:
        The complete contents of the file or of stdin.
    """
    if not path:
        return sys.stdin.read()
    with open(path, encoding="utf-8") as source:
        return source.read()
18
+
19
+
20
def _load_json_object(text: str) -> dict[str, Any]:
    """Parse *text* as JSON and require a top-level object.

    Args:
        text: Raw JSON source.

    Returns:
        The parsed dict.

    Raises:
        SystemExit: With a message, if the text is not valid JSON or the
            top-level value is not an object.
    """
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError as exc:
        raise SystemExit(f"Invalid JSON input: {exc}") from exc
    if isinstance(parsed, dict):
        return parsed
    raise SystemExit("JSON input must be an object")
28
+
29
+
30
def _load_json_or_jtoken(text: str) -> dict[str, Any] | str:
    """Interpret CLI input as either a JSON object or raw jtoken text.

    Input whose first non-whitespace character is ``{`` or ``[`` is parsed
    as JSON; anything else is returned as the stripped text.

    Args:
        text: Raw input as read from the file or stdin.

    Returns:
        A dict for JSON input, otherwise the stripped jtoken string.

    Raises:
        SystemExit: If the input is empty or whitespace only, or if the JSON
            branch rejects it.
    """
    payload = text.strip()
    if not payload:
        raise SystemExit("Input is empty")
    looks_like_json = payload.startswith(("{", "["))
    return _load_json_object(text) if looks_like_json else payload
37
+
38
+
39
def _handle_errors(func):
    """Decorate a CLI command so jtoken errors end the process cleanly.

    The wrapped command runs unchanged. If it raises ``JPackError`` or
    ``TokenCountError``, the message is printed to stderr and the process
    exits with status 1 instead of showing a traceback.

    Args:
        func: The command function to wrap.

    Returns:
        A wrapper that keeps *func*'s name and docstring.
    """
    import functools  # local import so this block needs no new file-level import

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (JPackError, TokenCountError) as exc:
            print(exc, file=sys.stderr)
            raise SystemExit(1) from exc

    return wrapper
48
+
49
+
50
@_handle_errors
def _cmd_encode(args: argparse.Namespace) -> None:
    """Run ``jtoken encode``: read a JSON object and write its jtoken form to stdout.

    The output is written as-is, without a trailing newline.
    """
    raw = _read_input(args.file)
    encoded = encode(_load_json_object(raw))
    sys.stdout.write(encoded)
54
+
55
+
56
@_handle_errors
def _cmd_decode(args: argparse.Namespace) -> None:
    """Run ``jtoken decode``: read jtoken text and print it as indented JSON.

    Keys are sorted and the output ends with a newline.
    """
    decoded = decode(_read_input(args.file))
    out = sys.stdout
    json.dump(decoded, out, indent=2, sort_keys=True)
    out.write("\n")
62
+
63
+
64
@_handle_errors
def _cmd_stats(args: argparse.Namespace) -> None:
    """Run ``jtoken stats``: print the jtoken-vs-JSON token comparison for the input."""
    source = _read_input(args.file)
    savings = token_savings(
        _load_json_or_jtoken(source),
        model=args.model,
        backend=args.backend,
    )
    print(savings)
69
+
70
+
71
@_handle_errors
def _cmd_count(args: argparse.Namespace) -> None:
    """Run ``jtoken count``: print the token count of the input's jtoken form."""
    source = _read_input(args.file)
    total = count_tokens(
        _load_json_or_jtoken(source),
        model=args.model,
        backend=args.backend,
    )
    print(total)
75
+
76
+
77
def _add_token_flags(parser: argparse.ArgumentParser) -> None:
    """Register the ``--model`` and ``--backend`` options on *parser*."""
    model_options = {
        "default": "cl100k_base",
        "help": "tiktoken model or encoding name (default: cl100k_base)",
    }
    backend_options = {
        "choices": ("auto", "tiktoken", "estimate"),
        "default": "auto",
        "help": "token counting backend (default: auto)",
    }
    parser.add_argument("--model", **model_options)
    parser.add_argument("--backend", **backend_options)
89
+
90
+
91
def _add_input_file(parser: argparse.ArgumentParser) -> None:
    """Register the ``-f`` / ``--file`` input option on *parser*."""
    flags = ("-f", "--file")
    parser.add_argument(*flags, help="read input from a file instead of stdin")
97
+
98
+
99
def _build_parser() -> argparse.ArgumentParser:
    """Build the ``jtoken`` argument parser with its four subcommands.

    Every subcommand inherits the ``-f/--file`` option from a shared parent
    parser and stores its handler in ``func``; ``stats`` and ``count`` also
    take the token-counting flags. A subcommand is required.
    """
    root = argparse.ArgumentParser(prog="jtoken", description="jtoken CLI")
    shared_input = argparse.ArgumentParser(add_help=False)
    _add_input_file(shared_input)
    commands = root.add_subparsers(dest="command", required=True)

    # (name, help text, handler, takes --model/--backend)
    command_table = (
        ("encode", "encode JSON to jtoken", _cmd_encode, False),
        ("decode", "decode jtoken to JSON", _cmd_decode, False),
        ("stats", "compare jtoken vs JSON token usage", _cmd_stats, True),
        ("count", "count jtoken tokens", _cmd_count, True),
    )
    for name, help_text, handler, wants_token_flags in command_table:
        sub = commands.add_parser(name, parents=[shared_input], help=help_text)
        if wants_token_flags:
            _add_token_flags(sub)
        sub.set_defaults(func=handler)

    return root
132
+
133
+
134
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: parse *argv* and dispatch to the chosen subcommand.

    Args:
        argv: Argument list to parse; ``None`` is passed through to
            ``parse_args`` unchanged.
    """
    namespace = _build_parser().parse_args(argv)
    handler = namespace.func
    handler(namespace)
138
+
139
+
140
+ if __name__ == "__main__":
141
+ main()
jtoken/exceptions.py ADDED
@@ -0,0 +1,10 @@
1
class JPackError(Exception):
    """Root of the jtoken exception hierarchy; catch this to handle any jtoken error."""
3
+
4
+
5
class JPackEncodeError(JPackError):
    """Signals that a dict could not be encoded to jtoken."""
7
+
8
+
9
class JPackDecodeError(JPackError):
    """Signals that a jtoken string could not be decoded."""
jtoken/tokens.py ADDED
@@ -0,0 +1,137 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from typing import Any, Union
6
+
7
+ from ._codec import decode, encode
8
+ from .exceptions import JPackError
9
+
10
+ try:
11
+ import tiktoken as _tiktoken
12
+
13
+ _TIKTOKEN_AVAILABLE = True
14
+ except ImportError:
15
+ _TIKTOKEN_AVAILABLE = False
16
+
17
+
18
class TokenCountError(JPackError):
    """Signals that a token count could not be produced."""
20
+
21
+
22
@dataclass
class TokenSavings:
    """Token counts for the same data in jtoken and in JSON form."""

    # Token count of the jtoken representation.
    jtoken_tokens: int
    # Token count of the JSON representation.
    json_tokens: int

    @property
    def saved(self) -> int:
        """JSON tokens minus jtoken tokens."""
        return self.json_tokens - self.jtoken_tokens

    @property
    def percent(self) -> float:
        """Savings as a percentage of the JSON count (0.0 when that count is zero)."""
        if self.json_tokens == 0:
            return 0.0
        return self.saved / self.json_tokens * 100

    def __str__(self) -> str:
        """Render both counts and the savings as one line."""
        parts = (
            f"jtoken: {self.jtoken_tokens} tokens",
            f"json: {self.json_tokens} tokens",
            f"saved: {self.saved} ({self.percent:.1f}%)",
        )
        return " | ".join(parts)
45
+
46
+
47
def count_tokens(
    data: Union[dict[str, Any], str],
    *,
    model: str = "cl100k_base",
    backend: str = "auto",
) -> int:
    """Return the number of LLM tokens in the jtoken form of *data*.

    Args:
        data: A dict, which is encoded to jtoken first, or a string that is
            counted as given.
        model: tiktoken encoding name (e.g. "cl100k_base", "o200k_base") or
            OpenAI model name (e.g. "gpt-4", "gpt-4o").
        backend: One of
            "auto" — tiktoken if installed, otherwise the estimate;
            "tiktoken" — tiktoken required, TokenCountError if absent;
            "estimate" — always the ~4 chars/token heuristic.

    Returns:
        The token count as an int.
    """
    if isinstance(data, dict):
        data = encode(data)
    return _count(data, model=model, backend=backend)
70
+
71
+
72
def token_savings(
    data: Union[dict[str, Any], str],
    *,
    model: str = "cl100k_base",
    backend: str = "auto",
) -> TokenSavings:
    """Measure how many tokens jtoken saves over ``json.dumps`` for *data*.

    A string is treated as jtoken text: it is counted as given and decoded
    to obtain the dict that is serialized to JSON. Any other input is
    encoded to jtoken and serialized to JSON directly.

    Args:
        data: A dict or an already-encoded jtoken string.
        model: tiktoken encoding or model name (see count_tokens).
        backend: Counting backend (see count_tokens).

    Returns:
        TokenSavings holding the jtoken and JSON token counts.

    Example::

        stats = jtoken.token_savings({"name": "Alice", "age": 30, "active": True})
        print(stats)
        # jtoken: 8 tokens | json: 12 tokens | saved: 4 (33.3%)
    """
    if isinstance(data, str):
        jtoken_text, source_dict = data, decode(data)
    else:
        jtoken_text, source_dict = encode(data), data

    json_text = json.dumps(source_dict)
    counting = {"model": model, "backend": backend}

    return TokenSavings(
        jtoken_tokens=_count(jtoken_text, **counting),
        json_tokens=_count(json_text, **counting),
    )
107
+
108
+
109
def _count(text: str, *, model: str, backend: str) -> int:
    """Count tokens in *text* with the requested backend.

    Args:
        text: The text to measure.
        model: Tried first as a tiktoken model name, then as an encoding name.
        backend: "estimate" always uses the heuristic. "tiktoken" requires
            tiktoken and a resolvable model/encoding. Anything else uses
            tiktoken when it is installed and resolves *model*, and the
            heuristic otherwise.

    Returns:
        The token count.

    Raises:
        TokenCountError: With backend "tiktoken", if tiktoken is not
            installed or *model* is not a known model or encoding.
    """
    if backend == "estimate":
        return _estimate(text)

    if backend == "tiktoken" and not _TIKTOKEN_AVAILABLE:
        raise TokenCountError(
            "tiktoken is not installed. Run: pip install jtoken[tiktoken]"
        )

    if _TIKTOKEN_AVAILABLE and backend in ("auto", "tiktoken"):
        try:
            enc = _tiktoken.encoding_for_model(model)
        except KeyError:
            # Not a model name; retry treating it as an encoding name.
            try:
                enc = _tiktoken.get_encoding(model)
            except Exception as exc:
                if backend == "tiktoken":
                    raise TokenCountError(
                        f"Unknown tiktoken model/encoding: {model!r}"
                    ) from exc
                return _estimate(text)
        # The payload is arbitrary user data: by default tiktoken raises
        # ValueError when the text contains a special-token literal such as
        # "<|endoftext|>". Disable that check so it is counted as plain text.
        return len(enc.encode(text, disallowed_special=()))

    return _estimate(text)
133
+
134
+
135
def _estimate(text: str) -> int:
    """Estimate tokens at ~4 characters each, rounded up (0 for empty text)."""
    if not text:
        return 0
    full_groups, leftover = divmod(len(text), 4)
    return max(1, full_groups + (1 if leftover else 0))
@@ -0,0 +1,264 @@
1
+ Metadata-Version: 2.4
2
+ Name: jtoken
3
+ Version: 0.1.0
4
+ Summary: A lightweight, human-readable key-value serialization format
5
+ Project-URL: Homepage, https://github.com/hermannsamimi/jtoken
6
+ Project-URL: Repository, https://github.com/hermannsamimi/jtoken
7
+ Project-URL: Issues, https://github.com/hermannsamimi/jtoken/issues
8
+ Author-email: Hermann Samimi <hermannsamimi@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: encoding,format,key-value,llm,serialization,text,tokens
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Topic :: Text Processing :: General
23
+ Requires-Python: >=3.8
24
+ Provides-Extra: dev
25
+ Requires-Dist: build>=1.0; extra == 'dev'
26
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
27
+ Requires-Dist: pytest>=7.0; extra == 'dev'
28
+ Requires-Dist: tiktoken>=0.5; extra == 'dev'
29
+ Provides-Extra: tiktoken
30
+ Requires-Dist: tiktoken>=0.5; extra == 'tiktoken'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # jtoken
34
+
35
+ Compress JSON for LLM prompts — same data, fewer tokens.
36
+
37
+ ## What it does
38
+
39
+ jtoken strips the syntactic noise from JSON (`"`, `{}`, `,`) and collapses all
40
+ `null`, `true`, and `false` fields each into a single summary line. Nested dicts
41
+ are flattened with dot notation so the same collapse applies at every level.
42
+ The result is a compact format an LLM reads just as well as JSON.
43
+
44
+ **JSON (30 tokens):**
45
+ ```json
46
+ {"name": "Alice", "age": 30, "active": true, "verified": false, "ref": null}
47
+ ```
48
+
49
+ **jtoken (21 tokens):**
50
+ ```
51
+ name: Alice
52
+ age: 30
53
+ trues: active
54
+ falses: verified
55
+ nulls: ref
56
+ ```
57
+
58
+ The round-trip is lossless: `decode(encode(data)) == data` for all supported types.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ # Core — no external dependencies
64
+ pip install jtoken
65
+
66
+ # With accurate LLM token counting
67
+ pip install jtoken[tiktoken]
68
+ ```
69
+
70
+ ## Quick start
71
+
72
+ ```python
73
+ import jtoken
74
+
75
+ data = {
76
+ "user": "alice",
77
+ "age": 30,
78
+ "premium": True,
79
+ "verified": True,
80
+ "is_remote": False,
81
+ "trial": False,
82
+ "score": 9.5,
83
+ "referral": None,
84
+ "last_login": None,
85
+ }
86
+
87
+ text = jtoken.encode(data)
88
+ # user: alice
89
+ # age: 30
90
+ # score: 9.5
91
+ # trues: premium,verified
92
+ # falses: is_remote,trial
93
+ # nulls: referral,last_login
94
+
95
+ original = jtoken.decode(text)
96
+ assert original == data
97
+ ```
98
+
99
+ `dumps` / `loads` are available as `json`-style aliases.
100
+
101
+ ## CLI
102
+
103
+ ```bash
104
+ echo '{"name": "Alice", "active": true}' | jtoken encode
105
+ printf 'name: Alice\ntrues: active\n' | jtoken decode
106
+ echo '{"name": "Alice", "active": true}' | jtoken stats
107
+ echo '{"name": "Alice", "active": true}' | jtoken count
108
+ ```
109
+
110
+ Use `-f/--file` to read from a file instead of stdin. `stats` and `count` accept
111
+ `--model` and `--backend` (`auto`, `tiktoken`, `estimate`).
112
+
113
+ ## Nested documents
114
+
115
+ Nested dicts are flattened with dot notation. Booleans and nulls at any depth
116
+ are collapsed into the same summary lines.
117
+
118
+ ```python
119
+ data = {
120
+ "title": "Engineer",
121
+ "metadata": {
122
+ "verified": True,
123
+ "sponsored": False,
124
+ "score": None,
125
+ "source": {
126
+ "crawled": True,
127
+ "enriched": None,
128
+ },
129
+ },
130
+ }
131
+
132
+ print(jtoken.encode(data))
133
+ # title: Engineer
134
+ # trues: metadata.verified,metadata.source.crawled
135
+ # falses: metadata.sponsored
136
+ # nulls: metadata.score,metadata.source.enriched
137
+ ```
138
+
139
+ Decode reconstructs the full nested structure:
140
+
141
+ ```python
142
+ assert jtoken.decode(jtoken.encode(data)) == data # ✓
143
+ ```
144
+
145
+ **Limitation:** keys cannot contain `.` (reserved for nesting) or the separator `: ` (a colon followed by a space).
146
+ Arrays are not supported.
147
+
148
+ ## Token savings
149
+
150
+ ```python
151
+ import jtoken
152
+
153
+ stats = jtoken.token_savings(data)
154
+ print(stats)
155
+ # jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)
156
+
157
+ n = jtoken.count_tokens(data) # count jtoken tokens only
158
+ ```
159
+
160
+ Savings are compared against `json.dumps(data)` — the standard representation
161
+ you'd paste into a prompt. Savings are highest when a document has many `null`
162
+ or boolean fields.
163
+
164
+ ```python
165
+ # Specify model or encoding
166
+ stats = jtoken.token_savings(data, model="gpt-4o")
167
+ stats = jtoken.token_savings(data, model="o200k_base")
168
+
169
+ # No tiktoken dependency
170
+ stats = jtoken.token_savings(data, backend="estimate")
171
+ ```
172
+
173
+ ## API
174
+
175
+ ### `encode(data: dict) -> str`
176
+
177
+ Compresses a dict into jtoken. Supported value types: `str`, `int`, `float`,
178
+ `bool`, `None`, nested `dict`.
179
+
180
+ **Summary lines (always at the end):**
181
+
182
+ | line | contains |
183
+ |---|---|
184
+ | `trues: k1,k2,...` | all keys whose value is `True` |
185
+ | `falses: k1,k2,...` | all keys whose value is `False` |
186
+ | `nulls: k1,k2,...` | all keys whose value is `None` |
187
+
188
+ String values that would decode ambiguously (look like a number or boolean)
189
+ keep their quotes:
190
+
191
+ ```python
192
+ jtoken.encode({"zip": "90210"}) # → 'zip: "90210"' (string, quotes kept)
193
+ jtoken.encode({"zip": 90210}) # → 'zip: 90210' (int, no quotes)
194
+ jtoken.encode({"ok": "true"}) # → 'ok: "true"' (string, quotes kept)
195
+ jtoken.encode({"ok": True}) # → 'trues: ok' (bool, collapsed)
196
+ ```
197
+
198
+ Raises `JPackEncodeError` for unsupported types, dots or the `: ` separator in keys, or
199
+ reserved key names (`nulls`, `trues`, `falses`).
200
+
201
+ ### `decode(text: str) -> dict`
202
+
203
+ Reconstructs the original dict, including nested structure from dot-notation
204
+ keys. Type inference for scalar values:
205
+
206
+ | value | decoded as |
207
+ |---|---|
208
+ | `"quoted"` | `str` (always) |
209
+ | key in `trues:` line | `True` |
210
+ | key in `falses:` line | `False` |
211
+ | key in `nulls:` line | `None` |
212
+ | integer literal, e.g. `42` | `int` |
213
+ | float literal, e.g. `3.14` | `float` |
214
+ | anything else | `str` |
215
+
216
+ Raises `JPackDecodeError` for invalid input.
217
+
218
+ ### `token_savings(data, *, model, backend) -> TokenSavings`
219
+
220
+ Compares jtoken vs `json.dumps` token usage.
221
+
222
+ ```python
223
+ stats.jtoken_tokens # int
224
+ stats.json_tokens # int
225
+ stats.saved # int
226
+ stats.percent # float
227
+ str(stats) # "jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)"
228
+ ```
229
+
230
+ ### `count_tokens(data, *, model, backend) -> int`
231
+
232
+ Counts LLM tokens in the jtoken representation. Accepts a dict or an
233
+ already-encoded jtoken string.
234
+
235
+ **`backend` options:**
236
+
237
+ | value | behaviour |
238
+ |---|---|
239
+ | `"auto"` (default) | tiktoken if installed, otherwise estimates |
240
+ | `"tiktoken"` | requires tiktoken; raises `TokenCountError` if absent |
241
+ | `"estimate"` | ~4 chars/token heuristic, no extra dependency |
242
+
243
+ ## Exceptions
244
+
245
+ ```
246
+ JPackError
247
+ ├── JPackEncodeError
248
+ ├── JPackDecodeError
249
+ └── TokenCountError
250
+ ```
251
+
252
+ ## Development
253
+
254
+ ```bash
255
+ git clone https://github.com/hermannsamimi/jtoken
256
+ cd jtoken
257
+ pip install -e ".[dev]"
258
+ pytest
259
+ pytest --cov=jtoken --cov-report=term-missing
260
+ ```
261
+
262
+ ## License
263
+
264
+ MIT — © 2026 Hermann Samimi
@@ -0,0 +1,11 @@
1
+ jtoken/__init__.py,sha256=KeGDSIUyUn51_cjdOLyQmpy1o4RiQswrYE1-L-WaZUA,618
2
+ jtoken/__main__.py,sha256=MSmt_5Xg84uHqzTN38JwgseJK8rsJn_11A8WD99VtEo,61
3
+ jtoken/_codec.py,sha256=2j9i-pjjEUgEdu_VkddUMZBX_0mnrizhJJU5bzjo09w,5107
4
+ jtoken/cli.py,sha256=1fMEvO_a5iM-Fp9IxF8PHhHAKj_95zM_f1UfPVMcIDI,3859
5
+ jtoken/exceptions.py,sha256=-B11CECC_BEHVMm39Ub4CPBfF_usR-Bs4EDq9w6MHWw,252
6
+ jtoken/tokens.py,sha256=HGr4R1jeGSm9D_3UFjRys_EE1axwCbHKlEkwHOcRERE,4077
7
+ jtoken-0.1.0.dist-info/METADATA,sha256=hHxFwTqdLEl6EtYrnkrwMmU_FNn2Oj6rqY2-5qouQZ4,6965
8
+ jtoken-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ jtoken-0.1.0.dist-info/entry_points.txt,sha256=hUIc42NM-4aqDuGlYffU3cmJaqs7GG_UzggmTneDwfY,43
10
+ jtoken-0.1.0.dist-info/licenses/LICENSE,sha256=06gleizuefVN7pyIwD4F1-bHnBkKPkQTuYABA6fHWH4,1071
11
+ jtoken-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ jtoken = jtoken.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hermann Samimi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.