PyPI - jtoken - Versions diffs - 0.1.0__tar.gz - Mend

jtoken 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

jtoken-0.1.0/.gitignore +40 -0
jtoken-0.1.0/LICENSE +21 -0
jtoken-0.1.0/PKG-INFO +264 -0
jtoken-0.1.0/README.md +232 -0
jtoken-0.1.0/jtoken/__init__.py +28 -0
jtoken-0.1.0/jtoken/__main__.py +4 -0
jtoken-0.1.0/jtoken/_codec.py +160 -0
jtoken-0.1.0/jtoken/cli.py +141 -0
jtoken-0.1.0/jtoken/exceptions.py +10 -0
jtoken-0.1.0/jtoken/tokens.py +137 -0
jtoken-0.1.0/pyproject.toml +53 -0
jtoken-0.1.0/tests/__init__.py +0 -0
jtoken-0.1.0/tests/test_cli.py +122 -0
jtoken-0.1.0/tests/test_codec.py +285 -0
jtoken-0.1.0/tests/test_tokens.py +84 -0

jtoken-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,40 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+*.so
+*.egg
+*.egg-info/
+dist/
+build/
+.eggs/
+*.whl
+.claude/
+pg.py
+.env
+# Virtual environments
+.venv/
+venv/
+env/
+ENV/
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+# Type checkers
+.mypy_cache/
+.pyright/
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+# macOS
+.DS_Store

jtoken-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Hermann Samimi
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

jtoken-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,264 @@
+Metadata-Version: 2.4
+Name: jtoken
+Version: 0.1.0
+Summary: A lightweight, human-readable key-value serialization format
+Project-URL: Homepage, https://github.com/hermannsamimi/jtoken
+Project-URL: Repository, https://github.com/hermannsamimi/jtoken
+Project-URL: Issues, https://github.com/hermannsamimi/jtoken/issues
+Author-email: Hermann Samimi <hermannsamimi@gmail.com>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: encoding,format,key-value,llm,serialization,text,tokens
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing :: General
+Requires-Python: >=3.8
+Provides-Extra: dev
+Requires-Dist: build>=1.0; extra == 'dev'
+Requires-Dist: pytest-cov>=4.0; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == 'dev'
+Requires-Dist: tiktoken>=0.5; extra == 'dev'
+Provides-Extra: tiktoken
+Requires-Dist: tiktoken>=0.5; extra == 'tiktoken'
+Description-Content-Type: text/markdown
+# jtoken
+Compress JSON for LLM prompts — same data, fewer tokens.
+## What it does
+jtoken strips the syntactic noise from JSON (`"`, `{}`, `,`) and collapses all
+`null`, `true`, and `false` fields each into a single summary line. Nested dicts
+are flattened with dot notation so the same collapse applies at every level.
+The result is a compact format an LLM reads just as well as JSON.
+**JSON (30 tokens):**
+```json
+{"name": "Alice", "age": 30, "active": true, "verified": false, "ref": null}
+```
+**jtoken (21 tokens):**
+```
+name: Alice
+age: 30
+trues: active
+falses: verified
+nulls: ref
+```
+The round-trip is lossless: `decode(encode(data)) == data` for all supported types.
+## Installation
+```bash
+# Core — no external dependencies
+pip install jtoken
+# With accurate LLM token counting
+pip install "jtoken[tiktoken]"
+```
+## Quick start
+```python
+import jtoken
+data = {
+    "user": "alice",
+    "age": 30,
+    "premium": True,
+    "verified": True,
+    "is_remote": False,
+    "trial": False,
+    "score": 9.5,
+    "referral": None,
+    "last_login": None,
+}
+text = jtoken.encode(data)
+# user: alice
+# age: 30
+# score: 9.5
+# trues: premium,verified
+# falses: is_remote,trial
+# nulls: referral,last_login
+original = jtoken.decode(text)
+assert original == data
+```
+`dumps` / `loads` are available as `json`-style aliases.
+## CLI
+```bash
+echo '{"name": "Alice", "active": true}' | jtoken encode
+printf 'name: Alice\ntrues: active\n' | jtoken decode
+echo '{"name": "Alice", "active": true}' | jtoken stats
+echo '{"name": "Alice", "active": true}' | jtoken count
+```
+Use `-f/--file` to read from a file instead of stdin. `stats` and `count` accept
+`--model` and `--backend` (`auto`, `tiktoken`, `estimate`).
+## Nested documents
+Nested dicts are flattened with dot notation. Booleans and nulls at any depth
+are collapsed into the same summary lines.
+```python
+data = {
+    "title": "Engineer",
+    "metadata": {
+        "verified": True,
+        "sponsored": False,
+        "score": None,
+        "source": {
+            "crawled": True,
+            "enriched": None,
+        },
+    },
+}
+print(jtoken.encode(data))
+# title: Engineer
+# trues: metadata.verified,metadata.source.crawled
+# falses: metadata.sponsored
+# nulls: metadata.score,metadata.source.enriched
+```
+Decode reconstructs the full nested structure:
+```python
+assert jtoken.decode(jtoken.encode(data)) == data  # ✓
+```
+**Limitation:** keys cannot contain `.` (reserved for nesting) or `": "`, and
+the key names `nulls`, `trues`, and `falses` are reserved for the summary lines.
+Arrays are not supported.
+## Token savings
+```python
+import jtoken
+stats = jtoken.token_savings(data)
+print(stats)
+# jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)
+n = jtoken.count_tokens(data)  # count jtoken tokens only
+```
+Savings are compared against `json.dumps(data)` — the standard representation
+you'd paste into a prompt. Savings are highest when a document has many `null`
+or boolean fields.
+```python
+# Specify model or encoding
+stats = jtoken.token_savings(data, model="gpt-4o")
+stats = jtoken.token_savings(data, model="o200k_base")
+# No tiktoken dependency
+stats = jtoken.token_savings(data, backend="estimate")
+```
+## API
+### `encode(data: dict) -> str`
+Compresses a dict into jtoken. Supported value types: `str`, `int`, `float`,
+`bool`, `None`, nested `dict`.
+**Summary lines (always at the end):**
+| line | contains |
+|---|---|
+| `trues: k1,k2,...` | all keys whose value is `True` |
+| `falses: k1,k2,...` | all keys whose value is `False` |
+| `nulls: k1,k2,...` | all keys whose value is `None` |
+String values that would decode ambiguously (look like a number or boolean)
+keep their quotes:
+```python
+jtoken.encode({"zip": "90210"})  # → 'zip: "90210"'   (string, quotes kept)
+jtoken.encode({"zip":  90210})   # → 'zip: 90210'      (int, no quotes)
+jtoken.encode({"ok": "true"})    # → 'ok: "true"'      (string, quotes kept)
+jtoken.encode({"ok": True})      # → 'trues: ok'       (bool, collapsed)
+```
+Raises `JPackEncodeError` for unsupported types, dots or `": "` in keys, or
+reserved key names (`nulls`, `trues`, `falses`).
+### `decode(text: str) -> dict`
+Reconstructs the original dict, including nested structure from dot-notation
+keys. Type inference for scalar values:
+| value | decoded as |
+|---|---|
+| `"quoted"` | `str` (always) |
+| key in `trues:` line | `True` |
+| key in `falses:` line | `False` |
+| key in `nulls:` line | `None` |
+| integer literal, e.g. `42` | `int` |
+| float literal, e.g. `3.14` | `float` |
+| anything else | `str` |
+Raises `JPackDecodeError` for invalid input.
+### `token_savings(data, *, model, backend) -> TokenSavings`
+Compares jtoken vs `json.dumps` token usage.
+```python
+stats.jtoken_tokens  # int
+stats.json_tokens    # int
+stats.saved          # int
+stats.percent        # float
+str(stats)           # "jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)"
+```
+### `count_tokens(data, *, model, backend) -> int`
+Counts LLM tokens in the jtoken representation. Accepts a dict or an
+already-encoded jtoken string.
+**`backend` options:**
+| value | behaviour |
+|---|---|
+| `"auto"` (default) | tiktoken if installed, otherwise estimates |
+| `"tiktoken"` | requires tiktoken; raises `TokenCountError` if absent |
+| `"estimate"` | ~4 chars/token heuristic, no extra dependency |
+## Exceptions
+```
+JPackError
+├── JPackEncodeError
+├── JPackDecodeError
+└── TokenCountError
+```
+## Development
+```bash
+git clone https://github.com/hermannsamimi/jtoken
+cd jtoken
+pip install -e ".[dev]"
+pytest
+pytest --cov=jtoken --cov-report=term-missing
+```
+## License
+MIT — © 2026 Hermann Samimi

jtoken-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,232 @@
+# jtoken
+Compress JSON for LLM prompts — same data, fewer tokens.
+## What it does
+jtoken strips the syntactic noise from JSON (`"`, `{}`, `,`) and collapses all
+`null`, `true`, and `false` fields each into a single summary line. Nested dicts
+are flattened with dot notation so the same collapse applies at every level.
+The result is a compact format an LLM reads just as well as JSON.
+**JSON (30 tokens):**
+```json
+{"name": "Alice", "age": 30, "active": true, "verified": false, "ref": null}
+```
+**jtoken (21 tokens):**
+```
+name: Alice
+age: 30
+trues: active
+falses: verified
+nulls: ref
+```
+The round-trip is lossless: `decode(encode(data)) == data` for all supported types.
+## Installation
+```bash
+# Core — no external dependencies
+pip install jtoken
+# With accurate LLM token counting
+pip install "jtoken[tiktoken]"
+```
+## Quick start
+```python
+import jtoken
+data = {
+    "user": "alice",
+    "age": 30,
+    "premium": True,
+    "verified": True,
+    "is_remote": False,
+    "trial": False,
+    "score": 9.5,
+    "referral": None,
+    "last_login": None,
+}
+text = jtoken.encode(data)
+# user: alice
+# age: 30
+# score: 9.5
+# trues: premium,verified
+# falses: is_remote,trial
+# nulls: referral,last_login
+original = jtoken.decode(text)
+assert original == data
+```
+`dumps` / `loads` are available as `json`-style aliases.
+## CLI
+```bash
+echo '{"name": "Alice", "active": true}' | jtoken encode
+printf 'name: Alice\ntrues: active\n' | jtoken decode
+echo '{"name": "Alice", "active": true}' | jtoken stats
+echo '{"name": "Alice", "active": true}' | jtoken count
+```
+Use `-f/--file` to read from a file instead of stdin. `stats` and `count` accept
+`--model` and `--backend` (`auto`, `tiktoken`, `estimate`).
+## Nested documents
+Nested dicts are flattened with dot notation. Booleans and nulls at any depth
+are collapsed into the same summary lines.
+```python
+data = {
+    "title": "Engineer",
+    "metadata": {
+        "verified": True,
+        "sponsored": False,
+        "score": None,
+        "source": {
+            "crawled": True,
+            "enriched": None,
+        },
+    },
+}
+print(jtoken.encode(data))
+# title: Engineer
+# trues: metadata.verified,metadata.source.crawled
+# falses: metadata.sponsored
+# nulls: metadata.score,metadata.source.enriched
+```
+Decode reconstructs the full nested structure:
+```python
+assert jtoken.decode(jtoken.encode(data)) == data  # ✓
+```
+**Limitation:** keys cannot contain `.` (reserved for nesting) or `": "`, and
+the key names `nulls`, `trues`, and `falses` are reserved for the summary lines.
+Arrays are not supported.
+## Token savings
+```python
+import jtoken
+stats = jtoken.token_savings(data)
+print(stats)
+# jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)
+n = jtoken.count_tokens(data)  # count jtoken tokens only
+```
+Savings are compared against `json.dumps(data)` — the standard representation
+you'd paste into a prompt. Savings are highest when a document has many `null`
+or boolean fields.
+```python
+# Specify model or encoding
+stats = jtoken.token_savings(data, model="gpt-4o")
+stats = jtoken.token_savings(data, model="o200k_base")
+# No tiktoken dependency
+stats = jtoken.token_savings(data, backend="estimate")
+```
+## API
+### `encode(data: dict) -> str`
+Compresses a dict into jtoken. Supported value types: `str`, `int`, `float`,
+`bool`, `None`, nested `dict`.
+**Summary lines (always at the end):**
+| line | contains |
+|---|---|
+| `trues: k1,k2,...` | all keys whose value is `True` |
+| `falses: k1,k2,...` | all keys whose value is `False` |
+| `nulls: k1,k2,...` | all keys whose value is `None` |
+String values that would decode ambiguously (look like a number or boolean)
+keep their quotes:
+```python
+jtoken.encode({"zip": "90210"})  # → 'zip: "90210"'   (string, quotes kept)
+jtoken.encode({"zip":  90210})   # → 'zip: 90210'      (int, no quotes)
+jtoken.encode({"ok": "true"})    # → 'ok: "true"'      (string, quotes kept)
+jtoken.encode({"ok": True})      # → 'trues: ok'       (bool, collapsed)
+```
+Raises `JPackEncodeError` for unsupported types, dots or `": "` in keys, or
+reserved key names (`nulls`, `trues`, `falses`).
+### `decode(text: str) -> dict`
+Reconstructs the original dict, including nested structure from dot-notation
+keys. Type inference for scalar values:
+| value | decoded as |
+|---|---|
+| `"quoted"` | `str` (always) |
+| key in `trues:` line | `True` |
+| key in `falses:` line | `False` |
+| key in `nulls:` line | `None` |
+| integer literal, e.g. `42` | `int` |
+| float literal, e.g. `3.14` | `float` |
+| anything else | `str` |
+Raises `JPackDecodeError` for invalid input.
+### `token_savings(data, *, model, backend) -> TokenSavings`
+Compares jtoken vs `json.dumps` token usage.
+```python
+stats.jtoken_tokens  # int
+stats.json_tokens    # int
+stats.saved          # int
+stats.percent        # float
+str(stats)           # "jtoken: 22 tokens | json: 36 tokens | saved: 14 (38.9%)"
+```
+### `count_tokens(data, *, model, backend) -> int`
+Counts LLM tokens in the jtoken representation. Accepts a dict or an
+already-encoded jtoken string.
+**`backend` options:**
+| value | behaviour |
+|---|---|
+| `"auto"` (default) | tiktoken if installed, otherwise estimates |
+| `"tiktoken"` | requires tiktoken; raises `TokenCountError` if absent |
+| `"estimate"` | ~4 chars/token heuristic, no extra dependency |
+## Exceptions
+```
+JPackError
+├── JPackEncodeError
+├── JPackDecodeError
+└── TokenCountError
+```
+## Development
+```bash
+git clone https://github.com/hermannsamimi/jtoken
+cd jtoken
+pip install -e ".[dev]"
+pytest
+pytest --cov=jtoken --cov-report=term-missing
+```
+## License
+MIT — © 2026 Hermann Samimi

jtoken-0.1.0/jtoken/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""jtoken — Compress JSON for LLM prompts with ~30% fewer tokens."""
+from ._codec import decode, encode
+from .exceptions import JPackDecodeError, JPackEncodeError, JPackError
+from .tokens import TokenCountError, TokenSavings, count_tokens, token_savings
+__version__ = "0.1.0"
+__author__ = "Hermann Samimi"
+# json-style aliases
+dumps = encode
+loads = decode
+__all__ = [
+    "encode",
+    "decode",
+    "dumps",
+    "loads",
+    "count_tokens",
+    "token_savings",
+    "TokenSavings",
+    "JPackError",
+    "JPackEncodeError",
+    "JPackDecodeError",
+    "TokenCountError",
+    "__version__",
+    "__author__",
+]

jtoken-0.1.0/jtoken/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .cli import main
+if __name__ == "__main__":
+    main()