PyPI - jtoken - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

jtoken 0.2.2.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{jtoken-0.2.2 → jtoken-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jtoken
-Version: 0.2.2
+Version: 0.2.4
 Summary: Compress JSON-shaped documents for LLM prompts with normalization, CLI, and token measurement
 Project-URL: Homepage, https://github.com/hermannsamimi/jtoken
 Project-URL: Repository, https://github.com/hermannsamimi/jtoken
@@ -254,6 +254,19 @@ print(stats.jtoken_tokens, stats.json_tokens, stats.saved, stats.percent)
 `json_indent=2` compares against prompt-style pretty JSON. Use `json_indent=None` for compact JSON.
+### Representative token counts
+Sample payloads measured as pretty JSON versus jtoken on representative documents:
+| Document type | JSON | jtoken |
+|---|---:|---:|
+| ELK hit | 1537 | 583 |
+| Mongo shell | 770 | 508 |
+| PostgreSQL structured document | 831 | 685 |
+| Standard JSON | 617 | 503 |
+![Token count by representation](https://raw.githubusercontent.com/hermannsamimi/jtoken/main/docs/token-savings-bar-chart.svg)
 ## CLI
 ```bash

{jtoken-0.2.2 → jtoken-0.2.4}/README.md RENAMED Viewed

@@ -150,6 +150,19 @@ print(stats.jtoken_tokens, stats.json_tokens, stats.saved, stats.percent)
 `count_tokens` and `count_text_tokens` are also available. Savings compare the jtoken representation against pretty JSON by default (`json_indent=2`).
+### Representative token counts
+Sample payloads measured as pretty JSON versus jtoken on representative documents:
+| Document type | JSON | jtoken |
+|---|---:|---:|
+| ELK hit | 1537 | 583 |
+| Mongo shell | 770 | 508 |
+| PostgreSQL structured document | 831 | 685 |
+| Standard JSON | 617 | 503 |
+![Token count by representation](docs/token-savings-bar-chart.svg)
 ## API reference
 ### Package metadata

{jtoken-0.2.2 → jtoken-0.2.4}/README.pypi.md RENAMED Viewed

@@ -222,6 +222,19 @@ print(stats.jtoken_tokens, stats.json_tokens, stats.saved, stats.percent)
 `json_indent=2` compares against prompt-style pretty JSON. Use `json_indent=None` for compact JSON.
+### Representative token counts
+Sample payloads measured as pretty JSON versus jtoken on representative documents:
+| Document type | JSON | jtoken |
+|---|---:|---:|
+| ELK hit | 1537 | 583 |
+| Mongo shell | 770 | 508 |
+| PostgreSQL structured document | 831 | 685 |
+| Standard JSON | 617 | 503 |
+![Token count by representation](https://raw.githubusercontent.com/hermannsamimi/jtoken/main/docs/token-savings-bar-chart.svg)
 ## CLI
 ```bash

jtoken-0.2.4/docs/token-savings-bar-chart.svg ADDED Viewed

@@ -0,0 +1,40 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="960" height="520" viewBox="0 0 960 520" role="img" aria-label="Token count by representation">
+<rect width="100%" height="100%" fill="#ffffff"/>
+<text x="24" y="34" font-family="Inter, Arial, sans-serif" font-size="20" font-weight="600" fill="#1f2937">Token count by representation</text>
+<text x="72" y="492" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Document type</text>
+<text x="18" y="248.0" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563" transform="rotate(-90 18 248.0)">Tokens</text>
+<line x1="72" y1="424.0" x2="928" y2="424.0" stroke="#e5e7eb" stroke-width="1"/>
+<text x="62" y="428.0" text-anchor="end" font-family="Inter, Arial, sans-serif" font-size="11" fill="#6b7280">0</text>
+<line x1="72" y1="344.3" x2="928" y2="344.3" stroke="#e5e7eb" stroke-width="1"/>
+<text x="62" y="348.3" text-anchor="end" font-family="Inter, Arial, sans-serif" font-size="11" fill="#6b7280">400</text>
+<line x1="72" y1="264.7" x2="928" y2="264.7" stroke="#e5e7eb" stroke-width="1"/>
+<text x="62" y="268.7" text-anchor="end" font-family="Inter, Arial, sans-serif" font-size="11" fill="#6b7280">800</text>
+<line x1="72" y1="185.0" x2="928" y2="185.0" stroke="#e5e7eb" stroke-width="1"/>
+<text x="62" y="189.0" text-anchor="end" font-family="Inter, Arial, sans-serif" font-size="11" fill="#6b7280">1200</text>
+<line x1="72" y1="105.4" x2="928" y2="105.4" stroke="#e5e7eb" stroke-width="1"/>
+<text x="62" y="109.4" text-anchor="end" font-family="Inter, Arial, sans-serif" font-size="11" fill="#6b7280">1600</text>
+<rect x="140.0" y="117.9" width="34" height="306.1" rx="4" fill="#4C78A8"/>
+<text x="157.0" y="109.9" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">1537</text>
+<rect x="184.0" y="307.9" width="34" height="116.1" rx="4" fill="#F58518"/>
+<text x="201.0" y="299.9" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">583</text>
+<text x="179.0" y="462" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">ELK hit</text>
+<rect x="354.0" y="270.7" width="34" height="153.3" rx="4" fill="#4C78A8"/>
+<text x="371.0" y="262.7" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">770</text>
+<rect x="398.0" y="322.8" width="34" height="101.2" rx="4" fill="#F58518"/>
+<text x="415.0" y="314.8" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">508</text>
+<text x="393.0" y="462" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">Mongo shell</text>
+<rect x="568.0" y="258.5" width="34" height="165.5" rx="4" fill="#4C78A8"/>
+<text x="585.0" y="250.5" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">831</text>
+<rect x="612.0" y="287.6" width="34" height="136.4" rx="4" fill="#F58518"/>
+<text x="629.0" y="279.6" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">685</text>
+<text x="607.0" y="462" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">PostgreSQL document</text>
+<rect x="782.0" y="301.1" width="34" height="122.9" rx="4" fill="#4C78A8"/>
+<text x="799.0" y="293.1" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">617</text>
+<rect x="826.0" y="323.8" width="34" height="100.2" rx="4" fill="#F58518"/>
+<text x="843.0" y="315.8" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">503</text>
+<text x="821.0" y="462" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">Standard JSON</text>
+<rect x="778" y="28" width="14" height="14" rx="3" fill="#4C78A8"/>
+<text x="798" y="39" font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">JSON</text>
+<rect x="856" y="28" width="14" height="14" rx="3" fill="#F58518"/>
+<text x="876" y="39" font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">jtoken</text>
+</svg>

{jtoken-0.2.2 → jtoken-0.2.4}/jtoken/__init__.py RENAMED Viewed

@@ -19,7 +19,7 @@ from .tokens import (
     token_savings,
 )
-__version__ = "0.2.2"
+__version__ = "0.2.4"
 __author__ = "Hermann Samimi"
 # json-style aliases

jtoken-0.2.4/jtoken/data.json ADDED Viewed

@@ -0,0 +1,169 @@
+data = {
+    "organization": {
+        "id": 7001,
+        "name": "Nexus Corp",
+        "founded": "2001-04-12",
+        "dissolved": None,
+        "active": True,
+        "public": False,
+        "revenue": 4_500_000.75,
+        "employeeCount": 312,
+        "website": "https://nexuscorp.example.com",
+        "tagline": None,
+        "headquarters": {
+            "street": "88 Innovation Drive",
+            "city": "Austin",
+            "state": "TX",
+            "country": "US",
+            "postalCode": "78701",
+            "geo": {
+                "lat": 30.2672,
+                "lng": -97.7431,
+                "altitude_m": 149,
+                "verified": True,
+                "lastChecked": "2026-03-01T00:00:00Z",
+            },
+        },
+        "departments": [
+            {
+                "deptId": "DEPT-ENG",
+                "name": "Engineering",
+                "headCount": 120,
+                "remote": True,
+                "budget": 1_200_000.00,
+                "managerId": "USR-0042",
+                "parentDept": None,
+                "tags": ["backend", "infra", "ml"],
+                "kpis": {
+                    "deployFrequency": "daily",
+                    "avgLeadTimeDays": 3.5,
+                    "changeFailureRate": 0.02,
+                    "onCallActive": True,
+                    "incidentCount": 7,
+                    "lastIncident": "2026-04-29T03:12:00Z",
+                    "slaBreached": False,
+                },
+                "teams": [
+                    {
+                        "teamId": "TEAM-CORE",
+                        "name": "Core Platform",
+                        "size": 8,
+                        "lead": "Alice Nakamura",
+                        "stack": ["Python", "Go", "PostgreSQL"],
+                        "activeSprintId": "SPR-2026-19",
+                        "velocity": 42,
+                        "archived": False,
+                    },
+                    {
+                        "teamId": "TEAM-ML",
+                        "name": "Machine Learning",
+                        "size": 5,
+                        "lead": None,
+                        "stack": ["Python", "PyTorch", "CUDA"],
+                        "activeSprintId": None,
+                        "velocity": 0,
+                        "archived": False,
+                    },
+                ],
+            },
+            {
+                "deptId": "DEPT-OPS",
+                "name": "Operations",
+                "headCount": 45,
+                "remote": False,
+                "budget": 620_000.00,
+                "managerId": "USR-0017",
+                "parentDept": None,
+                "tags": ["logistics", "support"],
+                "kpis": {
+                    "ticketsClosedMonthly": 980,
+                    "avgResolutionHrs": 4.2,
+                    "customerSatisfaction": 4.7,
+                    "escalationRate": 0.05,
+                    "slaBreached": False,
+                    "lastIncident": None,
+                },
+                "teams": [],
+            },
+        ],
+        "auditLog": [
+            {
+                "eventId": "EVT-001",
+                "action": "ORG_CREATED",
+                "performedBy": "USR-0001",
+                "timestamp": "2001-04-12T09:00:00Z",
+                "ipAddress": "192.168.1.1",
+                "success": True,
+                "errorCode": None,
+                "meta": {},
+            },
+            {
+                "eventId": "EVT-419",
+                "action": "DEPT_BUDGET_UPDATE",
+                "performedBy": "USR-0042",
+                "timestamp": "2026-05-01T11:30:00+02:00",
+                "ipAddress": "10.0.0.55",
+                "success": False,
+                "errorCode": "PERMISSION_DENIED",
+                "meta": {
+                    "attemptedValue": 2_000_000,
+                    "currentValue": 1_200_000,
+                    "delta": 800_000,
+                    "requiresApproval": True,
+                    "approvedBy": None,
+                    "approvedAt": None,
+                },
+            },
+        ],
+        "settings": {
+            "locale": "en-US",
+            "timezone": "America/Chicago",
+            "currency": "USD",
+            "fiscalYearStart": "01-01",
+            "mfaRequired": True,
+            "ssoEnabled": False,
+            "ssoProvider": None,
+            "allowedDomains": ["nexuscorp.example.com", "nexus.internal"],
+            "blockedIPs": [],
+            "retentionDays": 365,
+            "notifications": {
+                "email": True,
+                "slack": True,
+                "sms": False,
+                "webhookUrl": "https://hooks.example.com/nexus",
+                "webhookSecret": None,
+                "digest": {
+                    "enabled": True,
+                    "frequency": "weekly",
+                    "day": "Monday",
+                    "time": "08:00",
+                    "lastSentAt": "2026-05-04T08:00:00Z",
+                    "nextScheduledAt": "2026-05-11T08:00:00Z",
+                    "failedAttempts": 0,
+                    "paused": False,
+                },
+            },
+        },
+        "metrics": {
+            "uptime": 99.97,
+            "requestsPerDay": 1_482_903,
+            "errorRate": 0.003,
+            "p50LatencyMs": 12,
+            "p95LatencyMs": 88,
+            "p99LatencyMs": 210,
+            "degraded": False,
+            "lastDowntime": "2025-09-14T02:11:00Z",
+            "maintenanceWindow": None,
+            "regions": {
+                "us-east-1": {"healthy": True, "load": 0.61, "instances": 4},
+                "eu-west-1": {"healthy": True, "load": 0.44, "instances": 2},
+                "ap-southeast-1": {"healthy": False, "load": None, "instances": 0},
+            },
+        },
+    }
+}

{jtoken-0.2.2 → jtoken-0.2.4}/jtoken/normalize.py RENAMED Viewed

@@ -80,8 +80,7 @@ def normalize(
     ctx = context or NormalizationContext()
     if isinstance(data, str):
         data = parse_input(data, source=source)
-    if not isinstance(data, dict):
-        raise NormalizationError(f"Expected dict, got {type(data).__name__}")
+    data = _coerce_root_document(data, ctx)
     if source != InputFormat.AUTO.value:
         fmt = InputFormat(source)
@@ -116,15 +115,41 @@ def _resolve_input_format(text: str, source: str) -> InputFormat:
     stripped = text.lstrip()
     if _MONGO_SHELL_OBJECT_ID.search(text) or _MONGO_SHELL_ISO_DATE.search(text):
         return InputFormat.MONGO_SHELL
-    if stripped.startswith("{"):
+    if stripped.startswith("{") or stripped.startswith("["):
         try:
             data = json.loads(text)
         except json.JSONDecodeError as exc:
             raise NormalizationError(f"Invalid JSON input: {exc}") from exc
-        return _detect_dict_format(data)
+        return _detect_parsed_format(data)
     raise NormalizationError("Could not detect input format")
+def _coerce_root_document(
+    data: Any,
+    ctx: NormalizationContext,
+) -> dict[str, Any]:
+    if isinstance(data, dict):
+        return data
+    if isinstance(data, list):
+        if len(data) == 1 and isinstance(data[0], dict):
+            return data[0]
+        ctx.lists.add("")
+        if not data:
+            return {}
+        return {str(index): item for index, item in enumerate(data)}
+    raise NormalizationError(f"Expected dict or list, got {type(data).__name__}")
+def _detect_parsed_format(data: Any) -> InputFormat:
+    if isinstance(data, dict):
+        return _detect_dict_format(data)
+    if isinstance(data, list):
+        if len(data) == 1 and isinstance(data[0], dict):
+            return _detect_dict_format(data[0])
+        return InputFormat.JSON
+    raise NormalizationError("Expected a JSON object or array")
 def _detect_dict_format(data: dict[str, Any]) -> InputFormat:
     if "_source" in data and isinstance(data.get("_source"), dict):
         return InputFormat.ELASTIC_HIT

{jtoken-0.2.2 → jtoken-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "jtoken"
-version = "0.2.2"
+version = "0.2.4"
 description = "Compress JSON-shaped documents for LLM prompts with normalization, CLI, and token measurement"
 readme = "README.pypi.md"
 requires-python = ">=3.8"

{jtoken-0.2.2 → jtoken-0.2.4}/tests/test_normalize.py RENAMED Viewed

@@ -60,6 +60,12 @@ class TestParseInput:
     def test_parse_json(self):
         assert parse_input('{"a": 1}', source="json") == {"a": 1}
+    def test_parse_json_array(self):
+        assert parse_input('[{"a": 1}]', source="json") == [{"a": 1}]
+    def test_parse_json_array_auto(self):
+        assert parse_input('[{"a": 1}]', source="auto") == [{"a": 1}]
     def test_parse_mongo_shell(self):
         parsed = parse_input(MONGO_SHELL_DOC, source="mongo_shell")
         assert parsed["_id"]["$oid"] == "69ca983fbf8c8953c43c2407"
@@ -126,3 +132,27 @@ class TestNormalizeErrors:
     def test_unsupported_type_raises(self):
         with pytest.raises(NormalizationError):
             normalize({"bad": object()}, source="json")
+class TestNormalizeJsonArrays:
+    def test_single_object_array_is_unwrapped(self):
+        normalized, context = normalize('[{"QUERY_ID": "q-1", "ROWS_DELETED": 0}]', source="json")
+        assert normalized["QUERY_ID"] == "q-1"
+        assert normalized["ROWS_DELETED"] == 0
+        assert "" not in context.lists
+    def test_single_object_array_auto(self):
+        normalized, _ = normalize('[{"a": 1}]', source="auto")
+        assert normalized == {"a": 1}
+    def test_multi_object_array_is_indexed(self):
+        normalized, context = normalize('[{"a": 1}, {"b": 2}]', source="json")
+        assert normalized["0"]["a"] == 1
+        assert normalized["1"]["b"] == 2
+        assert "" in context.lists
+    def test_primitive_array_is_indexed(self):
+        normalized, context = normalize('["a", "b"]', source="json")
+        assert normalized["0"] == "a"
+        assert normalized["1"] == "b"
+        assert "" in context.lists