PyPI - jtoken - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

jtoken 0.2.2.tar.gz → 0.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

{jtoken-0.2.2 → jtoken-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jtoken
-Version: 0.2.2
+Version: 0.2.3
 Summary: Compress JSON-shaped documents for LLM prompts with normalization, CLI, and token measurement
 Project-URL: Homepage, https://github.com/hermannsamimi/jtoken
 Project-URL: Repository, https://github.com/hermannsamimi/jtoken

{jtoken-0.2.2 → jtoken-0.2.3}/jtoken/__init__.py RENAMED Viewed

@@ -19,7 +19,7 @@ from .tokens import (
     token_savings,
 )
-__version__ = "0.2.2"
+__version__ = "0.2.3"
 __author__ = "Hermann Samimi"
 # json-style aliases

{jtoken-0.2.2 → jtoken-0.2.3}/jtoken/normalize.py RENAMED Viewed

@@ -80,8 +80,7 @@ def normalize(
     ctx = context or NormalizationContext()
     if isinstance(data, str):
         data = parse_input(data, source=source)
-    if not isinstance(data, dict):
-        raise NormalizationError(f"Expected dict, got {type(data).__name__}")
+    data = _coerce_root_document(data, ctx)
     if source != InputFormat.AUTO.value:
         fmt = InputFormat(source)
@@ -116,15 +115,41 @@ def _resolve_input_format(text: str, source: str) -> InputFormat:
     stripped = text.lstrip()
     if _MONGO_SHELL_OBJECT_ID.search(text) or _MONGO_SHELL_ISO_DATE.search(text):
         return InputFormat.MONGO_SHELL
-    if stripped.startswith("{"):
+    if stripped.startswith("{") or stripped.startswith("["):
         try:
             data = json.loads(text)
         except json.JSONDecodeError as exc:
             raise NormalizationError(f"Invalid JSON input: {exc}") from exc
-        return _detect_dict_format(data)
+        return _detect_parsed_format(data)
     raise NormalizationError("Could not detect input format")
+def _coerce_root_document(
+    data: Any,
+    ctx: NormalizationContext,
+) -> dict[str, Any]:
+    if isinstance(data, dict):
+        return data
+    if isinstance(data, list):
+        if len(data) == 1 and isinstance(data[0], dict):
+            return data[0]
+        ctx.lists.add("")
+        if not data:
+            return {}
+        return {str(index): item for index, item in enumerate(data)}
+    raise NormalizationError(f"Expected dict or list, got {type(data).__name__}")
+def _detect_parsed_format(data: Any) -> InputFormat:
+    if isinstance(data, dict):
+        return _detect_dict_format(data)
+    if isinstance(data, list):
+        if len(data) == 1 and isinstance(data[0], dict):
+            return _detect_dict_format(data[0])
+        return InputFormat.JSON
+    raise NormalizationError("Expected a JSON object or array")
 def _detect_dict_format(data: dict[str, Any]) -> InputFormat:
     if "_source" in data and isinstance(data.get("_source"), dict):
         return InputFormat.ELASTIC_HIT

{jtoken-0.2.2 → jtoken-0.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "jtoken"
-version = "0.2.2"
+version = "0.2.3"
 description = "Compress JSON-shaped documents for LLM prompts with normalization, CLI, and token measurement"
 readme = "README.pypi.md"
 requires-python = ">=3.8"

{jtoken-0.2.2 → jtoken-0.2.3}/tests/test_normalize.py RENAMED Viewed

@@ -60,6 +60,12 @@ class TestParseInput:
     def test_parse_json(self):
         assert parse_input('{"a": 1}', source="json") == {"a": 1}
+    def test_parse_json_array(self):
+        assert parse_input('[{"a": 1}]', source="json") == [{"a": 1}]
+    def test_parse_json_array_auto(self):
+        assert parse_input('[{"a": 1}]', source="auto") == [{"a": 1}]
     def test_parse_mongo_shell(self):
         parsed = parse_input(MONGO_SHELL_DOC, source="mongo_shell")
         assert parsed["_id"]["$oid"] == "69ca983fbf8c8953c43c2407"
@@ -126,3 +132,27 @@ class TestNormalizeErrors:
     def test_unsupported_type_raises(self):
         with pytest.raises(NormalizationError):
             normalize({"bad": object()}, source="json")
+class TestNormalizeJsonArrays:
+    def test_single_object_array_is_unwrapped(self):
+        normalized, context = normalize('[{"QUERY_ID": "q-1", "ROWS_DELETED": 0}]', source="json")
+        assert normalized["QUERY_ID"] == "q-1"
+        assert normalized["ROWS_DELETED"] == 0
+        assert "" not in context.lists
+    def test_single_object_array_auto(self):
+        normalized, _ = normalize('[{"a": 1}]', source="auto")
+        assert normalized == {"a": 1}
+    def test_multi_object_array_is_indexed(self):
+        normalized, context = normalize('[{"a": 1}, {"b": 2}]', source="json")
+        assert normalized["0"]["a"] == 1
+        assert normalized["1"]["b"] == 2
+        assert "" in context.lists
+    def test_primitive_array_is_indexed(self):
+        normalized, context = normalize('["a", "b"]', source="json")
+        assert normalized["0"] == "a"
+        assert normalized["1"] == "b"
+        assert "" in context.lists