python-toon-parser 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_toon_parser-0.1.1/LICENSE +21 -0
- python_toon_parser-0.1.1/MANIFEST.in +3 -0
- python_toon_parser-0.1.1/PKG-INFO +78 -0
- python_toon_parser-0.1.1/README.md +55 -0
- python_toon_parser-0.1.1/pyproject.toml +33 -0
- python_toon_parser-0.1.1/python_toon_parser.egg-info/PKG-INFO +78 -0
- python_toon_parser-0.1.1/python_toon_parser.egg-info/SOURCES.txt +16 -0
- python_toon_parser-0.1.1/python_toon_parser.egg-info/dependency_links.txt +1 -0
- python_toon_parser-0.1.1/python_toon_parser.egg-info/top_level.txt +1 -0
- python_toon_parser-0.1.1/pytoon/__init__.py +14 -0
- python_toon_parser-0.1.1/pytoon/__version__.py +1 -0
- python_toon_parser-0.1.1/pytoon/toon.py +529 -0
- python_toon_parser-0.1.1/setup.cfg +4 -0
- python_toon_parser-0.1.1/setup.py +3 -0
- python_toon_parser-0.1.1/tests/__init__.py +0 -0
- python_toon_parser-0.1.1/tests/test_edgecases.py +33 -0
- python_toon_parser-0.1.1/tests/test_fuzz.py +118 -0
- python_toon_parser-0.1.1/tests/test_toon.py +269 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Akash Wankhede
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: python-toon-parser
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: TOON (Token-Oriented Object Notation) serializer & parser for Python
|
|
5
|
+
Author-email: Akash Wankhede <akash.wankhede.pune@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/akash1551/pytoon
|
|
8
|
+
Project-URL: Source, https://github.com/akash1551/pytoon
|
|
9
|
+
Project-URL: Issues, https://github.com/akash1551/pytoon/issues
|
|
10
|
+
Keywords: toon,serialization,parser,format,llm,data
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# python-toon-parser
|
|
25
|
+
|
|
26
|
+

|
|
27
|
+

|
|
28
|
+

|
|
29
|
+
|
|
30
|
+
`python-toon-parser` — **TOON** (**T**oken-**O**riented **O**bject **N**otation) serializer and parser for Python.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install python-toon-parser
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- **Human-Readable**: Minimal syntax, similar to YAML but distinct.
|
|
41
|
+
- **Round-Trip**: `dumps(obj)` -> `loads(text)` preserves structure.
|
|
42
|
+
- **Compact Tables**: Automatically detects lists of uniform objects and formats them as compact tables.
|
|
43
|
+
- **Broad Support**: Handles `dict`, `list`, `tuple`, `set`, `dataclasses`, `namedtuples`, and simple objects.
|
|
44
|
+
|
|
45
|
+
## Quickstart
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from pytoon import dumps, loads
|
|
49
|
+
|
|
50
|
+
data = {"items": [{"id":1,"name":"A"}, {"id":2,"name":"B"}]}
|
|
51
|
+
|
|
52
|
+
# Serialize
|
|
53
|
+
s = dumps(data)
|
|
54
|
+
print(s)
|
|
55
|
+
|
|
56
|
+
# Parse back
|
|
57
|
+
obj = loads(s)
|
|
58
|
+
print(obj)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Output:**
|
|
62
|
+
```yaml
|
|
63
|
+
items[2]{id,name}:
|
|
64
|
+
1,A
|
|
65
|
+
2,B
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## API Reference
|
|
69
|
+
|
|
70
|
+
### `dumps(obj, name=None, indent=0) -> str`
|
|
71
|
+
Serializes a Python object to a TOON string.
|
|
72
|
+
- `obj`: The object to serialize.
|
|
73
|
+
- `name`: (Optional) Root key name for the object.
|
|
74
|
+
- `indent`: (Optional) Starting indentation level (default 0).
|
|
75
|
+
|
|
76
|
+
### `loads(toon_str) -> Any`
|
|
77
|
+
Parses a TOON string back into Python objects.
|
|
78
|
+
- Returns `dict`, `list`, or primitive depending on the input.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# python-toon-parser
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
`python-toon-parser` — **TOON** (**T**oken-**O**riented **O**bject **N**otation) serializer and parser for Python.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install python-toon-parser
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
- **Human-Readable**: Minimal syntax, similar to YAML but distinct.
|
|
18
|
+
- **Round-Trip**: `dumps(obj)` -> `loads(text)` preserves structure.
|
|
19
|
+
- **Compact Tables**: Automatically detects lists of uniform objects and formats them as compact tables.
|
|
20
|
+
- **Broad Support**: Handles `dict`, `list`, `tuple`, `set`, `dataclasses`, `namedtuples`, and simple objects.
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from pytoon import dumps, loads
|
|
26
|
+
|
|
27
|
+
data = {"items": [{"id":1,"name":"A"}, {"id":2,"name":"B"}]}
|
|
28
|
+
|
|
29
|
+
# Serialize
|
|
30
|
+
s = dumps(data)
|
|
31
|
+
print(s)
|
|
32
|
+
|
|
33
|
+
# Parse back
|
|
34
|
+
obj = loads(s)
|
|
35
|
+
print(obj)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Output:**
|
|
39
|
+
```yaml
|
|
40
|
+
items[2]{id,name}:
|
|
41
|
+
1,A
|
|
42
|
+
2,B
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## API Reference
|
|
46
|
+
|
|
47
|
+
### `dumps(obj, name=None, indent=0) -> str`
|
|
48
|
+
Serializes a Python object to a TOON string.
|
|
49
|
+
- `obj`: The object to serialize.
|
|
50
|
+
- `name`: (Optional) Root key name for the object.
|
|
51
|
+
- `indent`: (Optional) Starting indentation level (default 0).
|
|
52
|
+
|
|
53
|
+
### `loads(toon_str) -> Any`
|
|
54
|
+
Parses a TOON string back into Python objects.
|
|
55
|
+
- Returns `dict`, `list`, or primitive depending on the input.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "python-toon-parser"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "TOON (Token-Oriented Object Notation) serializer & parser for Python"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
|
|
13
|
+
authors = [
|
|
14
|
+
{ name="Akash Wankhede", email="akash.wankhede.pune@gmail.com" }
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Topic :: Software Development :: Libraries",
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
keywords = ["toon", "serialization", "parser", "format", "llm", "data"]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/akash1551/pytoon"
|
|
32
|
+
Source = "https://github.com/akash1551/pytoon"
|
|
33
|
+
Issues = "https://github.com/akash1551/pytoon/issues"
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: python-toon-parser
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: TOON (Token-Oriented Object Notation) serializer & parser for Python
|
|
5
|
+
Author-email: Akash Wankhede <akash.wankhede.pune@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/akash1551/pytoon
|
|
8
|
+
Project-URL: Source, https://github.com/akash1551/pytoon
|
|
9
|
+
Project-URL: Issues, https://github.com/akash1551/pytoon/issues
|
|
10
|
+
Keywords: toon,serialization,parser,format,llm,data
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# python-toon-parser
|
|
25
|
+
|
|
26
|
+

|
|
27
|
+

|
|
28
|
+

|
|
29
|
+
|
|
30
|
+
`python-toon-parser` — **TOON** (**T**oken-**O**riented **O**bject **N**otation) serializer and parser for Python.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install python-toon-parser
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- **Human-Readable**: Minimal syntax, similar to YAML but distinct.
|
|
41
|
+
- **Round-Trip**: `dumps(obj)` -> `loads(text)` preserves structure.
|
|
42
|
+
- **Compact Tables**: Automatically detects lists of uniform objects and formats them as compact tables.
|
|
43
|
+
- **Broad Support**: Handles `dict`, `list`, `tuple`, `set`, `dataclasses`, `namedtuples`, and simple objects.
|
|
44
|
+
|
|
45
|
+
## Quickstart
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from pytoon import dumps, loads
|
|
49
|
+
|
|
50
|
+
data = {"items": [{"id":1,"name":"A"}, {"id":2,"name":"B"}]}
|
|
51
|
+
|
|
52
|
+
# Serialize
|
|
53
|
+
s = dumps(data)
|
|
54
|
+
print(s)
|
|
55
|
+
|
|
56
|
+
# Parse back
|
|
57
|
+
obj = loads(s)
|
|
58
|
+
print(obj)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**Output:**
|
|
62
|
+
```yaml
|
|
63
|
+
items[2]{id,name}:
|
|
64
|
+
1,A
|
|
65
|
+
2,B
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## API Reference
|
|
69
|
+
|
|
70
|
+
### `dumps(obj, name=None, indent=0) -> str`
|
|
71
|
+
Serializes a Python object to a TOON string.
|
|
72
|
+
- `obj`: The object to serialize.
|
|
73
|
+
- `name`: (Optional) Root key name for the object.
|
|
74
|
+
- `indent`: (Optional) Starting indentation level (default 0).
|
|
75
|
+
|
|
76
|
+
### `loads(toon_str) -> Any`
|
|
77
|
+
Parses a TOON string back into Python objects.
|
|
78
|
+
- Returns `dict`, `list`, or primitive depending on the input.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
setup.py
|
|
6
|
+
python_toon_parser.egg-info/PKG-INFO
|
|
7
|
+
python_toon_parser.egg-info/SOURCES.txt
|
|
8
|
+
python_toon_parser.egg-info/dependency_links.txt
|
|
9
|
+
python_toon_parser.egg-info/top_level.txt
|
|
10
|
+
pytoon/__init__.py
|
|
11
|
+
pytoon/__version__.py
|
|
12
|
+
pytoon/toon.py
|
|
13
|
+
tests/__init__.py
|
|
14
|
+
tests/test_edgecases.py
|
|
15
|
+
tests/test_fuzz.py
|
|
16
|
+
tests/test_toon.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pytoon
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pytoon package - TOON (Token-Oriented Object Notation) for Python.
|
|
3
|
+
Expose dumps / loads API and package version.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .toon import dumps, loads
|
|
7
|
+
|
|
8
|
+
try:
    # The version string lives in the sibling module pytoon/__version__.py, so
    # it has to be imported relative to this package.  An absolute
    # ``from __version__ import ...`` looks for a top-level module of that name
    # and fails on Python 3, which made the fallback below the only value used.
    from .__version__ import __version__
except ImportError:
    # Placeholder used when the version module is unavailable.
    __version__ = "0.0.0"
|
|
13
|
+
|
|
14
|
+
__all__ = ["dumps", "loads", "__version__"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.1"
|
|
@@ -0,0 +1,529 @@
|
|
|
1
|
+
"""
|
|
2
|
+
toon.py
|
|
3
|
+
|
|
4
|
+
A self-contained, practical Python implementation of TOON (Token-Oriented Object Notation)
|
|
5
|
+
with a JSON-like API: `dumps(obj)` and `loads(toon_str)`.
|
|
6
|
+
|
|
7
|
+
- dumps(obj) -> str : Serialize common Python objects into TOON.
|
|
8
|
+
- loads(toon_str) -> Any : Parse TOON produced by dumps back into Python types.
|
|
9
|
+
|
|
10
|
+
This is a best-effort, usable implementation focused on round-tripping dumps -> loads for
|
|
11
|
+
typical Python objects: primitives, dicts, lists/tuples/sets, dataclasses/namedtuples and
|
|
12
|
+
uniform-table arrays. It is not a complete spec implementation, but is practical and easy
|
|
13
|
+
to extend.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
    from pytoon import dumps, loads
|
|
17
|
+
s = dumps(my_obj)
|
|
18
|
+
obj = loads(s)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import inspect
import json
from dataclasses import asdict, is_dataclass
from typing import Any, Dict, List, Optional, Tuple
|
|
25
|
+
|
|
26
|
+
# Indentation unit: two spaces per nesting level (modifiable if desired).
# dumps() emits this string once per level and loads() counts occurrences of
# it, so both sides always agree as long as they share this constant.
INDENT_STR = "  "
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# -------------------- Helpers for serialization --------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _is_primitive(x):
|
|
33
|
+
return x is None or isinstance(x, (bool, int, float, str))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _to_toon_primitive(x):
|
|
37
|
+
"""Return a TOON-safe string for a primitive."""
|
|
38
|
+
if x is None:
|
|
39
|
+
return "null"
|
|
40
|
+
if isinstance(x, bool):
|
|
41
|
+
return "true" if x else "false"
|
|
42
|
+
if isinstance(x, (int, float)):
|
|
43
|
+
return str(x)
|
|
44
|
+
s = str(x)
|
|
45
|
+
if s == "":
|
|
46
|
+
return '""'
|
|
47
|
+
needs_quote = (
|
|
48
|
+
any(ch in s for ch in [",", "\n", "\r"])
|
|
49
|
+
or s[0].isspace()
|
|
50
|
+
or s[-1].isspace()
|
|
51
|
+
or s.startswith(('"', "'"))
|
|
52
|
+
or s.lower() in ("null", "true", "false")
|
|
53
|
+
)
|
|
54
|
+
if needs_quote:
|
|
55
|
+
return json.dumps(s, ensure_ascii=False)
|
|
56
|
+
return s
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _escape_header_key(k: str) -> str:
|
|
60
|
+
"""Make a key safe for header usage; quote if it's not a simple identifier."""
|
|
61
|
+
if isinstance(k, str) and k.isidentifier():
|
|
62
|
+
return k
|
|
63
|
+
return json.dumps(str(k), ensure_ascii=False)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _is_namedtuple_instance(x):
|
|
67
|
+
return isinstance(x, tuple) and hasattr(x, "_fields")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _object_to_dict(obj):
|
|
71
|
+
"""Convert dataclass/namedtuple/object to dict for serialization."""
|
|
72
|
+
if is_dataclass(obj):
|
|
73
|
+
return asdict(obj)
|
|
74
|
+
if _is_namedtuple_instance(obj):
|
|
75
|
+
return obj._asdict()
|
|
76
|
+
if hasattr(obj, "__dict__"):
|
|
77
|
+
return {
|
|
78
|
+
k: v
|
|
79
|
+
for k, v in vars(obj).items()
|
|
80
|
+
if not k.startswith("_") and not inspect.isroutine(v)
|
|
81
|
+
}
|
|
82
|
+
# fallback
|
|
83
|
+
return {"value": repr(obj)}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _all_dicts_uniform(list_of_dicts: List[dict]) -> Tuple[bool, List[str] or None]:
|
|
87
|
+
"""
|
|
88
|
+
Return (is_uniform, keys_order).
|
|
89
|
+
Uniform means every element is a dict and they all have the same set of keys.
|
|
90
|
+
If insertion order is consistent across elements, return that order; otherwise return sorted keys.
|
|
91
|
+
"""
|
|
92
|
+
if not list_of_dicts:
|
|
93
|
+
return False, None
|
|
94
|
+
if not all(isinstance(item, dict) for item in list_of_dicts):
|
|
95
|
+
return False, None
|
|
96
|
+
sets = [set(d.keys()) for d in list_of_dicts]
|
|
97
|
+
first_set = sets[0]
|
|
98
|
+
if all(s == first_set for s in sets):
|
|
99
|
+
first_keys = list(list_of_dicts[0].keys())
|
|
100
|
+
if all(tuple(d.keys()) == tuple(first_keys) for d in list_of_dicts):
|
|
101
|
+
return True, first_keys
|
|
102
|
+
return True, sorted(first_set)
|
|
103
|
+
return False, None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# -------------------- Serialization: dumps --------------------
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def dumps(obj: Any, name: str = None, indent: int = 0) -> str:
    """
    Serialize a Python object into TOON text.

    Args:
        obj: Value to serialize: a primitive, dict, list/tuple/set,
            dataclass, namedtuple, or any other object (converted through
            ``_object_to_dict``).
        name: Optional key under which the value is emitted.  An empty or
            ``None`` name means "no key".
        indent: Nesting level of the first emitted line (0 == top); each level
            is rendered as one ``INDENT_STR``.

    Returns:
        The TOON text, always terminated by a newline.
    """
    pad = INDENT_STR * indent

    # primitives
    if _is_primitive(obj):
        val = _to_toon_primitive(obj)
        if name:
            return f"{pad}{name}: {val}\n"
        return f"{pad}{val}\n"

    # dataclass / namedtuple => dict
    if _is_namedtuple_instance(obj) or is_dataclass(obj):
        obj = _object_to_dict(obj)

    # dict
    if isinstance(obj, dict):
        # With a name, the entries sit one level below the "name:" header line;
        # without one they start at the current level.
        child_indent = indent + 1 if name else indent
        child_pad = INDENT_STR * child_indent

        # explicit empty dict representation: a '{}' line the parser can detect
        if not obj:
            if name:
                return f"{pad}{name}:\n{child_pad}{{}}\n"
            return f"{pad}{{}}\n"

        lines = [f"{pad}{name}:"] if name else []
        for k, v in obj.items():
            key = _escape_header_key(k)
            if _is_primitive(v):
                lines.append(f"{child_pad}{key}: {_to_toon_primitive(v)}")
            else:
                lines.append(f"{child_pad}{key}:")
                # Nested content goes one level below its own key line.  This
                # must be relative to child_indent (not indent), otherwise a
                # named dict emits the nested block level with its key.
                lines.append(
                    dumps(v, name=None, indent=child_indent + 1).rstrip("\n")
                )
        return "\n".join(lines) + "\n"

    # list / tuple / set
    if isinstance(obj, (list, tuple, set)):
        if isinstance(obj, set):
            # Sort for stable output; members that cannot be ordered keep the
            # set's own iteration order.
            try:
                lst = sorted(obj)
            except TypeError:
                lst = list(obj)
        else:
            lst = list(obj)
        n = len(lst)
        if n == 0:
            return f"{pad}{name}[0]:\n" if name else f"{pad}[]\n"

        uniform, keys = _all_dicts_uniform(lst)
        # compact table if uniform dicts and all primitive values
        if (
            uniform
            and keys
            and all(_is_primitive(v) for d in lst for v in d.values())
        ):
            keys_escaped = ",".join(_escape_header_key(k) for k in keys)
            header = (
                f"{pad}{name}[{n}]{{{keys_escaped}}}:"
                if name
                else f"{pad}[{n}]{{{keys_escaped}}}:"
            )
            lines = [header]
            for d in lst:
                row = ",".join(_to_toon_primitive(d.get(k)) for k in keys)
                lines.append(f"{pad}{INDENT_STR}{row}")
            return "\n".join(lines) + "\n"

        # all primitives => "name[N]: a,b" when named, otherwise one "- a, b" line
        if all(_is_primitive(x) for x in lst):
            if name:
                vals = ",".join(_to_toon_primitive(x) for x in lst)
                return f"{pad}{name}[{n}]: {vals}\n"
            return f"{pad}- " + ", ".join(_to_toon_primitive(x) for x in lst) + "\n"

        # mixed/complex items => list block with '-' markers; nested item
        # content is indented one level deeper than its '-' line
        lines = []
        if name:
            lines.append(f"{pad}{name}[{n}]:")
            item_indent_str = pad + INDENT_STR
            nested_indent = indent + 2
        else:
            item_indent_str = pad
            nested_indent = indent + 1

        for item in lst:
            if _is_primitive(item):
                lines.append(f"{item_indent_str}- {_to_toon_primitive(item)}")
            else:
                # convert dataclass/namedtuple -> dict first
                if _is_namedtuple_instance(item) or is_dataclass(item):
                    item = _object_to_dict(item)
                lines.append(f"{item_indent_str}-")
                lines.append(dumps(item, name=None, indent=nested_indent).rstrip("\n"))
        return "\n".join(lines) + "\n"

    # fallback for other objects: serialize their dict form; if anything goes
    # wrong, emit repr(obj) as a string (deliberately best-effort).
    try:
        obj_dict = _object_to_dict(obj)
        return dumps(obj_dict, name=name, indent=indent)
    except Exception:
        val = _to_toon_primitive(repr(obj))
        if name:
            return f"{pad}{name}: {val}\n"
        return f"{pad}{val}\n"
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# -------------------- Parsing: loads --------------------
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _parse_primitive_token(tok: str):
|
|
232
|
+
tok = tok.strip()
|
|
233
|
+
if tok == "":
|
|
234
|
+
return ""
|
|
235
|
+
if tok == "null":
|
|
236
|
+
return None
|
|
237
|
+
if tok == "true":
|
|
238
|
+
return True
|
|
239
|
+
if tok == "false":
|
|
240
|
+
return False
|
|
241
|
+
# JSON quoted string
|
|
242
|
+
if (tok.startswith('"') and tok.endswith('"')) or (
|
|
243
|
+
tok.startswith("'") and tok.endswith("'")
|
|
244
|
+
):
|
|
245
|
+
try:
|
|
246
|
+
return json.loads(tok)
|
|
247
|
+
except Exception:
|
|
248
|
+
return tok[1:-1]
|
|
249
|
+
# try int then float (float handles nan/inf and exponent forms)
|
|
250
|
+
try:
|
|
251
|
+
# attempt exact int parsing first
|
|
252
|
+
try:
|
|
253
|
+
return int(tok)
|
|
254
|
+
except Exception:
|
|
255
|
+
# fall back to float parsing (accepts 'nan', 'inf', '1e3', etc.)
|
|
256
|
+
return float(tok)
|
|
257
|
+
except Exception:
|
|
258
|
+
return tok
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _count_leading_indent(s: str) -> int:
    """Return the number of consecutive INDENT_STR units that ``s`` begins with."""
    unit = len(INDENT_STR)
    levels = 0
    pos = 0
    # Walk forward one unit at a time instead of re-slicing the string.
    while s.startswith(INDENT_STR, pos):
        levels += 1
        pos += unit
    return levels
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _split_csv_like(s: str) -> List[str]:
|
|
271
|
+
"""
|
|
272
|
+
Split comma-separated tokens but respect quoted substrings.
|
|
273
|
+
Returns list of tokens (whitespace preserved trimmed).
|
|
274
|
+
"""
|
|
275
|
+
parts = []
|
|
276
|
+
cur = ""
|
|
277
|
+
in_q = False
|
|
278
|
+
qchar = None
|
|
279
|
+
i = 0
|
|
280
|
+
while i < len(s):
|
|
281
|
+
ch = s[i]
|
|
282
|
+
if ch in ('"', "'"):
|
|
283
|
+
if not in_q:
|
|
284
|
+
in_q = True
|
|
285
|
+
qchar = ch
|
|
286
|
+
cur += ch
|
|
287
|
+
elif qchar == ch:
|
|
288
|
+
in_q = False
|
|
289
|
+
cur += ch
|
|
290
|
+
else:
|
|
291
|
+
cur += ch
|
|
292
|
+
elif ch == "," and not in_q:
|
|
293
|
+
parts.append(cur.strip())
|
|
294
|
+
cur = ""
|
|
295
|
+
else:
|
|
296
|
+
cur += ch
|
|
297
|
+
i += 1
|
|
298
|
+
if cur.strip() != "":
|
|
299
|
+
parts.append(cur.strip())
|
|
300
|
+
return parts
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _find_unquoted_colon(content: str):
    """Return the index of the first ':' outside quotes in ``content``, or None."""
    quote = None
    i = 0
    n = len(content)
    while i < n:
        ch = content[i]
        if quote is not None:
            if ch == "\\" and quote == '"' and i + 1 < n:
                # Escaped character inside a JSON string: skip it so an escaped
                # quote does not end the quoted section.
                i += 2
                continue
            if ch == quote:
                quote = None
        elif ch in ('"', "'"):
            quote = ch
        elif ch == ":":
            return i
        i += 1
    return None


def _unquote_key(key: str) -> str:
    """Decode a JSON-quoted key; return ``key`` unchanged if it is not one."""
    if len(key) >= 2 and key.startswith('"') and key.endswith('"'):
        try:
            decoded = json.loads(key)
        except ValueError:
            return key
        if isinstance(decoded, str):
            return decoded
    return key


def loads(toon_str: str) -> Any:
    """
    Parse a TOON string (as produced by dumps) into Python objects.

    Blank lines are ignored.  Each remaining line is reduced to an
    (indent level, content) pair, and blocks are parsed recursively by
    indent level.

    Args:
        toon_str: TOON text.

    Returns:
        A dict, a list, or a primitive, depending on the first block.  Keys
        that were JSON-quoted on output are returned without the quotes.
    """
    # Preprocess into (indent_level, content) tuples, dropping blank lines.
    processed: List[Tuple[int, str]] = []
    for ln in toon_str.splitlines():
        if ln.strip() == "":
            continue
        level = _count_leading_indent(ln)
        processed.append((level, ln[level * len(INDENT_STR):]))

    idx = 0
    total = len(processed)

    def parse_block(expected_indent: int):
        nonlocal idx
        result: Dict[str, Any] = {}
        arr_mode = None  # becomes a list once a '-' item is seen

        while idx < total:
            indent, content = processed[idx]
            # Shallower lines belong to an enclosing block; deeper lines that
            # nobody claimed end this block as well.
            if indent != expected_indent:
                break
            stripped = content.strip()

            # Explicit empty list "[]"
            if stripped == "[]":
                idx += 1
                if arr_mode is None and not result:
                    return []
                continue
            # Explicit empty dict "{}"
            if stripped == "{}":
                idx += 1
                if arr_mode is None and not result:
                    return {}
                continue

            # List item lines: "- value" or "-" (followed by a nested block)
            if content.startswith("- ") or content == "-":
                if arr_mode is None:
                    arr_mode = []
                idx += 1
                if content == "-":
                    if idx < total and processed[idx][0] > indent:
                        arr_mode.append(parse_block(indent + 1))
                    else:
                        arr_mode.append(None)
                else:
                    val_tok = content[2:].strip()
                    # "- a, b, c" carries several primitives.  Always split
                    # quote-aware, so a quoted first element ('- "a,b", c')
                    # is handled like any other.
                    if "," in val_tok:
                        tokens = _split_csv_like(val_tok)
                    else:
                        tokens = [val_tok]
                    arr_mode.extend(_parse_primitive_token(t) for t in tokens)
                continue

            # Otherwise: "key: value", "key:", "name[N]{...}:" or "name[N]: v1,v2"
            colon_pos = _find_unquoted_colon(content)
            if colon_pos is None:
                # No key: a bare primitive line.
                val = _parse_primitive_token(content)
                idx += 1
                if arr_mode is None and not result:
                    return val
                if arr_mode is not None:
                    arr_mode.append(val)
                # otherwise: stray primitive inside a dict block, skipped
                continue

            key_part = content[:colon_pos].strip()
            val_part = content[colon_pos + 1:].strip()
            idx += 1

            plain_key = _unquote_key(key_part)
            # A fully quoted key is a plain key even if it contains brackets.
            quoted_key = plain_key != key_part
            has_brackets = (
                not quoted_key and "[" in key_part and "]" in key_part
            )

            if val_part != "":
                if has_brackets:
                    # inline array: name[N]: v1,v2
                    arr_name = key_part.split("[", 1)[0].strip()
                    tokens = _split_csv_like(val_part)
                    result[arr_name] = [
                        _parse_primitive_token(t) for t in tokens if t != ""
                    ]
                else:
                    # regular "key: value"
                    result[plain_key] = _parse_primitive_token(val_part)
                continue

            # Empty value: table header "name[N]{k1,k2}:" ...
            if has_brackets and "{" in key_part and "}" in key_part:
                name_section = key_part.split("[", 1)[0].strip()
                try:
                    br_start = key_part.index("[")
                    br_end = key_part.index("]", br_start)
                    keys_start = key_part.index("{", br_end)
                    keys_end = key_part.index("}", keys_start)
                    keys = []
                    for tok in _split_csv_like(key_part[keys_start + 1:keys_end]):
                        if tok == "":
                            continue
                        decoded = _unquote_key(tok)
                        if decoded == tok:
                            # not a JSON literal: drop any stray outer quotes
                            decoded = tok.strip('"').strip("'")
                        keys.append(decoded)
                except ValueError:
                    keys = []

                # rows are the lines exactly one level below the header
                rows = []
                while idx < total and processed[idx][0] == expected_indent + 1:
                    _, row_content = processed[idx]
                    cells = _split_csv_like(row_content.strip())
                    rows.append(
                        {k: _parse_primitive_token(c) for k, c in zip(keys, cells)}
                    )
                    idx += 1

                # An unnamed table is stored under "_table" and unwrapped
                # below when it is the only entry of the block.
                result[name_section if name_section != "" else "_table"] = rows
                continue

            # ... or a list header "name[N]:" / nested block "key:"
            real_key = plain_key
            is_list_header = False
            if not quoted_key and "[" in key_part and key_part.endswith("]"):
                real_key = key_part.split("[", 1)[0].strip()
                is_list_header = True

            nested = parse_block(expected_indent + 1)
            # "name[0]:" has no body; an empty block then means an empty list
            if is_list_header and nested == {}:
                nested = []
            result[real_key] = nested

        if arr_mode is not None:
            return arr_mode

        # Unwrap anonymous table if it's the only thing
        if len(result) == 1 and "_table" in result:
            return result["_table"]

        return result

    # Start parse from top-level indent 0
    return parse_block(0)
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
# -------------------- Quick demo when run as script --------------------
|
|
505
|
+
# -------------------- Quick demo when run as script --------------------
if __name__ == "__main__":
    from collections import namedtuple

    # Round-trip a small mixed structure and print both representations.
    Person = namedtuple("Person", ["id", "name", "role"])
    alice = Person(1, "Alice", "admin")
    bob = Person(2, "Bob", "user")

    hikes = [
        {"id": 1, "name": "Blue Lake Trail", "distanceKm": 7.5},
        {"id": 2, "name": "Ridge, Overlook", "distanceKm": 9.2},
    ]
    example = {
        "context": {"task": "Roundtrip demo", "season": "spring_2025"},
        "friends": ["ana", "luis", "sam"],
        "hikes": hikes,
        "people": [alice, bob],
        "misc": (1, None, "two"),
        "empty_list": [],
    }

    toon_text = dumps(example)
    print("=== TOON ===")
    print(toon_text)
    print("=== PARSED BACK ===")
    print(loads(toon_text))
|
|
File without changes
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from pytoon import toon as toon_mod
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_special_floats():
    """Round-trip NaN, +inf and -inf and check each comes back as the same kind of float."""
    payload = {"a": float("nan"), "b": float("inf"), "c": -float("inf")}
    restored = toon_mod.loads(toon_mod.dumps(payload))

    # Infinities are checked by sign; NaN needs isnan() because NaN != NaN.
    assert math.isinf(restored["b"]) and restored["b"] > 0
    assert math.isinf(restored["c"]) and restored["c"] < 0
    assert math.isnan(restored["a"])
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_null_byte_and_control_chars():
    """Round-trip strings containing a NUL byte, a newline and a leading tab."""
    source = {"x": "null\x00byte", "y": "line1\nline2", "z": "\tindented"}
    restored = toon_mod.loads(toon_mod.dumps(source))

    # Each value must come back exactly as it went in.
    for key in ("x", "y", "z"):
        assert restored[key] == source[key]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_deeply_nested():
    """Round-trip a chain of 60 single-key dicts and check the result is a dict."""
    depth = 60
    root = {}
    node = root
    for level in range(depth):
        # Hang a fresh empty dict under "lvl<level>" and descend into it.
        child = {}
        node[f"lvl{level}"] = child
        node = child

    encoded = toon_mod.dumps(root)
    decoded = toon_mod.loads(encoded)
    assert isinstance(decoded, dict)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import math
|
|
3
|
+
import pytest
|
|
4
|
+
from pytoon import toon as toon_mod
|
|
5
|
+
|
|
6
|
+
try:
    # Prefer the canonicalising helper defined in the main test module.
    from tests.test_toon import normalize
except Exception:
    # The helper could not be imported: fall back to an identity function so
    # the rest of this module still loads.
    def normalize(x):
        """Return *x* unchanged."""
        return x
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_nan(x):
    """Return True only when *x* is a float holding NaN."""
    # Guard first: math.isnan() is only called on floats.
    if not isinstance(x, float):
        return False
    return math.isnan(x)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def compare_allow_nan(a, b):
    """Recursively compare normalized values allowing NaN == NaN.

    Values of different exact types never match. Dicts must have the same
    keys and matching values. Lists and tuples must have the same length and
    match element by element, or failing that have equal sorted string
    representations. Sets are compared through their sorted members.
    Everything else falls back to ``==``.
    """
    both_nan = is_nan(a) and is_nan(b)
    if both_nan:
        return True
    if type(a) != type(b):
        return False

    if isinstance(a, dict):
        same_keys = set(a) == set(b)
        return same_keys and all(
            compare_allow_nan(value, b[key]) for key, value in a.items()
        )

    if isinstance(a, (list, tuple)):
        if len(a) != len(b):
            return False
        # Ordered, element-by-element comparison first.
        for left, right in zip(a, b):
            if not compare_allow_nan(left, right):
                break
        else:
            return True
        # Fallback: ignore order and compare string forms (e.g. sets that
        # were turned into lists).
        try:
            return sorted(str(item) for item in a) == sorted(str(item) for item in b)
        except Exception:
            return False

    if isinstance(a, set):
        try:
            ordered_a, ordered_b = sorted(a), sorted(b)
            return compare_allow_nan(ordered_a, ordered_b)
        except Exception:
            # Members that cannot be ordered directly are compared by string form.
            return sorted(str(item) for item in a) == sorted(str(item) for item in b)

    return a == b
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Module-level generator with a fixed seed, so the fuzz inputs are reproducible.
PRNG = random.Random(12345)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def random_primitive(rng: random.Random):
    """Draw one scalar from *rng*: None, a bool, an int, a float or a str.

    Floats may be NaN or +/-inf. Strings are either a short random
    alphanumeric run (possibly empty) or one of a few fixed awkward values
    (commas, newline, non-ASCII, NUL byte, surrounding spaces).
    """
    kind = rng.choice(['none', 'bool', 'int', 'float', 'str'])

    if kind == 'none':
        return None
    elif kind == 'bool':
        return rng.choice([True, False])
    elif kind == 'int':
        # sometimes large ints
        low, high = -10**18, 10**18
        return rng.randint(low, high)
    elif kind == 'float':
        # The finite candidate is always drawn, even when a special value wins.
        finite = rng.uniform(-1e6, 1e6)
        candidates = [finite, float('nan'), float('inf'), -float('inf')]
        return rng.choice(candidates)

    # str
    # include commas, newlines, unicode and control chars sometimes
    flavour = rng.choice(['simple', 'comma', 'newline', 'unicode', 'nullbyte', 'spaces'])
    if flavour == 'simple':
        length = rng.randint(0, 10)
        return ''.join(rng.choices('abcdEFG123', k=length))

    fixed_values = {
        'comma': 'a,b,c',
        'newline': 'line1\nline2',
        'unicode': '你好🙂',
        'nullbyte': 'null\x00byte',
    }
    # 'spaces' is the only flavour left without an entry above.
    return fixed_values.get(flavour, ' padded ')
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def random_structure(rng: random.Random, depth=0, max_depth=5):
    """Build a random nested value out of primitives, lists, tuples, sets and dicts.

    Once *depth* reaches *max_depth* only primitives are produced, which
    bounds the nesting. Sets hold primitives only.
    """
    if depth >= max_depth:
        return random_primitive(rng)

    shape = rng.choice(['prim', 'list', 'dict', 'tuple', 'set'])
    deeper = depth + 1

    if shape == 'prim':
        return random_primitive(rng)

    if shape == 'list':
        size = rng.randint(0, 5)
        return [random_structure(rng, deeper, max_depth) for _ in range(size)]

    if shape == 'tuple':
        size = rng.randint(0, 4)
        return tuple(random_structure(rng, deeper, max_depth) for _ in range(size))

    if shape == 'set':
        # sets of simple primitives to reduce unhashable items
        members = set()
        for _ in range(rng.randint(0, 4)):
            item = random_primitive(rng)
            try:
                members.add(item)
            except Exception:
                members.add(str(item))
        return members

    # dict
    mapping = {}
    for _ in range(rng.randint(0, 5)):
        prefix = rng.choice(['k', 'key', 'n', 'id'])
        key = prefix + str(rng.randint(0, 1000))
        mapping[key] = random_structure(rng, deeper, max_depth)
    return mapping
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@pytest.mark.skip(reason="Fuzz test - run manually; exposes edge cases for further fixes")
def test_fuzz_roundtrip_random():
    """Round-trip up to 300 random structures and fail on the first mismatch."""
    failures = []
    for _ in range(300):
        original = random_structure(PRNG, 0, max_depth=5)
        encoded = toon_mod.dumps(original)
        decoded = toon_mod.loads(encoded)
        want = normalize(original)
        got = normalize(decoded)
        if compare_allow_nan(want, got):
            continue
        # Keep full context for the first mismatch, then stop generating.
        failures.append((original, encoded, decoded, want, got))
        break
    assert not failures, f"Fuzz found mismatch (first shown): {failures[0]}"
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# test_toon.py
|
|
2
|
+
import pytest
|
|
3
|
+
from collections import namedtuple
|
|
4
|
+
from dataclasses import dataclass, is_dataclass
|
|
5
|
+
import inspect
|
|
6
|
+
|
|
7
|
+
# Import your toon module
|
|
8
|
+
from pytoon import toon as toon_mod
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# -----------------------------
|
|
12
|
+
# Helper normalization for comparisons
|
|
13
|
+
# -----------------------------
|
|
14
|
+
def _is_namedtuple_instance(x):
    """Return True when *x* is a tuple that also has a ``_fields`` attribute."""
    if not isinstance(x, tuple):
        return False
    return hasattr(x, "_fields")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def normalize(obj):
    """
    Reduce *obj* to a canonical form made of plain dicts, lists and scalars.

    This lets a round-tripped value be compared with ``==`` against the
    original. Rules are tried in this order:

    * ``None``/bool/int/float/str are returned unchanged;
    * dataclass instances go through ``dataclasses.asdict``;
    * namedtuples go through ``_asdict()``;
    * dict values are normalized (keys are kept as-is);
    * lists and tuples both become lists;
    * sets become sorted lists; if sorting raises, the members are
      stringified and sorted instead;
    * objects with a ``__dict__`` become a dict of their attributes whose
      names do not start with ``_`` and whose values are not routines;
    * anything else becomes ``str(obj)``.
    """
    scalar_types = (bool, int, float, str)
    if obj is None or isinstance(obj, scalar_types):
        return obj

    # dataclass -> dict (best effort: any error here falls through to the
    # checks below)
    try:
        from dataclasses import asdict
        if is_dataclass(obj):
            return normalize(asdict(obj))
    except Exception:
        pass

    # namedtuple -> dict
    if _is_namedtuple_instance(obj):
        return normalize(obj._asdict())

    # dict
    if isinstance(obj, dict):
        return {key: normalize(value) for key, value in obj.items()}

    # list or tuple
    if isinstance(obj, (list, tuple)):
        return [normalize(element) for element in obj]

    # set -> sorted list
    if isinstance(obj, set):
        try:
            return sorted([normalize(member) for member in obj])
        except Exception:
            return sorted([str(normalize(member)) for member in obj])

    # custom object -> public attributes dict
    if hasattr(obj, "__dict__"):
        public = {
            attr: value
            for attr, value in vars(obj).items()
            if not (attr.startswith("_") or inspect.isroutine(value))
        }
        return normalize(public)

    # fallback
    return str(obj)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# -----------------------------
|
|
64
|
+
# Fixtures
|
|
65
|
+
# -----------------------------
|
|
66
|
+
# Namedtuple fixture type and two instances used by the test data.
Item = namedtuple("Item", "id label")
i1, i2 = Item(1, "A"), Item(2, "B")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class Book:
    """Dataclass fixture with two scalar fields and one list field."""

    title: str
    pages: int
    tags: list
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class Device:
    """Plain-object fixture: three public attributes plus one underscore-prefixed one."""

    def __init__(self, model, version, status):
        self.model, self.version, self.status = model, version, status
        # Underscore-prefixed attribute; test_custom_object_ignores_private_attrs
        # asserts that it does not appear in the round-tripped output.
        self._internal = "hidden"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Test objects
|
|
87
|
+
PYTHON_OBJECTS = [
    # Each entry is (object to serialise, label shown in failure messages).

    # 1. Primitives
    (
        {"a": 1, "b": 2.2, "c": True, "d": None, "e": "hello"},
        "primitives",
    ),

    # 2. Strings needing quoting
    (
        {"s1": "hello,world", "s2": "line\nbreak", "s3": " padded", "s4": "null"},
        "quoted_strings",
    ),

    # 3. Nested object
    (
        {
            "user": {"id": 123, "config": {"x": 10, "y": 20, "z": {"enabled": True}}},
            "roles": ["a", "b", "c"],
        },
        "nested",
    ),

    # 4. Uniform table
    (
        {"items": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]},
        "uniform_table",
    ),

    # 5. Non-uniform list
    (
        {"mixed": [{"x": 1}, {"x": 2, "y": True}, 100, "test"]},
        "non_uniform_list",
    ),

    # 6. Tuples & Sets
    (
        {"tuple": (1, 2, 3), "set": {"x", "y", "z"}},
        "tuple_set",
    ),

    # 7. Dataclass
    (Book("Sample", 100, ["t1", "t2"]), "dataclass"),

    # 8. Namedtuple list
    ([i1, i2], "namedtuple_list"),

    # 9. Custom object
    (Device("X100", "v1.0", "active"), "custom_object"),

    # 10. Deeply nested data
    (
        {
            "system": {
                "config": {
                    "levels": {"one": {"a": 1}, "two": {"b": 2}, "three": {"c": 3}},
                },
                "modes": ["on", "off"],
            },
        },
        "deep_nested",
    ),

    # 11. Mixed complex list
    (
        [
            {"k": "v"},
            [10, 20],
            {"coords": (5, 6)},
            None,
            True,
            {"nested": [{"m": 1}, {"n": 2}]},
        ],
        "complex_list",
    ),

    # 12. Unicode
    (
        {"u1": "こんにちは", "u2": "你好", "u3": "🙂🔥"},
        "unicode",
    ),

    # 13. Multiline text
    (
        {"note": "line1\nline2\nline3"},
        "multiline",
    ),
]
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# -----------------------------
|
|
176
|
+
# Round-trip tests: loads(dumps(obj)) == normalize(obj)
|
|
177
|
+
# -----------------------------
|
|
178
|
+
@pytest.mark.parametrize("obj,label", PYTHON_OBJECTS)
def test_roundtrip(obj, label):
    """Check that loads(dumps(obj)) normalizes to the same value as obj itself."""
    encoded = toon_mod.dumps(obj)
    decoded = toon_mod.loads(encoded)
    expected = normalize(obj)
    # The message carries the TOON text and both sides to make a failure debuggable.
    assert normalize(decoded) == expected, (
        f"Roundtrip mismatch for {label}\n"
        f"TOON:\n{encoded}\n"
        f"Parsed: {decoded}\n"
        f"Expected normalized: {expected}"
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# -----------------------------
|
|
191
|
+
# Hand-made TOON loads tests
|
|
192
|
+
# -----------------------------
|
|
193
|
+
@pytest.mark.parametrize("toon_str,expected", [
    # Simple
    ("a: 10\nb: true\n", {"a": 10, "b": True}),

    # Nested
    (
        "root:\n x: 1\n y:\n z: 2\n",
        {"root": {"x": 1, "y": {"z": 2}}},
    ),

    # Primitive list
    ("nums[3]: 1,2,3\n", {"nums": [1, 2, 3]}),

    # Table
    (
        "tbl[2]{id,val}:\n 1,A\n 2,B\n",
        {"tbl": [{"id": 1, "val": "A"}, {"id": 2, "val": "B"}]},
    ),

    # Mixed list
    ("""items[4]:
- 10
- test
-
x: 1
- false
""",
     {"items": [10, "test", {"x": 1}, False]}),

    # Unicode
    ('greet:\n hi: "你好"\n', {"greet": {"hi": "你好"}}),

    # Quoted
    (
        'strs:\n a: "hello,world"\n b: "line1\\nline2"\n',
        {"strs": {"a": "hello,world", "b": "line1\nline2"}},
    ),
])
def test_loads_examples(toon_str, expected):
    """Parse a hand-written TOON document and compare it with the expected value."""
    assert normalize(toon_mod.loads(toon_str)) == normalize(expected)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# -----------------------------
|
|
232
|
+
# Edge cases
|
|
233
|
+
# -----------------------------
|
|
234
|
+
def test_empty_structs():
    """Round-trip an empty list, an empty dict and an empty list nested in a dict."""
    original = {"empty_list": [], "empty_dict": {}, "nested": {"x": []}}
    decoded = toon_mod.loads(toon_mod.dumps(original))
    assert normalize(decoded) == normalize(original)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_table_with_commas_in_values():
    """Round-trip a list of uniform dicts whose string values contain commas."""
    rows = [
        {"id": 1, "name": "A,B"},
        {"id": 2, "name": "C,D"},
    ]
    original = {"records": rows}
    decoded = toon_mod.loads(toon_mod.dumps(original))
    assert normalize(decoded) == normalize(original)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def test_custom_object_ignores_private_attrs():
    """Round-trip a Device and check only its public attributes come back."""
    device = Device("M1", "v2", "ready")
    normalized = normalize(toon_mod.loads(toon_mod.dumps(device)))

    for public_name in ("model", "version", "status"):
        assert public_name in normalized

    # The private attribute must not appear, with or without its underscore.
    for hidden_name in ("_internal", "internal"):
        assert hidden_name not in normalized
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def test_dumps_produces_string():
    """dumps() on a one-key dict must return a non-empty str."""
    text = toon_mod.dumps({"a": 1})
    assert isinstance(text, str)
    assert len(text) > 0
|