python-toon-parser 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Akash Wankhede
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include tests *.py
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.4
2
+ Name: python-toon-parser
3
+ Version: 0.1.1
4
+ Summary: TOON (Token-Oriented Object Notation) serializer & parser for Python
5
+ Author-email: Akash Wankhede <akash.wankhede.pune@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/akash1551/pytoon
8
+ Project-URL: Source, https://github.com/akash1551/pytoon
9
+ Project-URL: Issues, https://github.com/akash1551/pytoon/issues
10
+ Keywords: toon,serialization,parser,format,llm,data
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Dynamic: license-file
23
+
24
+ # python-toon-parser
25
+
26
+ ![Build Status](https://img.shields.io/badge/build-passing-brightgreen)
27
+ ![Version](https://img.shields.io/badge/pypi-v0.1.1-blue)
28
+ ![License](https://img.shields.io/badge/license-MIT-green)
29
+
30
+ `python-toon-parser` — **TOON** (**T**oken-**O**riented **O**bject **N**otation) serializer and parser for Python.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install python-toon-parser
36
+ ```
37
+
38
+ ## Features
39
+
40
+ - **Human-Readable**: Minimal syntax, similar to YAML but distinct.
41
+ - **Round-Trip**: `dumps(obj)` -> `loads(text)` preserves structure.
42
+ - **Compact Tables**: Automatically detects lists of uniform objects and formats them as compact tables.
43
+ - **Broad Support**: Handles `dict`, `list`, `tuple`, `set`, `dataclasses`, `namedtuples`, and simple objects.
44
+
45
+ ## Quickstart
46
+
47
+ ```python
48
+ from pytoon import dumps, loads
49
+
50
+ data = {"items": [{"id":1,"name":"A"}, {"id":2,"name":"B"}]}
51
+
52
+ # Serialize
53
+ s = dumps(data)
54
+ print(s)
55
+
56
+ # Parse back
57
+ obj = loads(s)
58
+ print(obj)
59
+ ```
60
+
61
+ **Output:**
62
+ ```yaml
63
+ items[2]{id,name}:
64
+ 1,A
65
+ 2,B
66
+ ```
67
+
68
+ ## API Reference
69
+
70
+ ### `dumps(obj, name=None, indent=0) -> str`
71
+ Serializes a Python object to a TOON string.
72
+ - `obj`: The object to serialize.
73
+ - `name`: (Optional) Root key name for the object.
74
+ - `indent`: (Optional) Starting indentation level (default 0).
75
+
76
+ ### `loads(toon_str) -> Any`
77
+ Parses a TOON string back into Python objects.
78
+ - Returns `dict`, `list`, or primitive depending on the input.
@@ -0,0 +1,55 @@
1
+ # python-toon-parser
2
+
3
+ ![Build Status](https://img.shields.io/badge/build-passing-brightgreen)
4
+ ![Version](https://img.shields.io/badge/pypi-v0.1.1-blue)
5
+ ![License](https://img.shields.io/badge/license-MIT-green)
6
+
7
+ `python-toon-parser` — **TOON** (**T**oken-**O**riented **O**bject **N**otation) serializer and parser for Python.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install python-toon-parser
13
+ ```
14
+
15
+ ## Features
16
+
17
+ - **Human-Readable**: Minimal syntax, similar to YAML but distinct.
18
+ - **Round-Trip**: `dumps(obj)` -> `loads(text)` preserves structure.
19
+ - **Compact Tables**: Automatically detects lists of uniform objects and formats them as compact tables.
20
+ - **Broad Support**: Handles `dict`, `list`, `tuple`, `set`, `dataclasses`, `namedtuples`, and simple objects.
21
+
22
+ ## Quickstart
23
+
24
+ ```python
25
+ from pytoon import dumps, loads
26
+
27
+ data = {"items": [{"id":1,"name":"A"}, {"id":2,"name":"B"}]}
28
+
29
+ # Serialize
30
+ s = dumps(data)
31
+ print(s)
32
+
33
+ # Parse back
34
+ obj = loads(s)
35
+ print(obj)
36
+ ```
37
+
38
+ **Output:**
39
+ ```yaml
40
+ items[2]{id,name}:
41
+ 1,A
42
+ 2,B
43
+ ```
44
+
45
+ ## API Reference
46
+
47
+ ### `dumps(obj, name=None, indent=0) -> str`
48
+ Serializes a Python object to a TOON string.
49
+ - `obj`: The object to serialize.
50
+ - `name`: (Optional) Root key name for the object.
51
+ - `indent`: (Optional) Starting indentation level (default 0).
52
+
53
+ ### `loads(toon_str) -> Any`
54
+ Parses a TOON string back into Python objects.
55
+ - Returns `dict`, `list`, or primitive depending on the input.
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "python-toon-parser"
7
+ version = "0.1.1"
8
+ description = "TOON (Token-Oriented Object Notation) serializer & parser for Python"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+
13
+ authors = [
14
+ { name="Akash Wankhede", email="akash.wankhede.pune@gmail.com" }
15
+ ]
16
+
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Intended Audience :: Developers",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Topic :: Software Development :: Libraries",
25
+ "License :: OSI Approved :: MIT License",
26
+ ]
27
+
28
+ keywords = ["toon", "serialization", "parser", "format", "llm", "data"]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/akash1551/pytoon"
32
+ Source = "https://github.com/akash1551/pytoon"
33
+ Issues = "https://github.com/akash1551/pytoon/issues"
@@ -0,0 +1,78 @@
1
+ Metadata-Version: 2.4
2
+ Name: python-toon-parser
3
+ Version: 0.1.1
4
+ Summary: TOON (Token-Oriented Object Notation) serializer & parser for Python
5
+ Author-email: Akash Wankhede <akash.wankhede.pune@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/akash1551/pytoon
8
+ Project-URL: Source, https://github.com/akash1551/pytoon
9
+ Project-URL: Issues, https://github.com/akash1551/pytoon/issues
10
+ Keywords: toon,serialization,parser,format,llm,data
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Dynamic: license-file
23
+
24
+ # python-toon-parser
25
+
26
+ ![Build Status](https://img.shields.io/badge/build-passing-brightgreen)
27
+ ![Version](https://img.shields.io/badge/pypi-v0.1.1-blue)
28
+ ![License](https://img.shields.io/badge/license-MIT-green)
29
+
30
+ `python-toon-parser` — **TOON** (**T**oken-**O**riented **O**bject **N**otation) serializer and parser for Python.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install python-toon-parser
36
+ ```
37
+
38
+ ## Features
39
+
40
+ - **Human-Readable**: Minimal syntax, similar to YAML but distinct.
41
+ - **Round-Trip**: `dumps(obj)` -> `loads(text)` preserves structure.
42
+ - **Compact Tables**: Automatically detects lists of uniform objects and formats them as compact tables.
43
+ - **Broad Support**: Handles `dict`, `list`, `tuple`, `set`, `dataclasses`, `namedtuples`, and simple objects.
44
+
45
+ ## Quickstart
46
+
47
+ ```python
48
+ from pytoon import dumps, loads
49
+
50
+ data = {"items": [{"id":1,"name":"A"}, {"id":2,"name":"B"}]}
51
+
52
+ # Serialize
53
+ s = dumps(data)
54
+ print(s)
55
+
56
+ # Parse back
57
+ obj = loads(s)
58
+ print(obj)
59
+ ```
60
+
61
+ **Output:**
62
+ ```yaml
63
+ items[2]{id,name}:
64
+ 1,A
65
+ 2,B
66
+ ```
67
+
68
+ ## API Reference
69
+
70
+ ### `dumps(obj, name=None, indent=0) -> str`
71
+ Serializes a Python object to a TOON string.
72
+ - `obj`: The object to serialize.
73
+ - `name`: (Optional) Root key name for the object.
74
+ - `indent`: (Optional) Starting indentation level (default 0).
75
+
76
+ ### `loads(toon_str) -> Any`
77
+ Parses a TOON string back into Python objects.
78
+ - Returns `dict`, `list`, or primitive depending on the input.
@@ -0,0 +1,16 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ setup.py
6
+ python_toon_parser.egg-info/PKG-INFO
7
+ python_toon_parser.egg-info/SOURCES.txt
8
+ python_toon_parser.egg-info/dependency_links.txt
9
+ python_toon_parser.egg-info/top_level.txt
10
+ pytoon/__init__.py
11
+ pytoon/__version__.py
12
+ pytoon/toon.py
13
+ tests/__init__.py
14
+ tests/test_edgecases.py
15
+ tests/test_fuzz.py
16
+ tests/test_toon.py
@@ -0,0 +1,14 @@
1
+ """
2
+ pytoon package - TOON (Token-Oriented Object Notation) for Python.
3
+ Expose dumps / loads API and package version.
4
+ """
5
+
6
+ from .toon import dumps, loads
7
+
8
try:
    # The version string lives in the sibling module ``__version__.py``.
    # It has to be imported relatively: Python 3 has no implicit relative
    # imports, so a bare ``from __version__ import ...`` looks for a
    # top-level module, fails, and leaves the package reporting "0.0.0".
    from .__version__ import __version__
except ImportError:
    # Fallback when the version module is missing from the installation.
    __version__ = "0.0.0"

__all__ = ["dumps", "loads", "__version__"]
@@ -0,0 +1 @@
1
# Runtime version string re-exported by the package's __init__; keep it in
# step with the ``version`` field declared in pyproject.toml.
__version__ = "0.1.1"
@@ -0,0 +1,529 @@
1
+ """
2
+ toon.py
3
+
4
+ A self-contained, practical Python implementation of TOON (Token-Oriented Object Notation)
5
+ with a JSON-like API: `dumps(obj)` and `loads(toon_str)`.
6
+
7
+ - dumps(obj) -> str : Serialize common Python objects into TOON.
8
+ - loads(toon_str) -> Any : Parse TOON produced by dumps back into Python types.
9
+
10
+ This is a best-effort, usable implementation focused on round-tripping dumps -> loads for
11
+ typical Python objects: primitives, dicts, lists/tuples/sets, dataclasses/namedtuples and
12
+ uniform-table arrays. It is not a complete spec implementation, but is practical and easy
13
+ to extend.
14
+
15
+ Usage:
16
+ from pytoon import dumps, loads
17
+ s = dumps(my_obj)
18
+ obj = loads(s)
19
+ """
20
+
21
import inspect
import json
from dataclasses import asdict, is_dataclass
from typing import Any, Dict, List, Optional, Tuple
25
+
26
# Indentation unit: dumps() repeats it once per nesting level and loads()
# counts how many times it prefixes each line.
# NOTE(review): the trailing comment says "two spaces" but the literal as
# shown here is a single space — confirm which one is intended.
INDENT_STR = " "  # two spaces per level (modifiable if desired)
27
+
28
+
29
+ # -------------------- Helpers for serialization --------------------
30
+
31
+
32
+ def _is_primitive(x):
33
+ return x is None or isinstance(x, (bool, int, float, str))
34
+
35
+
36
+ def _to_toon_primitive(x):
37
+ """Return a TOON-safe string for a primitive."""
38
+ if x is None:
39
+ return "null"
40
+ if isinstance(x, bool):
41
+ return "true" if x else "false"
42
+ if isinstance(x, (int, float)):
43
+ return str(x)
44
+ s = str(x)
45
+ if s == "":
46
+ return '""'
47
+ needs_quote = (
48
+ any(ch in s for ch in [",", "\n", "\r"])
49
+ or s[0].isspace()
50
+ or s[-1].isspace()
51
+ or s.startswith(('"', "'"))
52
+ or s.lower() in ("null", "true", "false")
53
+ )
54
+ if needs_quote:
55
+ return json.dumps(s, ensure_ascii=False)
56
+ return s
57
+
58
+
59
+ def _escape_header_key(k: str) -> str:
60
+ """Make a key safe for header usage; quote if it's not a simple identifier."""
61
+ if isinstance(k, str) and k.isidentifier():
62
+ return k
63
+ return json.dumps(str(k), ensure_ascii=False)
64
+
65
+
66
+ def _is_namedtuple_instance(x):
67
+ return isinstance(x, tuple) and hasattr(x, "_fields")
68
+
69
+
70
+ def _object_to_dict(obj):
71
+ """Convert dataclass/namedtuple/object to dict for serialization."""
72
+ if is_dataclass(obj):
73
+ return asdict(obj)
74
+ if _is_namedtuple_instance(obj):
75
+ return obj._asdict()
76
+ if hasattr(obj, "__dict__"):
77
+ return {
78
+ k: v
79
+ for k, v in vars(obj).items()
80
+ if not k.startswith("_") and not inspect.isroutine(v)
81
+ }
82
+ # fallback
83
+ return {"value": repr(obj)}
84
+
85
+
86
+ def _all_dicts_uniform(list_of_dicts: List[dict]) -> Tuple[bool, List[str] or None]:
87
+ """
88
+ Return (is_uniform, keys_order).
89
+ Uniform means every element is a dict and they all have the same set of keys.
90
+ If insertion order is consistent across elements, return that order; otherwise return sorted keys.
91
+ """
92
+ if not list_of_dicts:
93
+ return False, None
94
+ if not all(isinstance(item, dict) for item in list_of_dicts):
95
+ return False, None
96
+ sets = [set(d.keys()) for d in list_of_dicts]
97
+ first_set = sets[0]
98
+ if all(s == first_set for s in sets):
99
+ first_keys = list(list_of_dicts[0].keys())
100
+ if all(tuple(d.keys()) == tuple(first_keys) for d in list_of_dicts):
101
+ return True, first_keys
102
+ return True, sorted(first_set)
103
+ return False, None
104
+
105
+
106
+ # -------------------- Serialization: dumps --------------------
107
+
108
+
109
def dumps(obj: Any, name: Optional[str] = None, indent: int = 0) -> str:
    """
    Serialize a Python object into TOON text.

    Args:
        obj: Value to serialize: a primitive, dict, list/tuple/set,
            dataclass instance, namedtuple or plain object.
        name: Optional key under which the value is written.  An empty
            string is treated like ``None``.
        indent: Nesting level of the first emitted line (0 == top level).

    Returns:
        The TOON text, always terminated by a newline.
    """
    pad = INDENT_STR * indent

    # Primitives: "name: value" or just "value".
    if _is_primitive(obj):
        val = _to_toon_primitive(obj)
        if name:
            return f"{pad}{name}: {val}\n"
        return f"{pad}{val}\n"

    # Dataclass instance / namedtuple => dict.  Dataclass *types* are left
    # for the generic fallback at the end, since asdict() rejects them.
    if _is_namedtuple_instance(obj) or (
        is_dataclass(obj) and not isinstance(obj, type)
    ):
        obj = _object_to_dict(obj)

    # dict
    if isinstance(obj, dict):
        # With a name header the keys sit one level below the header.
        child_indent = indent + 1 if name else indent
        child_pad = INDENT_STR * child_indent
        # Explicit '{}' marker so the parser can tell an empty dict apart.
        if not obj:
            if name:
                return f"{pad}{name}:\n{child_pad}{{}}\n"
            return f"{pad}{{}}\n"
        lines = []
        if name:
            lines.append(f"{pad}{name}:")
        for k, v in obj.items():
            key = _escape_header_key(k)
            if _is_primitive(v):
                lines.append(f"{child_pad}{key}: {_to_toon_primitive(v)}")
            else:
                lines.append(f"{child_pad}{key}:")
                # Nested content goes one level below its own key line, not
                # below the caller's indent; otherwise a named dict would
                # emit nested values at the same depth as their key.
                lines.append(
                    dumps(v, name=None, indent=child_indent + 1).rstrip("\n")
                )
        return "\n".join(lines) + "\n"

    # list / tuple / set
    if isinstance(obj, (list, tuple, set)):
        if isinstance(obj, set):
            # Sets are sorted where possible so output is deterministic.
            try:
                lst = sorted(obj)
            except Exception:
                lst = list(obj)
        else:
            lst = list(obj)
        n = len(lst)
        if n == 0:
            return f"{pad}{name}[0]:\n" if name else f"{pad}[]\n"

        uniform, keys = _all_dicts_uniform(lst)
        # Compact table: uniform dicts whose values are all primitives.
        if (
            uniform
            and keys
            and all(_is_primitive(v) for d in lst for v in d.values())
        ):
            keys_escaped = ",".join(_escape_header_key(k) for k in keys)
            prefix = name if name else ""
            lines = [f"{pad}{prefix}[{n}]{{{keys_escaped}}}:"]
            for d in lst:
                row = ",".join(_to_toon_primitive(d.get(k)) for k in keys)
                lines.append(f"{pad}{INDENT_STR}{row}")
            return "\n".join(lines) + "\n"

        # All primitives: inline "name[N]: a,b" or an unnamed "- a, b" line.
        if all(_is_primitive(x) for x in lst):
            tokens = [_to_toon_primitive(x) for x in lst]
            if name:
                return f"{pad}{name}[{n}]: " + ",".join(tokens) + "\n"
            return f"{pad}- " + ", ".join(tokens) + "\n"

        # Mixed/complex items: one '-' marker per item, with nested content
        # one level deeper than the marker.
        lines = []
        if name:
            lines.append(f"{pad}{name}[{n}]:")
            item_pad = pad + INDENT_STR
            nested_indent = indent + 2
        else:
            item_pad = pad
            nested_indent = indent + 1

        for item in lst:
            if _is_primitive(item):
                lines.append(f"{item_pad}- {_to_toon_primitive(item)}")
                continue
            lines.append(f"{item_pad}-")
            # The recursive call converts dataclasses/namedtuples itself.
            lines.append(
                dumps(item, name=None, indent=nested_indent).rstrip("\n")
            )
        return "\n".join(lines) + "\n"

    # Fallback for arbitrary objects: serialize their dict form, or their
    # repr() if that conversion fails.
    try:
        obj_dict = _object_to_dict(obj)
        return dumps(obj_dict, name=name, indent=indent)
    except Exception:
        val = _to_toon_primitive(repr(obj))
        if name:
            return f"{pad}{name}: {val}\n"
        return f"{pad}{val}\n"
226
+
227
+
228
+ # -------------------- Parsing: loads --------------------
229
+
230
+
231
+ def _parse_primitive_token(tok: str):
232
+ tok = tok.strip()
233
+ if tok == "":
234
+ return ""
235
+ if tok == "null":
236
+ return None
237
+ if tok == "true":
238
+ return True
239
+ if tok == "false":
240
+ return False
241
+ # JSON quoted string
242
+ if (tok.startswith('"') and tok.endswith('"')) or (
243
+ tok.startswith("'") and tok.endswith("'")
244
+ ):
245
+ try:
246
+ return json.loads(tok)
247
+ except Exception:
248
+ return tok[1:-1]
249
+ # try int then float (float handles nan/inf and exponent forms)
250
+ try:
251
+ # attempt exact int parsing first
252
+ try:
253
+ return int(tok)
254
+ except Exception:
255
+ # fall back to float parsing (accepts 'nan', 'inf', '1e3', etc.)
256
+ return float(tok)
257
+ except Exception:
258
+ return tok
259
+
260
+
261
def _count_leading_indent(s: str) -> int:
    """Return how many consecutive copies of INDENT_STR *s* begins with."""
    step = len(INDENT_STR)
    levels = 0
    # Probe successive offsets instead of slicing the string each round.
    while s.startswith(INDENT_STR, levels * step):
        levels += 1
    return levels
268
+
269
+
270
+ def _split_csv_like(s: str) -> List[str]:
271
+ """
272
+ Split comma-separated tokens but respect quoted substrings.
273
+ Returns list of tokens (whitespace preserved trimmed).
274
+ """
275
+ parts = []
276
+ cur = ""
277
+ in_q = False
278
+ qchar = None
279
+ i = 0
280
+ while i < len(s):
281
+ ch = s[i]
282
+ if ch in ('"', "'"):
283
+ if not in_q:
284
+ in_q = True
285
+ qchar = ch
286
+ cur += ch
287
+ elif qchar == ch:
288
+ in_q = False
289
+ cur += ch
290
+ else:
291
+ cur += ch
292
+ elif ch == "," and not in_q:
293
+ parts.append(cur.strip())
294
+ cur = ""
295
+ else:
296
+ cur += ch
297
+ i += 1
298
+ if cur.strip() != "":
299
+ parts.append(cur.strip())
300
+ return parts
301
+
302
+
303
def loads(toon_str: str) -> Any:
    """
    Parse TOON text (as produced by dumps) back into Python values.

    Blank lines are ignored and nesting is derived from how many INDENT_STR
    units prefix each line.

    Args:
        toon_str: The TOON document.

    Returns:
        A dict, a list or a single primitive, depending on the document.
        Empty input yields an empty dict.
    """
    unit = len(INDENT_STR)

    # Preprocess into (indent_level, content) pairs, skipping blank lines.
    processed: List[Tuple[int, str]] = []
    for ln in toon_str.splitlines():
        if ln.strip() == "":
            continue
        depth = _count_leading_indent(ln)
        processed.append((depth, ln[depth * unit :]))

    idx = 0
    N = len(processed)

    def find_colon(content: str):
        """Index of the first ':' outside quotes, or None."""
        quote = None
        for pos, ch in enumerate(content):
            if ch in ('"', "'"):
                if quote is None:
                    quote = ch
                elif quote == ch:
                    quote = None
            if ch == ":" and quote is None:
                return pos
        return None

    def is_json_string(tok: str) -> bool:
        """True when *tok* as a whole is one JSON string literal."""
        try:
            return isinstance(json.loads(tok), str)
        except ValueError:
            return False

    def plain_key(key: str) -> str:
        """Decode a key that dumps() wrote as a JSON string literal."""
        if len(key) >= 2 and key[0] == '"' and key[-1] == '"':
            try:
                decoded = json.loads(key)
            except ValueError:
                return key
            if isinstance(decoded, str):
                return decoded
        return key

    def table_keys(header: str) -> List[str]:
        """Column names from a ``name[N]{k1,k2}`` header ([] if malformed)."""
        try:
            br_end = header.index("]", header.index("["))
            keys_start = header.index("{", br_end)
            keys_end = header.index("}", keys_start)
        except ValueError:
            return []
        return [
            k.strip().strip('"').strip("'")
            for k in header[keys_start + 1 : keys_end].split(",")
            if k.strip() != ""
        ]

    def parse_block(expected_indent: int):
        nonlocal idx
        result: Dict[str, Any] = {}
        arr_mode = None  # becomes a list once a '-' item is seen

        while idx < N:
            indent, content = processed[idx]
            if indent != expected_indent:
                # Shallower lines belong to an enclosing block; deeper ones
                # are consumed by whichever branch opened them.
                break

            # Explicit empty containers "[]" / "{}".
            marker = content.strip()
            if marker == "[]" or marker == "{}":
                idx += 1
                if arr_mode is None and not result:
                    return [] if marker == "[]" else {}
                continue

            # List items: "- value" or a bare "-" followed by a nested block.
            if content.startswith("- ") or content == "-":
                if arr_mode is None:
                    arr_mode = []
                idx += 1
                if content == "-":
                    if idx < N and processed[idx][0] > indent:
                        arr_mode.append(parse_block(indent + 1))
                    else:
                        arr_mode.append(None)
                    continue
                val_tok = content[2:].strip()
                # A token without commas, or one that is a single quoted
                # string (which may itself contain commas), is one item.
                if "," not in val_tok or is_json_string(val_tok):
                    arr_mode.append(_parse_primitive_token(val_tok))
                    continue
                # Otherwise this is the one-line "- a, b, c" form.  It is
                # split even when the first element is quoted, so that
                # '- "a,b", c' yields two items.
                parts = _split_csv_like(val_tok)
                if val_tok.startswith(('"', "'")) and len(parts) <= 1:
                    arr_mode.append(_parse_primitive_token(val_tok))
                else:
                    arr_mode.extend(_parse_primitive_token(p) for p in parts)
                continue

            # Remaining forms: "key: value", "key:", "name[N]{...}:",
            # "name[N]:" and "name[N]: v1,v2".
            colon_pos = find_colon(content)
            if colon_pos is None:
                # No key: a primitive-only line.
                val = _parse_primitive_token(content)
                idx += 1
                if arr_mode is None and not result:
                    return val
                if arr_mode is not None:
                    arr_mode.append(val)
                # A keyless primitive inside a mapping is dropped.
                continue

            key_part = content[:colon_pos].strip()
            val_part = content[colon_pos + 1 :].strip()
            idx += 1
            # Brackets inside a quoted key are part of the key text, not
            # array syntax.
            quoted_key = key_part.startswith('"')
            has_brackets = (
                not quoted_key and "[" in key_part and "]" in key_part
            )

            if val_part != "":
                if has_brackets:
                    # Inline array: name[N]: v1,v2
                    arr_name = key_part.split("[", 1)[0].strip()
                    result[arr_name] = [
                        _parse_primitive_token(p)
                        for p in _split_csv_like(val_part)
                        if p != ""
                    ]
                else:
                    # Regular "key: value".
                    result[plain_key(key_part)] = _parse_primitive_token(val_part)
                continue

            if has_brackets and "{" in key_part and "}" in key_part:
                # Table header: rows follow one level deeper.
                keys = table_keys(key_part)
                rows = []
                while idx < N and processed[idx][0] == expected_indent + 1:
                    cells = _split_csv_like(processed[idx][1].strip())
                    rows.append(
                        {
                            k: _parse_primitive_token(cell)
                            for k, cell in zip(keys, cells)
                        }
                    )
                    idx += 1
                name_section = key_part.split("[", 1)[0].strip()
                # An anonymous table is stored under "_table" and unwrapped
                # below when it is the only entry of the block.
                result[name_section or "_table"] = rows
                continue

            # "key:" or "name[N]:" followed by a nested block.
            if has_brackets and key_part.endswith("]"):
                real_key = key_part.split("[", 1)[0].strip()
                is_list_header = True
            else:
                real_key = plain_key(key_part)
                is_list_header = False
            nested = parse_block(expected_indent + 1)
            # "name[0]:" has no body; make sure it reads back as a list.
            if is_list_header and nested == {}:
                nested = []
            result[real_key] = nested

        if arr_mode is not None:
            return arr_mode
        if len(result) == 1 and "_table" in result:
            return result["_table"]
        return result

    return parse_block(0)
502
+
503
+
504
+ # -------------------- Quick demo when run as script --------------------
505
if __name__ == "__main__":
    # Round-trip demo: serialize a sample structure, print the TOON text,
    # then parse it back and print the result.
    from collections import namedtuple

    Person = namedtuple("Person", ["id", "name", "role"])
    people = [Person(1, "Alice", "admin"), Person(2, "Bob", "user")]

    hikes = [
        {"id": 1, "name": "Blue Lake Trail", "distanceKm": 7.5},
        {"id": 2, "name": "Ridge, Overlook", "distanceKm": 9.2},
    ]
    example = {
        "context": {"task": "Roundtrip demo", "season": "spring_2025"},
        "friends": ["ana", "luis", "sam"],
        "hikes": hikes,
        "people": people,
        "misc": (1, None, "two"),
        "empty_list": [],
    }

    text = dumps(example)
    print("=== TOON ===")
    print(text)
    print("=== PARSED BACK ===")
    print(loads(text))
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from setuptools import setup
2
+
3
+ setup()
File without changes
@@ -0,0 +1,33 @@
1
+ import math
2
+ from pytoon import toon as toon_mod
3
+
4
+
5
def test_special_floats():
    """NaN and both infinities survive a dumps/loads round trip."""
    original = {"a": float("nan"), "b": float("inf"), "c": -float("inf")}
    restored = toon_mod.loads(toon_mod.dumps(original))
    pos_inf = restored["b"]
    assert math.isinf(pos_inf) and pos_inf > 0
    neg_inf = restored["c"]
    assert math.isinf(neg_inf) and neg_inf < 0
    assert math.isnan(restored["a"])
12
+
13
+
14
def test_null_byte_and_control_chars():
    """Strings with a NUL byte, a newline or a leading tab round-trip unchanged."""
    original = {"x": "null\x00byte", "y": "line1\nline2", "z": "\tindented"}
    restored = toon_mod.loads(toon_mod.dumps(original))
    for key in ("x", "y", "z"):
        assert restored[key] == original[key]
21
+
22
+
23
def test_deeply_nested():
    """A 60-level chain of single-key dicts can be dumped and parsed back."""
    depth = 60
    root = {}
    cursor = root
    for level in range(depth):
        # Hang a fresh empty dict under this level's key and descend into it.
        child = {}
        cursor[f"lvl{level}"] = child
        cursor = child
    text = toon_mod.dumps(root)
    restored = toon_mod.loads(text)
    assert isinstance(restored, dict)
@@ -0,0 +1,118 @@
1
+ import random
2
+ import math
3
+ import pytest
4
+ from pytoon import toon as toon_mod
5
+
6
try:
    # Reuse the canonicalising helper from the main test module when the
    # ``tests`` package can be imported.
    from tests.test_toon import normalize
except Exception:
    # Otherwise fall back to an identity function, so comparisons run on
    # the raw values.
    def normalize(x):
        return x
12
+
13
+
14
def is_nan(x):
    """Return True only when *x* is a float holding NaN."""
    if not isinstance(x, float):
        return False
    return math.isnan(x)
16
+
17
+
18
def compare_allow_nan(a, b):
    """Recursively compare two normalized values, treating NaN as equal to NaN.

    Values of different types never match.  Dicts need equal key sets and
    matching values.  Lists and tuples are compared element by element, with
    an order-insensitive comparison of string forms as a fallback.  Sets are
    compared through their sorted contents.
    """
    if is_nan(a) and is_nan(b):
        return True
    if type(a) != type(b):
        return False

    if isinstance(a, dict):
        if a.keys() != b.keys():
            return False
        for key in a:
            if not compare_allow_nan(a[key], b[key]):
                return False
        return True

    if isinstance(a, (list, tuple)):
        if len(a) != len(b):
            return False
        # Positional comparison first.
        pairwise = True
        for left, right in zip(a, b):
            if not compare_allow_nan(left, right):
                pairwise = False
                break
        if pairwise:
            return True
        # Fallback: ignore order, comparing string representations
        # (e.g. sets that were turned into lists).
        try:
            return sorted(map(str, a)) == sorted(map(str, b))
        except Exception:
            return False

    if isinstance(a, set):
        try:
            return compare_allow_nan(sorted(a), sorted(b))
        except Exception:
            return sorted(map(str, a)) == sorted(map(str, b))

    return a == b
45
+
46
+
47
# Module-level generator with a fixed seed, so the fuzz inputs are the same
# on every run.
PRNG = random.Random(12345)
48
+
49
+
50
def random_primitive(rng: random.Random):
    """Draw a random primitive (None, bool, int, float or str) from *rng*.

    Floats include NaN and both infinities; strings include commas,
    newlines, non-ASCII text, a NUL byte and padded text.
    """
    kind = rng.choice(['none', 'bool', 'int', 'float', 'str'])
    if kind == 'none':
        return None
    if kind == 'bool':
        return rng.choice([True, False])
    if kind == 'int':
        # Range wide enough to produce large ints now and then.
        return rng.randint(-10**18, 10**18)
    if kind == 'float':
        # The uniform draw happens before the choice, even when a special
        # value ends up being picked.
        finite = rng.uniform(-1e6, 1e6)
        return rng.choice([finite, float('nan'), float('inf'), -float('inf')])

    # Strings: pick a flavour, then build or look up the value.
    flavour = rng.choice(['simple', 'comma', 'newline', 'unicode', 'nullbyte', 'spaces'])
    if flavour == 'simple':
        length = rng.randint(0, 10)
        return ''.join(rng.choices('abcdEFG123', k=length))
    fixed = {
        'comma': 'a,b,c',
        'newline': 'line1\nline2',
        'unicode': '你好🙂',
        'nullbyte': 'null\x00byte',
    }
    return fixed.get(flavour, ' padded ')
76
+
77
+
78
def random_structure(rng: random.Random, depth=0, max_depth=5):
    """Build a random nested value of primitives, lists, tuples, sets and dicts.

    Once *depth* reaches *max_depth* only primitives are produced.
    """
    if depth >= max_depth:
        return random_primitive(rng)

    kind = rng.choice(['prim', 'list', 'dict', 'tuple', 'set'])
    if kind == 'prim':
        return random_primitive(rng)

    deeper = depth + 1
    if kind == 'list':
        size = rng.randint(0, 5)
        return [random_structure(rng, deeper, max_depth) for _ in range(size)]
    if kind == 'tuple':
        size = rng.randint(0, 4)
        return tuple(random_structure(rng, deeper, max_depth) for _ in range(size))
    if kind == 'set':
        # Only primitives go into sets, to limit unhashable members.
        members = set()
        for _ in range(rng.randint(0, 4)):
            candidate = random_primitive(rng)
            try:
                members.add(candidate)
            except Exception:
                members.add(str(candidate))
        return members

    # dict
    mapping = {}
    for _ in range(rng.randint(0, 5)):
        prefix = rng.choice(['k', 'key', 'n', 'id'])
        key = prefix + str(rng.randint(0, 1000))
        mapping[key] = random_structure(rng, deeper, max_depth)
    return mapping
104
+
105
+
106
@pytest.mark.skip(reason="Fuzz test - run manually; exposes edge cases for further fixes")
def test_fuzz_roundtrip_random():
    """Round-trip up to 300 random structures and report the first mismatch."""
    failures = []
    for _ in range(300):
        obj = random_structure(PRNG, 0, max_depth=5)
        text = toon_mod.dumps(obj)
        restored = toon_mod.loads(text)
        expected = normalize(obj)
        actual = normalize(restored)
        if compare_allow_nan(expected, actual):
            continue
        # Stop at the first mismatch; one example is enough to debug.
        failures.append((obj, text, restored, expected, actual))
        break
    assert not failures, f"Fuzz found mismatch (first shown): {failures[0]}"
@@ -0,0 +1,269 @@
1
+ # test_toon.py
2
+ import pytest
3
+ from collections import namedtuple
4
+ from dataclasses import dataclass, is_dataclass
5
+ import inspect
6
+
7
+ # Import your toon module
8
+ from pytoon import toon as toon_mod
9
+
10
+
11
+ # -----------------------------
12
+ # Helper normalization for comparisons
13
+ # -----------------------------
14
def _is_namedtuple_instance(x):
    """Return True when *x* is a tuple that also exposes ``_fields``."""
    return isinstance(x, tuple) and hasattr(x, "_fields")


def normalize(obj):
    """
    Convert a Python object into a canonical, comparable form.

    Conversion rules, applied in this order:

    * ``None``, ``bool``, ``int``, ``float`` and ``str`` are returned as-is.
    * dataclass instances become a dict of their normalized fields.
    * namedtuples become a dict via ``_asdict()``.
    * dicts keep their keys; values are normalized.
    * lists and tuples become lists of normalized items.
    * sets and frozensets become sorted lists; when the items cannot be
      ordered against each other, their ``str()`` forms are sorted instead.
    * any other object with a ``__dict__`` becomes a dict of its public,
      non-routine attributes.
    * everything else is converted with ``str()``.
    """
    # Local import keeps this helper self-contained.
    from dataclasses import fields

    if obj is None or isinstance(obj, (bool, int, float, str)):
        return obj

    # dataclass instance -> dict. is_dataclass() is also true for dataclass
    # *classes*, which carry no field values, so those are excluded here and
    # fall through to the generic rules below.
    if is_dataclass(obj) and not isinstance(obj, type):
        return {f.name: normalize(getattr(obj, f.name)) for f in fields(obj)}

    # namedtuple -> dict (must be checked before the plain tuple rule)
    if _is_namedtuple_instance(obj):
        return normalize(obj._asdict())

    # dict
    if isinstance(obj, dict):
        return {k: normalize(v) for k, v in obj.items()}

    # list or tuple
    if isinstance(obj, (list, tuple)):
        return [normalize(v) for v in obj]

    # set / frozenset -> sorted list
    if isinstance(obj, (set, frozenset)):
        items = [normalize(v) for v in obj]
        try:
            return sorted(items)
        except TypeError:
            # Mixed, mutually unorderable items: sort their text forms.
            return sorted(str(v) for v in items)

    # custom object -> public attributes dict
    if hasattr(obj, "__dict__"):
        public = {
            k: v
            for k, v in vars(obj).items()
            if not k.startswith("_") and not inspect.isroutine(v)
        }
        return normalize(public)

    # fallback
    return str(obj)
61
+
62
+
63
+ # -----------------------------
64
+ # Fixtures
65
+ # -----------------------------
66
# Namedtuple fixture type and two sample rows.
Item = namedtuple("Item", ("id", "label"))
i1 = Item(id=1, label="A")
i2 = Item(id=2, label="B")
69
+
70
+
71
@dataclass
class Book:
    """Dataclass fixture with a string, an integer and a list field."""

    title: str
    pages: int
    tags: list
76
+
77
+
78
class Device:
    """Plain-object fixture: three public attributes and one private one."""

    def __init__(self, model, version, status):
        # Public attributes, assigned in declaration order.
        self.model, self.version, self.status = model, version, status
        # Underscore-prefixed attribute with a fixed value.
        self._internal = "hidden"
84
+
85
+
86
+ # Test objects
87
# Round-trip corpus: each entry is an (object, label) pair.
PYTHON_OBJECTS = [
    # 1. Primitives
    (
        {"a": 1, "b": 2.2, "c": True, "d": None, "e": "hello"},
        "primitives",
    ),
    # 2. Strings needing quoting
    (
        {
            "s1": "hello,world",
            "s2": "line\nbreak",
            "s3": " padded",
            "s4": "null",
        },
        "quoted_strings",
    ),
    # 3. Nested object
    (
        {
            "user": {
                "id": 123,
                "config": {"x": 10, "y": 20, "z": {"enabled": True}},
            },
            "roles": ["a", "b", "c"],
        },
        "nested",
    ),
    # 4. Uniform table
    (
        {"items": [{"a": 1, "b": 2}, {"a": 3, "b": 4}]},
        "uniform_table",
    ),
    # 5. Non-uniform list
    (
        {"mixed": [{"x": 1}, {"x": 2, "y": True}, 100, "test"]},
        "non_uniform_list",
    ),
    # 6. Tuples & Sets
    (
        {"tuple": (1, 2, 3), "set": {"x", "y", "z"}},
        "tuple_set",
    ),
    # 7. Dataclass
    (Book("Sample", 100, ["t1", "t2"]), "dataclass"),
    # 8. Namedtuple list
    ([i1, i2], "namedtuple_list"),
    # 9. Custom object
    (Device("X100", "v1.0", "active"), "custom_object"),
    # 10. Deeply nested data
    (
        {
            "system": {
                "config": {
                    "levels": {
                        "one": {"a": 1},
                        "two": {"b": 2},
                        "three": {"c": 3},
                    },
                },
                "modes": ["on", "off"],
            },
        },
        "deep_nested",
    ),
    # 11. Mixed complex list
    (
        [
            {"k": "v"},
            [10, 20],
            {"coords": (5, 6)},
            None,
            True,
            {"nested": [{"m": 1}, {"n": 2}]},
        ],
        "complex_list",
    ),
    # 12. Unicode
    (
        {"u1": "こんにちは", "u2": "你好", "u3": "🙂🔥"},
        "unicode",
    ),
    # 13. Multiline text
    (
        {"note": "line1\nline2\nline3"},
        "multiline",
    ),
]
173
+
174
+
175
+ # -----------------------------
176
+ # Round-trip tests: loads(dumps(obj)) == normalize(obj)
177
+ # -----------------------------
178
@pytest.mark.parametrize(
    "obj,label",
    PYTHON_OBJECTS,
    # Use each case's own label as the pytest id so a failing case is
    # identifiable from the test name.
    ids=[label for _, label in PYTHON_OBJECTS],
)
def test_roundtrip(obj, label):
    """Check that ``loads(dumps(obj))`` normalizes to the same value as ``obj``."""
    s = toon_mod.dumps(obj)
    parsed = toon_mod.loads(s)
    # Normalize the input once; the value is reused in the failure message.
    expected = normalize(obj)
    assert normalize(parsed) == expected, (
        f"Roundtrip mismatch for {label}\n"
        f"TOON:\n{s}\n"
        f"Parsed: {parsed}\n"
        f"Expected normalized: {expected}"
    )
188
+
189
+
190
+ # -----------------------------
191
+ # Hand-made TOON loads tests
192
+ # -----------------------------
193
# NOTE(review): nested lines below use two spaces per level, which is assumed
# to be the indentation the parser expects -- confirm against toon.loads.
@pytest.mark.parametrize("toon_str,expected", [

    # Simple
    ("a: 10\nb: true\n", {"a": 10, "b": True}),

    # Nested: "z" sits one level deeper than "y"
    ("root:\n  x: 1\n  y:\n    z: 2\n", {"root": {"x": 1, "y": {"z": 2}}}),

    # Primitive list
    ("nums[3]: 1,2,3\n", {"nums": [1, 2, 3]}),

    # Table
    ("tbl[2]{id,val}:\n  1,A\n  2,B\n",
     {"tbl": [{"id": 1, "val": "A"}, {"id": 2, "val": "B"}]}),

    # Mixed list; written line by line so the leading whitespace is explicit
    (
        "items[4]:\n"
        "  - 10\n"
        "  - test\n"
        "  -\n"
        "    x: 1\n"
        "  - false\n",
        {"items": [10, "test", {"x": 1}, False]},
    ),

    # Unicode
    ("greet:\n  hi: \"你好\"\n", {"greet": {"hi": "你好"}}),

    # Quoted
    ("strs:\n  a: \"hello,world\"\n  b: \"line1\\nline2\"\n",
     {"strs": {"a": "hello,world", "b": "line1\nline2"}}),

])
def test_loads_examples(toon_str, expected):
    """Parse a hand-written TOON document and compare it with the expected value."""
    parsed = toon_mod.loads(toon_str)
    assert normalize(parsed) == normalize(expected)
229
+
230
+
231
+ # -----------------------------
232
+ # Edge cases
233
+ # -----------------------------
234
def test_empty_structs():
    """Empty lists and dicts, top-level and nested, must round-trip."""
    original = {
        "empty_list": [],
        "empty_dict": {},
        "nested": {"x": []},
    }
    decoded = toon_mod.loads(toon_mod.dumps(original))
    assert normalize(decoded) == normalize(original)
239
+
240
+
241
def test_table_with_commas_in_values():
    """Uniform rows whose string values contain commas must round-trip."""
    rows = [
        {"id": 1, "name": "A,B"},
        {"id": 2, "name": "C,D"},
    ]
    original = {"records": rows}
    encoded = toon_mod.dumps(original)
    decoded = toon_mod.loads(encoded)
    assert normalize(decoded) == normalize(original)
251
+
252
+
253
def test_custom_object_ignores_private_attrs():
    """A round-tripped Device keeps its public attributes and drops ``_internal``."""
    device = Device("M1", "v2", "ready")
    encoded = toon_mod.dumps(device)
    decoded = normalize(toon_mod.loads(encoded))

    for public_name in ("model", "version", "status"):
        assert public_name in decoded

    # The private attribute must not appear, with or without its underscore.
    for hidden_name in ("_internal", "internal"):
        assert hidden_name not in decoded
265
+
266
+
267
def test_dumps_produces_string():
    """``dumps`` must return a non-empty ``str``."""
    encoded = toon_mod.dumps({"a": 1})
    assert isinstance(encoded, str)
    assert len(encoded) > 0