cdxf 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cdxf/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ """CDXF -- Compact Data Exchange Format.
2
+
3
+ A universal binary interchange format whose information model is a provable
4
+ superset of JSON, YAML, XML, and TOML.
5
+ """
6
+
7
+ __version__ = "0.1.0"
8
+
9
+ from cdxf.model import (
10
+ Stream,
11
+ Document,
12
+ Map,
13
+ Sequence,
14
+ Scalar,
15
+ Element,
16
+ Attribute,
17
+ Comment,
18
+ ProcessingInstruction,
19
+ Directive,
20
+ TagAnnotation,
21
+ Anchor,
22
+ Alias,
23
+ ScalarType,
24
+ SourceFormat,
25
+ )
26
+
27
+ from cdxf.codec import encode, decode, Encoder, Decoder
28
+
29
+ __all__ = [
30
+ # Model
31
+ "Stream",
32
+ "Document",
33
+ "Map",
34
+ "Sequence",
35
+ "Scalar",
36
+ "Element",
37
+ "Attribute",
38
+ "Comment",
39
+ "ProcessingInstruction",
40
+ "Directive",
41
+ "TagAnnotation",
42
+ "Anchor",
43
+ "Alias",
44
+ "ScalarType",
45
+ "SourceFormat",
46
+ # Codec
47
+ "encode",
48
+ "decode",
49
+ "Encoder",
50
+ "Decoder",
51
+ ]
@@ -0,0 +1,13 @@
1
+ """CDXF format bridges — convert between text formats and CDXF model."""
2
+
3
+ from cdxf.bridges.json_bridge import from_json, to_json
4
+ from cdxf.bridges.yaml_bridge import from_yaml, to_yaml
5
+ from cdxf.bridges.xml_bridge import from_xml, to_xml
6
+ from cdxf.bridges.toml_bridge import from_toml, to_toml
7
+
8
+ __all__ = [
9
+ "from_json", "to_json",
10
+ "from_yaml", "to_yaml",
11
+ "from_xml", "to_xml",
12
+ "from_toml", "to_toml",
13
+ ]
@@ -0,0 +1,122 @@
1
+ """JSON bridge — convert between JSON text and CDXF model.
2
+
3
+ Functions:
4
+ from_json(text) -> Stream
5
+ to_json(stream, indent=None) -> str
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from typing import Any
12
+
13
+ from cdxf.model import (
14
+ Comment,
15
+ Document,
16
+ Map,
17
+ Scalar,
18
+ ScalarType,
19
+ Sequence,
20
+ SourceFormat,
21
+ Stream,
22
+ )
23
+
24
+
25
def from_json(text: str) -> Stream:
    """Build a CDXF Stream from a JSON document.

    Parameters
    ----------
    text : str
        JSON text; it is passed to ``json.loads`` unchanged.

    Returns
    -------
    Stream
        A Stream holding exactly one Document whose
        ``source_format_hint`` is ``SourceFormat.JSON``.
    """
    document = Document(
        root=_from_native(json.loads(text)),
        source_format_hint=SourceFormat.JSON,
    )
    return Stream(documents=[document])
42
+
43
+
44
def to_json(stream: Stream, *, indent: int | None = None) -> str:
    """Serialize the first document of a CDXF Stream as JSON text.

    Only ``stream.documents[0]`` is written; later documents are ignored.
    Comment nodes are left out because JSON has no way to express them.

    Parameters
    ----------
    stream : Stream
        The CDXF Stream to serialize.
    indent : int or None
        Forwarded to ``json.dumps``; ``None`` produces compact output.

    Returns
    -------
    str
        The JSON text, or the literal ``"null"`` when the stream holds
        no documents.
    """
    documents = stream.documents
    if documents:
        payload = _to_native(documents[0].root)
        return json.dumps(payload, indent=indent, ensure_ascii=False)
    return "null"
67
+
68
+
69
+ # -------------------------------------------------------------------
70
+ # Internal: Python native types ↔ CDXF model
71
+ # -------------------------------------------------------------------
72
+
73
def _from_native(value: Any) -> Scalar | Map | Sequence:
    """Turn a value produced by ``json.loads`` into the matching CDXF node.

    ``None`` and the primitive types become Scalars, dicts become Maps
    (keys are wrapped as STRING scalars) and lists become Sequences.
    Anything else raises ``ValueError``.
    """
    if value is None:
        return Scalar(ScalarType.NULL, None)

    # Order matters: bool is a subclass of int and must be tested first.
    primitive_tags = (
        (bool, ScalarType.BOOLEAN),
        (int, ScalarType.INTEGER),
        (float, ScalarType.FLOAT),
        (str, ScalarType.STRING),
    )
    for python_type, tag in primitive_tags:
        if isinstance(value, python_type):
            return Scalar(tag, value)

    if isinstance(value, dict):
        pairs = [
            (Scalar(ScalarType.STRING, name), _from_native(child))
            for name, child in value.items()
        ]
        return Map(entries=pairs)

    if isinstance(value, list):
        return Sequence(items=list(map(_from_native, value)))

    raise ValueError(f"Unsupported JSON value type: {type(value)}")
96
+
97
+
98
def _to_native(node) -> Any:
    """Convert a CDXF node to a Python value that ``json.dumps`` accepts.

    Scalars yield their raw value, except that ``date``, ``time`` and
    ``datetime`` values are rendered as ISO 8601 strings: ``json.dumps``
    raises ``TypeError`` for those types, so a stream containing temporal
    scalars could not be written as JSON at all. The same rendering is
    applied to scalar map keys.

    Maps become dicts and Sequences become lists; Comment entries are
    skipped in both. Any other node falls back to its ``value`` attribute
    when it has one, otherwise to ``str(node)``.
    """
    # Imported locally because this module has no module-level datetime import.
    from datetime import date, time

    if isinstance(node, Scalar):
        raw = node.value
        # datetime subclasses date, so (date, time) covers all temporal types.
        if isinstance(raw, (date, time)):
            return raw.isoformat()
        return raw
    if isinstance(node, Map):
        result = {}
        for entry in node.entries:
            # Skip comments — JSON can't represent them
            if isinstance(entry, Comment):
                continue
            key, value = entry
            # Scalar keys go through the same conversion as values so that
            # temporal keys are serializable too; other keys are stringified.
            key_str = _to_native(key) if isinstance(key, Scalar) else str(key)
            result[key_str] = _to_native(value)
        return result
    if isinstance(node, Sequence):
        return [
            _to_native(item)
            for item in node.items
            if not isinstance(item, Comment)
        ]
    # Fallback for types JSON can't represent
    if hasattr(node, "value"):
        return node.value
    return str(node)
@@ -0,0 +1,307 @@
1
+ """TOML bridge — convert between TOML text and CDXF model.
2
+
3
+ Uses tomlkit for parsing and serialization, which preserves comments,
4
+ formatting, and distinguishes all four TOML temporal types.
5
+
6
+ Functions:
7
+ from_toml(text) -> Stream
8
+ to_toml(stream) -> str
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from datetime import date, datetime, time
14
+
15
+ import tomlkit
16
+ from tomlkit import items as toml_items
17
+
18
+ from cdxf.model import (
19
+ Comment,
20
+ Document,
21
+ Map,
22
+ Scalar,
23
+ ScalarType,
24
+ Sequence,
25
+ SourceFormat,
26
+ Stream,
27
+ )
28
+
29
+
30
+ # ===================================================================
31
+ # Public API
32
+ # ===================================================================
33
+
34
def from_toml(text: str) -> Stream:
    """Parse TOML text and wrap the result in a single-document CDXF Stream.

    The document's ``source_format_hint`` is set to ``SourceFormat.TOML``.
    """
    parsed = tomlkit.parse(text)
    root = _TomlToModel().convert_container(parsed)
    document = Document(root=root, source_format_hint=SourceFormat.TOML)
    return Stream(documents=[document])
40
+
41
+
42
def to_toml(stream: Stream) -> str:
    """Render the first document of a CDXF Stream as TOML text.

    Returns the empty string when the stream holds no documents.
    """
    documents = stream.documents
    if not documents:
        return ""
    toml_doc = _ModelToToml().convert(documents[0].root)
    return tomlkit.dumps(toml_doc)
50
+
51
+
52
+ # ===================================================================
53
+ # Helpers
54
+ # ===================================================================
55
+
56
def _get_body(container) -> list:
    """Return the ``body`` list that backs a tomlkit container.

    An object that exposes ``body`` itself and is not a tomlkit ``Item``
    is used directly. Tables, inline tables and any other object with a
    ``value`` attribute are unwrapped one level through ``.value``. When
    no body can be found an empty list is returned.
    """
    if hasattr(container, "body") and not isinstance(container, toml_items.Item):
        return container.body

    wraps_container = isinstance(
        container, (toml_items.Table, toml_items.InlineTable)
    ) or hasattr(container, "value")
    if wraps_container:
        inner = container.value
        if hasattr(inner, "body"):
            return inner.body

    return []
72
+
73
+
74
+ def _unwrap_key(key) -> str:
75
+ """Extract the raw key string from a tomlkit Key object.
76
+
77
+ tomlkit Key objects may include TOML quoting in their str()
78
+ representation. The .key property returns the unquoted value.
79
+ """
80
+ if hasattr(key, "key"):
81
+ return str(key.key)
82
+ return str(key).strip().strip('"').strip("'")
83
+
84
+
85
def _merge_maps(base: Map, addition: Map) -> Map:
    """Fold the entries of ``addition`` into ``base`` and return ``base``.

    ``base`` is modified in place. Comments from ``addition`` are appended
    as they are. A keyed entry whose key already exists in ``base`` is
    merged recursively when both values are Maps; otherwise the value from
    ``addition`` replaces the existing one. New keys are appended.
    """
    # Position of every scalar-keyed entry already present in base.
    slots: dict[str, int] = {}
    for position, existing in enumerate(base.entries):
        if isinstance(existing, Comment):
            continue
        existing_key, _unused = existing
        if isinstance(existing_key, Scalar):
            slots[existing_key.value] = position

    for incoming in addition.entries:
        if isinstance(incoming, Comment):
            base.entries.append(incoming)
            continue

        key_node, new_value = incoming
        lookup = key_node.value if isinstance(key_node, Scalar) else str(key_node)

        if lookup not in slots:
            slots[lookup] = len(base.entries)
            base.entries.append((key_node, new_value))
            continue

        slot = slots[lookup]
        _ignored, current_value = base.entries[slot]
        if isinstance(current_value, Map) and isinstance(new_value, Map):
            # Both sides are tables: combine them instead of replacing.
            new_value = _merge_maps(current_value, new_value)
        base.entries[slot] = (key_node, new_value)

    return base
123
+
124
+
125
+ # ===================================================================
126
+ # TOML -> CDXF model converter
127
+ # ===================================================================
128
+
129
+ class _TomlToModel:
130
+ """Convert a tomlkit document tree to CDXF model nodes."""
131
+
132
+ def convert_container(self, container) -> Map:
133
+ """Convert a tomlkit Container (Document/Table) to a CDXF Map.
134
+
135
+ Handles TOML's split table definitions by merging entries that
136
+ share the same key (e.g., [workspace] appearing in multiple
137
+ sections).
138
+ """
139
+ entries: list = []
140
+ body = _get_body(container)
141
+
142
+ # Track keys found via body iteration, mapping to entry index
143
+ found_keys: dict[str, int] = {}
144
+
145
+ for key, item in body:
146
+ if key is None:
147
+ if isinstance(item, toml_items.Comment):
148
+ text = self._comment_text(item.trivia.comment)
149
+ if text:
150
+ entries.append(Comment(text))
151
+ continue
152
+
153
+ key_str = _unwrap_key(key)
154
+ key_node = Scalar(ScalarType.STRING, key_str)
155
+ value_node = self._convert_item(item)
156
+
157
+ if key_str in found_keys:
158
+ # Duplicate key — merge Maps (split table definitions)
159
+ idx = found_keys[key_str]
160
+ existing_entry = entries[idx]
161
+ _, existing_val = existing_entry
162
+ if isinstance(existing_val, Map) and isinstance(value_node, Map):
163
+ merged = _merge_maps(existing_val, value_node)
164
+ entries[idx] = (key_node, merged)
165
+ else:
166
+ entries[idx] = (key_node, value_node)
167
+ else:
168
+ found_keys[key_str] = len(entries)
169
+ entries.append((key_node, value_node))
170
+
171
+ # Extract inline comment
172
+ if hasattr(item, "trivia") and item.trivia.comment:
173
+ text = self._comment_text(item.trivia.comment)
174
+ if text:
175
+ entries.append(Comment(text))
176
+
177
+ # Fallback: dict-based iteration for any keys body missed
178
+ if hasattr(container, "items") and callable(container.items):
179
+ for k, v in container.items():
180
+ k_str = str(k)
181
+ if k_str not in found_keys:
182
+ key_node = Scalar(ScalarType.STRING, k_str)
183
+ if isinstance(v, toml_items.Item):
184
+ value_node = self._convert_item(v)
185
+ else:
186
+ value_node = self._convert_native(v)
187
+ found_keys[k_str] = len(entries)
188
+ entries.append((key_node, value_node))
189
+
190
+ return Map(entries=entries)
191
+
192
+ def _convert_item(self, item) -> Map | Sequence | Scalar:
193
+ """Convert a tomlkit Item to the appropriate CDXF node."""
194
+ if isinstance(item, (toml_items.Table, toml_items.InlineTable)):
195
+ return self.convert_container(item)
196
+ if isinstance(item, toml_items.AoT):
197
+ return Sequence(
198
+ items=[self.convert_container(table) for table in item.body]
199
+ )
200
+ if isinstance(item, toml_items.Array):
201
+ return Sequence(
202
+ items=[self._convert_native(v) for v in item.unwrap()]
203
+ )
204
+ return self._convert_scalar(item)
205
+
206
+ def _convert_scalar(self, item) -> Scalar:
207
+ value = item.unwrap() if hasattr(item, "unwrap") else item
208
+ return self._convert_native(value)
209
+
210
+ def _convert_native(self, value) -> Map | Sequence | Scalar:
211
+ """Convert a Python native value to a CDXF node."""
212
+ if isinstance(value, bool):
213
+ return Scalar(ScalarType.BOOLEAN, value)
214
+ if isinstance(value, int):
215
+ return Scalar(ScalarType.INTEGER, value)
216
+ if isinstance(value, float):
217
+ return Scalar(ScalarType.FLOAT, value)
218
+ if isinstance(value, str):
219
+ return Scalar(ScalarType.STRING, value)
220
+ if isinstance(value, datetime):
221
+ if value.tzinfo is not None:
222
+ return Scalar(ScalarType.TIMESTAMP_OFFSET, value)
223
+ return Scalar(ScalarType.TIMESTAMP_LOCAL, value)
224
+ if isinstance(value, date):
225
+ return Scalar(ScalarType.DATE, value)
226
+ if isinstance(value, time):
227
+ return Scalar(ScalarType.TIME, value)
228
+ if isinstance(value, dict):
229
+ entries = [
230
+ (Scalar(ScalarType.STRING, k), self._convert_native(v))
231
+ for k, v in value.items()
232
+ ]
233
+ return Map(entries=entries)
234
+ if isinstance(value, list):
235
+ return Sequence(items=[self._convert_native(v) for v in value])
236
+ return Scalar(ScalarType.STRING, str(value))
237
+
238
+ @staticmethod
239
+ def _comment_text(raw: str) -> str | None:
240
+ if not raw:
241
+ return None
242
+ text = raw.strip().lstrip("#").strip()
243
+ return text if text else None
244
+
245
+
246
+ # ===================================================================
247
+ # CDXF model -> TOML converter
248
+ # ===================================================================
249
+
250
class _ModelToToml:
    """Convert CDXF model nodes to a tomlkit Document.

    Maps become tables, sequences made up solely of maps become arrays of
    tables, every other sequence becomes an array, and scalars are handed
    to tomlkit as their raw Python value. Comment nodes are dropped.

    Anything nested below an array is emitted in inline form (inline
    tables and plain arrays), because TOML has no syntax for a regular
    table or an array of tables inside an array.
    """

    def convert(self, node) -> tomlkit.TOMLDocument:
        """Return a new TOML document populated from ``node``.

        A root that is not a Map produces an empty document.
        """
        doc = tomlkit.document()
        if isinstance(node, Map):
            self._populate_table(doc, node)
        return doc

    def _populate_table(self, table, map_node: Map, *, inline: bool = False) -> None:
        """Add every keyed entry of ``map_node`` to ``table``.

        Parameters
        ----------
        table
            The tomlkit document, table or inline table to fill.
        map_node : Map
            Source of the entries; Comment entries are skipped.
        inline : bool
            When true, nested maps and sequences are emitted in inline
            form. Used for content that lives inside an array.

        Raises
        ------
        ValueError
            If a value is null or is not a Map, Sequence or Scalar node.
        """
        for entry in map_node.entries:
            if isinstance(entry, Comment):
                continue
            key, value = entry
            key_str = key.value if isinstance(key, Scalar) else str(key)
            converted = self._convert_value(value, inline=inline)
            if converted is None:
                # TOML has no null. Handing None to table.add() is rejected
                # by tomlkit with an unrelated-looking message (observed
                # tomlkit behaviour — confirm for the pinned version), so
                # fail here with the same exception type and a clear reason.
                raise ValueError(
                    f"Cannot write key {key_str!r} to TOML: the value is "
                    "null or not a Map, Sequence, or Scalar node"
                )
            table.add(key_str, converted)

    def _convert_value(self, node, *, inline: bool = False):
        """Convert one model node; returns None for unsupported node types."""
        if isinstance(node, Map):
            t = tomlkit.inline_table() if inline else tomlkit.table()
            self._populate_table(t, node, inline=inline)
            return t
        if isinstance(node, Sequence):
            return self._convert_sequence(node, inline=inline)
        if isinstance(node, Scalar):
            return self._convert_scalar(node)
        return None

    def _convert_sequence(self, seq: Sequence, *, inline: bool = False):
        """Convert a Sequence to an array of tables or a plain array.

        A non-empty sequence containing only Maps becomes an array of
        tables, unless ``inline`` is set, in which case it becomes an array
        of inline tables. Comment items are ignored.
        """
        items = [i for i in seq.items if not isinstance(i, Comment)]

        if not inline and items and all(isinstance(i, Map) for i in items):
            aot = tomlkit.aot()
            for item in items:
                t = tomlkit.table()
                self._populate_table(t, item)
                aot.append(t)
            return aot

        arr = tomlkit.array()
        for item in items:
            if isinstance(item, Scalar):
                arr.append(item.value)
            elif isinstance(item, (Map, Sequence)):
                # Everything below an array has to stay inline.
                arr.append(self._convert_value(item, inline=True))
            else:
                arr.append(str(item))
        return arr

    @staticmethod
    def _convert_scalar(scalar: Scalar):
        """Return the scalar's raw Python value for tomlkit to wrap."""
        return scalar.value