cayaml 0.1.0.dev3__tar.gz → 0.1.1.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cayaml
3
- Version: 0.1.0.dev3
3
+ Version: 0.1.1.dev1
4
4
  Summary: Swarmauri's Canon Yaml Handler
5
5
  License: Apache-2.0
6
6
  Author: Jacob Stewart
@@ -0,0 +1,29 @@
1
+ from .api import (
2
+ load,
3
+ loads,
4
+ dump,
5
+ dumps,
6
+ load_all,
7
+ loads_all,
8
+ round_trip_load,
9
+ round_trip_loads,
10
+ round_trip_dump,
11
+ round_trip_dumps,
12
+ round_trip_load_all,
13
+ round_trip_loads_all,
14
+ )
15
+
16
+ __all__ = [
17
+ "load",
18
+ "loads",
19
+ "dump",
20
+ "dumps",
21
+ "load_all",
22
+ "loads_all",
23
+ "round_trip_load",
24
+ "round_trip_loads",
25
+ "round_trip_dump",
26
+ "round_trip_dumps",
27
+ "round_trip_load_all",
28
+ "round_trip_loads_all",
29
+ ]
@@ -0,0 +1,150 @@
1
+ """
2
+ api.py - Public API for Cayaml
3
+
4
+ This module exports two sets of functions (plain mode vs. round-trip mode),
5
+ plus additional "_all" variants for multi-document loading:
6
+
7
+ Plain mode (single doc):
8
+ - loads(yaml_str)
9
+ - load(file_obj)
10
+ - dumps(data)
11
+ - dump(data, file_obj)
12
+
13
+ Plain mode (multi-doc):
14
+ - loads_all(yaml_str)
15
+ - load_all(file_obj)
16
+
17
+ Round-trip mode (single doc):
18
+ - round_trip_loads(yaml_str)
19
+ - round_trip_load(file_obj)
20
+ - round_trip_dumps(data)
21
+ - round_trip_dump(data, file_obj)
22
+
23
+ Round-trip mode (multi-doc):
24
+ - round_trip_loads_all(yaml_str)
25
+ - round_trip_load_all(file_obj)
26
+ """
27
+
28
+ from .parser import _internal_parse_stream, _internal_to_ast
29
+ from .unparser import _internal_dump_plain, _internal_dump_round_trip
30
+ from .plain_conversion import to_plain
31
+ from .ast_nodes import Node, YamlStream
32
+
33
+
34
+ # -----------------------------
35
+ # Plain mode (single-document)
36
+ # -----------------------------
37
def loads(yaml_str: str):
    """Parse a YAML string (plain mode) into plain Python objects.

    Only the first document is returned when the input contains several;
    an empty input yields None.
    """
    documents = loads_all(yaml_str)
    if documents:
        return documents[0]
    return None
44
+
45
+
46
def load(file_obj):
    """Read YAML from a file-like object (plain mode).

    Only the first document is returned when the stream contains several.
    """
    return loads(file_obj.read())
53
+
54
+
55
def dumps(data) -> str:
    """Serialize plain Python objects to a YAML string (no formatting metadata).

    Plain data is first converted to the internal AST; an AST node is
    accepted directly.
    """
    node = data if isinstance(data, Node) else _internal_to_ast(data)
    return _internal_dump_plain(node)
62
+
63
+
64
def dump(data, file_obj):
    """Serialize *data* as plain YAML and write it to *file_obj*."""
    text = dumps(data)
    file_obj.write(text)
69
+
70
+
71
+ # -----------------------------
72
+ # Plain mode (multi-document)
73
+ # -----------------------------
74
def loads_all(yaml_str: str):
    """Parse a YAML string in plain mode.

    Returns a list of plain Python objects, one per document in the stream.
    """
    stream = _internal_parse_stream(yaml_str)
    if isinstance(stream, YamlStream):
        return [to_plain(document) for document in stream.documents]
    # Defensive: a lone document came back instead of a stream.
    return [to_plain(stream)]
84
+
85
+
86
def load_all(file_obj):
    """Parse YAML from a file-like object in plain mode.

    Returns a list of plain Python objects, one per document.
    """
    return loads_all(file_obj.read())
93
+
94
+
95
+ # --------------------------------
96
+ # Round-trip mode (single-doc)
97
+ # --------------------------------
98
def round_trip_loads(yaml_str: str):
    """Parse a YAML string in round-trip mode (formatting preserved).

    Only the first document is returned when the input contains several;
    an empty input yields None.
    """
    documents = round_trip_loads_all(yaml_str)
    if documents:
        return documents[0]
    return None
105
+
106
+
107
def round_trip_load(file_obj):
    """Round-trip parse from a file-like object.

    Only the first document is returned when the stream contains several.
    """
    return round_trip_loads(file_obj.read())
114
+
115
+
116
def round_trip_dumps(data) -> str:
    """Serialize to YAML preserving formatting metadata.

    Plain data is first converted to the internal AST; an AST node is
    accepted directly.
    """
    node = data if isinstance(data, Node) else _internal_to_ast(data)
    return _internal_dump_round_trip(node)
123
+
124
+
125
def round_trip_dump(data, file_obj):
    """Serialize *data* with formatting preserved and write it to *file_obj*."""
    text = round_trip_dumps(data)
    file_obj.write(text)
130
+
131
+
132
+ # --------------------------------
133
+ # Round-trip mode (multi-doc)
134
+ # --------------------------------
135
def round_trip_loads_all(yaml_str: str):
    """Parse a YAML string in round-trip mode.

    Returns a list of DocumentNodes, one per document in the stream.
    """
    stream = _internal_parse_stream(yaml_str)
    if not isinstance(stream, YamlStream):
        # Defensive: a lone document came back instead of a stream.
        return [stream]
    return stream.documents
143
+
144
+
145
def round_trip_load_all(file_obj):
    """Round-trip parse a file-like object.

    Returns a list of DocumentNodes, one per document.
    """
    return round_trip_loads_all(file_obj.read())
@@ -0,0 +1,221 @@
1
+ # ast_nodes.py - Updated with equality methods for SequenceNode (and MappingNode)
2
+
3
+
4
class Node:
    """
    Base class for every YAML AST node.

    Attributes:
        leading_comments (list of str): Comments that appear before this node.
        trailing_comments (list of str): Comments that appear after this node.
        tag (str or None): YAML tag/type hint (e.g., "!!str", "!CustomTag").
        anchor (str or None): Anchor name when the node is anchored (&name).
        alias_of (str or None): Referenced anchor name when the node is an
            alias (*name).
    """

    def __init__(self):
        self.leading_comments = []
        self.trailing_comments = []
        self.tag = None
        self.anchor = None
        self.alias_of = None

    def is_alias(self):
        """Return True when this node is an alias (*anchor) reference."""
        return self.alias_of is not None

    def has_anchor(self):
        """Return True when this node carries an anchor (&anchor)."""
        return self.anchor is not None

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (
            f"<{cls_name} tag={self.tag!r} "
            f"anchor={self.anchor!r} alias_of={self.alias_of!r}>"
        )
34
+
35
+
36
class DocumentNode(Node):
    """
    A single YAML document within a stream.

    Attributes:
        root (Node): Root of the document (MappingNode, SequenceNode, or
            ScalarNode).
        has_doc_start (bool): True when the '---' start marker was seen.
        has_doc_end (bool): True when the '...' end marker was seen.
    """

    def __init__(self):
        super().__init__()
        self.root = None
        self.has_doc_start = False
        self.has_doc_end = False

    def __getitem__(self, key):
        # Subscripting is forwarded to the root node when it is a mapping.
        if self.root and isinstance(self.root, MappingNode):
            return self.root[key]
        raise TypeError("DocumentNode does not contain a subscriptable mapping")

    def __setitem__(self, key, value):
        if not (self.root and isinstance(self.root, MappingNode)):
            raise TypeError("DocumentNode does not contain a subscriptable mapping")
        self.root[key] = value

    def __eq__(self, other):
        if not isinstance(other, DocumentNode):
            # Fall back to comparing against the root node directly.
            return self.root == other
        return (
            self.has_doc_start == other.has_doc_start
            and self.has_doc_end == other.has_doc_end
            and self.leading_comments == other.leading_comments
            and self.trailing_comments == other.trailing_comments
            and self.root == other.root
        )

    def __repr__(self):
        return (
            f"<DocumentNode doc_start={self.has_doc_start} "
            f"doc_end={self.has_doc_end} root={self.root!r}>"
        )
81
+
82
+
83
class MappingNode(Node):
    """
    A YAML mapping (key-value pairs) that preserves key order and merges.

    Attributes:
        pairs (list of tuple(Node, Node)): Ordered (key, value) node pairs.
        merges (list of Node): Nodes pulled in via the '<<:' merge operator.
    """

    def __init__(self):
        super().__init__()
        self.pairs = []
        self.merges = []

    def add_pair(self, key_node, value_node):
        """Append a (key, value) node pair, keeping insertion order."""
        self.pairs.append((key_node, value_node))

    def __getitem__(self, key):
        for key_node, value_node in self.pairs:
            if hasattr(key_node, "value") and key_node.value == key:
                # Plain scalar values (no block style) unwrap to the raw value.
                if isinstance(value_node, ScalarNode) and value_node.style is None:
                    return value_node.value
                return value_node
        raise KeyError(key)

    def __setitem__(self, key, value):
        for index, (key_node, _) in enumerate(self.pairs):
            if hasattr(key_node, "value") and key_node.value == key:
                self.pairs[index] = (key_node, value)
                return
        # No matching key found: append a fresh scalar key.
        self.add_pair(ScalarNode(key), value)

    def __eq__(self, other):
        if isinstance(other, MappingNode):
            return self.pairs == other.pairs
        if isinstance(other, dict):
            # Compare against a plain dict by unboxing scalar values.
            unboxed = {}
            for key_node, value_node in self.pairs:
                plain_key = key_node.value if hasattr(key_node, "value") else key_node
                if isinstance(value_node, ScalarNode):
                    unboxed[plain_key] = value_node.value
                else:
                    unboxed[plain_key] = value_node
            return unboxed == other
        return False

    def __repr__(self):
        return f"<MappingNode pairs={self.pairs!r} merges={self.merges!r}>"
134
+
135
+
136
class SequenceNode(Node):
    """
    A YAML sequence: an ordered list of item nodes.

    Attributes:
        items (list of Node): The sequence items in order.
    """

    def __init__(self):
        super().__init__()
        self.items = []

    def add_item(self, item_node):
        """Append one item node to the sequence."""
        self.items.append(item_node)

    def __eq__(self, other):
        if isinstance(other, SequenceNode):
            return self.items == other.items
        if isinstance(other, list):
            # Unbox scalar-like items (anything with a .value) before comparing.
            unboxed = [
                entry.value if hasattr(entry, "value") else entry
                for entry in self.items
            ]
            return unboxed == other
        return False

    def __repr__(self):
        return f"<SequenceNode items={self.items!r}>"
164
+
165
+
166
class ScalarNode(Node):
    """
    A YAML scalar (string, int, float, bool, None) plus block-style details.

    Attributes:
        value: The underlying scalar value.
        style (str or None): None for plain, '|' for literal, '>' for folded.
        chomping (str or None): Block chomping indicator ('+', '-', or None).
        lines (list of str or None): Original block-scalar lines, kept for
            precise re-emission.
    """

    def __init__(self, value, style=None):
        super().__init__()
        self.value = value
        self.style = style
        self.chomping = None
        self.lines = None

    def __repr__(self):
        return (
            f"<ScalarNode value={self.value!r} style={self.style!r} tag={self.tag!r}>"
        )

    def __eq__(self, other):
        if not isinstance(other, ScalarNode):
            # Compare directly against a raw Python value.
            return self.value == other
        return (
            self.value == other.value
            and self.style == other.style
            and self.tag == other.tag
        )
+
200
+
201
class YamlStream:
    """
    An entire YAML stream, which may hold several documents.

    Attributes:
        documents (list of DocumentNode): Documents in stream order.
    """

    def __init__(self):
        self.documents = []

    def add_document(self, doc: DocumentNode):
        """Append one document to the stream."""
        self.documents.append(doc)

    def __eq__(self, other):
        return isinstance(other, YamlStream) and self.documents == other.documents

    def __repr__(self):
        return f"<YamlStream documents={self.documents!r}>"
@@ -0,0 +1,350 @@
1
+ """
2
+ parser.py - YAML parser for Cayaml (Swarmauri's Canon YAML)
3
+
4
+ This minimal parser tokenizes YAML input and builds an AST (using node classes from ast_nodes.py).
5
+ It preserves basic metadata such as document markers and comments.
6
+ Advanced features (anchors, aliases, block styles, etc.) can be added by expanding these functions.
7
+
8
+ This module exposes two internal functions:
9
+ - _internal_load(yaml_str): Returns an AST representing the YAML input.
10
+ - _internal_to_ast(data): Converts plain Python data into an AST.
11
+ """
12
+
13
+ import math
14
+ from .ast_nodes import YamlStream, DocumentNode, MappingNode, SequenceNode, ScalarNode
15
+
16
+
17
def parse_scalar(value: str):
    """
    Interpret a scalar token.

    Surrounding single/double quotes are stripped (the content is returned
    verbatim); otherwise the token is converted to bool, None, an IEEE
    special float, int (base prefixes like 0x/0o accepted), or float.
    Anything unconvertible comes back as the bare string.
    """
    value = value.strip()

    # Quoted scalars are returned as-is, minus the quotes.
    double_quoted = value.startswith('"') and value.endswith('"')
    single_quoted = value.startswith("'") and value.endswith("'")
    if double_quoted or single_quoted:
        return value[1:-1]

    lowered = value.lower()

    # Fixed keywords: booleans, nulls, and signed infinities.
    keyword_values = {
        "true": True,
        "false": False,
        "null": None,
        "~": None,
        ".inf": math.inf,
        "+.inf": math.inf,
        "-.inf": -math.inf,
    }
    if lowered in keyword_values:
        return keyword_values[lowered]
    if lowered == ".nan":
        return math.nan

    # Numeric conversions: int first (base=0 honors 0x/0o/0b prefixes),
    # then float.
    try:
        return int(value, 0)
    except ValueError:
        pass
    try:
        return float(value)
    except ValueError:
        pass

    return value
61
+
62
+
63
def _internal_parse_stream(yaml_str: str) -> YamlStream:
    """Tokenize and parse *yaml_str* into a YamlStream.

    The stream may contain multiple DocumentNodes.
    """
    return parse_stream(yaml_str.splitlines())
69
+
70
+
71
def _internal_load(yaml_str: str):
    """
    Parse a YAML string and return an AST.

    Returns the lone DocumentNode when the stream holds exactly one
    document; otherwise returns the whole YamlStream.
    """
    stream = parse_stream(yaml_str.splitlines())
    if len(stream.documents) == 1:
        return stream.documents[0]
    return stream
82
+
83
+
84
def _internal_to_ast(data):
    """
    Convert plain Python data (dict, list, or scalar) into AST nodes.

    Dicts become MappingNodes (keys wrapped as scalars), lists become
    SequenceNodes, and anything else becomes a ScalarNode.
    """
    from .ast_nodes import MappingNode, SequenceNode, ScalarNode

    if isinstance(data, dict):
        mapping = MappingNode()
        for key, value in data.items():
            mapping.add_pair(ScalarNode(key), _internal_to_ast(value))
        return mapping
    if isinstance(data, list):
        sequence = SequenceNode()
        for item in data:
            sequence.add_item(_internal_to_ast(item))
        return sequence
    return ScalarNode(data)
104
+
105
+
106
def parse_stream(lines: list) -> YamlStream:
    """
    Parse the entire YAML stream (which may contain multiple documents).
    Returns a YamlStream object containing DocumentNode(s).

    Documents are delimited by '---' (start of the next document) and
    '...' (explicit end of the current document).
    """
    stream = YamlStream()
    i = 0
    n = len(lines)

    while i < n:
        # Skip any leading blank lines
        while i < n and not lines[i].strip():
            i += 1
        if i >= n:
            break

        doc = DocumentNode()
        line = lines[i].strip()

        # Check if we see a doc start marker
        if line.startswith("---"):
            doc.has_doc_start = True
            i += 1

        # Collect lines for *this* document until we see '...' or '---'
        doc_lines = []
        while i < n:
            curr = lines[i].rstrip("\n")
            curr_strip = curr.strip()
            if curr_strip.startswith("..."):
                # '...' ends the current document; consume the marker line.
                doc.has_doc_end = True
                i += 1
                break
            if curr_strip.startswith("---"):
                # Start of next doc
                break
            doc_lines.append(curr)
            i += 1

        # If we have lines for this document, parse them as a block
        if doc_lines:
            doc.root, _ = parse_block(doc_lines, indent=0)
        # Even an empty document (bare '---') is appended, with root=None.
        stream.add_document(doc)

    return stream
151
+
152
+
153
def parse_block(lines: list, indent: int):
    """
    Dispatch a block of lines to the sequence or mapping parser.

    The first significant line decides: a leading '-' means a sequence,
    anything else a mapping. Returns (Node, remaining_lines), or
    (None, []) for an empty block.
    """
    significant = skip_blank_and_comment(lines)
    if not significant:
        return None, []

    leader = significant[0].lstrip()
    chosen_parser = parse_sequence if leader.startswith("-") else parse_mapping
    return chosen_parser(lines, indent)
168
+
169
+
170
def parse_mapping(lines: list, indent: int):
    """
    Parse a block of lines as a mapping.

    Parameters:
        lines: The lines belonging to this mapping block.
        indent: Minimum indentation (in spaces) for lines of this mapping.

    Returns:
        (MappingNode, remaining_lines) where remaining_lines are the lines
        not consumed by this mapping.
    """
    # Fix: removed a leftover debug print() that wrote to stdout on every
    # call, polluting the output of any program using the parser.
    mapping = MappingNode()
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i]
        line_strip = line.strip()

        # Check current indentation of this line
        current_indent = len(line) - len(line.lstrip())

        # A blank line or a dedent ends this mapping block.
        if not line_strip or current_indent < indent:
            break

        # Full-line comments at this level are kept as leading comments.
        if line.lstrip().startswith("#"):
            mapping.leading_comments.append(line_strip)
            i += 1
            continue

        # A line without a colon means we've reached a different construct.
        if ":" not in line_strip:
            break

        # Split key : value
        key_part, _, value_part = line_strip.partition(":")
        key_node = ScalarNode(parse_scalar(key_part.strip()))

        # Advance past the key line; the value may span the following lines.
        i += 1
        raw_value = value_part.strip()

        # -- Block scalar check (| or >) --
        if raw_value in ("|", ">"):
            style_char = raw_value  # '|' or '>'
            block_node = ScalarNode(None, style=style_char)
            block_node.lines = []

            # The indentation of the first content line after the block
            # indicator is significant for the whole block; strip exactly
            # that amount from every captured line.
            block_indent = None
            while i < n:
                next_line = lines[i]
                next_line_indent = len(next_line) - len(next_line.lstrip())
                if next_line_indent <= current_indent or not next_line.strip():
                    break
                if block_indent is None:
                    block_indent = next_line_indent
                if next_line_indent < block_indent:
                    break
                block_node.lines.append(next_line[block_indent:])
                i += 1

            value_node = block_node

        # Empty value => the actual value is a nested block on later lines.
        elif not raw_value:
            nested_lines = []
            while i < n:
                nl = lines[i]
                nl_indent = len(nl) - len(nl.lstrip())
                if nl_indent <= current_indent or not nl.strip():
                    break
                nested_lines.append(nl)
                i += 1

            if nested_lines:
                value_node, _ = parse_block(nested_lines, indent=current_indent + 1)
            else:
                # A key with no value at all becomes an empty-string scalar.
                value_node = ScalarNode("")
        else:
            # Normal inline scalar value.
            value_node = ScalarNode(parse_scalar(raw_value))

        mapping.add_pair(key_node, value_node)

    remaining = lines[i:]
    return mapping, remaining
258
+
259
+
260
def parse_sequence(lines: list, indent: int):
    """
    Parse a block of lines as a sequence.

    Parameters:
        lines: The lines belonging to this sequence block.
        indent: Minimum indentation (in spaces) for items of this sequence.

    Returns:
        (SequenceNode, remaining_lines) where remaining_lines were not
        consumed by this sequence.
    """
    sequence = SequenceNode()
    i = 0
    n = len(lines)

    while i < n:
        line = lines[i]
        line_strip = line.strip()
        current_indent = len(line) - len(line.lstrip())

        # A blank line or a dedent ends this sequence block.
        if not line_strip or current_indent < indent:
            break

        # Full-line comments at this level are kept as leading comments.
        if line.lstrip().startswith("#"):
            sequence.leading_comments.append(line_strip)
            i += 1
            continue

        # A non-dash line means the sequence is over.
        if not line_strip.startswith("-"):
            break

        # Remove leading dash
        dash_part = line_strip[1:].strip()  # everything after '-'
        i += 1

        # If dash_part is '|' or '>', we have a block scalar in a sequence item
        if dash_part in ("|", ">"):
            style_char = dash_part
            block_node = ScalarNode(None, style=style_char)
            block_node.lines = []

            # As with mappings, determine the indentation for the block scalar
            # from the first line that follows the indicator. Each subsequent
            # line must be at least that indented; anything less signals the end
            # of the block.
            block_indent = None
            while i < n:
                nxt = lines[i]
                nxt_indent = len(nxt) - len(nxt.lstrip())
                if nxt_indent <= current_indent or not nxt.strip():
                    break
                if block_indent is None:
                    block_indent = nxt_indent
                if nxt_indent < block_indent:
                    break
                block_node.lines.append(nxt[block_indent:])
                i += 1

            sequence.add_item(block_node)

        elif not dash_part:
            # Bare '-': the item is a nested structure on the following lines.
            nested_lines = []
            while i < n:
                nested_line = lines[i]
                nested_indent = len(nested_line) - len(nested_line.lstrip())
                if nested_indent <= current_indent or not nested_line.strip():
                    break
                nested_lines.append(nested_line)
                i += 1

            if nested_lines:
                item_node, _ = parse_block(nested_lines, indent=current_indent + 1)
            else:
                # A dash with nothing after it becomes an empty-string scalar.
                item_node = ScalarNode("")
            sequence.add_item(item_node)
        else:
            # Normal scalar or inline text after '-'
            item_node = ScalarNode(parse_scalar(dash_part))
            sequence.add_item(item_node)

    remaining = lines[i:]
    return sequence, remaining
338
+
339
+
340
def skip_blank_and_comment(lines: list):
    """Drop leading blank and comment-only lines; return the remaining suffix."""
    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped and not line.lstrip().startswith("#"):
            return lines[index:]
    # Everything was blank or a comment.
    return lines[len(lines):]
@@ -0,0 +1,60 @@
1
+ """
2
+ plain_conversion.py - Helpers to convert Cayaml AST nodes to plain Python objects.
3
+
4
+ This module provides the `to_plain()` function which recursively traverses
5
+ the AST (returned by the round-trip loader) and converts each node into its plain
6
+ Python equivalent. For example:
7
+ - DocumentNode and MappingNode are converted to dictionaries.
8
+ - SequenceNode is converted to a list.
9
+ - ScalarNode is converted to its underlying value.
10
+ If the node is a YamlStream containing multiple documents, a list of plain objects is returned.
11
+ """
12
+
13
+ from .ast_nodes import DocumentNode, MappingNode, SequenceNode, ScalarNode, YamlStream
14
+
15
+
16
def to_plain(node):
    """
    Recursively convert an AST node (or YamlStream) into plain Python data.

    Parameters:
        node: A DocumentNode, MappingNode, SequenceNode, ScalarNode, or
            YamlStream.

    Returns:
        The equivalent plain structure: a list per stream/sequence, a dict
        per document/mapping (empty dict for a rootless document), the raw
        value per scalar, or the input unchanged when it is not an AST node.
    """
    if isinstance(node, YamlStream):
        return [to_plain(document) for document in node.documents]

    if isinstance(node, DocumentNode):
        # A document unwraps to its root; a missing root yields an empty dict.
        return to_plain(node.root) if node.root is not None else {}

    if isinstance(node, MappingNode):
        plain_mapping = {}
        for key_node, value_node in node.pairs:
            # Scalar keys unwrap directly; other key kinds convert recursively.
            if isinstance(key_node, ScalarNode):
                plain_key = key_node.value
            else:
                plain_key = to_plain(key_node)
            plain_mapping[plain_key] = to_plain(value_node)
        return plain_mapping

    if isinstance(node, SequenceNode):
        return [to_plain(item) for item in node.items]

    if isinstance(node, ScalarNode):
        return node.value

    # Already plain (or an unknown type): pass it through untouched.
    return node
@@ -0,0 +1,229 @@
1
+ """
2
+ unparser.py - YAML unparser for Cayaml (Swarmauri's Canon YAML)
3
+
4
+ This module traverses the AST (constructed using ast_nodes.py) and
5
+ reconstructs a YAML-formatted string.
6
+
7
+ It provides two internal dump functions:
8
+ - _internal_dump_plain(node, indent=0): Emits plain YAML (ignoring extra formatting metadata).
9
+ - _internal_dump_round_trip(node, indent=0): Emits YAML preserving document markers,
10
+ comments, anchors/tags, block styles (folded/literal), merge operators, and key order.
11
+
12
+ If the input to dumps() is a plain Python structure (list, dict, or scalar),
13
+ we convert it to an AST before emitting YAML.
14
+ """
15
+
16
+ from .ast_nodes import (
17
+ DocumentNode,
18
+ MappingNode,
19
+ SequenceNode,
20
+ ScalarNode,
21
+ Node,
22
+ )
23
+
24
+
25
+ # Helper for plain mode scalar conversion.
26
# NOTE(review): an identical `_plain_scalar` is re-defined at the bottom of
# this module; that later definition shadows this one at import time.
# Consider keeping only one copy.
def _plain_scalar(node: ScalarNode) -> str:
    """Render a ScalarNode's value as plain-YAML text.

    Booleans become 'true'/'false', None becomes 'null', numbers use str(),
    and strings are double-quoted (with inner quotes escaped) when empty or
    containing ' ', ':', '-' or '#'.
    """
    val = node.value
    if isinstance(val, bool):
        # bool must be checked before int: bool is an int subclass.
        return "true" if val else "false"
    elif val is None:
        return "null"
    elif isinstance(val, (int, float)):
        return str(val)
    elif isinstance(val, str):
        # Quote if needed.
        if not val or any(c in val for c in [" ", ":", "-", "#"]):
            return '"' + val.replace('"', '\\"') + '"'
        return val
    else:
        return str(val)
41
+
42
+
43
+ # ====================
44
+ # Round-Trip Dumping
45
+ # ====================
46
def _internal_dump_round_trip(node: Node, indent: int = 0) -> str:
    """Dump the AST node to YAML, preserving formatting metadata."""
    for node_type, emitter in (
        (DocumentNode, unparse_document),
        (MappingNode, unparse_mapping),
        (SequenceNode, unparse_sequence),
        (ScalarNode, unparse_scalar),
    ):
        if isinstance(node, node_type):
            return emitter(node, indent)
    # Not an AST node: fall back to its string form.
    return " " * indent + str(node)
58
+
59
+
60
def unparse_document(doc: DocumentNode, indent: int = 0) -> str:
    """Emit a DocumentNode as YAML text, keeping markers and comments.

    Order: leading comments, '---' (if present), root content, '...'
    (if present), trailing comments.
    """
    prefix = " " * indent
    out = [prefix + comment for comment in doc.leading_comments]
    if doc.has_doc_start:
        out.append(prefix + "---")
    if doc.root is not None:
        out.append(unparse_node(doc.root, indent))
    if doc.has_doc_end:
        out.append(prefix + "...")
    out.extend(prefix + comment for comment in doc.trailing_comments)
    return "\n".join(out)
75
+
76
+
77
def unparse_node(node: Node, indent: int = 0) -> str:
    """Dispatch unparse based on node type (round-trip mode)."""
    for node_type, emitter in (
        (MappingNode, unparse_mapping),
        (SequenceNode, unparse_sequence),
        (ScalarNode, unparse_scalar),
    ):
        if isinstance(node, node_type):
            return emitter(node, indent)
    # Not an AST node: fall back to its string form.
    return " " * indent + str(node)
87
+
88
+
89
def unparse_mapping(mapping: MappingNode, indent: int = 0) -> str:
    """Unparse a MappingNode with formatting metadata preserved.

    Merge operators ('<<:') are emitted first, then each pair with its
    key's leading comments above it and trailing comments appended to the
    key line. Collection values go on following lines, indented two spaces.
    """
    lines = []
    prefix = " " * indent
    # Re-emit merge operators before the regular pairs.
    for merge_node in mapping.merges:
        line = prefix + "<<: " + unparse_node(merge_node, 0)
        lines.append(line)
    for key_node, value_node in mapping.pairs:
        for comment in key_node.leading_comments:
            lines.append(prefix + comment)
        key_str = (
            unparse_scalar(key_node, 0)
            if isinstance(key_node, ScalarNode)
            else unparse_node(key_node, 0)
        )
        if isinstance(value_node, (MappingNode, SequenceNode)):
            # Nested collection: key line first, value block below it.
            line = prefix + f"{key_str}:"
            if key_node.trailing_comments:
                line += " " + " ".join(key_node.trailing_comments)
            lines.append(line)
            lines.append(unparse_node(value_node, indent + 2))
        else:
            # Scalar (or unknown) value stays inline on the key's line.
            value_str = unparse_node(value_node, 0)
            line = prefix + f"{key_str}: {value_str}"
            if key_node.trailing_comments:
                line += " " + " ".join(key_node.trailing_comments)
            lines.append(line)
    return "\n".join(lines)
117
+
118
+
119
def unparse_sequence(seq: SequenceNode, indent: int = 0) -> str:
    """Unparse a SequenceNode with formatting metadata preserved."""
    prefix = " " * indent
    rendered = []
    for item in seq.items:
        if isinstance(item, (MappingNode, SequenceNode)):
            # Nested collections go on the line after a bare dash.
            rendered.append(prefix + "-")
            rendered.append(unparse_node(item, indent + 2))
        else:
            rendered.append(prefix + "- " + unparse_node(item, 0))
    return "\n".join(rendered)
131
+
132
+
133
def unparse_scalar(scalar: ScalarNode, indent: int = 0) -> str:
    """Unparse a ScalarNode with formatting metadata preserved.

    Handles aliases (*name), tags, anchors (&name), block scalars
    ('|' literal / '>' folded), and plain values.
    """
    prefix = " " * indent
    # Aliases re-emit only the '*name' reference.
    if scalar.alias_of:
        return prefix + "*" + scalar.alias_of
    tag_part = f"{scalar.tag} " if scalar.tag else ""
    anchor_part = f"&{scalar.anchor} " if scalar.anchor else ""
    if scalar.style in ("|", ">"):
        # Block scalar: header line, then content indented two extra spaces.
        lines = [prefix + tag_part + anchor_part + scalar.style]
        if scalar.lines:
            # Preserved original lines take precedence for exact re-emission.
            for line in scalar.lines:
                lines.append(" " * (indent + 2) + line)
        else:
            for line in str(scalar.value).splitlines():
                lines.append(" " * (indent + 2) + line)
        return "\n".join(lines)
    else:
        val = scalar.value
        if isinstance(val, bool):
            # bool must be checked before int: bool is an int subclass.
            text = "true" if val else "false"
        elif val is None:
            text = "null"
        elif isinstance(val, (int, float)):
            text = str(val)
        elif isinstance(val, str):
            text = val
            # Quote strings that are empty or contain YAML-significant chars.
            if not text or any(c in text for c in [" ", ":", "-", "#"]):
                text = '"' + text.replace('"', '\\"') + '"'
        else:
            text = str(val)
        return prefix + tag_part + anchor_part + text
164
+
165
+
166
+ # ==================
167
+ # Plain Dumping
168
+ # ==================
169
def _internal_dump_plain(node: Node, indent: int = 0) -> str:
    """
    Dump the AST node to plain YAML, ignoring extra formatting metadata.
    Document markers, comments, and anchors are omitted.
    """
    if isinstance(node, DocumentNode):
        # A document dumps as just its root content (markers are dropped).
        return _internal_dump_plain(node.root, indent)
    elif isinstance(node, MappingNode):
        lines = []
        prefix = " " * indent
        for key_node, value_node in node.pairs:
            key_str = (
                _plain_scalar(key_node)
                if isinstance(key_node, ScalarNode)
                else _internal_dump_plain(key_node, 0)
            )
            if isinstance(value_node, (MappingNode, SequenceNode)):
                # Nested collections go on following lines, indented 2 spaces.
                lines.append(prefix + f"{key_str}:")
                lines.append(_internal_dump_plain(value_node, indent + 2))
            else:
                value_str = (
                    _plain_scalar(value_node)
                    if isinstance(value_node, ScalarNode)
                    else _internal_dump_plain(value_node, 0)
                )
                lines.append(prefix + f"{key_str}: {value_str}")
        return "\n".join(lines)
    elif isinstance(node, SequenceNode):
        lines = []
        prefix = " " * indent
        for item in node.items:
            if isinstance(item, (MappingNode, SequenceNode)):
                # Nested collection items go on the line after a bare dash.
                lines.append(prefix + "-")
                lines.append(_internal_dump_plain(item, indent + 2))
            else:
                lines.append(
                    prefix
                    + f"- {_plain_scalar(item) if isinstance(item, ScalarNode) else _internal_dump_plain(item, 0)}"
                )
        return "\n".join(lines)
    elif isinstance(node, ScalarNode):
        return " " * indent + _plain_scalar(node)
    else:
        # Fallback: not an AST node; emit its string form.
        return " " * indent + str(node)
213
+
214
+
215
+ # The plain scalar conversion is similar to our helper in round-trip mode.
216
# NOTE(review): this duplicates the `_plain_scalar` defined earlier in this
# module; being the later definition, this copy is the one in effect at
# runtime. Consider keeping only one copy.
def _plain_scalar(node: ScalarNode) -> str:
    """Render a ScalarNode's value as plain-YAML text.

    Booleans become 'true'/'false', None becomes 'null', numbers use str(),
    and strings are double-quoted (with inner quotes escaped) when empty or
    containing ' ', ':', '-' or '#'.
    """
    val = node.value
    if isinstance(val, bool):
        # bool must be checked before int: bool is an int subclass.
        return "true" if val else "false"
    elif val is None:
        return "null"
    elif isinstance(val, (int, float)):
        return str(val)
    elif isinstance(val, str):
        if not val or any(c in val for c in [" ", ":", "-", "#"]):
            return '"' + val.replace('"', '\\"') + '"'
        return val
    else:
        return str(val)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cayaml"
3
- version = "0.1.0.dev3"
3
+ version = "0.1.1.dev1"
4
4
  description = "Swarmauri's Canon Yaml Handler"
5
5
  authors = [{ name = "Jacob Stewart", email = "jacob@swarmauri.com" }]
6
6
  license = "Apache-2.0"
@@ -1,168 +0,0 @@
1
- """
2
- Minimal YAML Parser and Unparser
3
-
4
- This module provides a very basic YAML parser and unparser that supports a subset
5
- of YAML syntax (mappings and sequences, with simple scalars). It uses only Python's
6
- built-in libraries and is intended for simple use cases.
7
-
8
- Usage:
9
- data = parse_yaml(yaml_string)
10
- yaml_string = unparse_yaml(data)
11
- """
12
-
13
-
14
- def get_indent(line):
15
- """Return the number of leading spaces in a line."""
16
- return len(line) - len(line.lstrip(" "))
17
-
18
-
19
- def parse_scalar(value):
20
- """Convert a scalar string into int, float, bool, None or leave as string."""
21
- # Remove quotes if present
22
- if (value.startswith('"') and value.endswith('"')) or (
23
- value.startswith("'") and value.endswith("'")
24
- ):
25
- return value[1:-1]
26
- lower = value.lower()
27
- if lower == "true":
28
- return True
29
- if lower == "false":
30
- return False
31
- if lower in ["null", "~"]:
32
- return None
33
- try:
34
- return int(value)
35
- except ValueError:
36
- pass
37
- try:
38
- return float(value)
39
- except ValueError:
40
- pass
41
- return value
42
-
43
-
44
- def parse_mapping(lines, indent):
45
- """Parse a block of lines representing a mapping."""
46
- mapping = {}
47
- while lines and get_indent(lines[0]) >= indent:
48
- if get_indent(lines[0]) != indent:
49
- break
50
- line = lines.pop(0)
51
- # Ignore comment lines
52
- if line.strip().startswith("#"):
53
- continue
54
- if ":" not in line:
55
- continue # Skip lines that do not look like key: value pairs
56
- key, _, value = line.strip().partition(":")
57
- key = key.strip()
58
- value = value.strip()
59
- if value == "":
60
- # If no inline value, check if a nested block follows.
61
- if lines and get_indent(lines[0]) > indent:
62
- value, _ = parse_block(lines, get_indent(lines[0]))
63
- else:
64
- value = None
65
- else:
66
- value = parse_scalar(value)
67
- mapping[key] = value
68
- return mapping
69
-
70
-
71
- def parse_list(lines, indent):
72
- """Parse a block of lines representing a list."""
73
- lst = []
74
- while (
75
- lines and get_indent(lines[0]) == indent and lines[0].lstrip().startswith("-")
76
- ):
77
- line = lines.pop(0)
78
- # Remove the dash marker and get the content.
79
- content = line.lstrip()[1:].strip()
80
- if content == "":
81
- # If nothing follows the dash, check for an indented block.
82
- if lines and get_indent(lines[0]) > indent:
83
- item, _ = parse_block(lines, get_indent(lines[0]))
84
- else:
85
- item = None
86
- else:
87
- item = parse_scalar(content)
88
- # If the next line is indented more, treat it as a nested block.
89
- if lines and get_indent(lines[0]) > indent:
90
- extra, _ = parse_block(lines, get_indent(lines[0]))
91
- # If the inline value is a mapping, merge the extra block.
92
- if isinstance(item, dict) and isinstance(extra, dict):
93
- item.update(extra)
94
- else:
95
- item = extra
96
- lst.append(item)
97
- return lst
98
-
99
-
100
- def parse_block(lines, indent):
101
- """Determine if the current block is a list or mapping and parse accordingly."""
102
- if not lines:
103
- return None, lines
104
- # If the current line starts with a dash, treat as a list; otherwise, a mapping.
105
- if lines[0].lstrip().startswith("-"):
106
- result = parse_list(lines, indent)
107
- else:
108
- result = parse_mapping(lines, indent)
109
- return result, lines
110
-
111
-
112
- def parse_yaml(yaml_str):
113
- """
114
- Parse a YAML string and return the corresponding Python data structure.
115
- Supports a minimal subset of YAML.
116
- """
117
- lines = yaml_str.splitlines()
118
- # Remove completely blank lines.
119
- lines = [line for line in lines if line.strip() != ""]
120
- result, _ = parse_block(lines, 0)
121
- return result
122
-
123
-
124
- def format_scalar(value):
125
- """Format a scalar value as a YAML string."""
126
- if value is None:
127
- return "null"
128
- if isinstance(value, bool):
129
- return "true" if value else "false"
130
- if isinstance(value, (int, float)):
131
- return str(value)
132
- if isinstance(value, str):
133
- # Quote the string if it contains spaces or special characters.
134
- if not value or any(c in value for c in [" ", ":", "-", "#"]):
135
- escaped = value.replace('"', '\\"')
136
- return f'"{escaped}"'
137
- return value
138
- return str(value)
139
-
140
-
141
- def unparse_yaml(data, indent=0):
142
- """
143
- Convert a Python data structure into a YAML-formatted string.
144
- Supports a minimal subset of YAML.
145
- """
146
- lines = []
147
- prefix = " " * indent
148
- if isinstance(data, dict):
149
- for key, value in data.items():
150
- if isinstance(value, (dict, list)):
151
- lines.append(f"{prefix}{key}:")
152
- lines.append(unparse_yaml(value, indent + 2))
153
- else:
154
- lines.append(f"{prefix}{key}: {format_scalar(value)}")
155
- elif isinstance(data, list):
156
- for item in data:
157
- if isinstance(item, (dict, list)):
158
- lines.append(f"{prefix}-")
159
- lines.append(unparse_yaml(item, indent + 2))
160
- else:
161
- lines.append(f"{prefix}- {format_scalar(item)}")
162
- else:
163
- lines.append(f"{prefix}{format_scalar(data)}")
164
- return "\n".join(lines)
165
-
166
-
167
- # Public API
168
- __all__ = ["parse_yaml", "unparse_yaml"]
File without changes