flow-toon-format 0.9.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toon_format/encoder.py ADDED
@@ -0,0 +1,56 @@
1
+ # Copyright (c) 2025 TOON Format Organization
2
+ # SPDX-License-Identifier: MIT
3
+ """Core TOON encoding functionality.
4
+
5
+ This module provides the main `encode()` function for converting Python values
6
+ to TOON format strings. Handles option resolution and coordinates the encoding
7
+ pipeline: normalization → encoding → writing.
8
+ """
9
+
10
+ from typing import Any, Optional
11
+
12
+ from .constants import DEFAULT_DELIMITER, DELIMITERS
13
+ from .encoders import encode_value
14
+ from .normalize import normalize_value
15
+ from .types import EncodeOptions, ResolvedEncodeOptions
16
+ from .writer import LineWriter
17
+
18
+
19
def encode(value: Any, options: Optional[EncodeOptions] = None) -> str:
    """Serialize ``value`` into a TOON document.

    The value is normalized, the options are resolved against their defaults,
    and the normalized value is rendered through a ``LineWriter`` starting at
    depth 0.

    Args:
        value: The value to encode (must be JSON-serializable)
        options: Optional encoding options

    Returns:
        TOON-formatted string
    """
    json_value = normalize_value(value)
    settings = resolve_options(options)
    out = LineWriter(settings.indent)
    encode_value(json_value, settings, out, 0)
    return out.to_string()
34
+
35
+
36
def resolve_options(options: Optional[EncodeOptions]) -> ResolvedEncodeOptions:
    """Fill in defaults for any encoding option the caller did not supply.

    Args:
        options: Optional user-provided options

    Returns:
        Resolved options with defaults applied
    """
    if options is None:
        return ResolvedEncodeOptions()

    requested_indent = options.get("indent", 2)
    requested_delimiter = options.get("delimiter", DEFAULT_DELIMITER)
    requested_marker = options.get("lengthMarker", False)

    # A delimiter may be given as a key of DELIMITERS; translate it to the
    # value stored there. Anything else is passed through untouched.
    if requested_delimiter in DELIMITERS:
        requested_delimiter = DELIMITERS[requested_delimiter]

    return ResolvedEncodeOptions(
        indent=requested_indent,
        delimiter=requested_delimiter,
        length_marker=requested_marker,
    )
@@ -0,0 +1,456 @@
1
+ # Copyright (c) 2025 TOON Format Organization
2
+ # SPDX-License-Identifier: MIT
3
+ """Type-specific encoders for TOON format.
4
+
5
+ Provides encoding functions for different value types: objects, arrays (primitive,
6
+ tabular, and list formats), and primitives. Includes format detection logic to
7
+ determine the most efficient TOON representation for arrays.
8
+ """
9
+
10
+ from typing import List, Optional, cast
11
+
12
+ from .constants import LIST_ITEM_PREFIX
13
+ from .normalize import (
14
+ is_array_of_arrays,
15
+ is_array_of_objects,
16
+ is_array_of_primitives,
17
+ is_json_array,
18
+ is_json_object,
19
+ is_json_primitive,
20
+ )
21
+ from .primitives import encode_key, encode_primitive, format_header, join_encoded_values
22
+ from .types import (
23
+ Depth,
24
+ JsonArray,
25
+ JsonObject,
26
+ JsonPrimitive,
27
+ JsonValue,
28
+ ResolvedEncodeOptions,
29
+ )
30
+ from .writer import LineWriter
31
+
32
+
33
def encode_value(
    value: JsonValue,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth = 0,
) -> None:
    """Dispatch a normalized value to the encoder matching its JSON kind.

    A value that is neither a primitive, an array nor an object produces no
    output.

    Args:
        value: Normalized JSON value
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
    """
    if is_json_primitive(value):
        text = encode_primitive(cast(JsonPrimitive, value), options.delimiter)
        writer.push(depth, text)
        return

    if is_json_array(value):
        encode_array(cast(JsonArray, value), options, writer, depth, None)
        return

    if is_json_object(value):
        encode_object(cast(JsonObject, value), options, writer, depth, None)
53
+
54
+
55
def encode_object(
    obj: JsonObject,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
    key: Optional[str],
) -> None:
    """Encode an object to TOON format.

    When ``key`` is given, a ``key:`` header line is written at ``depth`` and
    the object's fields are written one level deeper. When ``key`` is ``None``
    the fields are written directly at ``depth``.

    Args:
        obj: Dictionary object
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
        key: Optional key name; ``None`` means "no header line"
    """
    child_depth = depth
    # Compare against None rather than relying on truthiness: an empty-string
    # key is still a key, and must get its own header line so that the nested
    # fields are not flattened into the enclosing object.
    if key is not None:
        writer.push(depth, f"{encode_key(key)}:")
        child_depth = depth + 1

    for obj_key, obj_value in obj.items():
        encode_key_value_pair(obj_key, obj_value, options, writer, child_depth)
76
+
77
+
78
def encode_key_value_pair(
    key: str,
    value: JsonValue,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
) -> None:
    """Write one ``key``/``value`` field of an object.

    Primitives are written on a single ``key: value`` line; arrays and objects
    are delegated to their own encoders with ``key`` as the header name. Any
    other kind of value produces no output.

    Args:
        key: Key name
        value: Value to encode
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
    """
    if is_json_primitive(value):
        rendered = encode_primitive(cast(JsonPrimitive, value), options.delimiter)
        writer.push(depth, f"{encode_key(key)}: {rendered}")
        return

    if is_json_array(value):
        encode_array(cast(JsonArray, value), options, writer, depth, key)
        return

    if is_json_object(value):
        encode_object(cast(JsonObject, value), options, writer, depth, key)
101
+
102
+
103
def encode_array(
    arr: JsonArray,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
    key: Optional[str],
) -> None:
    """Pick the TOON layout for an array and delegate to the matching encoder.

    Args:
        arr: List array
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
        key: Optional key name
    """
    # An empty array is just its header line with a length of zero.
    if not arr:
        writer.push(
            depth,
            format_header(key, 0, None, options.delimiter, options.lengthMarker),
        )
        return

    if is_array_of_primitives(arr):
        encode_inline_primitive_array(arr, options, writer, depth, key)
        return

    if is_array_of_arrays(arr):
        encode_array_of_arrays(arr, options, writer, depth, key)
        return

    # Only arrays made up of objects are candidates for the tabular layout;
    # everything else (and non-uniform object arrays) becomes list items.
    fields = None
    if is_array_of_objects(arr):
        fields = detect_tabular_header(arr, options.delimiter)

    if fields:
        encode_array_of_objects_as_tabular(arr, fields, options, writer, depth, key)
    else:
        encode_mixed_array_as_list_items(arr, options, writer, depth, key)
138
+
139
+
140
def encode_array_content(
    arr: JsonArray,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
) -> None:
    """Encode array content without header (header already written).

    Empty arrays and arrays of primitives produce no output here: a primitive
    array's values belong on the header line itself, so callers are expected
    to write those arrays inline rather than through this function.

    Args:
        arr: Array to encode
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth for array items
    """
    if not arr or is_array_of_primitives(arr):
        return

    if is_array_of_arrays(arr):
        for item in arr:
            _encode_array_as_list_item(item, options, writer, depth)
    elif is_array_of_objects(arr):
        tabular_header = detect_tabular_header(arr, options.delimiter)
        if tabular_header:
            # Tabular format: one delimited row per object, in header order.
            for obj in arr:
                row_values = [
                    encode_primitive(obj[field], options.delimiter) for field in tabular_header
                ]
                writer.push(depth, join_encoded_values(row_values, options.delimiter))
        else:
            # List format
            for item in arr:
                encode_object_as_list_item(item, options, writer, depth)
    else:
        # Mixed array
        for item in arr:
            if is_json_primitive(item):
                writer.push(
                    depth,
                    f"{LIST_ITEM_PREFIX}{encode_primitive(item, options.delimiter)}",
                )
            elif is_json_object(item):
                encode_object_as_list_item(item, options, writer, depth)
            elif is_json_array(item):
                _encode_array_as_list_item(item, options, writer, depth)


def _encode_array_as_list_item(
    item: JsonArray,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
) -> None:
    """Write an array that is itself a list element, prefixed with LIST_ITEM_PREFIX.

    Mirrors the nested-array handling in ``encode_mixed_array_as_list_items``
    so that every element of an expanded list carries the list-item prefix.
    """
    if is_array_of_primitives(item):
        # Inline primitive array: prefix + header + values on one line.
        encoded_values = [encode_primitive(v, options.delimiter) for v in item]
        joined = join_encoded_values(encoded_values, options.delimiter)
        item_header = format_header(
            None, len(item), None, options.delimiter, options.lengthMarker
        )
        line = f"{LIST_ITEM_PREFIX}{item_header}"
        if joined:
            line += f" {joined}"
        writer.push(depth, line)
        return

    # Non-inline array: prefixed header line, then the content one level deeper.
    tabular_fields = None
    if is_array_of_objects(item):
        tabular_fields = detect_tabular_header(item, options.delimiter)
    item_header = format_header(
        None, len(item), tabular_fields, options.delimiter, options.lengthMarker
    )
    writer.push(depth, f"{LIST_ITEM_PREFIX}{item_header}")
    encode_array_content(item, options, writer, depth + 1)
214
+
215
+
216
def encode_inline_primitive_array(
    arr: JsonArray,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
    key: Optional[str],
) -> None:
    """Encode an array of primitives inline.

    The header and the delimiter-joined values share a single line. When there
    is nothing to join (for example an empty array), only the header is
    written, so the line never ends in a trailing space.

    Args:
        arr: Array of primitives
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
        key: Optional key name
    """
    encoded_values = [encode_primitive(item, options.delimiter) for item in arr]
    joined = join_encoded_values(encoded_values, options.delimiter)
    line = format_header(key, len(arr), None, options.delimiter, options.lengthMarker)
    # Same guard the other inline-array writers in this module use.
    if joined:
        line += f" {joined}"
    writer.push(depth, line)
236
+
237
+
238
def encode_array_of_arrays(
    arr: JsonArray,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
    key: Optional[str],
) -> None:
    """Encode an array of arrays.

    Writes the outer header at ``depth`` and each inner array as a list item
    at ``depth + 1``. Every inner array carries the list-item prefix, matching
    how ``encode_mixed_array_as_list_items`` writes nested arrays.

    Args:
        arr: Array of arrays
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
        key: Optional key name
    """
    header = format_header(key, len(arr), None, options.delimiter, options.lengthMarker)
    writer.push(depth, header)

    item_depth = depth + 1
    for item in arr:
        if is_array_of_primitives(item):
            encoded_values = [encode_primitive(v, options.delimiter) for v in item]
            joined = join_encoded_values(encoded_values, options.delimiter)
            # Use format_header for correct delimiter handling
            item_header = format_header(
                None, len(item), None, options.delimiter, options.lengthMarker
            )
            # Only add space and content if array is not empty
            line = f"{LIST_ITEM_PREFIX}{item_header}"
            if joined:
                line += f" {joined}"
            writer.push(item_depth, line)
            continue

        # Non-inline inner array: prefixed header line, then its content one
        # level deeper. Previously this went through encode_array, which wrote
        # the header without the list-item prefix.
        tabular_fields = None
        if is_array_of_objects(item):
            tabular_fields = detect_tabular_header(item, options.delimiter)
        item_header = format_header(
            None, len(item), tabular_fields, options.delimiter, options.lengthMarker
        )
        writer.push(item_depth, f"{LIST_ITEM_PREFIX}{item_header}")
        encode_array_content(item, options, writer, item_depth + 1)
272
+
273
+
274
def detect_tabular_header(arr: List[JsonObject], delimiter: str) -> Optional[List[str]]:
    """Detect if array can use tabular format and return header keys.

    An array is tabular when it is non-empty, its objects have at least one
    key, every object has the same key set as the first one (order is
    ignored), and every value is a primitive.

    Args:
        arr: Array of objects
        delimiter: Delimiter character (accepted for interface compatibility;
            not consulted by the detection)

    Returns:
        Keys of the first object, in its order, if tabular; None otherwise
    """
    if not arr:
        return None

    # Get keys from first object
    first_keys = list(arr[0].keys())
    if not first_keys:
        # Objects without fields cannot form table columns. Returning None
        # (instead of an empty list) keeps "is not None" checks consistent
        # with truthiness checks on the result.
        return None
    first_keys_set = set(first_keys)

    # Every object must expose exactly the same set of keys ...
    if any(set(obj.keys()) != first_keys_set for obj in arr):
        return None

    # ... and only primitive values, since a table cell holds a single value.
    for obj in arr:
        if not all(is_json_primitive(value) for value in obj.values()):
            return None

    return first_keys
299
+
300
+
301
def is_tabular_array(arr: List[JsonObject], delimiter: str) -> bool:
    """Report whether ``detect_tabular_header`` finds a tabular header for ``arr``.

    Args:
        arr: Array to check
        delimiter: Delimiter character

    Returns:
        True if tabular format can be used
    """
    header = detect_tabular_header(arr, delimiter)
    return header is not None
312
+
313
+
314
def encode_array_of_objects_as_tabular(
    arr: List[JsonObject],
    fields: List[str],
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
    key: Optional[str],
) -> None:
    """Write uniform objects as a table: one header line, then one row each.

    Args:
        arr: Array of uniform objects
        fields: Field names for header
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
        key: Optional key name
    """
    writer.push(
        depth,
        format_header(key, len(arr), fields, options.delimiter, options.lengthMarker),
    )

    row_depth = depth + 1
    for record in arr:
        # Cells follow the order of ``fields``, not the object's own key order.
        cells = [encode_primitive(record[name], options.delimiter) for name in fields]
        writer.push(row_depth, join_encoded_values(cells, options.delimiter))
339
+
340
+
341
def encode_mixed_array_as_list_items(
    arr: JsonArray,
    options: ResolvedEncodeOptions,
    writer: LineWriter,
    depth: Depth,
    key: Optional[str],
) -> None:
    """Write an array as a header line followed by one list item per element.

    Args:
        arr: Mixed array
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
        key: Optional key name
    """
    writer.push(
        depth,
        format_header(key, len(arr), None, options.delimiter, options.lengthMarker),
    )

    item_depth = depth + 1
    for element in arr:
        if is_json_primitive(element):
            rendered = encode_primitive(element, options.delimiter)
            writer.push(item_depth, f"{LIST_ITEM_PREFIX}{rendered}")
            continue

        if is_json_object(element):
            encode_object_as_list_item(element, options, writer, item_depth)
            continue

        if not is_json_array(element):
            # Not a primitive, object or array: nothing is written.
            continue

        # Arrays as list items need the "- " prefix with their header
        nested = cast(JsonArray, element)
        if is_array_of_primitives(nested):
            # Inline primitive array: "- [N]: values"
            cells = [encode_primitive(v, options.delimiter) for v in nested]
            body = join_encoded_values(cells, options.delimiter)
            nested_header = format_header(
                None, len(nested), None, options.delimiter, options.lengthMarker
            )
            line = f"{LIST_ITEM_PREFIX}{nested_header}"
            if body:
                line += f" {body}"
            writer.push(item_depth, line)
            continue

        # Non-inline array: "- [N]:" header, then content at depth + 2
        nested_fields = None
        if is_array_of_objects(nested):
            nested_fields = detect_tabular_header(nested, options.delimiter)
        nested_header = format_header(
            None,
            len(nested),
            nested_fields,
            options.delimiter,
            options.lengthMarker,
        )
        writer.push(item_depth, f"{LIST_ITEM_PREFIX}{nested_header}")
        encode_array_content(nested, options, writer, depth + 2)
396
+
397
+
398
def encode_object_as_list_item(
    obj: JsonObject, options: ResolvedEncodeOptions, writer: LineWriter, depth: Depth
) -> None:
    """Write an object as one element of an expanded list.

    The first field shares the line with the list-item prefix when it is a
    primitive or an array; the remaining fields follow at ``depth + 1``.

    Args:
        obj: Object to encode
        options: Resolved encoding options
        writer: Line writer for output
        depth: Current indentation depth
    """
    entries = list(obj.items())
    bare_marker = LIST_ITEM_PREFIX.rstrip()

    # An object without fields is just the bare list marker.
    if not entries:
        writer.push(depth, bare_marker)
        return

    first_key, first_value = entries[0]
    remaining = entries[1:]
    field_depth = depth + 1

    if is_json_primitive(first_value):
        rendered = encode_primitive(first_value, options.delimiter)
        writer.push(depth, f"{LIST_ITEM_PREFIX}{encode_key(first_key)}: {rendered}")
    elif is_json_array(first_value):
        # Arrays go on the same line as "-" with their header
        leading = cast(JsonArray, first_value)
        if is_array_of_primitives(leading):
            # Inline primitive array: write header and content on same line
            cells = [encode_primitive(item, options.delimiter) for item in leading]
            body = join_encoded_values(cells, options.delimiter)
            array_header = format_header(
                first_key, len(leading), None, options.delimiter, options.lengthMarker
            )
            line = f"{LIST_ITEM_PREFIX}{array_header}"
            if body:
                line += f" {body}"
            writer.push(depth, line)
        else:
            # Non-inline array: write header on hyphen line, content below
            leading_fields = None
            if is_array_of_objects(leading):
                leading_fields = detect_tabular_header(leading, options.delimiter)
            array_header = format_header(
                first_key,
                len(leading),
                leading_fields,
                options.delimiter,
                options.lengthMarker,
            )
            writer.push(depth, f"{LIST_ITEM_PREFIX}{array_header}")
            encode_array_content(leading, options, writer, field_depth)
    else:
        # Any other first value (e.g. an object): "-" alone, then the field
        # is encoded like a normal key-value pair one level deeper.
        writer.push(depth, bare_marker)
        encode_key_value_pair(first_key, first_value, options, writer, field_depth)

    # Rest of the keys go normally indented
    for key, value in remaining:
        encode_key_value_pair(key, value, options, writer, field_depth)
@@ -0,0 +1,92 @@
1
+ # Copyright (c) 2025 TOON Format Organization
2
+ # SPDX-License-Identifier: MIT
3
+ """Centralized logging configuration for toon_format.
4
+
5
+ This module provides consistent logging infrastructure across all toon_format
6
+ modules with support for the TOON_FORMAT_DEBUG environment variable for
7
+ enabling debug-level logging.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ from functools import lru_cache
13
+ from typing import Optional
14
+
15
# Name of the environment variable that switches on debug logging.
TOON_FORMAT_DEBUG_ENV_VAR = "TOON_FORMAT_DEBUG"

# Level applied when debug mode is off.
DEFAULT_LOG_LEVEL = logging.WARNING

# Level applied when debug mode is on.
DEBUG_LOG_LEVEL = logging.DEBUG
19
+
20
+
21
@lru_cache(maxsize=1)
def is_debug_enabled() -> bool:
    """Report whether the TOON_FORMAT_DEBUG environment variable is truthy.

    The value is lower-cased before comparison, so "1", "true" and "yes" are
    accepted in any letter case. An unset variable counts as disabled.

    Returns:
        bool: True if debug mode is enabled, False otherwise.

    Note:
        Result is cached for performance, so the environment is only read on
        the first call.
    """
    raw_setting = os.environ.get(TOON_FORMAT_DEBUG_ENV_VAR, "")
    normalized = raw_setting.lower()
    return normalized in ("1", "true", "yes")
35
+
36
+
37
def get_logger(name: str) -> logging.Logger:
    """Return the logger for ``name``, configured for toon_format output.

    The logger's level is (re)set on every call from the debug setting. A
    ``StreamHandler`` with the package's message format is attached only when
    the logger has no handlers yet.

    Args:
        name: Module name (typically __name__).

    Returns:
        logging.Logger: Configured logger instance.

    Examples:
        >>> logger = get_logger(__name__)
        >>> logger.debug("Debug message")  # Only shown if TOON_FORMAT_DEBUG=1
    """
    log = logging.getLogger(name)

    if is_debug_enabled():
        active_level = DEBUG_LOG_LEVEL
    else:
        active_level = DEFAULT_LOG_LEVEL
    log.setLevel(active_level)

    # Already has a handler: leave the handler set-up alone.
    if log.handlers:
        return log

    stream = logging.StreamHandler()
    stream.setLevel(active_level)
    stream.setFormatter(logging.Formatter("[%(name)s] %(levelname)s: %(message)s"))
    log.addHandler(stream)
    return log
68
+
69
+
70
def configure_logging(level: Optional[int] = None) -> None:
    """Configure log level programmatically for all toon_format loggers.

    Useful for testing and programmatic control of logging. Only the
    ``toon_format`` logger and its dotted descendants (``toon_format.*``) are
    touched; loggers of other packages whose names merely begin with the same
    characters (for example ``toon_formatter``) are left alone.

    Args:
        level: Log level (e.g., logging.DEBUG, logging.INFO).
            If None, uses environment variable or default.

    Examples:
        >>> configure_logging(logging.DEBUG)  # Enable debug logging
        >>> configure_logging(logging.WARNING)  # Reset to default
    """
    if level is None:
        level = DEBUG_LOG_LEVEL if is_debug_enabled() else DEFAULT_LOG_LEVEL

    package = "toon_format"
    child_prefix = package + "."

    # Snapshot the names first: getLogger() may modify the manager's registry.
    for name in list(logging.Logger.manager.loggerDict):
        if name != package and not name.startswith(child_prefix):
            continue
        logger = logging.getLogger(name)
        logger.setLevel(level)
        for handler in logger.handlers:
            handler.setLevel(level)