exonware-xwsystem 0.0.1.410__py3-none-any.whl → 0.0.1.411__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +1 -1
- exonware/conf.py +1 -1
- exonware/xwsystem/__init__.py +2 -2
- exonware/xwsystem/caching/__init__.py +1 -1
- exonware/xwsystem/caching/base.py +2 -2
- exonware/xwsystem/caching/bloom_cache.py +2 -2
- exonware/xwsystem/caching/cache_manager.py +1 -1
- exonware/xwsystem/caching/conditional.py +2 -2
- exonware/xwsystem/caching/contracts.py +1 -1
- exonware/xwsystem/caching/decorators.py +2 -2
- exonware/xwsystem/caching/defs.py +1 -1
- exonware/xwsystem/caching/disk_cache.py +1 -1
- exonware/xwsystem/caching/distributed.py +1 -1
- exonware/xwsystem/caching/errors.py +1 -1
- exonware/xwsystem/caching/events.py +2 -2
- exonware/xwsystem/caching/eviction_strategies.py +1 -1
- exonware/xwsystem/caching/fluent.py +1 -1
- exonware/xwsystem/caching/integrity.py +1 -1
- exonware/xwsystem/caching/lfu_cache.py +2 -2
- exonware/xwsystem/caching/lfu_optimized.py +3 -3
- exonware/xwsystem/caching/lru_cache.py +2 -2
- exonware/xwsystem/caching/memory_bounded.py +2 -2
- exonware/xwsystem/caching/metrics_exporter.py +2 -2
- exonware/xwsystem/caching/observable_cache.py +1 -1
- exonware/xwsystem/caching/pluggable_cache.py +2 -2
- exonware/xwsystem/caching/rate_limiter.py +1 -1
- exonware/xwsystem/caching/read_through.py +2 -2
- exonware/xwsystem/caching/secure_cache.py +1 -1
- exonware/xwsystem/caching/serializable.py +2 -2
- exonware/xwsystem/caching/stats.py +1 -1
- exonware/xwsystem/caching/tagging.py +2 -2
- exonware/xwsystem/caching/ttl_cache.py +1 -1
- exonware/xwsystem/caching/two_tier_cache.py +1 -1
- exonware/xwsystem/caching/utils.py +1 -1
- exonware/xwsystem/caching/validation.py +1 -1
- exonware/xwsystem/caching/warming.py +2 -2
- exonware/xwsystem/caching/write_behind.py +2 -2
- exonware/xwsystem/cli/__init__.py +1 -1
- exonware/xwsystem/cli/args.py +1 -1
- exonware/xwsystem/cli/base.py +1 -1
- exonware/xwsystem/cli/colors.py +1 -1
- exonware/xwsystem/cli/console.py +1 -1
- exonware/xwsystem/cli/contracts.py +1 -1
- exonware/xwsystem/cli/defs.py +1 -1
- exonware/xwsystem/cli/errors.py +1 -1
- exonware/xwsystem/cli/progress.py +1 -1
- exonware/xwsystem/cli/prompts.py +1 -1
- exonware/xwsystem/cli/tables.py +1 -1
- exonware/xwsystem/config/__init__.py +1 -1
- exonware/xwsystem/config/base.py +2 -2
- exonware/xwsystem/config/contracts.py +1 -1
- exonware/xwsystem/config/defaults.py +1 -1
- exonware/xwsystem/config/defs.py +1 -1
- exonware/xwsystem/config/errors.py +2 -2
- exonware/xwsystem/config/logging.py +1 -1
- exonware/xwsystem/config/logging_setup.py +2 -2
- exonware/xwsystem/config/performance.py +1 -1
- exonware/xwsystem/http_client/__init__.py +1 -1
- exonware/xwsystem/http_client/advanced_client.py +2 -2
- exonware/xwsystem/http_client/base.py +2 -2
- exonware/xwsystem/http_client/client.py +2 -2
- exonware/xwsystem/http_client/contracts.py +1 -1
- exonware/xwsystem/http_client/defs.py +1 -1
- exonware/xwsystem/http_client/errors.py +2 -2
- exonware/xwsystem/io/__init__.py +1 -1
- exonware/xwsystem/io/archive/__init__.py +1 -1
- exonware/xwsystem/io/archive/archive.py +1 -1
- exonware/xwsystem/io/archive/archive_files.py +1 -1
- exonware/xwsystem/io/archive/archivers.py +2 -2
- exonware/xwsystem/io/archive/base.py +6 -6
- exonware/xwsystem/io/archive/codec_integration.py +1 -1
- exonware/xwsystem/io/archive/compression.py +1 -1
- exonware/xwsystem/io/archive/formats/__init__.py +1 -1
- exonware/xwsystem/io/archive/formats/brotli_format.py +6 -3
- exonware/xwsystem/io/archive/formats/lz4_format.py +6 -3
- exonware/xwsystem/io/archive/formats/rar.py +6 -3
- exonware/xwsystem/io/archive/formats/sevenzip.py +6 -3
- exonware/xwsystem/io/archive/formats/squashfs_format.py +1 -1
- exonware/xwsystem/io/archive/formats/tar.py +1 -1
- exonware/xwsystem/io/archive/formats/wim_format.py +6 -3
- exonware/xwsystem/io/archive/formats/zip.py +1 -1
- exonware/xwsystem/io/archive/formats/zpaq_format.py +1 -1
- exonware/xwsystem/io/archive/formats/zstandard.py +6 -3
- exonware/xwsystem/io/base.py +1 -1
- exonware/xwsystem/io/codec/__init__.py +1 -1
- exonware/xwsystem/io/codec/base.py +6 -6
- exonware/xwsystem/io/codec/contracts.py +1 -1
- exonware/xwsystem/io/codec/registry.py +5 -5
- exonware/xwsystem/io/common/__init__.py +1 -1
- exonware/xwsystem/io/common/base.py +1 -1
- exonware/xwsystem/io/common/lock.py +1 -1
- exonware/xwsystem/io/common/watcher.py +1 -1
- exonware/xwsystem/io/contracts.py +1 -1
- exonware/xwsystem/io/data_operations.py +480 -0
- exonware/xwsystem/io/defs.py +1 -1
- exonware/xwsystem/io/errors.py +1 -1
- exonware/xwsystem/io/facade.py +2 -2
- exonware/xwsystem/io/file/__init__.py +1 -1
- exonware/xwsystem/io/file/base.py +1 -1
- exonware/xwsystem/io/file/conversion.py +1 -1
- exonware/xwsystem/io/file/file.py +8 -6
- exonware/xwsystem/io/file/paged_source.py +8 -1
- exonware/xwsystem/io/file/paging/__init__.py +1 -1
- exonware/xwsystem/io/file/paging/byte_paging.py +1 -1
- exonware/xwsystem/io/file/paging/line_paging.py +1 -1
- exonware/xwsystem/io/file/paging/record_paging.py +1 -1
- exonware/xwsystem/io/file/paging/registry.py +4 -4
- exonware/xwsystem/io/file/source.py +20 -9
- exonware/xwsystem/io/filesystem/__init__.py +1 -1
- exonware/xwsystem/io/filesystem/base.py +1 -1
- exonware/xwsystem/io/filesystem/local.py +9 -1
- exonware/xwsystem/io/folder/__init__.py +1 -1
- exonware/xwsystem/io/folder/base.py +1 -1
- exonware/xwsystem/io/folder/folder.py +2 -2
- exonware/xwsystem/io/serialization/__init__.py +1 -1
- exonware/xwsystem/io/serialization/auto_serializer.py +52 -39
- exonware/xwsystem/io/serialization/base.py +165 -1
- exonware/xwsystem/io/serialization/contracts.py +88 -1
- exonware/xwsystem/io/serialization/defs.py +1 -1
- exonware/xwsystem/io/serialization/errors.py +1 -1
- exonware/xwsystem/io/serialization/flyweight.py +10 -10
- exonware/xwsystem/io/serialization/format_detector.py +8 -5
- exonware/xwsystem/io/serialization/formats/__init__.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/bson.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/cbor.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/marshal.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/msgpack.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/pickle.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/plistlib.py +1 -1
- exonware/xwsystem/io/serialization/formats/database/dbm.py +53 -1
- exonware/xwsystem/io/serialization/formats/database/shelve.py +48 -1
- exonware/xwsystem/io/serialization/formats/database/sqlite3.py +85 -1
- exonware/xwsystem/io/serialization/formats/text/configparser.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/csv.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/formdata.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/json.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/json5.py +7 -5
- exonware/xwsystem/io/serialization/formats/text/jsonlines.py +229 -19
- exonware/xwsystem/io/serialization/formats/text/multipart.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/toml.py +19 -3
- exonware/xwsystem/io/serialization/formats/text/xml.py +8 -1
- exonware/xwsystem/io/serialization/formats/text/yaml.py +52 -2
- exonware/xwsystem/io/serialization/registry.py +1 -1
- exonware/xwsystem/io/serialization/serializer.py +175 -3
- exonware/xwsystem/io/serialization/utils/__init__.py +1 -1
- exonware/xwsystem/io/serialization/utils/path_ops.py +1 -1
- exonware/xwsystem/io/stream/__init__.py +1 -1
- exonware/xwsystem/io/stream/async_operations.py +1 -1
- exonware/xwsystem/io/stream/base.py +1 -1
- exonware/xwsystem/io/stream/codec_io.py +1 -1
- exonware/xwsystem/ipc/async_fabric.py +1 -2
- exonware/xwsystem/ipc/base.py +2 -2
- exonware/xwsystem/ipc/contracts.py +2 -2
- exonware/xwsystem/ipc/defs.py +1 -1
- exonware/xwsystem/ipc/errors.py +2 -2
- exonware/xwsystem/ipc/pipes.py +2 -2
- exonware/xwsystem/ipc/shared_memory.py +2 -2
- exonware/xwsystem/monitoring/base.py +2 -2
- exonware/xwsystem/monitoring/contracts.py +1 -1
- exonware/xwsystem/monitoring/defs.py +1 -1
- exonware/xwsystem/monitoring/error_recovery.py +2 -2
- exonware/xwsystem/monitoring/errors.py +2 -2
- exonware/xwsystem/monitoring/memory_monitor.py +1 -1
- exonware/xwsystem/monitoring/performance_manager_generic.py +2 -2
- exonware/xwsystem/monitoring/performance_validator.py +1 -1
- exonware/xwsystem/monitoring/system_monitor.py +2 -2
- exonware/xwsystem/monitoring/tracing.py +2 -2
- exonware/xwsystem/monitoring/tracker.py +1 -1
- exonware/xwsystem/operations/__init__.py +1 -1
- exonware/xwsystem/operations/base.py +1 -1
- exonware/xwsystem/operations/defs.py +1 -1
- exonware/xwsystem/operations/diff.py +1 -1
- exonware/xwsystem/operations/merge.py +1 -1
- exonware/xwsystem/operations/patch.py +1 -1
- exonware/xwsystem/patterns/base.py +2 -2
- exonware/xwsystem/patterns/context_manager.py +2 -2
- exonware/xwsystem/patterns/contracts.py +9 -9
- exonware/xwsystem/patterns/defs.py +1 -1
- exonware/xwsystem/patterns/dynamic_facade.py +8 -8
- exonware/xwsystem/patterns/errors.py +5 -5
- exonware/xwsystem/patterns/handler_factory.py +6 -6
- exonware/xwsystem/patterns/object_pool.py +7 -7
- exonware/xwsystem/patterns/registry.py +3 -3
- exonware/xwsystem/plugins/__init__.py +1 -1
- exonware/xwsystem/plugins/base.py +5 -5
- exonware/xwsystem/plugins/contracts.py +5 -5
- exonware/xwsystem/plugins/defs.py +1 -1
- exonware/xwsystem/plugins/errors.py +4 -4
- exonware/xwsystem/runtime/__init__.py +1 -1
- exonware/xwsystem/runtime/base.py +6 -6
- exonware/xwsystem/runtime/contracts.py +6 -6
- exonware/xwsystem/runtime/defs.py +1 -1
- exonware/xwsystem/runtime/env.py +2 -2
- exonware/xwsystem/runtime/errors.py +1 -1
- exonware/xwsystem/runtime/reflection.py +8 -8
- exonware/xwsystem/security/auth.py +1 -1
- exonware/xwsystem/security/base.py +2 -2
- exonware/xwsystem/security/contracts.py +1 -1
- exonware/xwsystem/security/crypto.py +2 -2
- exonware/xwsystem/security/defs.py +1 -1
- exonware/xwsystem/security/errors.py +2 -2
- exonware/xwsystem/security/hazmat.py +2 -2
- exonware/xwsystem/shared/__init__.py +1 -1
- exonware/xwsystem/shared/base.py +1 -1
- exonware/xwsystem/shared/contracts.py +1 -1
- exonware/xwsystem/shared/defs.py +1 -1
- exonware/xwsystem/shared/errors.py +1 -1
- exonware/xwsystem/structures/__init__.py +1 -1
- exonware/xwsystem/structures/base.py +2 -2
- exonware/xwsystem/structures/contracts.py +1 -1
- exonware/xwsystem/structures/defs.py +1 -1
- exonware/xwsystem/structures/errors.py +2 -2
- exonware/xwsystem/threading/async_primitives.py +2 -2
- exonware/xwsystem/threading/base.py +2 -2
- exonware/xwsystem/threading/contracts.py +1 -1
- exonware/xwsystem/threading/defs.py +1 -1
- exonware/xwsystem/threading/errors.py +2 -2
- exonware/xwsystem/threading/safe_factory.py +6 -6
- exonware/xwsystem/utils/base.py +2 -2
- exonware/xwsystem/utils/contracts.py +1 -1
- exonware/xwsystem/utils/dt/__init__.py +1 -1
- exonware/xwsystem/utils/dt/base.py +2 -2
- exonware/xwsystem/utils/dt/contracts.py +1 -1
- exonware/xwsystem/utils/dt/defs.py +1 -1
- exonware/xwsystem/utils/dt/errors.py +2 -2
- exonware/xwsystem/utils/dt/formatting.py +1 -1
- exonware/xwsystem/utils/dt/humanize.py +2 -2
- exonware/xwsystem/utils/dt/parsing.py +1 -1
- exonware/xwsystem/utils/dt/timezone_utils.py +1 -1
- exonware/xwsystem/utils/errors.py +2 -2
- exonware/xwsystem/utils/test_runner.py +1 -1
- exonware/xwsystem/utils/utils_contracts.py +1 -1
- exonware/xwsystem/validation/__init__.py +1 -1
- exonware/xwsystem/validation/base.py +15 -15
- exonware/xwsystem/validation/contracts.py +1 -1
- exonware/xwsystem/validation/data_validator.py +10 -0
- exonware/xwsystem/validation/declarative.py +9 -9
- exonware/xwsystem/validation/defs.py +1 -1
- exonware/xwsystem/validation/errors.py +2 -2
- exonware/xwsystem/validation/fluent_validator.py +4 -4
- exonware/xwsystem/version.py +2 -2
- {exonware_xwsystem-0.0.1.410.dist-info → exonware_xwsystem-0.0.1.411.dist-info}/METADATA +3 -3
- exonware_xwsystem-0.0.1.411.dist-info/RECORD +274 -0
- exonware_xwsystem-0.0.1.410.dist-info/RECORD +0 -273
- {exonware_xwsystem-0.0.1.410.dist-info → exonware_xwsystem-0.0.1.411.dist-info}/WHEEL +0 -0
- {exonware_xwsystem-0.0.1.410.dist-info → exonware_xwsystem-0.0.1.411.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
Company: eXonware.com
|
|
5
5
|
Author: Eng. Muhammad AlShehri
|
|
6
6
|
Email: connect@exonware.com
|
|
7
|
-
Version: 0.0.1.410
|
|
7
|
+
Version: 0.0.1.411
|
|
8
8
|
Generation Date: 02-Nov-2025
|
|
9
9
|
|
|
10
10
|
JSON Lines (JSONL/NDJSON) Serialization - Newline-Delimited JSON
|
|
@@ -26,11 +26,12 @@ from typing import Any, Optional, Union
|
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
import json
|
|
28
28
|
|
|
29
|
-
from
|
|
30
|
-
from
|
|
29
|
+
from .json import JsonSerializer
|
|
30
|
+
from ....errors import SerializationError
|
|
31
|
+
from ....common.atomic import AtomicFileWriter
|
|
31
32
|
|
|
32
33
|
|
|
33
|
-
class JsonLinesSerializer(ASerialization):
|
|
34
|
+
class JsonLinesSerializer(JsonSerializer):
|
|
34
35
|
"""
|
|
35
36
|
JSON Lines (JSONL/NDJSON) serializer for streaming data.
|
|
36
37
|
|
|
@@ -67,8 +68,36 @@ class JsonLinesSerializer(ASerialization):
|
|
|
67
68
|
def codec_types(self) -> list[str]:
|
|
68
69
|
"""JSON Lines is a data exchange format."""
|
|
69
70
|
return ["data", "serialization"]
|
|
70
|
-
|
|
71
|
-
|
|
71
|
+
|
|
72
|
+
# -------------------------------------------------------------------------
|
|
73
|
+
# RECORD / STREAMING CAPABILITIES
|
|
74
|
+
# -------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
@property
|
|
77
|
+
def supports_record_streaming(self) -> bool:
|
|
78
|
+
"""
|
|
79
|
+
JSONL is explicitly designed for record-level streaming.
|
|
80
|
+
|
|
81
|
+
This enables stream_read_record / stream_update_record to operate in a
|
|
82
|
+
true streaming fashion (line-by-line) without loading the entire file.
|
|
83
|
+
"""
|
|
84
|
+
return True
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def supports_record_paging(self) -> bool:
|
|
88
|
+
"""
|
|
89
|
+
JSONL supports efficient record-level paging.
|
|
90
|
+
|
|
91
|
+
Paging is implemented as a lightweight line counter that only parses
|
|
92
|
+
the requested slice of records.
|
|
93
|
+
"""
|
|
94
|
+
return True
|
|
95
|
+
|
|
96
|
+
# -------------------------------------------------------------------------
|
|
97
|
+
# CORE ENCODE / DECODE
|
|
98
|
+
# -------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
def encode(self, data: Any, *, options: Optional[dict[str, Any]] = None) -> str:
|
|
72
101
|
"""
|
|
73
102
|
Encode data to JSON Lines string.
|
|
74
103
|
|
|
@@ -82,14 +111,17 @@ class JsonLinesSerializer(ASerialization):
|
|
|
82
111
|
if not isinstance(data, list):
|
|
83
112
|
# Single object - wrap in list
|
|
84
113
|
data = [data]
|
|
85
|
-
|
|
86
|
-
|
|
114
|
+
|
|
115
|
+
opts = options or {}
|
|
116
|
+
ensure_ascii = opts.get("ensure_ascii", False)
|
|
117
|
+
|
|
118
|
+
lines: list[str] = []
|
|
87
119
|
for item in data:
|
|
88
|
-
lines.append(json.dumps(item, ensure_ascii=
|
|
89
|
-
|
|
90
|
-
return
|
|
91
|
-
|
|
92
|
-
def decode(self, data: Union[str, bytes], options: Optional[dict[str, Any]] = None) -> list[Any]:
|
|
120
|
+
lines.append(json.dumps(item, ensure_ascii=ensure_ascii))
|
|
121
|
+
|
|
122
|
+
return "\n".join(lines)
|
|
123
|
+
|
|
124
|
+
def decode(self, data: Union[str, bytes], *, options: Optional[dict[str, Any]] = None) -> list[Any]:
|
|
93
125
|
"""
|
|
94
126
|
Decode JSON Lines string to list of Python objects.
|
|
95
127
|
|
|
@@ -101,16 +133,194 @@ class JsonLinesSerializer(ASerialization):
|
|
|
101
133
|
List of decoded Python objects
|
|
102
134
|
"""
|
|
103
135
|
if isinstance(data, bytes):
|
|
104
|
-
data = data.decode(
|
|
105
|
-
|
|
136
|
+
data = data.decode("utf-8")
|
|
137
|
+
|
|
106
138
|
# Split by newlines and parse each line
|
|
107
|
-
lines = data.strip().split(
|
|
108
|
-
results = []
|
|
109
|
-
|
|
139
|
+
lines = data.strip().split("\n")
|
|
140
|
+
results: list[Any] = []
|
|
141
|
+
|
|
110
142
|
for line in lines:
|
|
111
143
|
line = line.strip()
|
|
112
144
|
if line: # Skip empty lines
|
|
113
145
|
results.append(json.loads(line))
|
|
114
|
-
|
|
146
|
+
|
|
147
|
+
return results
|
|
148
|
+
|
|
149
|
+
# -------------------------------------------------------------------------
|
|
150
|
+
# RECORD-LEVEL OPERATIONS (True streaming, line-by-line)
|
|
151
|
+
# -------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
def stream_read_record(
|
|
154
|
+
self,
|
|
155
|
+
file_path: Union[str, Path],
|
|
156
|
+
match: callable,
|
|
157
|
+
projection: Optional[list[Any]] = None,
|
|
158
|
+
**options: Any,
|
|
159
|
+
) -> Any:
|
|
160
|
+
"""
|
|
161
|
+
Stream-style read of a single logical record from a JSONL file.
|
|
162
|
+
|
|
163
|
+
Reads the file line-by-line, parsing each JSON object and returning the
|
|
164
|
+
first record that satisfies match(record). Optional projection is
|
|
165
|
+
applied using the base helper to avoid duplicating logic.
|
|
166
|
+
"""
|
|
167
|
+
path = Path(file_path)
|
|
168
|
+
if not path.exists():
|
|
169
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
170
|
+
|
|
171
|
+
# Line-by-line scan – no full-file load
|
|
172
|
+
with path.open("r", encoding="utf-8") as f:
|
|
173
|
+
for line in f:
|
|
174
|
+
line = line.strip()
|
|
175
|
+
if not line:
|
|
176
|
+
continue
|
|
177
|
+
record = json.loads(line)
|
|
178
|
+
if match(record):
|
|
179
|
+
return self._apply_projection(record, projection)
|
|
180
|
+
|
|
181
|
+
raise KeyError("No matching record found")
|
|
182
|
+
|
|
183
|
+
def stream_update_record(
|
|
184
|
+
self,
|
|
185
|
+
file_path: Union[str, Path],
|
|
186
|
+
match: callable,
|
|
187
|
+
updater: callable,
|
|
188
|
+
*,
|
|
189
|
+
atomic: bool = True,
|
|
190
|
+
**options: Any,
|
|
191
|
+
) -> int:
|
|
192
|
+
"""
|
|
193
|
+
Stream-style update of logical records in a JSONL file.
|
|
194
|
+
|
|
195
|
+
Implementation uses a temp file + AtomicFileWriter pattern to ensure
|
|
196
|
+
atomicity when atomic=True. Records are processed line-by-line and only
|
|
197
|
+
the matching records are materialized and updated.
|
|
198
|
+
"""
|
|
199
|
+
path = Path(file_path)
|
|
200
|
+
if not path.exists():
|
|
201
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
202
|
+
|
|
203
|
+
updated = 0
|
|
204
|
+
backup = options.get("backup", True)
|
|
205
|
+
ensure_ascii = options.get("ensure_ascii", False)
|
|
206
|
+
|
|
207
|
+
try:
|
|
208
|
+
if atomic:
|
|
209
|
+
# Atomic path: use AtomicFileWriter for temp+replace semantics
|
|
210
|
+
with AtomicFileWriter(path, backup=backup) as writer:
|
|
211
|
+
with path.open("r", encoding="utf-8") as src:
|
|
212
|
+
for line in src:
|
|
213
|
+
raw = line.rstrip("\n")
|
|
214
|
+
if not raw.strip():
|
|
215
|
+
# Preserve structural empty lines
|
|
216
|
+
writer.write(b"\n")
|
|
217
|
+
continue
|
|
218
|
+
|
|
219
|
+
record = json.loads(raw)
|
|
220
|
+
if match(record):
|
|
221
|
+
record = updater(record)
|
|
222
|
+
updated += 1
|
|
223
|
+
|
|
224
|
+
out_line = json.dumps(record, ensure_ascii=ensure_ascii) + "\n"
|
|
225
|
+
writer.write(out_line.encode("utf-8"))
|
|
226
|
+
else:
|
|
227
|
+
# Non-atomic fallback: read + rewrite line-by-line
|
|
228
|
+
new_lines: list[str] = []
|
|
229
|
+
with path.open("r", encoding="utf-8") as src:
|
|
230
|
+
for line in src:
|
|
231
|
+
raw = line.rstrip("\n")
|
|
232
|
+
if not raw.strip():
|
|
233
|
+
new_lines.append("\n")
|
|
234
|
+
continue
|
|
235
|
+
|
|
236
|
+
record = json.loads(raw)
|
|
237
|
+
if match(record):
|
|
238
|
+
record = updater(record)
|
|
239
|
+
updated += 1
|
|
240
|
+
|
|
241
|
+
new_lines.append(json.dumps(record, ensure_ascii=ensure_ascii) + "\n")
|
|
242
|
+
|
|
243
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
244
|
+
path.write_text("".join(new_lines), encoding="utf-8")
|
|
245
|
+
|
|
246
|
+
return updated
|
|
247
|
+
except Exception as e:
|
|
248
|
+
raise SerializationError(
|
|
249
|
+
f"Failed to stream-update JSONL records in {path}: {e}",
|
|
250
|
+
format_name=self.format_name,
|
|
251
|
+
original_error=e,
|
|
252
|
+
) from e
|
|
253
|
+
|
|
254
|
+
def get_record_page(
|
|
255
|
+
self,
|
|
256
|
+
file_path: Union[str, Path],
|
|
257
|
+
page_number: int,
|
|
258
|
+
page_size: int,
|
|
259
|
+
**options: Any,
|
|
260
|
+
) -> list[Any]:
|
|
261
|
+
"""
|
|
262
|
+
Retrieve a logical page of records from a JSONL file.
|
|
263
|
+
|
|
264
|
+
Pages are computed by counting logical records (non-empty lines). Only
|
|
265
|
+
the requested slice is parsed and returned, keeping memory usage
|
|
266
|
+
proportional to page_size rather than file size.
|
|
267
|
+
"""
|
|
268
|
+
if page_number < 1 or page_size <= 0:
|
|
269
|
+
raise ValueError("Invalid page_number or page_size")
|
|
270
|
+
|
|
271
|
+
path = Path(file_path)
|
|
272
|
+
if not path.exists():
|
|
273
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
274
|
+
|
|
275
|
+
start_index = (page_number - 1) * page_size
|
|
276
|
+
end_index = start_index + page_size
|
|
277
|
+
|
|
278
|
+
results: list[Any] = []
|
|
279
|
+
current_index = 0
|
|
280
|
+
|
|
281
|
+
with path.open("r", encoding="utf-8") as f:
|
|
282
|
+
for line in f:
|
|
283
|
+
line = line.strip()
|
|
284
|
+
if not line:
|
|
285
|
+
continue
|
|
286
|
+
|
|
287
|
+
if current_index >= end_index:
|
|
288
|
+
break
|
|
289
|
+
|
|
290
|
+
if current_index >= start_index:
|
|
291
|
+
results.append(json.loads(line))
|
|
292
|
+
|
|
293
|
+
current_index += 1
|
|
294
|
+
|
|
115
295
|
return results
|
|
116
296
|
|
|
297
|
+
def get_record_by_id(
|
|
298
|
+
self,
|
|
299
|
+
file_path: Union[str, Path],
|
|
300
|
+
id_value: Any,
|
|
301
|
+
*,
|
|
302
|
+
id_field: str = "id",
|
|
303
|
+
**options: Any,
|
|
304
|
+
) -> Any:
|
|
305
|
+
"""
|
|
306
|
+
Retrieve a logical record by identifier from a JSONL file.
|
|
307
|
+
|
|
308
|
+
Performs a streaming linear scan over records, returning the first
|
|
309
|
+
record where record[id_field] == id_value.
|
|
310
|
+
"""
|
|
311
|
+
path = Path(file_path)
|
|
312
|
+
if not path.exists():
|
|
313
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
314
|
+
|
|
315
|
+
with path.open("r", encoding="utf-8") as f:
|
|
316
|
+
for line in f:
|
|
317
|
+
line = line.strip()
|
|
318
|
+
if not line:
|
|
319
|
+
continue
|
|
320
|
+
|
|
321
|
+
record = json.loads(line)
|
|
322
|
+
if isinstance(record, dict) and record.get(id_field) == id_value:
|
|
323
|
+
return record
|
|
324
|
+
|
|
325
|
+
raise KeyError(f"Record with {id_field}={id_value!r} not found")
|
|
326
|
+
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Company: eXonware.com
|
|
3
3
|
Author: Eng. Muhammad AlShehri
|
|
4
4
|
Email: connect@exonware.com
|
|
5
|
-
Version: 0.0.1.410
|
|
5
|
+
Version: 0.0.1.411
|
|
6
6
|
Generation Date: November 2, 2025
|
|
7
7
|
|
|
8
8
|
TOML serialization - Configuration file format.
|
|
@@ -180,8 +180,17 @@ class TomlSerializer(ASerialization):
|
|
|
180
180
|
"""
|
|
181
181
|
try:
|
|
182
182
|
if not isinstance(value, dict):
|
|
183
|
-
|
|
184
|
-
|
|
183
|
+
# TOML requires a table (dict) at the top level. For data-oriented
|
|
184
|
+
# use cases (e.g. record lists), transparently wrap common patterns
|
|
185
|
+
# so that higher-level APIs (record paging, etc.) can still work
|
|
186
|
+
# uniformly across formats.
|
|
187
|
+
if isinstance(value, list):
|
|
188
|
+
# Auto-wrap top-level list into "items" table.
|
|
189
|
+
value = {"items": value}
|
|
190
|
+
else:
|
|
191
|
+
# Fallback: wrap primitive/other types into a single "value" key.
|
|
192
|
+
value = {"value": value}
|
|
193
|
+
|
|
185
194
|
opts = options or {}
|
|
186
195
|
|
|
187
196
|
# Root cause fixed: Remove None values before encoding (TOML doesn't support None).
|
|
@@ -227,6 +236,13 @@ class TomlSerializer(ASerialization):
|
|
|
227
236
|
|
|
228
237
|
# Decode from TOML string
|
|
229
238
|
data = tomllib.loads(repr)
|
|
239
|
+
|
|
240
|
+
# If this looks like an auto-wrapped list payload (see encode),
|
|
241
|
+
# unwrap it for callers so that higher-level APIs (including the
|
|
242
|
+
# generic record-level operations in ASerialization) see the
|
|
243
|
+
# natural Python structure (a list of records).
|
|
244
|
+
if isinstance(data, dict) and set(data.keys()) == {"items"} and isinstance(data["items"], list):
|
|
245
|
+
return data["items"]
|
|
230
246
|
|
|
231
247
|
return data
|
|
232
248
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Company: eXonware.com
|
|
3
3
|
Author: Eng. Muhammad AlShehri
|
|
4
4
|
Email: connect@exonware.com
|
|
5
|
-
Version: 0.0.1.410
|
|
5
|
+
Version: 0.0.1.411
|
|
6
6
|
Generation Date: November 2, 2025
|
|
7
7
|
|
|
8
8
|
XML serialization - Extensible Markup Language.
|
|
@@ -427,6 +427,13 @@ class XmlSerializer(ASerialization):
|
|
|
427
427
|
if isinstance(repr, bytes):
|
|
428
428
|
repr = repr.decode('utf-8')
|
|
429
429
|
|
|
430
|
+
# Trim leading BOM/whitespace before XML declaration.
|
|
431
|
+
# Root cause: Some producers emit a blank line or BOM before '<?xml ...?>',
|
|
432
|
+
# which causes ExpatError: "XML or text declaration not at start of entity".
|
|
433
|
+
# Priority #2 (Usability): Be forgiving on harmless leading whitespace/BOM
|
|
434
|
+
# while keeping strict parsing for the actual XML content.
|
|
435
|
+
repr = repr.lstrip("\ufeff\r\n\t ")
|
|
436
|
+
|
|
430
437
|
opts = options or {}
|
|
431
438
|
root_name = opts.get('root', 'root')
|
|
432
439
|
preserve_types = opts.get('preserve_types', False)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Company: eXonware.com
|
|
3
3
|
Author: Eng. Muhammad AlShehri
|
|
4
4
|
Email: connect@exonware.com
|
|
5
|
-
Version: 0.0.1.410
|
|
5
|
+
Version: 0.0.1.411
|
|
6
6
|
Generation Date: November 2, 2025
|
|
7
7
|
|
|
8
8
|
YAML serialization - Human-readable data serialization format.
|
|
@@ -13,7 +13,7 @@ Following I→A pattern:
|
|
|
13
13
|
- Concrete: YamlSerializer
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
from typing import Any, Optional, Union
|
|
16
|
+
from typing import Any, Optional, Union, Iterator
|
|
17
17
|
from pathlib import Path
|
|
18
18
|
|
|
19
19
|
from ...base import ASerialization
|
|
@@ -88,6 +88,10 @@ class YamlSerializer(ASerialization):
|
|
|
88
88
|
def supports_streaming(self) -> bool:
|
|
89
89
|
return True # YAML supports multiple documents
|
|
90
90
|
|
|
91
|
+
@property
|
|
92
|
+
def supports_incremental_streaming(self) -> bool:
|
|
93
|
+
return True # YAML supports multi-document streaming
|
|
94
|
+
|
|
91
95
|
@property
|
|
92
96
|
def capabilities(self) -> CodecCapability:
|
|
93
97
|
return CodecCapability.BIDIRECTIONAL
|
|
@@ -178,4 +182,50 @@ class YamlSerializer(ASerialization):
|
|
|
178
182
|
format_name=self.format_name,
|
|
179
183
|
original_error=e
|
|
180
184
|
)
|
|
185
|
+
|
|
186
|
+
# ========================================================================
|
|
187
|
+
# INCREMENTAL STREAMING
|
|
188
|
+
# ========================================================================
|
|
189
|
+
|
|
190
|
+
def incremental_load(
|
|
191
|
+
self,
|
|
192
|
+
file_path: Union[str, Path],
|
|
193
|
+
**options: Any,
|
|
194
|
+
) -> Iterator[Any]:
|
|
195
|
+
"""
|
|
196
|
+
Stream YAML documents one at a time (supports multi-document YAML).
|
|
197
|
+
|
|
198
|
+
Uses PyYAML's safe_load_all() for true streaming without loading
|
|
199
|
+
entire file into memory.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
file_path: Path to the YAML file
|
|
203
|
+
**options: YAML options (Loader, etc.)
|
|
204
|
+
|
|
205
|
+
Yields:
|
|
206
|
+
Each document from the YAML file one at a time
|
|
207
|
+
|
|
208
|
+
Raises:
|
|
209
|
+
FileNotFoundError: If file doesn't exist
|
|
210
|
+
SerializationError: If parsing fails
|
|
211
|
+
"""
|
|
212
|
+
path = Path(file_path)
|
|
213
|
+
if not path.exists():
|
|
214
|
+
raise FileNotFoundError(f"File not found: {path}")
|
|
215
|
+
|
|
216
|
+
opts = options or {}
|
|
217
|
+
loader = opts.get('Loader', yaml.SafeLoader)
|
|
218
|
+
|
|
219
|
+
try:
|
|
220
|
+
with path.open("r", encoding="utf-8") as f:
|
|
221
|
+
# Use safe_load_all for multi-document streaming
|
|
222
|
+
for document in yaml.safe_load_all(f):
|
|
223
|
+
if document is not None: # Skip empty documents
|
|
224
|
+
yield document
|
|
225
|
+
except (yaml.YAMLError, UnicodeDecodeError) as e:
|
|
226
|
+
raise SerializationError(
|
|
227
|
+
f"Failed to incrementally load YAML: {e}",
|
|
228
|
+
format_name=self.format_name,
|
|
229
|
+
original_error=e
|
|
230
|
+
) from e
|
|
181
231
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Company: eXonware.com
|
|
3
3
|
Author: Eng. Muhammad AlShehri
|
|
4
4
|
Email: connect@exonware.com
|
|
5
|
-
Version: 0.0.1.410
|
|
5
|
+
Version: 0.0.1.411
|
|
6
6
|
Generation Date: September 04, 2025
|
|
7
7
|
|
|
8
8
|
XWSerializer - Unified intelligent serializer with I/O integration and auto-serialization.
|
|
@@ -11,7 +11,7 @@ XWSerializer - Unified intelligent serializer with I/O integration and auto-seri
|
|
|
11
11
|
import os
|
|
12
12
|
import time
|
|
13
13
|
from pathlib import Path
|
|
14
|
-
from typing import Any, Optional, Union, Callable
|
|
14
|
+
from typing import Any, Optional, Union, Callable
|
|
15
15
|
|
|
16
16
|
from .base import ASerialization
|
|
17
17
|
from .contracts import ISerialization
|
|
@@ -97,7 +97,7 @@ class XWSerializer(ASerialization):
|
|
|
97
97
|
# FORMAT DETECTION AND TRANSFORMATION (from XWSerialization)
|
|
98
98
|
# ============================================================================
|
|
99
99
|
|
|
100
|
-
def _get_serializer_class(self, format_name: str) ->
|
|
100
|
+
def _get_serializer_class(self, format_name: str) -> type[ISerialization]:
|
|
101
101
|
"""Get serializer class for format name."""
|
|
102
102
|
module_map = {
|
|
103
103
|
'JSON': ('json', 'JsonSerializer'),
|
|
@@ -775,6 +775,178 @@ class XWSerializer(ASerialization):
|
|
|
775
775
|
except Exception as e:
|
|
776
776
|
logger.error(f"Merge failed for {target_path}: {e}")
|
|
777
777
|
raise SerializationError(f"Merge failed: {e}") from e
|
|
778
|
+
|
|
779
|
+
# ============================================================================
|
|
780
|
+
# RECORD-LEVEL OPERATIONS (delegated to specialized serializers)
|
|
781
|
+
# ============================================================================
|
|
782
|
+
|
|
783
|
+
def stream_read_record(
|
|
784
|
+
self,
|
|
785
|
+
file_path: Union[str, Path],
|
|
786
|
+
match: callable,
|
|
787
|
+
projection: Optional[list[Any]] = None,
|
|
788
|
+
**options: Any,
|
|
789
|
+
) -> Any:
|
|
790
|
+
"""
|
|
791
|
+
Stream-style read of a single logical record.
|
|
792
|
+
|
|
793
|
+
Delegates to the specialized serializer when available (e.g. JSONL /
|
|
794
|
+
NDJSON), falling back to the generic ASerialization implementation
|
|
795
|
+
which may load the entire file and scan in memory.
|
|
796
|
+
"""
|
|
797
|
+
target_path = Path(file_path)
|
|
798
|
+
|
|
799
|
+
if self.validate_paths:
|
|
800
|
+
self._path_validator.validate_path(target_path)
|
|
801
|
+
|
|
802
|
+
format_hint = self._detect_format_from_path(target_path)
|
|
803
|
+
specialized = self._ensure_specialized(
|
|
804
|
+
file_path=target_path,
|
|
805
|
+
format_hint=format_hint,
|
|
806
|
+
)
|
|
807
|
+
|
|
808
|
+
try:
|
|
809
|
+
return specialized.stream_read_record(
|
|
810
|
+
target_path,
|
|
811
|
+
match,
|
|
812
|
+
projection=projection,
|
|
813
|
+
**options,
|
|
814
|
+
)
|
|
815
|
+
except NotImplementedError:
|
|
816
|
+
# Fallback to generic full-load behavior from ASerialization
|
|
817
|
+
return super().stream_read_record(
|
|
818
|
+
target_path,
|
|
819
|
+
match,
|
|
820
|
+
projection=projection,
|
|
821
|
+
**options,
|
|
822
|
+
)
|
|
823
|
+
|
|
824
|
+
def stream_update_record(
    self,
    file_path: Union[str, Path],
    match: callable,
    updater: callable,
    *,
    atomic: bool = True,
    **options: Any,
) -> int:
    """
    Update logical records, preferring a format-specific serializer.

    The path is normalised to a ``Path`` and, when ``validate_paths`` is
    enabled, checked by the path validator. The specialized serializer
    selected for the detected format (e.g. JSONL) is tried first.

    Args:
        file_path: Location of the file to update.
        match: Predicate forwarded unchanged to the serializer.
        updater: Callable forwarded unchanged to the serializer.
        atomic: Forwarded as a keyword argument to the serializer.
        **options: Extra keyword arguments forwarded unchanged.

    Returns:
        The value produced by the serializer that handled the call.

    If the specialized serializer raises ``NotImplementedError``, the
    inherited ASerialization implementation is used; it may load the full
    file but still honours atomic save semantics.
    """
    resolved = Path(file_path)
    if self.validate_paths:
        self._path_validator.validate_path(resolved)

    delegate = self._ensure_specialized(
        file_path=resolved,
        format_hint=self._detect_format_from_path(resolved),
    )

    # Identical keyword set for the delegate and the inherited fallback.
    forwarded = {"atomic": atomic, **options}
    try:
        return delegate.stream_update_record(resolved, match, updater, **forwarded)
    except NotImplementedError:
        return super().stream_update_record(resolved, match, updater, **forwarded)
|
|
868
|
+
|
|
869
|
+
def get_record_page(
    self,
    file_path: Union[str, Path],
    page_number: int,
    page_size: int,
    **options: Any,
) -> list[Any]:
    """
    Fetch one page of records, preferring a format-specific serializer.

    The path is normalised to a ``Path`` and, when ``validate_paths`` is
    enabled, checked by the path validator. The specialized serializer
    for the detected format is tried first (JSONL, for example, can
    implement true streaming paging).

    Args:
        file_path: Location of the file to page through.
        page_number: Page index forwarded unchanged to the serializer.
        page_size: Page size forwarded unchanged to the serializer.
        **options: Extra keyword arguments forwarded unchanged.

    Returns:
        The page produced by the serializer that handled the call.

    If the specialized serializer raises ``NotImplementedError``, the
    inherited ASerialization implementation is used; it may load the
    entire file and slice a top-level list.
    """
    resolved = Path(file_path)
    if self.validate_paths:
        self._path_validator.validate_path(resolved)

    delegate = self._ensure_specialized(
        file_path=resolved,
        format_hint=self._detect_format_from_path(resolved),
    )

    # Same positional arguments go to either implementation.
    call_args = (resolved, page_number, page_size)
    try:
        return delegate.get_record_page(*call_args, **options)
    except NotImplementedError:
        return super().get_record_page(*call_args, **options)
|
|
909
|
+
|
|
910
|
+
def get_record_by_id(
    self,
    file_path: Union[str, Path],
    id_value: Any,
    *,
    id_field: str = "id",
    **options: Any,
) -> Any:
    """
    Look up one record by identifier, preferring a format-specific serializer.

    The path is normalised to a ``Path`` and, when ``validate_paths`` is
    enabled, checked by the path validator. The specialized serializer
    for the detected format is tried first.

    Args:
        file_path: Location of the file to search.
        id_value: Identifier value forwarded unchanged to the serializer.
        id_field: Name of the identifier field, forwarded as a keyword.
        **options: Extra keyword arguments forwarded unchanged.

    Returns:
        The record produced by the serializer that handled the call.

    If the specialized serializer raises ``NotImplementedError``, the
    inherited ASerialization implementation is used; it performs a linear
    scan over a top-level list.
    """
    resolved = Path(file_path)
    if self.validate_paths:
        self._path_validator.validate_path(resolved)

    delegate = self._ensure_specialized(
        file_path=resolved,
        format_hint=self._detect_format_from_path(resolved),
    )

    # Identical keyword set for the delegate and the inherited fallback.
    forwarded = {"id_field": id_field, **options}
    try:
        return delegate.get_record_by_id(resolved, id_value, **forwarded)
    except NotImplementedError:
        return super().get_record_by_id(resolved, id_value, **forwarded)
|
|
778
950
|
|
|
779
951
|
# ============================================================================
|
|
780
952
|
# BATCH OPERATIONS
|