exonware-xwsystem 0.0.1.411__py3-none-any.whl → 0.1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +1 -1
- exonware/conf.py +1 -1
- exonware/xwsystem/__init__.py +1 -1
- exonware/xwsystem/caching/__init__.py +1 -1
- exonware/xwsystem/caching/base.py +1 -1
- exonware/xwsystem/caching/bloom_cache.py +1 -1
- exonware/xwsystem/caching/cache_manager.py +1 -1
- exonware/xwsystem/caching/conditional.py +1 -1
- exonware/xwsystem/caching/contracts.py +1 -1
- exonware/xwsystem/caching/decorators.py +1 -1
- exonware/xwsystem/caching/defs.py +1 -1
- exonware/xwsystem/caching/disk_cache.py +1 -1
- exonware/xwsystem/caching/distributed.py +1 -1
- exonware/xwsystem/caching/errors.py +1 -1
- exonware/xwsystem/caching/events.py +1 -1
- exonware/xwsystem/caching/eviction_strategies.py +1 -1
- exonware/xwsystem/caching/fluent.py +1 -1
- exonware/xwsystem/caching/integrity.py +1 -1
- exonware/xwsystem/caching/lfu_cache.py +1 -1
- exonware/xwsystem/caching/lfu_optimized.py +1 -1
- exonware/xwsystem/caching/lru_cache.py +1 -1
- exonware/xwsystem/caching/memory_bounded.py +1 -1
- exonware/xwsystem/caching/metrics_exporter.py +1 -1
- exonware/xwsystem/caching/observable_cache.py +1 -1
- exonware/xwsystem/caching/pluggable_cache.py +1 -1
- exonware/xwsystem/caching/rate_limiter.py +1 -1
- exonware/xwsystem/caching/read_through.py +1 -1
- exonware/xwsystem/caching/secure_cache.py +1 -1
- exonware/xwsystem/caching/serializable.py +1 -1
- exonware/xwsystem/caching/stats.py +1 -1
- exonware/xwsystem/caching/tagging.py +1 -1
- exonware/xwsystem/caching/ttl_cache.py +1 -1
- exonware/xwsystem/caching/two_tier_cache.py +1 -1
- exonware/xwsystem/caching/utils.py +1 -1
- exonware/xwsystem/caching/validation.py +1 -1
- exonware/xwsystem/caching/warming.py +1 -1
- exonware/xwsystem/caching/write_behind.py +1 -1
- exonware/xwsystem/cli/__init__.py +1 -1
- exonware/xwsystem/cli/args.py +1 -1
- exonware/xwsystem/cli/base.py +1 -1
- exonware/xwsystem/cli/colors.py +1 -1
- exonware/xwsystem/cli/console.py +1 -1
- exonware/xwsystem/cli/contracts.py +1 -1
- exonware/xwsystem/cli/defs.py +1 -1
- exonware/xwsystem/cli/errors.py +1 -1
- exonware/xwsystem/cli/progress.py +1 -1
- exonware/xwsystem/cli/prompts.py +1 -1
- exonware/xwsystem/cli/tables.py +1 -1
- exonware/xwsystem/config/__init__.py +1 -1
- exonware/xwsystem/config/base.py +1 -1
- exonware/xwsystem/config/contracts.py +1 -1
- exonware/xwsystem/config/defaults.py +1 -1
- exonware/xwsystem/config/defs.py +1 -1
- exonware/xwsystem/config/errors.py +1 -1
- exonware/xwsystem/config/logging.py +1 -1
- exonware/xwsystem/config/logging_setup.py +1 -1
- exonware/xwsystem/config/performance.py +115 -388
- exonware/xwsystem/http_client/__init__.py +1 -1
- exonware/xwsystem/http_client/advanced_client.py +1 -1
- exonware/xwsystem/http_client/base.py +1 -1
- exonware/xwsystem/http_client/client.py +1 -1
- exonware/xwsystem/http_client/contracts.py +1 -1
- exonware/xwsystem/http_client/defs.py +1 -1
- exonware/xwsystem/http_client/errors.py +1 -1
- exonware/xwsystem/io/__init__.py +1 -1
- exonware/xwsystem/io/archive/__init__.py +1 -1
- exonware/xwsystem/io/archive/archive.py +1 -1
- exonware/xwsystem/io/archive/archive_files.py +1 -1
- exonware/xwsystem/io/archive/archivers.py +1 -1
- exonware/xwsystem/io/archive/base.py +1 -1
- exonware/xwsystem/io/archive/codec_integration.py +1 -1
- exonware/xwsystem/io/archive/compression.py +1 -1
- exonware/xwsystem/io/archive/formats/__init__.py +1 -1
- exonware/xwsystem/io/archive/formats/brotli_format.py +1 -1
- exonware/xwsystem/io/archive/formats/lz4_format.py +1 -1
- exonware/xwsystem/io/archive/formats/rar.py +1 -1
- exonware/xwsystem/io/archive/formats/sevenzip.py +1 -1
- exonware/xwsystem/io/archive/formats/squashfs_format.py +1 -1
- exonware/xwsystem/io/archive/formats/tar.py +1 -1
- exonware/xwsystem/io/archive/formats/wim_format.py +1 -1
- exonware/xwsystem/io/archive/formats/zip.py +1 -1
- exonware/xwsystem/io/archive/formats/zpaq_format.py +1 -1
- exonware/xwsystem/io/archive/formats/zstandard.py +1 -1
- exonware/xwsystem/io/base.py +1 -1
- exonware/xwsystem/io/codec/__init__.py +1 -1
- exonware/xwsystem/io/codec/base.py +1 -1
- exonware/xwsystem/io/codec/contracts.py +1 -1
- exonware/xwsystem/io/codec/registry.py +1 -1
- exonware/xwsystem/io/common/__init__.py +1 -1
- exonware/xwsystem/io/common/base.py +1 -1
- exonware/xwsystem/io/common/lock.py +1 -1
- exonware/xwsystem/io/common/watcher.py +1 -1
- exonware/xwsystem/io/contracts.py +1 -1
- exonware/xwsystem/io/data_operations.py +276 -10
- exonware/xwsystem/io/defs.py +1 -1
- exonware/xwsystem/io/errors.py +1 -1
- exonware/xwsystem/io/facade.py +1 -1
- exonware/xwsystem/io/file/__init__.py +1 -1
- exonware/xwsystem/io/file/base.py +1 -1
- exonware/xwsystem/io/file/conversion.py +1 -1
- exonware/xwsystem/io/file/file.py +1 -1
- exonware/xwsystem/io/file/paged_source.py +1 -1
- exonware/xwsystem/io/file/paging/__init__.py +1 -1
- exonware/xwsystem/io/file/paging/byte_paging.py +1 -1
- exonware/xwsystem/io/file/paging/line_paging.py +1 -1
- exonware/xwsystem/io/file/paging/record_paging.py +1 -1
- exonware/xwsystem/io/file/paging/registry.py +1 -1
- exonware/xwsystem/io/file/source.py +1 -1
- exonware/xwsystem/io/filesystem/__init__.py +1 -1
- exonware/xwsystem/io/filesystem/base.py +1 -1
- exonware/xwsystem/io/filesystem/local.py +1 -1
- exonware/xwsystem/io/folder/__init__.py +1 -1
- exonware/xwsystem/io/folder/base.py +1 -1
- exonware/xwsystem/io/folder/folder.py +1 -1
- exonware/xwsystem/io/serialization/__init__.py +1 -1
- exonware/xwsystem/io/serialization/auto_serializer.py +1 -1
- exonware/xwsystem/io/serialization/base.py +1 -1
- exonware/xwsystem/io/serialization/contracts.py +1 -1
- exonware/xwsystem/io/serialization/defs.py +1 -1
- exonware/xwsystem/io/serialization/errors.py +1 -1
- exonware/xwsystem/io/serialization/flyweight.py +1 -1
- exonware/xwsystem/io/serialization/format_detector.py +1 -1
- exonware/xwsystem/io/serialization/formats/__init__.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/bson.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/cbor.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/marshal.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/msgpack.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/pickle.py +1 -1
- exonware/xwsystem/io/serialization/formats/binary/plistlib.py +1 -1
- exonware/xwsystem/io/serialization/formats/database/dbm.py +1 -1
- exonware/xwsystem/io/serialization/formats/database/shelve.py +1 -1
- exonware/xwsystem/io/serialization/formats/database/sqlite3.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/append_only_log.py +201 -0
- exonware/xwsystem/io/serialization/formats/text/configparser.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/csv.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/formdata.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/json.py +43 -20
- exonware/xwsystem/io/serialization/formats/text/json5.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/jsonlines.py +99 -15
- exonware/xwsystem/io/serialization/formats/text/multipart.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/toml.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/xml.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/yaml.py +1 -1
- exonware/xwsystem/io/serialization/parsers/__init__.py +15 -0
- exonware/xwsystem/io/serialization/parsers/base.py +59 -0
- exonware/xwsystem/io/serialization/parsers/hybrid_parser.py +61 -0
- exonware/xwsystem/io/serialization/parsers/msgspec_parser.py +45 -0
- exonware/xwsystem/io/serialization/parsers/orjson_direct_parser.py +53 -0
- exonware/xwsystem/io/serialization/parsers/orjson_parser.py +59 -0
- exonware/xwsystem/io/serialization/parsers/pysimdjson_parser.py +51 -0
- exonware/xwsystem/io/serialization/parsers/rapidjson_parser.py +50 -0
- exonware/xwsystem/io/serialization/parsers/registry.py +90 -0
- exonware/xwsystem/io/serialization/parsers/standard.py +43 -0
- exonware/xwsystem/io/serialization/parsers/ujson_parser.py +50 -0
- exonware/xwsystem/io/serialization/registry.py +1 -1
- exonware/xwsystem/io/serialization/serializer.py +1 -1
- exonware/xwsystem/io/serialization/utils/__init__.py +1 -1
- exonware/xwsystem/io/serialization/utils/path_ops.py +1 -1
- exonware/xwsystem/io/stream/__init__.py +1 -1
- exonware/xwsystem/io/stream/async_operations.py +1 -1
- exonware/xwsystem/io/stream/base.py +1 -1
- exonware/xwsystem/io/stream/codec_io.py +1 -1
- exonware/xwsystem/ipc/async_fabric.py +1 -1
- exonware/xwsystem/ipc/base.py +1 -1
- exonware/xwsystem/ipc/contracts.py +1 -1
- exonware/xwsystem/ipc/defs.py +1 -1
- exonware/xwsystem/ipc/errors.py +1 -1
- exonware/xwsystem/monitoring/base.py +1 -1
- exonware/xwsystem/monitoring/contracts.py +1 -1
- exonware/xwsystem/monitoring/defs.py +1 -1
- exonware/xwsystem/monitoring/errors.py +1 -1
- exonware/xwsystem/monitoring/performance_manager_generic.py +1 -1
- exonware/xwsystem/monitoring/system_monitor.py +1 -1
- exonware/xwsystem/monitoring/tracing.py +1 -1
- exonware/xwsystem/monitoring/tracker.py +1 -1
- exonware/xwsystem/operations/__init__.py +1 -1
- exonware/xwsystem/operations/base.py +1 -1
- exonware/xwsystem/operations/defs.py +1 -1
- exonware/xwsystem/operations/diff.py +1 -1
- exonware/xwsystem/operations/merge.py +1 -1
- exonware/xwsystem/operations/patch.py +1 -1
- exonware/xwsystem/patterns/base.py +1 -1
- exonware/xwsystem/patterns/contracts.py +1 -1
- exonware/xwsystem/patterns/defs.py +1 -1
- exonware/xwsystem/patterns/errors.py +1 -1
- exonware/xwsystem/patterns/registry.py +1 -1
- exonware/xwsystem/plugins/__init__.py +1 -1
- exonware/xwsystem/plugins/base.py +1 -1
- exonware/xwsystem/plugins/contracts.py +1 -1
- exonware/xwsystem/plugins/defs.py +1 -1
- exonware/xwsystem/plugins/errors.py +1 -1
- exonware/xwsystem/runtime/__init__.py +1 -1
- exonware/xwsystem/runtime/base.py +1 -1
- exonware/xwsystem/runtime/contracts.py +1 -1
- exonware/xwsystem/runtime/defs.py +1 -1
- exonware/xwsystem/runtime/env.py +1 -1
- exonware/xwsystem/runtime/errors.py +1 -1
- exonware/xwsystem/runtime/reflection.py +1 -1
- exonware/xwsystem/security/auth.py +1 -1
- exonware/xwsystem/security/base.py +1 -1
- exonware/xwsystem/security/contracts.py +1 -1
- exonware/xwsystem/security/crypto.py +1 -1
- exonware/xwsystem/security/defs.py +1 -1
- exonware/xwsystem/security/errors.py +1 -1
- exonware/xwsystem/security/hazmat.py +1 -1
- exonware/xwsystem/shared/__init__.py +1 -1
- exonware/xwsystem/shared/base.py +1 -1
- exonware/xwsystem/shared/contracts.py +1 -1
- exonware/xwsystem/shared/defs.py +1 -1
- exonware/xwsystem/shared/errors.py +1 -1
- exonware/xwsystem/structures/base.py +1 -1
- exonware/xwsystem/structures/contracts.py +1 -1
- exonware/xwsystem/structures/defs.py +1 -1
- exonware/xwsystem/structures/errors.py +1 -1
- exonware/xwsystem/threading/async_primitives.py +1 -1
- exonware/xwsystem/threading/base.py +1 -1
- exonware/xwsystem/threading/contracts.py +1 -1
- exonware/xwsystem/threading/defs.py +1 -1
- exonware/xwsystem/threading/errors.py +1 -1
- exonware/xwsystem/utils/base.py +1 -1
- exonware/xwsystem/utils/contracts.py +1 -1
- exonware/xwsystem/utils/dt/__init__.py +1 -1
- exonware/xwsystem/utils/dt/base.py +1 -1
- exonware/xwsystem/utils/dt/contracts.py +1 -1
- exonware/xwsystem/utils/dt/defs.py +1 -1
- exonware/xwsystem/utils/dt/errors.py +1 -1
- exonware/xwsystem/utils/dt/formatting.py +1 -1
- exonware/xwsystem/utils/dt/humanize.py +1 -1
- exonware/xwsystem/utils/dt/parsing.py +1 -1
- exonware/xwsystem/utils/dt/timezone_utils.py +1 -1
- exonware/xwsystem/utils/errors.py +1 -1
- exonware/xwsystem/utils/utils_contracts.py +1 -1
- exonware/xwsystem/validation/__init__.py +1 -1
- exonware/xwsystem/validation/base.py +1 -1
- exonware/xwsystem/validation/contracts.py +1 -1
- exonware/xwsystem/validation/declarative.py +1 -1
- exonware/xwsystem/validation/defs.py +1 -1
- exonware/xwsystem/validation/errors.py +1 -1
- exonware/xwsystem/validation/fluent_validator.py +1 -1
- exonware/xwsystem/version.py +4 -4
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.1.dist-info}/METADATA +3 -3
- exonware_xwsystem-0.1.0.1.dist-info/RECORD +284 -0
- exonware/xwsystem/caching/USAGE_GUIDE.md +0 -779
- exonware/xwsystem/utils/test_runner.py +0 -526
- exonware_xwsystem-0.0.1.411.dist-info/RECORD +0 -274
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.1.dist-info}/WHEEL +0 -0
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
Company: eXonware.com
|
|
5
5
|
Author: Eng. Muhammad AlShehri
|
|
6
6
|
Email: connect@exonware.com
|
|
7
|
-
Version: 0.0.1
|
|
7
|
+
Version: 0.1.0.1
|
|
8
8
|
Generation Date: 02-Nov-2025
|
|
9
9
|
|
|
10
10
|
JSON Lines (JSONL/NDJSON) Serialization - Newline-Delimited JSON
|
|
@@ -27,8 +27,14 @@ from pathlib import Path
|
|
|
27
27
|
import json
|
|
28
28
|
|
|
29
29
|
from .json import JsonSerializer
|
|
30
|
+
from ...parsers.registry import get_parser
|
|
31
|
+
from ...parsers.base import IJsonParser
|
|
30
32
|
from ....errors import SerializationError
|
|
31
33
|
from ....common.atomic import AtomicFileWriter
|
|
34
|
+
from exonware.xwsystem.config.logging_setup import get_logger
|
|
35
|
+
from exonware.xwsystem.config.performance import get_performance_config
|
|
36
|
+
|
|
37
|
+
logger = get_logger(__name__)
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
class JsonLinesSerializer(JsonSerializer):
|
|
@@ -40,9 +46,16 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
40
46
|
Concrete: JsonLinesSerializer
|
|
41
47
|
"""
|
|
42
48
|
|
|
43
|
-
def __init__(self):
|
|
44
|
-
"""
|
|
45
|
-
|
|
49
|
+
def __init__(self, parser_name: Optional[str] = None):
|
|
50
|
+
"""
|
|
51
|
+
Initialize JSON Lines serializer with optional parser selection.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
parser_name: Parser name ("standard", "orjson", or None for auto-detect)
|
|
55
|
+
"""
|
|
56
|
+
super().__init__(parser_name=parser_name)
|
|
57
|
+
# Get parser instance for direct use in line-by-line operations
|
|
58
|
+
self._parser: IJsonParser = get_parser(parser_name)
|
|
46
59
|
|
|
47
60
|
@property
|
|
48
61
|
def codec_id(self) -> str:
|
|
@@ -117,7 +130,12 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
117
130
|
|
|
118
131
|
lines: list[str] = []
|
|
119
132
|
for item in data:
|
|
120
|
-
|
|
133
|
+
# Use pluggable parser
|
|
134
|
+
result = self._parser.dumps(item, ensure_ascii=ensure_ascii)
|
|
135
|
+
# Convert bytes to str if needed
|
|
136
|
+
if isinstance(result, bytes):
|
|
137
|
+
result = result.decode("utf-8")
|
|
138
|
+
lines.append(result)
|
|
121
139
|
|
|
122
140
|
return "\n".join(lines)
|
|
123
141
|
|
|
@@ -142,7 +160,8 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
142
160
|
for line in lines:
|
|
143
161
|
line = line.strip()
|
|
144
162
|
if line: # Skip empty lines
|
|
145
|
-
|
|
163
|
+
# Use pluggable parser
|
|
164
|
+
results.append(self._parser.loads(line))
|
|
146
165
|
|
|
147
166
|
return results
|
|
148
167
|
|
|
@@ -174,7 +193,8 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
174
193
|
line = line.strip()
|
|
175
194
|
if not line:
|
|
176
195
|
continue
|
|
177
|
-
|
|
196
|
+
# Use pluggable parser
|
|
197
|
+
record = self._parser.loads(line)
|
|
178
198
|
if match(record):
|
|
179
199
|
return self._apply_projection(record, projection)
|
|
180
200
|
|
|
@@ -195,11 +215,63 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
195
215
|
Implementation uses a temp file + AtomicFileWriter pattern to ensure
|
|
196
216
|
atomicity when atomic=True. Records are processed line-by-line and only
|
|
197
217
|
the matching records are materialized and updated.
|
|
218
|
+
|
|
219
|
+
Supports append-only log optimization for large files (use_append_log=True).
|
|
198
220
|
"""
|
|
199
221
|
path = Path(file_path)
|
|
200
222
|
if not path.exists():
|
|
201
223
|
raise FileNotFoundError(f"File not found: {path}")
|
|
202
224
|
|
|
225
|
+
# Check if append-only log should be used
|
|
226
|
+
perf_config = get_performance_config()
|
|
227
|
+
use_append_log = options.get("use_append_log", None)
|
|
228
|
+
if use_append_log is None:
|
|
229
|
+
if not perf_config.enable_append_log:
|
|
230
|
+
use_append_log = False
|
|
231
|
+
else:
|
|
232
|
+
# Auto-detect: use for files above threshold
|
|
233
|
+
file_size_mb = path.stat().st_size / (1024 * 1024)
|
|
234
|
+
use_append_log = file_size_mb > perf_config.append_log_threshold_mb
|
|
235
|
+
|
|
236
|
+
# Try append-only log if enabled
|
|
237
|
+
if use_append_log:
|
|
238
|
+
try:
|
|
239
|
+
from .append_only_log import AppendOnlyLog
|
|
240
|
+
log = AppendOnlyLog(path)
|
|
241
|
+
|
|
242
|
+
# For append-only log, we need to find matching records first
|
|
243
|
+
# and apply updates, then append to log
|
|
244
|
+
# This is a simplified version - full implementation would
|
|
245
|
+
# integrate with index for O(1) lookups
|
|
246
|
+
updated = 0
|
|
247
|
+
with path.open("r", encoding="utf-8") as src:
|
|
248
|
+
for line in src:
|
|
249
|
+
raw = line.rstrip("\n")
|
|
250
|
+
if not raw.strip():
|
|
251
|
+
continue
|
|
252
|
+
|
|
253
|
+
try:
|
|
254
|
+
record = self._parser.loads(raw)
|
|
255
|
+
if match(record):
|
|
256
|
+
# Apply updater
|
|
257
|
+
updated_record = updater(record)
|
|
258
|
+
|
|
259
|
+
# Extract type and id for log entry
|
|
260
|
+
type_name = record.get("@type") or record.get("type") or "Record"
|
|
261
|
+
id_value = str(record.get("id", ""))
|
|
262
|
+
|
|
263
|
+
# Append to log
|
|
264
|
+
log.update_record(type_name, id_value, lambda x: updated_record)
|
|
265
|
+
updated += 1
|
|
266
|
+
except Exception:
|
|
267
|
+
continue
|
|
268
|
+
|
|
269
|
+
return updated
|
|
270
|
+
except Exception as e:
|
|
271
|
+
# Fall back to full rewrite if append-only log fails
|
|
272
|
+
logger.debug(f"Append-only log failed, falling back to full rewrite: {e}")
|
|
273
|
+
|
|
274
|
+
# Original full-rewrite implementation
|
|
203
275
|
updated = 0
|
|
204
276
|
backup = options.get("backup", True)
|
|
205
277
|
ensure_ascii = options.get("ensure_ascii", False)
|
|
@@ -213,16 +285,21 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
213
285
|
raw = line.rstrip("\n")
|
|
214
286
|
if not raw.strip():
|
|
215
287
|
# Preserve structural empty lines
|
|
216
|
-
writer.write(
|
|
288
|
+
writer.write("\n")
|
|
217
289
|
continue
|
|
218
290
|
|
|
219
|
-
|
|
291
|
+
# Use pluggable parser
|
|
292
|
+
record = self._parser.loads(raw)
|
|
220
293
|
if match(record):
|
|
221
294
|
record = updater(record)
|
|
222
295
|
updated += 1
|
|
223
296
|
|
|
224
|
-
|
|
225
|
-
|
|
297
|
+
# Use pluggable parser for serialization
|
|
298
|
+
result = self._parser.dumps(record, ensure_ascii=ensure_ascii)
|
|
299
|
+
if isinstance(result, bytes):
|
|
300
|
+
result = result.decode("utf-8")
|
|
301
|
+
out_line = result + "\n"
|
|
302
|
+
writer.write(out_line)
|
|
226
303
|
else:
|
|
227
304
|
# Non-atomic fallback: read + rewrite line-by-line
|
|
228
305
|
new_lines: list[str] = []
|
|
@@ -233,12 +310,17 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
233
310
|
new_lines.append("\n")
|
|
234
311
|
continue
|
|
235
312
|
|
|
236
|
-
|
|
313
|
+
# Use pluggable parser
|
|
314
|
+
record = self._parser.loads(raw)
|
|
237
315
|
if match(record):
|
|
238
316
|
record = updater(record)
|
|
239
317
|
updated += 1
|
|
240
318
|
|
|
241
|
-
|
|
319
|
+
# Use pluggable parser for serialization
|
|
320
|
+
result = self._parser.dumps(record, ensure_ascii=ensure_ascii)
|
|
321
|
+
if isinstance(result, bytes):
|
|
322
|
+
result = result.decode("utf-8")
|
|
323
|
+
new_lines.append(result + "\n")
|
|
242
324
|
|
|
243
325
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
244
326
|
path.write_text("".join(new_lines), encoding="utf-8")
|
|
@@ -288,7 +370,8 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
288
370
|
break
|
|
289
371
|
|
|
290
372
|
if current_index >= start_index:
|
|
291
|
-
|
|
373
|
+
# Use pluggable parser
|
|
374
|
+
results.append(self._parser.loads(line))
|
|
292
375
|
|
|
293
376
|
current_index += 1
|
|
294
377
|
|
|
@@ -318,7 +401,8 @@ class JsonLinesSerializer(JsonSerializer):
|
|
|
318
401
|
if not line:
|
|
319
402
|
continue
|
|
320
403
|
|
|
321
|
-
|
|
404
|
+
# Use pluggable parser
|
|
405
|
+
record = self._parser.loads(line)
|
|
322
406
|
if isinstance(record, dict) and record.get(id_field) == id_value:
|
|
323
407
|
return record
|
|
324
408
|
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""JSON Parser abstraction layer for pluggable performance optimizations.
|
|
2
|
+
|
|
3
|
+
This module provides a pluggable parser system that allows switching between
|
|
4
|
+
different JSON parsing implementations (stdlib, orjson, etc.) for performance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .base import IJsonParser
|
|
8
|
+
from .registry import get_parser, get_best_available_parser, register_parser
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
'IJsonParser',
|
|
12
|
+
'get_parser',
|
|
13
|
+
'get_best_available_parser',
|
|
14
|
+
'register_parser',
|
|
15
|
+
]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Base JSON parser interface."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any, Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class IJsonParser(ABC):
|
|
8
|
+
"""Abstract JSON parser interface for pluggable implementations."""
|
|
9
|
+
|
|
10
|
+
@abstractmethod
|
|
11
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
12
|
+
"""
|
|
13
|
+
Parse JSON string/bytes to Python object.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
s: JSON string or bytes
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
Parsed Python object
|
|
20
|
+
"""
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
@abstractmethod
|
|
24
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
25
|
+
"""
|
|
26
|
+
Serialize Python object to JSON.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
obj: Python object to serialize
|
|
30
|
+
**kwargs: Serialization options (ensure_ascii, indent, etc.)
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
JSON string or bytes
|
|
34
|
+
"""
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
@property
|
|
38
|
+
@abstractmethod
|
|
39
|
+
def parser_name(self) -> str:
|
|
40
|
+
"""Parser identifier (e.g., 'standard', 'orjson')."""
|
|
41
|
+
pass
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
@abstractmethod
|
|
45
|
+
def tier(self) -> int:
|
|
46
|
+
"""
|
|
47
|
+
Performance tier:
|
|
48
|
+
0 = stdlib (baseline)
|
|
49
|
+
1 = orjson (3-4x faster)
|
|
50
|
+
2 = Rust extension (5-7x faster, future)
|
|
51
|
+
3 = Pure Rust core (6-8x faster, future)
|
|
52
|
+
"""
|
|
53
|
+
pass
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
@abstractmethod
|
|
57
|
+
def is_available(self) -> bool:
|
|
58
|
+
"""Check if parser is available (dependencies installed)."""
|
|
59
|
+
pass
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Hybrid parser: msgspec for reading, orjson for writing (direct, no try/catch)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
import msgspec # Direct import for reading
|
|
5
|
+
import orjson # Direct import for writing
|
|
6
|
+
|
|
7
|
+
from .base import IJsonParser
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class HybridParser(IJsonParser):
|
|
11
|
+
"""
|
|
12
|
+
Hybrid parser - fastest combination:
|
|
13
|
+
- msgspec for reading (1.36x faster than orjson)
|
|
14
|
+
- orjson for writing (2.27x faster than msgspec)
|
|
15
|
+
|
|
16
|
+
Direct imports (no try/catch) - assumes both are available.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
@property
|
|
20
|
+
def parser_name(self) -> str:
|
|
21
|
+
return "hybrid"
|
|
22
|
+
|
|
23
|
+
@property
|
|
24
|
+
def tier(self) -> int:
|
|
25
|
+
return 1
|
|
26
|
+
|
|
27
|
+
@property
|
|
28
|
+
def is_available(self) -> bool:
|
|
29
|
+
return True # Assumes both msgspec and orjson are available
|
|
30
|
+
|
|
31
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
32
|
+
"""Parse JSON using msgspec.json.decode() - fastest for reading."""
|
|
33
|
+
if isinstance(s, str):
|
|
34
|
+
s = s.encode("utf-8")
|
|
35
|
+
# msgspec.json.decode accepts bytes directly
|
|
36
|
+
return msgspec.json.decode(s)
|
|
37
|
+
|
|
38
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
39
|
+
"""Serialize JSON using orjson.dumps() - fastest for writing."""
|
|
40
|
+
option = 0
|
|
41
|
+
|
|
42
|
+
# orjson options
|
|
43
|
+
if not kwargs.get("ensure_ascii", True):
|
|
44
|
+
# orjson always outputs UTF-8, so ensure_ascii=False is default
|
|
45
|
+
pass
|
|
46
|
+
|
|
47
|
+
# Handle indent (orjson doesn't support indent directly)
|
|
48
|
+
indent = kwargs.get("indent", None)
|
|
49
|
+
if indent:
|
|
50
|
+
# For pretty printing, use orjson.OPT_INDENT_2
|
|
51
|
+
option |= orjson.OPT_INDENT_2
|
|
52
|
+
|
|
53
|
+
# Sort keys
|
|
54
|
+
if kwargs.get("sort_keys", False):
|
|
55
|
+
option |= orjson.OPT_SORT_KEYS
|
|
56
|
+
|
|
57
|
+
result = orjson.dumps(obj, option=option)
|
|
58
|
+
|
|
59
|
+
# Return as bytes (orjson returns bytes)
|
|
60
|
+
# Caller can decode if string is needed
|
|
61
|
+
return result
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""msgspec parser - Tier 1 (Rust-based, very fast)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import msgspec
|
|
7
|
+
MSGSPEC_AVAILABLE = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
MSGSPEC_AVAILABLE = False
|
|
10
|
+
msgspec = None
|
|
11
|
+
|
|
12
|
+
from .base import IJsonParser
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MsgspecParser(IJsonParser):
|
|
16
|
+
"""msgspec parser - Tier 1 (Rust-based, very fast, close to orjson)."""
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def parser_name(self) -> str:
|
|
20
|
+
return "msgspec"
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def tier(self) -> int:
|
|
24
|
+
return 1
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def is_available(self) -> bool:
|
|
28
|
+
return MSGSPEC_AVAILABLE
|
|
29
|
+
|
|
30
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
31
|
+
"""Parse JSON using msgspec.json.decode()."""
|
|
32
|
+
if isinstance(s, str):
|
|
33
|
+
s = s.encode("utf-8")
|
|
34
|
+
# msgspec.json.decode accepts bytes directly
|
|
35
|
+
return msgspec.json.decode(s)
|
|
36
|
+
|
|
37
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
38
|
+
"""Serialize JSON using msgspec.json.encode()."""
|
|
39
|
+
result = msgspec.json.encode(obj)
|
|
40
|
+
|
|
41
|
+
# msgspec returns bytes, decode if string needed
|
|
42
|
+
if isinstance(result, bytes) and kwargs.get("return_str", False):
|
|
43
|
+
return result.decode("utf-8")
|
|
44
|
+
|
|
45
|
+
return result
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""orjson parser - DIRECT (no try/catch, assumes orjson is available)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
import orjson # Direct import, no try/catch
|
|
5
|
+
|
|
6
|
+
from .base import IJsonParser
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class OrjsonDirectParser(IJsonParser):
|
|
10
|
+
"""orjson parser - DIRECT (no try/catch, assumes orjson is available)."""
|
|
11
|
+
|
|
12
|
+
@property
|
|
13
|
+
def parser_name(self) -> str:
|
|
14
|
+
return "orjson_direct"
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def tier(self) -> int:
|
|
18
|
+
return 1
|
|
19
|
+
|
|
20
|
+
@property
|
|
21
|
+
def is_available(self) -> bool:
|
|
22
|
+
return True # Assumes orjson is available
|
|
23
|
+
|
|
24
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
25
|
+
"""Parse JSON using orjson.loads()."""
|
|
26
|
+
if isinstance(s, str):
|
|
27
|
+
s = s.encode("utf-8")
|
|
28
|
+
return orjson.loads(s)
|
|
29
|
+
|
|
30
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
31
|
+
"""Serialize JSON using orjson.dumps()."""
|
|
32
|
+
option = 0
|
|
33
|
+
|
|
34
|
+
# orjson options
|
|
35
|
+
if not kwargs.get("ensure_ascii", True):
|
|
36
|
+
# orjson always outputs UTF-8, so ensure_ascii=False is default
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
# Handle indent (orjson doesn't support indent directly)
|
|
40
|
+
indent = kwargs.get("indent", None)
|
|
41
|
+
if indent:
|
|
42
|
+
# For pretty printing, use orjson.OPT_INDENT_2
|
|
43
|
+
option |= orjson.OPT_INDENT_2
|
|
44
|
+
|
|
45
|
+
# Sort keys
|
|
46
|
+
if kwargs.get("sort_keys", False):
|
|
47
|
+
option |= orjson.OPT_SORT_KEYS
|
|
48
|
+
|
|
49
|
+
result = orjson.dumps(obj, option=option)
|
|
50
|
+
|
|
51
|
+
# Return as bytes (orjson returns bytes)
|
|
52
|
+
# Caller can decode if string is needed
|
|
53
|
+
return result
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""orjson parser - Tier 1 (3-4x faster than stdlib)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import orjson
|
|
7
|
+
ORJSON_AVAILABLE = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
ORJSON_AVAILABLE = False
|
|
10
|
+
orjson = None
|
|
11
|
+
|
|
12
|
+
from .base import IJsonParser
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OrjsonParser(IJsonParser):
|
|
16
|
+
"""orjson parser - Tier 1 (3-4x faster than stdlib)."""
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def parser_name(self) -> str:
|
|
20
|
+
return "orjson"
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def tier(self) -> int:
|
|
24
|
+
return 1
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def is_available(self) -> bool:
|
|
28
|
+
return ORJSON_AVAILABLE
|
|
29
|
+
|
|
30
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
31
|
+
"""Parse JSON using orjson.loads()."""
|
|
32
|
+
if isinstance(s, str):
|
|
33
|
+
s = s.encode("utf-8")
|
|
34
|
+
return orjson.loads(s)
|
|
35
|
+
|
|
36
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
37
|
+
"""Serialize JSON using orjson.dumps()."""
|
|
38
|
+
option = 0
|
|
39
|
+
|
|
40
|
+
# orjson options
|
|
41
|
+
if not kwargs.get("ensure_ascii", True):
|
|
42
|
+
# orjson always outputs UTF-8, so ensure_ascii=False is default
|
|
43
|
+
pass
|
|
44
|
+
|
|
45
|
+
# Handle indent (orjson doesn't support indent directly)
|
|
46
|
+
indent = kwargs.get("indent", None)
|
|
47
|
+
if indent:
|
|
48
|
+
# For pretty printing, use orjson.OPT_INDENT_2
|
|
49
|
+
option |= orjson.OPT_INDENT_2
|
|
50
|
+
|
|
51
|
+
# Sort keys
|
|
52
|
+
if kwargs.get("sort_keys", False):
|
|
53
|
+
option |= orjson.OPT_SORT_KEYS
|
|
54
|
+
|
|
55
|
+
result = orjson.dumps(obj, option=option)
|
|
56
|
+
|
|
57
|
+
# Return as bytes (orjson returns bytes)
|
|
58
|
+
# Caller can decode if string is needed
|
|
59
|
+
return result
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""pysimdjson parser - Tier 1 (C++ simdjson, excellent for partial parsing)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import simdjson
|
|
7
|
+
PYSIMDJSON_AVAILABLE = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
PYSIMDJSON_AVAILABLE = False
|
|
10
|
+
simdjson = None
|
|
11
|
+
|
|
12
|
+
from .base import IJsonParser
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PysimdjsonParser(IJsonParser):
|
|
16
|
+
"""pysimdjson parser - Tier 1 (C++ simdjson, excellent for partial parsing)."""
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def parser_name(self) -> str:
|
|
20
|
+
return "pysimdjson"
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def tier(self) -> int:
|
|
24
|
+
return 1
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def is_available(self) -> bool:
|
|
28
|
+
return PYSIMDJSON_AVAILABLE
|
|
29
|
+
|
|
30
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
31
|
+
"""Parse JSON using simdjson.loads()."""
|
|
32
|
+
if isinstance(s, str):
|
|
33
|
+
s = s.encode("utf-8")
|
|
34
|
+
return simdjson.loads(s)
|
|
35
|
+
|
|
36
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
37
|
+
"""Serialize JSON using pysimdjson.dumps()."""
|
|
38
|
+
# pysimdjson doesn't have dumps, fallback to orjson or stdlib
|
|
39
|
+
# For now, use orjson if available, else stdlib
|
|
40
|
+
try:
|
|
41
|
+
import orjson
|
|
42
|
+
result = orjson.dumps(obj)
|
|
43
|
+
if isinstance(result, bytes) and kwargs.get("return_str", False):
|
|
44
|
+
return result.decode("utf-8")
|
|
45
|
+
return result
|
|
46
|
+
except ImportError:
|
|
47
|
+
import json
|
|
48
|
+
result = json.dumps(obj, **kwargs)
|
|
49
|
+
if isinstance(result, str) and kwargs.get("return_bytes", False):
|
|
50
|
+
return result.encode("utf-8")
|
|
51
|
+
return result
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""python-rapidjson parser - Tier 1 (C++ rapidjson)."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Union
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import rapidjson
|
|
7
|
+
RAPIDJSON_AVAILABLE = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
RAPIDJSON_AVAILABLE = False
|
|
10
|
+
rapidjson = None
|
|
11
|
+
|
|
12
|
+
from .base import IJsonParser
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RapidjsonParser(IJsonParser):
|
|
16
|
+
"""python-rapidjson parser - Tier 1 (C++ rapidjson)."""
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def parser_name(self) -> str:
|
|
20
|
+
return "rapidjson"
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def tier(self) -> int:
|
|
24
|
+
return 1
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def is_available(self) -> bool:
|
|
28
|
+
return RAPIDJSON_AVAILABLE
|
|
29
|
+
|
|
30
|
+
def loads(self, s: Union[str, bytes]) -> Any:
|
|
31
|
+
"""Parse JSON using rapidjson.loads()."""
|
|
32
|
+
if isinstance(s, bytes):
|
|
33
|
+
s = s.decode("utf-8")
|
|
34
|
+
return rapidjson.loads(s)
|
|
35
|
+
|
|
36
|
+
def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
|
|
37
|
+
"""Serialize JSON using rapidjson.dumps()."""
|
|
38
|
+
# rapidjson supports most stdlib kwargs
|
|
39
|
+
result = rapidjson.dumps(
|
|
40
|
+
obj,
|
|
41
|
+
ensure_ascii=kwargs.get("ensure_ascii", True),
|
|
42
|
+
indent=kwargs.get("indent", None),
|
|
43
|
+
sort_keys=kwargs.get("sort_keys", False),
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# rapidjson returns str, encode if bytes needed
|
|
47
|
+
if isinstance(result, str) and kwargs.get("return_bytes", False):
|
|
48
|
+
return result.encode("utf-8")
|
|
49
|
+
|
|
50
|
+
return result
|