exonware-xwsystem 0.0.1.410__py3-none-any.whl → 0.1.0.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (259)
  1. exonware/__init__.py +1 -1
  2. exonware/conf.py +1 -1
  3. exonware/xwsystem/__init__.py +2 -2
  4. exonware/xwsystem/caching/__init__.py +1 -1
  5. exonware/xwsystem/caching/base.py +2 -2
  6. exonware/xwsystem/caching/bloom_cache.py +2 -2
  7. exonware/xwsystem/caching/cache_manager.py +1 -1
  8. exonware/xwsystem/caching/conditional.py +2 -2
  9. exonware/xwsystem/caching/contracts.py +1 -1
  10. exonware/xwsystem/caching/decorators.py +2 -2
  11. exonware/xwsystem/caching/defs.py +1 -1
  12. exonware/xwsystem/caching/disk_cache.py +1 -1
  13. exonware/xwsystem/caching/distributed.py +1 -1
  14. exonware/xwsystem/caching/errors.py +1 -1
  15. exonware/xwsystem/caching/events.py +2 -2
  16. exonware/xwsystem/caching/eviction_strategies.py +1 -1
  17. exonware/xwsystem/caching/fluent.py +1 -1
  18. exonware/xwsystem/caching/integrity.py +1 -1
  19. exonware/xwsystem/caching/lfu_cache.py +2 -2
  20. exonware/xwsystem/caching/lfu_optimized.py +3 -3
  21. exonware/xwsystem/caching/lru_cache.py +2 -2
  22. exonware/xwsystem/caching/memory_bounded.py +2 -2
  23. exonware/xwsystem/caching/metrics_exporter.py +2 -2
  24. exonware/xwsystem/caching/observable_cache.py +1 -1
  25. exonware/xwsystem/caching/pluggable_cache.py +2 -2
  26. exonware/xwsystem/caching/rate_limiter.py +1 -1
  27. exonware/xwsystem/caching/read_through.py +2 -2
  28. exonware/xwsystem/caching/secure_cache.py +1 -1
  29. exonware/xwsystem/caching/serializable.py +2 -2
  30. exonware/xwsystem/caching/stats.py +1 -1
  31. exonware/xwsystem/caching/tagging.py +2 -2
  32. exonware/xwsystem/caching/ttl_cache.py +1 -1
  33. exonware/xwsystem/caching/two_tier_cache.py +1 -1
  34. exonware/xwsystem/caching/utils.py +1 -1
  35. exonware/xwsystem/caching/validation.py +1 -1
  36. exonware/xwsystem/caching/warming.py +2 -2
  37. exonware/xwsystem/caching/write_behind.py +2 -2
  38. exonware/xwsystem/cli/__init__.py +1 -1
  39. exonware/xwsystem/cli/args.py +1 -1
  40. exonware/xwsystem/cli/base.py +1 -1
  41. exonware/xwsystem/cli/colors.py +1 -1
  42. exonware/xwsystem/cli/console.py +1 -1
  43. exonware/xwsystem/cli/contracts.py +1 -1
  44. exonware/xwsystem/cli/defs.py +1 -1
  45. exonware/xwsystem/cli/errors.py +1 -1
  46. exonware/xwsystem/cli/progress.py +1 -1
  47. exonware/xwsystem/cli/prompts.py +1 -1
  48. exonware/xwsystem/cli/tables.py +1 -1
  49. exonware/xwsystem/config/__init__.py +1 -1
  50. exonware/xwsystem/config/base.py +2 -2
  51. exonware/xwsystem/config/contracts.py +1 -1
  52. exonware/xwsystem/config/defaults.py +1 -1
  53. exonware/xwsystem/config/defs.py +1 -1
  54. exonware/xwsystem/config/errors.py +2 -2
  55. exonware/xwsystem/config/logging.py +1 -1
  56. exonware/xwsystem/config/logging_setup.py +2 -2
  57. exonware/xwsystem/config/performance.py +115 -388
  58. exonware/xwsystem/http_client/__init__.py +1 -1
  59. exonware/xwsystem/http_client/advanced_client.py +2 -2
  60. exonware/xwsystem/http_client/base.py +2 -2
  61. exonware/xwsystem/http_client/client.py +2 -2
  62. exonware/xwsystem/http_client/contracts.py +1 -1
  63. exonware/xwsystem/http_client/defs.py +1 -1
  64. exonware/xwsystem/http_client/errors.py +2 -2
  65. exonware/xwsystem/io/__init__.py +1 -1
  66. exonware/xwsystem/io/archive/__init__.py +1 -1
  67. exonware/xwsystem/io/archive/archive.py +1 -1
  68. exonware/xwsystem/io/archive/archive_files.py +1 -1
  69. exonware/xwsystem/io/archive/archivers.py +2 -2
  70. exonware/xwsystem/io/archive/base.py +6 -6
  71. exonware/xwsystem/io/archive/codec_integration.py +1 -1
  72. exonware/xwsystem/io/archive/compression.py +1 -1
  73. exonware/xwsystem/io/archive/formats/__init__.py +1 -1
  74. exonware/xwsystem/io/archive/formats/brotli_format.py +6 -3
  75. exonware/xwsystem/io/archive/formats/lz4_format.py +6 -3
  76. exonware/xwsystem/io/archive/formats/rar.py +6 -3
  77. exonware/xwsystem/io/archive/formats/sevenzip.py +6 -3
  78. exonware/xwsystem/io/archive/formats/squashfs_format.py +1 -1
  79. exonware/xwsystem/io/archive/formats/tar.py +1 -1
  80. exonware/xwsystem/io/archive/formats/wim_format.py +6 -3
  81. exonware/xwsystem/io/archive/formats/zip.py +1 -1
  82. exonware/xwsystem/io/archive/formats/zpaq_format.py +1 -1
  83. exonware/xwsystem/io/archive/formats/zstandard.py +6 -3
  84. exonware/xwsystem/io/base.py +1 -1
  85. exonware/xwsystem/io/codec/__init__.py +1 -1
  86. exonware/xwsystem/io/codec/base.py +6 -6
  87. exonware/xwsystem/io/codec/contracts.py +1 -1
  88. exonware/xwsystem/io/codec/registry.py +5 -5
  89. exonware/xwsystem/io/common/__init__.py +1 -1
  90. exonware/xwsystem/io/common/base.py +1 -1
  91. exonware/xwsystem/io/common/lock.py +1 -1
  92. exonware/xwsystem/io/common/watcher.py +1 -1
  93. exonware/xwsystem/io/contracts.py +1 -1
  94. exonware/xwsystem/io/data_operations.py +746 -0
  95. exonware/xwsystem/io/defs.py +1 -1
  96. exonware/xwsystem/io/errors.py +1 -1
  97. exonware/xwsystem/io/facade.py +2 -2
  98. exonware/xwsystem/io/file/__init__.py +1 -1
  99. exonware/xwsystem/io/file/base.py +1 -1
  100. exonware/xwsystem/io/file/conversion.py +1 -1
  101. exonware/xwsystem/io/file/file.py +8 -6
  102. exonware/xwsystem/io/file/paged_source.py +8 -1
  103. exonware/xwsystem/io/file/paging/__init__.py +1 -1
  104. exonware/xwsystem/io/file/paging/byte_paging.py +1 -1
  105. exonware/xwsystem/io/file/paging/line_paging.py +1 -1
  106. exonware/xwsystem/io/file/paging/record_paging.py +1 -1
  107. exonware/xwsystem/io/file/paging/registry.py +4 -4
  108. exonware/xwsystem/io/file/source.py +20 -9
  109. exonware/xwsystem/io/filesystem/__init__.py +1 -1
  110. exonware/xwsystem/io/filesystem/base.py +1 -1
  111. exonware/xwsystem/io/filesystem/local.py +9 -1
  112. exonware/xwsystem/io/folder/__init__.py +1 -1
  113. exonware/xwsystem/io/folder/base.py +1 -1
  114. exonware/xwsystem/io/folder/folder.py +2 -2
  115. exonware/xwsystem/io/serialization/__init__.py +1 -1
  116. exonware/xwsystem/io/serialization/auto_serializer.py +52 -39
  117. exonware/xwsystem/io/serialization/base.py +165 -1
  118. exonware/xwsystem/io/serialization/contracts.py +88 -1
  119. exonware/xwsystem/io/serialization/defs.py +1 -1
  120. exonware/xwsystem/io/serialization/errors.py +1 -1
  121. exonware/xwsystem/io/serialization/flyweight.py +10 -10
  122. exonware/xwsystem/io/serialization/format_detector.py +8 -5
  123. exonware/xwsystem/io/serialization/formats/__init__.py +1 -1
  124. exonware/xwsystem/io/serialization/formats/binary/bson.py +1 -1
  125. exonware/xwsystem/io/serialization/formats/binary/cbor.py +1 -1
  126. exonware/xwsystem/io/serialization/formats/binary/marshal.py +1 -1
  127. exonware/xwsystem/io/serialization/formats/binary/msgpack.py +1 -1
  128. exonware/xwsystem/io/serialization/formats/binary/pickle.py +1 -1
  129. exonware/xwsystem/io/serialization/formats/binary/plistlib.py +1 -1
  130. exonware/xwsystem/io/serialization/formats/database/dbm.py +53 -1
  131. exonware/xwsystem/io/serialization/formats/database/shelve.py +48 -1
  132. exonware/xwsystem/io/serialization/formats/database/sqlite3.py +85 -1
  133. exonware/xwsystem/io/serialization/formats/text/append_only_log.py +201 -0
  134. exonware/xwsystem/io/serialization/formats/text/configparser.py +1 -1
  135. exonware/xwsystem/io/serialization/formats/text/csv.py +1 -1
  136. exonware/xwsystem/io/serialization/formats/text/formdata.py +1 -1
  137. exonware/xwsystem/io/serialization/formats/text/json.py +43 -20
  138. exonware/xwsystem/io/serialization/formats/text/json5.py +7 -5
  139. exonware/xwsystem/io/serialization/formats/text/jsonlines.py +316 -22
  140. exonware/xwsystem/io/serialization/formats/text/multipart.py +1 -1
  141. exonware/xwsystem/io/serialization/formats/text/toml.py +19 -3
  142. exonware/xwsystem/io/serialization/formats/text/xml.py +8 -1
  143. exonware/xwsystem/io/serialization/formats/text/yaml.py +52 -2
  144. exonware/xwsystem/io/serialization/parsers/__init__.py +15 -0
  145. exonware/xwsystem/io/serialization/parsers/base.py +59 -0
  146. exonware/xwsystem/io/serialization/parsers/hybrid_parser.py +61 -0
  147. exonware/xwsystem/io/serialization/parsers/msgspec_parser.py +45 -0
  148. exonware/xwsystem/io/serialization/parsers/orjson_direct_parser.py +53 -0
  149. exonware/xwsystem/io/serialization/parsers/orjson_parser.py +59 -0
  150. exonware/xwsystem/io/serialization/parsers/pysimdjson_parser.py +51 -0
  151. exonware/xwsystem/io/serialization/parsers/rapidjson_parser.py +50 -0
  152. exonware/xwsystem/io/serialization/parsers/registry.py +90 -0
  153. exonware/xwsystem/io/serialization/parsers/standard.py +43 -0
  154. exonware/xwsystem/io/serialization/parsers/ujson_parser.py +50 -0
  155. exonware/xwsystem/io/serialization/registry.py +1 -1
  156. exonware/xwsystem/io/serialization/serializer.py +175 -3
  157. exonware/xwsystem/io/serialization/utils/__init__.py +1 -1
  158. exonware/xwsystem/io/serialization/utils/path_ops.py +1 -1
  159. exonware/xwsystem/io/stream/__init__.py +1 -1
  160. exonware/xwsystem/io/stream/async_operations.py +1 -1
  161. exonware/xwsystem/io/stream/base.py +1 -1
  162. exonware/xwsystem/io/stream/codec_io.py +1 -1
  163. exonware/xwsystem/ipc/async_fabric.py +1 -2
  164. exonware/xwsystem/ipc/base.py +2 -2
  165. exonware/xwsystem/ipc/contracts.py +2 -2
  166. exonware/xwsystem/ipc/defs.py +1 -1
  167. exonware/xwsystem/ipc/errors.py +2 -2
  168. exonware/xwsystem/ipc/pipes.py +2 -2
  169. exonware/xwsystem/ipc/shared_memory.py +2 -2
  170. exonware/xwsystem/monitoring/base.py +2 -2
  171. exonware/xwsystem/monitoring/contracts.py +1 -1
  172. exonware/xwsystem/monitoring/defs.py +1 -1
  173. exonware/xwsystem/monitoring/error_recovery.py +2 -2
  174. exonware/xwsystem/monitoring/errors.py +2 -2
  175. exonware/xwsystem/monitoring/memory_monitor.py +1 -1
  176. exonware/xwsystem/monitoring/performance_manager_generic.py +2 -2
  177. exonware/xwsystem/monitoring/performance_validator.py +1 -1
  178. exonware/xwsystem/monitoring/system_monitor.py +2 -2
  179. exonware/xwsystem/monitoring/tracing.py +2 -2
  180. exonware/xwsystem/monitoring/tracker.py +1 -1
  181. exonware/xwsystem/operations/__init__.py +1 -1
  182. exonware/xwsystem/operations/base.py +1 -1
  183. exonware/xwsystem/operations/defs.py +1 -1
  184. exonware/xwsystem/operations/diff.py +1 -1
  185. exonware/xwsystem/operations/merge.py +1 -1
  186. exonware/xwsystem/operations/patch.py +1 -1
  187. exonware/xwsystem/patterns/base.py +2 -2
  188. exonware/xwsystem/patterns/context_manager.py +2 -2
  189. exonware/xwsystem/patterns/contracts.py +9 -9
  190. exonware/xwsystem/patterns/defs.py +1 -1
  191. exonware/xwsystem/patterns/dynamic_facade.py +8 -8
  192. exonware/xwsystem/patterns/errors.py +5 -5
  193. exonware/xwsystem/patterns/handler_factory.py +6 -6
  194. exonware/xwsystem/patterns/object_pool.py +7 -7
  195. exonware/xwsystem/patterns/registry.py +3 -3
  196. exonware/xwsystem/plugins/__init__.py +1 -1
  197. exonware/xwsystem/plugins/base.py +5 -5
  198. exonware/xwsystem/plugins/contracts.py +5 -5
  199. exonware/xwsystem/plugins/defs.py +1 -1
  200. exonware/xwsystem/plugins/errors.py +4 -4
  201. exonware/xwsystem/runtime/__init__.py +1 -1
  202. exonware/xwsystem/runtime/base.py +6 -6
  203. exonware/xwsystem/runtime/contracts.py +6 -6
  204. exonware/xwsystem/runtime/defs.py +1 -1
  205. exonware/xwsystem/runtime/env.py +2 -2
  206. exonware/xwsystem/runtime/errors.py +1 -1
  207. exonware/xwsystem/runtime/reflection.py +8 -8
  208. exonware/xwsystem/security/auth.py +1 -1
  209. exonware/xwsystem/security/base.py +2 -2
  210. exonware/xwsystem/security/contracts.py +1 -1
  211. exonware/xwsystem/security/crypto.py +2 -2
  212. exonware/xwsystem/security/defs.py +1 -1
  213. exonware/xwsystem/security/errors.py +2 -2
  214. exonware/xwsystem/security/hazmat.py +2 -2
  215. exonware/xwsystem/shared/__init__.py +1 -1
  216. exonware/xwsystem/shared/base.py +1 -1
  217. exonware/xwsystem/shared/contracts.py +1 -1
  218. exonware/xwsystem/shared/defs.py +1 -1
  219. exonware/xwsystem/shared/errors.py +1 -1
  220. exonware/xwsystem/structures/__init__.py +1 -1
  221. exonware/xwsystem/structures/base.py +2 -2
  222. exonware/xwsystem/structures/contracts.py +1 -1
  223. exonware/xwsystem/structures/defs.py +1 -1
  224. exonware/xwsystem/structures/errors.py +2 -2
  225. exonware/xwsystem/threading/async_primitives.py +2 -2
  226. exonware/xwsystem/threading/base.py +2 -2
  227. exonware/xwsystem/threading/contracts.py +1 -1
  228. exonware/xwsystem/threading/defs.py +1 -1
  229. exonware/xwsystem/threading/errors.py +2 -2
  230. exonware/xwsystem/threading/safe_factory.py +6 -6
  231. exonware/xwsystem/utils/base.py +2 -2
  232. exonware/xwsystem/utils/contracts.py +1 -1
  233. exonware/xwsystem/utils/dt/__init__.py +1 -1
  234. exonware/xwsystem/utils/dt/base.py +2 -2
  235. exonware/xwsystem/utils/dt/contracts.py +1 -1
  236. exonware/xwsystem/utils/dt/defs.py +1 -1
  237. exonware/xwsystem/utils/dt/errors.py +2 -2
  238. exonware/xwsystem/utils/dt/formatting.py +1 -1
  239. exonware/xwsystem/utils/dt/humanize.py +2 -2
  240. exonware/xwsystem/utils/dt/parsing.py +1 -1
  241. exonware/xwsystem/utils/dt/timezone_utils.py +1 -1
  242. exonware/xwsystem/utils/errors.py +2 -2
  243. exonware/xwsystem/utils/utils_contracts.py +1 -1
  244. exonware/xwsystem/validation/__init__.py +1 -1
  245. exonware/xwsystem/validation/base.py +15 -15
  246. exonware/xwsystem/validation/contracts.py +1 -1
  247. exonware/xwsystem/validation/data_validator.py +10 -0
  248. exonware/xwsystem/validation/declarative.py +9 -9
  249. exonware/xwsystem/validation/defs.py +1 -1
  250. exonware/xwsystem/validation/errors.py +2 -2
  251. exonware/xwsystem/validation/fluent_validator.py +4 -4
  252. exonware/xwsystem/version.py +4 -4
  253. {exonware_xwsystem-0.0.1.410.dist-info → exonware_xwsystem-0.1.0.1.dist-info}/METADATA +3 -3
  254. exonware_xwsystem-0.1.0.1.dist-info/RECORD +284 -0
  255. exonware/xwsystem/caching/USAGE_GUIDE.md +0 -779
  256. exonware/xwsystem/utils/test_runner.py +0 -526
  257. exonware_xwsystem-0.0.1.410.dist-info/RECORD +0 -273
  258. {exonware_xwsystem-0.0.1.410.dist-info → exonware_xwsystem-0.1.0.1.dist-info}/WHEEL +0 -0
  259. {exonware_xwsystem-0.0.1.410.dist-info → exonware_xwsystem-0.1.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@
4
4
  Company: eXonware.com
5
5
  Author: Eng. Muhammad AlShehri
6
6
  Email: connect@exonware.com
7
- Version: 0.0.1.410
7
+ Version: 0.1.0.1
8
8
  Generation Date: 02-Nov-2025
9
9
 
10
10
  JSON Lines (JSONL/NDJSON) Serialization - Newline-Delimited JSON
@@ -26,11 +26,18 @@ from typing import Any, Optional, Union
26
26
  from pathlib import Path
27
27
  import json
28
28
 
29
- from ...base import ASerialization
30
- from ...contracts import ISerialization
29
+ from .json import JsonSerializer
30
+ from ...parsers.registry import get_parser
31
+ from ...parsers.base import IJsonParser
32
+ from ....errors import SerializationError
33
+ from ....common.atomic import AtomicFileWriter
34
+ from exonware.xwsystem.config.logging_setup import get_logger
35
+ from exonware.xwsystem.config.performance import get_performance_config
31
36
 
37
+ logger = get_logger(__name__)
32
38
 
33
- class JsonLinesSerializer(ASerialization):
39
+
40
+ class JsonLinesSerializer(JsonSerializer):
34
41
  """
35
42
  JSON Lines (JSONL/NDJSON) serializer for streaming data.
36
43
 
@@ -39,9 +46,16 @@ class JsonLinesSerializer(ASerialization):
39
46
  Concrete: JsonLinesSerializer
40
47
  """
41
48
 
42
- def __init__(self):
43
- """Initialize JSON Lines serializer."""
44
- super().__init__()
49
+ def __init__(self, parser_name: Optional[str] = None):
50
+ """
51
+ Initialize JSON Lines serializer with optional parser selection.
52
+
53
+ Args:
54
+ parser_name: Parser name ("standard", "orjson", or None for auto-detect)
55
+ """
56
+ super().__init__(parser_name=parser_name)
57
+ # Get parser instance for direct use in line-by-line operations
58
+ self._parser: IJsonParser = get_parser(parser_name)
45
59
 
46
60
  @property
47
61
  def codec_id(self) -> str:
@@ -67,8 +81,36 @@ class JsonLinesSerializer(ASerialization):
67
81
  def codec_types(self) -> list[str]:
68
82
  """JSON Lines is a data exchange format."""
69
83
  return ["data", "serialization"]
70
-
71
- def encode(self, data: Any, options: Optional[dict[str, Any]] = None) -> str:
84
+
85
+ # -------------------------------------------------------------------------
86
+ # RECORD / STREAMING CAPABILITIES
87
+ # -------------------------------------------------------------------------
88
+
89
+ @property
90
+ def supports_record_streaming(self) -> bool:
91
+ """
92
+ JSONL is explicitly designed for record-level streaming.
93
+
94
+ This enables stream_read_record / stream_update_record to operate in a
95
+ true streaming fashion (line-by-line) without loading the entire file.
96
+ """
97
+ return True
98
+
99
+ @property
100
+ def supports_record_paging(self) -> bool:
101
+ """
102
+ JSONL supports efficient record-level paging.
103
+
104
+ Paging is implemented as a lightweight line counter that only parses
105
+ the requested slice of records.
106
+ """
107
+ return True
108
+
109
+ # -------------------------------------------------------------------------
110
+ # CORE ENCODE / DECODE
111
+ # -------------------------------------------------------------------------
112
+
113
+ def encode(self, data: Any, *, options: Optional[dict[str, Any]] = None) -> str:
72
114
  """
73
115
  Encode data to JSON Lines string.
74
116
 
@@ -82,14 +124,22 @@ class JsonLinesSerializer(ASerialization):
82
124
  if not isinstance(data, list):
83
125
  # Single object - wrap in list
84
126
  data = [data]
85
-
86
- lines = []
127
+
128
+ opts = options or {}
129
+ ensure_ascii = opts.get("ensure_ascii", False)
130
+
131
+ lines: list[str] = []
87
132
  for item in data:
88
- lines.append(json.dumps(item, ensure_ascii=False))
89
-
90
- return '\n'.join(lines)
91
-
92
- def decode(self, data: Union[str, bytes], options: Optional[dict[str, Any]] = None) -> list[Any]:
133
+ # Use pluggable parser
134
+ result = self._parser.dumps(item, ensure_ascii=ensure_ascii)
135
+ # Convert bytes to str if needed
136
+ if isinstance(result, bytes):
137
+ result = result.decode("utf-8")
138
+ lines.append(result)
139
+
140
+ return "\n".join(lines)
141
+
142
+ def decode(self, data: Union[str, bytes], *, options: Optional[dict[str, Any]] = None) -> list[Any]:
93
143
  """
94
144
  Decode JSON Lines string to list of Python objects.
95
145
 
@@ -101,16 +151,260 @@ class JsonLinesSerializer(ASerialization):
101
151
  List of decoded Python objects
102
152
  """
103
153
  if isinstance(data, bytes):
104
- data = data.decode('utf-8')
105
-
154
+ data = data.decode("utf-8")
155
+
106
156
  # Split by newlines and parse each line
107
- lines = data.strip().split('\n')
108
- results = []
109
-
157
+ lines = data.strip().split("\n")
158
+ results: list[Any] = []
159
+
110
160
  for line in lines:
111
161
  line = line.strip()
112
162
  if line: # Skip empty lines
113
- results.append(json.loads(line))
163
+ # Use pluggable parser
164
+ results.append(self._parser.loads(line))
165
+
166
+ return results
167
+
168
+ # -------------------------------------------------------------------------
169
+ # RECORD-LEVEL OPERATIONS (True streaming, line-by-line)
170
+ # -------------------------------------------------------------------------
171
+
172
+ def stream_read_record(
173
+ self,
174
+ file_path: Union[str, Path],
175
+ match: callable,
176
+ projection: Optional[list[Any]] = None,
177
+ **options: Any,
178
+ ) -> Any:
179
+ """
180
+ Stream-style read of a single logical record from a JSONL file.
181
+
182
+ Reads the file line-by-line, parsing each JSON object and returning the
183
+ first record that satisfies match(record). Optional projection is
184
+ applied using the base helper to avoid duplicating logic.
185
+ """
186
+ path = Path(file_path)
187
+ if not path.exists():
188
+ raise FileNotFoundError(f"File not found: {path}")
189
+
190
+ # Line-by-line scan – no full-file load
191
+ with path.open("r", encoding="utf-8") as f:
192
+ for line in f:
193
+ line = line.strip()
194
+ if not line:
195
+ continue
196
+ # Use pluggable parser
197
+ record = self._parser.loads(line)
198
+ if match(record):
199
+ return self._apply_projection(record, projection)
200
+
201
+ raise KeyError("No matching record found")
202
+
203
+ def stream_update_record(
204
+ self,
205
+ file_path: Union[str, Path],
206
+ match: callable,
207
+ updater: callable,
208
+ *,
209
+ atomic: bool = True,
210
+ **options: Any,
211
+ ) -> int:
212
+ """
213
+ Stream-style update of logical records in a JSONL file.
214
+
215
+ Implementation uses a temp file + AtomicFileWriter pattern to ensure
216
+ atomicity when atomic=True. Records are processed line-by-line and only
217
+ the matching records are materialized and updated.
114
218
 
219
+ Supports append-only log optimization for large files (use_append_log=True).
220
+ """
221
+ path = Path(file_path)
222
+ if not path.exists():
223
+ raise FileNotFoundError(f"File not found: {path}")
224
+
225
+ # Check if append-only log should be used
226
+ perf_config = get_performance_config()
227
+ use_append_log = options.get("use_append_log", None)
228
+ if use_append_log is None:
229
+ if not perf_config.enable_append_log:
230
+ use_append_log = False
231
+ else:
232
+ # Auto-detect: use for files above threshold
233
+ file_size_mb = path.stat().st_size / (1024 * 1024)
234
+ use_append_log = file_size_mb > perf_config.append_log_threshold_mb
235
+
236
+ # Try append-only log if enabled
237
+ if use_append_log:
238
+ try:
239
+ from .append_only_log import AppendOnlyLog
240
+ log = AppendOnlyLog(path)
241
+
242
+ # For append-only log, we need to find matching records first
243
+ # and apply updates, then append to log
244
+ # This is a simplified version - full implementation would
245
+ # integrate with index for O(1) lookups
246
+ updated = 0
247
+ with path.open("r", encoding="utf-8") as src:
248
+ for line in src:
249
+ raw = line.rstrip("\n")
250
+ if not raw.strip():
251
+ continue
252
+
253
+ try:
254
+ record = self._parser.loads(raw)
255
+ if match(record):
256
+ # Apply updater
257
+ updated_record = updater(record)
258
+
259
+ # Extract type and id for log entry
260
+ type_name = record.get("@type") or record.get("type") or "Record"
261
+ id_value = str(record.get("id", ""))
262
+
263
+ # Append to log
264
+ log.update_record(type_name, id_value, lambda x: updated_record)
265
+ updated += 1
266
+ except Exception:
267
+ continue
268
+
269
+ return updated
270
+ except Exception as e:
271
+ # Fall back to full rewrite if append-only log fails
272
+ logger.debug(f"Append-only log failed, falling back to full rewrite: {e}")
273
+
274
+ # Original full-rewrite implementation
275
+ updated = 0
276
+ backup = options.get("backup", True)
277
+ ensure_ascii = options.get("ensure_ascii", False)
278
+
279
+ try:
280
+ if atomic:
281
+ # Atomic path: use AtomicFileWriter for temp+replace semantics
282
+ with AtomicFileWriter(path, backup=backup) as writer:
283
+ with path.open("r", encoding="utf-8") as src:
284
+ for line in src:
285
+ raw = line.rstrip("\n")
286
+ if not raw.strip():
287
+ # Preserve structural empty lines
288
+ writer.write("\n")
289
+ continue
290
+
291
+ # Use pluggable parser
292
+ record = self._parser.loads(raw)
293
+ if match(record):
294
+ record = updater(record)
295
+ updated += 1
296
+
297
+ # Use pluggable parser for serialization
298
+ result = self._parser.dumps(record, ensure_ascii=ensure_ascii)
299
+ if isinstance(result, bytes):
300
+ result = result.decode("utf-8")
301
+ out_line = result + "\n"
302
+ writer.write(out_line)
303
+ else:
304
+ # Non-atomic fallback: read + rewrite line-by-line
305
+ new_lines: list[str] = []
306
+ with path.open("r", encoding="utf-8") as src:
307
+ for line in src:
308
+ raw = line.rstrip("\n")
309
+ if not raw.strip():
310
+ new_lines.append("\n")
311
+ continue
312
+
313
+ # Use pluggable parser
314
+ record = self._parser.loads(raw)
315
+ if match(record):
316
+ record = updater(record)
317
+ updated += 1
318
+
319
+ # Use pluggable parser for serialization
320
+ result = self._parser.dumps(record, ensure_ascii=ensure_ascii)
321
+ if isinstance(result, bytes):
322
+ result = result.decode("utf-8")
323
+ new_lines.append(result + "\n")
324
+
325
+ path.parent.mkdir(parents=True, exist_ok=True)
326
+ path.write_text("".join(new_lines), encoding="utf-8")
327
+
328
+ return updated
329
+ except Exception as e:
330
+ raise SerializationError(
331
+ f"Failed to stream-update JSONL records in {path}: {e}",
332
+ format_name=self.format_name,
333
+ original_error=e,
334
+ ) from e
335
+
336
+ def get_record_page(
337
+ self,
338
+ file_path: Union[str, Path],
339
+ page_number: int,
340
+ page_size: int,
341
+ **options: Any,
342
+ ) -> list[Any]:
343
+ """
344
+ Retrieve a logical page of records from a JSONL file.
345
+
346
+ Pages are computed by counting logical records (non-empty lines). Only
347
+ the requested slice is parsed and returned, keeping memory usage
348
+ proportional to page_size rather than file size.
349
+ """
350
+ if page_number < 1 or page_size <= 0:
351
+ raise ValueError("Invalid page_number or page_size")
352
+
353
+ path = Path(file_path)
354
+ if not path.exists():
355
+ raise FileNotFoundError(f"File not found: {path}")
356
+
357
+ start_index = (page_number - 1) * page_size
358
+ end_index = start_index + page_size
359
+
360
+ results: list[Any] = []
361
+ current_index = 0
362
+
363
+ with path.open("r", encoding="utf-8") as f:
364
+ for line in f:
365
+ line = line.strip()
366
+ if not line:
367
+ continue
368
+
369
+ if current_index >= end_index:
370
+ break
371
+
372
+ if current_index >= start_index:
373
+ # Use pluggable parser
374
+ results.append(self._parser.loads(line))
375
+
376
+ current_index += 1
377
+
115
378
  return results
116
379
 
380
+ def get_record_by_id(
381
+ self,
382
+ file_path: Union[str, Path],
383
+ id_value: Any,
384
+ *,
385
+ id_field: str = "id",
386
+ **options: Any,
387
+ ) -> Any:
388
+ """
389
+ Retrieve a logical record by identifier from a JSONL file.
390
+
391
+ Performs a streaming linear scan over records, returning the first
392
+ record where record[id_field] == id_value.
393
+ """
394
+ path = Path(file_path)
395
+ if not path.exists():
396
+ raise FileNotFoundError(f"File not found: {path}")
397
+
398
+ with path.open("r", encoding="utf-8") as f:
399
+ for line in f:
400
+ line = line.strip()
401
+ if not line:
402
+ continue
403
+
404
+ # Use pluggable parser
405
+ record = self._parser.loads(line)
406
+ if isinstance(record, dict) and record.get(id_field) == id_value:
407
+ return record
408
+
409
+ raise KeyError(f"Record with {id_field}={id_value!r} not found")
410
+
@@ -2,7 +2,7 @@
2
2
  Company: eXonware.com
3
3
  Author: Eng. Muhammad AlShehri
4
4
  Email: connect@exonware.com
5
- Version: 0.0.1.410
5
+ Version: 0.1.0.1
6
6
  Generation Date: November 2, 2025
7
7
 
8
8
  Multipart serialization - Multipart form data format.
@@ -2,7 +2,7 @@
2
2
  Company: eXonware.com
3
3
  Author: Eng. Muhammad AlShehri
4
4
  Email: connect@exonware.com
5
- Version: 0.0.1.410
5
+ Version: 0.1.0.1
6
6
  Generation Date: November 2, 2025
7
7
 
8
8
  TOML serialization - Configuration file format.
@@ -180,8 +180,17 @@ class TomlSerializer(ASerialization):
180
180
  """
181
181
  try:
182
182
  if not isinstance(value, dict):
183
- raise TypeError("TOML can only serialize dictionaries")
184
-
183
+ # TOML requires a table (dict) at the top level. For data-oriented
184
+ # use cases (e.g. record lists), transparently wrap common patterns
185
+ # so that higher-level APIs (record paging, etc.) can still work
186
+ # uniformly across formats.
187
+ if isinstance(value, list):
188
+ # Auto-wrap top-level list into "items" table.
189
+ value = {"items": value}
190
+ else:
191
+ # Fallback: wrap primitive/other types into a single "value" key.
192
+ value = {"value": value}
193
+
185
194
  opts = options or {}
186
195
 
187
196
  # Root cause fixed: Remove None values before encoding (TOML doesn't support None).
@@ -227,6 +236,13 @@ class TomlSerializer(ASerialization):
227
236
 
228
237
  # Decode from TOML string
229
238
  data = tomllib.loads(repr)
239
+
240
+ # If this looks like an auto-wrapped list payload (see encode),
241
+ # unwrap it for callers so that higher-level APIs (including the
242
+ # generic record-level operations in ASerialization) see the
243
+ # natural Python structure (a list of records).
244
+ if isinstance(data, dict) and set(data.keys()) == {"items"} and isinstance(data["items"], list):
245
+ return data["items"]
230
246
 
231
247
  return data
232
248
 
@@ -2,7 +2,7 @@
2
2
  Company: eXonware.com
3
3
  Author: Eng. Muhammad AlShehri
4
4
  Email: connect@exonware.com
5
- Version: 0.0.1.410
5
+ Version: 0.1.0.1
6
6
  Generation Date: November 2, 2025
7
7
 
8
8
  XML serialization - Extensible Markup Language.
@@ -427,6 +427,13 @@ class XmlSerializer(ASerialization):
427
427
  if isinstance(repr, bytes):
428
428
  repr = repr.decode('utf-8')
429
429
 
430
+ # Trim leading BOM/whitespace before XML declaration.
431
+ # Root cause: Some producers emit a blank line or BOM before '<?xml ...?>',
432
+ # which causes ExpatError: "XML or text declaration not at start of entity".
433
+ # Priority #2 (Usability): Be forgiving on harmless leading whitespace/BOM
434
+ # while keeping strict parsing for the actual XML content.
435
+ repr = repr.lstrip("\ufeff\r\n\t ")
436
+
430
437
  opts = options or {}
431
438
  root_name = opts.get('root', 'root')
432
439
  preserve_types = opts.get('preserve_types', False)
@@ -2,7 +2,7 @@
2
2
  Company: eXonware.com
3
3
  Author: Eng. Muhammad AlShehri
4
4
  Email: connect@exonware.com
5
- Version: 0.0.1.410
5
+ Version: 0.1.0.1
6
6
  Generation Date: November 2, 2025
7
7
 
8
8
  YAML serialization - Human-readable data serialization format.
@@ -13,7 +13,7 @@ Following I→A pattern:
13
13
  - Concrete: YamlSerializer
14
14
  """
15
15
 
16
- from typing import Any, Optional, Union
16
+ from typing import Any, Optional, Union, Iterator
17
17
  from pathlib import Path
18
18
 
19
19
  from ...base import ASerialization
@@ -88,6 +88,10 @@ class YamlSerializer(ASerialization):
88
88
  def supports_streaming(self) -> bool:
89
89
  return True # YAML supports multiple documents
90
90
 
91
+ @property
92
+ def supports_incremental_streaming(self) -> bool:
93
+ return True # YAML supports multi-document streaming
94
+
91
95
  @property
92
96
  def capabilities(self) -> CodecCapability:
93
97
  return CodecCapability.BIDIRECTIONAL
@@ -178,4 +182,50 @@ class YamlSerializer(ASerialization):
178
182
  format_name=self.format_name,
179
183
  original_error=e
180
184
  )
185
+
186
+ # ========================================================================
187
+ # INCREMENTAL STREAMING
188
+ # ========================================================================
189
+
190
+ def incremental_load(
191
+ self,
192
+ file_path: Union[str, Path],
193
+ **options: Any,
194
+ ) -> Iterator[Any]:
195
+ """
196
+ Stream YAML documents one at a time (supports multi-document YAML).
197
+
198
+ Uses PyYAML's safe_load_all() for true streaming without loading
199
+ entire file into memory.
200
+
201
+ Args:
202
+ file_path: Path to the YAML file
203
+ **options: YAML options (Loader, etc.)
204
+
205
+ Yields:
206
+ Each document from the YAML file one at a time
207
+
208
+ Raises:
209
+ FileNotFoundError: If file doesn't exist
210
+ SerializationError: If parsing fails
211
+ """
212
+ path = Path(file_path)
213
+ if not path.exists():
214
+ raise FileNotFoundError(f"File not found: {path}")
215
+
216
+ opts = options or {}
217
+ loader = opts.get('Loader', yaml.SafeLoader)
218
+
219
+ try:
220
+ with path.open("r", encoding="utf-8") as f:
221
+ # Use safe_load_all for multi-document streaming
222
+ for document in yaml.safe_load_all(f):
223
+ if document is not None: # Skip empty documents
224
+ yield document
225
+ except (yaml.YAMLError, UnicodeDecodeError) as e:
226
+ raise SerializationError(
227
+ f"Failed to incrementally load YAML: {e}",
228
+ format_name=self.format_name,
229
+ original_error=e
230
+ ) from e
181
231
 
@@ -0,0 +1,15 @@
1
+ """JSON Parser abstraction layer for pluggable performance optimizations.
2
+
3
+ This module provides a pluggable parser system that allows switching between
4
+ different JSON parsing implementations (stdlib, orjson, etc.) for performance.
5
+ """
6
+
7
+ from .base import IJsonParser
8
+ from .registry import get_parser, get_best_available_parser, register_parser
9
+
10
# Public API re-exported by this package.
__all__ = [
    "IJsonParser",
    "get_parser",
    "get_best_available_parser",
    "register_parser",
]
@@ -0,0 +1,59 @@
1
+ """Base JSON parser interface."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Union
5
+
6
+
7
class IJsonParser(ABC):
    """Contract that every pluggable JSON parser backend must implement."""

    @abstractmethod
    def loads(self, s: Union[str, bytes]) -> Any:
        """
        Decode a JSON document into a Python object.

        Args:
            s: JSON text as ``str`` or ``bytes``

        Returns:
            The decoded Python object
        """

    @abstractmethod
    def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
        """
        Encode a Python object as JSON.

        Args:
            obj: Python object to encode
            **kwargs: Serialization options (ensure_ascii, indent, etc.)

        Returns:
            The JSON document as ``str`` or ``bytes``
        """

    @property
    @abstractmethod
    def parser_name(self) -> str:
        """Identifier of the parser (e.g., 'standard', 'orjson')."""

    @property
    @abstractmethod
    def tier(self) -> int:
        """
        Performance tier of the parser:
            0 = stdlib (baseline)
            1 = orjson (3-4x faster)
            2 = Rust extension (5-7x faster, future)
            3 = Pure Rust core (6-8x faster, future)
        """

    @property
    @abstractmethod
    def is_available(self) -> bool:
        """Whether the parser can be used (its dependencies are installed)."""
@@ -0,0 +1,61 @@
1
+ """Hybrid parser: msgspec for reading, orjson for writing (direct, no try/catch)."""
2
+
3
+ from typing import Any, Union
4
+ import msgspec # Direct import for reading
5
+ import orjson # Direct import for writing
6
+
7
+ from .base import IJsonParser
8
+
9
+
10
class HybridParser(IJsonParser):
    """
    Hybrid parser - fastest combination:
    - msgspec for reading (1.36x faster than orjson)
    - orjson for writing (2.27x faster than msgspec)

    Both libraries are imported unconditionally at module level (no
    try/except), so importing this module fails if either one is missing.
    """

    @property
    def parser_name(self) -> str:
        """Parser identifier: ``"hybrid"``."""
        return "hybrid"

    @property
    def tier(self) -> int:
        """Performance tier: ``1``."""
        return 1

    @property
    def is_available(self) -> bool:
        """Always True: the module-level imports already require both libraries."""
        return True

    def loads(self, s: Union[str, bytes]) -> Any:
        """
        Parse JSON using ``msgspec.json.decode()``.

        Args:
            s: JSON document; ``str`` input is encoded to UTF-8 bytes first

        Returns:
            Parsed Python object
        """
        if isinstance(s, str):
            s = s.encode("utf-8")
        return msgspec.json.decode(s)

    def dumps(self, obj: Any, **kwargs) -> Union[str, bytes]:
        """
        Serialize a Python object to JSON using ``orjson.dumps()``.

        Args:
            obj: Python object to serialize
            **kwargs: Only ``indent`` and ``sort_keys`` are honoured:
                - ``indent``: any truthy value enables 2-space pretty
                  printing (``orjson.OPT_INDENT_2``); the requested width
                  itself is not applied.
                - ``sort_keys``: truthy enables ``orjson.OPT_SORT_KEYS``.
                Every other option, including ``ensure_ascii``, is ignored;
                orjson always emits UTF-8.

        Returns:
            The JSON document as ``bytes``; callers that need ``str`` must
            decode it themselves.
        """
        option = 0

        # orjson has no arbitrary indent width; 2 spaces is the only choice.
        if kwargs.get("indent"):
            option |= orjson.OPT_INDENT_2

        if kwargs.get("sort_keys", False):
            option |= orjson.OPT_SORT_KEYS

        return orjson.dumps(obj, option=option)