exonware-xwsystem 0.0.1.411__py3-none-any.whl → 0.1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +2 -1
- exonware/conf.py +2 -2
- exonware/xwsystem/__init__.py +115 -43
- exonware/xwsystem/base.py +30 -0
- exonware/xwsystem/caching/__init__.py +39 -13
- exonware/xwsystem/caching/base.py +24 -6
- exonware/xwsystem/caching/bloom_cache.py +2 -2
- exonware/xwsystem/caching/cache_manager.py +2 -1
- exonware/xwsystem/caching/conditional.py +2 -2
- exonware/xwsystem/caching/contracts.py +85 -139
- exonware/xwsystem/caching/decorators.py +6 -19
- exonware/xwsystem/caching/defs.py +2 -1
- exonware/xwsystem/caching/disk_cache.py +2 -1
- exonware/xwsystem/caching/distributed.py +2 -1
- exonware/xwsystem/caching/errors.py +2 -1
- exonware/xwsystem/caching/events.py +110 -27
- exonware/xwsystem/caching/eviction_strategies.py +2 -2
- exonware/xwsystem/caching/external_caching_python.py +701 -0
- exonware/xwsystem/caching/facade.py +253 -0
- exonware/xwsystem/caching/factory.py +300 -0
- exonware/xwsystem/caching/fluent.py +14 -12
- exonware/xwsystem/caching/integrity.py +21 -6
- exonware/xwsystem/caching/lfu_cache.py +2 -1
- exonware/xwsystem/caching/lfu_optimized.py +18 -6
- exonware/xwsystem/caching/lru_cache.py +7 -4
- exonware/xwsystem/caching/memory_bounded.py +2 -2
- exonware/xwsystem/caching/metrics_exporter.py +2 -2
- exonware/xwsystem/caching/observable_cache.py +2 -2
- exonware/xwsystem/caching/pluggable_cache.py +2 -2
- exonware/xwsystem/caching/rate_limiter.py +2 -2
- exonware/xwsystem/caching/read_through.py +2 -2
- exonware/xwsystem/caching/secure_cache.py +81 -28
- exonware/xwsystem/caching/serializable.py +9 -7
- exonware/xwsystem/caching/stats.py +2 -2
- exonware/xwsystem/caching/tagging.py +2 -2
- exonware/xwsystem/caching/ttl_cache.py +4 -3
- exonware/xwsystem/caching/two_tier_cache.py +6 -3
- exonware/xwsystem/caching/utils.py +30 -12
- exonware/xwsystem/caching/validation.py +2 -2
- exonware/xwsystem/caching/warming.py +6 -3
- exonware/xwsystem/caching/write_behind.py +15 -6
- exonware/xwsystem/config/__init__.py +11 -17
- exonware/xwsystem/config/base.py +5 -5
- exonware/xwsystem/config/contracts.py +93 -153
- exonware/xwsystem/config/defaults.py +3 -2
- exonware/xwsystem/config/defs.py +3 -2
- exonware/xwsystem/config/errors.py +2 -5
- exonware/xwsystem/config/logging.py +12 -8
- exonware/xwsystem/config/logging_setup.py +3 -2
- exonware/xwsystem/config/performance.py +73 -391
- exonware/xwsystem/config/performance_modes.py +9 -8
- exonware/xwsystem/config/version_manager.py +1 -0
- exonware/xwsystem/config.py +27 -0
- exonware/xwsystem/console/__init__.py +53 -0
- exonware/xwsystem/console/base.py +133 -0
- exonware/xwsystem/console/cli/__init__.py +61 -0
- exonware/xwsystem/{cli → console/cli}/args.py +27 -24
- exonware/xwsystem/{cli → console/cli}/base.py +18 -87
- exonware/xwsystem/{cli → console/cli}/colors.py +15 -13
- exonware/xwsystem/console/cli/console.py +98 -0
- exonware/xwsystem/{cli → console/cli}/contracts.py +51 -69
- exonware/xwsystem/console/cli/defs.py +87 -0
- exonware/xwsystem/console/cli/encoding.py +69 -0
- exonware/xwsystem/{cli → console/cli}/errors.py +8 -3
- exonware/xwsystem/console/cli/event_logger.py +166 -0
- exonware/xwsystem/{cli → console/cli}/progress.py +25 -21
- exonware/xwsystem/{cli → console/cli}/prompts.py +3 -2
- exonware/xwsystem/{cli → console/cli}/tables.py +27 -24
- exonware/xwsystem/console/contracts.py +113 -0
- exonware/xwsystem/console/defs.py +154 -0
- exonware/xwsystem/console/errors.py +34 -0
- exonware/xwsystem/console/event_logger.py +385 -0
- exonware/xwsystem/console/writer.py +132 -0
- exonware/xwsystem/contracts.py +28 -0
- exonware/xwsystem/data_structures/__init__.py +23 -0
- exonware/xwsystem/data_structures/trie.py +34 -0
- exonware/xwsystem/data_structures/union_find.py +144 -0
- exonware/xwsystem/defs.py +17 -0
- exonware/xwsystem/errors.py +23 -0
- exonware/xwsystem/facade.py +62 -0
- exonware/xwsystem/http_client/__init__.py +22 -1
- exonware/xwsystem/http_client/advanced_client.py +8 -5
- exonware/xwsystem/http_client/base.py +3 -2
- exonware/xwsystem/http_client/client.py +7 -4
- exonware/xwsystem/http_client/contracts.py +42 -56
- exonware/xwsystem/http_client/defs.py +2 -1
- exonware/xwsystem/http_client/errors.py +2 -1
- exonware/xwsystem/http_client/facade.py +156 -0
- exonware/xwsystem/io/__init__.py +22 -3
- exonware/xwsystem/io/archive/__init__.py +8 -2
- exonware/xwsystem/io/archive/archive.py +1 -1
- exonware/xwsystem/io/archive/archive_files.py +4 -7
- exonware/xwsystem/io/archive/archivers.py +120 -10
- exonware/xwsystem/io/archive/base.py +4 -5
- exonware/xwsystem/io/archive/codec_integration.py +1 -2
- exonware/xwsystem/io/archive/compression.py +1 -2
- exonware/xwsystem/io/archive/facade.py +263 -0
- exonware/xwsystem/io/archive/formats/__init__.py +2 -3
- exonware/xwsystem/io/archive/formats/brotli_format.py +20 -7
- exonware/xwsystem/io/archive/formats/lz4_format.py +20 -7
- exonware/xwsystem/io/archive/formats/rar.py +11 -5
- exonware/xwsystem/io/archive/formats/sevenzip.py +12 -6
- exonware/xwsystem/io/archive/formats/squashfs_format.py +1 -2
- exonware/xwsystem/io/archive/formats/tar.py +52 -7
- exonware/xwsystem/io/archive/formats/wim_format.py +11 -5
- exonware/xwsystem/io/archive/formats/zip.py +1 -2
- exonware/xwsystem/io/archive/formats/zpaq_format.py +1 -2
- exonware/xwsystem/io/archive/formats/zstandard.py +20 -7
- exonware/xwsystem/io/base.py +119 -115
- exonware/xwsystem/io/codec/__init__.py +4 -2
- exonware/xwsystem/io/codec/base.py +19 -13
- exonware/xwsystem/io/codec/contracts.py +59 -2
- exonware/xwsystem/io/codec/registry.py +67 -21
- exonware/xwsystem/io/common/__init__.py +1 -1
- exonware/xwsystem/io/common/atomic.py +29 -16
- exonware/xwsystem/io/common/base.py +11 -10
- exonware/xwsystem/io/common/lock.py +6 -5
- exonware/xwsystem/io/common/path_manager.py +2 -1
- exonware/xwsystem/io/common/watcher.py +1 -2
- exonware/xwsystem/io/contracts.py +301 -433
- exonware/xwsystem/io/contracts_1.py +1180 -0
- exonware/xwsystem/io/data_operations.py +279 -14
- exonware/xwsystem/io/defs.py +4 -3
- exonware/xwsystem/io/errors.py +3 -2
- exonware/xwsystem/io/facade.py +87 -61
- exonware/xwsystem/io/file/__init__.py +1 -1
- exonware/xwsystem/io/file/base.py +8 -9
- exonware/xwsystem/io/file/conversion.py +2 -3
- exonware/xwsystem/io/file/file.py +61 -18
- exonware/xwsystem/io/file/paged_source.py +8 -8
- exonware/xwsystem/io/file/paging/__init__.py +1 -2
- exonware/xwsystem/io/file/paging/byte_paging.py +4 -5
- exonware/xwsystem/io/file/paging/line_paging.py +2 -3
- exonware/xwsystem/io/file/paging/record_paging.py +2 -3
- exonware/xwsystem/io/file/paging/registry.py +1 -2
- exonware/xwsystem/io/file/source.py +13 -17
- exonware/xwsystem/io/filesystem/__init__.py +1 -1
- exonware/xwsystem/io/filesystem/base.py +1 -2
- exonware/xwsystem/io/filesystem/local.py +3 -4
- exonware/xwsystem/io/folder/__init__.py +1 -1
- exonware/xwsystem/io/folder/base.py +1 -2
- exonware/xwsystem/io/folder/folder.py +16 -7
- exonware/xwsystem/io/indexing/__init__.py +14 -0
- exonware/xwsystem/io/indexing/facade.py +443 -0
- exonware/xwsystem/io/path_parser.py +98 -0
- exonware/xwsystem/io/serialization/__init__.py +21 -3
- exonware/xwsystem/io/serialization/auto_serializer.py +146 -20
- exonware/xwsystem/io/serialization/base.py +84 -34
- exonware/xwsystem/io/serialization/contracts.py +50 -73
- exonware/xwsystem/io/serialization/defs.py +2 -1
- exonware/xwsystem/io/serialization/errors.py +2 -1
- exonware/xwsystem/io/serialization/flyweight.py +154 -7
- exonware/xwsystem/io/serialization/format_detector.py +15 -14
- exonware/xwsystem/io/serialization/formats/__init__.py +8 -5
- exonware/xwsystem/io/serialization/formats/binary/bson.py +15 -6
- exonware/xwsystem/io/serialization/formats/binary/cbor.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/marshal.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/msgpack.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/pickle.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/plistlib.py +5 -5
- exonware/xwsystem/io/serialization/formats/database/dbm.py +7 -7
- exonware/xwsystem/io/serialization/formats/database/shelve.py +7 -7
- exonware/xwsystem/io/serialization/formats/database/sqlite3.py +7 -7
- exonware/xwsystem/io/serialization/formats/tabular/__init__.py +27 -0
- exonware/xwsystem/io/serialization/formats/tabular/base.py +89 -0
- exonware/xwsystem/io/serialization/formats/tabular/csv.py +319 -0
- exonware/xwsystem/io/serialization/formats/tabular/df.py +249 -0
- exonware/xwsystem/io/serialization/formats/tabular/excel.py +291 -0
- exonware/xwsystem/io/serialization/formats/tabular/googlesheets.py +374 -0
- exonware/xwsystem/io/serialization/formats/text/__init__.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/append_only_log.py +199 -0
- exonware/xwsystem/io/serialization/formats/text/configparser.py +5 -5
- exonware/xwsystem/io/serialization/formats/text/csv.py +7 -5
- exonware/xwsystem/io/serialization/formats/text/formdata.py +5 -5
- exonware/xwsystem/io/serialization/formats/text/json.py +65 -33
- exonware/xwsystem/io/serialization/formats/text/json5.py +8 -4
- exonware/xwsystem/io/serialization/formats/text/jsonlines.py +113 -25
- exonware/xwsystem/io/serialization/formats/text/multipart.py +5 -5
- exonware/xwsystem/io/serialization/formats/text/toml.py +8 -6
- exonware/xwsystem/io/serialization/formats/text/xml.py +25 -20
- exonware/xwsystem/io/serialization/formats/text/yaml.py +8 -6
- exonware/xwsystem/io/serialization/parsers/__init__.py +16 -0
- exonware/xwsystem/io/serialization/parsers/base.py +60 -0
- exonware/xwsystem/io/serialization/parsers/hybrid_parser.py +62 -0
- exonware/xwsystem/io/serialization/parsers/msgspec_parser.py +48 -0
- exonware/xwsystem/io/serialization/parsers/orjson_direct_parser.py +54 -0
- exonware/xwsystem/io/serialization/parsers/orjson_parser.py +62 -0
- exonware/xwsystem/io/serialization/parsers/pysimdjson_parser.py +55 -0
- exonware/xwsystem/io/serialization/parsers/rapidjson_parser.py +53 -0
- exonware/xwsystem/io/serialization/parsers/registry.py +91 -0
- exonware/xwsystem/io/serialization/parsers/standard.py +44 -0
- exonware/xwsystem/io/serialization/parsers/ujson_parser.py +53 -0
- exonware/xwsystem/io/serialization/registry.py +4 -4
- exonware/xwsystem/io/serialization/serializer.py +168 -79
- exonware/xwsystem/io/serialization/universal_options.py +367 -0
- exonware/xwsystem/io/serialization/utils/__init__.py +1 -2
- exonware/xwsystem/io/serialization/utils/path_ops.py +5 -6
- exonware/xwsystem/io/source_reader.py +223 -0
- exonware/xwsystem/io/stream/__init__.py +1 -1
- exonware/xwsystem/io/stream/async_operations.py +61 -14
- exonware/xwsystem/io/stream/base.py +1 -2
- exonware/xwsystem/io/stream/codec_io.py +6 -7
- exonware/xwsystem/ipc/__init__.py +1 -0
- exonware/xwsystem/ipc/async_fabric.py +4 -4
- exonware/xwsystem/ipc/base.py +6 -5
- exonware/xwsystem/ipc/contracts.py +41 -66
- exonware/xwsystem/ipc/defs.py +2 -1
- exonware/xwsystem/ipc/errors.py +2 -1
- exonware/xwsystem/ipc/message_queue.py +5 -2
- exonware/xwsystem/ipc/pipes.py +70 -34
- exonware/xwsystem/ipc/process_manager.py +7 -5
- exonware/xwsystem/ipc/process_pool.py +6 -5
- exonware/xwsystem/ipc/shared_memory.py +64 -11
- exonware/xwsystem/monitoring/__init__.py +7 -0
- exonware/xwsystem/monitoring/base.py +11 -8
- exonware/xwsystem/monitoring/contracts.py +86 -144
- exonware/xwsystem/monitoring/defs.py +2 -1
- exonware/xwsystem/monitoring/error_recovery.py +16 -3
- exonware/xwsystem/monitoring/errors.py +2 -1
- exonware/xwsystem/monitoring/facade.py +183 -0
- exonware/xwsystem/monitoring/memory_monitor.py +1 -0
- exonware/xwsystem/monitoring/metrics.py +1 -0
- exonware/xwsystem/monitoring/performance_manager_generic.py +7 -7
- exonware/xwsystem/monitoring/performance_monitor.py +1 -0
- exonware/xwsystem/monitoring/performance_validator.py +1 -0
- exonware/xwsystem/monitoring/system_monitor.py +6 -5
- exonware/xwsystem/monitoring/tracing.py +18 -16
- exonware/xwsystem/monitoring/tracker.py +2 -1
- exonware/xwsystem/operations/__init__.py +5 -50
- exonware/xwsystem/operations/base.py +3 -44
- exonware/xwsystem/operations/contracts.py +25 -15
- exonware/xwsystem/operations/defs.py +1 -1
- exonware/xwsystem/operations/diff.py +5 -4
- exonware/xwsystem/operations/errors.py +1 -1
- exonware/xwsystem/operations/merge.py +6 -4
- exonware/xwsystem/operations/patch.py +5 -4
- exonware/xwsystem/patterns/__init__.py +1 -0
- exonware/xwsystem/patterns/base.py +2 -1
- exonware/xwsystem/patterns/context_manager.py +2 -1
- exonware/xwsystem/patterns/contracts.py +215 -256
- exonware/xwsystem/patterns/defs.py +2 -1
- exonware/xwsystem/patterns/dynamic_facade.py +1 -0
- exonware/xwsystem/patterns/errors.py +2 -4
- exonware/xwsystem/patterns/handler_factory.py +2 -3
- exonware/xwsystem/patterns/import_registry.py +1 -0
- exonware/xwsystem/patterns/object_pool.py +1 -0
- exonware/xwsystem/patterns/registry.py +4 -43
- exonware/xwsystem/plugins/__init__.py +2 -1
- exonware/xwsystem/plugins/base.py +6 -5
- exonware/xwsystem/plugins/contracts.py +94 -158
- exonware/xwsystem/plugins/defs.py +2 -1
- exonware/xwsystem/plugins/errors.py +2 -1
- exonware/xwsystem/py.typed +3 -0
- exonware/xwsystem/query/__init__.py +36 -0
- exonware/xwsystem/query/contracts.py +56 -0
- exonware/xwsystem/query/errors.py +22 -0
- exonware/xwsystem/query/registry.py +128 -0
- exonware/xwsystem/runtime/__init__.py +2 -1
- exonware/xwsystem/runtime/base.py +4 -3
- exonware/xwsystem/runtime/contracts.py +39 -60
- exonware/xwsystem/runtime/defs.py +2 -1
- exonware/xwsystem/runtime/env.py +11 -9
- exonware/xwsystem/runtime/errors.py +2 -1
- exonware/xwsystem/runtime/reflection.py +3 -2
- exonware/xwsystem/security/__init__.py +68 -11
- exonware/xwsystem/security/audit.py +167 -0
- exonware/xwsystem/security/base.py +121 -24
- exonware/xwsystem/security/contracts.py +91 -146
- exonware/xwsystem/security/crypto.py +17 -16
- exonware/xwsystem/security/defs.py +2 -1
- exonware/xwsystem/security/errors.py +2 -1
- exonware/xwsystem/security/facade.py +321 -0
- exonware/xwsystem/security/file_security.py +330 -0
- exonware/xwsystem/security/hazmat.py +11 -8
- exonware/xwsystem/security/monitor.py +372 -0
- exonware/xwsystem/security/path_validator.py +140 -18
- exonware/xwsystem/security/policy.py +357 -0
- exonware/xwsystem/security/resource_limits.py +1 -0
- exonware/xwsystem/security/validator.py +455 -0
- exonware/xwsystem/shared/__init__.py +14 -1
- exonware/xwsystem/shared/base.py +285 -2
- exonware/xwsystem/shared/contracts.py +415 -126
- exonware/xwsystem/shared/defs.py +2 -1
- exonware/xwsystem/shared/errors.py +2 -2
- exonware/xwsystem/shared/xwobject.py +316 -0
- exonware/xwsystem/structures/__init__.py +1 -0
- exonware/xwsystem/structures/base.py +3 -2
- exonware/xwsystem/structures/circular_detector.py +15 -14
- exonware/xwsystem/structures/contracts.py +53 -76
- exonware/xwsystem/structures/defs.py +2 -1
- exonware/xwsystem/structures/errors.py +2 -1
- exonware/xwsystem/structures/tree_walker.py +2 -1
- exonware/xwsystem/threading/__init__.py +21 -4
- exonware/xwsystem/threading/async_primitives.py +6 -5
- exonware/xwsystem/threading/base.py +3 -2
- exonware/xwsystem/threading/contracts.py +87 -143
- exonware/xwsystem/threading/defs.py +2 -1
- exonware/xwsystem/threading/errors.py +2 -1
- exonware/xwsystem/threading/facade.py +175 -0
- exonware/xwsystem/threading/locks.py +1 -0
- exonware/xwsystem/threading/safe_factory.py +1 -0
- exonware/xwsystem/utils/__init__.py +40 -0
- exonware/xwsystem/utils/base.py +22 -21
- exonware/xwsystem/utils/contracts.py +50 -73
- exonware/xwsystem/utils/dt/__init__.py +19 -3
- exonware/xwsystem/utils/dt/base.py +5 -4
- exonware/xwsystem/utils/dt/contracts.py +22 -29
- exonware/xwsystem/utils/dt/defs.py +2 -1
- exonware/xwsystem/utils/dt/errors.py +2 -5
- exonware/xwsystem/utils/dt/formatting.py +88 -2
- exonware/xwsystem/utils/dt/humanize.py +10 -9
- exonware/xwsystem/utils/dt/parsing.py +56 -5
- exonware/xwsystem/utils/dt/timezone_utils.py +2 -24
- exonware/xwsystem/utils/errors.py +2 -4
- exonware/xwsystem/utils/paths.py +1 -0
- exonware/xwsystem/utils/string.py +49 -0
- exonware/xwsystem/utils/test_runner.py +139 -480
- exonware/xwsystem/utils/utils_contracts.py +2 -1
- exonware/xwsystem/utils/web.py +110 -0
- exonware/xwsystem/validation/__init__.py +25 -1
- exonware/xwsystem/validation/base.py +6 -5
- exonware/xwsystem/validation/contracts.py +29 -41
- exonware/xwsystem/validation/data_validator.py +1 -0
- exonware/xwsystem/validation/declarative.py +11 -8
- exonware/xwsystem/validation/defs.py +2 -1
- exonware/xwsystem/validation/errors.py +2 -1
- exonware/xwsystem/validation/facade.py +198 -0
- exonware/xwsystem/validation/fluent_validator.py +22 -19
- exonware/xwsystem/validation/schema_discovery.py +210 -0
- exonware/xwsystem/validation/type_safety.py +2 -1
- exonware/xwsystem/version.py +4 -4
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.3.dist-info}/METADATA +71 -4
- exonware_xwsystem-0.1.0.3.dist-info/RECORD +337 -0
- exonware/xwsystem/caching/USAGE_GUIDE.md +0 -779
- exonware/xwsystem/cli/__init__.py +0 -43
- exonware/xwsystem/cli/console.py +0 -113
- exonware/xwsystem/cli/defs.py +0 -134
- exonware/xwsystem/conf.py +0 -44
- exonware/xwsystem/security/auth.py +0 -484
- exonware_xwsystem-0.0.1.411.dist-info/RECORD +0 -274
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.3.dist-info}/WHEEL +0 -0
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -16,7 +16,7 @@ lazy, paged, and atomic access features without re-implementing I/O logic.
|
|
|
16
16
|
Company: eXonware.com
|
|
17
17
|
Author: Eng. Muhammad AlShehri
|
|
18
18
|
Email: connect@exonware.com
|
|
19
|
-
Version: 0.0.
|
|
19
|
+
Version: 0.1.0.3
|
|
20
20
|
Generation Date: 15-Dec-2025
|
|
21
21
|
"""
|
|
22
22
|
|
|
@@ -28,16 +28,85 @@ from typing import Any, Callable, Optional
|
|
|
28
28
|
from abc import ABC, abstractmethod
|
|
29
29
|
import json
|
|
30
30
|
import os
|
|
31
|
+
import platform
|
|
32
|
+
import sys
|
|
31
33
|
import tempfile
|
|
34
|
+
import multiprocessing as mp
|
|
35
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
32
36
|
|
|
33
37
|
from .serialization.auto_serializer import AutoSerializer
|
|
34
|
-
from
|
|
38
|
+
from exonware.xwsystem.config.logging_setup import get_logger
|
|
39
|
+
from exonware.xwsystem.config.performance import get_performance_config
|
|
35
40
|
|
|
36
41
|
|
|
37
42
|
logger = get_logger(__name__)
|
|
38
43
|
|
|
39
44
|
|
|
40
45
|
JsonMatchFn = Callable[[Any], bool]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _process_chunk_worker(args: tuple[int, int, int, str, str, str | None, int | None, bool]) -> tuple[list[int] | None, dict[str, int], int]:
|
|
49
|
+
"""
|
|
50
|
+
Process a single chunk (runs in worker process).
|
|
51
|
+
|
|
52
|
+
This is a module-level function to make it picklable for multiprocessing.
|
|
53
|
+
"""
|
|
54
|
+
chunk_id, start_offset, end_offset, file_path_str, encoding, id_field_arg, max_id_index_arg, build_line_offsets_arg = args
|
|
55
|
+
chunk_line_offsets: list[int] | None = [] if build_line_offsets_arg else None
|
|
56
|
+
chunk_id_index: dict[str, int] = {}
|
|
57
|
+
lines_processed = 0
|
|
58
|
+
|
|
59
|
+
# Import parser in worker process (can't pickle serializer)
|
|
60
|
+
from exonware.xwsystem.io.serialization.parsers.registry import get_best_available_parser
|
|
61
|
+
parser = get_best_available_parser()
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
with open(file_path_str, "rb") as f:
|
|
65
|
+
f.seek(start_offset)
|
|
66
|
+
current_offset = start_offset
|
|
67
|
+
|
|
68
|
+
while current_offset < end_offset:
|
|
69
|
+
line_start = current_offset
|
|
70
|
+
line = f.readline()
|
|
71
|
+
|
|
72
|
+
if not line:
|
|
73
|
+
break
|
|
74
|
+
|
|
75
|
+
current_offset = f.tell()
|
|
76
|
+
|
|
77
|
+
# Skip if we've gone past the end
|
|
78
|
+
if line_start >= end_offset:
|
|
79
|
+
break
|
|
80
|
+
|
|
81
|
+
# Optimize: Check empty lines early (match example code pattern)
|
|
82
|
+
raw = line.strip()
|
|
83
|
+
if not raw:
|
|
84
|
+
continue
|
|
85
|
+
|
|
86
|
+
# Track line offset if requested, calculate line_idx once
|
|
87
|
+
if build_line_offsets_arg:
|
|
88
|
+
chunk_line_offsets.append(line_start)
|
|
89
|
+
line_idx = len(chunk_line_offsets) - 1
|
|
90
|
+
else:
|
|
91
|
+
line_idx = lines_processed
|
|
92
|
+
|
|
93
|
+
if id_field_arg and (max_id_index_arg is None or len(chunk_id_index) < max_id_index_arg):
|
|
94
|
+
try:
|
|
95
|
+
# Parser accepts bytes directly (hybrid parser handles it)
|
|
96
|
+
obj = parser.loads(raw)
|
|
97
|
+
if isinstance(obj, dict) and id_field_arg in obj:
|
|
98
|
+
id_val = str(obj[id_field_arg])
|
|
99
|
+
chunk_id_index[id_val] = line_idx
|
|
100
|
+
except Exception:
|
|
101
|
+
# Skip invalid lines (fallback indexing)
|
|
102
|
+
pass
|
|
103
|
+
|
|
104
|
+
lines_processed += 1
|
|
105
|
+
except Exception as e:
|
|
106
|
+
# Can't use logger in worker process, just pass
|
|
107
|
+
pass
|
|
108
|
+
|
|
109
|
+
return (chunk_line_offsets, chunk_id_index, lines_processed)
|
|
41
110
|
JsonUpdateFn = Callable[[Any], Any]
|
|
42
111
|
|
|
43
112
|
|
|
@@ -283,7 +352,7 @@ class NDJSONDataOperations(ADataOperations):
|
|
|
283
352
|
try:
|
|
284
353
|
tmp_path.unlink()
|
|
285
354
|
except OSError:
|
|
286
|
-
#
|
|
355
|
+
# Cleanup attempt; do not mask original error.
|
|
287
356
|
logger.debug("Failed to cleanup temp file %s", tmp_path)
|
|
288
357
|
|
|
289
358
|
# ------------------------------------------------------------------
|
|
@@ -297,22 +366,75 @@ class NDJSONDataOperations(ADataOperations):
|
|
|
297
366
|
encoding: str = "utf-8",
|
|
298
367
|
id_field: str | None = None,
|
|
299
368
|
max_id_index: int | None = None,
|
|
369
|
+
use_parallel: bool | None = None,
|
|
370
|
+
num_workers: int | None = None,
|
|
371
|
+
chunk_size_mb: int = 100,
|
|
372
|
+
build_line_offsets: bool = True,
|
|
300
373
|
) -> JsonIndex:
|
|
301
374
|
"""
|
|
302
375
|
One-time full scan to build an index:
|
|
303
376
|
- line_offsets: byte offset of each JSON line
|
|
304
377
|
- optional id_index: obj[id_field] -> line_number
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
file_path: Path to JSONL file
|
|
381
|
+
encoding: File encoding (default: utf-8)
|
|
382
|
+
id_field: Optional field name to build id_index
|
|
383
|
+
max_id_index: Maximum entries in id_index (None = unlimited)
|
|
384
|
+
use_parallel: Enable parallel processing (None = auto-detect based on file size)
|
|
385
|
+
num_workers: Number of worker processes (None = CPU count)
|
|
386
|
+
chunk_size_mb: Chunk size in MB for parallel processing (default: 100MB)
|
|
387
|
+
build_line_offsets: If True, build line_offsets list (default: True, set False for faster id_index-only builds)
|
|
388
|
+
|
|
389
|
+
Returns:
|
|
390
|
+
JsonIndex with line_offsets (if build_line_offsets=True) and optional id_index
|
|
305
391
|
"""
|
|
306
392
|
target = Path(file_path)
|
|
307
393
|
if not target.exists():
|
|
308
394
|
raise FileNotFoundError(str(target))
|
|
309
395
|
|
|
310
|
-
|
|
396
|
+
# Auto-detect parallel based on config
|
|
397
|
+
perf_config = get_performance_config()
|
|
398
|
+
if use_parallel is None:
|
|
399
|
+
if not perf_config.enable_parallel_index:
|
|
400
|
+
use_parallel = False
|
|
401
|
+
else:
|
|
402
|
+
file_size_mb = target.stat().st_size / 1_048_576 # 1024 * 1024
|
|
403
|
+
use_parallel = file_size_mb > perf_config.parallel_index_threshold_mb
|
|
404
|
+
|
|
405
|
+
# Use config defaults for workers and chunk size
|
|
406
|
+
if num_workers is None:
|
|
407
|
+
num_workers = perf_config.parallel_index_workers
|
|
408
|
+
if chunk_size_mb == 100: # Only use default if not explicitly set
|
|
409
|
+
chunk_size_mb = perf_config.parallel_index_chunk_size_mb
|
|
410
|
+
|
|
411
|
+
# Use parallel processing if enabled and file is large enough
|
|
412
|
+
if use_parallel:
|
|
413
|
+
try:
|
|
414
|
+
return self._build_index_parallel(
|
|
415
|
+
target,
|
|
416
|
+
encoding=encoding,
|
|
417
|
+
id_field=id_field,
|
|
418
|
+
max_id_index=max_id_index,
|
|
419
|
+
num_workers=num_workers,
|
|
420
|
+
chunk_size_mb=chunk_size_mb,
|
|
421
|
+
build_line_offsets=build_line_offsets,
|
|
422
|
+
)
|
|
423
|
+
except Exception as e:
|
|
424
|
+
logger.warning(f"Parallel index building failed, falling back to single-threaded: {e}")
|
|
425
|
+
# Fall through to single-threaded
|
|
426
|
+
|
|
427
|
+
# Single-threaded implementation (optimized - matches example code exactly)
|
|
428
|
+
line_offsets: list[int] | None = [] if build_line_offsets else None
|
|
311
429
|
id_index: dict[str, int] | None = {} if id_field else None
|
|
312
430
|
|
|
313
431
|
size = target.stat().st_size
|
|
314
432
|
mtime = target.stat().st_mtime
|
|
315
433
|
|
|
434
|
+
# Cache parser instance (matches example code pattern)
|
|
435
|
+
from exonware.xwsystem.io.serialization.parsers.registry import get_best_available_parser
|
|
436
|
+
parser = get_best_available_parser()
|
|
437
|
+
|
|
316
438
|
offset = 0
|
|
317
439
|
with target.open("rb") as f:
|
|
318
440
|
line_no = 0
|
|
@@ -320,28 +442,173 @@ class NDJSONDataOperations(ADataOperations):
|
|
|
320
442
|
line = f.readline()
|
|
321
443
|
if not line:
|
|
322
444
|
break
|
|
323
|
-
|
|
445
|
+
if build_line_offsets:
|
|
446
|
+
line_offsets.append(offset)
|
|
324
447
|
|
|
325
448
|
if id_index is not None:
|
|
326
449
|
try:
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
)
|
|
450
|
+
# Match example code exactly: strip bytes, parse directly
|
|
451
|
+
raw = line.strip()
|
|
452
|
+
if raw:
|
|
453
|
+
# Parser accepts bytes directly (hybrid parser handles it)
|
|
454
|
+
obj = parser.loads(raw)
|
|
332
455
|
if isinstance(obj, dict) and id_field in obj:
|
|
333
456
|
id_val = str(obj[id_field])
|
|
334
457
|
if max_id_index is None or len(id_index) < max_id_index:
|
|
335
458
|
id_index[id_val] = line_no
|
|
336
459
|
except Exception:
|
|
337
|
-
# Index
|
|
338
|
-
|
|
460
|
+
# Index is robust to bad lines.
|
|
461
|
+
# Skip invalid lines silently for performance
|
|
462
|
+
pass
|
|
339
463
|
|
|
340
464
|
offset += len(line)
|
|
341
465
|
line_no += 1
|
|
342
466
|
|
|
343
467
|
meta = JsonIndexMeta(path=str(target), size=size, mtime=mtime, version=1)
|
|
344
468
|
return JsonIndex(meta=meta, line_offsets=line_offsets, id_index=id_index)
|
|
469
|
+
|
|
470
|
+
def _build_index_parallel(
    self,
    file_path: Path,
    *,
    encoding: str = "utf-8",
    id_field: str | None = None,
    max_id_index: int | None = None,
    num_workers: int | None = None,
    chunk_size_mb: int = 100,
    build_line_offsets: bool = True,
) -> JsonIndex:
    """
    Parallel index building using multiple CPU cores.

    This is an internal method called by build_index() when use_parallel=True.
    The file is split into byte ranges that start on line boundaries, each
    range is scanned by ``_process_chunk_worker`` in a separate process, and
    the per-chunk results are merged in file order.

    Args:
        file_path: Path to the JSONL file.
        encoding: Forwarded to the workers.
        id_field: Optional field name to build id_index from.
        max_id_index: Maximum entries in id_index (None = unlimited).
        num_workers: Number of worker processes. None picks the larger of the
            CPU count and one worker per 10 MB of file. The value is always
            clamped to at least 1, to at most 61 on Windows, and to the number
            of chunks.
        chunk_size_mb: Target chunk size in MB.
        build_line_offsets: If True, collect the byte offset of every line.

    Returns:
        JsonIndex with line_offsets (None when build_line_offsets is False)
        and id_index (None when id_field is not given).

    Raises:
        ValueError: If chunk_size_mb is not positive or the file is smaller
            than two chunks; build_index() treats this as "fall back to the
            single-threaded scan".
        Exception: Any error raised by a worker is logged and re-raised.
    """
    # One stat() call so size and mtime describe the same file state.
    stat_result = file_path.stat()
    file_size = stat_result.st_size
    chunk_size_bytes = chunk_size_mb * 1_048_576  # 1024 * 1024

    if chunk_size_bytes <= 0:
        # A non-positive chunk size would make the split loop below spin forever.
        raise ValueError("chunk_size_mb must be positive")

    # If file is too small, fall back to single-threaded
    if file_size < chunk_size_bytes * 2:
        raise ValueError("File too small for parallel processing")

    if num_workers is None:
        # 1 worker per 10MB, but never fewer than the CPU count.
        size_based_workers = file_size // (10 * 1_048_576)
        num_workers = max(mp.cpu_count(), size_based_workers)
    if platform.system() == 'Windows':
        # ProcessPoolExecutor rejects max_workers > 61 on Windows.
        num_workers = min(num_workers, 61)
    num_workers = max(1, num_workers)

    # Split the file into chunks whose boundaries sit on line starts, so no
    # worker ever begins in the middle of a JSON line.
    boundaries = [0]
    with file_path.open("rb") as f:
        target = chunk_size_bytes
        while target < file_size:
            # Reading from target - 1 to the end of that line lands on the
            # first line start at or after target.
            f.seek(target - 1)
            f.readline()
            aligned = f.tell()
            if aligned >= file_size:
                break
            boundaries.append(aligned)
            target = aligned + chunk_size_bytes
    boundaries.append(file_size)
    chunks = [
        (chunk_id, start, end)
        for chunk_id, (start, end) in enumerate(zip(boundaries, boundaries[1:]))
    ]

    # Limit number of chunks by merging neighbouring ranges.
    if len(chunks) > num_workers * 2:
        group_size = max(1, len(chunks) // num_workers)
        chunks = [
            (
                chunks[i][0],
                chunks[i][1],
                chunks[min(i + group_size, len(chunks)) - 1][2],
            )
            for i in range(0, len(chunks), group_size)
        ]

    # More workers than chunks would only spawn idle processes.
    num_workers = min(num_workers, len(chunks))

    logger.debug(f"Processing {len(chunks)} chunks with {num_workers} workers")

    # Process chunks in parallel; results are keyed by chunk_id so they can be
    # merged in file order regardless of completion order.
    path_str = str(file_path)
    chunk_results: dict[int, tuple[list[int] | None, dict[str, int], int]] = {}
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = {
            executor.submit(
                _process_chunk_worker,
                (chunk_id, start, end, path_str, encoding, id_field, max_id_index, build_line_offsets),
            ): chunk_id
            for chunk_id, start, end in chunks
        }
        for future in as_completed(futures):
            try:
                chunk_results[futures[future]] = future.result()
            except Exception as e:
                logger.warning(f"Chunk processing failed: {e}")
                # Do not start chunks whose result would be thrown away.
                for pending in futures:
                    pending.cancel()
                raise

    # Merge results (process in order by chunk_id)
    line_offsets: list[int] | None = [] if build_line_offsets else None
    id_index: dict[str, int] | None = {} if id_field else None
    base_line = 0  # global index of the first line of the current chunk

    for chunk_id in sorted(chunk_results):
        chunk_offsets, chunk_ids, chunk_line_count = chunk_results[chunk_id]

        if chunk_offsets is not None:
            chunk_line_count = len(chunk_offsets)
            if line_offsets is not None:
                line_offsets.extend(chunk_offsets)

        if id_index is not None and chunk_ids:
            if max_id_index is None:
                # Fast path: no limit, shift every relative line number at once.
                id_index.update(
                    {id_val: base_line + rel_line for id_val, rel_line in chunk_ids.items()}
                )
            else:
                for id_val, rel_line in chunk_ids.items():
                    if len(id_index) >= max_id_index:
                        break
                    id_index[id_val] = base_line + rel_line

        # Advance by the number of lines the worker actually indexed.
        base_line += chunk_line_count

    meta = JsonIndexMeta(
        path=str(file_path),
        size=file_size,
        mtime=stat_result.st_mtime,
        version=1,
    )
    return JsonIndex(meta=meta, line_offsets=line_offsets, id_index=id_index)
|
|
345
612
|
|
|
346
613
|
def indexed_get_by_line(
|
|
347
614
|
self,
|
|
@@ -476,5 +743,3 @@ __all__ = [
|
|
|
476
743
|
"ADataOperations",
|
|
477
744
|
"NDJSONDataOperations",
|
|
478
745
|
]
|
|
479
|
-
|
|
480
|
-
|
exonware/xwsystem/io/defs.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
#exonware/xwsystem/src/exonware/xwsystem/io/defs.py
|
|
1
2
|
"""
|
|
2
3
|
Company: eXonware.com
|
|
3
4
|
Author: Eng. Muhammad AlShehri
|
|
4
5
|
Email: connect@exonware.com
|
|
5
|
-
Version: 0.0.
|
|
6
|
+
Version: 0.1.0.3
|
|
6
7
|
Generation Date: 30-Oct-2025
|
|
7
8
|
|
|
8
9
|
IO module definitions - ALL enums and types in ONE place.
|
|
@@ -117,7 +118,7 @@ class PagingMode(Enum):
|
|
|
117
118
|
LINE = "line" # Page by line counts
|
|
118
119
|
RECORD = "record" # Page by record boundaries
|
|
119
120
|
SMART = "smart" # Adaptive paging
|
|
120
|
-
AUTO = "auto" # Auto-detect
|
|
121
|
+
AUTO = "auto" # Auto-detect strategy
|
|
121
122
|
|
|
122
123
|
|
|
123
124
|
# From file
|
|
@@ -221,7 +222,7 @@ class CompressionLevel(Enum):
|
|
|
221
222
|
"""Compression level presets."""
|
|
222
223
|
FAST = 1 # Fastest compression
|
|
223
224
|
BALANCED = 6 # Balance speed/size
|
|
224
|
-
BEST = 9 #
|
|
225
|
+
BEST = 9 # Maximum compression
|
|
225
226
|
|
|
226
227
|
|
|
227
228
|
|
exonware/xwsystem/io/errors.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
#exonware/xwsystem/src/exonware/xwsystem/io/errors.py
|
|
1
2
|
"""
|
|
2
3
|
Company: eXonware.com
|
|
3
4
|
Author: Eng. Muhammad AlShehri
|
|
4
5
|
Email: connect@exonware.com
|
|
5
|
-
Version: 0.0.
|
|
6
|
+
Version: 0.1.0.3
|
|
6
7
|
Generation Date: 30-Oct-2025
|
|
7
8
|
|
|
8
9
|
IO module errors - ALL exceptions in ONE place.
|
|
@@ -10,7 +11,7 @@ IO module errors - ALL exceptions in ONE place.
|
|
|
10
11
|
Consolidated from all submodules for maintainability.
|
|
11
12
|
"""
|
|
12
13
|
|
|
13
|
-
from typing import Any, Optional
|
|
14
|
+
from typing import Any, Optional
|
|
14
15
|
from pathlib import Path
|
|
15
16
|
|
|
16
17
|
|