exonware-xwsystem 0.0.1.411__py3-none-any.whl → 0.1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +2 -1
- exonware/conf.py +2 -2
- exonware/xwsystem/__init__.py +115 -43
- exonware/xwsystem/base.py +30 -0
- exonware/xwsystem/caching/__init__.py +39 -13
- exonware/xwsystem/caching/base.py +24 -6
- exonware/xwsystem/caching/bloom_cache.py +2 -2
- exonware/xwsystem/caching/cache_manager.py +2 -1
- exonware/xwsystem/caching/conditional.py +2 -2
- exonware/xwsystem/caching/contracts.py +85 -139
- exonware/xwsystem/caching/decorators.py +6 -19
- exonware/xwsystem/caching/defs.py +2 -1
- exonware/xwsystem/caching/disk_cache.py +2 -1
- exonware/xwsystem/caching/distributed.py +2 -1
- exonware/xwsystem/caching/errors.py +2 -1
- exonware/xwsystem/caching/events.py +110 -27
- exonware/xwsystem/caching/eviction_strategies.py +2 -2
- exonware/xwsystem/caching/external_caching_python.py +701 -0
- exonware/xwsystem/caching/facade.py +253 -0
- exonware/xwsystem/caching/factory.py +300 -0
- exonware/xwsystem/caching/fluent.py +14 -12
- exonware/xwsystem/caching/integrity.py +21 -6
- exonware/xwsystem/caching/lfu_cache.py +2 -1
- exonware/xwsystem/caching/lfu_optimized.py +18 -6
- exonware/xwsystem/caching/lru_cache.py +7 -4
- exonware/xwsystem/caching/memory_bounded.py +2 -2
- exonware/xwsystem/caching/metrics_exporter.py +2 -2
- exonware/xwsystem/caching/observable_cache.py +2 -2
- exonware/xwsystem/caching/pluggable_cache.py +2 -2
- exonware/xwsystem/caching/rate_limiter.py +2 -2
- exonware/xwsystem/caching/read_through.py +2 -2
- exonware/xwsystem/caching/secure_cache.py +81 -28
- exonware/xwsystem/caching/serializable.py +9 -7
- exonware/xwsystem/caching/stats.py +2 -2
- exonware/xwsystem/caching/tagging.py +2 -2
- exonware/xwsystem/caching/ttl_cache.py +4 -3
- exonware/xwsystem/caching/two_tier_cache.py +6 -3
- exonware/xwsystem/caching/utils.py +30 -12
- exonware/xwsystem/caching/validation.py +2 -2
- exonware/xwsystem/caching/warming.py +6 -3
- exonware/xwsystem/caching/write_behind.py +15 -6
- exonware/xwsystem/config/__init__.py +11 -17
- exonware/xwsystem/config/base.py +5 -5
- exonware/xwsystem/config/contracts.py +93 -153
- exonware/xwsystem/config/defaults.py +3 -2
- exonware/xwsystem/config/defs.py +3 -2
- exonware/xwsystem/config/errors.py +2 -5
- exonware/xwsystem/config/logging.py +12 -8
- exonware/xwsystem/config/logging_setup.py +3 -2
- exonware/xwsystem/config/performance.py +73 -391
- exonware/xwsystem/config/performance_modes.py +9 -8
- exonware/xwsystem/config/version_manager.py +1 -0
- exonware/xwsystem/config.py +27 -0
- exonware/xwsystem/console/__init__.py +53 -0
- exonware/xwsystem/console/base.py +133 -0
- exonware/xwsystem/console/cli/__init__.py +61 -0
- exonware/xwsystem/{cli → console/cli}/args.py +27 -24
- exonware/xwsystem/{cli → console/cli}/base.py +18 -87
- exonware/xwsystem/{cli → console/cli}/colors.py +15 -13
- exonware/xwsystem/console/cli/console.py +98 -0
- exonware/xwsystem/{cli → console/cli}/contracts.py +51 -69
- exonware/xwsystem/console/cli/defs.py +87 -0
- exonware/xwsystem/console/cli/encoding.py +69 -0
- exonware/xwsystem/{cli → console/cli}/errors.py +8 -3
- exonware/xwsystem/console/cli/event_logger.py +166 -0
- exonware/xwsystem/{cli → console/cli}/progress.py +25 -21
- exonware/xwsystem/{cli → console/cli}/prompts.py +3 -2
- exonware/xwsystem/{cli → console/cli}/tables.py +27 -24
- exonware/xwsystem/console/contracts.py +113 -0
- exonware/xwsystem/console/defs.py +154 -0
- exonware/xwsystem/console/errors.py +34 -0
- exonware/xwsystem/console/event_logger.py +385 -0
- exonware/xwsystem/console/writer.py +132 -0
- exonware/xwsystem/contracts.py +28 -0
- exonware/xwsystem/data_structures/__init__.py +23 -0
- exonware/xwsystem/data_structures/trie.py +34 -0
- exonware/xwsystem/data_structures/union_find.py +144 -0
- exonware/xwsystem/defs.py +17 -0
- exonware/xwsystem/errors.py +23 -0
- exonware/xwsystem/facade.py +62 -0
- exonware/xwsystem/http_client/__init__.py +22 -1
- exonware/xwsystem/http_client/advanced_client.py +8 -5
- exonware/xwsystem/http_client/base.py +3 -2
- exonware/xwsystem/http_client/client.py +7 -4
- exonware/xwsystem/http_client/contracts.py +42 -56
- exonware/xwsystem/http_client/defs.py +2 -1
- exonware/xwsystem/http_client/errors.py +2 -1
- exonware/xwsystem/http_client/facade.py +156 -0
- exonware/xwsystem/io/__init__.py +22 -3
- exonware/xwsystem/io/archive/__init__.py +8 -2
- exonware/xwsystem/io/archive/archive.py +1 -1
- exonware/xwsystem/io/archive/archive_files.py +4 -7
- exonware/xwsystem/io/archive/archivers.py +120 -10
- exonware/xwsystem/io/archive/base.py +4 -5
- exonware/xwsystem/io/archive/codec_integration.py +1 -2
- exonware/xwsystem/io/archive/compression.py +1 -2
- exonware/xwsystem/io/archive/facade.py +263 -0
- exonware/xwsystem/io/archive/formats/__init__.py +2 -3
- exonware/xwsystem/io/archive/formats/brotli_format.py +20 -7
- exonware/xwsystem/io/archive/formats/lz4_format.py +20 -7
- exonware/xwsystem/io/archive/formats/rar.py +11 -5
- exonware/xwsystem/io/archive/formats/sevenzip.py +12 -6
- exonware/xwsystem/io/archive/formats/squashfs_format.py +1 -2
- exonware/xwsystem/io/archive/formats/tar.py +52 -7
- exonware/xwsystem/io/archive/formats/wim_format.py +11 -5
- exonware/xwsystem/io/archive/formats/zip.py +1 -2
- exonware/xwsystem/io/archive/formats/zpaq_format.py +1 -2
- exonware/xwsystem/io/archive/formats/zstandard.py +20 -7
- exonware/xwsystem/io/base.py +119 -115
- exonware/xwsystem/io/codec/__init__.py +4 -2
- exonware/xwsystem/io/codec/base.py +19 -13
- exonware/xwsystem/io/codec/contracts.py +59 -2
- exonware/xwsystem/io/codec/registry.py +67 -21
- exonware/xwsystem/io/common/__init__.py +1 -1
- exonware/xwsystem/io/common/atomic.py +29 -16
- exonware/xwsystem/io/common/base.py +11 -10
- exonware/xwsystem/io/common/lock.py +6 -5
- exonware/xwsystem/io/common/path_manager.py +2 -1
- exonware/xwsystem/io/common/watcher.py +1 -2
- exonware/xwsystem/io/contracts.py +301 -433
- exonware/xwsystem/io/contracts_1.py +1180 -0
- exonware/xwsystem/io/data_operations.py +279 -14
- exonware/xwsystem/io/defs.py +4 -3
- exonware/xwsystem/io/errors.py +3 -2
- exonware/xwsystem/io/facade.py +87 -61
- exonware/xwsystem/io/file/__init__.py +1 -1
- exonware/xwsystem/io/file/base.py +8 -9
- exonware/xwsystem/io/file/conversion.py +2 -3
- exonware/xwsystem/io/file/file.py +61 -18
- exonware/xwsystem/io/file/paged_source.py +8 -8
- exonware/xwsystem/io/file/paging/__init__.py +1 -2
- exonware/xwsystem/io/file/paging/byte_paging.py +4 -5
- exonware/xwsystem/io/file/paging/line_paging.py +2 -3
- exonware/xwsystem/io/file/paging/record_paging.py +2 -3
- exonware/xwsystem/io/file/paging/registry.py +1 -2
- exonware/xwsystem/io/file/source.py +13 -17
- exonware/xwsystem/io/filesystem/__init__.py +1 -1
- exonware/xwsystem/io/filesystem/base.py +1 -2
- exonware/xwsystem/io/filesystem/local.py +3 -4
- exonware/xwsystem/io/folder/__init__.py +1 -1
- exonware/xwsystem/io/folder/base.py +1 -2
- exonware/xwsystem/io/folder/folder.py +16 -7
- exonware/xwsystem/io/indexing/__init__.py +14 -0
- exonware/xwsystem/io/indexing/facade.py +443 -0
- exonware/xwsystem/io/path_parser.py +98 -0
- exonware/xwsystem/io/serialization/__init__.py +21 -3
- exonware/xwsystem/io/serialization/auto_serializer.py +146 -20
- exonware/xwsystem/io/serialization/base.py +84 -34
- exonware/xwsystem/io/serialization/contracts.py +50 -73
- exonware/xwsystem/io/serialization/defs.py +2 -1
- exonware/xwsystem/io/serialization/errors.py +2 -1
- exonware/xwsystem/io/serialization/flyweight.py +154 -7
- exonware/xwsystem/io/serialization/format_detector.py +15 -14
- exonware/xwsystem/io/serialization/formats/__init__.py +8 -5
- exonware/xwsystem/io/serialization/formats/binary/bson.py +15 -6
- exonware/xwsystem/io/serialization/formats/binary/cbor.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/marshal.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/msgpack.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/pickle.py +5 -5
- exonware/xwsystem/io/serialization/formats/binary/plistlib.py +5 -5
- exonware/xwsystem/io/serialization/formats/database/dbm.py +7 -7
- exonware/xwsystem/io/serialization/formats/database/shelve.py +7 -7
- exonware/xwsystem/io/serialization/formats/database/sqlite3.py +7 -7
- exonware/xwsystem/io/serialization/formats/tabular/__init__.py +27 -0
- exonware/xwsystem/io/serialization/formats/tabular/base.py +89 -0
- exonware/xwsystem/io/serialization/formats/tabular/csv.py +319 -0
- exonware/xwsystem/io/serialization/formats/tabular/df.py +249 -0
- exonware/xwsystem/io/serialization/formats/tabular/excel.py +291 -0
- exonware/xwsystem/io/serialization/formats/tabular/googlesheets.py +374 -0
- exonware/xwsystem/io/serialization/formats/text/__init__.py +1 -1
- exonware/xwsystem/io/serialization/formats/text/append_only_log.py +199 -0
- exonware/xwsystem/io/serialization/formats/text/configparser.py +5 -5
- exonware/xwsystem/io/serialization/formats/text/csv.py +7 -5
- exonware/xwsystem/io/serialization/formats/text/formdata.py +5 -5
- exonware/xwsystem/io/serialization/formats/text/json.py +65 -33
- exonware/xwsystem/io/serialization/formats/text/json5.py +8 -4
- exonware/xwsystem/io/serialization/formats/text/jsonlines.py +113 -25
- exonware/xwsystem/io/serialization/formats/text/multipart.py +5 -5
- exonware/xwsystem/io/serialization/formats/text/toml.py +8 -6
- exonware/xwsystem/io/serialization/formats/text/xml.py +25 -20
- exonware/xwsystem/io/serialization/formats/text/yaml.py +8 -6
- exonware/xwsystem/io/serialization/parsers/__init__.py +16 -0
- exonware/xwsystem/io/serialization/parsers/base.py +60 -0
- exonware/xwsystem/io/serialization/parsers/hybrid_parser.py +62 -0
- exonware/xwsystem/io/serialization/parsers/msgspec_parser.py +48 -0
- exonware/xwsystem/io/serialization/parsers/orjson_direct_parser.py +54 -0
- exonware/xwsystem/io/serialization/parsers/orjson_parser.py +62 -0
- exonware/xwsystem/io/serialization/parsers/pysimdjson_parser.py +55 -0
- exonware/xwsystem/io/serialization/parsers/rapidjson_parser.py +53 -0
- exonware/xwsystem/io/serialization/parsers/registry.py +91 -0
- exonware/xwsystem/io/serialization/parsers/standard.py +44 -0
- exonware/xwsystem/io/serialization/parsers/ujson_parser.py +53 -0
- exonware/xwsystem/io/serialization/registry.py +4 -4
- exonware/xwsystem/io/serialization/serializer.py +168 -79
- exonware/xwsystem/io/serialization/universal_options.py +367 -0
- exonware/xwsystem/io/serialization/utils/__init__.py +1 -2
- exonware/xwsystem/io/serialization/utils/path_ops.py +5 -6
- exonware/xwsystem/io/source_reader.py +223 -0
- exonware/xwsystem/io/stream/__init__.py +1 -1
- exonware/xwsystem/io/stream/async_operations.py +61 -14
- exonware/xwsystem/io/stream/base.py +1 -2
- exonware/xwsystem/io/stream/codec_io.py +6 -7
- exonware/xwsystem/ipc/__init__.py +1 -0
- exonware/xwsystem/ipc/async_fabric.py +4 -4
- exonware/xwsystem/ipc/base.py +6 -5
- exonware/xwsystem/ipc/contracts.py +41 -66
- exonware/xwsystem/ipc/defs.py +2 -1
- exonware/xwsystem/ipc/errors.py +2 -1
- exonware/xwsystem/ipc/message_queue.py +5 -2
- exonware/xwsystem/ipc/pipes.py +70 -34
- exonware/xwsystem/ipc/process_manager.py +7 -5
- exonware/xwsystem/ipc/process_pool.py +6 -5
- exonware/xwsystem/ipc/shared_memory.py +64 -11
- exonware/xwsystem/monitoring/__init__.py +7 -0
- exonware/xwsystem/monitoring/base.py +11 -8
- exonware/xwsystem/monitoring/contracts.py +86 -144
- exonware/xwsystem/monitoring/defs.py +2 -1
- exonware/xwsystem/monitoring/error_recovery.py +16 -3
- exonware/xwsystem/monitoring/errors.py +2 -1
- exonware/xwsystem/monitoring/facade.py +183 -0
- exonware/xwsystem/monitoring/memory_monitor.py +1 -0
- exonware/xwsystem/monitoring/metrics.py +1 -0
- exonware/xwsystem/monitoring/performance_manager_generic.py +7 -7
- exonware/xwsystem/monitoring/performance_monitor.py +1 -0
- exonware/xwsystem/monitoring/performance_validator.py +1 -0
- exonware/xwsystem/monitoring/system_monitor.py +6 -5
- exonware/xwsystem/monitoring/tracing.py +18 -16
- exonware/xwsystem/monitoring/tracker.py +2 -1
- exonware/xwsystem/operations/__init__.py +5 -50
- exonware/xwsystem/operations/base.py +3 -44
- exonware/xwsystem/operations/contracts.py +25 -15
- exonware/xwsystem/operations/defs.py +1 -1
- exonware/xwsystem/operations/diff.py +5 -4
- exonware/xwsystem/operations/errors.py +1 -1
- exonware/xwsystem/operations/merge.py +6 -4
- exonware/xwsystem/operations/patch.py +5 -4
- exonware/xwsystem/patterns/__init__.py +1 -0
- exonware/xwsystem/patterns/base.py +2 -1
- exonware/xwsystem/patterns/context_manager.py +2 -1
- exonware/xwsystem/patterns/contracts.py +215 -256
- exonware/xwsystem/patterns/defs.py +2 -1
- exonware/xwsystem/patterns/dynamic_facade.py +1 -0
- exonware/xwsystem/patterns/errors.py +2 -4
- exonware/xwsystem/patterns/handler_factory.py +2 -3
- exonware/xwsystem/patterns/import_registry.py +1 -0
- exonware/xwsystem/patterns/object_pool.py +1 -0
- exonware/xwsystem/patterns/registry.py +4 -43
- exonware/xwsystem/plugins/__init__.py +2 -1
- exonware/xwsystem/plugins/base.py +6 -5
- exonware/xwsystem/plugins/contracts.py +94 -158
- exonware/xwsystem/plugins/defs.py +2 -1
- exonware/xwsystem/plugins/errors.py +2 -1
- exonware/xwsystem/py.typed +3 -0
- exonware/xwsystem/query/__init__.py +36 -0
- exonware/xwsystem/query/contracts.py +56 -0
- exonware/xwsystem/query/errors.py +22 -0
- exonware/xwsystem/query/registry.py +128 -0
- exonware/xwsystem/runtime/__init__.py +2 -1
- exonware/xwsystem/runtime/base.py +4 -3
- exonware/xwsystem/runtime/contracts.py +39 -60
- exonware/xwsystem/runtime/defs.py +2 -1
- exonware/xwsystem/runtime/env.py +11 -9
- exonware/xwsystem/runtime/errors.py +2 -1
- exonware/xwsystem/runtime/reflection.py +3 -2
- exonware/xwsystem/security/__init__.py +68 -11
- exonware/xwsystem/security/audit.py +167 -0
- exonware/xwsystem/security/base.py +121 -24
- exonware/xwsystem/security/contracts.py +91 -146
- exonware/xwsystem/security/crypto.py +17 -16
- exonware/xwsystem/security/defs.py +2 -1
- exonware/xwsystem/security/errors.py +2 -1
- exonware/xwsystem/security/facade.py +321 -0
- exonware/xwsystem/security/file_security.py +330 -0
- exonware/xwsystem/security/hazmat.py +11 -8
- exonware/xwsystem/security/monitor.py +372 -0
- exonware/xwsystem/security/path_validator.py +140 -18
- exonware/xwsystem/security/policy.py +357 -0
- exonware/xwsystem/security/resource_limits.py +1 -0
- exonware/xwsystem/security/validator.py +455 -0
- exonware/xwsystem/shared/__init__.py +14 -1
- exonware/xwsystem/shared/base.py +285 -2
- exonware/xwsystem/shared/contracts.py +415 -126
- exonware/xwsystem/shared/defs.py +2 -1
- exonware/xwsystem/shared/errors.py +2 -2
- exonware/xwsystem/shared/xwobject.py +316 -0
- exonware/xwsystem/structures/__init__.py +1 -0
- exonware/xwsystem/structures/base.py +3 -2
- exonware/xwsystem/structures/circular_detector.py +15 -14
- exonware/xwsystem/structures/contracts.py +53 -76
- exonware/xwsystem/structures/defs.py +2 -1
- exonware/xwsystem/structures/errors.py +2 -1
- exonware/xwsystem/structures/tree_walker.py +2 -1
- exonware/xwsystem/threading/__init__.py +21 -4
- exonware/xwsystem/threading/async_primitives.py +6 -5
- exonware/xwsystem/threading/base.py +3 -2
- exonware/xwsystem/threading/contracts.py +87 -143
- exonware/xwsystem/threading/defs.py +2 -1
- exonware/xwsystem/threading/errors.py +2 -1
- exonware/xwsystem/threading/facade.py +175 -0
- exonware/xwsystem/threading/locks.py +1 -0
- exonware/xwsystem/threading/safe_factory.py +1 -0
- exonware/xwsystem/utils/__init__.py +40 -0
- exonware/xwsystem/utils/base.py +22 -21
- exonware/xwsystem/utils/contracts.py +50 -73
- exonware/xwsystem/utils/dt/__init__.py +19 -3
- exonware/xwsystem/utils/dt/base.py +5 -4
- exonware/xwsystem/utils/dt/contracts.py +22 -29
- exonware/xwsystem/utils/dt/defs.py +2 -1
- exonware/xwsystem/utils/dt/errors.py +2 -5
- exonware/xwsystem/utils/dt/formatting.py +88 -2
- exonware/xwsystem/utils/dt/humanize.py +10 -9
- exonware/xwsystem/utils/dt/parsing.py +56 -5
- exonware/xwsystem/utils/dt/timezone_utils.py +2 -24
- exonware/xwsystem/utils/errors.py +2 -4
- exonware/xwsystem/utils/paths.py +1 -0
- exonware/xwsystem/utils/string.py +49 -0
- exonware/xwsystem/utils/test_runner.py +139 -480
- exonware/xwsystem/utils/utils_contracts.py +2 -1
- exonware/xwsystem/utils/web.py +110 -0
- exonware/xwsystem/validation/__init__.py +25 -1
- exonware/xwsystem/validation/base.py +6 -5
- exonware/xwsystem/validation/contracts.py +29 -41
- exonware/xwsystem/validation/data_validator.py +1 -0
- exonware/xwsystem/validation/declarative.py +11 -8
- exonware/xwsystem/validation/defs.py +2 -1
- exonware/xwsystem/validation/errors.py +2 -1
- exonware/xwsystem/validation/facade.py +198 -0
- exonware/xwsystem/validation/fluent_validator.py +22 -19
- exonware/xwsystem/validation/schema_discovery.py +210 -0
- exonware/xwsystem/validation/type_safety.py +2 -1
- exonware/xwsystem/version.py +4 -4
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.3.dist-info}/METADATA +71 -4
- exonware_xwsystem-0.1.0.3.dist-info/RECORD +337 -0
- exonware/xwsystem/caching/USAGE_GUIDE.md +0 -779
- exonware/xwsystem/cli/__init__.py +0 -43
- exonware/xwsystem/cli/console.py +0 -113
- exonware/xwsystem/cli/defs.py +0 -134
- exonware/xwsystem/conf.py +0 -44
- exonware/xwsystem/security/auth.py +0 -484
- exonware_xwsystem-0.0.1.411.dist-info/RECORD +0 -274
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.3.dist-info}/WHEEL +0 -0
- {exonware_xwsystem-0.0.1.411.dist-info → exonware_xwsystem-0.1.0.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
#exonware/xwsystem/src/exonware/xwsystem/io/serialization/universal_options.py
|
|
2
|
+
"""
|
|
3
|
+
Universal Serialization Options
|
|
4
|
+
|
|
5
|
+
This module provides universal option mapping across different serialization formats.
|
|
6
|
+
Options like pretty, compact, sorted, canonical can be mapped to format-specific options.
|
|
7
|
+
|
|
8
|
+
Company: eXonware.com
|
|
9
|
+
Author: Eng. Muhammad AlShehri
|
|
10
|
+
Email: connect@exonware.com
|
|
11
|
+
Version: 0.1.0.3
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Dict, List, Set, Any, Optional
|
|
15
|
+
from enum import Enum
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class UniversalOption(Enum):
    """Names of the format-independent ("universal") serialization options.

    Each member's value is the string key used for that option in the
    per-format tables of this module (``_FORMAT_OPTION_MAPS`` and
    ``_FORMAT_SUPPORTED_OPTIONS``).
    """

    # Layout and ordering flags.
    PRETTY = "pretty"
    COMPACT = "compact"
    SORTED = "sorted"
    CANONICAL = "canonical"
    INDENT = "indent"

    # Character / value handling.
    ENSURE_ASCII = "ensure_ascii"
    ALLOW_NAN = "allow_nan"
    STRIP_WHITESPACE = "strip_whitespace"
    PRESERVE_QUOTES = "preserve_quotes"

    # Separators (listed for CSV in the per-format tables).
    LINE_SEPARATOR = "line_separator"
    ITEM_SEPARATOR = "item_separator"

    # Listed for XML in the per-format tables; added here so the enum covers
    # every option name those tables accept.
    DECLARATION = "declaration"
    ENCODING = "encoding"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Format-specific option mappings.
#
# Outer key: lower-case format name.
# Inner dict: universal option name -> name of the format-specific keyword
# that map_universal_options() writes into its result.
#
# NOTE(review): only some (format, option) pairs have a dedicated branch in
# map_universal_options(). Every other pair is copied through unchanged, so a
# boolean flag such as csv "pretty" -> "lineterminator" or toml "compact" ->
# "indent" ends up as the *value* of that keyword. Confirm this is intended.
_FORMAT_OPTION_MAPS: Dict[str, Dict[str, str]] = {
    "json": {
        "pretty": "indent",
        "compact": "separators",
        "sorted": "sort_keys",
        "canonical": "sort_keys",
        "indent": "indent",
        "ensure_ascii": "ensure_ascii",
        "allow_nan": "allow_nan",
    },
    "yaml": {
        # "pretty" and "compact" drive the same keyword with opposite values.
        "pretty": "default_flow_style",
        "compact": "default_flow_style",
        "sorted": "sort_keys",
        "canonical": "canonical",
        "indent": "indent",
        "strip_whitespace": "strip_whitespace",
    },
    "xml": {
        # "pretty" and "compact" drive the same keyword with opposite values.
        "pretty": "pretty_print",
        "compact": "pretty_print",
        "sorted": "sort_attributes",
        "canonical": "canonical",
        "indent": "indent",
        "preserve_quotes": "preserve_quotes",
        "declaration": "full_document",  # xmltodict uses full_document to control XML declaration
        "encoding": "encoding",
    },
    "toml": {
        "pretty": "pretty",
        "compact": "indent",
        "sorted": "sort_keys",
        "indent": "indent",
    },
    "csv": {
        "pretty": "lineterminator",
        "compact": "lineterminator",
        "sorted": "sort_columns",
        "line_separator": "lineterminator",
        "item_separator": "delimiter",
    },
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Supported options per format.
#
# Outer key: lower-case format name.
# Value: set of universal option names accepted for that format by
# validate_universal_options().
#
# The binary formats (msgpack, bson, cbor) appear only here; they have no
# entry in _FORMAT_OPTION_MAPS, so "compact" validates for them but
# map_universal_options() produces no keyword for it.
_FORMAT_SUPPORTED_OPTIONS: Dict[str, Set[str]] = {
    "json": {
        "pretty", "compact", "sorted", "canonical", "indent",
        "ensure_ascii", "allow_nan"
    },
    "yaml": {
        "pretty", "compact", "sorted", "canonical", "indent",
        "strip_whitespace"
    },
    "xml": {
        "pretty", "compact", "sorted", "canonical", "indent",
        "preserve_quotes", "declaration", "encoding"
    },
    "toml": {
        "pretty", "compact", "sorted", "indent"
    },
    "csv": {
        "pretty", "compact", "sorted", "line_separator", "item_separator"
    },
    "msgpack": {
        "compact"  # Always compact
    },
    "bson": {
        "compact"  # Always compact
    },
    "cbor": {
        "compact"  # Always compact
    },
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def map_universal_options(
    format_name: str,
    universal_options: Optional[Dict[str, Any]] = None,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Map universal options to format-specific options.

    Options are processed in the order they were supplied (dict entries
    first, then keyword arguments). Options that have no entry in the
    format's mapping table are silently ignored, and an unknown format
    yields an empty result.

    Resolution rules applied before mapping:
        * a truthy ``canonical`` forces ``sorted=True``;
        * when both ``compact`` and ``pretty`` are truthy, ``compact`` wins
          and ``pretty`` is dropped.

    Args:
        format_name: Serialization format name (json, yaml, xml, etc.);
            matched case-insensitively.
        universal_options: Optional dictionary of universal options. It is
            not modified.
        **kwargs: Universal options as keyword arguments (pretty, compact,
            sorted, etc.). These override entries of ``universal_options``.

    Returns:
        Dictionary of format-specific options

    Example:
        >>> # Using dict
        >>> options = map_universal_options("json", {"pretty": True, "sorted": True})
        >>> # Returns: {"indent": 2, "use_orjson": False, "sort_keys": True}
        >>>
        >>> # Using keyword arguments
        >>> options = map_universal_options("json", pretty=True, sorted=True)
        >>> # Returns: {"indent": 2, "use_orjson": False, "sort_keys": True}
    """
    if universal_options is None:
        universal_options = {}

    # Work on a fresh dict so the caller's mapping is never mutated
    # (kwargs override dict values).
    merged_options = {**universal_options, **kwargs}

    format_lower = format_name.lower()
    option_map = _FORMAT_OPTION_MAPS.get(format_lower, {})
    format_options: Dict[str, Any] = {}

    # Canonical output implies sorted output.
    if merged_options.get("canonical"):
        merged_options["sorted"] = True

    # Compact wins over pretty when both are requested.
    if merged_options.get("compact") and merged_options.get("pretty"):
        merged_options.pop("pretty", None)

    for universal_key, universal_value in merged_options.items():
        if universal_key not in option_map:
            continue
        format_key = option_map[universal_key]

        if format_lower == "json":
            if universal_key == "pretty":
                if universal_value:
                    format_options[format_key] = 2
                    format_options["use_orjson"] = False
                else:
                    # A disabled flag only supplies defaults; it must not
                    # wipe out an indent / engine choice made by another
                    # option (e.g. an explicit indent=4).
                    format_options.setdefault(format_key, None)
                    format_options.setdefault("use_orjson", False)
                continue
            if universal_key == "compact":
                if universal_value:
                    format_options["indent"] = None
                    format_options["separators"] = (",", ":")
                    format_options["use_orjson"] = True
                else:
                    # Same rule as above: compact=False must not cancel a
                    # pretty=True / indent=N that was already applied.
                    format_options.setdefault("indent", None)
                    format_options["separators"] = None
                    format_options.setdefault("use_orjson", True)
                continue
            if universal_key == "canonical":
                # Only a truthy flag switches canonical output on; a falsy
                # value previously enabled it as well.
                if universal_value:
                    format_options["sort_keys"] = True
                    format_options["ensure_ascii"] = True
                    format_options["indent"] = None
                    format_options["canonical"] = True
                continue
        elif format_lower == "yaml":
            if universal_key == "pretty":
                # Pretty YAML means block style, i.e. flow style off.
                format_options[format_key] = not universal_value
                format_options["indent"] = 2 if universal_value else None
                continue
            if universal_key == "compact":
                format_options[format_key] = bool(universal_value)
                continue
        elif format_lower == "xml":
            if universal_key == "compact":
                # Compact is the inverse of pretty printing.
                format_options[format_key] = not universal_value
                continue

        # Everything else (including xml "pretty") passes through unchanged.
        format_options[format_key] = universal_value

    return format_options
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def get_supported_universal_options(format_name: Optional[str] = None) -> Set[str] | Dict[str, Any]:
    """
    Get the supported universal options for a format, or metadata for all options.

    Args:
        format_name: Optional serialization format name (matched
            case-insensitively). If None, returns all universal options info.

    Returns:
        If format_name provided: a new set of supported universal option
            names for that format (empty for an unknown format). The set is
            a copy, so mutating it does not affect this module's tables.
        If format_name is None: dictionary keyed by option name (in sorted
            order); each value holds ``"type"``, ``"default"`` and
            ``"formats"`` (upper-cased names of the formats supporting it).

    Example:
        >>> # Get options for a specific format
        >>> options = get_supported_universal_options("json")
        >>> # Returns: {"pretty", "compact", "sorted", "canonical", ...}
        >>>
        >>> # Get all universal options info
        >>> all_options = get_supported_universal_options()
        >>> # Returns: {"pretty": {"type": bool, "default": False, "formats": [...]}, ...}
    """
    if format_name is not None:
        # Copy so callers cannot mutate the module-level table by accident.
        return set(_FORMAT_SUPPORTED_OPTIONS.get(format_name.lower(), ()))

    type_map = {
        "pretty": bool,
        "compact": bool,
        "sorted": bool,
        "canonical": bool,
        "indent": int,
        "ensure_ascii": bool,
        "allow_nan": bool,
        "preserve_quotes": bool,
        "strip_whitespace": bool,
        "encoding": str,
        "declaration": bool,
        "line_separator": str,
        "item_separator": str,
    }
    default_map = {
        "pretty": False,
        "compact": False,
        "sorted": False,
        "canonical": False,
        "indent": None,
        "ensure_ascii": True,
        "allow_nan": True,
        "preserve_quotes": False,
        "strip_whitespace": False,
        "encoding": "utf-8",
        "declaration": True,
        "line_separator": "\n",
        "item_separator": None,
    }

    # Union of every option name that any format supports.
    all_option_names: Set[str] = set()
    for format_opts in _FORMAT_SUPPORTED_OPTIONS.values():
        all_option_names.update(format_opts)

    # Iterate in sorted order so the result's key order is reproducible
    # between runs (plain set iteration depends on string hashing).
    option_info: Dict[str, Any] = {}
    for option_name in sorted(all_option_names):
        option_info[option_name] = {
            "type": type_map.get(option_name, Any),
            "default": default_map.get(option_name, None),
            "formats": [
                fmt.upper()
                for fmt, opts in _FORMAT_SUPPORTED_OPTIONS.items()
                if option_name in opts
            ],
        }

    return option_info
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def validate_universal_options(
    format_name: str,
    universal_options: Optional[Dict[str, Any]] = None,
    **kwargs: Any
) -> tuple[bool, Optional[str]]:
    """
    Validate universal options for a format.

    Checks are applied in this order and the first failure is reported:
    unsupported option names, wrong value types, then the ``pretty`` /
    ``compact`` conflict.

    Args:
        format_name: Serialization format name (matched case-insensitively)
        universal_options: Optional dictionary of universal options to validate
        **kwargs: Universal options as keyword arguments; these override
            entries of ``universal_options``

    Returns:
        Tuple of (is_valid, error_message); error_message is None when valid

    Example:
        >>> # Using dict
        >>> is_valid, error = validate_universal_options(
        ...     "json", {"pretty": True, "invalid_option": True}
        ... )
        >>> # Returns: (False, "Unsupported options for json: invalid_option")
        >>>
        >>> # Using keyword arguments
        >>> is_valid, error = validate_universal_options("json", pretty=True)
        >>> # Returns: (True, None)
    """
    if universal_options is None:
        universal_options = {}

    # Merge kwargs into universal_options (kwargs override dict values).
    merged_options = {**universal_options, **kwargs}

    format_lower = format_name.lower()
    supported = get_supported_universal_options(format_lower)

    # Report unsupported names in sorted order so the message is stable;
    # str() keeps the join safe if a dict key is not a string.
    unsupported = sorted(str(key) for key in set(merged_options) - supported)
    if unsupported:
        return False, f"Unsupported options for {format_name}: {', '.join(unsupported)}"

    type_map = {
        "pretty": bool,
        "compact": bool,
        "sorted": bool,
        "canonical": bool,
        "ensure_ascii": bool,
        "allow_nan": bool,
        "preserve_quotes": bool,
        "strip_whitespace": bool,
        "indent": int,
        "encoding": str,
        "declaration": bool,
        "line_separator": str,
        "item_separator": str,
    }
    # Options whose default is None in get_supported_universal_options();
    # passing that default explicitly must not fail validation.
    nullable_options = {"indent", "item_separator"}

    for key, value in merged_options.items():
        if value is None and key in nullable_options:
            continue
        expected_type = type_map.get(key)
        if expected_type and not isinstance(value, expected_type):
            return False, f"Option '{key}' expects type {expected_type.__name__}, got {type(value).__name__}"

    # Requesting both layouts at once is contradictory.
    if merged_options.get("pretty") and merged_options.get("compact"):
        return False, "Cannot use both 'pretty' and 'compact' options together"

    return True, None
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def get_all_supported_formats() -> List[str]:
    """
    Return the names of every format that has universal-option support.

    Returns:
        New list of format names, in the order they are declared in
        ``_FORMAT_SUPPORTED_OPTIONS``
    """
    # Iterating a dict yields its keys in insertion order.
    format_names = [name for name in _FORMAT_SUPPORTED_OPTIONS]
    return format_names
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def get_format_option_info(format_name: str) -> Dict[str, Any]:
    """
    Get detailed information about format option support.

    Args:
        format_name: Serialization format name (matched case-insensitively)

    Returns:
        Dictionary with these keys:
            ``"format"``: the name exactly as passed in;
            ``"supported_options"``: sorted list of supported option names;
            ``"option_mappings"``: copy of the universal -> format-specific
                keyword mapping (empty when the format has none);
            ``"total_options"``: number of supported options.
    """
    format_lower = format_name.lower()
    # Sort for a reproducible order; raw set iteration varies between runs.
    supported = sorted(get_supported_universal_options(format_lower))

    return {
        "format": format_name,
        "supported_options": supported,
        # Copy so callers cannot modify the module-level mapping table.
        "option_mappings": dict(_FORMAT_OPTION_MAPS.get(format_lower, {})),
        "total_options": len(supported),
    }
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
Company: eXonware.com
|
|
5
5
|
Author: Eng. Muhammad AlShehri
|
|
6
6
|
Email: connect@exonware.com
|
|
7
|
-
Version: 0.0.
|
|
7
|
+
Version: 0.1.0.3
|
|
8
8
|
Generation Date: November 9, 2025
|
|
9
9
|
|
|
10
10
|
Serialization utilities module.
|
|
@@ -29,4 +29,3 @@ __all__ = [
|
|
|
29
29
|
"validate_path_security",
|
|
30
30
|
"normalize_path",
|
|
31
31
|
]
|
|
32
|
-
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
Company: eXonware.com
|
|
5
5
|
Author: Eng. Muhammad AlShehri
|
|
6
6
|
Email: connect@exonware.com
|
|
7
|
-
Version: 0.0.
|
|
7
|
+
Version: 0.1.0.3
|
|
8
8
|
Generation Date: November 9, 2025
|
|
9
9
|
|
|
10
10
|
Path operations utilities for serialization formats.
|
|
@@ -13,7 +13,7 @@ Provides JSONPointer path parsing and manipulation utilities that serializers
|
|
|
13
13
|
can use for path-based operations. Includes path validation for security.
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
from typing import Any
|
|
16
|
+
from typing import Any
|
|
17
17
|
from pathlib import Path
|
|
18
18
|
|
|
19
19
|
from ...errors import SerializationError
|
|
@@ -42,7 +42,7 @@ def validate_json_pointer(path: str) -> bool:
|
|
|
42
42
|
Example:
|
|
43
43
|
>>> validate_json_pointer("/users/0/name") # Valid
|
|
44
44
|
True
|
|
45
|
-
>>> validate_json_pointer("invalid") # Invalid -
|
|
45
|
+
>>> validate_json_pointer("invalid")  # Invalid - does not start with /
|
|
46
46
|
ValueError
|
|
47
47
|
"""
|
|
48
48
|
if not isinstance(path, str):
|
|
@@ -51,7 +51,7 @@ def validate_json_pointer(path: str) -> bool:
|
|
|
51
51
|
if not path:
|
|
52
52
|
raise ValueError("Path cannot be empty")
|
|
53
53
|
|
|
54
|
-
# JSONPointer
|
|
54
|
+
# JSONPointer starts with / for non-empty paths
|
|
55
55
|
if path != "/" and not path.startswith("/"):
|
|
56
56
|
raise ValueError(f"JSONPointer path must start with '/', got: {path}")
|
|
57
57
|
|
|
@@ -65,7 +65,7 @@ def validate_json_pointer(path: str) -> bool:
|
|
|
65
65
|
return True
|
|
66
66
|
|
|
67
67
|
|
|
68
|
-
def parse_json_pointer(path: str) -> list[
|
|
68
|
+
def parse_json_pointer(path: str) -> list[str | int]:
|
|
69
69
|
"""
|
|
70
70
|
Parse a JSONPointer path into a list of keys/indices.
|
|
71
71
|
|
|
@@ -301,4 +301,3 @@ def normalize_path(path: str) -> str:
|
|
|
301
301
|
return "/"
|
|
302
302
|
|
|
303
303
|
return path
|
|
304
|
-
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Source reader: load text content from a URI or local path.
|
|
4
|
+
|
|
5
|
+
All logic for "read from path or URI" lives in xwsystem. Supports multiple
|
|
6
|
+
schemes (file, http, https, ftp, etc.) with configurable security:
|
|
7
|
+
- allowed_schemes: which protocols are allowed
|
|
8
|
+
- allow_external: whether non-file (remote) sources are allowed
|
|
9
|
+
- timeout_sec, max_size_mb: limits
|
|
10
|
+
|
|
11
|
+
Callers (e.g. xwdata) pass SourceLoadConfig from their own config; xwsystem
|
|
12
|
+
enforces it. No scheme or fetch logic in xwdata—single entry point here and
|
|
13
|
+
via XWFile.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import urllib.parse
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Optional
|
|
23
|
+
|
|
24
|
+
from .common.atomic import FileOperationError
|
|
25
|
+
from .stream.async_operations import async_safe_read_text
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# -----------------------------------------------------------------------------
|
|
29
|
+
# Configuration (consumed by xwdata / xwschema; defined here for single source)
|
|
30
|
+
# -----------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
@dataclass
class SourceLoadConfig:
    """
    Settings that govern loading text from a URI or a local path.

    The caller (e.g. xwdata) builds an instance from its own security
    settings; the reader in this module checks the scheme against
    ``allowed_schemes`` / ``allow_external`` and applies the limits.
    """
    # Schemes the reader will accept.
    allowed_schemes: tuple[str, ...] = ('file', 'http', 'https', 'ftp')
    # When False, any scheme other than the local file scheme is rejected.
    allow_external: bool = True
    # Timeout, in seconds, used for remote fetches.
    timeout_sec: float = 30.0
    # Upper bound on content size, in megabytes.
    max_size_mb: float = 100.0
    # Text encoding used for local file reads.
    encoding: str = 'utf-8'

    @classmethod
    def strict(cls) -> SourceLoadConfig:
        """Build a locked-down config: local files only, tight limits."""
        settings = {
            'allowed_schemes': ('file',),
            'allow_external': False,
            'timeout_sec': 10.0,
            'max_size_mb': 10.0,
        }
        return cls(**settings)

    @classmethod
    def relaxed(cls) -> SourceLoadConfig:
        """Build a permissive config: common remote schemes, generous limits."""
        settings = {
            'allowed_schemes': ('file', 'http', 'https', 'ftp'),
            'allow_external': True,
            'timeout_sec': 60.0,
            'max_size_mb': 500.0,
        }
        return cls(**settings)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# -----------------------------------------------------------------------------
|
|
68
|
+
# Scheme detection and validation
|
|
69
|
+
# -----------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
def get_scheme(uri_or_path: str) -> str:
    """
    Classify the input by URI scheme, treating anything else as a local file.

    A scheme is only reported when the parsed URI also has a network
    location, so drive-letter paths such as ``C:\\data`` and scheme-only
    strings such as ``mailto:x`` fall back to ``'file'``.

    Args:
        uri_or_path: URI (http://..., file://...) or local path

    Returns:
        The lowercased scheme, or 'file' for empty, non-string or
        location-less input.
    """
    if isinstance(uri_or_path, str) and uri_or_path:
        parts = urllib.parse.urlparse(uri_or_path)
        if parts.scheme and parts.netloc:
            return parts.scheme.lower()
    return 'file'
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def is_external_scheme(scheme: str) -> bool:
    """Tell whether *scheme* names something other than the local file scheme.

    Both ``'file'`` and the empty string count as local; the comparison is
    exact (no case folding is applied here).
    """
    is_local = scheme == 'file' or scheme == ''
    return not is_local
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def is_http_url(path_or_url: str) -> bool:
    """Convenience check: True when ``get_scheme`` reports http or https."""
    detected = get_scheme(path_or_url)
    return detected == 'http' or detected == 'https'
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _validate_config(uri_or_path: str, config: SourceLoadConfig) -> None:
    """
    Check that the config permits loading from *uri_or_path*.

    The scheme allow-list is checked first, then the ``allow_external``
    switch.

    Raises:
        FileOperationError: If the scheme is not in ``config.allowed_schemes``,
            or it is an external scheme while ``config.allow_external`` is False.
    """
    detected = get_scheme(uri_or_path)

    scheme_permitted = detected in config.allowed_schemes
    if not scheme_permitted:
        message = f"Scheme '{detected}' not allowed. Allowed: {config.allowed_schemes}"
        raise FileOperationError(message)

    if not config.allow_external and is_external_scheme(detected):
        message = "External sources are disabled (allow_external=False)"
        raise FileOperationError(message)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# -----------------------------------------------------------------------------
|
|
114
|
+
# Async read: single entry point for xwsystem (and XWFile / xwdata)
|
|
115
|
+
# -----------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
async def read_source_text(
    uri_or_path: str,
    config: Optional[SourceLoadConfig] = None,
    timeout_sec: Optional[float] = None,
    max_size_mb: Optional[float] = None,
    encoding: Optional[str] = None,
) -> tuple[str, dict]:
    """
    Read text from a URI or local path (async). All scheme and fetch logic here.

    The source is validated against the config first, then dispatched by
    scheme:
    - file: local path or file:// URI, read through async_safe_read_text
    - http, https: _read_http_text
    - ftp: _read_ftp_text

    Args:
        uri_or_path: File path or URI (file://, http://, https://, ftp://)
        config: SourceLoadConfig (allowed_schemes, allow_external, limits).
            If None, SourceLoadConfig.relaxed() is used.
        timeout_sec: Override config timeout (optional)
        max_size_mb: Override config max size (optional)
        encoding: Override config encoding for file reads (optional); it is
            not forwarded to the http/ftp readers.

    Returns:
        (content_str, metadata) where metadata has 'source' and 'content_type'.
        For local files 'content_type' is always None.

    Raises:
        FileOperationError: If the scheme is rejected by the config, or is
            allowed by the config but has no reader here. Readers may raise
            it as well.
    """
    cfg = config if config is not None else SourceLoadConfig.relaxed()
    timeout = timeout_sec if timeout_sec is not None else cfg.timeout_sec
    max_mb = max_size_mb if max_size_mb is not None else cfg.max_size_mb
    enc = encoding or cfg.encoding

    _validate_config(uri_or_path, cfg)
    scheme = get_scheme(uri_or_path)

    if scheme == 'file':
        path = uri_or_path
        # get_scheme() compares schemes case-insensitively, so the prefix
        # test must too; otherwise "FILE:///x" would reach Path() verbatim.
        if path[:7].lower() == 'file://':
            path = urllib.parse.unquote(urllib.parse.urlparse(path).path)
        content = await async_safe_read_text(
            Path(path), encoding=enc, max_size_mb=max_mb
        )
        return content, {'source': uri_or_path, 'content_type': None}

    if scheme in ('http', 'https'):
        return await _read_http_text(uri_or_path, timeout, max_mb)

    if scheme == 'ftp':
        return await _read_ftp_text(uri_or_path, timeout, max_mb)

    # Reached when a config allows a scheme this module cannot fetch.
    raise FileOperationError(f"Unsupported scheme for reading: {scheme}")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
async def _read_http_text(
    url: str, timeout_sec: float, max_size_mb: float
) -> tuple[str, dict]:
    """
    Fetch an http(s) URL and return (text, metadata).

    aiohttp is used when it can be imported; otherwise the request is made
    with urllib in the default executor so the event loop is not blocked.

    Args:
        url: The http:// or https:// URL to fetch.
        timeout_sec: Timeout in seconds for the request.
        max_size_mb: Maximum accepted body size in megabytes.

    Returns:
        (text, metadata): the body decoded as UTF-8 with undecodable bytes
        replaced, and a dict with 'source' (the URL) and 'content_type'
        (the Content-Type header, or None when absent or empty).

    Raises:
        FileOperationError: If the fetch fails for any reason, or the body
            is larger than max_size_mb.
    """
    content_type: Optional[str] = None
    try:
        try:
            import aiohttp
            timeout = aiohttp.ClientTimeout(total=timeout_sec)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url) as response:
                    response.raise_for_status()
                    raw = await response.read()
                    content_type = response.headers.get('Content-Type') or None
        except ImportError:
            # `import urllib.parse` at module level does not load the
            # `urllib.request` submodule, so import it explicitly here.
            import urllib.request

            def _fetch() -> tuple[bytes, Optional[str]]:
                req = urllib.request.Request(url)
                with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
                    ct = resp.headers.get('Content-Type')
                    return resp.read(), ct if ct else None

            # We are inside a coroutine, so a loop is guaranteed to be running;
            # get_event_loop() is deprecated for this use.
            loop = asyncio.get_running_loop()
            raw, content_type = await loop.run_in_executor(None, _fetch)
    except Exception as e:
        raise FileOperationError(f"Failed to load from URL: {e}") from e

    # NOTE: the limit is enforced after the whole body has been read.
    size_mb = len(raw) / (1024 * 1024)
    if size_mb > max_size_mb:
        raise FileOperationError(
            f"URL response size ({size_mb:.1f}MB) exceeds max ({max_size_mb}MB)"
        )
    text = raw.decode('utf-8', errors='replace')
    return text, {'source': url, 'content_type': content_type}
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def _read_ftp_text(
    url: str, timeout_sec: float, max_size_mb: float
) -> tuple[str, dict]:
    """
    Fetch a URL with urllib in the default executor and return (text, metadata).

    Used by read_source_text for ftp:// sources.

    Args:
        url: The URL to open with urllib.request.urlopen.
        timeout_sec: Timeout in seconds passed to urlopen.
        max_size_mb: Maximum accepted body size in megabytes.

    Returns:
        (text, metadata): the body decoded as UTF-8 with undecodable bytes
        replaced, and a dict with 'source' (the URL) and 'content_type'
        (always None here).

    Raises:
        FileOperationError: If the fetch fails for any reason, or the body
            is larger than max_size_mb.
    """
    # `import urllib.parse` at module level does not load the
    # `urllib.request` submodule, so import it explicitly here.
    import urllib.request

    def _fetch() -> bytes:
        with urllib.request.urlopen(url, timeout=timeout_sec) as resp:
            return resp.read()

    # We are inside a coroutine, so a loop is guaranteed to be running;
    # get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    try:
        raw = await loop.run_in_executor(None, _fetch)
    except Exception as e:
        raise FileOperationError(f"Failed to load from FTP: {e}") from e

    # NOTE: the limit is enforced after the whole body has been read.
    size_mb = len(raw) / (1024 * 1024)
    if size_mb > max_size_mb:
        raise FileOperationError(
            f"FTP response size ({size_mb:.1f}MB) exceeds max ({max_size_mb}MB)"
        )
    text = raw.decode('utf-8', errors='replace')
    return text, {'source': url, 'content_type': None}
|