sdmxlib 0.35.0__tar.gz → 0.35.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/PKG-INFO +1 -1
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/pyproject.toml +1 -1
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/local/registry.py +48 -1
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/writers.py +25 -10
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/README.md +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/_duckdb.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/client.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/federated.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/filters.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/policy.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/providers.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/query.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/registry.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/session.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/catalog.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/data_store.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_csv/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_csv/reader.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_csv/writer.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/metadata.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/reader.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/writer.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml/_common.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/namespaces.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/reader.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/writer.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/metadata.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/namespaces.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/reader.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/writer.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/namespaces.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/reader.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/writer.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/local/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/local/data_store.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/annotations.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/base.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/binding.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/category.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/code_query.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/codelist.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/collections.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/concept.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/constraint.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/convert.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/dataflow.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/dataset.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/datastructure.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/expr.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/hierarchy.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/hierarchy_query.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/in_memory_registry.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/istring.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/mapping.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/message.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/metadataflow.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/metadataset.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/metadatastructure.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/organisation.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/provision.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/ref.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/registry.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/representation.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/urn.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/validation.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/polars.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/py.typed +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/rest.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/sql.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/__init__.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/_kinds.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/lazy.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/readers.py +0 -0
- {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/schema.py +0 -0
|
@@ -30,7 +30,9 @@ stashes the URI; the DuckDB connection is opened by ``open()`` /
|
|
|
30
30
|
local.add(...)
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
|
+
import logging
|
|
33
34
|
import threading
|
|
35
|
+
import time
|
|
34
36
|
from collections.abc import Iterable, Iterator
|
|
35
37
|
from contextlib import contextmanager
|
|
36
38
|
from pathlib import Path
|
|
@@ -42,7 +44,7 @@ import duckdb
|
|
|
42
44
|
from sdmxlib._duckdb import scalar
|
|
43
45
|
from sdmxlib.model.collections import ItemList
|
|
44
46
|
from sdmxlib.model.ref import Ref
|
|
45
|
-
from sdmxlib.model.registry import InMemoryRegistry
|
|
47
|
+
from sdmxlib.model.registry import InMemoryRegistry, artefact_urn
|
|
46
48
|
from sdmxlib.model.urn import SdmxUrn
|
|
47
49
|
from sdmxlib.storage import readers, writers
|
|
48
50
|
from sdmxlib.storage._kinds import NAME_TO_CLASS as _TYPE_REGISTRY
|
|
@@ -68,6 +70,26 @@ _ITEM_TO_SCHEME: dict[str, tuple[str, str]] = {
|
|
|
68
70
|
DEFAULT_RESOLVE_CACHE_SIZE = 256
|
|
69
71
|
|
|
70
72
|
|
|
73
|
+
_logger = logging.getLogger(__name__)
|
|
74
|
+
"""Per-artefact write timings are emitted at DEBUG level.
|
|
75
|
+
|
|
76
|
+
Enable with::
|
|
77
|
+
|
|
78
|
+
logging.getLogger("sdmxlib.local.registry").setLevel(logging.DEBUG)
|
|
79
|
+
|
|
80
|
+
Each ``add()`` call emits one line::
|
|
81
|
+
|
|
82
|
+
artefact_persist class=Codelist wall=0.0431 urn=urn:sdmx:...=ESTAT:CL_FREQ(1.0)
|
|
83
|
+
|
|
84
|
+
A ``transaction()`` block emits one summary at exit::
|
|
85
|
+
|
|
86
|
+
transaction_summary count=523 wall=187.40 by_class=[Codelist:1(0.04s) Hierarchy:12(3.39s) MetadataSet:498(183.10s) …]
|
|
87
|
+
|
|
88
|
+
Default level is WARNING, so production callers see no output and pay
|
|
89
|
+
no measurement cost (the timer is gated on ``isEnabledFor(DEBUG)``).
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
|
|
71
93
|
class _FIFOCache(dict[str, Any]):
|
|
72
94
|
"""Bounded resolver cache. FIFO eviction, lock-free reads.
|
|
73
95
|
|
|
@@ -227,6 +249,9 @@ class Registry:
|
|
|
227
249
|
self._owns_connection = True
|
|
228
250
|
self._resolve_cache: _FIFOCache = _FIFOCache(resolve_cache_size)
|
|
229
251
|
self._in_transaction: bool = False
|
|
252
|
+
# Per-transaction telemetry collected when DEBUG logging is on.
|
|
253
|
+
# Tuple is (count, total_wall_seconds) keyed by artefact class.
|
|
254
|
+
self._txn_stats: dict[str, tuple[int, float]] = {}
|
|
230
255
|
# Per-thread read cursor (#80). Each thread that reads from this
|
|
231
256
|
# Registry lazily gets its own cursor on first call; subsequent
|
|
232
257
|
# calls on the same thread reuse it. Cleared on close().
|
|
@@ -296,6 +321,7 @@ class Registry:
|
|
|
296
321
|
instance._owns_connection = False # noqa: SLF001
|
|
297
322
|
instance._resolve_cache = _FIFOCache(resolve_cache_size) # noqa: SLF001
|
|
298
323
|
instance._in_transaction = False # noqa: SLF001
|
|
324
|
+
instance._txn_stats = {} # noqa: SLF001
|
|
299
325
|
instance._thread_local = threading.local() # noqa: SLF001
|
|
300
326
|
instance._initialise_schema() # noqa: SLF001
|
|
301
327
|
return instance
|
|
@@ -428,6 +454,10 @@ class Registry:
|
|
|
428
454
|
already-open block does not emit a second BEGIN — DuckDB
|
|
429
455
|
rejects nested transactions, and silently joining the outer
|
|
430
456
|
block is what callers usually want.
|
|
457
|
+
|
|
458
|
+
When the ``sdmxlib.local.registry`` logger is set to DEBUG,
|
|
459
|
+
a one-line ``transaction_summary`` is emitted at exit with the
|
|
460
|
+
artefact-count and total-wall-time breakdown by class.
|
|
431
461
|
"""
|
|
432
462
|
if self._in_transaction:
|
|
433
463
|
yield
|
|
@@ -435,6 +465,8 @@ class Registry:
|
|
|
435
465
|
conn = self._c
|
|
436
466
|
conn.begin()
|
|
437
467
|
self._in_transaction = True
|
|
468
|
+
self._txn_stats.clear()
|
|
469
|
+
t_start = time.perf_counter() if _logger.isEnabledFor(logging.DEBUG) else 0.0
|
|
438
470
|
try:
|
|
439
471
|
yield
|
|
440
472
|
conn.commit()
|
|
@@ -444,6 +476,12 @@ class Registry:
|
|
|
444
476
|
finally:
|
|
445
477
|
self._in_transaction = False
|
|
446
478
|
self._resolve_cache.clear()
|
|
479
|
+
if _logger.isEnabledFor(logging.DEBUG) and self._txn_stats:
|
|
480
|
+
total = time.perf_counter() - t_start
|
|
481
|
+
count = sum(n for n, _ in self._txn_stats.values())
|
|
482
|
+
by_class = " ".join(f"{cls}:{n}({wall:.2f}s)" for cls, (n, wall) in sorted(self._txn_stats.items()))
|
|
483
|
+
_logger.debug("transaction_summary count=%d wall=%.2f by_class=[%s]", count, total, by_class)
|
|
484
|
+
self._txn_stats.clear()
|
|
447
485
|
|
|
448
486
|
def add(self, artefact: Any, *, source_registry: str | None = None) -> None:
|
|
449
487
|
"""Add or replace an artefact in the store.
|
|
@@ -462,7 +500,16 @@ class Registry:
|
|
|
462
500
|
``docs/concurrency.md``.
|
|
463
501
|
"""
|
|
464
502
|
tag = source_registry if source_registry is not None else self._source_registry
|
|
503
|
+
timed = _logger.isEnabledFor(logging.DEBUG)
|
|
504
|
+
t0 = time.perf_counter() if timed else 0.0
|
|
465
505
|
writers.put_artefact(self._c, artefact, source_registry=tag)
|
|
506
|
+
if timed:
|
|
507
|
+
wall = time.perf_counter() - t0
|
|
508
|
+
cls = type(artefact).__name__
|
|
509
|
+
_logger.debug("artefact_persist class=%s wall=%.4f urn=%s", cls, wall, artefact_urn(artefact))
|
|
510
|
+
if self._in_transaction:
|
|
511
|
+
n, total = self._txn_stats.get(cls, (0, 0.0))
|
|
512
|
+
self._txn_stats[cls] = (n + 1, total + wall)
|
|
466
513
|
# Inside an open ``transaction()`` block the cache is cleared
|
|
467
514
|
# once at exit; per-call clearing would just churn it.
|
|
468
515
|
if not self._in_transaction:
|
|
@@ -294,16 +294,31 @@ def _put_refs(
|
|
|
294
294
|
"""
|
|
295
295
|
if not refs:
|
|
296
296
|
return
|
|
297
|
-
#
|
|
298
|
-
#
|
|
299
|
-
#
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
297
|
+
# Arrow columnar bulk insert. The 0.35.0 executemany rewrite assumed
|
|
298
|
+
# DuckDB would amortise the ON CONFLICT PK lookup across the batch;
|
|
299
|
+
# the v0.35.1 instrumentation showed it doesn't — per-row cost on a
|
|
300
|
+
# 295k-edge hierarchy was 11.5 ms (issue #146 / #140). The Arrow
|
|
301
|
+
# path inserts the whole batch in one SELECT and never pays the
|
|
302
|
+
# per-row index lookup.
|
|
303
|
+
#
|
|
304
|
+
# ON CONFLICT is dropped: within one ``_put_refs`` call, source_urn
|
|
305
|
+
# is constant (one artefact's outgoing edges) and
|
|
306
|
+
# ``_delete_artefact_cascade`` has already cleared this artefact's
|
|
307
|
+
# prior rows, so no PK collision is possible across calls. Intra-
|
|
308
|
+
# call duplicates (a DSD referencing the same Codelist via two
|
|
309
|
+
# dimensions) are dedup'd here in Python, keeping the first ordinal
|
|
310
|
+
# seen — same set-semantics behaviour the ON CONFLICT clause used to
|
|
311
|
+
# give. The ordinal on this table is only a positional hint; the
|
|
312
|
+
# canonical per-dimension position lives in ``dsd_component``.
|
|
313
|
+
seen: dict[tuple[str, str, str], int | None] = {}
|
|
314
|
+
for src, tgt, role, ordinal in refs:
|
|
315
|
+
seen.setdefault((src, tgt, role), ordinal)
|
|
316
|
+
rows: list[list[Any]] = [[src, tgt, role, ordinal] for (src, tgt, role), ordinal in seen.items()]
|
|
317
|
+
_execute_bulk_insert(
|
|
318
|
+
conn,
|
|
319
|
+
"artefact_ref",
|
|
320
|
+
["source_urn", "target_urn", "role", "ordinal"],
|
|
321
|
+
rows,
|
|
307
322
|
)
|
|
308
323
|
|
|
309
324
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|