sdmxlib 0.35.0__tar.gz → 0.35.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/PKG-INFO +1 -1
  2. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/pyproject.toml +1 -1
  3. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/local/registry.py +48 -1
  4. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/writers.py +25 -10
  5. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/README.md +0 -0
  6. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/__init__.py +0 -0
  7. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/_duckdb.py +0 -0
  8. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/__init__.py +0 -0
  9. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/client.py +0 -0
  10. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/federated.py +0 -0
  11. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/filters.py +0 -0
  12. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/policy.py +0 -0
  13. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/providers.py +0 -0
  14. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/query.py +0 -0
  15. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/registry.py +0 -0
  16. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/api/session.py +0 -0
  17. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/catalog.py +0 -0
  18. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/data_store.py +0 -0
  19. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/__init__.py +0 -0
  20. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_csv/__init__.py +0 -0
  21. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_csv/reader.py +0 -0
  22. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_csv/writer.py +0 -0
  23. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/__init__.py +0 -0
  24. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/metadata.py +0 -0
  25. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/reader.py +0 -0
  26. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_json/writer.py +0 -0
  27. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml/__init__.py +0 -0
  28. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml/_common.py +0 -0
  29. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/__init__.py +0 -0
  30. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/namespaces.py +0 -0
  31. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/reader.py +0 -0
  32. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml21/writer.py +0 -0
  33. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/__init__.py +0 -0
  34. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/metadata.py +0 -0
  35. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/namespaces.py +0 -0
  36. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/reader.py +0 -0
  37. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml30/writer.py +0 -0
  38. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/__init__.py +0 -0
  39. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/namespaces.py +0 -0
  40. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/reader.py +0 -0
  41. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/formats/sdmx_ml31/writer.py +0 -0
  42. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/local/__init__.py +0 -0
  43. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/local/data_store.py +0 -0
  44. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/__init__.py +0 -0
  45. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/annotations.py +0 -0
  46. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/base.py +0 -0
  47. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/binding.py +0 -0
  48. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/category.py +0 -0
  49. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/code_query.py +0 -0
  50. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/codelist.py +0 -0
  51. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/collections.py +0 -0
  52. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/concept.py +0 -0
  53. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/constraint.py +0 -0
  54. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/convert.py +0 -0
  55. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/dataflow.py +0 -0
  56. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/dataset.py +0 -0
  57. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/datastructure.py +0 -0
  58. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/expr.py +0 -0
  59. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/hierarchy.py +0 -0
  60. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/hierarchy_query.py +0 -0
  61. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/in_memory_registry.py +0 -0
  62. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/istring.py +0 -0
  63. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/mapping.py +0 -0
  64. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/message.py +0 -0
  65. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/metadataflow.py +0 -0
  66. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/metadataset.py +0 -0
  67. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/metadatastructure.py +0 -0
  68. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/organisation.py +0 -0
  69. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/provision.py +0 -0
  70. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/ref.py +0 -0
  71. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/registry.py +0 -0
  72. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/representation.py +0 -0
  73. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/urn.py +0 -0
  74. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/model/validation.py +0 -0
  75. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/polars.py +0 -0
  76. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/py.typed +0 -0
  77. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/rest.py +0 -0
  78. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/sql.py +0 -0
  79. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/__init__.py +0 -0
  80. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/_kinds.py +0 -0
  81. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/lazy.py +0 -0
  82. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/readers.py +0 -0
  83. {sdmxlib-0.35.0 → sdmxlib-0.35.2}/src/sdmxlib/storage/schema.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sdmxlib
3
- Version: 0.35.0
3
+ Version: 0.35.2
4
4
  Summary: SDMX structural metadata library for Python
5
5
  Keywords: sdmx,statistics,metadata,datastructure
6
6
  Author: gabrielgellner
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sdmxlib"
3
- version = "0.35.0"
3
+ version = "0.35.2"
4
4
  description = "SDMX structural metadata library for Python"
5
5
  readme = "README.md"
6
6
  license = { text = "Apache-2.0" }
@@ -30,7 +30,9 @@ stashes the URI; the DuckDB connection is opened by ``open()`` /
30
30
  local.add(...)
31
31
  """
32
32
 
33
+ import logging
33
34
  import threading
35
+ import time
34
36
  from collections.abc import Iterable, Iterator
35
37
  from contextlib import contextmanager
36
38
  from pathlib import Path
@@ -42,7 +44,7 @@ import duckdb
42
44
  from sdmxlib._duckdb import scalar
43
45
  from sdmxlib.model.collections import ItemList
44
46
  from sdmxlib.model.ref import Ref
45
- from sdmxlib.model.registry import InMemoryRegistry
47
+ from sdmxlib.model.registry import InMemoryRegistry, artefact_urn
46
48
  from sdmxlib.model.urn import SdmxUrn
47
49
  from sdmxlib.storage import readers, writers
48
50
  from sdmxlib.storage._kinds import NAME_TO_CLASS as _TYPE_REGISTRY
@@ -68,6 +70,26 @@ _ITEM_TO_SCHEME: dict[str, tuple[str, str]] = {
68
70
  DEFAULT_RESOLVE_CACHE_SIZE = 256
69
71
 
70
72
 
73
+ _logger = logging.getLogger(__name__)
74
+ """Per-artefact write timings are emitted at DEBUG level.
75
+
76
+ Enable with::
77
+
78
+ logging.getLogger("sdmxlib.local.registry").setLevel(logging.DEBUG)
79
+
80
+ Each ``add()`` call emits one line::
81
+
82
+ artefact_persist class=Codelist wall=0.0431 urn=urn:sdmx:...=ESTAT:CL_FREQ(1.0)
83
+
84
+ A ``transaction()`` block emits one summary at exit::
85
+
86
+ transaction_summary count=523 wall=187.40 by_class=[Codelist:1(0.04s) Hierarchy:12(2.31s) MetadataSet:498(185.02s) …]
87
+
88
+ Default level is WARNING, so production callers see no output and pay
89
+ no measurement cost (the timer is gated on ``isEnabledFor(DEBUG)``).
90
+ """
91
+
92
+
71
93
  class _FIFOCache(dict[str, Any]):
72
94
  """Bounded resolver cache. FIFO eviction, lock-free reads.
73
95
 
@@ -227,6 +249,9 @@ class Registry:
227
249
  self._owns_connection = True
228
250
  self._resolve_cache: _FIFOCache = _FIFOCache(resolve_cache_size)
229
251
  self._in_transaction: bool = False
252
+ # Per-transaction telemetry collected when DEBUG logging is on.
253
+ # Tuple is (count, total_wall_seconds) keyed by artefact class.
254
+ self._txn_stats: dict[str, tuple[int, float]] = {}
230
255
  # Per-thread read cursor (#80). Each thread that reads from this
231
256
  # Registry lazily gets its own cursor on first call; subsequent
232
257
  # calls on the same thread reuse it. Cleared on close().
@@ -296,6 +321,7 @@ class Registry:
296
321
  instance._owns_connection = False # noqa: SLF001
297
322
  instance._resolve_cache = _FIFOCache(resolve_cache_size) # noqa: SLF001
298
323
  instance._in_transaction = False # noqa: SLF001
324
+ instance._txn_stats = {} # noqa: SLF001
299
325
  instance._thread_local = threading.local() # noqa: SLF001
300
326
  instance._initialise_schema() # noqa: SLF001
301
327
  return instance
@@ -428,6 +454,10 @@ class Registry:
428
454
  already-open block does not emit a second BEGIN — DuckDB
429
455
  rejects nested transactions, and silently joining the outer
430
456
  block is what callers usually want.
457
+
458
+ When the ``sdmxlib.local.registry`` logger is set to DEBUG,
459
+ a one-line ``transaction_summary`` is emitted at exit with the
460
+ artefact-count and total-wall-time breakdown by class.
431
461
  """
432
462
  if self._in_transaction:
433
463
  yield
@@ -435,6 +465,8 @@ class Registry:
435
465
  conn = self._c
436
466
  conn.begin()
437
467
  self._in_transaction = True
468
+ self._txn_stats.clear()
469
+ t_start = time.perf_counter() if _logger.isEnabledFor(logging.DEBUG) else 0.0
438
470
  try:
439
471
  yield
440
472
  conn.commit()
@@ -444,6 +476,12 @@ class Registry:
444
476
  finally:
445
477
  self._in_transaction = False
446
478
  self._resolve_cache.clear()
479
+ if _logger.isEnabledFor(logging.DEBUG) and self._txn_stats:
480
+ total = time.perf_counter() - t_start
481
+ count = sum(n for n, _ in self._txn_stats.values())
482
+ by_class = " ".join(f"{cls}:{n}({wall:.2f}s)" for cls, (n, wall) in sorted(self._txn_stats.items()))
483
+ _logger.debug("transaction_summary count=%d wall=%.2f by_class=[%s]", count, total, by_class)
484
+ self._txn_stats.clear()
447
485
 
448
486
  def add(self, artefact: Any, *, source_registry: str | None = None) -> None:
449
487
  """Add or replace an artefact in the store.
@@ -462,7 +500,16 @@ class Registry:
462
500
  ``docs/concurrency.md``.
463
501
  """
464
502
  tag = source_registry if source_registry is not None else self._source_registry
503
+ timed = _logger.isEnabledFor(logging.DEBUG)
504
+ t0 = time.perf_counter() if timed else 0.0
465
505
  writers.put_artefact(self._c, artefact, source_registry=tag)
506
+ if timed:
507
+ wall = time.perf_counter() - t0
508
+ cls = type(artefact).__name__
509
+ _logger.debug("artefact_persist class=%s wall=%.4f urn=%s", cls, wall, artefact_urn(artefact))
510
+ if self._in_transaction:
511
+ n, total = self._txn_stats.get(cls, (0, 0.0))
512
+ self._txn_stats[cls] = (n + 1, total + wall)
466
513
  # Inside an open ``transaction()`` block the cache is cleared
467
514
  # once at exit; per-call clearing would just churn it.
468
515
  if not self._in_transaction:
@@ -294,16 +294,31 @@ def _put_refs(
294
294
  """
295
295
  if not refs:
296
296
  return
297
- # executemany lets DuckDB amortize the PK index check across the
298
- # batch — collapses ~2M individual INSERTs on StatCan-shape bundles
299
- # (issue #139) into one statement bind/execute cycle.
300
- conn.executemany(
301
- """
302
- INSERT INTO artefact_ref (source_urn, target_urn, role, ordinal)
303
- VALUES (?, ?, ?, ?)
304
- ON CONFLICT (source_urn, target_urn, role) DO NOTHING
305
- """,
306
- refs,
297
+ # Arrow columnar bulk insert. The 0.35.0 executemany rewrite assumed
298
+ # DuckDB would amortise the ON CONFLICT PK lookup across the batch;
299
+ # the v0.35.1 instrumentation showed it doesn't — per-row cost on a
300
+ # 295k-edge hierarchy was 11.5 ms (issue #146 / #140). The Arrow
301
+ # path inserts the whole batch in one SELECT and never pays the
302
+ # per-row index lookup.
303
+ #
304
+ # ON CONFLICT is dropped: within one ``_put_refs`` call, source_urn
305
+ # is constant (one artefact's outgoing edges) and
306
+ # ``_delete_artefact_cascade`` has already cleared this artefact's
307
+ # prior rows, so no PK collision is possible across calls. Intra-
308
+ # call duplicates (a DSD referencing the same Codelist via two
309
+ # dimensions) are dedup'd here in Python, keeping the first ordinal
310
+ # seen — same set-semantics behaviour the ON CONFLICT clause used to
311
+ # give. The ordinal on this table is only a positional hint; the
312
+ # canonical per-dimension position lives in ``dsd_component``.
313
+ seen: dict[tuple[str, str, str], int | None] = {}
314
+ for src, tgt, role, ordinal in refs:
315
+ seen.setdefault((src, tgt, role), ordinal)
316
+ rows: list[list[Any]] = [[src, tgt, role, ordinal] for (src, tgt, role), ordinal in seen.items()]
317
+ _execute_bulk_insert(
318
+ conn,
319
+ "artefact_ref",
320
+ ["source_urn", "target_urn", "role", "ordinal"],
321
+ rows,
307
322
  )
308
323
 
309
324
 
File without changes
File without changes
File without changes
File without changes
File without changes