atdata 0.2.3b1__py3-none-any.whl → 0.3.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. atdata/.gitignore +1 -0
  2. atdata/__init__.py +39 -0
  3. atdata/_cid.py +0 -21
  4. atdata/_exceptions.py +168 -0
  5. atdata/_helpers.py +41 -15
  6. atdata/_hf_api.py +95 -11
  7. atdata/_logging.py +70 -0
  8. atdata/_protocols.py +77 -238
  9. atdata/_schema_codec.py +7 -6
  10. atdata/_stub_manager.py +5 -25
  11. atdata/_type_utils.py +28 -2
  12. atdata/atmosphere/__init__.py +31 -20
  13. atdata/atmosphere/_types.py +4 -4
  14. atdata/atmosphere/client.py +64 -12
  15. atdata/atmosphere/lens.py +11 -12
  16. atdata/atmosphere/records.py +12 -12
  17. atdata/atmosphere/schema.py +16 -18
  18. atdata/atmosphere/store.py +6 -7
  19. atdata/cli/__init__.py +161 -175
  20. atdata/cli/diagnose.py +2 -2
  21. atdata/cli/{local.py → infra.py} +11 -11
  22. atdata/cli/inspect.py +69 -0
  23. atdata/cli/preview.py +63 -0
  24. atdata/cli/schema.py +109 -0
  25. atdata/dataset.py +583 -328
  26. atdata/index/__init__.py +54 -0
  27. atdata/index/_entry.py +157 -0
  28. atdata/index/_index.py +1198 -0
  29. atdata/index/_schema.py +380 -0
  30. atdata/lens.py +9 -2
  31. atdata/lexicons/__init__.py +121 -0
  32. atdata/lexicons/ac.foundation.dataset.arrayFormat.json +16 -0
  33. atdata/lexicons/ac.foundation.dataset.getLatestSchema.json +78 -0
  34. atdata/lexicons/ac.foundation.dataset.lens.json +99 -0
  35. atdata/lexicons/ac.foundation.dataset.record.json +96 -0
  36. atdata/lexicons/ac.foundation.dataset.schema.json +107 -0
  37. atdata/lexicons/ac.foundation.dataset.schemaType.json +16 -0
  38. atdata/lexicons/ac.foundation.dataset.storageBlobs.json +24 -0
  39. atdata/lexicons/ac.foundation.dataset.storageExternal.json +25 -0
  40. atdata/lexicons/ndarray_shim.json +16 -0
  41. atdata/local/__init__.py +70 -0
  42. atdata/local/_repo_legacy.py +218 -0
  43. atdata/manifest/__init__.py +28 -0
  44. atdata/manifest/_aggregates.py +156 -0
  45. atdata/manifest/_builder.py +163 -0
  46. atdata/manifest/_fields.py +154 -0
  47. atdata/manifest/_manifest.py +146 -0
  48. atdata/manifest/_query.py +150 -0
  49. atdata/manifest/_writer.py +74 -0
  50. atdata/promote.py +18 -14
  51. atdata/providers/__init__.py +25 -0
  52. atdata/providers/_base.py +140 -0
  53. atdata/providers/_factory.py +69 -0
  54. atdata/providers/_postgres.py +214 -0
  55. atdata/providers/_redis.py +171 -0
  56. atdata/providers/_sqlite.py +191 -0
  57. atdata/repository.py +323 -0
  58. atdata/stores/__init__.py +23 -0
  59. atdata/stores/_disk.py +123 -0
  60. atdata/stores/_s3.py +349 -0
  61. atdata/testing.py +341 -0
  62. {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/METADATA +5 -2
  63. atdata-0.3.1b1.dist-info/RECORD +67 -0
  64. atdata/local.py +0 -1720
  65. atdata-0.2.3b1.dist-info/RECORD +0 -28
  66. {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/WHEEL +0 -0
  67. {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/entry_points.txt +0 -0
  68. {atdata-0.2.3b1.dist-info → atdata-0.3.1b1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,214 @@
1
+ """PostgreSQL-backed index provider.
2
+
3
+ Stores dataset entries and schema records in PostgreSQL tables.
4
+ Requires the ``psycopg`` (v3) package, which is an optional dependency::
5
+
6
+ pip install "atdata[postgres]"
7
+
8
+ The provider lazily imports ``psycopg`` so that ``import atdata`` never
9
+ fails when the package is absent.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Iterator
15
+
16
+ import msgpack
17
+
18
+ from ._base import IndexProvider
19
+ from .._type_utils import parse_semver
20
+
21
# DDL executed once at connection time; ``IF NOT EXISTS`` makes it idempotent.
# ``dataset_entries`` holds one row per dataset keyed by content CID, with
# ``data_urls``/``metadata`` stored as msgpack-encoded BYTEA blobs;
# ``schemas`` holds one JSON document per (name, version) pair.
_CREATE_TABLES = """\
CREATE TABLE IF NOT EXISTS dataset_entries (
    cid TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    schema_ref TEXT NOT NULL,
    data_urls BYTEA NOT NULL,
    metadata BYTEA,
    legacy_uuid TEXT,
    created_at TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_entries_name
    ON dataset_entries(name);

CREATE TABLE IF NOT EXISTS schemas (
    name TEXT NOT NULL,
    version TEXT NOT NULL,
    schema_json TEXT NOT NULL,
    created_at TIMESTAMPTZ DEFAULT now(),
    PRIMARY KEY (name, version)
);
"""
43
+
44
+
45
class PostgresProvider(IndexProvider):
    """Index provider backed by PostgreSQL.

    The connection runs in autocommit mode and every write is wrapped in an
    explicit ``transaction()`` block, which commits on success and rolls
    back on error. (With ``autocommit=False`` each statement — including
    the SELECTs — opens a transaction that the read methods never close,
    leaving the connection idle-in-transaction; worse, a failed statement
    aborts that transaction so every subsequent command errors until a
    rollback is issued.)

    Args:
        dsn: PostgreSQL connection string, e.g.
            ``"postgresql://user:pass@host:5432/dbname"``.

    Raises:
        ImportError: If ``psycopg`` is not installed.

    Examples:
        >>> provider = PostgresProvider(dsn="postgresql://localhost/atdata")
        >>> provider.store_schema("MySample", "1.0.0", '{"name":"MySample"}')
    """

    def __init__(self, dsn: str) -> None:
        # Lazy import so ``import atdata`` works without the optional extra.
        try:
            import psycopg
        except ImportError as exc:
            raise ImportError(
                "The postgres provider requires the 'psycopg' package. "
                "Install it with: pip install 'atdata[postgres]'"
            ) from exc

        self._conn = psycopg.connect(dsn, autocommit=True)
        # DDL is multi-statement and parameterless, so it can run in a
        # single execute; transaction() makes the setup atomic.
        with self._conn.transaction():
            with self._conn.cursor() as cur:
                cur.execute(_CREATE_TABLES)

    # ------------------------------------------------------------------
    # Dataset entry operations
    # ------------------------------------------------------------------

    def store_entry(self, entry: "LocalDatasetEntry") -> None:  # noqa: F821
        """Insert or update (upsert by CID) a dataset entry."""
        with self._conn.transaction():
            with self._conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO dataset_entries
                    (cid, name, schema_ref, data_urls, metadata, legacy_uuid)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (cid) DO UPDATE SET
                        name = EXCLUDED.name,
                        schema_ref = EXCLUDED.schema_ref,
                        data_urls = EXCLUDED.data_urls,
                        metadata = EXCLUDED.metadata,
                        legacy_uuid = EXCLUDED.legacy_uuid""",
                    (
                        entry.cid,
                        entry.name,
                        entry.schema_ref,
                        msgpack.packb(entry.data_urls),
                        msgpack.packb(entry.metadata)
                        if entry.metadata is not None
                        else None,
                        entry._legacy_uuid,
                    ),
                )

    def get_entry_by_cid(self, cid: str) -> "LocalDatasetEntry":  # noqa: F821
        """Fetch the entry stored under *cid*.

        Raises:
            KeyError: If no entry exists for this CID.
        """
        with self._conn.cursor() as cur:
            cur.execute(
                "SELECT cid, name, schema_ref, data_urls, metadata, legacy_uuid "
                "FROM dataset_entries WHERE cid = %s",
                (cid,),
            )
            row = cur.fetchone()
        if row is None:
            raise KeyError(f"LocalDatasetEntry not found: {cid}")
        return _row_to_entry(row)

    def get_entry_by_name(self, name: str) -> "LocalDatasetEntry":  # noqa: F821
        """Fetch one entry by dataset name.

        NOTE(review): ``LIMIT 1`` without ``ORDER BY`` — if multiple entries
        share a name, which one is returned is unspecified.

        Raises:
            KeyError: If no entry has this name.
        """
        with self._conn.cursor() as cur:
            cur.execute(
                "SELECT cid, name, schema_ref, data_urls, metadata, legacy_uuid "
                "FROM dataset_entries WHERE name = %s LIMIT 1",
                (name,),
            )
            row = cur.fetchone()
        if row is None:
            raise KeyError(f"No entry with name: {name}")
        return _row_to_entry(row)

    def iter_entries(self) -> Iterator["LocalDatasetEntry"]:  # noqa: F821
        """Yield every stored dataset entry."""
        with self._conn.cursor() as cur:
            cur.execute(
                "SELECT cid, name, schema_ref, data_urls, metadata, legacy_uuid "
                "FROM dataset_entries"
            )
            for row in cur:
                yield _row_to_entry(row)

    # ------------------------------------------------------------------
    # Schema operations
    # ------------------------------------------------------------------

    def store_schema(self, name: str, version: str, schema_json: str) -> None:
        """Insert or update (upsert by name+version) a schema document."""
        with self._conn.transaction():
            with self._conn.cursor() as cur:
                cur.execute(
                    """INSERT INTO schemas (name, version, schema_json)
                    VALUES (%s, %s, %s)
                    ON CONFLICT (name, version) DO UPDATE SET
                        schema_json = EXCLUDED.schema_json""",
                    (name, version, schema_json),
                )

    def get_schema_json(self, name: str, version: str) -> str | None:
        """Return the schema JSON for (name, version), or None if absent."""
        with self._conn.cursor() as cur:
            cur.execute(
                "SELECT schema_json FROM schemas WHERE name = %s AND version = %s",
                (name, version),
            )
            row = cur.fetchone()
        if row is None:
            return None
        return row[0]

    def iter_schemas(self) -> Iterator[tuple[str, str, str]]:
        """Yield ``(name, version, schema_json)`` for every stored schema."""
        with self._conn.cursor() as cur:
            cur.execute("SELECT name, version, schema_json FROM schemas")
            for row in cur:
                yield row[0], row[1], row[2]

    def find_latest_version(self, name: str) -> str | None:
        """Return the highest semver version recorded for *name*.

        Rows whose version string does not parse as semver are skipped.
        Returns None when no parseable version exists.
        """
        with self._conn.cursor() as cur:
            cur.execute(
                "SELECT version FROM schemas WHERE name = %s",
                (name,),
            )
            latest: tuple[int, int, int] | None = None
            latest_str: str | None = None
            for (version_str,) in cur:
                try:
                    v = parse_semver(version_str)
                except ValueError:
                    continue
                if latest is None or v > latest:
                    latest = v
                    latest_str = version_str
            return latest_str

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Close the PostgreSQL connection."""
        self._conn.close()
193
+
194
+
195
+ # ------------------------------------------------------------------
196
+ # Helpers
197
+ # ------------------------------------------------------------------
198
+
199
+
200
def _row_to_entry(row: tuple) -> "LocalDatasetEntry":  # noqa: F821
    """Build a ``LocalDatasetEntry`` from a ``dataset_entries`` row."""
    from ..local import LocalDatasetEntry

    cid, name, schema_ref, urls_blob, meta_blob, legacy_uuid = row
    # psycopg returns BYTEA columns as memoryview; bytes() normalizes them
    # before msgpack decoding.
    metadata = None if meta_blob is None else msgpack.unpackb(bytes(meta_blob))
    return LocalDatasetEntry(
        name=name,
        schema_ref=schema_ref,
        data_urls=msgpack.unpackb(bytes(urls_blob)),
        metadata=metadata,
        _cid=cid,
        _legacy_uuid=legacy_uuid,
    )
@@ -0,0 +1,171 @@
1
+ """Redis-backed index provider.
2
+
3
+ This module extracts the Redis persistence logic that was previously
4
+ inlined in ``atdata.local.Index`` and ``LocalDatasetEntry`` into a
5
+ standalone ``IndexProvider`` implementation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Iterator
11
+
12
+ import msgpack
13
+ from redis import Redis
14
+
15
+ from ._base import IndexProvider
16
+ from .._type_utils import parse_semver
17
+
18
+ # Redis key prefixes — kept in sync with local.py constants
19
+ _KEY_DATASET_ENTRY = "LocalDatasetEntry"
20
+ _KEY_SCHEMA = "LocalSchema"
21
+
22
+
23
class RedisProvider(IndexProvider):
    """Index provider backed by a Redis connection.

    This reproduces the exact storage layout used by the original
    ``Index`` class so that existing Redis data is fully compatible:
    entries are hashes keyed ``LocalDatasetEntry:<cid>``; schemas are
    plain string values keyed ``LocalSchema:<name>@<version>``.

    Args:
        redis: An active ``redis.Redis`` connection.
    """

    def __init__(self, redis: Redis) -> None:
        self._redis = redis

    @property
    def redis(self) -> Redis:
        """The underlying Redis connection (for advanced use / migration)."""
        return self._redis

    # ------------------------------------------------------------------
    # Dataset entry operations
    # ------------------------------------------------------------------

    def store_entry(self, entry: "LocalDatasetEntry") -> None:  # noqa: F821
        """Persist *entry* as a Redis hash keyed by its CID (upsert)."""
        save_key = f"{_KEY_DATASET_ENTRY}:{entry.cid}"
        data: dict[str, str | bytes] = {
            "name": entry.name,
            "schema_ref": entry.schema_ref,
            "data_urls": msgpack.packb(entry.data_urls),
            "cid": entry.cid,
        }
        # Optional fields are omitted from the hash entirely when absent;
        # the reader side treats a missing field as None.
        if entry.metadata is not None:
            data["metadata"] = msgpack.packb(entry.metadata)
        if entry._legacy_uuid is not None:
            data["legacy_uuid"] = entry._legacy_uuid

        self._redis.hset(save_key, mapping=data)  # type: ignore[arg-type]

    def get_entry_by_cid(self, cid: str) -> "LocalDatasetEntry":  # noqa: F821
        """Fetch the entry stored under *cid*.

        Raises:
            KeyError: If no hash exists for this CID (HGETALL returns empty).
        """
        save_key = f"{_KEY_DATASET_ENTRY}:{cid}"
        raw_data = self._redis.hgetall(save_key)
        if not raw_data:
            raise KeyError(f"{_KEY_DATASET_ENTRY} not found: {cid}")

        return _entry_from_redis_hash(raw_data)

    def get_entry_by_name(self, name: str) -> "LocalDatasetEntry":  # noqa: F821
        """Return the first entry whose name matches.

        Performs a full O(n) scan of all entries; Redis has no secondary
        index on names in this layout.

        Raises:
            KeyError: If no entry has this name.
        """
        for entry in self.iter_entries():
            if entry.name == name:
                return entry
        raise KeyError(f"No entry with name: {name}")

    def iter_entries(self) -> Iterator["LocalDatasetEntry"]:  # noqa: F821
        """Yield every stored entry via a cursor-based SCAN (non-blocking)."""
        prefix = f"{_KEY_DATASET_ENTRY}:"
        for key in self._redis.scan_iter(match=f"{prefix}*"):
            # scan_iter yields bytes or str depending on decode_responses.
            key_str = key.decode("utf-8") if isinstance(key, bytes) else key
            cid = key_str[len(prefix) :]
            yield self.get_entry_by_cid(cid)

    # ------------------------------------------------------------------
    # Schema operations
    # ------------------------------------------------------------------

    def store_schema(self, name: str, version: str, schema_json: str) -> None:
        """Store a schema document under ``LocalSchema:<name>@<version>``."""
        redis_key = f"{_KEY_SCHEMA}:{name}@{version}"
        self._redis.set(redis_key, schema_json)

    def get_schema_json(self, name: str, version: str) -> str | None:
        """Return the schema JSON for (name, version), or None if absent."""
        redis_key = f"{_KEY_SCHEMA}:{name}@{version}"
        value = self._redis.get(redis_key)
        if value is None:
            return None
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value  # type: ignore[return-value]

    def iter_schemas(self) -> Iterator[tuple[str, str, str]]:
        """Yield ``(name, version, schema_json)`` for every stored schema.

        Keys without an ``@`` separator are skipped; legacy dotted names
        (``module.Class``) are normalized to the bare class name.
        """
        prefix = f"{_KEY_SCHEMA}:"
        for key in self._redis.scan_iter(match=f"{prefix}*"):
            key_str = key.decode("utf-8") if isinstance(key, bytes) else key
            schema_id = key_str[len(prefix) :]

            if "@" not in schema_id:
                continue

            raw_name, version = schema_id.rsplit("@", 1)
            # Handle legacy format: module.Class -> Class
            if "." in raw_name:
                raw_name = raw_name.rsplit(".", 1)[1]

            # Key may have expired/been deleted between SCAN and GET.
            value = self._redis.get(key)
            if value is None:
                continue
            schema_json = value.decode("utf-8") if isinstance(value, bytes) else value
            yield raw_name, version, schema_json  # type: ignore[misc]

    def find_latest_version(self, name: str) -> str | None:
        """Return the highest semver version recorded for *name*.

        Versions that fail semver parsing are skipped; returns None when
        no parseable version exists.
        """
        latest: tuple[int, int, int] | None = None
        latest_str: str | None = None

        for schema_name, version, _ in self.iter_schemas():
            if schema_name != name:
                continue
            try:
                v = parse_semver(version)
                if latest is None or v > latest:
                    latest = v
                    latest_str = version
            except ValueError:
                continue

        return latest_str

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Close the Redis connection."""
        self._redis.close()
142
+
143
+
144
+ # ------------------------------------------------------------------
145
+ # Helpers
146
+ # ------------------------------------------------------------------
147
+
148
+
149
def _entry_from_redis_hash(raw_data: dict) -> "LocalDatasetEntry":  # noqa: F821
    """Rebuild a ``LocalDatasetEntry`` from the hash mapping stored in Redis."""
    from typing import cast

    from ..local import LocalDatasetEntry

    fields = cast(dict[bytes, bytes], raw_data)

    def _optional(key: bytes) -> str | None:
        # Absent fields come back as b""; treat empty as None.
        return fields.get(key, b"").decode("utf-8") or None

    metadata = (
        msgpack.unpackb(fields[b"metadata"]) if b"metadata" in fields else None
    )

    return LocalDatasetEntry(
        name=fields[b"name"].decode("utf-8"),
        schema_ref=fields[b"schema_ref"].decode("utf-8"),
        data_urls=msgpack.unpackb(fields[b"data_urls"]),
        metadata=metadata,
        _cid=_optional(b"cid"),
        _legacy_uuid=_optional(b"legacy_uuid"),
    )
+ )
@@ -0,0 +1,191 @@
1
+ """SQLite-backed index provider.
2
+
3
+ Stores dataset entries and schema records in a local SQLite database file.
4
+ Uses WAL journal mode for concurrent read access and ``INSERT OR REPLACE``
5
+ for upsert semantics.
6
+
7
+ No external dependencies — uses Python's built-in ``sqlite3`` module.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import sqlite3
13
+ from pathlib import Path
14
+ from typing import Iterator
15
+
16
+ import msgpack
17
+
18
+ from ._base import IndexProvider
19
+ from .._type_utils import parse_semver
20
+
21
# DDL executed once at connection time; ``IF NOT EXISTS`` makes it idempotent.
# Mirrors the PostgreSQL provider's schema with SQLite types: BLOB for the
# msgpack payloads and an ISO-8601 text timestamp instead of TIMESTAMPTZ.
_CREATE_TABLES = """\
CREATE TABLE IF NOT EXISTS dataset_entries (
    cid TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    schema_ref TEXT NOT NULL,
    data_urls BLOB NOT NULL,
    metadata BLOB,
    legacy_uuid TEXT,
    created_at TEXT DEFAULT (datetime('now'))
);

CREATE INDEX IF NOT EXISTS idx_entries_name
    ON dataset_entries(name);

CREATE TABLE IF NOT EXISTS schemas (
    name TEXT NOT NULL,
    version TEXT NOT NULL,
    schema_json TEXT NOT NULL,
    created_at TEXT DEFAULT (datetime('now')),
    PRIMARY KEY (name, version)
);
"""
43
+
44
+
45
class SqliteProvider(IndexProvider):
    """Index provider backed by a local SQLite database.

    Args:
        path: Path to the database file. The parent directory is created
            automatically. Defaults to ``~/.atdata/index.db``.

    Examples:
        >>> provider = SqliteProvider(path="/tmp/test-index.db")
        >>> provider.store_schema("MySample", "1.0.0", '{"name":"MySample"}')
        >>> provider.get_schema_json("MySample", "1.0.0")
        '{"name":"MySample"}'
    """

    def __init__(self, path: str | Path | None = None) -> None:
        db_file = Path(path) if path is not None else Path.home() / ".atdata" / "index.db"
        self._path = db_file.expanduser()
        self._path.parent.mkdir(parents=True, exist_ok=True)

        connection = sqlite3.connect(str(self._path))
        # WAL journal mode lets readers proceed while a writer is active.
        connection.execute("PRAGMA journal_mode=WAL")
        connection.executescript(_CREATE_TABLES)
        connection.commit()
        self._conn = connection

    @property
    def path(self) -> Path:
        """Path to the SQLite database file."""
        return self._path

    # ------------------------------------------------------------------
    # Dataset entry operations
    # ------------------------------------------------------------------

    def store_entry(self, entry: "LocalDatasetEntry") -> None:  # noqa: F821
        """Insert or replace (upsert by CID) a dataset entry."""
        packed_urls = msgpack.packb(entry.data_urls)
        packed_meta = (
            None if entry.metadata is None else msgpack.packb(entry.metadata)
        )
        params = (
            entry.cid,
            entry.name,
            entry.schema_ref,
            packed_urls,
            packed_meta,
            entry._legacy_uuid,
        )
        self._conn.execute(
            """INSERT OR REPLACE INTO dataset_entries
               (cid, name, schema_ref, data_urls, metadata, legacy_uuid)
               VALUES (?, ?, ?, ?, ?, ?)""",
            params,
        )
        self._conn.commit()

    def get_entry_by_cid(self, cid: str) -> "LocalDatasetEntry":  # noqa: F821
        """Fetch the entry stored under *cid*.

        Raises:
            KeyError: If no entry exists for this CID.
        """
        cursor = self._conn.execute(
            "SELECT cid, name, schema_ref, data_urls, metadata, legacy_uuid "
            "FROM dataset_entries WHERE cid = ?",
            (cid,),
        )
        match = cursor.fetchone()
        if match is None:
            raise KeyError(f"LocalDatasetEntry not found: {cid}")
        return _row_to_entry(match)

    def get_entry_by_name(self, name: str) -> "LocalDatasetEntry":  # noqa: F821
        """Fetch one entry by dataset name.

        Raises:
            KeyError: If no entry has this name.
        """
        cursor = self._conn.execute(
            "SELECT cid, name, schema_ref, data_urls, metadata, legacy_uuid "
            "FROM dataset_entries WHERE name = ? LIMIT 1",
            (name,),
        )
        match = cursor.fetchone()
        if match is None:
            raise KeyError(f"No entry with name: {name}")
        return _row_to_entry(match)

    def iter_entries(self) -> Iterator["LocalDatasetEntry"]:  # noqa: F821
        """Yield every stored dataset entry."""
        rows = self._conn.execute(
            "SELECT cid, name, schema_ref, data_urls, metadata, legacy_uuid "
            "FROM dataset_entries"
        )
        yield from map(_row_to_entry, rows)

    # ------------------------------------------------------------------
    # Schema operations
    # ------------------------------------------------------------------

    def store_schema(self, name: str, version: str, schema_json: str) -> None:
        """Insert or replace (upsert by name+version) a schema document."""
        self._conn.execute(
            """INSERT OR REPLACE INTO schemas (name, version, schema_json)
               VALUES (?, ?, ?)""",
            (name, version, schema_json),
        )
        self._conn.commit()

    def get_schema_json(self, name: str, version: str) -> str | None:
        """Return the schema JSON for (name, version), or None if absent."""
        cursor = self._conn.execute(
            "SELECT schema_json FROM schemas WHERE name = ? AND version = ?",
            (name, version),
        )
        match = cursor.fetchone()
        return None if match is None else match[0]

    def iter_schemas(self) -> Iterator[tuple[str, str, str]]:
        """Yield ``(name, version, schema_json)`` for every stored schema."""
        yield from self._conn.execute(
            "SELECT name, version, schema_json FROM schemas"
        )

    def find_latest_version(self, name: str) -> str | None:
        """Return the highest semver version recorded for *name*.

        Rows whose version string fails semver parsing are skipped; returns
        None when no parseable version exists.
        """
        rows = self._conn.execute(
            "SELECT version FROM schemas WHERE name = ?",
            (name,),
        )
        best: tuple[int, int, int] | None = None
        best_str: str | None = None
        for (raw_version,) in rows:
            try:
                parsed = parse_semver(raw_version)
            except ValueError:
                continue
            if best is None or parsed > best:
                best = parsed
                best_str = raw_version
        return best_str

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Close the SQLite connection."""
        self._conn.close()
172
+
173
+
174
+ # ------------------------------------------------------------------
175
+ # Helpers
176
+ # ------------------------------------------------------------------
177
+
178
+
179
def _row_to_entry(row: tuple) -> "LocalDatasetEntry":  # noqa: F821
    """Build a ``LocalDatasetEntry`` from a ``dataset_entries`` row."""
    from ..local import LocalDatasetEntry

    cid, name, schema_ref, urls_blob, meta_blob, legacy_uuid = row
    metadata = None if meta_blob is None else msgpack.unpackb(meta_blob)
    return LocalDatasetEntry(
        name=name,
        schema_ref=schema_ref,
        data_urls=msgpack.unpackb(urls_blob),
        metadata=metadata,
        _cid=cid,
        _legacy_uuid=legacy_uuid,
    )