pandera-catalog 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandera_catalog/__init__.py +19 -0
- pandera_catalog/catalog.py +324 -0
- pandera_catalog/schemas.py +77 -0
- pandera_catalog/types.py +59 -0
- pandera_catalog/utils/__init__.py +3 -0
- pandera_catalog-0.2.0.dist-info/METADATA +104 -0
- pandera_catalog-0.2.0.dist-info/RECORD +9 -0
- pandera_catalog-0.2.0.dist-info/WHEEL +4 -0
- pandera_catalog-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from importlib import metadata
|
|
2
|
+
|
|
3
|
+
from .catalog import PanderaCatalog
|
|
4
|
+
from .schemas import load_schema_from_yaml
|
|
5
|
+
from .types import SchemaEntry, SchemaProjectionEntry, SchemaProjectionStep
|
|
6
|
+
|
|
7
|
+
# Look up the version from the installed distribution metadata. When no
# metadata is found for the package (PackageNotFoundError), fall back to a
# development placeholder instead of failing at import time.
try:
    __version__ = metadata.version("pandera_catalog")
except metadata.PackageNotFoundError:
    __version__ = "0.0.0.dev0"

# Names re-exported as the package's public API.
__all__ = [
    "PanderaCatalog",
    "load_schema_from_yaml",
    "SchemaEntry",
    "SchemaProjectionEntry",
    "SchemaProjectionStep",
    "__version__",
]
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""Core catalog abstraction for pandera-catalog.
|
|
2
|
+
|
|
3
|
+
The :class:`PanderaCatalog` class is the primary entry-point for registering,
|
|
4
|
+
looking up, listing, and removing Pandera schema entries.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from collections.abc import Mapping
|
|
9
|
+
|
|
10
|
+
import pandera.pandas as pa
|
|
11
|
+
|
|
12
|
+
from .types import SchemaEntry, SchemaProjectionEntry, SchemaProjectionStep
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PanderaCatalog:
    """A registry for Pandera schemas.

    Schemas are stored in memory by name. A future version will persist
    entries to a SQLAlchemy-backed database (SQLite by default).

    The catalog also stores named *projections*: ordered lists of steps that
    each select columns from one registered schema. A projection can be
    materialised into a new schema with :meth:`export_projection`.

    Examples
    --------
    >>> from pandera_catalog import PanderaCatalog
    >>> import pandera.pandas as pa
    >>> catalog = PanderaCatalog()
    >>> schema = pa.DataFrameSchema({"value": pa.Column(float)})
    >>> catalog.register("my_schema", schema)
    >>> catalog.get("my_schema")
    <Schema DataFrameSchema(columns={'value': ...}, ...)>
    """

    def __init__(self) -> None:
        """Create an empty catalog with no schemas and no projections."""
        # Both registries are plain in-memory dicts keyed by entry name.
        self._store: dict[str, SchemaEntry] = {}
        self._projections: dict[str, SchemaProjectionEntry] = {}

    # ------------------------------------------------------------------
    # Mutating operations
    # ------------------------------------------------------------------

    def register(
        self,
        name: str,
        schema: pa.DataFrameSchema,
        *,
        description: str | None = None,
        tags: list[str] | None = None,
        overwrite: bool = False,
    ) -> None:
        """Register a Pandera schema under *name*.

        Parameters
        ----------
        name:
            Unique identifier for the schema within this catalog.
        schema:
            A :class:`pandera.DataFrameSchema` instance.
        description:
            Optional human-readable description of the schema.
        tags:
            Optional list of string tags for categorisation.
        overwrite:
            When ``True``, silently replace any existing entry with the same
            name.  When ``False`` (default), raise :class:`KeyError` if the
            name is already registered.

        Raises
        ------
        KeyError
            If *name* is already registered and *overwrite* is ``False``.
        """
        if name in self._store and not overwrite:
            raise KeyError(
                f"Schema '{name}' is already registered. "
                "Pass overwrite=True to replace it."
            )
        self._store[name] = SchemaEntry(
            name=name,
            schema=schema,
            description=description,
            # Copy so later mutation of the caller's list does not leak in.
            tags=list(tags or []),
        )

    def register_projection(
        self,
        name: str,
        steps: list[SchemaProjectionStep | Mapping[str, object]],
        *,
        description: str | None = None,
        overwrite: bool = False,
    ) -> None:
        """Register a named projection from ordered step definitions.

        Parameters
        ----------
        name:
            Unique projection name within this catalog.
        steps:
            Ordered list of projection steps. Each step must include
            ``schema``, ``kind``, and ``names``.
        description:
            Optional human-readable description of the projection.
        overwrite:
            When ``True``, replace any existing projection with the same name.

        Raises
        ------
        KeyError
            If any step schema is not registered, or if *name* already exists
            and *overwrite* is ``False``.
        ValueError
            If *steps* are invalid, include duplicates, include unknown columns,
            or include an unknown step kind.
        NotImplementedError
            If a ``kind: group`` step is provided.
        """
        if name in self._projections and not overwrite:
            raise KeyError(
                f"Projection '{name}' is already registered. "
                "Pass overwrite=True to replace it."
            )

        # Validate everything up front so a bad projection is never stored.
        resolved_steps = self._normalise_projection_steps(steps)
        resolved_columns = self._resolve_projection_columns(resolved_steps)
        duplicate_columns = self._find_duplicate_columns(
            [column for _, column in resolved_columns]
        )
        if duplicate_columns:
            raise ValueError(
                f"Projection columns contain duplicates across steps: "
                f"{duplicate_columns!r}."
            )

        self._projections[name] = SchemaProjectionEntry(
            name=name,
            steps=resolved_steps,
            description=description,
        )

    def remove(self, name: str) -> None:
        """Remove the schema registered under *name*.

        Projections that reference the removed schema are left in place;
        exporting such a projection afterwards raises :class:`KeyError`.

        Parameters
        ----------
        name:
            Name of the schema to remove.

        Raises
        ------
        KeyError
            If *name* is not registered.
        """
        if name not in self._store:
            raise KeyError(f"Schema '{name}' is not registered.")
        del self._store[name]

    def remove_projection(self, name: str) -> None:
        """Remove the projection registered under *name*.

        Raises
        ------
        KeyError
            If no projection named *name* is registered.
        """
        if name not in self._projections:
            raise KeyError(f"Projection '{name}' is not registered.")
        del self._projections[name]

    # ------------------------------------------------------------------
    # Read operations
    # ------------------------------------------------------------------

    def get(self, name: str) -> pa.DataFrameSchema:
        """Return the schema registered under *name*.

        Parameters
        ----------
        name:
            Name of the schema to retrieve.

        Returns
        -------
        pandera.DataFrameSchema

        Raises
        ------
        KeyError
            If *name* is not registered.
        """
        if name not in self._store:
            raise KeyError(f"Schema '{name}' is not registered.")
        return self._store[name].schema

    def get_entry(self, name: str) -> SchemaEntry:
        """Return the full :class:`~pandera_catalog.types.SchemaEntry` for *name*.

        Parameters
        ----------
        name:
            Name of the schema entry to retrieve.

        Returns
        -------
        SchemaEntry

        Raises
        ------
        KeyError
            If *name* is not registered.
        """
        if name not in self._store:
            raise KeyError(f"Schema '{name}' is not registered.")
        return self._store[name]

    def get_projection_entry(self, name: str) -> SchemaProjectionEntry:
        """Return the full projection entry registered under *name*.

        Raises
        ------
        KeyError
            If no projection named *name* is registered.
        """
        if name not in self._projections:
            raise KeyError(f"Projection '{name}' is not registered.")
        return self._projections[name]

    def export_projection(self, name: str) -> pa.DataFrameSchema:
        """Materialise and return the schema defined by projection *name*.

        The columns are taken from the currently registered source schemas,
        in step order, and the resulting schema is named after the projection.

        Raises
        ------
        KeyError
            If the projection is not registered, or if a source schema it
            references is no longer registered.
        ValueError
            If a selected column is not present in its source schema.
        """
        projection = self.get_projection_entry(name)
        # Re-resolve at export time: source schemas may have been replaced or
        # removed since the projection was registered.
        resolved_columns = self._resolve_projection_columns(projection.steps)
        columns: dict[str, pa.Column] = {}
        for schema_name, column_name in resolved_columns:
            source_schema = self.get(schema_name)
            columns[column_name] = source_schema.columns[column_name]
        return pa.DataFrameSchema(columns=columns, name=projection.name)

    def list(self) -> list[str]:
        """Return a sorted list of all registered schema names.

        Returns
        -------
        list[str]
        """
        return sorted(self._store.keys())

    def list_projections(self) -> list[str]:
        """Return a sorted list of all registered projection names."""
        return sorted(self._projections.keys())

    def __len__(self) -> int:
        """Return the number of registered schemas (projections are not counted)."""
        return len(self._store)

    def __contains__(self, name: object) -> bool:
        """Return whether *name* is a registered schema name."""
        return name in self._store

    def __repr__(self) -> str:
        """Return a summary listing the sorted schema and projection names."""
        names = self.list()
        projections = self.list_projections()
        return f"PanderaCatalog(schemas={names!r}, projections={projections!r})"

    @staticmethod
    def _find_duplicate_columns(columns: list[str]) -> list[str]:
        """Return each repeated name in *columns* once, in order of first repeat."""
        seen: set[str] = set()
        duplicates: list[str] = []
        for column in columns:
            if column in seen and column not in duplicates:
                duplicates.append(column)
            seen.add(column)
        return duplicates

    def _normalise_projection_steps(
        self, steps: list[SchemaProjectionStep | Mapping[str, object]]
    ) -> list[SchemaProjectionStep]:
        """Validate *steps* and convert mapping steps to ``SchemaProjectionStep``.

        Raises
        ------
        ValueError
            If *steps* is empty, a step has an unsupported type, a mapping
            step has missing or malformed fields, the kind is unknown, or the
            step selects no names.
        NotImplementedError
            If a step has kind ``"group"``.
        KeyError
            If a step references a schema that is not registered.
        """
        if not steps:
            raise ValueError("Projection steps cannot be empty.")

        normalised_steps: list[SchemaProjectionStep] = []
        for step in steps:
            if isinstance(step, SchemaProjectionStep):
                normalised = step
            elif isinstance(step, Mapping):
                normalised = SchemaProjectionStep(
                    schema=self._require_string(step, "schema"),
                    kind=self._require_string(step, "kind"),
                    names=self._require_string_list(step, "names"),
                )
            else:
                raise ValueError("Projection steps must be mappings or SchemaProjectionStep.")

            if normalised.kind not in {"columns", "group"}:
                raise ValueError(f"Unknown projection step kind: {normalised.kind!r}.")

            # "group" is a recognised kind but has no implementation yet.
            if normalised.kind == "group":
                raise NotImplementedError("Projection step kind 'group' is not implemented.")

            if normalised.schema not in self._store:
                raise KeyError(f"Schema '{normalised.schema}' is not registered.")

            if not normalised.names:
                raise ValueError("Projection step names cannot be empty.")

            normalised_steps.append(normalised)

        return normalised_steps

    def _resolve_projection_columns(
        self, steps: list[SchemaProjectionStep]
    ) -> list[tuple[str, str]]:
        """Expand *steps* into ordered ``(schema_name, column_name)`` pairs.

        Raises
        ------
        KeyError
            If a step references a schema that is not (or no longer)
            registered.
        ValueError
            If a step names a column that its source schema does not define.
        """
        resolved: list[tuple[str, str]] = []
        for step in steps:
            # A stored projection can outlive its source schema (see
            # ``remove``), so check explicitly and raise the same message the
            # other lookups use rather than a bare ``KeyError(name)``.
            if step.schema not in self._store:
                raise KeyError(f"Schema '{step.schema}' is not registered.")
            source_columns = set(self._store[step.schema].schema.columns.keys())
            unknown_columns = [name for name in step.names if name not in source_columns]
            if unknown_columns:
                raise ValueError(
                    f"Projection columns not found in source schema '{step.schema}': "
                    f"{unknown_columns!r}."
                )
            resolved.extend((step.schema, name) for name in step.names)
        return resolved

    @staticmethod
    def _require_string(step: Mapping[str, object], key: str) -> str:
        """Return ``step[key]``, raising ``ValueError`` unless it is a non-empty string."""
        value = step.get(key)
        if not isinstance(value, str) or not value:
            raise ValueError(f"Projection step '{key}' must be a non-empty string.")
        return value

    @staticmethod
    def _require_string_list(step: Mapping[str, object], key: str) -> list[str]:
        """Return a copy of ``step[key]``, which must be a list of non-empty strings.

        An empty list passes this check; emptiness is rejected separately by
        ``_normalise_projection_steps``.
        """
        value = step.get(key)
        if not isinstance(value, list) or not all(
            isinstance(item, str) and item for item in value
        ):
            raise ValueError(
                f"Projection step '{key}' must be a list of non-empty strings."
            )
        return list(value)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Schema loading helpers for pandera-catalog.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for loading Pandera schemas from YAML files
|
|
4
|
+
and from plain Python dictionaries.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
11
|
+
import pandera.pandas as pa
|
|
12
|
+
import yaml
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_schema_from_yaml(path: Union[str, Path]) -> pa.DataFrameSchema:
    """Load a Pandera :class:`~pandera.DataFrameSchema` from a YAML file.

    The YAML file must follow the format produced by
    :meth:`pandera.DataFrameSchema.to_yaml`.

    Parameters
    ----------
    path:
        Path to the ``.yaml`` (or ``.yml``) schema definition file.

    Returns
    -------
    pandera.DataFrameSchema

    Raises
    ------
    FileNotFoundError
        If the path does not exist.
    IsADirectoryError
        If the path exists but is a directory.

    Notes
    -----
    Errors raised by Pandera while reading or parsing the file are not
    wrapped; they propagate to the caller unchanged.

    Examples
    --------
    >>> from pathlib import Path
    >>> from pandera_catalog.schemas import load_schema_from_yaml
    >>> schema = load_schema_from_yaml(Path("schemas/my_schema.yaml"))
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Schema file not found: {path}")
    # ``exists()`` is also true for directories; reject them here with a clear
    # error instead of letting the failure surface from inside Pandera.
    if path.is_dir():
        raise IsADirectoryError(f"Schema path is a directory, not a file: {path}")
    return pa.DataFrameSchema.from_yaml(path)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_schema_from_dict(definition: dict) -> pa.DataFrameSchema:
    """Build a Pandera :class:`~pandera.DataFrameSchema` from a dictionary.

    The dictionary is serialised to a YAML string and handed to
    :meth:`pandera.DataFrameSchema.from_yaml`, so it must follow the same
    structure as Pandera's YAML serialisation format.

    Parameters
    ----------
    definition:
        Mapping that describes the schema (columns, checks, etc.).

    Returns
    -------
    pandera.DataFrameSchema

    Examples
    --------
    >>> from pandera_catalog.schemas import load_schema_from_dict
    >>> defn = {"columns": {"value": {"dtype": "float64"}}}
    >>> schema = load_schema_from_dict(defn)
    """
    # Round-trip through YAML text so Pandera's own loader does the parsing.
    return pa.DataFrameSchema.from_yaml(yaml.dump(definition))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Public names exported by this module.
__all__ = [
    "load_schema_from_yaml",
    "load_schema_from_dict",
]
|
pandera_catalog/types.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Shared type aliases, dataclasses, and protocol helpers for pandera-catalog."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
import pandera.pandas as pa
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class SchemaEntry:
    """A catalog record pairing a Pandera schema with its metadata.

    Attributes
    ----------
    name:
        Unique name of the schema within the catalog.
    schema:
        The Pandera :class:`~pandera.DataFrameSchema` instance.
    description:
        Optional human-readable description.
    tags:
        Optional list of string tags for categorisation or filtering.
    """

    name: str
    schema: pa.DataFrameSchema
    description: str | None = None
    tags: list[str] = field(default_factory=list)

    def __repr__(self) -> str:
        """Return a compact repr that omits the schema and any empty metadata."""
        parts = [f"name={self.name!r}"]
        if self.description:
            parts.append(f"description={self.description!r}")
        if self.tags:
            parts.append(f"tags={self.tags!r}")
        return "SchemaEntry(" + ", ".join(parts) + ")"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class SchemaProjectionEntry:
    """A named projection made of ordered schema selection steps.

    Attributes
    ----------
    name:
        Name of the projection.
    steps:
        Ordered selection steps that make up the projection.
    description:
        Optional human-readable description.
    """

    name: str
    steps: list["SchemaProjectionStep"]
    description: str | None = None

    def __repr__(self) -> str:
        """Return a repr showing name and steps, plus the description when set."""
        parts = [f"name={self.name!r}", f"steps={self.steps!r}"]
        if self.description:
            parts.append(f"description={self.description!r}")
        return "SchemaProjectionEntry(" + ", ".join(parts) + ")"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class SchemaProjectionStep:
    """A single schema projection step.

    Attributes
    ----------
    schema:
        Name of the registered source schema the step selects from.
    kind:
        Step kind. ``PanderaCatalog`` recognises ``"columns"`` and
        ``"group"``; ``"group"`` is rejected there as not implemented.
    names:
        Names selected by the step. For ``"columns"`` steps the catalog
        treats these as column names of the source schema.
    """

    schema: str
    kind: str
    names: list[str]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# Public names exported by this module.
__all__ = ["SchemaEntry", "SchemaProjectionEntry", "SchemaProjectionStep"]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pandera-catalog
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A database-backed catalog for defining and managing Pandera schemas.
|
|
5
|
+
Author-email: Greg Elphick <11791585+elphick@users.noreply.github.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
11
|
+
Requires-Python: <3.14,>=3.11
|
|
12
|
+
Requires-Dist: pandas>=2.0
|
|
13
|
+
Requires-Dist: pandera>=0.19
|
|
14
|
+
Requires-Dist: pyyaml>=6.0
|
|
15
|
+
Provides-Extra: all
|
|
16
|
+
Requires-Dist: plotly<7,>=6.0; extra == 'all'
|
|
17
|
+
Requires-Dist: pydantic>=2.0; extra == 'all'
|
|
18
|
+
Provides-Extra: schema
|
|
19
|
+
Requires-Dist: pydantic>=2.0; extra == 'schema'
|
|
20
|
+
Provides-Extra: viz
|
|
21
|
+
Requires-Dist: plotly<7,>=6.0; extra == 'viz'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
<h1 style="display: inline-flex; align-items: center; gap: 0.4rem; margin: 0;">
|
|
25
|
+
<img src="https://raw.githubusercontent.com/elphick/pandera-catalog/main/docs/_static/branding/pandera-catalog.svg" alt="pandera-catalog logo" width="72" style="display: block; margin-top: 20px;" />
|
|
26
|
+
<span>pandera-catalog</span>
|
|
27
|
+
</h1>
|
|
28
|
+
|
|
29
|
+
[Build and test workflow](https://github.com/elphick/pandera-catalog/actions/workflows/build_and_test.yml)
|
|
30
|
+
[pandera-catalog on PyPI](https://pypi.org/project/pandera-catalog/)
|
|
31
|
+

|
|
32
|
+
[](https://pypi.org/project/pandera-catalog/)
|
|
33
|
+
[](https://pypi.org/project/pandera-catalog/)
|
|
34
|
+
[Issue tracker](https://github.com/elphick/pandera-catalog/issues)
|
|
35
|
+
|
|
36
|
+
## Overview
|
|
37
|
+
|
|
38
|
+
`pandera-catalog` is a Python package that provides a catalog (currently in-memory; database-backed persistence is planned) for registering, looking up, and
|
|
39
|
+
managing [Pandera](https://pandera.readthedocs.io/) schemas. It is designed to grow into a SQLAlchemy-connected
|
|
40
|
+
(SQLite initially) registry of schema entries, validation rules, and schema metadata — keeping schemas
|
|
41
|
+
organized, discoverable, and version-aware.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
Install the base package:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install pandera-catalog
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Or with `uv`:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
uv add pandera-catalog
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Install optional extras for schema validation helpers using Pydantic:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install "pandera-catalog[schema]"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Quick Usage
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import pandera.pandas as pa
|
|
67
|
+
from pandera_catalog import PanderaCatalog
|
|
68
|
+
|
|
69
|
+
# Create a catalog
|
|
70
|
+
catalog = PanderaCatalog()
|
|
71
|
+
|
|
72
|
+
# Define a schema
|
|
73
|
+
schema = pa.DataFrameSchema(
|
|
74
|
+
columns={
|
|
75
|
+
"id": pa.Column(int),
|
|
76
|
+
"value": pa.Column(float, pa.Check.greater_than(0)),
|
|
77
|
+
}
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# Register the schema
|
|
81
|
+
catalog.register("my_schema", schema)
|
|
82
|
+
|
|
83
|
+
# Look up the schema
|
|
84
|
+
retrieved = catalog.get("my_schema")
|
|
85
|
+
print(retrieved)
|
|
86
|
+
|
|
87
|
+
# List all registered schemas
|
|
88
|
+
print(catalog.list())
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Loading schemas from YAML
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from pathlib import Path
|
|
95
|
+
from pandera_catalog import PanderaCatalog
|
|
96
|
+
from pandera_catalog.schemas import load_schema_from_yaml
|
|
97
|
+
|
|
98
|
+
catalog = PanderaCatalog()
|
|
99
|
+
schema = load_schema_from_yaml(Path("schemas/my_schema.yaml"))
|
|
100
|
+
catalog.register("my_schema", schema)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
See the [documentation](https://elphick.github.io/pandera-catalog/) and
|
|
104
|
+
[examples](examples/) for more detail.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
pandera_catalog/__init__.py,sha256=5h7UHUqUFlNtA5CyNye4cYtUmBZSAJMEggyABN64uT4,477
|
|
2
|
+
pandera_catalog/catalog.py,sha256=if6C6V-KTUiLW3INduWSgpp8mdjs8AA3pyYaRA30iw4,11165
|
|
3
|
+
pandera_catalog/schemas.py,sha256=of4S_pUT_0BcWs00kdl4XIDHTqzInkrIWwqVp11LrYQ,1993
|
|
4
|
+
pandera_catalog/types.py,sha256=BEyZhg9nJ51pLlizP6rfYplIqw0gKwX3aMVEe-NUI1w,1634
|
|
5
|
+
pandera_catalog/utils/__init__.py,sha256=duRo6yjdtevOp8nneLMHqRA3OEah2FXyePeLZ1xx68U,68
|
|
6
|
+
pandera_catalog-0.2.0.dist-info/METADATA,sha256=fjPHrCsOp-pjN1Vod1MqWssDlYcAa2EL10SujxLG1I0,3551
|
|
7
|
+
pandera_catalog-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
pandera_catalog-0.2.0.dist-info/licenses/LICENSE,sha256=WUlvMlvb1bMwFWopOL0z-7OO-JlkC5nYuzJj8e5Db5Y,1069
|
|
9
|
+
pandera_catalog-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Greg Elphick
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|