pinaxlib 5.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pinax/__init__.py +135 -0
- pinax/expr.py +167 -0
- pinax/model/__init__.py +56 -0
- pinax/model/agents.py +32 -0
- pinax/model/catalog.py +58 -0
- pinax/model/classification.py +19 -0
- pinax/model/dataset.py +165 -0
- pinax/model/distribution.py +50 -0
- pinax/model/licence.py +18 -0
- pinax/model/quality.py +31 -0
- pinax/model/skos.py +141 -0
- pinax/model/spatial.py +19 -0
- pinax/model/temporal.py +33 -0
- pinax/model/types.py +5 -0
- pinax/py.typed +0 -0
- pinax/query.py +563 -0
- pinax/sources/__init__.py +13 -0
- pinax/sources/ckan.py +429 -0
- pinax/sources/sdmx.py +304 -0
- pinax/store/__init__.py +18 -0
- pinax/store/lance.py +474 -0
- pinax/store/row.py +32 -0
- pinax/store/schema.py +388 -0
- pinax/store/scopes.py +1355 -0
- pinax/store/sentinel.py +87 -0
- pinax/store/store.py +4169 -0
- pinax/urn.py +110 -0
- pinaxlib-5.7.0.dist-info/METADATA +422 -0
- pinaxlib-5.7.0.dist-info/RECORD +31 -0
- pinaxlib-5.7.0.dist-info/WHEEL +4 -0
- pinaxlib-5.7.0.dist-info/licenses/LICENSE +180 -0
pinax/__init__.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""pinax — a queryable open data catalog engine."""
|
|
2
|
+
|
|
3
|
+
from pinax.model import (
|
|
4
|
+
Agent,
|
|
5
|
+
AggregateDataset,
|
|
6
|
+
BaseDataset,
|
|
7
|
+
Catalog,
|
|
8
|
+
CatalogRecord,
|
|
9
|
+
Concept,
|
|
10
|
+
ConceptRef,
|
|
11
|
+
ConceptScheme,
|
|
12
|
+
ConceptSchemeRef,
|
|
13
|
+
ContactPoint,
|
|
14
|
+
Dataset,
|
|
15
|
+
DataService,
|
|
16
|
+
DatasetT,
|
|
17
|
+
Distribution,
|
|
18
|
+
Frequency,
|
|
19
|
+
GeospatialDataset,
|
|
20
|
+
InternationalString,
|
|
21
|
+
LicenceDocument,
|
|
22
|
+
MicrodataDataset,
|
|
23
|
+
OpenDataset,
|
|
24
|
+
ProvenanceStatement,
|
|
25
|
+
PublicationDataset,
|
|
26
|
+
QualityAnnotation,
|
|
27
|
+
SpatialCoverage,
|
|
28
|
+
Standard,
|
|
29
|
+
StatisticalDataset,
|
|
30
|
+
TemporalCoverage,
|
|
31
|
+
Variable,
|
|
32
|
+
is_concept_in_scheme,
|
|
33
|
+
)
|
|
34
|
+
from pinax.sources import (
|
|
35
|
+
dataset_from_dataflow,
|
|
36
|
+
ingest_ckan,
|
|
37
|
+
ingest_data,
|
|
38
|
+
ingest_sdmx,
|
|
39
|
+
ingest_sdmx_registry,
|
|
40
|
+
prepare_sdmx,
|
|
41
|
+
)
|
|
42
|
+
from pinax import query, urn
|
|
43
|
+
from pinax.expr import (
|
|
44
|
+
AliasedExpr,
|
|
45
|
+
CountExpr,
|
|
46
|
+
ExistsExpr,
|
|
47
|
+
Expr,
|
|
48
|
+
TraversalExpr,
|
|
49
|
+
each,
|
|
50
|
+
)
|
|
51
|
+
from pinax.store import (
|
|
52
|
+
CatalogStore,
|
|
53
|
+
CodeScope,
|
|
54
|
+
CodeSearchResult,
|
|
55
|
+
CodelistScope,
|
|
56
|
+
CodelistsScope,
|
|
57
|
+
ConceptScope,
|
|
58
|
+
ConceptSchemeScope,
|
|
59
|
+
ConceptSchemesScope,
|
|
60
|
+
DatasetScope,
|
|
61
|
+
DimensionInfo,
|
|
62
|
+
DimensionScope,
|
|
63
|
+
DimensionsScope,
|
|
64
|
+
QueryBuilder,
|
|
65
|
+
Row,
|
|
66
|
+
SchemeScope,
|
|
67
|
+
SearchResult,
|
|
68
|
+
ThemesScope,
|
|
69
|
+
)
|
|
70
|
+
from pinax.store.sentinel import NotLoadedError, is_unloaded
|
|
71
|
+
|
|
72
|
+
__all__ = [
|
|
73
|
+
"Agent",
|
|
74
|
+
"AggregateDataset",
|
|
75
|
+
"AliasedExpr",
|
|
76
|
+
"BaseDataset",
|
|
77
|
+
"Catalog",
|
|
78
|
+
"CatalogRecord",
|
|
79
|
+
"CatalogStore",
|
|
80
|
+
"CodeScope",
|
|
81
|
+
"CodeSearchResult",
|
|
82
|
+
"CodelistScope",
|
|
83
|
+
"CodelistsScope",
|
|
84
|
+
"Concept",
|
|
85
|
+
"ConceptRef",
|
|
86
|
+
"ConceptScheme",
|
|
87
|
+
"ConceptSchemeRef",
|
|
88
|
+
"ConceptSchemeScope",
|
|
89
|
+
"ConceptSchemesScope",
|
|
90
|
+
"ConceptScope",
|
|
91
|
+
"ContactPoint",
|
|
92
|
+
"CountExpr",
|
|
93
|
+
"DataService",
|
|
94
|
+
"Dataset",
|
|
95
|
+
"DatasetScope",
|
|
96
|
+
"DatasetT",
|
|
97
|
+
"DimensionInfo",
|
|
98
|
+
"DimensionScope",
|
|
99
|
+
"DimensionsScope",
|
|
100
|
+
"Distribution",
|
|
101
|
+
"ExistsExpr",
|
|
102
|
+
"Expr",
|
|
103
|
+
"Frequency",
|
|
104
|
+
"GeospatialDataset",
|
|
105
|
+
"InternationalString",
|
|
106
|
+
"LicenceDocument",
|
|
107
|
+
"MicrodataDataset",
|
|
108
|
+
"NotLoadedError",
|
|
109
|
+
"OpenDataset",
|
|
110
|
+
"ProvenanceStatement",
|
|
111
|
+
"PublicationDataset",
|
|
112
|
+
"QualityAnnotation",
|
|
113
|
+
"QueryBuilder",
|
|
114
|
+
"Row",
|
|
115
|
+
"SchemeScope",
|
|
116
|
+
"SearchResult",
|
|
117
|
+
"SpatialCoverage",
|
|
118
|
+
"Standard",
|
|
119
|
+
"StatisticalDataset",
|
|
120
|
+
"TemporalCoverage",
|
|
121
|
+
"ThemesScope",
|
|
122
|
+
"TraversalExpr",
|
|
123
|
+
"Variable",
|
|
124
|
+
"dataset_from_dataflow",
|
|
125
|
+
"each",
|
|
126
|
+
"ingest_ckan",
|
|
127
|
+
"ingest_data",
|
|
128
|
+
"ingest_sdmx",
|
|
129
|
+
"ingest_sdmx_registry",
|
|
130
|
+
"is_concept_in_scheme",
|
|
131
|
+
"is_unloaded",
|
|
132
|
+
"prepare_sdmx",
|
|
133
|
+
"query",
|
|
134
|
+
"urn",
|
|
135
|
+
]
|
pinax/expr.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Sub-traversal expression language for scope-based graph queries.
|
|
2
|
+
|
|
3
|
+
Expressions describe computations over related graph data without executing
|
|
4
|
+
any SQL. They are context-free objects — like ``pl.col()`` in Polars — and
|
|
5
|
+
can be reused across ``.enrich()``, ``.filter()``, and ``.sort_by()`` on any
|
|
6
|
+
collection scope.
|
|
7
|
+
|
|
8
|
+
Usage::
|
|
9
|
+
|
|
10
|
+
import pinax as pk
|
|
11
|
+
|
|
12
|
+
n_datasets = pk.each("datasets").count()
|
|
13
|
+
has_data = pk.each("datasets").exists()
|
|
14
|
+
|
|
15
|
+
store.themes["statcan"].enrich(n=n_datasets).filter(has_data).collect()
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from attrs import define, field
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Base expression types
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@define(frozen=True)
class Expr:
    """Root of the expression-node hierarchy.

    Every method builds and returns a new node that wraps ``self``; nothing
    is evaluated here and ``self`` is never modified.
    """

    def alias(self, name: str) -> AliasedExpr:
        """Attach an output name to this expression (cf. Polars ``.alias()``)."""
        return AliasedExpr(self, name)

    def count(self) -> CountExpr:
        """Build a node counting the items at the far end of the traversal."""
        return CountExpr(self)

    def exists(self) -> ExistsExpr:
        """Build a node testing whether any far-end items exist."""
        return ExistsExpr(self)

    def sum(self, field: str) -> SumExpr:
        """Build a node summing the numeric *field* at the far end."""
        return SumExpr(self, field)

    def ids(self) -> IdsExpr:
        """Build a node collecting the far-end identifiers."""
        return IdsExpr(self)

    def distinct(self, field: str) -> DistinctExpr:
        """Build a node selecting the distinct far-end values of *field*."""
        return DistinctExpr(self, field)

    def filter(self, **kwargs: object) -> FilteredExpr:
        """Build a node restricting far-side nodes to the given attribute values.

        The keyword arguments are stored as the node's ``conditions`` mapping.
        """
        return FilteredExpr(self, kwargs)

    def count_distinct(self) -> CountDistinctExpr:
        """Build a node counting the distinct items at the far end."""
        return CountDistinctExpr(self)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@define(frozen=True)
class TraversalExpr(Expr):
    """Path of one or more edges, starting at the current scope position.

    Built by ``pk.each("edge1", "edge2", ...)``. Every string is an edge
    label, i.e. a table name or relationship name in the graph.
    """

    # Edge labels in hop order.
    edges: tuple[str, ...]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@define(frozen=True)
class AliasedExpr:
    """Pairs an expression with the name of its output column.

    Produced by ``expr.alias("name")`` or through ``**kwargs`` in
    ``.enrich()``. Note that this class does not derive from ``Expr``.
    """

    # The wrapped expression.
    expr: Expr
    # The output column name.
    name: str
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Terminal reduction expressions
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@define(frozen=True)
class CountExpr(Expr):
    """Reduction node: COUNT(*) over the far end of ``source``."""

    # The expression being counted.
    source: Expr
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@define(frozen=True)
class ExistsExpr(Expr):
    """Reduction node: EXISTS(...) — true when ``source`` reaches any item."""

    # The expression being tested for far-side items.
    source: Expr
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@define(frozen=True)
class SumExpr(Expr):
    """Reduction node: SUM(field) over the far end of ``source``."""

    # The expression whose far-side items are summed.
    source: Expr
    # Name of the field to sum.
    field: str
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@define(frozen=True)
class IdsExpr(Expr):
    """Reduction node: LIST(id) — the identifiers at the far end of ``source``."""

    # The expression whose far-side identifiers are collected.
    source: Expr
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@define(frozen=True)
class DistinctExpr(Expr):
    """SELECT DISTINCT field — narrows ``source`` to the unique values of a field."""

    # The expression being narrowed.
    source: Expr
    # Name of the field whose distinct values are selected.
    field: str
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@define(frozen=True)
class CountDistinctExpr(Expr):
    """Reduction node: COUNT(DISTINCT ...) over the far end of ``source``."""

    # The expression whose distinct far-side items are counted.
    source: Expr
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@define(frozen=True)
class FilteredExpr(Expr):
    """A traversal with far-side filtering applied.

    Created by ``pk.each("datasets").filter(status="current")``.

    Instances are hashable: the hash is derived from ``source`` only, while
    equality still compares both ``source`` and ``conditions``.
    """

    # The expression whose far-side nodes are filtered.
    source: Expr
    # Attribute name -> required value, as passed to ``Expr.filter(**kwargs)``.
    # A dict is unhashable, so it is excluded from the attrs-generated
    # ``__hash__``; otherwise hashing this node (or any node wrapping it)
    # raises TypeError. The eq/hash contract still holds because equal nodes
    # necessarily share the same ``source``.
    conditions: dict[str, object] = field(hash=False)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
# Public entry point
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def each(*edges: str) -> TraversalExpr:
    """Build a sub-traversal expression rooted at the current scope.

    Every positional argument is one edge label (relationship name); passing
    several describes a multi-hop path::

        pk.each("datasets")                            # one hop
        pk.each("datasets", "dimensions")              # two hops
        pk.each("datasets", "dimensions", "codelist")  # three hops

    The result is context-free, so the same expression can be handed to
    ``.enrich()``, ``.filter()``, and ``.sort_by()`` on any collection scope.

    Raises:
        ValueError: If called without any edge label.
    """
    if edges:
        return TraversalExpr(edges=edges)
    msg = "each() requires at least one edge label"
    raise ValueError(msg)
|
pinax/model/__init__.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Domain model for pinax."""
|
|
2
|
+
|
|
3
|
+
from pinax.model.agents import Agent, ContactPoint
|
|
4
|
+
from pinax.model.catalog import Catalog, CatalogRecord
|
|
5
|
+
from pinax.model.classification import Standard
|
|
6
|
+
from pinax.model.dataset import (
|
|
7
|
+
AggregateDataset,
|
|
8
|
+
BaseDataset,
|
|
9
|
+
Dataset,
|
|
10
|
+
DatasetT,
|
|
11
|
+
GeospatialDataset,
|
|
12
|
+
MicrodataDataset,
|
|
13
|
+
OpenDataset,
|
|
14
|
+
PublicationDataset,
|
|
15
|
+
StatisticalDataset,
|
|
16
|
+
Variable,
|
|
17
|
+
)
|
|
18
|
+
from pinax.model.distribution import DataService, Distribution
|
|
19
|
+
from pinax.model.licence import LicenceDocument
|
|
20
|
+
from pinax.model.quality import ProvenanceStatement, QualityAnnotation
|
|
21
|
+
from pinax.model.spatial import SpatialCoverage
|
|
22
|
+
from pinax.model.temporal import Frequency, TemporalCoverage
|
|
23
|
+
from pinax.model.skos import Concept, ConceptRef, ConceptScheme, ConceptSchemeRef, is_concept_in_scheme
|
|
24
|
+
from pinax.model.types import InternationalString
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"Agent",
|
|
28
|
+
"AggregateDataset",
|
|
29
|
+
"BaseDataset",
|
|
30
|
+
"Catalog",
|
|
31
|
+
"CatalogRecord",
|
|
32
|
+
"Concept",
|
|
33
|
+
"ConceptRef",
|
|
34
|
+
"ConceptScheme",
|
|
35
|
+
"ConceptSchemeRef",
|
|
36
|
+
"ContactPoint",
|
|
37
|
+
"DataService",
|
|
38
|
+
"Dataset",
|
|
39
|
+
"DatasetT",
|
|
40
|
+
"Distribution",
|
|
41
|
+
"Frequency",
|
|
42
|
+
"GeospatialDataset",
|
|
43
|
+
"InternationalString",
|
|
44
|
+
"LicenceDocument",
|
|
45
|
+
"MicrodataDataset",
|
|
46
|
+
"OpenDataset",
|
|
47
|
+
"ProvenanceStatement",
|
|
48
|
+
"PublicationDataset",
|
|
49
|
+
"QualityAnnotation",
|
|
50
|
+
"SpatialCoverage",
|
|
51
|
+
"Standard",
|
|
52
|
+
"StatisticalDataset",
|
|
53
|
+
"TemporalCoverage",
|
|
54
|
+
"Variable",
|
|
55
|
+
"is_concept_in_scheme",
|
|
56
|
+
]
|
pinax/model/agents.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Agent and contact types for the pinax domain model."""
|
|
2
|
+
|
|
3
|
+
from attrs import define
|
|
4
|
+
from sdmxlib import InternationalString
|
|
5
|
+
|
|
6
|
+
__all__ = ["Agent", "ContactPoint"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@define
class Agent:
    """Organization or person responsible for a dataset.

    DCAT-AP mapping: foaf:Agent / dct:publisher / dct:creator.
    """

    # Only ``name`` is required; the remaining fields default to None.
    name: InternationalString
    id: str | None = None
    type: str | None = None
    homepage: str | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@define
class ContactPoint:
    """Contact details for a dataset.

    DCAT-AP mapping: dcat:contactPoint → vcard:Kind.
    """

    # Every field is optional and defaults to None.
    name: InternationalString | None = None
    email: str | None = None
    phone: str | None = None
    url: str | None = None
|
pinax/model/catalog.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Catalog and CatalogRecord types for the pinax domain model."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from attrs import Factory, define
|
|
6
|
+
from sdmxlib import InternationalString
|
|
7
|
+
|
|
8
|
+
from pinax.model.agents import Agent
|
|
9
|
+
from pinax.model.classification import Standard
|
|
10
|
+
from pinax.model.dataset import BaseDataset
|
|
11
|
+
from pinax.model.distribution import DataService
|
|
12
|
+
from pinax.model.licence import LicenceDocument
|
|
13
|
+
from pinax.model.skos import ConceptSchemeRef
|
|
14
|
+
from pinax.model.spatial import SpatialCoverage
|
|
15
|
+
from pinax.model.temporal import TemporalCoverage
|
|
16
|
+
|
|
17
|
+
__all__ = ["Catalog", "CatalogRecord"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@define
class CatalogRecord:
    """Metadata describing a dataset's entry in the catalog.

    DCAT-AP mapping: dcat:CatalogRecord. The record is about the catalog
    entry itself (when it was listed, where it was harvested from, and so
    on), not about the dataset. It is optional: not every catalog uses it.
    """

    # Required: the record's identifier and the dataset it describes.
    identifier: str
    primary_topic: BaseDataset
    # Optional record-level metadata.
    listing_date: datetime | None = None
    modified: datetime | None = None
    source_catalog: str | None = None
    conforms_to: Standard | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@define
class Catalog:
    """Top-level container: a catalog of datasets.

    DCAT-AP mapping: dcat:Catalog.
    """

    # Required identity fields.
    identifier: str
    title: InternationalString
    description: InternationalString
    publisher: Agent
    # Contents; each list field gets its own fresh empty list per instance.
    datasets: list[BaseDataset] = Factory(list)
    services: list[DataService] = Factory(list)
    records: list[CatalogRecord] = Factory(list)
    # Optional catalog-level metadata.
    homepage: str | None = None
    language: list[str] = Factory(list)
    licence: LicenceDocument | None = None
    spatial_coverage: list[SpatialCoverage] = Factory(list)
    temporal_coverage: TemporalCoverage | None = None
    theme_taxonomy: list[ConceptSchemeRef] = Factory(list)
    issued: datetime | None = None
    modified: datetime | None = None
|
pinax/model/classification.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Classification types for the pinax domain model."""
|
|
2
|
+
|
|
3
|
+
from attrs import define
|
|
4
|
+
from sdmxlib import InternationalString
|
|
5
|
+
|
|
6
|
+
__all__ = ["Standard"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@define
class Standard:
    """Standard or specification that a dataset conforms to.

    DCAT-AP mapping: dct:conformsTo. Used for linking to SDMX DSDs, CSV
    schemas, API specifications, and similar.
    """

    # ``id`` is required; ``label`` and ``uri`` default to None.
    id: str
    label: InternationalString | None = None
    uri: str | None = None
|
pinax/model/dataset.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Core dataset types for the pinax domain model."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any, Literal, TypeVar
|
|
7
|
+
|
|
8
|
+
from attrs import Factory, define
|
|
9
|
+
from sdmxlib import InternationalString
|
|
10
|
+
|
|
11
|
+
from pinax.model.agents import Agent, ContactPoint
|
|
12
|
+
from pinax.model.classification import Standard
|
|
13
|
+
from pinax.model.distribution import Distribution
|
|
14
|
+
from pinax.model.licence import LicenceDocument
|
|
15
|
+
from pinax.model.quality import ProvenanceStatement, QualityAnnotation
|
|
16
|
+
from pinax.model.skos import ConceptRef
|
|
17
|
+
from pinax.model.spatial import SpatialCoverage
|
|
18
|
+
from pinax.model.temporal import Frequency, TemporalCoverage
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"AggregateDataset",
|
|
22
|
+
"BaseDataset",
|
|
23
|
+
"Dataset",
|
|
24
|
+
"DatasetT",
|
|
25
|
+
"GeospatialDataset",
|
|
26
|
+
"MicrodataDataset",
|
|
27
|
+
"OpenDataset",
|
|
28
|
+
"PublicationDataset",
|
|
29
|
+
"StatisticalDataset",
|
|
30
|
+
"Variable",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@define
class BaseDataset:
    """Fields shared by every dataset variant (DCAT-AP 3.0 mapping).

    This class is not meant to be constructed directly. Use one of the five
    concrete variants instead: OpenDataset, AggregateDataset,
    MicrodataDataset, GeospatialDataset, PublicationDataset.
    """

    # --- Identity: required on every dataset ---
    identifier: str
    title: InternationalString
    description: InternationalString
    # Discriminator. Each variant narrows this to a Literal and supplies a default.
    kind: str

    # --- Core catalog metadata ---
    publisher: Agent | None = None
    contact_point: ContactPoint | None = None
    keywords: list[InternationalString] = Factory(list)
    # dcat:theme — URI references; the concepts are not owned by the dataset.
    themes: list[ConceptRef] = Factory(list)
    # dcterms:subject — URI references.
    subject: list[ConceptRef] = Factory(list)
    # dcterms:type — URI reference.
    dataset_type: ConceptRef | None = None
    issued: datetime | None = None
    modified: datetime | None = None
    frequency: Frequency | None = None
    spatial_coverage: list[SpatialCoverage] = Factory(list)
    temporal_coverage: TemporalCoverage | None = None
    distributions: list[Distribution] = Factory(list)
    licence: LicenceDocument | None = None
    language: list[str] = Factory(list)

    # --- Extended metadata ---
    landing_page: str | None = None
    version: str | None = None
    version_notes: InternationalString | None = None
    conforms_to: list[Standard] = Factory(list)
    creator: Agent | None = None
    access_rights: str | None = None
    status: str | None = None
    provenance: list[ProvenanceStatement] = Factory(list)
    quality_annotations: list[QualityAnnotation] = Factory(list)

    # --- Extensibility: free-form key/value payload ---
    extras: dict[str, Any] = Factory(dict)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@define
class OpenDataset(BaseDataset):
    """Generic open dataset: the default bucket for data with no specialized variant."""

    # Narrows the base discriminator and gives it a default value.
    kind: Literal["open"] = "open"  # pyright: ignore[reportIncompatibleVariableOverride]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@define
class AggregateDataset(BaseDataset):
    """Statistical aggregate dataset (tabular, with dimensions and measures).

    Adds StatDCAT-AP discovery fields to BaseDataset, plus an optional link
    to the full SDMX structural record held in sdmxlib's shared DuckDB
    tables. ``dimension_names`` carries enough information for catalog
    search without a JOIN.
    """

    # Narrows the base discriminator and gives it a default value.
    kind: Literal["aggregate"] = "aggregate"  # pyright: ignore[reportIncompatibleVariableOverride]
    dimension_names: list[InternationalString] = Factory(list)
    num_series: int | None = None
    # SDMX link: foreign key into sdmxlib's dataflows.urn in the shared DuckDB.
    sdmx_dataflow_urn: str | None = None
    # Pivot-axis dimension for ingest_data(); None means auto-detect from the DSD.
    measure_dim: str | None = None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@define
class Variable:
    """One variable of a microdata dataset."""

    # Required fields.
    id: str
    label: InternationalString
    # Optional descriptive fields; all default to None.
    description: InternationalString | None = None
    value_type: str | None = None
    classification_id: str | None = None
    universe: str | None = None
    question_text: str | None = None
    valid_range: str | None = None
    derivation: str | None = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@define
class MicrodataDataset(BaseDataset):
    """Microdata dataset, i.e. survey or record-level data."""

    # Narrows the base discriminator and gives it a default value.
    kind: Literal["microdata"] = "microdata"  # pyright: ignore[reportIncompatibleVariableOverride]
    variables: list[Variable] = Factory(list)
    sample_size: int | None = None
    universe: str | None = None
    collection_method: str | None = None
    anonymization: str | None = None
    survey_id: str | None = None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@define
class GeospatialDataset(BaseDataset):
    """Dataset variant for geospatial data."""

    # Narrows the base discriminator and gives it a default value.
    kind: Literal["geospatial"] = "geospatial"  # pyright: ignore[reportIncompatibleVariableOverride]
    crs: str | None = None
    spatial_resolution: str | None = None
    feature_types: list[str] = Factory(list)
    scale: str | None = None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@define
class PublicationDataset(BaseDataset):
    """Dataset variant for publications (article, report, monograph)."""

    # Narrows the base discriminator and gives it a default value.
    kind: Literal["publication"] = "publication"  # pyright: ignore[reportIncompatibleVariableOverride]
    authors: list[str] = Factory(list)
    doi: str | None = None
    isbn: str | None = None
    publication_type: str | None = None
    pages: int | None = None
    series_title: InternationalString | None = None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# Public union type alias — the type you work with day-to-day.
|
|
158
|
+
# Use BaseDataset as the generic bound in query() and other generic contexts.
|
|
159
|
+
type Dataset = OpenDataset | AggregateDataset | MicrodataDataset | GeospatialDataset | PublicationDataset
|
|
160
|
+
|
|
161
|
+
# TypeVar for pre-PEP-695 generic code
|
|
162
|
+
DatasetT = TypeVar("DatasetT", bound=BaseDataset)
|
|
163
|
+
|
|
164
|
+
# Backward compatibility alias — AggregateDataset is the successor
|
|
165
|
+
StatisticalDataset = AggregateDataset
|
pinax/model/distribution.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Distribution and DataService types for the pinax domain model."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from attrs import Factory, define
|
|
6
|
+
from sdmxlib import InternationalString
|
|
7
|
+
|
|
8
|
+
from pinax.model.classification import Standard
|
|
9
|
+
from pinax.model.licence import LicenceDocument
|
|
10
|
+
|
|
11
|
+
__all__ = ["DataService", "Distribution"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@define
class DataService:
    """API endpoint serving one or more datasets.

    DCAT-AP mapping: dcat:DataService. For SDMX sources, ``endpoint_url``
    is the SDMX REST base URL.
    """

    # Required fields.
    id: str
    title: InternationalString
    endpoint_url: str
    # Optional fields; each list field gets a fresh empty list per instance.
    description: InternationalString | None = None
    serves_datasets: list[str] = Factory(list)
    conforms_to: list[Standard] = Factory(list)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@define
class Distribution:
    """One specific available form of a dataset.

    DCAT-AP mapping: dcat:Distribution. A single dataset may have several
    distributions: CSV download, API endpoint, visualization, and so on.
    """

    # ``id`` is the only required field; everything else defaults to None.
    id: str
    title: InternationalString | None = None
    description: InternationalString | None = None
    access_url: str | None = None
    download_url: str | None = None
    media_type: str | None = None
    format: str | None = None
    byte_size: int | None = None
    issued: datetime | None = None
    modified: datetime | None = None
    licence: LicenceDocument | None = None
    access_service: DataService | None = None
    distribution_type: str | None = None
|
pinax/model/licence.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Licence types for the pinax domain model."""
|
|
2
|
+
|
|
3
|
+
from attrs import define
|
|
4
|
+
from sdmxlib import InternationalString
|
|
5
|
+
|
|
6
|
+
__all__ = ["LicenceDocument"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@define
class LicenceDocument:
    """Licence under which a dataset is made available.

    DCAT-AP mapping: dct:license → dct:LicenseDocument.
    """

    # ``id`` is required; ``label`` and ``uri`` default to None.
    id: str
    label: InternationalString | None = None
    uri: str | None = None
|