fairspec-metadata 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fairspec_metadata/__init__.py +504 -0
- fairspec_metadata/actions/column/create.py +175 -0
- fairspec_metadata/actions/column/create_spec.py +83 -0
- fairspec_metadata/actions/column/property.py +28 -0
- fairspec_metadata/actions/column/property_spec.py +29 -0
- fairspec_metadata/actions/data_schema/assert_.py +16 -0
- fairspec_metadata/actions/data_schema/load.py +11 -0
- fairspec_metadata/actions/data_schema/resolve.py +16 -0
- fairspec_metadata/actions/data_schema/save.py +22 -0
- fairspec_metadata/actions/data_schema/validate.py +46 -0
- fairspec_metadata/actions/dataset/assert_.py +16 -0
- fairspec_metadata/actions/dataset/denormalize.py +16 -0
- fairspec_metadata/actions/dataset/fixtures/dataset-invalid.json +8 -0
- fairspec_metadata/actions/dataset/fixtures/dataset.json +66 -0
- fairspec_metadata/actions/dataset/fixtures/schema.json +12 -0
- fairspec_metadata/actions/dataset/fixtures/table.csv +3 -0
- fairspec_metadata/actions/dataset/load.py +13 -0
- fairspec_metadata/actions/dataset/normalize.py +16 -0
- fairspec_metadata/actions/dataset/save.py +30 -0
- fairspec_metadata/actions/dataset/save_spec.py +102 -0
- fairspec_metadata/actions/dataset/validate.py +77 -0
- fairspec_metadata/actions/dataset/validate_spec.py +30 -0
- fairspec_metadata/actions/descriptor/copy.py +9 -0
- fairspec_metadata/actions/descriptor/fixtures/schema.json +1 -0
- fairspec_metadata/actions/descriptor/general.py +7 -0
- fairspec_metadata/actions/descriptor/load.py +47 -0
- fairspec_metadata/actions/descriptor/load_spec.py +40 -0
- fairspec_metadata/actions/descriptor/parse.py +16 -0
- fairspec_metadata/actions/descriptor/save.py +19 -0
- fairspec_metadata/actions/descriptor/save_spec.py +47 -0
- fairspec_metadata/actions/descriptor/stringify.py +9 -0
- fairspec_metadata/actions/descriptor/validate.py +31 -0
- fairspec_metadata/actions/file_dialect/assert_.py +20 -0
- fairspec_metadata/actions/file_dialect/infer.py +35 -0
- fairspec_metadata/actions/file_dialect/infer_spec.py +44 -0
- fairspec_metadata/actions/file_dialect/load.py +15 -0
- fairspec_metadata/actions/file_dialect/resolve.py +20 -0
- fairspec_metadata/actions/file_dialect/save.py +21 -0
- fairspec_metadata/actions/file_dialect/support.py +41 -0
- fairspec_metadata/actions/file_dialect/validate.py +49 -0
- fairspec_metadata/actions/json/inspect.py +59 -0
- fairspec_metadata/actions/json/inspect_spec.py +49 -0
- fairspec_metadata/actions/json_schema/assert_.py +21 -0
- fairspec_metadata/actions/json_schema/inspect.py +43 -0
- fairspec_metadata/actions/json_schema/inspect_spec.py +68 -0
- fairspec_metadata/actions/json_schema/load.py +23 -0
- fairspec_metadata/actions/json_schema/resolve.py +16 -0
- fairspec_metadata/actions/json_schema/save.py +13 -0
- fairspec_metadata/actions/path/basepath.py +37 -0
- fairspec_metadata/actions/path/basepath_spec.py +59 -0
- fairspec_metadata/actions/path/denormalize.py +43 -0
- fairspec_metadata/actions/path/denormalize_spec.py +56 -0
- fairspec_metadata/actions/path/general.py +71 -0
- fairspec_metadata/actions/path/general_spec.py +167 -0
- fairspec_metadata/actions/path/normalize.py +55 -0
- fairspec_metadata/actions/path/normalize_spec.py +76 -0
- fairspec_metadata/actions/profile/assert_.py +37 -0
- fairspec_metadata/actions/profile/assert_spec.py +89 -0
- fairspec_metadata/actions/profile/load.py +19 -0
- fairspec_metadata/actions/profile/registry.py +51 -0
- fairspec_metadata/actions/report/create.py +14 -0
- fairspec_metadata/actions/report/create_spec.py +28 -0
- fairspec_metadata/actions/resource/data.py +50 -0
- fairspec_metadata/actions/resource/data_spec.py +64 -0
- fairspec_metadata/actions/resource/denormalize.py +30 -0
- fairspec_metadata/actions/resource/general.py +19 -0
- fairspec_metadata/actions/resource/infer.py +23 -0
- fairspec_metadata/actions/resource/infer_spec.py +33 -0
- fairspec_metadata/actions/resource/normalize.py +30 -0
- fairspec_metadata/actions/table_schema/assert_.py +16 -0
- fairspec_metadata/actions/table_schema/column.py +24 -0
- fairspec_metadata/actions/table_schema/column_spec.py +55 -0
- fairspec_metadata/actions/table_schema/load.py +11 -0
- fairspec_metadata/actions/table_schema/resolve.py +17 -0
- fairspec_metadata/actions/table_schema/save.py +21 -0
- fairspec_metadata/actions/table_schema/validate.py +47 -0
- fairspec_metadata/actions/table_schema/validate_spec.py +45 -0
- fairspec_metadata/models/base.py +5 -0
- fairspec_metadata/models/catalog.py +20 -0
- fairspec_metadata/models/column/array.py +65 -0
- fairspec_metadata/models/column/base.py +41 -0
- fairspec_metadata/models/column/base64.py +15 -0
- fairspec_metadata/models/column/boolean.py +57 -0
- fairspec_metadata/models/column/categorical.py +52 -0
- fairspec_metadata/models/column/column.py +127 -0
- fairspec_metadata/models/column/date.py +21 -0
- fairspec_metadata/models/column/date_time.py +21 -0
- fairspec_metadata/models/column/decimal.py +54 -0
- fairspec_metadata/models/column/duration.py +15 -0
- fairspec_metadata/models/column/email.py +15 -0
- fairspec_metadata/models/column/geojson.py +15 -0
- fairspec_metadata/models/column/hex.py +15 -0
- fairspec_metadata/models/column/integer.py +83 -0
- fairspec_metadata/models/column/list.py +48 -0
- fairspec_metadata/models/column/number.py +90 -0
- fairspec_metadata/models/column/object.py +69 -0
- fairspec_metadata/models/column/string.py +66 -0
- fairspec_metadata/models/column/time.py +21 -0
- fairspec_metadata/models/column/topojson.py +15 -0
- fairspec_metadata/models/column/unknown.py +43 -0
- fairspec_metadata/models/column/url.py +15 -0
- fairspec_metadata/models/column/wkb.py +15 -0
- fairspec_metadata/models/column/wkt.py +15 -0
- fairspec_metadata/models/data.py +16 -0
- fairspec_metadata/models/data_schema.py +11 -0
- fairspec_metadata/models/datacite/alternate_identifier.py +24 -0
- fairspec_metadata/models/datacite/common.py +187 -0
- fairspec_metadata/models/datacite/content_type.py +17 -0
- fairspec_metadata/models/datacite/contributor.py +22 -0
- fairspec_metadata/models/datacite/creator.py +79 -0
- fairspec_metadata/models/datacite/datacite.py +117 -0
- fairspec_metadata/models/datacite/date.py +35 -0
- fairspec_metadata/models/datacite/description.py +30 -0
- fairspec_metadata/models/datacite/formats.py +12 -0
- fairspec_metadata/models/datacite/funding_reference.py +41 -0
- fairspec_metadata/models/datacite/geo_location.py +67 -0
- fairspec_metadata/models/datacite/identifier.py +29 -0
- fairspec_metadata/models/datacite/language.py +12 -0
- fairspec_metadata/models/datacite/publication_year.py +13 -0
- fairspec_metadata/models/datacite/publisher.py +27 -0
- fairspec_metadata/models/datacite/related_identifier.py +61 -0
- fairspec_metadata/models/datacite/related_item.py +96 -0
- fairspec_metadata/models/datacite/rights.py +40 -0
- fairspec_metadata/models/datacite/size.py +12 -0
- fairspec_metadata/models/datacite/subject.py +41 -0
- fairspec_metadata/models/datacite/title.py +30 -0
- fairspec_metadata/models/datacite/version.py +12 -0
- fairspec_metadata/models/dataset.py +34 -0
- fairspec_metadata/models/descriptor.py +5 -0
- fairspec_metadata/models/error/base.py +13 -0
- fairspec_metadata/models/error/cell.py +118 -0
- fairspec_metadata/models/error/column.py +28 -0
- fairspec_metadata/models/error/data.py +13 -0
- fairspec_metadata/models/error/error.py +11 -0
- fairspec_metadata/models/error/file.py +28 -0
- fairspec_metadata/models/error/foreign_key.py +18 -0
- fairspec_metadata/models/error/metadata.py +13 -0
- fairspec_metadata/models/error/resource.py +31 -0
- fairspec_metadata/models/error/row.py +29 -0
- fairspec_metadata/models/error/table.py +10 -0
- fairspec_metadata/models/exception.py +14 -0
- fairspec_metadata/models/file_dialect/arrow.py +9 -0
- fairspec_metadata/models/file_dialect/base.py +23 -0
- fairspec_metadata/models/file_dialect/common.py +24 -0
- fairspec_metadata/models/file_dialect/csv.py +29 -0
- fairspec_metadata/models/file_dialect/file_dialect.py +30 -0
- fairspec_metadata/models/file_dialect/json.py +25 -0
- fairspec_metadata/models/file_dialect/jsonl.py +23 -0
- fairspec_metadata/models/file_dialect/ods.py +25 -0
- fairspec_metadata/models/file_dialect/parquet.py +9 -0
- fairspec_metadata/models/file_dialect/sqlite.py +11 -0
- fairspec_metadata/models/file_dialect/tsv.py +25 -0
- fairspec_metadata/models/file_dialect/unknown.py +7 -0
- fairspec_metadata/models/file_dialect/xlsx.py +25 -0
- fairspec_metadata/models/foreign_key.py +22 -0
- fairspec_metadata/models/integrity.py +17 -0
- fairspec_metadata/models/json_schema.py +5 -0
- fairspec_metadata/models/path.py +16 -0
- fairspec_metadata/models/profile.py +27 -0
- fairspec_metadata/models/report.py +13 -0
- fairspec_metadata/models/resource.py +49 -0
- fairspec_metadata/models/table_schema.py +79 -0
- fairspec_metadata/models/unique_key.py +13 -0
- fairspec_metadata/plugin.py +56 -0
- fairspec_metadata/profiles/catalog.json +27 -0
- fairspec_metadata/profiles/data-schema.json +23 -0
- fairspec_metadata/profiles/dataset.json +710 -0
- fairspec_metadata/profiles/file-dialect.json +216 -0
- fairspec_metadata/profiles/table-schema.json +715 -0
- fairspec_metadata/py.typed +0 -0
- fairspec_metadata/settings.py +1 -0
- fairspec_metadata-0.0.0.dev0.dist-info/METADATA +21 -0
- fairspec_metadata-0.0.0.dev0.dist-info/RECORD +174 -0
- fairspec_metadata-0.0.0.dev0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from .create import create_column_from_property
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TestCreateColumnFromProperty:
    """Checks the column type (and nullability) produced for a JSON property."""

    def test_creates_string_column(self):
        """A plain string property yields a string column with no nullable flag."""
        result = create_column_from_property("name", {"type": "string"})
        assert result.type == "string"
        assert result.nullable is None

    def test_creates_nullable_column_for_type_null(self):
        """A ``[type, "null"]`` list yields a nullable column of that type."""
        prop = {"type": ["string", "null"]}
        result = create_column_from_property("name", prop)
        assert result.type == "string"
        assert result.nullable is True

    def test_creates_nullable_column_for_null_type(self):
        """A ``["null", type]`` list yields a nullable column of that type."""
        prop = {"type": ["null", "string"]}
        result = create_column_from_property("name", prop)
        assert result.type == "string"
        assert result.nullable is True

    def test_creates_nullable_date_column(self):
        """The ``date`` format is honoured together with a nullable type list."""
        prop = {"type": ["string", "null"], "format": "date"}
        result = create_column_from_property("created", prop)
        assert result.type == "date"
        assert result.nullable is True

    def test_creates_integer_column(self):
        """An integer property yields an integer column."""
        result = create_column_from_property("id", {"type": "integer"})
        assert result.type == "integer"

    def test_creates_boolean_column(self):
        """A boolean property yields a boolean column."""
        result = create_column_from_property("flag", {"type": "boolean"})
        assert result.type == "boolean"

    def test_creates_number_column(self):
        """A number property yields a number column."""
        result = create_column_from_property("value", {"type": "number"})
        assert result.type == "number"

    def test_creates_array_column(self):
        """An array property yields an array column."""
        result = create_column_from_property("items", {"type": "array"})
        assert result.type == "array"

    def test_creates_object_column(self):
        """An object property yields an object column."""
        result = create_column_from_property("meta", {"type": "object"})
        assert result.type == "object"

    def test_creates_geojson_column(self):
        """An object property with the ``geojson`` format yields a geojson column."""
        prop = {"type": "object", "format": "geojson"}
        result = create_column_from_property("geo", prop)
        assert result.type == "geojson"

    def test_creates_topojson_column(self):
        """An object property with the ``topojson`` format yields a topojson column."""
        prop = {"type": "object", "format": "topojson"}
        result = create_column_from_property("topo", prop)
        assert result.type == "topojson"

    def test_creates_categorical_column_from_string(self):
        """A string property with the ``categorical`` format is categorical."""
        prop = {"type": "string", "format": "categorical"}
        result = create_column_from_property("cat", prop)
        assert result.type == "categorical"

    def test_creates_categorical_column_from_integer(self):
        """An integer property with the ``categorical`` format is categorical."""
        prop = {"type": "integer", "format": "categorical"}
        result = create_column_from_property("cat", prop)
        assert result.type == "categorical"

    def test_creates_unknown_column_for_none_type(self):
        """A property without a ``type`` key yields an unknown column."""
        result = create_column_from_property("x", {})
        assert result.type == "unknown"

    def test_creates_email_column(self):
        """A string property with the ``email`` format yields an email column."""
        prop = {"type": "string", "format": "email"}
        result = create_column_from_property("email", prop)
        assert result.type == "email"

    def test_creates_url_column(self):
        """A string property with the ``url`` format yields a url column."""
        prop = {"type": "string", "format": "url"}
        result = create_column_from_property("url", prop)
        assert result.type == "url"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata.models.column.column import Column, ColumnProperty
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_base_property_type(type: str | Sequence[str] | None) -> str | None:
    """Return the base (non-``"null"``) type of a property ``type`` value.

    Args:
        type: A single type name, a sequence of type names, or ``None``.

    Returns:
        ``None`` or the string itself when ``type`` is ``None`` or a string;
        otherwise the first entry of the sequence that is not ``"null"``, or
        ``"null"`` when the sequence has no such entry.
    """
    # None and plain strings pass through unchanged.
    if type is None or isinstance(type, str):
        return type

    return next((entry for entry in type if entry != "null"), "null")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_is_nullable_property_type(type: str | Sequence[str] | None) -> bool:
    """Tell whether a property ``type`` value allows ``"null"``.

    Args:
        type: A single type name, a sequence of type names, or ``None``.

    Returns:
        ``True`` only when ``type`` is a non-string sequence containing
        ``"null"``; ``False`` for ``None`` and for any plain string.
    """
    # Only a list-like type declaration can carry an extra "null" entry.
    is_type_list = type is not None and not isinstance(type, str)
    return is_type_list and "null" in type
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_column_properties(columns: list[Column]) -> dict[str, ColumnProperty]:
    """Map each column's ``name`` to its ``property``.

    Args:
        columns: Columns to index.

    Returns:
        A dict keyed by column name; when names repeat, the later column's
        property replaces the earlier one.
    """
    properties: dict[str, ColumnProperty] = {}
    for column in columns:
        name = column.name
        properties[name] = column.property
    return properties
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from .property import get_base_property_type, get_is_nullable_property_type
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TestGetBasePropertyType:
    """Checks the base type extracted from a property ``type`` value."""

    def test_returns_type_for_string(self):
        """A plain string is returned unchanged."""
        base = get_base_property_type("string")
        assert base == "string"

    def test_returns_base_type_for_type_null(self):
        """``"null"`` listed last is skipped."""
        base = get_base_property_type(["string", "null"])
        assert base == "string"

    def test_returns_base_type_for_null_type(self):
        """``"null"`` listed first is skipped."""
        base = get_base_property_type(["null", "string"])
        assert base == "string"

    def test_returns_none_for_none(self):
        """``None`` is returned unchanged."""
        base = get_base_property_type(None)
        assert base is None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestGetIsNullablePropertyType:
    """Checks the nullability detected from a property ``type`` value."""

    def test_returns_false_for_string(self):
        """A plain string type is not nullable."""
        nullable = get_is_nullable_property_type("string")
        assert nullable is False

    def test_returns_true_for_type_null(self):
        """A list ending in ``"null"`` is nullable."""
        nullable = get_is_nullable_property_type(["string", "null"])
        assert nullable is True

    def test_returns_true_for_null_type(self):
        """A list starting with ``"null"`` is nullable."""
        nullable = get_is_nullable_property_type(["null", "string"])
        assert nullable is True

    def test_returns_false_for_none(self):
        """``None`` is not nullable."""
        nullable = get_is_nullable_property_type(None)
        assert nullable is False
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.models.data_schema import DataSchema
|
|
4
|
+
from fairspec_metadata.models.descriptor import Descriptor
|
|
5
|
+
from fairspec_metadata.models.exception import FairspecException
|
|
6
|
+
|
|
7
|
+
from .validate import validate_data_schema
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def assert_data_schema(source: Descriptor) -> DataSchema:
    """Validate ``source`` and return it as a data schema.

    Args:
        source: Descriptor passed to ``validate_data_schema``.

    Returns:
        The ``data_schema`` carried by the validation result.

    Raises:
        FairspecException: If the validation result carries no data schema;
            the result is attached to the exception as ``report``.
    """
    result = validate_data_schema(source)

    # Check for None explicitly: the result's data_schema is the descriptor
    # mapping itself when validation succeeds, so a truthiness test would
    # wrongly reject a descriptor that validated but happens to be empty.
    if result.data_schema is None:
        raise FairspecException("Invalid Data Schema", report=result)

    return result.data_schema
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.descriptor.load import load_descriptor
|
|
4
|
+
from fairspec_metadata.models.data_schema import DataSchema
|
|
5
|
+
|
|
6
|
+
from .assert_ import assert_data_schema
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_data_schema(path: str) -> DataSchema:
    """Load the descriptor at ``path`` and return it as an asserted data schema.

    Args:
        path: Location handed to ``load_descriptor``.

    Returns:
        The result of ``assert_data_schema`` for the loaded descriptor.
    """
    return assert_data_schema(load_descriptor(path))
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.json_schema.load import load_json_schema
|
|
4
|
+
from fairspec_metadata.models.data_schema import DataSchema
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def resolve_data_schema(
    data_schema: DataSchema | str | None = None,
) -> DataSchema | None:
    """Resolve a data schema that may be given inline or as a string reference.

    Args:
        data_schema: An inline data schema, a string handed to
            ``load_json_schema``, or ``None``.

    Returns:
        ``None`` when nothing was given, the loaded schema for a string, and
        the given object unchanged otherwise.
    """
    # Only string references need loading; None and inline schemas pass through.
    if isinstance(data_schema, str):
        return load_json_schema(data_schema)

    return data_schema
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.descriptor.copy import copy_descriptor
|
|
4
|
+
from fairspec_metadata.actions.descriptor.save import save_descriptor
|
|
5
|
+
from fairspec_metadata.models.data_schema import DataSchema
|
|
6
|
+
from fairspec_metadata.settings import FAIRSPEC_VERSION
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def save_data_schema(
    data_schema: DataSchema,
    *,
    path: str,
    overwrite: bool = False,
) -> None:
    """Save a copy of ``data_schema`` as a descriptor at ``path``.

    Args:
        data_schema: Schema to save; it is copied with ``copy_descriptor``
            before the ``$schema`` key is touched.
        path: Destination forwarded to ``save_descriptor``.
        overwrite: Forwarded to ``save_descriptor``.
    """
    default_profile = (
        f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/data-schema.json"
    )

    descriptor = copy_descriptor(data_schema)

    # Keep an explicitly declared profile; only fill in the default when absent.
    if "$schema" not in descriptor:
        descriptor["$schema"] = default_profile

    save_descriptor(descriptor, path=path, overwrite=overwrite)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.descriptor.load import load_descriptor
|
|
4
|
+
from fairspec_metadata.actions.descriptor.validate import validate_descriptor
|
|
5
|
+
from fairspec_metadata.actions.profile.load import load_profile
|
|
6
|
+
from fairspec_metadata.models.data_schema import DataSchema
|
|
7
|
+
from fairspec_metadata.models.descriptor import Descriptor
|
|
8
|
+
from fairspec_metadata.models.profile import ProfileType
|
|
9
|
+
from fairspec_metadata.models.report import Report
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataSchemaValidationResult(Report):
    """Report of a data schema validation, extended with the schema itself."""

    # Set by validate_data_schema to the validated descriptor when the report
    # is valid, and to None otherwise.
    data_schema: DataSchema | None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def validate_data_schema(
    source: Descriptor | str,
    *,
    root_json_pointer: str | None = None,
) -> DataSchemaValidationResult:
    """Validate a data schema descriptor against its profile.

    Args:
        source: A descriptor, or a string that is loaded with
            ``load_descriptor`` first.
        root_json_pointer: Forwarded unchanged to ``validate_descriptor``.

    Returns:
        A result carrying the report's ``valid`` flag and ``errors``, plus the
        descriptor as ``data_schema`` when the report is valid (``None``
        otherwise).
    """
    if isinstance(source, str):
        descriptor = load_descriptor(source)
    else:
        descriptor = source

    # Use the profile declared by the descriptor when it is a string,
    # otherwise fall back to the latest data schema profile.
    declared_profile = descriptor.get("$schema")
    if isinstance(declared_profile, str):
        schema_url = declared_profile
    else:
        schema_url = "https://fairspec.org/profiles/latest/data-schema.json"

    report = validate_descriptor(
        descriptor,
        profile=load_profile(schema_url, profile_type=ProfileType.data_schema),
        root_json_pointer=root_json_pointer,
    )

    return DataSchemaValidationResult(
        valid=report.valid,
        errors=report.errors,
        data_schema=descriptor if report.valid else None,
    )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
4
|
+
from fairspec_metadata.models.descriptor import Descriptor
|
|
5
|
+
from fairspec_metadata.models.exception import FairspecException
|
|
6
|
+
|
|
7
|
+
from .validate import validate_dataset_descriptor
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def assert_dataset(source: Descriptor, *, basepath: str | None = None) -> Dataset:
    """Validate ``source`` and return the resulting dataset.

    Args:
        source: Descriptor passed to ``validate_dataset_descriptor``.
        basepath: Forwarded unchanged to ``validate_dataset_descriptor``.

    Returns:
        The ``dataset`` carried by the validation result.

    Raises:
        FairspecException: If the validation result carries no dataset; the
            result is attached to the exception as ``report``.
    """
    result = validate_dataset_descriptor(source, basepath=basepath)

    # The result's dataset is either a Dataset or None; test for None
    # explicitly so the check never depends on the truthiness of the model.
    if result.dataset is None:
        raise FairspecException("Invalid Dataset", report=result)

    return result.dataset
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.resource.denormalize import denormalize_resource
|
|
4
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def denormalize_dataset(dataset: Dataset, *, basepath: str | None = None) -> Dataset:
    """Return a new ``Dataset`` whose resources have been denormalized.

    Args:
        dataset: Dataset to convert; it is dumped by alias with ``None``
            values excluded and rebuilt from that dump.
        basepath: Forwarded to ``denormalize_resource`` for every resource.

    Returns:
        A ``Dataset`` built from the dump, with ``resources`` replaced by the
        denormalized resources when the dataset has any.
    """
    payload = dataset.model_dump(by_alias=True, exclude_none=True)

    # Leave the dumped value alone when there are no resources to convert.
    if dataset.resources:
        denormalized = []
        for item in dataset.resources:
            denormalized.append(denormalize_resource(item, basepath=basepath))
        payload["resources"] = denormalized

    return Dataset(**payload)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://fairspec.org/profiles/latest/dataset.json",
|
|
3
|
+
"titles": [
|
|
4
|
+
{
|
|
5
|
+
"title": "title"
|
|
6
|
+
}
|
|
7
|
+
],
|
|
8
|
+
"creators": [
|
|
9
|
+
{
|
|
10
|
+
"name": "title"
|
|
11
|
+
}
|
|
12
|
+
],
|
|
13
|
+
"publisher": {
|
|
14
|
+
"name": "publisher"
|
|
15
|
+
},
|
|
16
|
+
"publicationYear": "2017",
|
|
17
|
+
"subjects": [
|
|
18
|
+
{
|
|
19
|
+
"subject": "keyword1"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"subject": "keyword2"
|
|
23
|
+
}
|
|
24
|
+
],
|
|
25
|
+
"contributors": [
|
|
26
|
+
{
|
|
27
|
+
"name": "title",
|
|
28
|
+
"contributorType": "ContactPerson"
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
"dates": [
|
|
32
|
+
{
|
|
33
|
+
"date": "2017-01-01",
|
|
34
|
+
"dateType": "Created"
|
|
35
|
+
}
|
|
36
|
+
],
|
|
37
|
+
"version": "1.0",
|
|
38
|
+
"rightsList": [
|
|
39
|
+
{
|
|
40
|
+
"rights": "MIT"
|
|
41
|
+
}
|
|
42
|
+
],
|
|
43
|
+
"descriptions": [
|
|
44
|
+
{
|
|
45
|
+
"description": "description",
|
|
46
|
+
"descriptionType": "Abstract"
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"relatedIdentifiers": [
|
|
50
|
+
{
|
|
51
|
+
"relatedIdentifier": "http://example.com",
|
|
52
|
+
"relatedIdentifierType": "URL",
|
|
53
|
+
"relationType": "IsDescribedBy"
|
|
54
|
+
}
|
|
55
|
+
],
|
|
56
|
+
"resources": [
|
|
57
|
+
{
|
|
58
|
+
"name": "name",
|
|
59
|
+
"data": "table.csv",
|
|
60
|
+
"fileDialect": {
|
|
61
|
+
"format": "csv"
|
|
62
|
+
},
|
|
63
|
+
"tableSchema": "schema.json"
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.descriptor.load import load_descriptor
|
|
4
|
+
from fairspec_metadata.actions.path.basepath import resolve_basepath
|
|
5
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
6
|
+
|
|
7
|
+
from .assert_ import assert_dataset
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_dataset_descriptor(path: str) -> Dataset:
    """Load the descriptor at ``path`` and return it as an asserted dataset.

    Args:
        path: Location handed to both ``resolve_basepath`` and
            ``load_descriptor``.

    Returns:
        The result of ``assert_dataset`` for the loaded descriptor, using the
        basepath resolved from ``path``.
    """
    # Resolve the basepath before loading, as the original ordering does.
    resolved_basepath = resolve_basepath(path)
    return assert_dataset(load_descriptor(path), basepath=resolved_basepath)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.resource.normalize import normalize_resource
|
|
4
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def normalize_dataset(dataset: Dataset, *, basepath: str | None = None) -> Dataset:
    """Return a new ``Dataset`` whose resources have been normalized.

    Args:
        dataset: Dataset to convert; it is dumped by alias with ``None``
            values excluded and rebuilt from that dump.
        basepath: Forwarded to ``normalize_resource`` for every resource.

    Returns:
        A ``Dataset`` built from the dump, with ``resources`` replaced by the
        normalized resources when the dataset has any.
    """
    payload = dataset.model_dump(by_alias=True, exclude_none=True)

    # Leave the dumped value alone when there are no resources to convert.
    if dataset.resources:
        normalized = []
        for item in dataset.resources:
            normalized.append(normalize_resource(item, basepath=basepath))
        payload["resources"] = normalized

    return Dataset(**payload)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata.actions.descriptor.save import save_descriptor
|
|
6
|
+
from fairspec_metadata.actions.path.basepath import get_basepath
|
|
7
|
+
from fairspec_metadata.settings import FAIRSPEC_VERSION
|
|
8
|
+
|
|
9
|
+
from .denormalize import denormalize_dataset
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def save_dataset_descriptor(
    dataset: Dataset,
    *,
    path: str,
    overwrite: bool = False,
) -> None:
    """Denormalize ``dataset`` and save it as a descriptor at ``path``.

    Args:
        dataset: Dataset to save; it is denormalized against the basepath
            derived from ``path`` and dumped by alias with ``None`` values
            excluded.
        path: Destination forwarded to ``save_descriptor``.
        overwrite: Forwarded to ``save_descriptor``.
    """
    portable = denormalize_dataset(dataset, basepath=get_basepath(path))
    descriptor = portable.model_dump(by_alias=True, exclude_none=True)

    # Keep an explicitly declared profile; only fill in the default when absent.
    descriptor.setdefault(
        "$schema",
        f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/dataset.json",
    )

    save_descriptor(descriptor, path=path, overwrite=overwrite)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata.models.datacite.creator import Creator
|
|
6
|
+
from fairspec_metadata.models.datacite.title import Title
|
|
7
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
8
|
+
from fairspec_metadata.models.resource import Resource
|
|
9
|
+
from fairspec_metadata.settings import FAIRSPEC_VERSION
|
|
10
|
+
|
|
11
|
+
from .save import save_dataset_descriptor
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestSaveDatasetDescriptor:
    """Checks what ``save_dataset_descriptor`` writes to disk."""

    @staticmethod
    def _read_json(path):
        """Return the parsed JSON content of the file at ``path``."""
        with open(path, encoding="utf-8") as handle:
            return json.load(handle)

    def test_saves_dataset(self, tmp_path):
        """Creators, resources and a dataset profile end up in the file."""
        target = str(tmp_path / "dataset.json")
        resource = Resource(name="test_resource", data=str(tmp_path / "data.csv"))
        dataset = Dataset(
            creators=[Creator(name="Test Creator")],
            titles=[Title(title="Test Dataset")],
            resources=[resource],
        )

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert saved["$schema"].endswith("dataset.json")
        assert saved["creators"][0]["name"] == "Test Creator"
        assert saved["resources"][0]["name"] == "test_resource"

    def test_sets_default_schema(self, tmp_path):
        """The versioned dataset profile is written when none is declared."""
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(resources=[Resource(data=str(tmp_path / "data.csv"))])

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        expected = f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/dataset.json"
        assert saved["$schema"] == expected

    def test_preserves_custom_schema(self, tmp_path):
        """A profile declared on the dataset is written unchanged."""
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(
            profile="https://custom.schema.url/dataset.json",
            resources=[Resource(data=str(tmp_path / "data.csv"))],
        )

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert saved["$schema"] == "https://custom.schema.url/dataset.json"

    def test_throws_when_file_exists(self, tmp_path):
        """Saving twice without ``overwrite`` raises ``FileExistsError``."""
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(resources=[Resource(data=str(tmp_path / "data.csv"))])

        save_dataset_descriptor(dataset, path=target)

        with pytest.raises(FileExistsError):
            save_dataset_descriptor(dataset, path=target)

    def test_overwrites_when_flag_set(self, tmp_path):
        """With ``overwrite=True`` the second save replaces the first."""
        target = str(tmp_path / "dataset.json")
        initial = Dataset(
            creators=[Creator(name="Initial")],
            resources=[Resource(data=str(tmp_path / "data.csv"))],
        )
        updated = Dataset(
            creators=[Creator(name="Updated")],
            resources=[Resource(data=str(tmp_path / "data.csv"))],
        )

        save_dataset_descriptor(initial, path=target)
        save_dataset_descriptor(updated, path=target, overwrite=True)

        saved = self._read_json(target)
        assert saved["creators"][0]["name"] == "Updated"

    def test_saves_to_nested_directory(self, tmp_path):
        """A target inside not-yet-existing directories can be written."""
        nested_dir = tmp_path / "nested" / "dir"
        target = str(nested_dir / "dataset.json")
        dataset = Dataset(resources=[Resource(data=str(nested_dir / "data.csv"))])

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert "resources" in saved

    def test_denormalizes_resource_paths(self, tmp_path):
        """Resource data next to the descriptor is written as a relative path."""
        target = str(tmp_path / "dataset.json")
        resource = Resource(name="test", data=str(tmp_path / "data.csv"))
        dataset = Dataset(resources=[resource])

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert saved["resources"][0]["data"] == "data.csv"
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata.actions.data_schema.validate import validate_data_schema
|
|
4
|
+
from fairspec_metadata.actions.descriptor.load import load_descriptor
|
|
5
|
+
from fairspec_metadata.actions.descriptor.validate import validate_descriptor
|
|
6
|
+
from fairspec_metadata.actions.file_dialect.validate import validate_file_dialect
|
|
7
|
+
from fairspec_metadata.actions.profile.load import load_profile
|
|
8
|
+
from fairspec_metadata.actions.table_schema.validate import validate_table_schema
|
|
9
|
+
from fairspec_metadata.models.dataset import Dataset
|
|
10
|
+
from fairspec_metadata.models.descriptor import Descriptor
|
|
11
|
+
from fairspec_metadata.models.profile import ProfileType
|
|
12
|
+
from fairspec_metadata.models.report import Report
|
|
13
|
+
|
|
14
|
+
from .normalize import normalize_dataset
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DatasetValidationResult(Report):
    """Report of a dataset descriptor validation, extended with the dataset."""

    # Set by validate_dataset_descriptor to the normalized Dataset when no
    # errors were collected, and to None otherwise.
    dataset: Dataset | None = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def validate_dataset_descriptor(
    source: Descriptor | str,
    *,
    basepath: str | None = None,
) -> DatasetValidationResult:
    """Validate a dataset descriptor and the string references of its resources.

    The descriptor is validated against its profile first. When that passes,
    it is cast to ``Dataset`` and normalized, and every resource whose
    ``fileDialect``, ``dataSchema`` or ``tableSchema`` is a string has that
    reference validated too; errors from those checks are appended to the
    report.

    Args:
        source: A descriptor, or a string that is loaded with
            ``load_descriptor`` first.
        basepath: Forwarded to ``normalize_dataset``.

    Returns:
        A result carrying the collected errors, with ``dataset`` set to the
        normalized dataset only when no errors were collected.
    """
    if isinstance(source, str):
        descriptor = load_descriptor(source)
    else:
        descriptor = source

    # Use the profile declared by the descriptor when it is a string,
    # otherwise fall back to the latest dataset profile.
    declared_profile = descriptor.get("$schema")
    if isinstance(declared_profile, str):
        schema_url = declared_profile
    else:
        schema_url = "https://fairspec.org/profiles/latest/dataset.json"

    profile = load_profile(schema_url, profile_type=ProfileType.dataset)
    report = validate_descriptor(descriptor, profile=profile)

    normalized: Dataset | None = None
    if report.valid:
        # Valid -> we can cast
        normalized = normalize_dataset(Dataset(**descriptor), basepath=basepath)

    if normalized:
        for position, resource in enumerate(normalized.resources or []):
            pointer = f"/resources/{position}"

            # Checked in this order: file dialect, data schema, table schema.
            reference_checks = (
                (resource.fileDialect, validate_file_dialect),
                (resource.dataSchema, validate_data_schema),
                (resource.tableSchema, validate_table_schema),
            )
            for reference, validate_reference in reference_checks:
                # Only string references are validated here.
                if not isinstance(reference, str):
                    continue
                outcome = validate_reference(reference, root_json_pointer=pointer)
                report.errors.extend(outcome.errors)

    # Any collected error invalidates the whole dataset.
    if report.errors:
        normalized = None
        report.valid = False

    return DatasetValidationResult(
        valid=report.valid,
        errors=report.errors,
        dataset=normalized,
    )
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .validate import validate_dataset_descriptor
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TestValidateDatasetDescriptor:
    """Checks the report produced by ``validate_dataset_descriptor``."""

    def test_valid_dataset(self):
        """A descriptor with one data resource is valid and has no errors."""
        descriptor = {"resources": [{"data": "data.csv"}]}
        outcome = validate_dataset_descriptor(descriptor)
        assert outcome.valid is True
        assert outcome.errors == []

    def test_invalid_dataset(self):
        """A non-array ``resources`` value is invalid and reports errors."""
        descriptor = {"resources": "not-an-array"}
        outcome = validate_dataset_descriptor(descriptor)
        assert outcome.valid is False
        assert len(outcome.errors) > 0

    def test_missing_schema_is_valid(self):
        """A descriptor without a ``$schema`` key is still valid."""
        descriptor = {"resources": [{"data": "data.csv"}]}
        outcome = validate_dataset_descriptor(descriptor)
        assert outcome.valid is True

    def test_dataset_with_datacite(self):
        """Creators and titles alongside resources are accepted."""
        descriptor = {
            "creators": [{"name": "John Doe"}],
            "titles": [{"title": "Example Dataset"}],
            "resources": [{"data": "data.csv"}],
        }
        outcome = validate_dataset_descriptor(descriptor)
        assert outcome.valid is True
        assert outcome.errors == []
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"fields": [{"name": "id", "type": "integer"}, {"name": "name", "type": "string"}]}
|