fairspec-metadata 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. fairspec_metadata/__init__.py +504 -0
  2. fairspec_metadata/actions/column/create.py +175 -0
  3. fairspec_metadata/actions/column/create_spec.py +83 -0
  4. fairspec_metadata/actions/column/property.py +28 -0
  5. fairspec_metadata/actions/column/property_spec.py +29 -0
  6. fairspec_metadata/actions/data_schema/assert_.py +16 -0
  7. fairspec_metadata/actions/data_schema/load.py +11 -0
  8. fairspec_metadata/actions/data_schema/resolve.py +16 -0
  9. fairspec_metadata/actions/data_schema/save.py +22 -0
  10. fairspec_metadata/actions/data_schema/validate.py +46 -0
  11. fairspec_metadata/actions/dataset/assert_.py +16 -0
  12. fairspec_metadata/actions/dataset/denormalize.py +16 -0
  13. fairspec_metadata/actions/dataset/fixtures/dataset-invalid.json +8 -0
  14. fairspec_metadata/actions/dataset/fixtures/dataset.json +66 -0
  15. fairspec_metadata/actions/dataset/fixtures/schema.json +12 -0
  16. fairspec_metadata/actions/dataset/fixtures/table.csv +3 -0
  17. fairspec_metadata/actions/dataset/load.py +13 -0
  18. fairspec_metadata/actions/dataset/normalize.py +16 -0
  19. fairspec_metadata/actions/dataset/save.py +30 -0
  20. fairspec_metadata/actions/dataset/save_spec.py +102 -0
  21. fairspec_metadata/actions/dataset/validate.py +77 -0
  22. fairspec_metadata/actions/dataset/validate_spec.py +30 -0
  23. fairspec_metadata/actions/descriptor/copy.py +9 -0
  24. fairspec_metadata/actions/descriptor/fixtures/schema.json +1 -0
  25. fairspec_metadata/actions/descriptor/general.py +7 -0
  26. fairspec_metadata/actions/descriptor/load.py +47 -0
  27. fairspec_metadata/actions/descriptor/load_spec.py +40 -0
  28. fairspec_metadata/actions/descriptor/parse.py +16 -0
  29. fairspec_metadata/actions/descriptor/save.py +19 -0
  30. fairspec_metadata/actions/descriptor/save_spec.py +47 -0
  31. fairspec_metadata/actions/descriptor/stringify.py +9 -0
  32. fairspec_metadata/actions/descriptor/validate.py +31 -0
  33. fairspec_metadata/actions/file_dialect/assert_.py +20 -0
  34. fairspec_metadata/actions/file_dialect/infer.py +35 -0
  35. fairspec_metadata/actions/file_dialect/infer_spec.py +44 -0
  36. fairspec_metadata/actions/file_dialect/load.py +15 -0
  37. fairspec_metadata/actions/file_dialect/resolve.py +20 -0
  38. fairspec_metadata/actions/file_dialect/save.py +21 -0
  39. fairspec_metadata/actions/file_dialect/support.py +41 -0
  40. fairspec_metadata/actions/file_dialect/validate.py +49 -0
  41. fairspec_metadata/actions/json/inspect.py +59 -0
  42. fairspec_metadata/actions/json/inspect_spec.py +49 -0
  43. fairspec_metadata/actions/json_schema/assert_.py +21 -0
  44. fairspec_metadata/actions/json_schema/inspect.py +43 -0
  45. fairspec_metadata/actions/json_schema/inspect_spec.py +68 -0
  46. fairspec_metadata/actions/json_schema/load.py +23 -0
  47. fairspec_metadata/actions/json_schema/resolve.py +16 -0
  48. fairspec_metadata/actions/json_schema/save.py +13 -0
  49. fairspec_metadata/actions/path/basepath.py +37 -0
  50. fairspec_metadata/actions/path/basepath_spec.py +59 -0
  51. fairspec_metadata/actions/path/denormalize.py +43 -0
  52. fairspec_metadata/actions/path/denormalize_spec.py +56 -0
  53. fairspec_metadata/actions/path/general.py +71 -0
  54. fairspec_metadata/actions/path/general_spec.py +167 -0
  55. fairspec_metadata/actions/path/normalize.py +55 -0
  56. fairspec_metadata/actions/path/normalize_spec.py +76 -0
  57. fairspec_metadata/actions/profile/assert_.py +37 -0
  58. fairspec_metadata/actions/profile/assert_spec.py +89 -0
  59. fairspec_metadata/actions/profile/load.py +19 -0
  60. fairspec_metadata/actions/profile/registry.py +51 -0
  61. fairspec_metadata/actions/report/create.py +14 -0
  62. fairspec_metadata/actions/report/create_spec.py +28 -0
  63. fairspec_metadata/actions/resource/data.py +50 -0
  64. fairspec_metadata/actions/resource/data_spec.py +64 -0
  65. fairspec_metadata/actions/resource/denormalize.py +30 -0
  66. fairspec_metadata/actions/resource/general.py +19 -0
  67. fairspec_metadata/actions/resource/infer.py +23 -0
  68. fairspec_metadata/actions/resource/infer_spec.py +33 -0
  69. fairspec_metadata/actions/resource/normalize.py +30 -0
  70. fairspec_metadata/actions/table_schema/assert_.py +16 -0
  71. fairspec_metadata/actions/table_schema/column.py +24 -0
  72. fairspec_metadata/actions/table_schema/column_spec.py +55 -0
  73. fairspec_metadata/actions/table_schema/load.py +11 -0
  74. fairspec_metadata/actions/table_schema/resolve.py +17 -0
  75. fairspec_metadata/actions/table_schema/save.py +21 -0
  76. fairspec_metadata/actions/table_schema/validate.py +47 -0
  77. fairspec_metadata/actions/table_schema/validate_spec.py +45 -0
  78. fairspec_metadata/models/base.py +5 -0
  79. fairspec_metadata/models/catalog.py +20 -0
  80. fairspec_metadata/models/column/array.py +65 -0
  81. fairspec_metadata/models/column/base.py +41 -0
  82. fairspec_metadata/models/column/base64.py +15 -0
  83. fairspec_metadata/models/column/boolean.py +57 -0
  84. fairspec_metadata/models/column/categorical.py +52 -0
  85. fairspec_metadata/models/column/column.py +127 -0
  86. fairspec_metadata/models/column/date.py +21 -0
  87. fairspec_metadata/models/column/date_time.py +21 -0
  88. fairspec_metadata/models/column/decimal.py +54 -0
  89. fairspec_metadata/models/column/duration.py +15 -0
  90. fairspec_metadata/models/column/email.py +15 -0
  91. fairspec_metadata/models/column/geojson.py +15 -0
  92. fairspec_metadata/models/column/hex.py +15 -0
  93. fairspec_metadata/models/column/integer.py +83 -0
  94. fairspec_metadata/models/column/list.py +48 -0
  95. fairspec_metadata/models/column/number.py +90 -0
  96. fairspec_metadata/models/column/object.py +69 -0
  97. fairspec_metadata/models/column/string.py +66 -0
  98. fairspec_metadata/models/column/time.py +21 -0
  99. fairspec_metadata/models/column/topojson.py +15 -0
  100. fairspec_metadata/models/column/unknown.py +43 -0
  101. fairspec_metadata/models/column/url.py +15 -0
  102. fairspec_metadata/models/column/wkb.py +15 -0
  103. fairspec_metadata/models/column/wkt.py +15 -0
  104. fairspec_metadata/models/data.py +16 -0
  105. fairspec_metadata/models/data_schema.py +11 -0
  106. fairspec_metadata/models/datacite/alternate_identifier.py +24 -0
  107. fairspec_metadata/models/datacite/common.py +187 -0
  108. fairspec_metadata/models/datacite/content_type.py +17 -0
  109. fairspec_metadata/models/datacite/contributor.py +22 -0
  110. fairspec_metadata/models/datacite/creator.py +79 -0
  111. fairspec_metadata/models/datacite/datacite.py +117 -0
  112. fairspec_metadata/models/datacite/date.py +35 -0
  113. fairspec_metadata/models/datacite/description.py +30 -0
  114. fairspec_metadata/models/datacite/formats.py +12 -0
  115. fairspec_metadata/models/datacite/funding_reference.py +41 -0
  116. fairspec_metadata/models/datacite/geo_location.py +67 -0
  117. fairspec_metadata/models/datacite/identifier.py +29 -0
  118. fairspec_metadata/models/datacite/language.py +12 -0
  119. fairspec_metadata/models/datacite/publication_year.py +13 -0
  120. fairspec_metadata/models/datacite/publisher.py +27 -0
  121. fairspec_metadata/models/datacite/related_identifier.py +61 -0
  122. fairspec_metadata/models/datacite/related_item.py +96 -0
  123. fairspec_metadata/models/datacite/rights.py +40 -0
  124. fairspec_metadata/models/datacite/size.py +12 -0
  125. fairspec_metadata/models/datacite/subject.py +41 -0
  126. fairspec_metadata/models/datacite/title.py +30 -0
  127. fairspec_metadata/models/datacite/version.py +12 -0
  128. fairspec_metadata/models/dataset.py +34 -0
  129. fairspec_metadata/models/descriptor.py +5 -0
  130. fairspec_metadata/models/error/base.py +13 -0
  131. fairspec_metadata/models/error/cell.py +118 -0
  132. fairspec_metadata/models/error/column.py +28 -0
  133. fairspec_metadata/models/error/data.py +13 -0
  134. fairspec_metadata/models/error/error.py +11 -0
  135. fairspec_metadata/models/error/file.py +28 -0
  136. fairspec_metadata/models/error/foreign_key.py +18 -0
  137. fairspec_metadata/models/error/metadata.py +13 -0
  138. fairspec_metadata/models/error/resource.py +31 -0
  139. fairspec_metadata/models/error/row.py +29 -0
  140. fairspec_metadata/models/error/table.py +10 -0
  141. fairspec_metadata/models/exception.py +14 -0
  142. fairspec_metadata/models/file_dialect/arrow.py +9 -0
  143. fairspec_metadata/models/file_dialect/base.py +23 -0
  144. fairspec_metadata/models/file_dialect/common.py +24 -0
  145. fairspec_metadata/models/file_dialect/csv.py +29 -0
  146. fairspec_metadata/models/file_dialect/file_dialect.py +30 -0
  147. fairspec_metadata/models/file_dialect/json.py +25 -0
  148. fairspec_metadata/models/file_dialect/jsonl.py +23 -0
  149. fairspec_metadata/models/file_dialect/ods.py +25 -0
  150. fairspec_metadata/models/file_dialect/parquet.py +9 -0
  151. fairspec_metadata/models/file_dialect/sqlite.py +11 -0
  152. fairspec_metadata/models/file_dialect/tsv.py +25 -0
  153. fairspec_metadata/models/file_dialect/unknown.py +7 -0
  154. fairspec_metadata/models/file_dialect/xlsx.py +25 -0
  155. fairspec_metadata/models/foreign_key.py +22 -0
  156. fairspec_metadata/models/integrity.py +17 -0
  157. fairspec_metadata/models/json_schema.py +5 -0
  158. fairspec_metadata/models/path.py +16 -0
  159. fairspec_metadata/models/profile.py +27 -0
  160. fairspec_metadata/models/report.py +13 -0
  161. fairspec_metadata/models/resource.py +49 -0
  162. fairspec_metadata/models/table_schema.py +79 -0
  163. fairspec_metadata/models/unique_key.py +13 -0
  164. fairspec_metadata/plugin.py +56 -0
  165. fairspec_metadata/profiles/catalog.json +27 -0
  166. fairspec_metadata/profiles/data-schema.json +23 -0
  167. fairspec_metadata/profiles/dataset.json +710 -0
  168. fairspec_metadata/profiles/file-dialect.json +216 -0
  169. fairspec_metadata/profiles/table-schema.json +715 -0
  170. fairspec_metadata/py.typed +0 -0
  171. fairspec_metadata/settings.py +1 -0
  172. fairspec_metadata-0.0.0.dev0.dist-info/METADATA +21 -0
  173. fairspec_metadata-0.0.0.dev0.dist-info/RECORD +174 -0
  174. fairspec_metadata-0.0.0.dev0.dist-info/WHEEL +4 -0
@@ -0,0 +1,83 @@
1
+ from .create import create_column_from_property
2
+
3
+
4
class TestCreateColumnFromProperty:
    """Checks the ``type`` (and, where asserted, ``nullable``) of created columns."""

    def test_creates_string_column(self):
        # A bare string type: nullable is expected to stay unset.
        prop = {"type": "string"}
        result = create_column_from_property("name", prop)
        assert result.type == "string"
        assert result.nullable is None

    def test_creates_nullable_column_for_type_null(self):
        # "null" listed after the base type.
        prop = {"type": ["string", "null"]}
        result = create_column_from_property("name", prop)
        assert result.type == "string"
        assert result.nullable is True

    def test_creates_nullable_column_for_null_type(self):
        # "null" listed before the base type.
        prop = {"type": ["null", "string"]}
        result = create_column_from_property("name", prop)
        assert result.type == "string"
        assert result.nullable is True

    def test_creates_nullable_date_column(self):
        prop = {"type": ["string", "null"], "format": "date"}
        result = create_column_from_property("created", prop)
        assert result.type == "date"
        assert result.nullable is True

    def test_creates_integer_column(self):
        prop = {"type": "integer"}
        assert create_column_from_property("id", prop).type == "integer"

    def test_creates_boolean_column(self):
        prop = {"type": "boolean"}
        assert create_column_from_property("flag", prop).type == "boolean"

    def test_creates_number_column(self):
        prop = {"type": "number"}
        assert create_column_from_property("value", prop).type == "number"

    def test_creates_array_column(self):
        prop = {"type": "array"}
        assert create_column_from_property("items", prop).type == "array"

    def test_creates_object_column(self):
        prop = {"type": "object"}
        assert create_column_from_property("meta", prop).type == "object"

    def test_creates_geojson_column(self):
        prop = {"type": "object", "format": "geojson"}
        assert create_column_from_property("geo", prop).type == "geojson"

    def test_creates_topojson_column(self):
        prop = {"type": "object", "format": "topojson"}
        assert create_column_from_property("topo", prop).type == "topojson"

    def test_creates_categorical_column_from_string(self):
        prop = {"type": "string", "format": "categorical"}
        assert create_column_from_property("cat", prop).type == "categorical"

    def test_creates_categorical_column_from_integer(self):
        prop = {"type": "integer", "format": "categorical"}
        assert create_column_from_property("cat", prop).type == "categorical"

    def test_creates_unknown_column_for_none_type(self):
        # An empty property carries no type at all.
        assert create_column_from_property("x", {}).type == "unknown"

    def test_creates_email_column(self):
        prop = {"type": "string", "format": "email"}
        assert create_column_from_property("email", prop).type == "email"

    def test_creates_url_column(self):
        prop = {"type": "string", "format": "url"}
        assert create_column_from_property("url", prop).type == "url"
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+
5
+ from fairspec_metadata.models.column.column import Column, ColumnProperty
6
+
7
+
8
def get_base_property_type(type: str | Sequence[str] | None) -> str | None:
    """Return the base (non-"null") type name of a property type declaration.

    Args:
        type: A single type name, a sequence of type names, or ``None``.

    Returns:
        ``None`` when ``type`` is ``None``; the string itself when ``type`` is
        a string; otherwise the first entry that is not ``"null"``, falling
        back to ``"null"`` when no such entry exists.
    """
    if type is None or isinstance(type, str):
        # Scalars (and the absent case) pass through untouched.
        return type

    non_null = [entry for entry in type if entry != "null"]
    return non_null[0] if non_null else "null"
17
+
18
+
19
def get_is_nullable_property_type(type: str | Sequence[str] | None) -> bool:
    """Tell whether a property type declaration includes ``"null"``.

    Args:
        type: A single type name, a sequence of type names, or ``None``.

    Returns:
        ``True`` only when ``type`` is a non-string sequence containing
        ``"null"``; ``False`` for ``None`` and for any plain string.
    """
    # Only a sequence of type names can mark a property as nullable; a lone
    # string (even "null" itself) and None never do.
    return type is not None and not isinstance(type, str) and "null" in type
25
+
26
+
27
def get_column_properties(columns: list[Column]) -> dict[str, ColumnProperty]:
    """Map each column's ``name`` to its ``property``.

    Args:
        columns: Columns to index.

    Returns:
        A dict keyed by column name, in input order; when a name repeats, the
        later column's property replaces the earlier one.
    """
    properties: dict[str, ColumnProperty] = {}
    for item in columns:
        properties[item.name] = item.property
    return properties
@@ -0,0 +1,29 @@
1
+ from .property import get_base_property_type, get_is_nullable_property_type
2
+
3
+
4
class TestGetBasePropertyType:
    """Covers ``get_base_property_type`` for string, list and ``None`` inputs."""

    def test_returns_type_for_string(self):
        outcome = get_base_property_type("string")
        assert outcome == "string"

    def test_returns_base_type_for_type_null(self):
        # "null" listed last.
        outcome = get_base_property_type(["string", "null"])
        assert outcome == "string"

    def test_returns_base_type_for_null_type(self):
        # "null" listed first.
        outcome = get_base_property_type(["null", "string"])
        assert outcome == "string"

    def test_returns_none_for_none(self):
        outcome = get_base_property_type(None)
        assert outcome is None
16
+
17
+
18
class TestGetIsNullablePropertyType:
    """Covers ``get_is_nullable_property_type`` for string, list and ``None`` inputs."""

    def test_returns_false_for_string(self):
        outcome = get_is_nullable_property_type("string")
        assert outcome is False

    def test_returns_true_for_type_null(self):
        # "null" listed last.
        outcome = get_is_nullable_property_type(["string", "null"])
        assert outcome is True

    def test_returns_true_for_null_type(self):
        # "null" listed first.
        outcome = get_is_nullable_property_type(["null", "string"])
        assert outcome is True

    def test_returns_false_for_none(self):
        outcome = get_is_nullable_property_type(None)
        assert outcome is False
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.models.data_schema import DataSchema
4
+ from fairspec_metadata.models.descriptor import Descriptor
5
+ from fairspec_metadata.models.exception import FairspecException
6
+
7
+ from .validate import validate_data_schema
8
+
9
+
10
def assert_data_schema(source: Descriptor) -> DataSchema:
    """Validate ``source`` as a Data Schema and return it, or raise.

    Args:
        source: Descriptor passed to ``validate_data_schema``.

    Returns:
        The ``data_schema`` carried by the validation result.

    Raises:
        FairspecException: If the validation result carries no Data Schema;
            the validation result is attached as ``report``.
    """
    result = validate_data_schema(source)

    # Test for None explicitly instead of relying on truthiness: an empty
    # mapping is falsy, so a schema that validated but has no keys would
    # otherwise be reported as invalid.
    if result.data_schema is None:
        raise FairspecException("Invalid Data Schema", report=result)

    return result.data_schema
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.descriptor.load import load_descriptor
4
+ from fairspec_metadata.models.data_schema import DataSchema
5
+
6
+ from .assert_ import assert_data_schema
7
+
8
+
9
def load_data_schema(path: str) -> DataSchema:
    """Load the descriptor at ``path`` and assert it is a Data Schema.

    Args:
        path: Location handed to ``load_descriptor``.

    Returns:
        The result of ``assert_data_schema`` for the loaded descriptor.
    """
    return assert_data_schema(load_descriptor(path))
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.json_schema.load import load_json_schema
4
+ from fairspec_metadata.models.data_schema import DataSchema
5
+
6
+
7
def resolve_data_schema(
    data_schema: DataSchema | str | None = None,
) -> DataSchema | None:
    """Turn a Data Schema reference into a Data Schema.

    Args:
        data_schema: A schema object, a string reference to one, or ``None``.

    Returns:
        The result of ``load_json_schema`` when given a string; otherwise the
        argument unchanged (including ``None``).
    """
    if isinstance(data_schema, str):
        # A string is a reference that still has to be loaded.
        return load_json_schema(data_schema)

    # None and already-materialised schemas are returned as they are.
    return data_schema
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.descriptor.copy import copy_descriptor
4
+ from fairspec_metadata.actions.descriptor.save import save_descriptor
5
+ from fairspec_metadata.models.data_schema import DataSchema
6
+ from fairspec_metadata.settings import FAIRSPEC_VERSION
7
+
8
+
9
def save_data_schema(
    data_schema: DataSchema,
    *,
    path: str,
    overwrite: bool = False,
) -> None:
    """Save a copy of ``data_schema`` to ``path`` as a descriptor.

    The input is copied with ``copy_descriptor`` first, so the default
    ``$schema`` is added to the copy rather than to the caller's object.

    Args:
        data_schema: Data Schema to save.
        path: Destination forwarded to ``save_descriptor``.
        overwrite: Forwarded to ``save_descriptor``.
    """
    default_profile = (
        f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/data-schema.json"
    )

    payload = copy_descriptor(data_schema)
    # Only fill in "$schema" when the schema does not declare one itself.
    payload.setdefault("$schema", default_profile)

    save_descriptor(payload, path=path, overwrite=overwrite)
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.descriptor.load import load_descriptor
4
+ from fairspec_metadata.actions.descriptor.validate import validate_descriptor
5
+ from fairspec_metadata.actions.profile.load import load_profile
6
+ from fairspec_metadata.models.data_schema import DataSchema
7
+ from fairspec_metadata.models.descriptor import Descriptor
8
+ from fairspec_metadata.models.profile import ProfileType
9
+ from fairspec_metadata.models.report import Report
10
+
11
+
12
class DataSchemaValidationResult(Report):
    """Validation report that also carries the validated Data Schema."""

    # The validated schema, or None when there is none to return. Defaults to
    # None so the field is optional at construction, in line with the
    # ``dataset`` field of ``DatasetValidationResult``.
    data_schema: DataSchema | None = None
14
+
15
+
16
def validate_data_schema(
    source: Descriptor | str,
    *,
    root_json_pointer: str | None = None,
) -> DataSchemaValidationResult:
    """Validate a Data Schema descriptor against its profile.

    Args:
        source: A descriptor, or a string that is loaded with
            ``load_descriptor`` first.
        root_json_pointer: Forwarded unchanged to ``validate_descriptor``.

    Returns:
        A result with the report's ``valid`` flag and ``errors``; its
        ``data_schema`` is the descriptor when the report is valid and
        ``None`` otherwise.
    """
    if isinstance(source, str):
        descriptor = load_descriptor(source)
    else:
        descriptor = source

    # Use the profile the descriptor declares, or the default when "$schema"
    # is missing or not a string.
    declared = descriptor.get("$schema")
    if isinstance(declared, str):
        schema_url = declared
    else:
        schema_url = "https://fairspec.org/profiles/latest/data-schema.json"

    profile = load_profile(schema_url, profile_type=ProfileType.data_schema)
    report = validate_descriptor(
        descriptor,
        profile=profile,
        root_json_pointer=root_json_pointer,
    )

    return DataSchemaValidationResult(
        valid=report.valid,
        errors=report.errors,
        data_schema=descriptor if report.valid else None,
    )
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.models.dataset import Dataset
4
+ from fairspec_metadata.models.descriptor import Descriptor
5
+ from fairspec_metadata.models.exception import FairspecException
6
+
7
+ from .validate import validate_dataset_descriptor
8
+
9
+
10
def assert_dataset(source: Descriptor, *, basepath: str | None = None) -> Dataset:
    """Validate ``source`` as a dataset descriptor and return the dataset, or raise.

    Args:
        source: Descriptor passed to ``validate_dataset_descriptor``.
        basepath: Forwarded to ``validate_dataset_descriptor``.

    Returns:
        The ``dataset`` carried by the validation result.

    Raises:
        FairspecException: If the validation result carries no dataset; the
            validation result is attached as ``report``.
    """
    outcome = validate_dataset_descriptor(source, basepath=basepath)

    if outcome.dataset:
        return outcome.dataset

    raise FairspecException("Invalid Dataset", report=outcome)
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.resource.denormalize import denormalize_resource
4
+ from fairspec_metadata.models.dataset import Dataset
5
+
6
+
7
def denormalize_dataset(dataset: Dataset, *, basepath: str | None = None) -> Dataset:
    """Build a new ``Dataset`` whose resources have each been denormalized.

    Args:
        dataset: Dataset to convert; it is dumped, not modified.
        basepath: Forwarded to ``denormalize_resource`` for every resource.

    Returns:
        A ``Dataset`` built from the dumped fields, with ``resources``
        replaced by their denormalized counterparts when there are any.
    """
    fields = dataset.model_dump(by_alias=True, exclude_none=True)

    resources = dataset.resources
    if resources:
        # Replace the dumped resources with freshly denormalized ones.
        fields["resources"] = [
            denormalize_resource(item, basepath=basepath) for item in resources
        ]

    return Dataset(**fields)
@@ -0,0 +1,8 @@
1
+ {
2
+ "resources": [
3
+ {
4
+ "name": 1,
5
+ "data": "table.csv"
6
+ }
7
+ ]
8
+ }
@@ -0,0 +1,66 @@
1
+ {
2
+ "$schema": "https://fairspec.org/profiles/latest/dataset.json",
3
+ "titles": [
4
+ {
5
+ "title": "title"
6
+ }
7
+ ],
8
+ "creators": [
9
+ {
10
+ "name": "title"
11
+ }
12
+ ],
13
+ "publisher": {
14
+ "name": "publisher"
15
+ },
16
+ "publicationYear": "2017",
17
+ "subjects": [
18
+ {
19
+ "subject": "keyword1"
20
+ },
21
+ {
22
+ "subject": "keyword2"
23
+ }
24
+ ],
25
+ "contributors": [
26
+ {
27
+ "name": "title",
28
+ "contributorType": "ContactPerson"
29
+ }
30
+ ],
31
+ "dates": [
32
+ {
33
+ "date": "2017-01-01",
34
+ "dateType": "Created"
35
+ }
36
+ ],
37
+ "version": "1.0",
38
+ "rightsList": [
39
+ {
40
+ "rights": "MIT"
41
+ }
42
+ ],
43
+ "descriptions": [
44
+ {
45
+ "description": "description",
46
+ "descriptionType": "Abstract"
47
+ }
48
+ ],
49
+ "relatedIdentifiers": [
50
+ {
51
+ "relatedIdentifier": "http://example.com",
52
+ "relatedIdentifierType": "URL",
53
+ "relationType": "IsDescribedBy"
54
+ }
55
+ ],
56
+ "resources": [
57
+ {
58
+ "name": "name",
59
+ "data": "table.csv",
60
+ "fileDialect": {
61
+ "format": "csv"
62
+ },
63
+ "tableSchema": "schema.json"
64
+ }
65
+ ]
66
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "$schema": "https://fairspec.org/profiles/latest/table-schema.json",
3
+ "required": ["id", "name"],
4
+ "properties": {
5
+ "id": {
6
+ "type": "integer"
7
+ },
8
+ "name": {
9
+ "type": "string"
10
+ }
11
+ }
12
+ }
@@ -0,0 +1,3 @@
1
+ id,name
2
+ 1,english
3
+ 2,中文
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.descriptor.load import load_descriptor
4
+ from fairspec_metadata.actions.path.basepath import resolve_basepath
5
+ from fairspec_metadata.models.dataset import Dataset
6
+
7
+ from .assert_ import assert_dataset
8
+
9
+
10
def load_dataset_descriptor(path: str) -> Dataset:
    """Load the descriptor at ``path`` and assert it is a dataset.

    Args:
        path: Location used both to resolve the basepath and to load the
            descriptor.

    Returns:
        The result of ``assert_dataset`` for the loaded descriptor.
    """
    # Resolve the basepath first, then load; both feed the assertion.
    root = resolve_basepath(path)
    loaded = load_descriptor(path)
    return assert_dataset(loaded, basepath=root)
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.resource.normalize import normalize_resource
4
+ from fairspec_metadata.models.dataset import Dataset
5
+
6
+
7
def normalize_dataset(dataset: Dataset, *, basepath: str | None = None) -> Dataset:
    """Build a new ``Dataset`` whose resources have each been normalized.

    Args:
        dataset: Dataset to convert; it is dumped, not modified.
        basepath: Forwarded to ``normalize_resource`` for every resource.

    Returns:
        A ``Dataset`` built from the dumped fields, with ``resources``
        replaced by their normalized counterparts when there are any.
    """
    fields = dataset.model_dump(by_alias=True, exclude_none=True)

    if dataset.resources:
        normalized_resources = []
        for item in dataset.resources:
            normalized_resources.append(normalize_resource(item, basepath=basepath))
        # Replace the dumped resources with the normalized ones.
        fields["resources"] = normalized_resources

    return Dataset(**fields)
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from fairspec_metadata.actions.descriptor.save import save_descriptor
6
+ from fairspec_metadata.actions.path.basepath import get_basepath
7
+ from fairspec_metadata.settings import FAIRSPEC_VERSION
8
+
9
+ from .denormalize import denormalize_dataset
10
+
11
+ if TYPE_CHECKING:
12
+ from fairspec_metadata.models.dataset import Dataset
13
+
14
+
15
def save_dataset_descriptor(
    dataset: Dataset,
    *,
    path: str,
    overwrite: bool = False,
) -> None:
    """Denormalize ``dataset`` and save it to ``path`` as a descriptor.

    Args:
        dataset: Dataset to save.
        path: Destination; also passed to ``get_basepath`` to obtain the
            basepath used for denormalization.
        overwrite: Forwarded to ``save_descriptor``.
    """
    target_basepath = get_basepath(path)
    payload = denormalize_dataset(dataset, basepath=target_basepath).model_dump(
        by_alias=True, exclude_none=True
    )

    # Only fill in "$schema" when the dumped dataset does not carry one.
    default_profile = f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/dataset.json"
    payload.setdefault("$schema", default_profile)

    save_descriptor(payload, path=path, overwrite=overwrite)
@@ -0,0 +1,102 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from fairspec_metadata.models.datacite.creator import Creator
6
+ from fairspec_metadata.models.datacite.title import Title
7
+ from fairspec_metadata.models.dataset import Dataset
8
+ from fairspec_metadata.models.resource import Resource
9
+ from fairspec_metadata.settings import FAIRSPEC_VERSION
10
+
11
+ from .save import save_dataset_descriptor
12
+
13
+
14
class TestSaveDatasetDescriptor:
    """Saves datasets under pytest's ``tmp_path`` and inspects the written JSON."""

    @staticmethod
    def _read_json(path):
        # Read a saved descriptor back from disk.
        with open(path, encoding="utf-8") as handle:
            return json.load(handle)

    def test_saves_dataset(self, tmp_path):
        target = str(tmp_path / "dataset.json")
        resource = Resource(name="test_resource", data=str(tmp_path / "data.csv"))
        dataset = Dataset(
            creators=[Creator(name="Test Creator")],
            titles=[Title(title="Test Dataset")],
            resources=[resource],
        )

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert saved["$schema"].endswith("dataset.json")
        assert saved["creators"][0]["name"] == "Test Creator"
        assert saved["resources"][0]["name"] == "test_resource"

    def test_sets_default_schema(self, tmp_path):
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(resources=[Resource(data=str(tmp_path / "data.csv"))])

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert (
            saved["$schema"]
            == f"https://fairspec.org/profiles/{FAIRSPEC_VERSION}/dataset.json"
        )

    def test_preserves_custom_schema(self, tmp_path):
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(
            profile="https://custom.schema.url/dataset.json",
            resources=[Resource(data=str(tmp_path / "data.csv"))],
        )

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert saved["$schema"] == "https://custom.schema.url/dataset.json"

    def test_throws_when_file_exists(self, tmp_path):
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(resources=[Resource(data=str(tmp_path / "data.csv"))])

        # The first save creates the file; the second must refuse to replace it.
        save_dataset_descriptor(dataset, path=target)
        with pytest.raises(FileExistsError):
            save_dataset_descriptor(dataset, path=target)

    def test_overwrites_when_flag_set(self, tmp_path):
        target = str(tmp_path / "dataset.json")
        initial = Dataset(
            creators=[Creator(name="Initial")],
            resources=[Resource(data=str(tmp_path / "data.csv"))],
        )
        updated = Dataset(
            creators=[Creator(name="Updated")],
            resources=[Resource(data=str(tmp_path / "data.csv"))],
        )

        save_dataset_descriptor(initial, path=target)
        save_dataset_descriptor(updated, path=target, overwrite=True)

        saved = self._read_json(target)
        assert saved["creators"][0]["name"] == "Updated"

    def test_saves_to_nested_directory(self, tmp_path):
        target = str(tmp_path / "nested" / "dir" / "dataset.json")
        data_path = str(tmp_path / "nested" / "dir" / "data.csv")
        dataset = Dataset(resources=[Resource(data=data_path)])

        save_dataset_descriptor(dataset, path=target)

        saved = self._read_json(target)
        assert "resources" in saved

    def test_denormalizes_resource_paths(self, tmp_path):
        target = str(tmp_path / "dataset.json")
        dataset = Dataset(
            resources=[Resource(name="test", data=str(tmp_path / "data.csv"))],
        )

        save_dataset_descriptor(dataset, path=target)

        # The path given under tmp_path is expected back as a bare file name.
        saved = self._read_json(target)
        assert saved["resources"][0]["data"] == "data.csv"
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata.actions.data_schema.validate import validate_data_schema
4
+ from fairspec_metadata.actions.descriptor.load import load_descriptor
5
+ from fairspec_metadata.actions.descriptor.validate import validate_descriptor
6
+ from fairspec_metadata.actions.file_dialect.validate import validate_file_dialect
7
+ from fairspec_metadata.actions.profile.load import load_profile
8
+ from fairspec_metadata.actions.table_schema.validate import validate_table_schema
9
+ from fairspec_metadata.models.dataset import Dataset
10
+ from fairspec_metadata.models.descriptor import Descriptor
11
+ from fairspec_metadata.models.profile import ProfileType
12
+ from fairspec_metadata.models.report import Report
13
+
14
+ from .normalize import normalize_dataset
15
+
16
+
17
class DatasetValidationResult(Report):
    """Validation report that also carries the resulting dataset."""

    # The dataset built by validation; stays None when there is none to return.
    dataset: Dataset | None = None
19
+
20
+
21
def validate_dataset_descriptor(
    source: Descriptor | str,
    *,
    basepath: str | None = None,
) -> DatasetValidationResult:
    """Validate a dataset descriptor and the string references of its resources.

    Args:
        source: A descriptor, or a string that is loaded with
            ``load_descriptor`` first.
        basepath: Forwarded to ``normalize_dataset``.

    Returns:
        A result with the accumulated ``errors`` and the ``valid`` flag; its
        ``dataset`` is the normalized dataset when no errors were collected
        and ``None`` otherwise.
    """
    if isinstance(source, str):
        descriptor = load_descriptor(source)
    else:
        descriptor = source

    # Use the profile the descriptor declares, or the default when "$schema"
    # is missing or not a string.
    declared = descriptor.get("$schema")
    if isinstance(declared, str):
        schema_url = declared
    else:
        schema_url = "https://fairspec.org/profiles/latest/dataset.json"

    profile = load_profile(schema_url, profile_type=ProfileType.dataset)
    report = validate_descriptor(descriptor, profile=profile)

    normalized: Dataset | None = None
    if report.valid:
        # The descriptor passed validation, so it can be turned into a Dataset.
        normalized = normalize_dataset(Dataset(**descriptor), basepath=basepath)

    if normalized:
        for index, resource in enumerate(normalized.resources or []):
            pointer = f"/resources/{index}"

            # Each reference that is a string is validated by the matching
            # validator, in this fixed order; other values are skipped.
            references = (
                (resource.fileDialect, validate_file_dialect),
                (resource.dataSchema, validate_data_schema),
                (resource.tableSchema, validate_table_schema),
            )
            for reference, validate in references:
                if isinstance(reference, str):
                    outcome = validate(reference, root_json_pointer=pointer)
                    report.errors.extend(outcome.errors)

    if report.errors:
        # Any collected error invalidates the whole dataset.
        normalized = None
        report.valid = False

    return DatasetValidationResult(
        valid=report.valid,
        errors=report.errors,
        dataset=normalized,
    )
@@ -0,0 +1,30 @@
1
+ from .validate import validate_dataset_descriptor
2
+
3
+
4
class TestValidateDatasetDescriptor:
    """Checks ``validate_dataset_descriptor`` on in-memory descriptors."""

    def test_valid_dataset(self):
        outcome = validate_dataset_descriptor({"resources": [{"data": "data.csv"}]})
        assert outcome.valid is True
        assert outcome.errors == []

    def test_invalid_dataset(self):
        # "resources" is given as a string rather than a list.
        outcome = validate_dataset_descriptor({"resources": "not-an-array"})
        assert outcome.valid is False
        assert len(outcome.errors) > 0

    def test_missing_schema_is_valid(self):
        # No "$schema" key is present in the descriptor.
        descriptor = {"resources": [{"data": "data.csv"}]}
        assert validate_dataset_descriptor(descriptor).valid is True

    def test_dataset_with_datacite(self):
        descriptor = {
            "creators": [{"name": "John Doe"}],
            "titles": [{"title": "Example Dataset"}],
            "resources": [{"data": "data.csv"}],
        }
        outcome = validate_dataset_descriptor(descriptor)
        assert outcome.valid is True
        assert outcome.errors == []
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+
5
+ from fairspec_metadata.models.descriptor import Descriptor
6
+
7
+
8
def copy_descriptor(descriptor: Descriptor) -> Descriptor:
    """Return a deep copy of ``descriptor``.

    Args:
        descriptor: Descriptor to duplicate.

    Returns:
        An independent copy; nested containers are copied as well, so changes
        to the copy do not reach the original.
    """
    duplicate = copy.deepcopy(descriptor)
    return duplicate
@@ -0,0 +1 @@
1
+ {"fields": [{"name": "id", "type": "integer"}, {"name": "name", "type": "string"}]}