fairspec-library 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fairspec_library/__init__.py +29 -0
- fairspec_library/actions/__init__.py +0 -0
- fairspec_library/actions/data/__init__.py +0 -0
- fairspec_library/actions/data/load.py +27 -0
- fairspec_library/actions/data/load_spec.py +38 -0
- fairspec_library/actions/data/validate.py +41 -0
- fairspec_library/actions/data/validate_spec.py +46 -0
- fairspec_library/actions/data_schema/__init__.py +0 -0
- fairspec_library/actions/data_schema/infer.py +23 -0
- fairspec_library/actions/data_schema/infer_spec.py +40 -0
- fairspec_library/actions/data_schema/render.py +19 -0
- fairspec_library/actions/dataset/__init__.py +0 -0
- fairspec_library/actions/dataset/foreign_key.py +98 -0
- fairspec_library/actions/dataset/foreign_key_spec.py +114 -0
- fairspec_library/actions/dataset/infer.py +17 -0
- fairspec_library/actions/dataset/infer_spec.py +32 -0
- fairspec_library/actions/dataset/load.py +20 -0
- fairspec_library/actions/dataset/load_spec.py +26 -0
- fairspec_library/actions/dataset/render.py +17 -0
- fairspec_library/actions/dataset/save.py +20 -0
- fairspec_library/actions/dataset/save_spec.py +19 -0
- fairspec_library/actions/dataset/validate.py +57 -0
- fairspec_library/actions/dataset/validate_spec.py +54 -0
- fairspec_library/actions/file_dialect/__init__.py +0 -0
- fairspec_library/actions/file_dialect/infer.py +22 -0
- fairspec_library/actions/file_dialect/infer_spec.py +35 -0
- fairspec_library/actions/resource/__init__.py +0 -0
- fairspec_library/actions/resource/infer.py +42 -0
- fairspec_library/actions/resource/infer_spec.py +37 -0
- fairspec_library/actions/resource/validate.py +34 -0
- fairspec_library/actions/resource/validate_spec.py +42 -0
- fairspec_library/actions/table/__init__.py +0 -0
- fairspec_library/actions/table/infer.py +36 -0
- fairspec_library/actions/table/infer_spec.py +22 -0
- fairspec_library/actions/table/load.py +24 -0
- fairspec_library/actions/table/load_spec.py +41 -0
- fairspec_library/actions/table/save.py +18 -0
- fairspec_library/actions/table/save_spec.py +26 -0
- fairspec_library/actions/table/validate.py +37 -0
- fairspec_library/actions/table/validate_spec.py +43 -0
- fairspec_library/actions/table_schema/__init__.py +0 -0
- fairspec_library/actions/table_schema/infer.py +28 -0
- fairspec_library/actions/table_schema/infer_spec.py +25 -0
- fairspec_library/actions/table_schema/render.py +19 -0
- fairspec_library/models/__init__.py +0 -0
- fairspec_library/models/table.py +6 -0
- fairspec_library/plugin.py +3 -0
- fairspec_library/py.typed +0 -0
- fairspec_library/system.py +48 -0
- fairspec_library-0.0.0.dev0.dist-info/METADATA +22 -0
- fairspec_library-0.0.0.dev0.dist-info/RECORD +52 -0
- fairspec_library-0.0.0.dev0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from fairspec_dataset import * # noqa: F403
|
|
2
|
+
from fairspec_metadata import * # noqa: F403
|
|
3
|
+
from fairspec_table import * # noqa: F403
|
|
4
|
+
|
|
5
|
+
from .actions.data.load import load_data as load_data
|
|
6
|
+
from .actions.data.validate import validate_data as validate_data
|
|
7
|
+
from .actions.data_schema.infer import infer_data_schema as infer_data_schema
|
|
8
|
+
from .actions.data_schema.render import render_data_schema_as as render_data_schema_as
|
|
9
|
+
from .actions.dataset.foreign_key import (
|
|
10
|
+
validate_dataset_foreign_keys as validate_dataset_foreign_keys,
|
|
11
|
+
)
|
|
12
|
+
from .actions.dataset.infer import infer_dataset as infer_dataset
|
|
13
|
+
from .actions.dataset.load import load_dataset as load_dataset
|
|
14
|
+
from .actions.dataset.render import render_dataset_as as render_dataset_as
|
|
15
|
+
from .actions.dataset.save import save_dataset as save_dataset
|
|
16
|
+
from .actions.dataset.validate import validate_dataset as validate_dataset
|
|
17
|
+
from .actions.file_dialect.infer import infer_file_dialect as infer_file_dialect
|
|
18
|
+
from .actions.resource.infer import infer_resource as infer_resource
|
|
19
|
+
from .actions.resource.validate import validate_resource as validate_resource
|
|
20
|
+
from .actions.table.infer import infer_table as infer_table
|
|
21
|
+
from .actions.table.load import load_table as load_table
|
|
22
|
+
from .actions.table.save import save_table as save_table
|
|
23
|
+
from .actions.table.validate import validate_table as validate_table
|
|
24
|
+
from .actions.table_schema.infer import infer_table_schema as infer_table_schema
|
|
25
|
+
from .actions.table_schema.render import render_table_schema_as as render_table_schema_as
|
|
26
|
+
from .models.table import ValidateTableOptions as ValidateTableOptions
|
|
27
|
+
from .plugin import Plugin as Plugin
|
|
28
|
+
from .system import System as System
|
|
29
|
+
from .system import system as system
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata import (
|
|
6
|
+
get_data_first_path,
|
|
7
|
+
get_data_value,
|
|
8
|
+
get_supported_file_dialect,
|
|
9
|
+
load_descriptor,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from fairspec_metadata import Resource
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_data(resource: Resource) -> object | None:
    """Return the JSON data of a resource, or ``None`` when none is available.

    The value reported by ``get_data_value`` is returned directly when one is
    present. Otherwise the resource's first data path is loaded with
    ``load_descriptor``, but only when ``get_supported_file_dialect`` reports
    a dialect for the ``"json"`` format.

    Args:
        resource: Resource whose data should be loaded.

    Returns:
        The inline value, the content loaded from the first data path, or
        ``None`` when neither source applies.
    """
    data_value = get_data_value(resource)
    # Compare against None instead of relying on truthiness so that empty
    # inline values such as {} or [] are returned rather than discarded
    # (callers like validate_data only treat None as "nothing to check").
    # NOTE(review): assumes get_data_value signals "no inline data" with
    # None -- confirm against fairspec_metadata.
    if data_value is not None:
        return data_value

    first_path = get_data_first_path(resource)
    if not first_path:
        return None

    # Only JSON files are loaded here; any other dialect yields None.
    if not get_supported_file_dialect(resource, ["json"]):
        return None

    return load_descriptor(first_path)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from fairspec_dataset import write_temp_file
|
|
6
|
+
from fairspec_metadata import Resource
|
|
7
|
+
|
|
8
|
+
from .load import load_data
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestLoadData:
    """Checks ``load_data`` for inline values, JSON files and missing data."""

    def test_should_return_inline_data(self):
        # An inline list of records is expected back with equal content.
        loaded = load_data(Resource(data=[{"id": 1}, {"id": 2}]))
        assert loaded == [{"id": 1}, {"id": 2}]

    def test_should_return_inline_object(self):
        # An inline mapping is expected back with equal content.
        loaded = load_data(Resource(data={"key": "value"}))
        assert loaded == {"key": "value"}

    def test_should_load_json_file(self):
        # Data referenced through a JSON file path is read from that file.
        expected = {"key": "value"}
        json_path = write_temp_file(json.dumps(expected), format="json")
        assert load_data(Resource(data=json_path)) == expected

    def test_should_return_none_for_non_json_file(self):
        # A CSV path is not loaded by load_data.
        csv_path = write_temp_file("id,name\n1,english", format="csv")
        assert load_data(Resource(data=csv_path)) is None

    def test_should_return_none_for_empty_resource(self):
        # A resource without any data yields None.
        assert load_data(Resource()) is None
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata import (
|
|
6
|
+
DataError,
|
|
7
|
+
FairspecError,
|
|
8
|
+
Report,
|
|
9
|
+
create_report,
|
|
10
|
+
inspect_json,
|
|
11
|
+
resolve_data_schema,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from .load import load_data
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from fairspec_metadata import Resource
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def validate_data(resource: Resource) -> Report:
    """Validate a resource's data against its data schema.

    A report created without errors is returned when the schema does not
    resolve to a truthy value or when ``load_data`` yields ``None``; in the
    first case the data is not loaded at all. Otherwise every note produced
    by ``inspect_json`` becomes a ``DataError`` in the returned report.

    Args:
        resource: Resource providing ``dataSchema`` and the data to check.

    Returns:
        The report built by ``create_report``.
    """
    data_schema = resolve_data_schema(resource.dataSchema)

    # Without a schema there is nothing to validate, so skip loading.
    data = load_data(resource) if data_schema else None
    if data is None:
        return create_report()

    errors: list[FairspecError] = [
        DataError(
            type="data",
            message=note["message"],
            jsonPointer=note["jsonPointer"],
        )
        for note in inspect_json(data, json_schema=data_schema)
    ]
    return create_report(errors)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata import Resource
|
|
4
|
+
|
|
5
|
+
from .validate import validate_data
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestValidateData:
    """Checks ``validate_data`` with and without schema and data."""

    def test_should_return_valid_when_no_schema(self):
        # Data without a schema is reported as valid.
        outcome = validate_data(Resource(data=[{"id": 1}]))
        assert outcome.valid is True

    def test_should_validate_inline_data(self):
        # Inline object that satisfies the schema.
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"},
            },
            "required": ["name", "age"],
        }
        candidate = Resource(data={"name": "test", "age": 25}, dataSchema=schema)
        assert validate_data(candidate).valid is True

    def test_should_detect_invalid_data(self):
        # "name" is an integer although the schema asks for a string.
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        outcome = validate_data(Resource(data={"name": 123}, dataSchema=schema))
        assert outcome.valid is False
        assert len(outcome.errors) > 0

    def test_should_return_valid_for_no_data(self):
        # A schema without any data is reported as valid.
        outcome = validate_data(Resource(dataSchema={"type": "object"}))
        assert outcome.valid is True
|
|
File without changes
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from genson import SchemaBuilder
|
|
6
|
+
|
|
7
|
+
from fairspec_library.actions.data.load import load_data
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from fairspec_metadata import JsonSchema, Resource
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def infer_data_schema(resource: Resource) -> JsonSchema | None:
    """Infer a JSON schema from a resource's data using genson.

    Args:
        resource: Resource whose data is loaded through ``load_data``.

    Returns:
        The schema produced by ``SchemaBuilder.to_schema``, or ``None`` when
        the loaded data is falsy or schema building raises an exception.
    """
    schema = None
    sample = load_data(resource)

    if sample:
        try:
            generator = SchemaBuilder()
            generator.add_object(sample)
            schema = generator.to_schema()
        except Exception:
            # Best effort: any failure while building is reported as "no schema".
            schema = None

    return schema
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from fairspec_dataset import write_temp_file
|
|
6
|
+
from fairspec_metadata import Resource
|
|
7
|
+
|
|
8
|
+
from .infer import infer_data_schema
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestInferDataSchema:
    """Checks ``infer_data_schema`` for inline data, files and missing data."""

    def test_should_infer_schema_from_inline_data(self):
        # An inline object yields an "object" schema.
        inferred = infer_data_schema(Resource(data={"name": "test", "age": 25}))
        assert inferred is not None
        assert inferred.get("type") == "object"

    def test_should_infer_schema_from_inline_array(self):
        # An inline array of records yields some schema.
        inferred = infer_data_schema(Resource(data=[{"id": 1}, {"id": 2}]))
        assert inferred is not None

    def test_should_infer_schema_from_json_file(self):
        # Data referenced through a JSON file path is inferred as well.
        json_path = write_temp_file(
            json.dumps({"name": "test", "value": 42}), format="json"
        )
        inferred = infer_data_schema(Resource(data=json_path))
        assert inferred is not None
        assert inferred.get("type") == "object"

    def test_should_return_none_for_no_data(self):
        # No data means no schema.
        assert infer_data_schema(Resource()) is None

    def test_should_return_none_for_csv_file(self):
        # A CSV path yields no schema.
        csv_path = write_temp_file("id,name\n1,english", format="csv")
        assert infer_data_schema(Resource(data=csv_path)) is None
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_library.system import system
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from fairspec_metadata import DataSchema, RenderDataSchemaOptions
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_data_schema_as(
    data_schema: DataSchema, options: RenderDataSchemaOptions
) -> str | None:
    """Render a data schema with the first plugin that produces a result.

    Plugins from ``system.plugins`` are asked in order; iteration stops at
    the first one whose ``render_data_schema_as`` returns something other
    than ``None``.

    Args:
        data_schema: Schema handed to each plugin.
        options: Rendering options handed to each plugin.

    Returns:
        The first non-``None`` plugin result, or ``None`` if there is none.
    """
    # Lazy generator: later plugins are not called once one has answered.
    attempts = (
        plugin.render_data_schema_as(data_schema, options)
        for plugin in system.plugins
    )
    return next((text for text in attempts if text is not None), None)
|
|
File without changes
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
from fairspec_metadata import (
|
|
7
|
+
FairspecError,
|
|
8
|
+
ForeignKey,
|
|
9
|
+
ForeignKeyError,
|
|
10
|
+
Report,
|
|
11
|
+
create_report,
|
|
12
|
+
resolve_table_schema,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from fairspec_library.actions.table.load import load_table
|
|
16
|
+
from fairspec_library.models.table import ValidateTableOptions
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from fairspec_metadata import Dataset, Resource
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def validate_dataset_foreign_keys(
    dataset: Dataset, **options: Unpack[ValidateTableOptions]
) -> Report:
    """Check every foreign key declared by the dataset's resources.

    Resources whose table schema does not resolve to a truthy value are
    skipped. Each foreign key of the remaining schemas is checked with
    ``_validate_foreign_key`` and the resulting errors are collected.

    Args:
        dataset: Dataset whose resources are inspected.
        **options: Forwarded unchanged to ``_validate_foreign_key``.

    Returns:
        The report built by ``create_report`` from all collected errors.
    """
    collected: list[FairspecError] = []

    for resource in dataset.resources or []:
        schema = resolve_table_schema(resource.tableSchema)
        # A missing schema simply contributes no foreign keys.
        declared_keys = (schema.foreignKeys or []) if schema else []

        for declared_key in declared_keys:
            collected.extend(
                _validate_foreign_key(resource, declared_key, dataset, **options)
            )

    return create_report(collected)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _validate_foreign_key(
    resource: Resource,
    foreign_key: ForeignKey,
    dataset: Dataset,
    **options: Unpack[ValidateTableOptions],
) -> list[ForeignKeyError]:
    """Return one error per distinct key of ``resource`` missing in the target.

    An empty list is returned without checking anything when the column
    lists are empty or differ in length, when the referenced resource is not
    found in ``dataset``, or when either table fails to load.

    Args:
        resource: Resource that declares the foreign key.
        foreign_key: Foreign key with local columns and a reference.
        dataset: Dataset searched for the referenced resource by name.
        **options: Forwarded to both ``load_table`` calls.

    Returns:
        ``ForeignKeyError`` objects whose ``cells`` hold the stringified
        values of the unmatched key, in the order of the local columns.
    """
    target = foreign_key.reference
    own_columns = foreign_key.columns
    target_columns = target.columns

    if not (
        own_columns
        and target_columns
        and len(own_columns) == len(target_columns)
    ):
        return []

    target_resource = _find_resource(dataset, target.resource)
    if not target_resource:
        return []

    own_table = load_table(resource, denormalized=True, **options)
    if own_table is None:
        return []

    target_table = load_table(target_resource, denormalized=True, **options)
    if target_table is None:
        return []

    # Rename the referenced columns to the local names so both sides can be
    # joined on the same column list.
    alias_for = dict(zip(target_columns, own_columns))
    renamed = [pl.col(name).alias(alias_for[name]) for name in target_columns]
    known_keys = target_table.select(renamed).unique()

    # Anti join: keep local key combinations that have no match in the target.
    unmatched = own_table.select(own_columns).join(
        known_keys, on=own_columns, how="anti"
    )
    violations: pl.DataFrame = unmatched.unique().collect()  # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278

    return [
        ForeignKeyError(
            type="foreignKey",
            resourceName=resource.name,
            foreignKey=foreign_key,
            cells=[str(row[column]) for column in own_columns],
        )
        for row in violations.to_dicts()
    ]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _find_resource(dataset: Dataset, name: str | None) -> Resource | None:
    """Return the first resource of ``dataset`` whose ``name`` equals ``name``.

    Args:
        dataset: Dataset whose ``resources`` are searched in order.
        name: Resource name to look for; a falsy value matches nothing.

    Returns:
        The matching resource, or ``None`` when ``name`` is falsy, the
        dataset has no resources, or no resource carries that name.
    """
    if not name:
        return None

    candidates = dataset.resources or []
    return next(
        (candidate for candidate in candidates if candidate.name == name),
        None,
    )
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import (
|
|
5
|
+
Dataset,
|
|
6
|
+
ForeignKey,
|
|
7
|
+
ForeignKeyReference,
|
|
8
|
+
IntegerColumnProperty,
|
|
9
|
+
Resource,
|
|
10
|
+
StringColumnProperty,
|
|
11
|
+
TableSchema,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from .foreign_key import validate_dataset_foreign_keys
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestValidateDatasetForeignKeys:
    """Checks ``validate_dataset_foreign_keys`` on small CSV-backed datasets."""

    @staticmethod
    def _names_and_refs_dataset(refs_csv: str) -> Dataset:
        """Build a "names" table plus a "refs" table pointing at ``names.id``.

        ``refs_csv`` is the CSV content of the referencing table; its
        ``name_id`` column is declared as a foreign key to ``names.id``.
        """
        names_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")
        refs_path = write_temp_file(refs_csv, format="csv")

        names = Resource(
            data=names_path,
            name="names",
            tableSchema=TableSchema(
                properties={
                    "id": IntegerColumnProperty(),
                    "name": StringColumnProperty(),
                }
            ),
        )
        link = ForeignKey(
            columns=["name_id"],
            reference=ForeignKeyReference(resource="names", columns=["id"]),
        )
        refs = Resource(
            data=refs_path,
            name="refs",
            tableSchema=TableSchema(
                properties={
                    "id": IntegerColumnProperty(),
                    "name_id": IntegerColumnProperty(),
                },
                foreignKeys=[link],
            ),
        )
        return Dataset(resources=[names, refs])

    def test_should_validate_valid_foreign_keys(self):
        # Every name_id value exists in names.id.
        dataset = self._names_and_refs_dataset("id,name_id\n1,1\n2,2")
        assert validate_dataset_foreign_keys(dataset).valid is True

    def test_should_detect_foreign_key_violation(self):
        # name_id 999 has no counterpart in names.id.
        dataset = self._names_and_refs_dataset("id,name_id\n1,1\n2,999")
        outcome = validate_dataset_foreign_keys(dataset)
        assert outcome.valid is False
        assert len(outcome.errors) > 0

    def test_should_handle_no_foreign_keys(self):
        # A schema without foreign keys produces a valid report.
        csv_path = write_temp_file("id,name\n1,english", format="csv")
        only_resource = Resource(
            data=csv_path,
            name="test",
            tableSchema=TableSchema(
                properties={
                    "id": IntegerColumnProperty(),
                    "name": StringColumnProperty(),
                }
            ),
        )
        outcome = validate_dataset_foreign_keys(Dataset(resources=[only_resource]))
        assert outcome.valid is True
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata import Dataset
|
|
4
|
+
|
|
5
|
+
from fairspec_library.actions.resource.infer import infer_resource
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def infer_dataset(dataset: Dataset) -> Dataset:
    """Return a deep copy of ``dataset`` with every resource inferred.

    The copy is made with ``model_copy(deep=True)``; each of its resources
    is replaced by the result of ``infer_resource``, which receives the
    1-based position of the resource as ``resource_number``.

    Args:
        dataset: Dataset to copy; it is not modified by this function.

    Returns:
        The copied dataset, unchanged apart from the replaced resources.
    """
    inferred = dataset.model_copy(deep=True)

    resources = inferred.resources
    if not resources:
        return inferred

    for number, resource in enumerate(resources, start=1):
        resources[number - 1] = infer_resource(resource, resource_number=number)

    return inferred
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import Dataset, Resource
|
|
5
|
+
|
|
6
|
+
from .infer import infer_dataset
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestInferDataset:
    """Checks ``infer_dataset`` for naming, copying and empty inputs."""

    def test_should_infer_resource_names(self):
        # The single unnamed resource ends up with a name.
        csv_path = write_temp_file("id,name\n1,english", format="csv")
        inferred = infer_dataset(Dataset(resources=[Resource(data=csv_path)]))
        assert inferred.resources is not None
        assert len(inferred.resources) == 1
        assert inferred.resources[0].name is not None

    def test_should_not_mutate_original(self):
        # The returned dataset is a different object than the input.
        csv_path = write_temp_file("id,name\n1,english", format="csv")
        original = Dataset(resources=[Resource(data=csv_path)])
        assert infer_dataset(original) is not original

    def test_should_handle_empty_resources(self):
        # An empty resource list stays an empty list.
        assert infer_dataset(Dataset(resources=[])).resources == []

    def test_should_handle_no_resources(self):
        # Missing resources stay missing.
        assert infer_dataset(Dataset()).resources is None
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_dataset import DatasetPlugin
|
|
6
|
+
|
|
7
|
+
from fairspec_library.system import system
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from fairspec_metadata import Descriptor
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def load_dataset(source: str) -> Descriptor | None:
    """Load a dataset with the first dataset plugin that recognises ``source``.

    Only plugins that are instances of ``DatasetPlugin`` are asked, in the
    order of ``system.plugins``; iteration stops at the first one whose
    ``load_dataset`` returns something other than ``None``.

    Args:
        source: Source handed to each plugin's ``load_dataset``.

    Returns:
        The first non-``None`` plugin result, or ``None`` if there is none.
    """
    # Lazy generator: later plugins are not called once one has answered.
    attempts = (
        plugin.load_dataset(source)
        for plugin in system.plugins
        if isinstance(plugin, DatasetPlugin)
    )
    return next((loaded for loaded in attempts if loaded is not None), None)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from fairspec_dataset import get_temp_folder_path
|
|
7
|
+
|
|
8
|
+
from .load import load_dataset
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestLoadDataset:
    """Checks ``load_dataset`` for a descriptor file and an unknown source."""

    def test_should_load_dataset_from_descriptor_path(self):
        # Write a minimal descriptor into a temp folder and load it by path.
        descriptor_path = os.path.join(get_temp_folder_path(), "datapackage.json")
        payload = json.dumps({"resources": [{"data": "data.csv", "name": "data"}]})
        with open(descriptor_path, "w") as handle:
            handle.write(payload)

        loaded = load_dataset(descriptor_path)
        assert loaded is not None
        assert "resources" in loaded

    def test_should_return_none_for_unsupported_source(self):
        # A source no plugin handles yields None.
        assert load_dataset("nonexistent-source") is None
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_library.system import system
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from fairspec_metadata import Dataset, RenderDatasetOptions
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_dataset_as(dataset: Dataset, options: RenderDatasetOptions) -> str | None:
    """Render a dataset with the first plugin that produces a result.

    Args:
        dataset: Dataset handed to each plugin's ``render_dataset_as``.
        options: Rendering options handed to each plugin.

    Returns:
        The first non-``None`` result from the plugins in ``system.plugins``
        (asked in order), or ``None`` if every plugin returns ``None``.
    """
    for plugin in system.plugins:
        if (rendered := plugin.render_dataset_as(dataset, options)) is not None:
            return rendered
    return None
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_dataset import DatasetPlugin, SaveDatasetOptions, SaveDatasetResult
|
|
6
|
+
from fairspec_metadata import Dataset
|
|
7
|
+
|
|
8
|
+
from fairspec_library.system import system
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def save_dataset(
    dataset: Dataset, **options: Unpack[SaveDatasetOptions]
) -> SaveDatasetResult | None:
    """Save a dataset with the first dataset plugin that accepts it.

    Args:
        dataset: Dataset handed to each plugin's ``save_dataset``.
        **options: Forwarded unchanged to each plugin.

    Returns:
        The first non-``None`` result from the ``DatasetPlugin`` instances in
        ``system.plugins`` (asked in order), or ``None`` if there is none.
    """
    for plugin in system.plugins:
        # Plugins of other kinds are not asked to save datasets.
        if not isinstance(plugin, DatasetPlugin):
            continue
        if (saved := plugin.save_dataset(dataset, **options)) is not None:
            return saved
    return None
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import get_temp_file_path
|
|
4
|
+
from fairspec_metadata import Dataset, Resource
|
|
5
|
+
|
|
6
|
+
from .save import save_dataset
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestSaveDataset:
    """Checks ``save_dataset`` for a zip target and an unknown target."""

    def test_should_save_dataset_to_zip(self):
        # Saving to a .zip target produces a result.
        zip_path = get_temp_file_path(format="zip")
        inline = Dataset(resources=[Resource(data=[{"id": 1}], name="data")])
        assert save_dataset(inline, target=zip_path) is not None

    def test_should_return_none_for_unsupported_target(self):
        # A target no plugin handles yields None.
        inline = Dataset(resources=[Resource(data=[{"id": 1}], name="data")])
        assert save_dataset(inline, target="/tmp/unknown.xyz") is None
|