fairspec-library 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fairspec_library/__init__.py +29 -0
- fairspec_library/actions/__init__.py +0 -0
- fairspec_library/actions/data/__init__.py +0 -0
- fairspec_library/actions/data/load.py +27 -0
- fairspec_library/actions/data/load_spec.py +38 -0
- fairspec_library/actions/data/validate.py +41 -0
- fairspec_library/actions/data/validate_spec.py +46 -0
- fairspec_library/actions/data_schema/__init__.py +0 -0
- fairspec_library/actions/data_schema/infer.py +23 -0
- fairspec_library/actions/data_schema/infer_spec.py +40 -0
- fairspec_library/actions/data_schema/render.py +19 -0
- fairspec_library/actions/dataset/__init__.py +0 -0
- fairspec_library/actions/dataset/foreign_key.py +98 -0
- fairspec_library/actions/dataset/foreign_key_spec.py +114 -0
- fairspec_library/actions/dataset/infer.py +17 -0
- fairspec_library/actions/dataset/infer_spec.py +32 -0
- fairspec_library/actions/dataset/load.py +20 -0
- fairspec_library/actions/dataset/load_spec.py +26 -0
- fairspec_library/actions/dataset/render.py +17 -0
- fairspec_library/actions/dataset/save.py +20 -0
- fairspec_library/actions/dataset/save_spec.py +19 -0
- fairspec_library/actions/dataset/validate.py +57 -0
- fairspec_library/actions/dataset/validate_spec.py +54 -0
- fairspec_library/actions/file_dialect/__init__.py +0 -0
- fairspec_library/actions/file_dialect/infer.py +22 -0
- fairspec_library/actions/file_dialect/infer_spec.py +35 -0
- fairspec_library/actions/resource/__init__.py +0 -0
- fairspec_library/actions/resource/infer.py +42 -0
- fairspec_library/actions/resource/infer_spec.py +37 -0
- fairspec_library/actions/resource/validate.py +34 -0
- fairspec_library/actions/resource/validate_spec.py +42 -0
- fairspec_library/actions/table/__init__.py +0 -0
- fairspec_library/actions/table/infer.py +36 -0
- fairspec_library/actions/table/infer_spec.py +22 -0
- fairspec_library/actions/table/load.py +24 -0
- fairspec_library/actions/table/load_spec.py +41 -0
- fairspec_library/actions/table/save.py +18 -0
- fairspec_library/actions/table/save_spec.py +26 -0
- fairspec_library/actions/table/validate.py +37 -0
- fairspec_library/actions/table/validate_spec.py +43 -0
- fairspec_library/actions/table_schema/__init__.py +0 -0
- fairspec_library/actions/table_schema/infer.py +28 -0
- fairspec_library/actions/table_schema/infer_spec.py +25 -0
- fairspec_library/actions/table_schema/render.py +19 -0
- fairspec_library/models/__init__.py +0 -0
- fairspec_library/models/table.py +6 -0
- fairspec_library/plugin.py +3 -0
- fairspec_library/py.typed +0 -0
- fairspec_library/system.py +48 -0
- fairspec_library-0.0.0.dev0.dist-info/METADATA +22 -0
- fairspec_library-0.0.0.dev0.dist-info/RECORD +52 -0
- fairspec_library-0.0.0.dev0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata import (
|
|
6
|
+
Dataset,
|
|
7
|
+
FairspecException,
|
|
8
|
+
Report,
|
|
9
|
+
create_report,
|
|
10
|
+
infer_resource_name,
|
|
11
|
+
load_dataset_descriptor,
|
|
12
|
+
normalize_dataset,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from fairspec_library.actions.dataset.foreign_key import validate_dataset_foreign_keys
|
|
16
|
+
from fairspec_library.actions.resource.validate import validate_resource
|
|
17
|
+
from fairspec_library.models.table import ValidateTableOptions
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from fairspec_metadata import FairspecError
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def validate_dataset(
    source: Dataset | str, **options: Unpack[ValidateTableOptions]
) -> Report:
    """Validate a dataset and return a report with every error found.

    ``source`` is either a ``Dataset`` or a string that is passed to
    ``load_dataset_descriptor`` and then validated into a ``Dataset``.
    The resulting report combines the per-resource errors with the errors
    reported by the foreign-key check.
    """
    if isinstance(source, str):
        try:
            source = Dataset.model_validate(load_dataset_descriptor(source))
        except FairspecException as exception:
            # NOTE(review): when the exception carries no report, an empty
            # report is returned -- confirm that such a report is not
            # mistaken for a successful validation by callers.
            return exception.report or create_report()

    dataset = normalize_dataset(source)

    collected = _validate_dataset_resources(dataset, **options)
    collected += validate_dataset_foreign_keys(dataset, **options).errors

    return create_report(collected)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _validate_dataset_resources(
    dataset: Dataset, **options: Unpack[ValidateTableOptions]
) -> list[FairspecError]:
    """Validate each resource of ``dataset`` and gather the errors.

    A resource without a name gets one from ``infer_resource_name``
    (numbered from 1) assigned in place. Every collected error is tagged
    with the name of the resource it came from.
    """
    collected: list[FairspecError] = []

    for number, resource in enumerate(dataset.resources or [], start=1):
        if not resource.name:
            resource.name = infer_resource_name(resource, resource_number=number)

        for error in validate_resource(resource, **options).errors:
            error.resourceName = resource.name
            collected.append(error)

    return collected
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata import (
|
|
4
|
+
Dataset,
|
|
5
|
+
IntegerColumnProperty,
|
|
6
|
+
Resource,
|
|
7
|
+
StringColumnProperty,
|
|
8
|
+
TableSchema,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from .validate import validate_dataset
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TestValidateDataset:
    """Checks for ``validate_dataset`` with in-memory datasets."""

    def test_should_validate_valid_dataset(self):
        # Rows match the declared integer/string columns.
        rows = [{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}]
        schema = TableSchema(
            properties={
                "id": IntegerColumnProperty(),
                "name": StringColumnProperty(),
            }
        )
        resource = Resource(data=rows, name="test", tableSchema=schema)

        result = validate_dataset(Dataset(resources=[resource]))

        assert result.valid is True

    def test_should_detect_invalid_resource(self):
        # "BAD" is not an integer, so the "id" column must fail.
        rows = [{"id": "BAD", "name": "english"}]
        schema = TableSchema(
            properties={
                "id": IntegerColumnProperty(),
                "name": StringColumnProperty(),
            }
        )
        resource = Resource(data=rows, name="test", tableSchema=schema)

        result = validate_dataset(Dataset(resources=[resource]))

        assert result.valid is False

    def test_should_handle_empty_dataset(self):
        result = validate_dataset(Dataset(resources=[]))

        assert result.valid is True
|
|
File without changes
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_dataset import DatasetPlugin, InferFileDialectOptions
|
|
6
|
+
|
|
7
|
+
from fairspec_library.system import system
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from fairspec_metadata import FileDialect, Resource
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def infer_file_dialect(
    resource: Resource, **options: Unpack[InferFileDialectOptions]
) -> FileDialect | None:
    """Return the first file dialect a registered dataset plugin can infer.

    Plugins in ``system.plugins`` that are ``DatasetPlugin`` instances are
    asked in order; the first non-``None`` answer wins. ``None`` is
    returned when no plugin produces a dialect.
    """
    # The generator is lazy, so plugins after the first hit are never called.
    candidates = (
        plugin.infer_file_dialect(resource, **options)
        for plugin in system.plugins
        if isinstance(plugin, DatasetPlugin)
    )
    return next((dialect for dialect in candidates if dialect is not None), None)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import Resource
|
|
5
|
+
|
|
6
|
+
from .infer import infer_file_dialect
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestInferFileDialect:
    """Checks for ``infer_file_dialect`` across several resource kinds."""

    def test_should_infer_csv_dialect(self):
        csv_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")

        assert infer_file_dialect(Resource(data=csv_path)) is not None

    def test_should_return_none_for_json_file(self):
        json_path = write_temp_file('[{"id": 1}]', format="json")

        assert infer_file_dialect(Resource(data=json_path)) is None

    def test_should_infer_xlsx_dialect(self):
        # Only the path is supplied; no file is written for this case.
        assert infer_file_dialect(Resource(data="test.xlsx")) is not None

    def test_should_return_none_for_unknown_format(self):
        assert infer_file_dialect(Resource(data="test.unknown")) is None

    def test_should_return_none_for_no_data(self):
        assert infer_file_dialect(Resource()) is None
|
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_metadata import (
|
|
4
|
+
Resource,
|
|
5
|
+
infer_resource_name,
|
|
6
|
+
resolve_file_dialect,
|
|
7
|
+
)
|
|
8
|
+
from fairspec_dataset import infer_integrity, infer_textual
|
|
9
|
+
|
|
10
|
+
from fairspec_library.actions.data_schema.infer import infer_data_schema
|
|
11
|
+
from fairspec_library.actions.file_dialect.infer import infer_file_dialect
|
|
12
|
+
from fairspec_library.actions.table_schema.infer import infer_table_schema
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def infer_resource(
    resource: Resource, *, resource_number: int | None = None
) -> Resource:
    """Return a deep copy of ``resource`` with missing metadata filled in.

    Only attributes that are currently unset are inferred: ``name``,
    ``fileDialect``, ``textual``, ``integrity``, ``dataSchema`` and
    ``tableSchema``, in that order. The argument itself is not modified.

    ``resource_number`` is forwarded to ``infer_resource_name`` when a name
    has to be inferred.
    """
    inferred = resource.model_copy(deep=True)

    if not inferred.name:
        inferred.name = infer_resource_name(
            inferred, resource_number=resource_number
        )

    if not inferred.fileDialect:
        inferred.fileDialect = infer_file_dialect(inferred)

    # ``textual`` is only inferred when the file dialect resolves to something.
    if inferred.textual is None and resolve_file_dialect(inferred.fileDialect):
        inferred.textual = infer_textual(inferred)

    if not inferred.integrity:
        inferred.integrity = infer_integrity(inferred)

    if not inferred.dataSchema:
        inferred.dataSchema = infer_data_schema(inferred)

    if not inferred.tableSchema:
        inferred.tableSchema = infer_table_schema(inferred)

    return inferred
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import Resource
|
|
5
|
+
|
|
6
|
+
from .infer import infer_resource
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestInferResource:
    """Checks for ``infer_resource``."""

    def test_should_infer_name(self):
        csv_path = write_temp_file("id,name\n1,english", format="csv")

        inferred = infer_resource(Resource(data=csv_path))

        assert inferred.name is not None

    def test_should_infer_file_dialect(self):
        csv_path = write_temp_file("id,name\n1,english", format="csv")

        inferred = infer_resource(Resource(data=csv_path))

        assert inferred.fileDialect is not None

    def test_should_not_overwrite_existing_name(self):
        csv_path = write_temp_file("id,name\n1,english", format="csv")

        inferred = infer_resource(Resource(data=csv_path, name="custom"))

        assert inferred.name == "custom"

    def test_should_not_mutate_original(self):
        csv_path = write_temp_file("id,name\n1,english", format="csv")
        original = Resource(data=csv_path)

        # The function must hand back a different object than it was given.
        assert infer_resource(original) is not original

    def test_should_handle_resource_number(self):
        inline = Resource(data=[{"id": 1}])

        inferred = infer_resource(inline, resource_number=5)

        assert inferred.name == "resource5"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_dataset import validate_file
|
|
6
|
+
from fairspec_metadata import Report, create_report
|
|
7
|
+
|
|
8
|
+
from fairspec_library.actions.data.validate import validate_data
|
|
9
|
+
from fairspec_library.actions.table.validate import validate_table
|
|
10
|
+
from fairspec_library.models.table import ValidateTableOptions
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from fairspec_metadata import FairspecError, Resource
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def validate_resource(
    resource: Resource, **options: Unpack[ValidateTableOptions]
) -> Report:
    """Run the file, data and table checks on ``resource`` in that order.

    Errors from each stage are accumulated. A later stage only runs when
    the previous stage's report is valid; ``options`` are passed to the
    table stage only.
    """
    collected: list[FairspecError] = []

    file_report = validate_file(resource)
    collected += file_report.errors

    if file_report.valid:
        data_report = validate_data(resource)
        collected += data_report.errors

        if data_report.valid:
            collected += validate_table(resource, **options).errors

    return create_report(collected)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import IntegerColumnProperty, Resource, StringColumnProperty, TableSchema
|
|
5
|
+
|
|
6
|
+
from .validate import validate_resource
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestValidateResource:
    """Checks for ``validate_resource`` with CSV-backed resources."""

    def test_should_validate_valid_resource(self):
        csv_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")
        schema = TableSchema(
            properties={
                "id": IntegerColumnProperty(),
                "name": StringColumnProperty(),
            }
        )

        result = validate_resource(Resource(data=csv_path, tableSchema=schema))

        assert result.valid is True

    def test_should_detect_type_error(self):
        # "BAD" cannot satisfy the integer "id" column.
        csv_path = write_temp_file("id,name\nBAD,english", format="csv")
        schema = TableSchema(
            properties={
                "id": IntegerColumnProperty(),
                "name": StringColumnProperty(),
            }
        )

        result = validate_resource(Resource(data=csv_path, tableSchema=schema))

        assert result.valid is False

    def test_should_validate_resource_without_schema(self):
        csv_path = write_temp_file("id,name\n1,english", format="csv")

        result = validate_resource(Resource(data=csv_path))

        assert result.valid is True
|
|
File without changes
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata import Resource, resolve_table_schema
|
|
6
|
+
from fairspec_table import denormalize_table
|
|
7
|
+
from fairspec_table.models.table import LoadTableOptions
|
|
8
|
+
|
|
9
|
+
from fairspec_library.actions.file_dialect.infer import infer_file_dialect
|
|
10
|
+
from fairspec_library.actions.table.load import load_table
|
|
11
|
+
from fairspec_library.actions.table_schema.infer import infer_table_schema
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from fairspec_table import Table
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def infer_table(
    resource: Resource, **options: Unpack[LoadTableOptions]
) -> Table | None:
    """Load the table of ``resource``, inferring what the resource lacks.

    The function works on a deep copy of ``resource``. A missing
    ``fileDialect`` is inferred before loading. The table is always loaded
    with ``denormalized=True`` and, when a table schema is available
    (resolved from the resource or inferred), passed through
    ``denormalize_table`` with that schema.

    Returns ``None`` when no table can be loaded.
    """
    resource = resource.model_copy(deep=True)

    if not resource.fileDialect:
        resource.fileDialect = infer_file_dialect(resource)

    # Merge instead of passing ``denormalized=True`` next to ``**options``:
    # a caller-supplied "denormalized" key would otherwise raise
    # ``TypeError`` (multiple values for the same keyword argument).
    load_options = {**options, "denormalized": True}
    table = load_table(resource, **load_options)
    if table is None:
        return None

    table_schema = resolve_table_schema(resource.tableSchema)
    if not table_schema:
        table_schema = infer_table_schema(resource, **options)

    if table_schema:
        table = denormalize_table(table, table_schema)

    return table
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from fairspec_dataset import write_temp_file
|
|
5
|
+
from fairspec_metadata import Resource
|
|
6
|
+
|
|
7
|
+
from .infer import infer_table
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestInferTable:
    """Checks for ``infer_table``."""

    def test_should_infer_and_load_table(self):
        csv_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")

        loaded = infer_table(Resource(data=csv_path))

        assert loaded is not None
        collected: pl.DataFrame = loaded.collect()  # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278
        assert len(collected) == 2

    def test_should_return_none_for_empty_resource(self):
        assert infer_table(Resource()) is None
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_table import TablePlugin
|
|
6
|
+
from fairspec_table.models.table import LoadTableOptions
|
|
7
|
+
|
|
8
|
+
from fairspec_library.system import system
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from fairspec_metadata import Resource
|
|
12
|
+
from fairspec_table import Table
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_table(
    resource: Resource, **options: Unpack[LoadTableOptions]
) -> Table | None:
    """Return the first table a registered table plugin loads for ``resource``.

    ``TablePlugin`` instances in ``system.plugins`` are tried in order;
    ``None`` is returned when none of them yields a table.
    """
    for plugin in system.plugins:
        if not isinstance(plugin, TablePlugin):
            continue

        if (loaded := plugin.load_table(resource, **options)) is not None:
            return loaded

    return None
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from fairspec_dataset import write_temp_file
|
|
5
|
+
from fairspec_metadata import Resource
|
|
6
|
+
|
|
7
|
+
from .load import load_table
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestLoadTable:
    """Checks for ``load_table`` with file-backed and inline resources."""

    def test_should_load_csv_table(self):
        csv_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")

        loaded = load_table(Resource(data=csv_path))

        assert loaded is not None
        collected: pl.DataFrame = loaded.collect()  # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278
        expected_rows = [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中文"},
        ]
        assert collected.to_dicts() == expected_rows

    def test_should_load_inline_table(self):
        inline_rows = [{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}]

        loaded = load_table(Resource(data=inline_rows))

        assert loaded is not None
        collected: pl.DataFrame = loaded.collect()  # ty: ignore[invalid-assignment] https://github.com/astral-sh/ty/issues/2278
        expected_rows = [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中文"},
        ]
        assert collected.to_dicts() == expected_rows

    def test_should_return_none_for_empty_resource(self):
        assert load_table(Resource()) is None

    def test_should_load_json_table(self):
        json_path = write_temp_file('[{"id": 1, "name": "english"}]', format="json")

        assert load_table(Resource(data=json_path)) is not None
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_table import TablePlugin
|
|
6
|
+
from fairspec_table.models.table import SaveTableOptions, Table
|
|
7
|
+
|
|
8
|
+
from fairspec_library.system import system
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def save_table(table: Table, **options: Unpack[SaveTableOptions]) -> str | None:
    """Return the first non-``None`` result of a table plugin saving ``table``.

    ``TablePlugin`` instances in ``system.plugins`` are tried in order;
    ``None`` is returned when no plugin reports a result.
    """
    # Lazy generator: plugins after the first successful one are not invoked.
    outcomes = (
        plugin.save_table(table, **options)
        for plugin in system.plugins
        if isinstance(plugin, TablePlugin)
    )
    return next((outcome for outcome in outcomes if outcome is not None), None)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from fairspec_dataset import get_temp_file_path
|
|
5
|
+
|
|
6
|
+
from .save import save_table
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestSaveTable:
    """Checks for ``save_table`` with csv, json and unrecognised targets."""

    def test_should_save_table_to_csv(self):
        target = get_temp_file_path(format="csv")
        lazy_frame = pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).lazy()

        assert save_table(lazy_frame, path=target) is not None

    def test_should_save_table_to_json(self):
        target = get_temp_file_path(format="json")
        lazy_frame = pl.DataFrame({"id": [1, 2], "name": ["english", "中文"]}).lazy()

        assert save_table(lazy_frame, path=target) is not None

    def test_should_return_none_for_unknown_format(self):
        target = get_temp_file_path(format="unknown")
        lazy_frame = pl.DataFrame({"id": [1, 2]}).lazy()

        assert save_table(lazy_frame, path=target) is None
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_metadata import Report, create_report, resolve_table_schema
|
|
6
|
+
from fairspec_table import inspect_table
|
|
7
|
+
|
|
8
|
+
from fairspec_library.actions.file_dialect.infer import infer_file_dialect
|
|
9
|
+
from fairspec_library.actions.table.load import load_table
|
|
10
|
+
from fairspec_library.actions.table_schema.infer import infer_table_schema
|
|
11
|
+
from fairspec_library.models.table import ValidateTableOptions
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from fairspec_metadata import Resource
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def validate_table(
    resource: Resource, **options: Unpack[ValidateTableOptions]
) -> Report:
    """Validate the table of ``resource`` and return a report of its errors.

    The function works on a deep copy of ``resource``. A missing
    ``fileDialect`` is inferred first. The table schema is resolved from
    ``resource.tableSchema``; when there is none and ``noInfer`` is falsy,
    a schema is inferred. The table is always loaded with
    ``denormalized=True`` and inspected against the schema.

    Options read here:
        noInfer: skip schema inference when true (defaults to ``False``).
        maxErrors: forwarded to ``inspect_table`` as ``max_errors``
            (defaults to ``1000``).

    Returns an empty report when no table can be loaded.
    """
    resource = resource.model_copy(deep=True)

    if not resource.fileDialect:
        resource.fileDialect = infer_file_dialect(resource)

    no_infer = options.get("noInfer", False)
    max_errors = options.get("maxErrors", 1000)

    table_schema = resolve_table_schema(resource.tableSchema)
    if not table_schema and not no_infer:
        table_schema = infer_table_schema(resource, **options)

    # Merge instead of passing ``denormalized=True`` next to ``**options``:
    # if the options carry a "denormalized" key the call would otherwise
    # raise ``TypeError`` (multiple values for the same keyword argument).
    load_options = {**options, "denormalized": True}
    table = load_table(resource, **load_options)
    if table is None:
        return create_report()

    errors = inspect_table(table, table_schema=table_schema, max_errors=max_errors)
    return create_report(list(errors))
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import IntegerColumnProperty, Resource, StringColumnProperty, TableSchema
|
|
5
|
+
|
|
6
|
+
from .validate import validate_table
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestValidateTable:
    """Checks for ``validate_table`` with CSV-backed resources."""

    def test_should_validate_valid_table(self):
        csv_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")
        schema = TableSchema(
            properties={
                "id": IntegerColumnProperty(),
                "name": StringColumnProperty(),
            }
        )

        result = validate_table(Resource(data=csv_path, tableSchema=schema))

        assert result.valid is True

    def test_should_detect_type_error(self):
        # "BAD" cannot satisfy the integer "id" column.
        csv_path = write_temp_file("id,name\nBAD,english", format="csv")
        schema = TableSchema(
            properties={
                "id": IntegerColumnProperty(),
                "name": StringColumnProperty(),
            }
        )

        result = validate_table(Resource(data=csv_path, tableSchema=schema))

        assert result.valid is False
        assert len(result.errors) > 0

    def test_should_validate_without_schema(self):
        csv_path = write_temp_file("id,name\n1,english", format="csv")

        result = validate_table(Resource(data=csv_path))

        assert result.valid is True
|
|
File without changes
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Unpack
|
|
4
|
+
|
|
5
|
+
from fairspec_table import TablePlugin, infer_table_schema_from_table
|
|
6
|
+
from fairspec_table.models.schema import InferTableSchemaOptions
|
|
7
|
+
|
|
8
|
+
from fairspec_library.actions.table.load import load_table
|
|
9
|
+
from fairspec_library.system import system
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from fairspec_metadata import Resource, TableSchema
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def infer_table_schema(
    resource: Resource, **options: Unpack[InferTableSchemaOptions]
) -> TableSchema | None:
    """Infer a table schema for ``resource``.

    ``TablePlugin`` instances in ``system.plugins`` are asked first, in
    order, and the first non-``None`` schema is returned. If no plugin
    answers, the table is loaded with ``denormalized=True`` and the schema
    is inferred from it. ``None`` is returned when no table can be loaded.
    """
    # Lazy generator: plugins after the first successful one are not invoked.
    plugin_schemas = (
        plugin.infer_table_schema(resource, **options)
        for plugin in system.plugins
        if isinstance(plugin, TablePlugin)
    )
    from_plugin = next(
        (schema for schema in plugin_schemas if schema is not None), None
    )
    if from_plugin is not None:
        return from_plugin

    loaded = load_table(resource, denormalized=True)
    return None if loaded is None else infer_table_schema_from_table(loaded, **options)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fairspec_dataset import write_temp_file
|
|
4
|
+
from fairspec_metadata import Resource
|
|
5
|
+
|
|
6
|
+
from .infer import infer_table_schema
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestInferTableSchema:
    """Checks for ``infer_table_schema``."""

    def test_should_infer_schema_from_csv(self):
        csv_path = write_temp_file("id,name\n1,english\n2,中文", format="csv")

        inferred = infer_table_schema(Resource(data=csv_path))

        assert inferred is not None
        assert inferred.properties is not None

    def test_should_return_none_for_empty_resource(self):
        assert infer_table_schema(Resource()) is None

    def test_should_infer_schema_from_inline_data(self):
        inline_rows = [{"id": 1, "name": "english"}, {"id": 2, "name": "中文"}]

        assert infer_table_schema(Resource(data=inline_rows)) is not None
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from fairspec_library.system import system
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from fairspec_metadata import RenderTableSchemaOptions, TableSchema
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_table_schema_as(
    table_schema: TableSchema, options: RenderTableSchemaOptions
) -> str | None:
    """Return the first rendering of ``table_schema`` a plugin produces.

    Every plugin in ``system.plugins`` is asked in order; ``None`` is
    returned when none of them renders the schema.
    """
    for plugin in system.plugins:
        if (rendered := plugin.render_table_schema_as(table_schema, options)) is not None:
            return rendered

    return None
|
|
File without changes
|