pybiolib 1.1.2155__py3-none-any.whl → 1.1.2169__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/_data_record/data_record.py +9 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +85 -0
- biolib/_internal/types/data_record.py +23 -3
- {pybiolib-1.1.2155.dist-info → pybiolib-1.1.2169.dist-info}/METADATA +1 -1
- {pybiolib-1.1.2155.dist-info → pybiolib-1.1.2169.dist-info}/RECORD +9 -9
- {pybiolib-1.1.2155.dist-info → pybiolib-1.1.2169.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.2155.dist-info → pybiolib-1.1.2169.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.2155.dist-info → pybiolib-1.1.2169.dist-info}/entry_points.txt +0 -0
@@ -2,12 +2,14 @@ import os
|
|
2
2
|
from collections import namedtuple
|
3
3
|
from datetime import datetime
|
4
4
|
from fnmatch import fnmatch
|
5
|
+
from pathlib import Path
|
5
6
|
from struct import Struct
|
6
7
|
from typing import Callable, Dict, List, Union, cast
|
7
8
|
|
8
9
|
from biolib import api, utils
|
9
10
|
from biolib._internal import types
|
10
11
|
from biolib._internal.data_record import get_data_record_state_from_uri
|
12
|
+
from biolib._internal.data_record.data_record import validate_sqlite_v1
|
11
13
|
from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
|
12
14
|
from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
|
13
15
|
from biolib._internal.http_client import HttpClient
|
@@ -106,6 +108,13 @@ class DataRecord:
|
|
106
108
|
logger.info(f"Validating data record of type {data_record_type['name']}")
|
107
109
|
for rule in data_record_type['validation_rules']:
|
108
110
|
logger.info(f"Validating rule {rule['type']} for {rule['path']}...")
|
111
|
+
if rule['type'] == "sqlite-v1":
|
112
|
+
try:
|
113
|
+
validate_sqlite_v1(schema=rule['rule'], sqlite_file=Path(rule['path']))
|
114
|
+
except Exception as error:
|
115
|
+
raise Exception("Data Record Validation failed") from error
|
116
|
+
else:
|
117
|
+
raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
|
109
118
|
|
110
119
|
min_chunk_size_bytes = 10_000_000
|
111
120
|
chunk_size_in_bytes: int
|
@@ -1 +1 @@
|
|
1
|
-
from .data_record import get_data_record_state_from_uri
|
1
|
+
from .data_record import get_data_record_state_from_uri, validate_sqlite_v1
|
@@ -1,8 +1,86 @@
|
|
1
|
+
import sqlite3
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
from biolib._internal.types.data_record import SqliteV1DatabaseSchema
|
1
5
|
from biolib.api import client as api_client
|
2
6
|
from biolib.biolib_api_client import AppGetResponse
|
3
7
|
from biolib.biolib_api_client.lfs_types import DataRecordVersionInfo
|
4
8
|
|
5
9
|
|
10
|
+
def get_actual_schema(db_path):
    """Read the table, column and foreign-key layout of a SQLite file.

    Args:
        db_path: ``pathlib.Path`` pointing at the SQLite database to inspect.

    Returns:
        A dict shaped like ``SqliteV1DatabaseSchema``:
        ``{'tables': {table_name: {'columns': {column_name: {...}}}}}``.
        Every column entry has ``type`` and ``nullable``; columns that appear
        in ``PRAGMA foreign_key_list`` additionally get ``foreign_key``.

    Raises:
        Exception: if ``db_path`` does not exist.
    """
    if not db_path.exists():
        raise Exception(f'File {db_path} not found.')

    # 'tables' must be a mapping: it is indexed by table name below.
    actual_schema: SqliteV1DatabaseSchema = {'tables': {}}

    conn = sqlite3.connect(str(db_path))
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        for table_name in table_names:
            # PRAGMA arguments cannot be bound as parameters, so quote the
            # identifier instead (handles spaces, keywords and embedded quotes).
            quoted_name = '"' + table_name.replace('"', '""') + '"'

            # table_info rows: (cid, name, type, notnull, dflt_value, pk)
            cursor.execute(f'PRAGMA table_info({quoted_name});')
            columns = {
                row[1]: {'type': row[2], 'nullable': not bool(row[3])}
                for row in cursor.fetchall()
            }

            # foreign_key_list rows: (id, seq, table, from, to, ...)
            cursor.execute(f'PRAGMA foreign_key_list({quoted_name});')
            for fk in cursor.fetchall():
                columns[fk[3]]['foreign_key'] = {'table': fk[2], 'column': fk[4]}

            actual_schema['tables'][table_name] = {'columns': columns}
    finally:
        # Release the file handle even when a query fails.
        conn.close()

    return actual_schema
|
38
|
+
|
39
|
+
|
40
|
+
def verify_schema(specification: 'SqliteV1DatabaseSchema', actual_schema: 'SqliteV1DatabaseSchema'):
    """Check that ``actual_schema`` satisfies ``specification``.

    For every table in the specification, all column checks (presence, type,
    nullability) run first, followed by the foreign-key checks. Tables and
    columns that exist only in ``actual_schema`` are ignored.

    Args:
        specification: The expected schema.
        actual_schema: The schema read from the database file.

    Returns:
        None when every check passes.

    Raises:
        Exception: on the first missing table or column, type mismatch,
            nullability mismatch, or missing/incorrect foreign key.
    """
    actual_tables = actual_schema['tables']

    for table_name, table_spec in specification['tables'].items():
        if table_name not in actual_tables:
            raise Exception(f"Error: Table '{table_name}' is missing.")

        actual_columns = actual_tables[table_name]['columns']

        for column_name, column_spec in table_spec['columns'].items():
            if column_name not in actual_columns:
                raise Exception(f"Error: Column '{column_name}' in table '{table_name}' is missing.")

            actual_column = actual_columns[column_name]
            if actual_column['type'] != column_spec['type']:
                # Both fragments must be f-strings, otherwise the placeholders
                # are emitted literally.
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' "
                    f"has type '{actual_column['type']}' but expected '{column_spec['type']}'."
                )

            # A column is nullable by default unless the specification says otherwise.
            if not actual_column['nullable'] and column_spec.get('nullable', True):
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' is "
                    'not nullable but should be nullable according to the specification.'
                )

        for column_name, column_spec in table_spec['columns'].items():
            foreign_key_spec = column_spec.get('foreign_key')
            if not foreign_key_spec:
                continue

            fk = actual_columns[column_name].get('foreign_key')
            if not fk:
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' does "
                    'not have a foreign key constraint.'
                )

            # Fail only when the constraint points somewhere else than specified.
            if fk['table'] != foreign_key_spec['table'] or fk['column'] != foreign_key_spec['column']:
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' does "
                    'not have the correct foreign key constraint.'
                )
|
82
|
+
|
83
|
+
|
6
84
|
def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
|
7
85
|
app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
|
8
86
|
return DataRecordVersionInfo(
|
@@ -10,3 +88,10 @@ def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
|
|
10
88
|
resource_uuid=app_response['app']['public_id'],
|
11
89
|
resource_version_uuid=app_response['app_version']['public_id'],
|
12
90
|
)
|
91
|
+
|
92
|
+
|
93
|
+
def validate_sqlite_v1(schema: 'SqliteV1DatabaseSchema', sqlite_file: Path):
    """Validate a SQLite file against a sqlite-v1 schema specification.

    Reads the schema of ``sqlite_file`` with ``get_actual_schema`` and compares
    it to ``schema`` with ``verify_schema``.

    Args:
        schema: The expected database schema.
        sqlite_file: Path of the SQLite database file to validate.

    Raises:
        Exception: propagated from ``get_actual_schema`` or ``verify_schema``
            when the file is missing or does not match the specification.
    """
    # No stdout output here: this is library code and callers log on their own.
    actual_schema = get_actual_schema(sqlite_file)
    verify_schema(specification=schema, actual_schema=actual_schema)
|
@@ -1,15 +1,35 @@
|
|
1
|
-
from .typing import Dict, List, Optional, TypedDict
|
1
|
+
from .typing import Dict, List, Literal, Optional, TypedDict, Union
|
2
|
+
|
3
|
+
|
4
|
+
class SqliteV1ForeignKey(TypedDict):
    """Foreign-key reference attached to a column in a sqlite-v1 schema."""

    # Name of the table the constraint points at.
    table: str
    # Name of the column the constraint points at.
    column: str
|
7
|
+
|
8
|
+
|
9
|
+
class _SqliteV1ColumnRequired(TypedDict):
    """Keys that every sqlite-v1 column entry must carry."""

    # Declared column type.
    type: Literal['INTEGER', 'REAL', 'TEXT', 'JSON']  # noqa:F821


class SqliteV1Column(_SqliteV1ColumnRequired, total=False):
    """Description of a single column in a sqlite-v1 schema.

    Only ``type`` is required. The remaining keys may be left out, which is
    how the validation code in this package builds and reads column entries
    (it uses ``.get(...)`` for them).
    """

    # Whether the column accepts NULL.
    nullable: Optional[bool]
    # Foreign-key constraint of the column, if any.
    foreign_key: Optional[SqliteV1ForeignKey]
    # NOTE(review): presumably a JSON Schema for 'JSON' columns; no visible code reads it. Confirm.
    json_schema: Optional[Dict]
|
14
|
+
|
15
|
+
|
16
|
+
class SqliteV1Table(TypedDict):
    """Description of a single table in a sqlite-v1 schema."""

    # Column descriptions keyed by column name.
    columns: Dict[str, SqliteV1Column]
|
18
|
+
|
19
|
+
|
20
|
+
class SqliteV1DatabaseSchema(TypedDict):
    """Top-level sqlite-v1 schema: the set of tables in a database."""

    # Table descriptions keyed by table name.
    tables: Dict[str, SqliteV1Table]
|
2
22
|
|
3
23
|
|
4
24
|
class DataRecordValidationRuleDict(TypedDict):
|
5
25
|
path: str
|
6
26
|
type: str
|
7
|
-
rule:
|
27
|
+
rule: Union[SqliteV1DatabaseSchema]
|
8
28
|
|
9
29
|
|
10
30
|
class DataRecordTypeDict(TypedDict):
|
11
31
|
name: str
|
12
|
-
validation_rules: List[
|
32
|
+
validation_rules: List[DataRecordValidationRuleDict]
|
13
33
|
|
14
34
|
|
15
35
|
class DataRecordSlimDict(TypedDict):
|
@@ -1,10 +1,10 @@
|
|
1
1
|
LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
2
2
|
README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
|
3
3
|
biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
|
4
|
-
biolib/_data_record/data_record.py,sha256=
|
4
|
+
biolib/_data_record/data_record.py,sha256=XC3BsxnmA20odM7r4dsRHNnwYqhYp054f4BNkA5J5dA,12685
|
5
5
|
biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
biolib/_internal/data_record/__init__.py,sha256=
|
7
|
-
biolib/_internal/data_record/data_record.py,sha256=
|
6
|
+
biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
|
7
|
+
biolib/_internal/data_record/data_record.py,sha256=iUwLGQEQtXt09iLUMWrE_-Gj3ZkQfXxB7Zcp1WcwsF0,4297
|
8
8
|
biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
|
9
9
|
biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
|
10
10
|
biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
|
@@ -18,7 +18,7 @@ biolib/_internal/push_application.py,sha256=8P7eXvySn7CRp5XBDkO3xjTGixS8g7-jD-_i
|
|
18
18
|
biolib/_internal/runtime.py,sha256=9pZ3s3L7LGxdqOgnHh1KK3Jjyn_9MjhQmKHI-6hMT3U,448
|
19
19
|
biolib/_internal/types/__init__.py,sha256=11ZucS8jKeLGAAswXyKI7FH2KLHd6T9Sh8ZK2Ar3jlk,152
|
20
20
|
biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
|
21
|
-
biolib/_internal/types/data_record.py,sha256=
|
21
|
+
biolib/_internal/types/data_record.py,sha256=AHoIiwVqeHj0HozQxFRAyxk-d3XJgLWno4ic1z9eTrQ,865
|
22
22
|
biolib/_internal/types/experiment.py,sha256=D94iBdn2nS92lRW-TOs1a2WKXJD5ZtmzL4ypggKX2ys,176
|
23
23
|
biolib/_internal/types/resource.py,sha256=G-vPkZoe4Um6FPxsQZtRzAlbSW5sDW4NFkbjn21I3V4,372
|
24
24
|
biolib/_internal/types/typing.py,sha256=D4EKKEe7kDx0K6lJi-H_XLtk-8w6nu2fdqn9bvzI-Xo,288
|
@@ -116,8 +116,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
|
|
116
116
|
biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
|
117
117
|
biolib/utils/seq_util.py,sha256=jC5WhH63FTD7SLFJbxQGA2hOt9NTwq9zHl_BEec1Z0c,4907
|
118
118
|
biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
|
119
|
-
pybiolib-1.1.
|
120
|
-
pybiolib-1.1.
|
121
|
-
pybiolib-1.1.
|
122
|
-
pybiolib-1.1.
|
123
|
-
pybiolib-1.1.
|
119
|
+
pybiolib-1.1.2169.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
120
|
+
pybiolib-1.1.2169.dist-info/METADATA,sha256=Ah2VUsAv-zV4lYA_0OTbPHmZuRHNBbNUE8hyRY5icYs,1508
|
121
|
+
pybiolib-1.1.2169.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
122
|
+
pybiolib-1.1.2169.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
|
123
|
+
pybiolib-1.1.2169.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|