pybiolib 1.1.2155__py3-none-any.whl → 1.1.2169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,14 @@ import os
2
2
  from collections import namedtuple
3
3
  from datetime import datetime
4
4
  from fnmatch import fnmatch
5
+ from pathlib import Path
5
6
  from struct import Struct
6
7
  from typing import Callable, Dict, List, Union, cast
7
8
 
8
9
  from biolib import api, utils
9
10
  from biolib._internal import types
10
11
  from biolib._internal.data_record import get_data_record_state_from_uri
12
+ from biolib._internal.data_record.data_record import validate_sqlite_v1
11
13
  from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
12
14
  from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
13
15
  from biolib._internal.http_client import HttpClient
@@ -106,6 +108,13 @@ class DataRecord:
106
108
  logger.info(f"Validating data record of type {data_record_type['name']}")
107
109
  for rule in data_record_type['validation_rules']:
108
110
  logger.info(f"Validating rule {rule['type']} for {rule['path']}...")
111
+ if rule['type'] == "sqlite-v1":
112
+ try:
113
+ validate_sqlite_v1(schema=rule['rule'], sqlite_file=Path(rule['path']))
114
+ except Exception as error:
115
+ raise Exception("Data Record Validation failed") from error
116
+ else:
117
+ raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
109
118
 
110
119
  min_chunk_size_bytes = 10_000_000
111
120
  chunk_size_in_bytes: int
@@ -1 +1 @@
1
- from .data_record import get_data_record_state_from_uri
1
+ from .data_record import get_data_record_state_from_uri, validate_sqlite_v1
@@ -1,8 +1,86 @@
1
+ import sqlite3
2
+ from pathlib import Path
3
+
4
+ from biolib._internal.types.data_record import SqliteV1DatabaseSchema
1
5
  from biolib.api import client as api_client
2
6
  from biolib.biolib_api_client import AppGetResponse
3
7
  from biolib.biolib_api_client.lfs_types import DataRecordVersionInfo
4
8
 
5
9
 
10
def get_actual_schema(db_path):
    """Read the table, column and foreign-key layout of a SQLite file.

    Args:
        db_path: ``pathlib.Path`` (or any object with ``exists()`` that
            ``sqlite3.connect`` accepts) pointing at the database file.

    Returns:
        A dict of the form
        ``{'tables': {table: {'columns': {column: {...}}}}}`` where each
        column entry holds ``'type'`` (the declared type), ``'nullable'``
        (``True`` unless the column is declared NOT NULL) and, for columns
        that are the source of a foreign key, ``'foreign_key'`` with the
        referenced ``'table'`` and ``'column'``.

    Raises:
        Exception: If ``db_path`` does not exist.
    """
    if not db_path.exists():
        raise Exception(f'File {db_path} not found.')

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        # 'tables' must be a mapping: entries are keyed by table name below.
        actual_schema: SqliteV1DatabaseSchema = {'tables': {}}
        for table in tables:
            table_name = table[0]
            # PRAGMA arguments cannot be bound as parameters, so quote the
            # identifier (doubling embedded quotes) to survive names with
            # spaces, keywords or quote characters.
            quoted_name = '"' + table_name.replace('"', '""') + '"'

            # table_info rows: (cid, name, type, notnull, dflt_value, pk)
            cursor.execute(f'PRAGMA table_info({quoted_name});')
            columns = cursor.fetchall()
            actual_schema['tables'][table_name] = {'columns': {}}
            for column in columns:
                actual_schema['tables'][table_name]['columns'][column[1]] = {
                    'type': column[2],
                    'nullable': not bool(column[3]),
                }

            # foreign_key_list rows: (id, seq, table, from, to, ...)
            cursor.execute(f'PRAGMA foreign_key_list({quoted_name});')
            foreign_keys = cursor.fetchall()
            for fk in foreign_keys:
                actual_schema['tables'][table_name]['columns'][fk[3]]['foreign_key'] = {
                    'table': fk[2],
                    'column': fk[4],
                }
    finally:
        # Release the file handle even when a query above fails.
        conn.close()

    return actual_schema
38
+
39
+
40
def verify_schema(specification: 'SqliteV1DatabaseSchema', actual_schema: 'SqliteV1DatabaseSchema'):
    """Check that a database layout satisfies a sqlite-v1 specification.

    Every table and column named in ``specification`` must be present in
    ``actual_schema`` with the same declared type. Tables and columns that
    appear only in ``actual_schema`` are not inspected.

    Args:
        specification: The expected layout.
        actual_schema: The layout found in the database, in the shape
            returned by ``get_actual_schema``.

    Raises:
        Exception: On the first mismatch found: a missing table or column,
            a differing column type, a NOT NULL column where the
            specification allows NULL (``nullable`` defaults to ``True``),
            or a missing or differing foreign key.
    """
    actual_tables = actual_schema['tables']
    for table_name, table_spec in specification['tables'].items():
        if table_name not in actual_tables:
            raise Exception(f"Error: Table '{table_name}' is missing.")

        actual_columns = actual_tables[table_name]['columns']

        for column_name, column_spec in table_spec['columns'].items():
            if column_name not in actual_columns:
                raise Exception(f"Error: Column '{column_name}' in table '{table_name}' is missing.")

            actual_column = actual_columns[column_name]
            if actual_column['type'] != column_spec['type']:
                # Both fragments need the f prefix, otherwise the placeholders
                # are emitted literally.
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' "
                    f"has type '{actual_column['type']}' but expected '{column_spec['type']}'."
                )

            if not actual_column['nullable'] and column_spec.get('nullable', True):
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' is "
                    'not nullable but should be nullable according to the specification.'
                )

        # Foreign keys are checked only after every column of the table has
        # passed the presence/type/nullability checks above.
        for column_name, column_spec in table_spec['columns'].items():
            foreign_key_spec = column_spec.get('foreign_key')
            if not foreign_key_spec:
                continue

            fk = actual_columns[column_name].get('foreign_key')
            if not fk:
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' does "
                    'not have a foreign key constraint.'
                )

            # Fail when the constraint points somewhere other than specified.
            if fk['table'] != foreign_key_spec['table'] or fk['column'] != foreign_key_spec['column']:
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' does "
                    'not have the correct foreign key constraint.'
                )
82
+
83
+
6
84
  def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
7
85
  app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
8
86
  return DataRecordVersionInfo(
@@ -10,3 +88,10 @@ def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
10
88
  resource_uuid=app_response['app']['public_id'],
11
89
  resource_version_uuid=app_response['app_version']['public_id'],
12
90
  )
91
+
92
+
93
def validate_sqlite_v1(schema: SqliteV1DatabaseSchema, sqlite_file: Path):
    """Validate that the SQLite file at ``sqlite_file`` conforms to ``schema``.

    Reads the layout of the database with ``get_actual_schema`` and compares
    it against ``schema`` with ``verify_schema``.

    Args:
        schema: The expected sqlite-v1 layout.
        sqlite_file: Path of the SQLite database file to inspect.

    Raises:
        Exception: Propagated from ``get_actual_schema`` or ``verify_schema``
            when the file is missing or does not match ``schema``.
    """
    actual_schema = get_actual_schema(sqlite_file)
    verify_schema(specification=schema, actual_schema=actual_schema)
@@ -1,15 +1,35 @@
1
- from .typing import Dict, List, Optional, TypedDict
1
+ from .typing import Dict, List, Literal, Optional, TypedDict, Union
2
+
3
+
4
class SqliteV1ForeignKey(TypedDict):
    """Foreign-key reference attached to a column of a sqlite-v1 schema."""

    # Name of the table the column refers to.
    table: str
    # Name of the referenced column in that table.
    column: str
7
+
8
+
9
class _SqliteV1ColumnRequired(TypedDict):
    """Keys that every sqlite-v1 column entry must provide."""

    # Declared column type.
    type: Literal['INTEGER', 'REAL', 'TEXT', 'JSON']  # noqa:F821


class SqliteV1Column(_SqliteV1ColumnRequired, total=False):
    """Description of a single column in a sqlite-v1 schema.

    Only ``type`` is mandatory. The remaining keys may be omitted, which is
    how the validation code treats them: it reads them with ``dict.get`` and
    builds column entries without ``foreign_key`` or ``json_schema``.
    """

    # Whether NULL values are allowed; the validator treats a missing key as True.
    nullable: Optional[bool]
    # Reference to another table's column, when the column is a foreign key.
    foreign_key: Optional[SqliteV1ForeignKey]
    # NOTE(review): presumably a JSON Schema for 'JSON' columns; nothing in
    # the visible code reads it — confirm against the consumer.
    json_schema: Optional[Dict]
14
+
15
+
16
class SqliteV1Table(TypedDict):
    """One table of a sqlite-v1 schema."""

    # Column descriptions keyed by column name.
    columns: Dict[str, SqliteV1Column]
18
+
19
+
20
class SqliteV1DatabaseSchema(TypedDict):
    """Top-level sqlite-v1 schema: the layout of a whole database."""

    # Table descriptions keyed by table name.
    tables: Dict[str, SqliteV1Table]
2
22
 
3
23
 
4
24
  class DataRecordValidationRuleDict(TypedDict):
      """A single validation rule belonging to a data record type."""

      # Path of the file the rule applies to; the sqlite-v1 validator wraps
      # it in ``Path`` and opens it as a SQLite database.
      path: str
      # Rule kind; "sqlite-v1" is the only value the validation loop accepts.
      type: str
      # Rule payload; for "sqlite-v1" this is the expected database schema.
      rule: Union[SqliteV1DatabaseSchema]
8
28
 
9
29
 
10
30
  class DataRecordTypeDict(TypedDict):
      """A data record type together with its validation rules."""

      # Name of the type; included in the validation log message.
      name: str
      # Rules checked one by one when a data record of this type is validated.
      validation_rules: List[DataRecordValidationRuleDict]
13
33
 
14
34
 
15
35
  class DataRecordSlimDict(TypedDict):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.2155
3
+ Version: 1.1.2169
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -1,10 +1,10 @@
1
1
  LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
2
2
  README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
3
3
  biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
4
- biolib/_data_record/data_record.py,sha256=Sud8yXz7yR6YW4V6OqE7nO6I4a0TdqijmMTZwwU59j8,12152
4
+ biolib/_data_record/data_record.py,sha256=XC3BsxnmA20odM7r4dsRHNnwYqhYp054f4BNkA5J5dA,12685
5
5
  biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- biolib/_internal/data_record/__init__.py,sha256=wLOy3Pb7dWYik5eQtQM00DH2AWC-M5RbTjwh9InPiqo,56
7
- biolib/_internal/data_record/data_record.py,sha256=If4SQj-XwKSPzCpaWA01LEGKalZ6DEjD5PJZRtl3Mao,556
6
+ biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
7
+ biolib/_internal/data_record/data_record.py,sha256=iUwLGQEQtXt09iLUMWrE_-Gj3ZkQfXxB7Zcp1WcwsF0,4297
8
8
  biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
9
9
  biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
10
10
  biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
@@ -18,7 +18,7 @@ biolib/_internal/push_application.py,sha256=8P7eXvySn7CRp5XBDkO3xjTGixS8g7-jD-_i
18
18
  biolib/_internal/runtime.py,sha256=9pZ3s3L7LGxdqOgnHh1KK3Jjyn_9MjhQmKHI-6hMT3U,448
19
19
  biolib/_internal/types/__init__.py,sha256=11ZucS8jKeLGAAswXyKI7FH2KLHd6T9Sh8ZK2Ar3jlk,152
20
20
  biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
21
- biolib/_internal/types/data_record.py,sha256=U1e9mqEAOOcjR2QzL8eK_xdttkN44yhxvslTyQk1QOo,369
21
+ biolib/_internal/types/data_record.py,sha256=AHoIiwVqeHj0HozQxFRAyxk-d3XJgLWno4ic1z9eTrQ,865
22
22
  biolib/_internal/types/experiment.py,sha256=D94iBdn2nS92lRW-TOs1a2WKXJD5ZtmzL4ypggKX2ys,176
23
23
  biolib/_internal/types/resource.py,sha256=G-vPkZoe4Um6FPxsQZtRzAlbSW5sDW4NFkbjn21I3V4,372
24
24
  biolib/_internal/types/typing.py,sha256=D4EKKEe7kDx0K6lJi-H_XLtk-8w6nu2fdqn9bvzI-Xo,288
@@ -116,8 +116,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
116
116
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
117
117
  biolib/utils/seq_util.py,sha256=jC5WhH63FTD7SLFJbxQGA2hOt9NTwq9zHl_BEec1Z0c,4907
118
118
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
119
- pybiolib-1.1.2155.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
- pybiolib-1.1.2155.dist-info/METADATA,sha256=tu73TYRGUjbJ3MfO15gYq2UO81SYCy508-wwmoHN3WQ,1508
121
- pybiolib-1.1.2155.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
- pybiolib-1.1.2155.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
- pybiolib-1.1.2155.dist-info/RECORD,,
119
+ pybiolib-1.1.2169.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
+ pybiolib-1.1.2169.dist-info/METADATA,sha256=Ah2VUsAv-zV4lYA_0OTbPHmZuRHNBbNUE8hyRY5icYs,1508
121
+ pybiolib-1.1.2169.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
+ pybiolib-1.1.2169.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
+ pybiolib-1.1.2169.dist-info/RECORD,,