sapiopycommons 2024.6.6a248__tar.gz → 2024.6.11a252__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sapiopycommons might be problematic. Click here for more details.
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/PKG-INFO +1 -1
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/pyproject.toml +1 -1
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_util.py +13 -6
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_validator.py +71 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/.gitignore +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/LICENSE +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/README.md +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/callbacks/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/callbacks/callback_util.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/IndigoMolecules.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/Molecules.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/datatype/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/datatype/attachment_util.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/experiment_handler.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/plate_designer.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/complex_data_loader.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_bridge.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_bridge_handler.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_data_handler.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_writer.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/aliases.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/custom_report_util.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/exceptions.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/popup_util.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/storage_util.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/time_util.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/multimodal/multimodal.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/multimodal/multimodal_data.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/processtracking/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/processtracking/endpoints.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/recordmodel/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/recordmodel/record_handler.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/eln_rule_handler.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/on_save_rule_handler.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/webhook/__init__.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/webhook/webhook_handlers.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/_do_not_add_init_py_here +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/bio_reg_test.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/chem_test.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/data_type_models.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/kappa.chains.fasta +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/mafft_test.py +0 -0
- {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/test.gb +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sapiopycommons
|
|
3
|
-
Version: 2024.6.6a248
|
|
3
|
+
Version: 2024.6.11a252
|
|
4
4
|
Summary: Official Sapio Python API Utilities Package
|
|
5
5
|
Project-URL: Homepage, https://github.com/sapiosciences
|
|
6
6
|
Author-email: Jonathan Steck <jsteck@sapiosciences.com>, Yechen Qiao <yqiao@sapiosciences.com>
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_util.py
RENAMED
|
@@ -21,7 +21,7 @@ class FileUtil:
|
|
|
21
21
|
"""
|
|
22
22
|
@staticmethod
|
|
23
23
|
def tokenize_csv(file_bytes: bytes, required_headers: list[str] | None = None, header_row_index: int | None = 0,
|
|
24
|
-
seperator: str = ",") -> tuple[list[dict[str, str]], list[list[str]]]:
|
|
24
|
+
seperator: str = ",", *, encoding: str | None = None) -> tuple[list[dict[str, str]], list[list[str]]]:
|
|
25
25
|
"""
|
|
26
26
|
Tokenize a CSV file. The provided file must be uniform. That is, if row 1 has 10 cells, all the rows in the file
|
|
27
27
|
must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
|
|
@@ -34,13 +34,17 @@ class FileUtil:
|
|
|
34
34
|
meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
|
|
35
35
|
is assumed to be the header row.
|
|
36
36
|
:param seperator: The character that separates cells in the table.
|
|
37
|
+
:param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
|
|
38
|
+
contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
|
|
39
|
+
ISO-8859-1 as the encoding.
|
|
37
40
|
:return: The CSV parsed into a list of dicts where each dict is a row, mapping the headers to the cells for
|
|
38
41
|
that row. Also returns a list of each row above the headers (the metadata), parsed into a list of each cell.
|
|
39
42
|
If the header row index is 0 or None, this list will be empty.
|
|
40
43
|
"""
|
|
41
44
|
# Parse the file bytes into two DataFrames. The first is metadata of the file located above the header row,
|
|
42
45
|
# while the second is the body of the file below the header row.
|
|
43
|
-
file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator)
|
|
46
|
+
file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator,
|
|
47
|
+
encoding=encoding)
|
|
44
48
|
# Parse the metadata from above the header row index into a list of lists.
|
|
45
49
|
metadata: list[list[str]] = FileUtil.data_frame_to_lists(file_metadata)
|
|
46
50
|
# Parse the data from the file body into a list of dicts.
|
|
@@ -74,8 +78,8 @@ class FileUtil:
|
|
|
74
78
|
return rows, metadata
|
|
75
79
|
|
|
76
80
|
@staticmethod
|
|
77
|
-
def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",") \
|
|
78
|
-
-> tuple[DataFrame, DataFrame | None]:
|
81
|
+
def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",",
|
|
82
|
+
*, encoding: str | None = None) -> tuple[DataFrame, DataFrame | None]:
|
|
79
83
|
"""
|
|
80
84
|
Parse the file bytes for a CSV into DataFrames. The provided file must be uniform. That is, if row 1 has 10
|
|
81
85
|
cells, all the rows in the file must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
|
|
@@ -86,6 +90,9 @@ class FileUtil:
|
|
|
86
90
|
meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
|
|
87
91
|
is assumed to be the header row.
|
|
88
92
|
:param seperator: The character that separates cells in the table.
|
|
93
|
+
:param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
|
|
94
|
+
contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
|
|
95
|
+
ISO-8859-1 as the encoding.
|
|
89
96
|
:return: A tuple of two DataFrames. The first is the frame for the CSV table body, while the second is for the
|
|
90
97
|
metadata from above the header row, or None if there is no metadata.
|
|
91
98
|
"""
|
|
@@ -97,13 +104,13 @@ class FileUtil:
|
|
|
97
104
|
# can throw off the header row index.
|
|
98
105
|
file_metadata = pandas.read_csv(file_io, header=None, dtype=dtype(str),
|
|
99
106
|
skiprows=lambda x: x >= header_row_index,
|
|
100
|
-
skip_blank_lines=False, sep=seperator)
|
|
107
|
+
skip_blank_lines=False, sep=seperator, encoding=encoding)
|
|
101
108
|
with io.BytesIO(file_bytes) as file_io:
|
|
102
109
|
# The use of the dtype argument is to ensure that everything from the file gets read as a string. Added
|
|
103
110
|
# because some numerical values would get ".0" appended to them, even when casting the DataFrame cell to a
|
|
104
111
|
# string.
|
|
105
112
|
file_body: DataFrame = pandas.read_csv(file_io, header=header_row_index, dtype=dtype(str),
|
|
106
|
-
skip_blank_lines=False, sep=seperator)
|
|
113
|
+
skip_blank_lines=False, sep=seperator, encoding=encoding)
|
|
107
114
|
|
|
108
115
|
return file_body, file_metadata
|
|
109
116
|
|
|
@@ -4,12 +4,15 @@ from abc import abstractmethod
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from sapiopylib.rest.User import SapioUser
|
|
7
|
+
from sapiopylib.rest.pojo.CustomReport import RawReportTerm, RawTermOperation
|
|
7
8
|
from sapiopylib.rest.pojo.datatype.FieldDefinition import VeloxIntegerFieldDefinition, VeloxStringFieldDefinition, \
|
|
8
9
|
AbstractVeloxFieldDefinition
|
|
10
|
+
from sapiopylib.rest.pojo.webhook.WebhookContext import SapioWebhookContext
|
|
9
11
|
from sapiopylib.rest.pojo.webhook.WebhookResult import SapioWebhookResult
|
|
10
12
|
|
|
11
13
|
from sapiopycommons.callbacks.callback_util import CallbackUtil
|
|
12
14
|
from sapiopycommons.files.file_data_handler import FileDataHandler, FilterList
|
|
15
|
+
from sapiopycommons.general.custom_report_util import CustomReportUtil
|
|
13
16
|
from sapiopycommons.general.time_util import TimeUtil
|
|
14
17
|
|
|
15
18
|
|
|
@@ -480,3 +483,71 @@ class ContainsSubstringFromCellRule(RowRule):
|
|
|
480
483
|
|
|
481
484
|
def validate(self, row: dict[str, Any]) -> bool:
|
|
482
485
|
return row.get(self.second) in row.get(self.first)
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
class UniqueSystemValueRule(ColumnRule):
|
|
489
|
+
"""
|
|
490
|
+
Requires that every cell in the column has a value that is not already in use in the system for a given data type
|
|
491
|
+
and field name.
|
|
492
|
+
"""
|
|
493
|
+
user: SapioUser
|
|
494
|
+
data_type_name: str
|
|
495
|
+
data_field_name: str
|
|
496
|
+
|
|
497
|
+
def __init__(self, context: SapioWebhookContext | SapioUser, header: str, data_type_name: str,
|
|
498
|
+
data_field_name: str):
|
|
499
|
+
"""
|
|
500
|
+
:param context: The current webhook context or a user object to send requests from.
|
|
501
|
+
:param header: The header that this rule acts upon.
|
|
502
|
+
:param data_type_name: The data type name to search on.
|
|
503
|
+
:param data_field_name: The data field name to search on. This is expected to be a string field.
|
|
504
|
+
"""
|
|
505
|
+
self.user = context.user if isinstance(context, SapioWebhookContext) else context
|
|
506
|
+
self.data_type_name = data_type_name
|
|
507
|
+
self.data_field_name = data_field_name
|
|
508
|
+
super().__init__(header, f"This value already exists in the system.")
|
|
509
|
+
|
|
510
|
+
def validate(self, rows: list[dict[str, Any]]) -> list[int]:
|
|
511
|
+
file_handler = FileDataHandler(rows)
|
|
512
|
+
values: list[str] = file_handler.get_values_list(self.header)
|
|
513
|
+
|
|
514
|
+
# Run a quick report for all records of this type that match these field values.
|
|
515
|
+
term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
|
|
516
|
+
"{" + ",".join(values) + "}")
|
|
517
|
+
results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
|
|
518
|
+
existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
|
|
519
|
+
return file_handler.get_in_list(self.header, existing_values)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
class ExistingSystemValueRule(ColumnRule):
|
|
523
|
+
"""
|
|
524
|
+
Requires that every cell in the column has a value that is already in use in the system for a given data type
|
|
525
|
+
and field name.
|
|
526
|
+
"""
|
|
527
|
+
user: SapioUser
|
|
528
|
+
data_type_name: str
|
|
529
|
+
data_field_name: str
|
|
530
|
+
|
|
531
|
+
def __init__(self, context: SapioWebhookContext | SapioUser, header: str, data_type_name: str,
|
|
532
|
+
data_field_name: str):
|
|
533
|
+
"""
|
|
534
|
+
:param context: The current webhook context or a user object to send requests from.
|
|
535
|
+
:param header: The header that this rule acts upon.
|
|
536
|
+
:param data_type_name: The data type name to search on.
|
|
537
|
+
:param data_field_name: The data field name to search on. This is expected to be a string field.
|
|
538
|
+
"""
|
|
539
|
+
self.user = context.user if isinstance(context, SapioWebhookContext) else context
|
|
540
|
+
self.data_type_name = data_type_name
|
|
541
|
+
self.data_field_name = data_field_name
|
|
542
|
+
super().__init__(header, f"This value doesn't exist in the system.")
|
|
543
|
+
|
|
544
|
+
def validate(self, rows: list[dict[str, Any]]) -> list[int]:
|
|
545
|
+
file_handler = FileDataHandler(rows)
|
|
546
|
+
values: list[str] = file_handler.get_values_list(self.header)
|
|
547
|
+
|
|
548
|
+
# Run a quick report for all records of this type that match these field values.
|
|
549
|
+
term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
|
|
550
|
+
"{" + ",".join(values) + "}")
|
|
551
|
+
results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
|
|
552
|
+
existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
|
|
553
|
+
return file_handler.get_not_in_list(self.header, existing_values)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/Molecules.py
RENAMED
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/__init__.py
RENAMED
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/datatype/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_bridge.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_writer.py
RENAMED
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/__init__.py
RENAMED
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/aliases.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/time_util.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/webhook/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|