sapiopycommons 2024.6.6a248__tar.gz → 2024.6.11a252__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sapiopycommons might be problematic. Click here for more details.

Files changed (49)
  1. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/PKG-INFO +1 -1
  2. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/pyproject.toml +1 -1
  3. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_util.py +13 -6
  4. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_validator.py +71 -0
  5. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/.gitignore +0 -0
  6. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/LICENSE +0 -0
  7. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/README.md +0 -0
  8. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/__init__.py +0 -0
  9. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/callbacks/__init__.py +0 -0
  10. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/callbacks/callback_util.py +0 -0
  11. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/IndigoMolecules.py +0 -0
  12. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/Molecules.py +0 -0
  13. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/chem/__init__.py +0 -0
  14. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/datatype/__init__.py +0 -0
  15. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/datatype/attachment_util.py +0 -0
  16. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/__init__.py +0 -0
  17. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/experiment_handler.py +0 -0
  18. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/eln/plate_designer.py +0 -0
  19. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/__init__.py +0 -0
  20. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/complex_data_loader.py +0 -0
  21. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_bridge.py +0 -0
  22. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_bridge_handler.py +0 -0
  23. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_data_handler.py +0 -0
  24. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/files/file_writer.py +0 -0
  25. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/__init__.py +0 -0
  26. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/aliases.py +0 -0
  27. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/custom_report_util.py +0 -0
  28. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/exceptions.py +0 -0
  29. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/popup_util.py +0 -0
  30. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/storage_util.py +0 -0
  31. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/general/time_util.py +0 -0
  32. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/multimodal/multimodal.py +0 -0
  33. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/multimodal/multimodal_data.py +0 -0
  34. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/processtracking/__init__.py +0 -0
  35. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/processtracking/endpoints.py +0 -0
  36. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/recordmodel/__init__.py +0 -0
  37. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/recordmodel/record_handler.py +0 -0
  38. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/__init__.py +0 -0
  39. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/eln_rule_handler.py +0 -0
  40. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/rules/on_save_rule_handler.py +0 -0
  41. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/webhook/__init__.py +0 -0
  42. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/src/sapiopycommons/webhook/webhook_handlers.py +0 -0
  43. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/_do_not_add_init_py_here +0 -0
  44. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/bio_reg_test.py +0 -0
  45. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/chem_test.py +0 -0
  46. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/data_type_models.py +0 -0
  47. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/kappa.chains.fasta +0 -0
  48. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/mafft_test.py +0 -0
  49. {sapiopycommons-2024.6.6a248 → sapiopycommons-2024.6.11a252}/tests/test.gb +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sapiopycommons
3
- Version: 2024.6.6a248
3
+ Version: 2024.6.11a252
4
4
  Summary: Official Sapio Python API Utilities Package
5
5
  Project-URL: Homepage, https://github.com/sapiosciences
6
6
  Author-email: Jonathan Steck <jsteck@sapiosciences.com>, Yechen Qiao <yqiao@sapiosciences.com>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sapiopycommons"
7
- version='2024.06.06a248'
7
+ version='2024.06.11a252'
8
8
  authors = [
9
9
  { name="Jonathan Steck", email="jsteck@sapiosciences.com" },
10
10
  { name="Yechen Qiao", email="yqiao@sapiosciences.com" },
@@ -21,7 +21,7 @@ class FileUtil:
21
21
  """
22
22
  @staticmethod
23
23
  def tokenize_csv(file_bytes: bytes, required_headers: list[str] | None = None, header_row_index: int | None = 0,
24
- seperator: str = ",") -> tuple[list[dict[str, str]], list[list[str]]]:
24
+ seperator: str = ",", *, encoding: str | None = None) -> tuple[list[dict[str, str]], list[list[str]]]:
25
25
  """
26
26
  Tokenize a CSV file. The provided file must be uniform. That is, if row 1 has 10 cells, all the rows in the file
27
27
  must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
@@ -34,13 +34,17 @@ class FileUtil:
34
34
  meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
35
35
  is assumed to be the header row.
36
36
  :param seperator: The character that separates cells in the table.
37
+ :param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
38
+ contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
39
+ ISO-8859-1 as the encoding.
37
40
  :return: The CSV parsed into a list of dicts where each dict is a row, mapping the headers to the cells for
38
41
  that row. Also returns a list of each row above the headers (the metadata), parsed into a list of each cell.
39
42
  If the header row index is 0 or None, this list will be empty.
40
43
  """
41
44
  # Parse the file bytes into two DataFrames. The first is metadata of the file located above the header row,
42
45
  # while the second is the body of the file below the header row.
43
- file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator)
46
+ file_body, file_metadata = FileUtil.csv_to_data_frames(file_bytes, header_row_index, seperator,
47
+ encoding=encoding)
44
48
  # Parse the metadata from above the header row index into a list of lists.
45
49
  metadata: list[list[str]] = FileUtil.data_frame_to_lists(file_metadata)
46
50
  # Parse the data from the file body into a list of dicts.
@@ -74,8 +78,8 @@ class FileUtil:
74
78
  return rows, metadata
75
79
 
76
80
  @staticmethod
77
- def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",") \
78
- -> tuple[DataFrame, DataFrame | None]:
81
+ def csv_to_data_frames(file_bytes: bytes, header_row_index: int | None = 0, seperator: str = ",",
82
+ *, encoding: str | None = None) -> tuple[DataFrame, DataFrame | None]:
79
83
  """
80
84
  Parse the file bytes for a CSV into DataFrames. The provided file must be uniform. That is, if row 1 has 10
81
85
  cells, all the rows in the file must have 10 cells. Otherwise, the Pandas parser throws a tokenizer exception.
@@ -86,6 +90,9 @@ class FileUtil:
86
90
  meaning that required headers are also ignored if any are provided. By default, the first row (0th index)
87
91
  is assumed to be the header row.
88
92
  :param seperator: The character that separates cells in the table.
93
+ :param encoding: The encoding used to read the given file bytes. If not provided, uses utf-8. If your file
94
+ contains a non-utf-8 character, then a UnicodeDecodeError will be thrown. If this happens, consider using
95
+ ISO-8859-1 as the encoding.
89
96
  :return: A tuple of two DataFrames. The first is the frame for the CSV table body, while the second is for the
90
97
  metadata from above the header row, or None if there is no metadata.
91
98
  """
@@ -97,13 +104,13 @@ class FileUtil:
97
104
  # can throw off the header row index.
98
105
  file_metadata = pandas.read_csv(file_io, header=None, dtype=dtype(str),
99
106
  skiprows=lambda x: x >= header_row_index,
100
- skip_blank_lines=False, sep=seperator)
107
+ skip_blank_lines=False, sep=seperator, encoding=encoding)
101
108
  with io.BytesIO(file_bytes) as file_io:
102
109
  # The use of the dtype argument is to ensure that everything from the file gets read as a string. Added
103
110
  # because some numerical values would get ".0" appended to them, even when casting the DataFrame cell to a
104
111
  # string.
105
112
  file_body: DataFrame = pandas.read_csv(file_io, header=header_row_index, dtype=dtype(str),
106
- skip_blank_lines=False, sep=seperator)
113
+ skip_blank_lines=False, sep=seperator, encoding=encoding)
107
114
 
108
115
  return file_body, file_metadata
109
116
 
@@ -4,12 +4,15 @@ from abc import abstractmethod
4
4
  from typing import Any
5
5
 
6
6
  from sapiopylib.rest.User import SapioUser
7
+ from sapiopylib.rest.pojo.CustomReport import RawReportTerm, RawTermOperation
7
8
  from sapiopylib.rest.pojo.datatype.FieldDefinition import VeloxIntegerFieldDefinition, VeloxStringFieldDefinition, \
8
9
  AbstractVeloxFieldDefinition
10
+ from sapiopylib.rest.pojo.webhook.WebhookContext import SapioWebhookContext
9
11
  from sapiopylib.rest.pojo.webhook.WebhookResult import SapioWebhookResult
10
12
 
11
13
  from sapiopycommons.callbacks.callback_util import CallbackUtil
12
14
  from sapiopycommons.files.file_data_handler import FileDataHandler, FilterList
15
+ from sapiopycommons.general.custom_report_util import CustomReportUtil
13
16
  from sapiopycommons.general.time_util import TimeUtil
14
17
 
15
18
 
@@ -480,3 +483,71 @@ class ContainsSubstringFromCellRule(RowRule):
480
483
 
481
484
  def validate(self, row: dict[str, Any]) -> bool:
482
485
  return row.get(self.second) in row.get(self.first)
486
+
487
+
488
+ class UniqueSystemValueRule(ColumnRule):
489
+ """
490
+ Requires that every cell in the column has a value that is not already in use in the system for a given data type
491
+ and field name.
492
+ """
493
+ user: SapioUser
494
+ data_type_name: str
495
+ data_field_name: str
496
+
497
+ def __init__(self, context: SapioWebhookContext | SapioUser, header: str, data_type_name: str,
498
+ data_field_name: str):
499
+ """
500
+ :param context: The current webhook context or a user object to send requests from.
501
+ :param header: The header that this rule acts upon.
502
+ :param data_type_name: The data type name to search on.
503
+ :param data_field_name: The data field name to search on. This is expected to be a string field.
504
+ """
505
+ self.user = context.user if isinstance(context, SapioWebhookContext) else context
506
+ self.data_type_name = data_type_name
507
+ self.data_field_name = data_field_name
508
+ super().__init__(header, f"This value already exists in the system.")
509
+
510
+ def validate(self, rows: list[dict[str, Any]]) -> list[int]:
511
+ file_handler = FileDataHandler(rows)
512
+ values: list[str] = file_handler.get_values_list(self.header)
513
+
514
+ # Run a quick report for all records of this type that match these field values.
515
+ term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
516
+ "{" + ",".join(values) + "}")
517
+ results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
518
+ existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
519
+ return file_handler.get_in_list(self.header, existing_values)
520
+
521
+
522
+ class ExistingSystemValueRule(ColumnRule):
523
+ """
524
+ Requires that every cell in the column has a value that is already in use in the system for a given data type
525
+ and field name.
526
+ """
527
+ user: SapioUser
528
+ data_type_name: str
529
+ data_field_name: str
530
+
531
+ def __init__(self, context: SapioWebhookContext | SapioUser, header: str, data_type_name: str,
532
+ data_field_name: str):
533
+ """
534
+ :param context: The current webhook context or a user object to send requests from.
535
+ :param header: The header that this rule acts upon.
536
+ :param data_type_name: The data type name to search on.
537
+ :param data_field_name: The data field name to search on. This is expected to be a string field.
538
+ """
539
+ self.user = context.user if isinstance(context, SapioWebhookContext) else context
540
+ self.data_type_name = data_type_name
541
+ self.data_field_name = data_field_name
542
+ super().__init__(header, f"This value doesn't exist in the system.")
543
+
544
+ def validate(self, rows: list[dict[str, Any]]) -> list[int]:
545
+ file_handler = FileDataHandler(rows)
546
+ values: list[str] = file_handler.get_values_list(self.header)
547
+
548
+ # Run a quick report for all records of this type that match these field values.
549
+ term = RawReportTerm(self.data_type_name, self.data_field_name, RawTermOperation.EQUAL_TO_OPERATOR,
550
+ "{" + ",".join(values) + "}")
551
+ results: list[dict[str, Any]] = CustomReportUtil.run_quick_report(self.user, term)
552
+ existing_values: list[Any] = [x.get(self.data_field_name) for x in results]
553
+ return file_handler.get_not_in_list(self.header, existing_values)