mgtx-benchling-wrapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mgtx_benchling_wrapper/__init__.py +1 -0
- mgtx_benchling_wrapper/client/__init__.py +0 -0
- mgtx_benchling_wrapper/client/benchling_client.py +67 -0
- mgtx_benchling_wrapper/context/__init__.py +0 -0
- mgtx_benchling_wrapper/context/benchling_context.py +56 -0
- mgtx_benchling_wrapper/utils/__init__.py +0 -0
- mgtx_benchling_wrapper/utils/chunking.py +6 -0
- mgtx_benchling_wrapper/utils/compare_dataframe_cols.py +19 -0
- mgtx_benchling_wrapper/utils/create_blob_payload.py +38 -0
- mgtx_benchling_wrapper/utils/datetime_parser.py +43 -0
- mgtx_benchling_wrapper/utils/deprecated.py +78 -0
- mgtx_benchling_wrapper/utils/fields.py +15 -0
- mgtx_benchling_wrapper/utils/list_unique_values_col.py +11 -0
- mgtx_benchling_wrapper/utils/logger.py +38 -0
- mgtx_benchling_wrapper/utils/substitute_df_col_dict.py +10 -0
- mgtx_benchling_wrapper/utils/validation_summary.py +23 -0
- mgtx_benchling_wrapper/workflows/__init__.py +0 -0
- mgtx_benchling_wrapper/workflows/assays_result_ingestion.py +256 -0
- mgtx_benchling_wrapper/workflows/handlers/__init__.py +0 -0
- mgtx_benchling_wrapper/workflows/handlers/blob_handler.py +61 -0
- mgtx_benchling_wrapper/workflows/handlers/exceptions.py +19 -0
- mgtx_benchling_wrapper/workflows/handlers/result_archiver.py +183 -0
- mgtx_benchling_wrapper/workflows/handlers/result_ingestion.py +75 -0
- mgtx_benchling_wrapper/workflows/handlers/schema_handler.py +94 -0
- mgtx_benchling_wrapper/workflows/models/__init__.py +0 -0
- mgtx_benchling_wrapper/workflows/models/types.py +47 -0
- mgtx_benchling_wrapper/workflows/resolution/__init__.py +0 -0
- mgtx_benchling_wrapper/workflows/resolution/exceptions.py +21 -0
- mgtx_benchling_wrapper/workflows/resolution/links_resolver.py +70 -0
- mgtx_benchling_wrapper/workflows/resolution/schema_resolver.py +27 -0
- mgtx_benchling_wrapper/workflows/transformation/__init__.py +0 -0
- mgtx_benchling_wrapper/workflows/transformation/blob_transformer.py +94 -0
- mgtx_benchling_wrapper/workflows/transformation/container_transformer.py +290 -0
- mgtx_benchling_wrapper/workflows/transformation/datetime_converter.py +43 -0
- mgtx_benchling_wrapper/workflows/transformation/dropdown_transformer.py +74 -0
- mgtx_benchling_wrapper/workflows/transformation/exceptions.py +84 -0
- mgtx_benchling_wrapper/workflows/transformation/link_transformer.py +155 -0
- mgtx_benchling_wrapper/workflows/validation/__init__.py +0 -0
- mgtx_benchling_wrapper/workflows/validation/api_variable_validation.py +138 -0
- mgtx_benchling_wrapper/workflows/validation/dataframe_validator.py +203 -0
- mgtx_benchling_wrapper/workflows/validation/exceptions.py +190 -0
- mgtx_benchling_wrapper/workflows/validation/input_param_validator.py +151 -0
- mgtx_benchling_wrapper/wrapper/__init__.py +0 -0
- mgtx_benchling_wrapper/wrapper/_assayresults.py +196 -0
- mgtx_benchling_wrapper/wrapper/_blobs.py +68 -0
- mgtx_benchling_wrapper/wrapper/_containers.py +128 -0
- mgtx_benchling_wrapper/wrapper/_customentities.py +242 -0
- mgtx_benchling_wrapper/wrapper/_dropdowns.py +56 -0
- mgtx_benchling_wrapper/wrapper/_entry.py +158 -0
- mgtx_benchling_wrapper/wrapper/_mixtures.py +117 -0
- mgtx_benchling_wrapper/wrapper/_projects.py +18 -0
- mgtx_benchling_wrapper/wrapper/_schemas.py +166 -0
- mgtx_benchling_wrapper/wrapper/_task.py +18 -0
- mgtx_benchling_wrapper/wrapper/facade.py +33 -0
- mgtx_benchling_wrapper-0.1.0.dist-info/METADATA +104 -0
- mgtx_benchling_wrapper-0.1.0.dist-info/RECORD +59 -0
- mgtx_benchling_wrapper-0.1.0.dist-info/WHEEL +5 -0
- mgtx_benchling_wrapper-0.1.0.dist-info/licenses/LICENSE +21 -0
- mgtx_benchling_wrapper-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# NOTE(review): the wheel and dist-info in this listing are versioned 0.1.0 while this string says 0.1.2 - confirm which is intended.
__version__ = "0.1.2"
|
|
File without changes
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
|
|
3
|
+
from benchling_sdk.benchling import Benchling
|
|
4
|
+
from benchling_api_client.benchling_client import BenchlingApiClient
|
|
5
|
+
from benchling_sdk.helpers.retry_helpers import RetryStrategy
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# -----------------------------
|
|
9
|
+
# Configuration defaults
|
|
10
|
+
# -----------------------------
|
|
11
|
+
|
|
12
|
+
DEFAULT_TIMEOUT_SECONDS = 180  # value passed to client.with_timeout() by with_default_timeout()
|
|
13
|
+
MAX_RETRIES = 10  # passed as RetryStrategy(max_tries=...) by with_backoff_timeout()
|
|
14
|
+
BACKOFF_FACTOR = 2.0  # passed as RetryStrategy(backoff_factor=...) by with_backoff_timeout()
|
|
15
|
+
|
|
16
|
+
# -----------------------------
|
|
17
|
+
# Client decorators
|
|
18
|
+
# -----------------------------
|
|
19
|
+
|
|
20
|
+
def with_default_timeout(client: BenchlingApiClient) -> BenchlingApiClient:
    """
    Client decorator that applies the module-wide default timeout.

    Args:
        client: API client to configure.

    Returns:
        Whatever ``client.with_timeout(DEFAULT_TIMEOUT_SECONDS)`` returns.
    """
    timed_client = client.with_timeout(DEFAULT_TIMEOUT_SECONDS)
    return timed_client
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# -----------------------------
|
|
28
|
+
# Factory
|
|
29
|
+
# -----------------------------
|
|
30
|
+
|
|
31
|
+
def with_backoff_timeout():
    """
    Build the retry strategy handed to the Benchling SDK client.

    Returns:
        A ``RetryStrategy`` created with ``max_tries=MAX_RETRIES`` and
        ``backoff_factor=BACKOFF_FACTOR``.
    """
    retry_settings = {
        "max_tries": MAX_RETRIES,
        "backoff_factor": BACKOFF_FACTOR,
    }
    return RetryStrategy(**retry_settings)
|
|
36
|
+
|
|
37
|
+
def create_benchling_client(
    *,
    url: str,
    auth,
    client_decorator: Callable[[BenchlingApiClient], BenchlingApiClient] = with_default_timeout,
) -> Benchling:
    """
    Create a Benchling SDK client with this module's retry strategy.

    All arguments are keyword-only.

    Args:
        url (str):
            Benchling tenant base URL (e.g. https://acme.benchling.com)

        auth:
            Authentication method compatible with Benchling SDK
            (e.g. ClientCredentialsOAuth2); forwarded as ``auth_method``.

        client_decorator (callable):
            Optional decorator to customize the underlying API client
            (timeouts, retries, logging, etc.). Defaults to
            ``with_default_timeout``.

    Returns:
        Benchling:
            Configured Benchling SDK client
    """
    retry_strategy = with_backoff_timeout()
    sdk_client = Benchling(
        url=url,
        auth_method=auth,
        client_decorator=client_decorator,
        retry_strategy=retry_strategy,
    )
    return sdk_client
|
|
File without changes
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from functools import cache
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from benchling_sdk.benchling import Benchling
|
|
5
|
+
from benchling_sdk.apps.framework import App
|
|
6
|
+
from benchling_sdk.auth.client_credentials_oauth2 import ClientCredentialsOAuth2
|
|
7
|
+
from benchling_sdk.models.webhooks.v0 import WebhookEnvelopeV0
|
|
8
|
+
|
|
9
|
+
from mgtx_benchling_wrapper.client.benchling_client import create_benchling_client
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BenchlingContext:
    """
    Unified authentication + tenant context.

    Stores OAuth2 client credentials and the tenant base URL, and lazily
    builds the auth object, the Benchling SDK client and (optionally) the
    App. Each object is created on first request and the same object is
    returned by later calls on the same instance.
    """

    def __init__(
        self,
        *,
        client_id: str,
        client_secret: str,
        base_url: str,
        token_url: Optional[str] = None,
        app_id: Optional[str] = None,
    ):
        """
        Args:
            client_id: OAuth2 client id.
            client_secret: OAuth2 client secret.
            base_url: Tenant base URL, forwarded to ``create_benchling_client``.
            token_url: Optional token URL forwarded to ``ClientCredentialsOAuth2``.
            app_id: Optional app id; required only when ``app()`` is called.
        """
        self._client_id = client_id
        self._client_secret = client_secret
        self._base_url = base_url
        self._token_url = token_url
        self._app_id = app_id

        # Per-instance memoisation slots. functools.cache on the methods would
        # key a class-level cache on ``self`` and keep every context (and the
        # client secret it holds) alive for the lifetime of the process.
        self._auth_instance = None
        self._benchling_instance = None
        self._app_instance = None

    def _auth(self):
        """Return the (lazily created) ``ClientCredentialsOAuth2`` object."""
        if self._auth_instance is None:
            self._auth_instance = ClientCredentialsOAuth2(
                self._client_id, self._client_secret, self._token_url
            )
        return self._auth_instance

    def benchling(self) -> Benchling:
        """Return the (lazily created) Benchling SDK client for this context."""
        if self._benchling_instance is None:
            self._benchling_instance = create_benchling_client(
                url=self._base_url,
                auth=self._auth(),
            )
        return self._benchling_instance

    def app(self) -> App:
        """
        Return the (lazily created) ``App`` bound to this context's client.

        Raises:
            RuntimeError: if the context was created without an ``app_id``.
        """
        if self._app_id is None:
            raise RuntimeError("App requested but no app_id provided")
        if self._app_instance is None:
            self._app_instance = App(self._app_id, self.benchling())
        return self._app_instance

    @classmethod
    def from_webhook(cls, webhook: WebhookEnvelopeV0, *, client_id, client_secret, token_url=None):
        """
        Alternate constructor taking tenant URL and app id from a webhook.

        Args:
            webhook: Envelope providing ``base_url`` and ``app.id``.
            client_id: OAuth2 client id.
            client_secret: OAuth2 client secret.
            token_url: Optional token URL; defaults to ``None`` as before.
        """
        return cls(
            client_id=client_id,
            client_secret=client_secret,
            base_url=webhook.base_url,
            token_url=token_url,
            app_id=webhook.app.id,
        )
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
def compare_dataframe_columns(
    dataframe: pd.DataFrame,
    new_dataframe: pd.DataFrame,
    column_name: str
) -> list[str]:
    """Return the distinct non-None values of a column present in both frames.

    Values are taken from ``dataframe[column_name]`` in first-occurrence
    order and kept only when they also occur in
    ``new_dataframe[column_name]``. ``None`` is never returned.

    NOTE(review): the original local was called ``list_missing_values`` but
    the condition selects values that ARE present in ``new_dataframe``.
    Behaviour is preserved here; confirm against callers whether "missing"
    or "shared" is intended.

    Args:
        dataframe: Frame whose column values are inspected.
        new_dataframe: Frame whose column values act as the lookup set.
        column_name: Column present in both frames.

    Returns:
        De-duplicated list of matching values.

    Raises:
        KeyError: if ``column_name`` is absent from either frame.
    """
    org_column = list(dataframe[column_name])
    new_column = list(new_dataframe[column_name])

    try:
        # Fast path: hash-based membership and order-preserving de-duplication.
        present = set(new_column)
        return [
            value
            for value in dict.fromkeys(org_column)
            if value is not None and value in present
        ]
    except TypeError:
        # Unhashable cells (e.g. lists): fall back to linear scans.
        shared = []
        for value in org_column:
            if value is not None and value in new_column and value not in shared:
                shared.append(value)
        return shared
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import hashlib
|
|
3
|
+
import os.path
|
|
4
|
+
|
|
5
|
+
def create_image_blob_payload(
        image_path: str,
        mime_type="image/png",
        blob_type="VISUALIZATION"):
    """
    Create a payload for uploading an image blob through the API.

    Args:
        image_path: Path of the file to read (opened in binary mode).
        mime_type: Value stored under ``"mimeType"``.
        blob_type: Value stored under ``"type"``.

    Returns:
        tuple: ``(payload, name)`` where ``payload`` is a dict with the keys
        ``data64`` (base64 text of the file), ``md5`` (hex digest of the file),
        ``mimeType``, ``name`` and ``type``, and ``name`` is the file name up
        to its first dot.

    Raises:
        FileNotFoundError: if ``image_path`` does not exist. This is a
            subclass of ``Exception``, so existing ``except Exception``
            handlers keep working; the original error is chained as the cause.
    """
    try:
        with open(image_path, "rb") as image_file:
            image_data = image_file.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Blob file not found: {image_path}") from err

    # Name is everything before the FIRST dot of the file name, so
    # "plot.v1.png" yields "plot". Kept as-is because other code may derive
    # names the same way - NOTE(review): confirm before switching to splitext.
    name = os.path.basename(image_path).split(".")[0]

    # Encode the binary data to base64
    data64 = base64.b64encode(image_data).decode('utf-8')

    # MD5 is used purely as a content checksum, not for security; flagging it
    # keeps the call usable on FIPS-restricted Python builds.
    md5_hash = hashlib.md5(image_data, usedforsecurity=False).hexdigest()

    payload = {
        "data64": data64,
        "md5": md5_hash,
        "mimeType": mime_type,
        "name": name,
        "type": blob_type,
    }

    return payload, name
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import re
|
|
4
|
+
import datetime
|
|
5
|
+
|
|
6
|
+
def _parse_datetime(value):
    """Parse a single cell into a pandas timestamp, preferring day-first.

    Returns ``None`` when ``pd.isna(value)`` is true. ``pd.Timestamp`` and
    ``datetime.date`` instances (which includes ``datetime.datetime``) are
    wrapped in ``pd.Timestamp`` directly. Everything else goes through
    ``pd.to_datetime(..., errors="raise", dayfirst=True)``; strings flagged by
    ``_is_ambiguous_date`` additionally emit a ``UserWarning`` and are parsed
    with ``format='mixed'``.
    """
    if pd.isna(value):
        return None

    already_temporal = isinstance(value, (pd.Timestamp, datetime.date))
    if already_temporal:
        return pd.Timestamp(value)

    parse_options = {"errors": "raise", "dayfirst": True}

    if _is_ambiguous_date(value):
        message = (
            f"Ambiguous date format detected '{value}'. "
            f"Assuming day-first format (DD/MM/YYYY)."
        )
        warnings.warn(message, UserWarning)
        parse_options["format"] = 'mixed'

    return pd.to_datetime(value, **parse_options)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _is_ambiguous_date(value: str) -> bool:
|
|
29
|
+
"""
|
|
30
|
+
Detect ambiguous numeric date formats like 01/02/2024
|
|
31
|
+
where both day and month <= 12.
|
|
32
|
+
"""
|
|
33
|
+
if not isinstance(value, str):
|
|
34
|
+
return False
|
|
35
|
+
|
|
36
|
+
pattern = r"^\d{1,2}[/-]\d{1,2}[/-]\d{4}$"
|
|
37
|
+
if not re.match(pattern, value.strip()):
|
|
38
|
+
return False
|
|
39
|
+
|
|
40
|
+
parts = re.split(r"[/-]", value)
|
|
41
|
+
day, month, _ = map(int, parts)
|
|
42
|
+
|
|
43
|
+
return day <= 12 and month <= 12
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import inspect
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
string_types = (type(b''), type(u''))  # i.e. (bytes, str) on Python 3
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def deprecated(reason):
    """
    This is a decorator which can be used to mark functions or classes
    as deprecated. It will result in a ``DeprecationWarning`` being emitted
    every time the decorated object is called.

    Two usages are supported::

        @deprecated("please, use another function")
        def old_function(x, y):
            pass

        @deprecated
        def old_function(x, y):
            pass

    Args:
        reason: Either a ``str``/``bytes`` explanation (decorator-factory
            form) or the function/class being decorated (bare form).

    Returns:
        A decorator (when given a reason) or the wrapped callable (bare form).

    Raises:
        TypeError: if ``reason`` is neither text, a class nor a function.
    """

    def _wrap(target, message):
        # Build the wrapper that warns and then delegates to ``target``.
        @functools.wraps(target)
        def wrapper(*args, **kwargs):
            # Force the warning to be shown for this call only. The previous
            # simplefilter('always') / simplefilter('default') pair left a
            # 'default' rule at the front of the process-wide filter list,
            # silently overriding whatever the application had configured.
            # catch_warnings() restores the filters on exit.
            with warnings.catch_warnings():
                warnings.simplefilter('always', DeprecationWarning)
                warnings.warn(
                    message,
                    category=DeprecationWarning,
                    stacklevel=2  # attribute the warning to the caller
                )
            return target(*args, **kwargs)

        return wrapper

    def _kind(target):
        # Word used in the message: "class" or "function".
        return "class" if inspect.isclass(target) else "function"

    if isinstance(reason, (bytes, str)):

        # The @deprecated is used with a 'reason'.
        def decorator(func1):
            message = "Call to deprecated {kind} {name} ({reason}).".format(
                kind=_kind(func1), name=func1.__name__, reason=reason
            )
            return _wrap(func1, message)

        return decorator

    if inspect.isclass(reason) or inspect.isfunction(reason):

        # The @deprecated is used without any 'reason'; ``reason`` is the target.
        func2 = reason
        message = "Call to deprecated {kind} {name}.".format(
            kind=_kind(func2), name=func2.__name__
        )
        return _wrap(func2, message)

    raise TypeError(repr(type(reason)))
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
def create_fields_dict(
        list_field_names: list,
        list_field_values: list
) -> dict:
    """Create a dictionary of the form ``{field_name: {'value': field_value}}``.

    Names and values are paired positionally. If the two inputs differ in
    length, the extra items of the longer one are ignored; a repeated name
    keeps the last value paired with it.

    Args:
        list_field_names (list): list containing the field names (usually the column names of dataframe)
        list_field_values (list): values of the fields (usually a row in a dataframe)
    Return:
        final_dict (dict): a dictionary of the format to input in fields.
    """
    return {
        field_name: {'value': field_value}
        for field_name, field_value in zip(list_field_names, list_field_values)
    }
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
def list_unique_values_in_column(
        dataframe: pd.DataFrame,
        column_name: str,
        drop_na: bool = False
) -> list[str]:
    """Return the distinct values of ``dataframe[column_name]`` as a list.

    Args:
        dataframe: Frame to read from.
        column_name: Column whose values are collected.
        drop_na: When true, missing values are removed (``Series.dropna``)
            before the unique values are computed.

    Returns:
        The result of ``Series.unique().tolist()`` for the selected column.
    """
    column = dataframe[column_name]
    if drop_na:
        column = column.dropna()
    return column.unique().tolist()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
def get_logger(name: str, file_log_level: str | int, console_log_level: str | int, console_filter: str | None = None) -> logging.Logger:
|
|
4
|
+
logger = logging.getLogger(name)
|
|
5
|
+
logger.setLevel(logging.DEBUG)
|
|
6
|
+
|
|
7
|
+
# Factory function for creating level filters
|
|
8
|
+
def level_filter(level_name: str):
|
|
9
|
+
def filter_func(record):
|
|
10
|
+
return record.levelname == level_name
|
|
11
|
+
return filter_func
|
|
12
|
+
|
|
13
|
+
# Create formatters
|
|
14
|
+
console_format = logging.Formatter('%(asctime)s | %(name)s | %(levelname)s | line:%(lineno)d | %(message)s')
|
|
15
|
+
file_format = logging.Formatter('%(asctime)s | %(name)s | %(levelname)s | %(message)s')
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# initiating a console handler
|
|
19
|
+
console_handler = logging.StreamHandler()
|
|
20
|
+
console_handler.setLevel((console_log_level))
|
|
21
|
+
console_handler.setFormatter(console_format)
|
|
22
|
+
# Add filter if specified
|
|
23
|
+
if console_filter:
|
|
24
|
+
console_handler.addFilter(level_filter(console_filter))
|
|
25
|
+
|
|
26
|
+
# initiating a file handler
|
|
27
|
+
file_handler = logging.FileHandler(
|
|
28
|
+
filename='app.log',
|
|
29
|
+
mode='a',
|
|
30
|
+
encoding='utf-8')
|
|
31
|
+
file_handler.setLevel((file_log_level))
|
|
32
|
+
file_handler.setFormatter(file_format)
|
|
33
|
+
|
|
34
|
+
# add the handlers to the logger
|
|
35
|
+
logger.addHandler(console_handler)
|
|
36
|
+
logger.addHandler(file_handler)
|
|
37
|
+
|
|
38
|
+
return logger
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from mgtx_benchling_wrapper.workflows.models.types import ValidationResult
|
|
2
|
+
|
|
3
|
+
def get_validation_summary(result: ValidationResult) -> str:
    """Render a validation result as human-readable text.

    A valid result yields a single line, with the warning count appended
    when warnings exist. An invalid result yields a header with the error
    count, one numbered entry per error (plus its ``context`` when the error
    has a truthy one), and finally a list of warnings, if any.
    """
    if result.is_valid:
        warning_suffix = f" with {len(result.warnings)} warning(s)" if result.warnings else ""
        return "✓ Validation passed" + warning_suffix

    pieces = [f"✗ Validation failed with {len(result.errors)} error(s):\n"]

    for position, error in enumerate(result.errors, 1):
        pieces.append(f"\n{position}. {error.message}")
        if getattr(error, 'context', None):
            pieces.append(f"\n Context: {error.context}")

    if result.warnings:
        pieces.append(f"\n\nWarnings ({len(result.warnings)}):\n")
        pieces.extend(f" - {warning}\n" for warning in result.warnings)

    return "".join(pieces)
|
|
File without changes
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from mgtx_benchling_wrapper.wrapper.facade import BenchlingWrapperFacade
|
|
2
|
+
from mgtx_benchling_wrapper.utils.logger import get_logger
|
|
3
|
+
from mgtx_benchling_wrapper.workflows.models.types import BlobType
|
|
4
|
+
from .transformation.container_transformer import ContainersTransformer
|
|
5
|
+
from .transformation.link_transformer import LinkTransformer
|
|
6
|
+
from .transformation.blob_transformer import BlobTransformer
|
|
7
|
+
from .transformation.dropdown_transformer import DropdownTransformer
|
|
8
|
+
from .transformation.datetime_converter import DateTimeConverter
|
|
9
|
+
from .transformation.exceptions import TransformationError
|
|
10
|
+
from .handlers.result_archiver import ResultArchiver
|
|
11
|
+
from .handlers.blob_handler import BlobHandler
|
|
12
|
+
from .handlers.result_ingestion import ResultIngestion
|
|
13
|
+
from .handlers.exceptions import HandlerError
|
|
14
|
+
from .handlers.schema_handler import SchemaHandler
|
|
15
|
+
from .validation.api_variable_validation import ApiParametersValidation
|
|
16
|
+
from .validation.dataframe_validator import DataFrameValidator
|
|
17
|
+
from .validation.input_param_validator import InputParamValidator
|
|
18
|
+
from .validation.exceptions import ValidationError
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
# Module-level logger. NOTE: get_logger() attaches a console handler and a
# FileHandler for 'app.log', so importing this module has those side effects.
logger = get_logger(__name__,
                    file_log_level='DEBUG',
                    console_log_level='DEBUG',)
|
|
25
|
+
|
|
26
|
+
class AssayResultIngestionWorkflow:
    """Run the assay-result ingestion pipeline against a Benchling wrapper.

    For each input dataframe the workflow validates it against the assay
    result schema, applies the container, link, blob, dropdown and datetime
    transformations, handles pre-existing results, uploads blobs and finally
    ingests the rows. The collaborator created for each step is kept on the
    instance (``self._...``) and overwritten as the run progresses.
    """

    def __init__(self, wrapper: BenchlingWrapperFacade):
        """
        Args:
            wrapper: Facade passed to every validator, transformer and handler.
        """
        self._wrapper = wrapper
        # Every collaborator starts as None and is created inside
        # assay_results_ingestion(). (Previously _api_param_validator was only
        # annotated, never assigned, and _schema_handler held the SchemaHandler
        # class object rather than an instance.)
        self._api_param_validator: Optional[ApiParametersValidation] = None
        self._schema_handler: Optional[SchemaHandler] = None
        self._input_param_validator: Optional[InputParamValidator] = None
        self._df_validator: Optional[DataFrameValidator] = None
        self._container_transformer: Optional[ContainersTransformer] = None
        self._link_transformer: Optional[LinkTransformer] = None
        self._blob_transformer: Optional[BlobTransformer] = None
        self._dropdown_transformer: Optional[DropdownTransformer] = None
        self._datetime_converter: Optional[DateTimeConverter] = None
        self._result_archiver: Optional[ResultArchiver] = None
        self._blob_handler: Optional[BlobHandler] = None
        self._result_ingestion: Optional[ResultIngestion] = None

    def assay_results_ingestion(
        self,
        list_dataframes: list[pd.DataFrame],
        schema_id: str,
        project_id: str,
        unique_identifiers: list[str],
        dict_variable_to_entity_type: Optional[dict[str, list[str]]] = None,
        archive: bool = False,
        compare_on: Optional[list[str]] = None,
        entry_name: Optional[str] = None,
        commit_in_transaction: bool = True,
        blob_type: BlobType = "image/png"
    ) -> list[list[str]]:
        """
        A function to ingest assay results on Benchling following the logic at MeiraGTx DSC.

        Ingests a list of result dataframes.

        Data parser requires datetime formats to be in the form of DD/MM/YYYY formats.
        Empty values for non-required columns to be set to None.

        Parameters
        ----------
        list_dataframes: list[pd.DataFrame] :
            A list of dataframes to ingest. Columns need to be named with the warehouse name of the target assay
            results schema.

        schema_id: str :
            assay results schema api id.

        project_id: str :
            project api id.

        unique_identifiers: list[str] :
            a list of unique identifiers of the assay results schema. To be provided as the warehouse name.

        dict_variable_to_entity_type: dict[str, list[str]] :
            (Default value = None)
            To be specified for assay result schema variables set to AnyEntity. Allows to restrict the entity or
            container types to be ingested to an AnyEntity or AnyInventory variable.
            The entity type should refer to the front end name of the entity schema or container schema.

        archive: bool :
            (Default value = False)
            Defines whether pre-existing results should be archived.

        compare_on: list[str] :
            (Default value = None)
            A list of variables in the assay results schema to build a dataframe of pre-existing results on Benchling.
            Variable names need to be provided as the warehouse name.

            When archive is False and compare_on is None : Pre-existing results are searched for unique_identifiers.
            If pre-existing results are found for an identifier, new results are not ingested for that identifier.

            When archive is False and compare_on is provided : Pre-existing results retrieved based on
            unique_identifiers and compare_on list, and a pre-existing results dataframe is created. Any rows matching
            pre-existing values will not be ingested.

            When archive is True and compare_on is None : Pre-existing results are retrieved based on variables in
            unique_identifiers. Retrieved data is archived and new results are ingested.

            When archive is True, compare_on cannot be provided.

        entry_name: str :
            (Default value = None)
            Name of the entry used to visualize ingested data. Can't be provided if commit_in_transaction is True.

        commit_in_transaction: bool :
            (Default value = True)
            When commit_in_transaction is True : Data is ingested to the back-end. entry_name needs to be set to None.
            When commit_in_transaction is False : Data is ingested to a table in an entry mentioned in entry_name.
            entry_name needs to be provided.

        blob_type: BlobType :
            (Default value = "image/png")
            Defines the expected blob type. Allowed values are "image/png" and "image/jpeg".

        Returns
        -------
        list_missing_variables_per_df: a list of lists. Each list will contain missing entities or containers not found
        on Benchling. An entry is appended after the link transformation of each dataframe, including dataframes
        that are subsequently skipped.

        Raises
        ------
        ValidationError, TransformationError, HandlerError :
            Logged and re-raised unchanged when the corresponding step fails.

        """
        # validate api inputs
        self._api_param_validator = ApiParametersValidation(wrapper=self._wrapper)

        try:
            # First element of the returned pair is not used here.
            _, table_id = self._api_param_validator.validation(
                schema_id=schema_id,
                project_id=project_id,
                entry_name=entry_name,
                commit_in_transaction=commit_in_transaction,
            )
            logger.info("Api input parameters passed validation.")
        except ValidationError as e:
            logger.error(f"Api Input validation failed: {e.message}", extra=e.context)
            raise

        # create schema after validation
        self._schema_handler = SchemaHandler(wrapper=self._wrapper)

        schema = self._schema_handler._build_schema_definition(schema_id, 'assay_results_schema')

        # validation of input parameters
        self._input_param_validator = InputParamValidator(schema=schema, wrapper=self._wrapper)
        try:
            self._input_param_validator.validate(
                unique_identifiers=unique_identifiers,
                compare_on=compare_on,
                archive=archive,
                blob_type=blob_type,
                dict_variable_to_entity_type=dict_variable_to_entity_type)
            logger.info("Input parameters passed validation.")
        except ValidationError as e:
            logger.error(f"Input validation failed: {e.message}", extra=e.context)
            raise

        list_missing_variables_per_df = []
        for dataframe in list_dataframes:
            # create dataframe validator
            self._df_validator = DataFrameValidator(schema)

            # validate dataframe before continuing
            try:
                self._df_validator.validate(dataframe, strict=True)
                logger.info(f"Schema validation passed for {len(dataframe)} rows")
            except ValidationError as e:
                logger.error(f"Schema validation failed: {e.message}", extra=e.context)
                raise

            # create container_id column if needed
            self._container_transformer = ContainersTransformer(schema, wrapper=self._wrapper)
            try:
                dataframe = self._container_transformer.transform(dataframe, dict_variable_to_entity_type)
            except TransformationError as e:
                logger.error(f"Container column transformation failed: {e.message}", extra=e.context)
                raise

            # create dataframe transformer
            self._link_transformer = LinkTransformer(schema, wrapper=self._wrapper)

            # transform dataframe
            dataframe, list_missing_variables = self._link_transformer.transform(dataframe, dict_variable_to_entity_type)
            list_missing_variables_per_df.append(list_missing_variables)

            if len(dataframe) == 0:  # go to the next dataframe if all entities are missing
                logger.warning(f"No entities were found on benchling: {list_missing_variables}.")
                continue

            # create blobs transformer
            self._blob_transformer = BlobTransformer(schema, wrapper=self._wrapper)

            # transform blobs in dataframe if they exist
            try:
                dataframe, dict_name_to_payload = self._blob_transformer.transform(dataframe, blob_type=blob_type)
            except TransformationError as e:
                logger.error(f"Blob transformation failed: {e.message}", extra=e.context)
                raise

            # create dropdown transformer
            self._dropdown_transformer = DropdownTransformer(schema, wrapper=self._wrapper)

            # transform dropdown columns if they exist
            try:
                dataframe = self._dropdown_transformer.transform(dataframe)
            except TransformationError as e:
                logger.error(f"Dropdown transformation failed: {e.message}", extra=e.context)
                raise

            # create datetime converter
            self._datetime_converter = DateTimeConverter(schema)

            # convert date time if they exist
            try:
                dataframe = self._datetime_converter.transform(dataframe)
            except TransformationError as e:
                logger.error(f"Datetime transformation has failed: {e.message}", extra=e.context)
                raise

            # create result archiver
            self._result_archiver = ResultArchiver(schema, wrapper=self._wrapper)

            # archive results
            dataframe = self._result_archiver.transform(dataframe, unique_identifiers, archive, compare_on)
            if dataframe is None:  # when all results have been ingested for this dataframe
                logger.info("All results were already ingested for this dataframe, "
                            "the code will proceed to the next dataframe or exit.")
                continue

            # create blob handler
            self._blob_handler = BlobHandler(schema, wrapper=self._wrapper)

            # upload blobs if they exist
            try:
                dataframe = self._blob_handler.ingest(dataframe, dict_name_to_payload)
            except HandlerError as e:
                # This step is the blob upload (BlobHandler.ingest); the old
                # message labelled it "Blob transformation".
                logger.error(f"Blob ingestion failed: {e.message}", extra=e.context)
                raise

            # create results ingestor
            self._result_ingestion = ResultIngestion(schema, wrapper=self._wrapper)

            # ingest df
            try:
                self._result_ingestion.ingest(dataframe, project_id, table_id, commit_in_transaction)
            except HandlerError as e:
                logger.error(f"Ingestion has failed: {e.message}", extra=e.context)
                raise

        return list_missing_variables_per_df
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
|