omnata-plugin-runtime 0.11.6a323__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnata_plugin_runtime-0.12.0/PKG-INFO +56 -0
- omnata_plugin_runtime-0.12.0/pyproject.toml +57 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/omnata_plugin.py +131 -102
- omnata_plugin_runtime-0.11.6a323/PKG-INFO +0 -55
- omnata_plugin_runtime-0.11.6a323/pyproject.toml +0 -57
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/LICENSE +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/README.md +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/__init__.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/api.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/configuration.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/forms.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/json_schema.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/logging.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/plugin_entrypoints.py +0 -0
- {omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/rate_limiting.py +0 -0
omnata_plugin_runtime-0.12.0/PKG-INFO (added)
@@ -0,0 +1,56 @@
+Metadata-Version: 2.4
+Name: omnata-plugin-runtime
+Version: 0.12.0
+Summary: Classes and common runtime components for building and running Omnata Plugins
+License-File: LICENSE
+Author: James Weakley
+Author-email: james.weakley@omnata.com
+Requires-Python: >=3.10,<=3.13
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: annotated-types (<=0.6.0)
+Requires-Dist: certifi (<=2025.11.12)
+Requires-Dist: cffi (<=2.0.0)
+Requires-Dist: charset-normalizer (<=3.4.4)
+Requires-Dist: cryptography (<=46.0.3)
+Requires-Dist: filelock (<=3.20.0)
+Requires-Dist: idna (<=3.11)
+Requires-Dist: jinja2 (>=3.1.2,<=3.1.6)
+Requires-Dist: markupsafe (<=3.0.2)
+Requires-Dist: numpy (<=2.3.5)
+Requires-Dist: opentelemetry-api (<=1.38.0)
+Requires-Dist: packaging (<=25.0)
+Requires-Dist: pandas (<=2.3.3)
+Requires-Dist: platformdirs (<=4.5.0)
+Requires-Dist: protobuf (<=6.33.0)
+Requires-Dist: pyarrow (<=21.0.0)
+Requires-Dist: pycparser (<=2.23)
+Requires-Dist: pydantic (>=2,<=2.12.4)
+Requires-Dist: pydantic-core (<=2.41.5)
+Requires-Dist: pyjwt (<=2.10.1)
+Requires-Dist: pyopenssl (<=225.3.0)
+Requires-Dist: pytz (<=2025.2)
+Requires-Dist: pyyaml (<=6.0.3)
+Requires-Dist: requests (>=2,<=2.32.5)
+Requires-Dist: setuptools (<=80.9.0)
+Requires-Dist: snowflake-connector-python (>=3,<4)
+Requires-Dist: snowflake-snowpark-python (>=1.20.0,<=1.43.0)
+Requires-Dist: snowflake-telemetry-python (<=0.5.0)
+Requires-Dist: tenacity (>=8,<9)
+Requires-Dist: tomlkit (<=0.13.3)
+Requires-Dist: urllib3 (<=2.5.0)
+Requires-Dist: wheel (<=0.45.1)
+Requires-Dist: wrapt (<=2.0.1)
+Description-Content-Type: text/markdown
+
+# omnata-plugin-runtime
+This package is a runtime dependency for [Omnata Plugins](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins).
+
+It contains data classes, interfaces and application logic used to perform plugin operations.
+
+For instructions on creating plugins, visit our [docs site](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins/creating-plugins).
+
+
omnata_plugin_runtime-0.12.0/pyproject.toml (added)
@@ -0,0 +1,57 @@
+[tool.poetry]
+name = "omnata-plugin-runtime"
+version = "0.12.0"
+description = "Classes and common runtime components for building and running Omnata Plugins"
+authors = ["James Weakley <james.weakley@omnata.com>"]
+readme = "README.md"
+packages = [{include = "omnata_plugin_runtime", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.10, <=3.13"
+snowflake-snowpark-python = ">=1.20.0,<=1.43.0" # latest version available on Snowflake Anaconda, but allow pinning to 1.20.0 for to_pandas_batches workaround
+snowflake-connector-python = "^3, <=4.1.0" # latest version available on Snowflake Anaconda
+cryptography = "<=46.0.3"
+annotated-types = "<=0.6.0"
+pycparser = "<=2.23"
+filelock = "<=3.20.0"
+pydantic-core = "<=2.41.5"
+# had to relax some of these thanks to snowcli pinning newer versions
+certifi = "<=2025.11.12" # latest version available on Snowflake Anaconda
+charset-normalizer = "<=3.4.4" # latest version available on Snowflake Anaconda
+idna = "<=3.11" # latest version available on Snowflake Anaconda
+jinja2 = ">=3.1.2,<=3.1.6" # 3.1.6 was latest version available on Snowflake Anaconda
+markupsafe = "<=3.0.2" # latest version available on Snowflake Anaconda
+numpy = "<=2.3.5" # latest version available on Snowflake Anaconda
+packaging = "<=25.0" # latest version available on Snowflake Anaconda
+pandas = "<=2.3.3" # latest version available on Snowflake Anaconda
+platformdirs = "<=4.5.0" # latest version available on Snowflake Anaconda
+pydantic = "^2, <=2.12.4" # latest version available on Snowflake Anaconda
+pyjwt = "<=2.10.1" # latest version available on Snowflake Anaconda
+pyopenssl = "<=225.3.0" # latest version available on Snowflake Anaconda
+pytz = "<=2025.2" # latest version available on Snowflake Anaconda
+requests = "^2, <=2.32.5" # latest version available on Snowflake Anaconda
+setuptools = "<=80.9.0" # latest version available on Snowflake Anaconda
+tomlkit = "<=0.13.3" # latest version available on Snowflake Anaconda
+tenacity = "^8, <=9.1.2" # latest version available on Snowflake Anaconda
+urllib3 = "<=2.5.0" # latest version available on Snowflake Anaconda
+wheel = "<=0.45.1" # latest version available on Snowflake Anaconda
+pyyaml = "<=6.0.3" # latest version available on Snowflake Anaconda
+cffi = "<=2.0.0" # latest version available on Snowflake Anaconda
+pyarrow = "<=21.0.0" # latest version available on Snowflake Anaconda
+wrapt = "<=2.0.1" # latest version available on Snowflake Anaconda
+opentelemetry-api = "<=1.38.0" # latest version available on Snowflake Anaconda
+snowflake-telemetry-python = "<=0.5.0" # latest version available on Snowflake Anaconda
+protobuf = "<=6.33.0" # latest version available on Snowflake Anaconda
+
+[tool.poetry.dev-dependencies]
+pytest = "^6.2.4"
+deepdiff = "^6"
+requests-mock = ">=1.9.3"
+
+[tool.pytest.ini_options]
+addopts = ["--import-mode=importlib"]
+testpaths = ["tests"]
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
{omnata_plugin_runtime-0.11.6a323 → omnata_plugin_runtime-0.12.0}/src/omnata_plugin_runtime/omnata_plugin.py
@@ -15,7 +15,7 @@ if tuple(sys.version_info[:2]) >= (3, 9):
 else:
     # Python 3.8 and below
     from typing_extensions import Annotated
-
+from dataclasses import dataclass
 import zipfile
 import datetime
 import http
@@ -49,6 +49,11 @@ from snowflake.snowpark.functions import col
 from tenacity import Retrying, stop_after_attempt, wait_fixed, retry_if_exception_message

 from .logging import OmnataPluginLogHandler, logger, tracer, meter
+stream_duration_gauge = meter.create_gauge(
+    name="omnata.sync_run.stream_duration",
+    description="The duration of stream processing",
+    unit="s",
+)
 from opentelemetry import context
 import math
 import numpy as np
@@ -265,6 +270,29 @@ def jinja_filter(func):
     func.is_jinja_filter = True
     return func

+@dataclass
+class StateResult:
+    """
+    Represents the current cursor state of a stream. This simple wrapper just helps us identify what type of
+    object is in the apply_results list.
+    """
+    new_state: Any
+
+@dataclass
+class RecordsToUploadResult:
+    """
+    Represents the records to upload for a stream. This simple wrapper just helps us identify what type of
+    object is in the apply_results list.
+    """
+    records: pandas.DataFrame
+
+@dataclass
+class CriteriaDeleteResult:
+    """
+    Represents the result of processing criteria deletes for a stream. This simple wrapper just helps us identify what type of
+    object is in the apply_results list.
+    """
+    criteria_deletes: pandas.DataFrame

 class SyncRequest(ABC):
     """
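The three wrapper dataclasses above exist so that a single ordered apply-results queue per stream can hold record batches, cursor-state checkpoints and delete-criteria requests side by side, with isinstance() used later to tell them apart. A minimal sketch of that dispatch pattern (the sample queue contents below are illustrative, not taken from the package):

    from dataclasses import dataclass
    from typing import Any, List
    import pandas

    @dataclass
    class StateResult:
        new_state: Any

    @dataclass
    class RecordsToUploadResult:
        records: pandas.DataFrame

    # An append-only queue for one stream: two record batches followed by a state checkpoint.
    queue: List[Any] = [
        RecordsToUploadResult(records=pandas.DataFrame({"id": [1, 2]})),
        RecordsToUploadResult(records=pandas.DataFrame({"id": [3]})),
        StateResult(new_state={"cursor": "2024-01-01"}),
    ]

    # isinstance() recovers the entry type, which bare DataFrames and dicts could not signal reliably.
    record_batches = [x.records for x in queue if isinstance(x, RecordsToUploadResult)]
    latest_state = next((x.new_state for x in reversed(queue) if isinstance(x, StateResult)), None)
    print(len(record_batches), latest_state)  # 2 {'cursor': '2024-01-01'}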
@@ -1057,7 +1085,6 @@ class InboundSyncRequest(SyncRequest):
         }

         # These are similar to the results, but represent requests to delete records by some criteria
-        self._apply_results_criteria_deletes: Dict[str, List[pandas.DataFrame]] = {}
         self._temp_tables = {}
         self._temp_table_lock = threading.Lock()
         self._results_exist: Dict[
@@ -1096,7 +1123,9 @@
         self._criteria_deletes_table_name = results_table.get_fully_qualified_criteria_deletes_table_name()
         self.state_register_table_name = results_table.get_fully_qualified_state_register_table_name()
         # this is keyed on stream name, each containing a list of dataframes and state updates mixed
-        self._apply_results: Dict[str, List[
+        self._apply_results: Dict[str, List[RecordsToUploadResult | StateResult | CriteriaDeleteResult]] = {}
+        # track the start times of each stream, so we can calculate durations. The int is a epoch (time.time()) value
+        self._stream_start_times: Dict[str, int] = {}

     def apply_results_queue(self):
         """
@@ -1105,7 +1134,8 @@
         logger.debug("InboundSyncRequest apply_results_queue")
         if self._apply_results is not None:
             with self._apply_results_lock:
-
+                records_to_upload:List[pandas.DataFrame] = []
+                criteria_deletes_to_upload:List[pandas.DataFrame] = []
                 stream_states_for_upload:Dict[str, Dict[str, Any]] = {}
                 for stream_name, stream_results in self._apply_results.items():
                     # the stream results contains an ordered sequence of dataframes and state updates (append only)
@@ -1113,9 +1143,9 @@
                     # so first, we iterate backwards to find the last state update
                     last_state_index = -1
                     for i in range(len(stream_results) - 1, -1, -1):
-                        if isinstance(stream_results[i],
+                        if isinstance(stream_results[i], StateResult):
                             last_state_index = i
-                            stream_states_for_upload[stream_name] = stream_results[i]
+                            stream_states_for_upload[stream_name] = stream_results[i].new_state
                             break
                     # if there are no state updates, we can't do anything with this stream
                     if last_state_index == -1:
@@ -1124,56 +1154,54 @@
                         )
                         continue
                     assert isinstance(stream_states_for_upload[stream_name], dict), "Latest state must be a dictionary"
-                    # now we can take the dataframes up to the last state update
-
-
-                        x for x in
+                    # now we can take the record dataframes up to the last state update
+                    results_subset = stream_results[:last_state_index]
+                    non_empty_record_dfs:List[pandas.DataFrame] = [
+                        x.records for x in results_subset
+                        if x is not None and isinstance(x, RecordsToUploadResult) and len(x.records) > 0
                     ]
                     # get the total length of all the dataframes
-                    total_length = sum([len(x) for x in
+                    total_length = sum([len(x) for x in non_empty_record_dfs])
                     # add the count of this batch to the total for this stream
                     self._stream_record_counts[
                         stream_name
                     ] = self._stream_record_counts[stream_name] + total_length
-
+                    records_to_upload.extend(non_empty_record_dfs)
+                    # also handle any criteria deletes
+                    criteria_deletes_to_upload.extend([
+                        x.criteria_deletes for x in results_subset
+                        if x is not None and isinstance(x, CriteriaDeleteResult) and len(x.criteria_deletes) > 0
+                    ])
                     # now remove everything up to the last state update
                     # we do this so that we don't apply the same state update multiple times
+                    # keep everything after the last state update
                     self._apply_results[stream_name] = stream_results[
                         last_state_index + 1 :
-                    ]
-
-
-
-
-
-
-
-
-
+                    ]
+
+                if len(records_to_upload) > 0 or len(criteria_deletes_to_upload) > 0:
+                    if len(records_to_upload) > 0:
+                        logger.debug(
+                            f"Applying {len(records_to_upload)} batches of queued results"
+                        )
+                        # upload all cached apply results
+                        records_to_upload_combined = pandas.concat(records_to_upload)
+                        self._apply_results_dataframe(list(stream_states_for_upload.keys()), records_to_upload_combined)
+                    # now that the results have been updated, we need to insert records into the state register table
+                    # we do this by inserting the latest state for each stream
+                    if len(criteria_deletes_to_upload) > 0:
+                        logger.debug(
+                            f"Applying {len(criteria_deletes_to_upload)} batches of queued criteria deletes"
+                        )
+                        # upload all cached apply results
+                        all_criteria_deletes = pandas.concat(criteria_deletes_to_upload)
+                        self._apply_criteria_deletes_dataframe(all_criteria_deletes)
+
+                query_id = self._get_query_id_for_now()
                 self._directly_insert_to_state_register(
                     stream_states_for_upload, query_id=query_id
                 )

-        # also take care of uploading delete requests
-        # technically these should be managed along with the state, however there aren't any scenarios where checkpointing is done
-        # and deletes have an impact. This is because we only checkpoint in scenarios where the target table is empty first
-        if hasattr(self,'_apply_results_criteria_deletes') and self._apply_results_criteria_deletes is not None:
-            with self._apply_results_lock:
-                results:List[pandas.DataFrame] = []
-                for stream_name, stream_results in self._apply_results_criteria_deletes.items():
-                    results.extend([
-                        x for x in stream_results if x is not None and len(x) > 0
-                    ])
-                if len(results) > 0:
-                    logger.debug(
-                        f"Applying {len(results)} batches of queued criteria deletes"
-                    )
-                    # upload all cached apply results
-                    all_dfs = pandas.concat(results)
-                    self._apply_criteria_deletes_dataframe(all_dfs)
-                    # clear the delete requests
-                    self._apply_results_criteria_deletes = {}
-

        # update the inbound stream record counts, so we can see progress
        # we do this last, because marking a stream as completed will cause the sync engine to process it
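The reworked apply_results_queue walks each stream's queue backwards to the most recent StateResult, flushes only what was enqueued before that checkpoint, and keeps the tail for the next pass, so a state update is never written ahead of the records it covers. A minimal standalone sketch of that slicing step (flush_up_to_last_state is an illustrative helper, not part of the package):

    from dataclasses import dataclass
    from typing import Any, List, Optional, Tuple

    @dataclass
    class StateResult:
        new_state: Any

    def flush_up_to_last_state(queue: List[Any]) -> Tuple[List[Any], Optional[Any], List[Any]]:
        """Return (items to flush, latest state, remaining queue) for one stream's queue."""
        last_state_index = -1
        latest_state = None
        # iterate backwards to find the newest checkpoint
        for i in range(len(queue) - 1, -1, -1):
            if isinstance(queue[i], StateResult):
                last_state_index = i
                latest_state = queue[i].new_state
                break
        if last_state_index == -1:
            # no checkpoint yet: nothing can be flushed safely
            return [], None, queue
        # flush everything before the checkpoint, keep everything after it for the next pass
        return queue[:last_state_index], latest_state, queue[last_state_index + 1:]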
@@ -1281,29 +1309,40 @@
         if stream_name is None or len(stream_name) == 0:
             raise ValueError("Stream name cannot be empty")
         with self._apply_results_lock:
-            existing_results: List[
+            existing_results: List[RecordsToUploadResult | StateResult | CriteriaDeleteResult] = []
             if stream_name in self._apply_results:
                 existing_results = self._apply_results[stream_name]
-            existing_results.append(
+            existing_results.append(RecordsToUploadResult(
+                records=self._preprocess_results_list(stream_name, results, is_delete)
+            ))
             if new_state is not None:
-                existing_results.append(
+                existing_results.append(
+                    StateResult(new_state=new_state)
+                ) # append the new state at the end
             self._apply_results[stream_name] = existing_results
-            # if the total size of all the dataframes exceeds 200MB, apply the results immediately
-            # we'll use df.memory_usage(index=True) for this
             if self.development_mode is False:
                 # note: we want to do it for all values in self._apply_results, not just the new one
-
-
-
-
-
-
-
-
-
-
-
-
+                self._apply_results_if_size_exceeded()
+
+    def _apply_results_if_size_exceeded(self,):
+        # so first we need to get the list of lists from the dictionary values and flatten it
+        # then we can sum the memory usage of each dataframe
+        # if the total exceeds 200MB, we apply the results immediately
+        all_df_lists:List[List[RecordsToUploadResult | StateResult | CriteriaDeleteResult]] = list(self._apply_results.values())
+        # flatten
+        all_dfs:List[pandas.DataFrame] = []
+        for sublist in all_df_lists:
+            for x in sublist:
+                if isinstance(x, RecordsToUploadResult):
+                    all_dfs.append(x.records)
+                if isinstance(x, CriteriaDeleteResult):
+                    all_dfs.append(x.criteria_deletes)
+        combined_length = sum([len(x) for x in all_dfs])
+        # first, don't bother if the count is less than 10000, since it's unlikely to be even close
+        if combined_length > 10000:
+            if sum([x.memory_usage(index=True).sum() for x in all_dfs]) > 200000000:
+                logger.debug(f"Applying results queue immediately due to combined dataframe size")
+                self.apply_results_queue()

     def delete_by_criteria(self, stream_name: str, criteria: Dict[str, Any]):
         """
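The new _apply_results_if_size_exceeded helper consolidates the flush-on-size check for both record batches and criteria deletes: it only measures memory once more than 10,000 rows are queued, then flushes when the pandas-reported footprint passes roughly 200 MB. A quick illustration of the measurement it relies on (the example frame below is made up):

    import numpy as np
    import pandas

    # memory_usage(index=True) reports per-column byte counts, including the index;
    # summing them approximates the in-memory size the 200 MB threshold is compared against.
    df = pandas.DataFrame({"id": np.arange(100_000), "value": np.random.rand(100_000)})
    size_bytes = df.memory_usage(index=True).sum()
    print(f"{size_bytes} bytes, flush needed: {size_bytes > 200_000_000}")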
@@ -1329,27 +1368,22 @@
        logger.debug(
            f"Enqueuing {len(criteria)} delete criteria for stream {stream_name} for upload"
        )
-        existing_results: List[
-        if stream_name in self.
-        existing_results = self.
-        existing_results.append(
-
-
-
+        existing_results: List[RecordsToUploadResult | StateResult | CriteriaDeleteResult] = []
+        if stream_name in self._apply_results:
+            existing_results = self._apply_results[stream_name]
+        existing_results.append(
+            CriteriaDeleteResult(
+                criteria_deletes=pandas.DataFrame([{"STREAM_NAME":stream_name,"DELETE_CRITERIA": criteria}])))
+        self._apply_results[stream_name] = existing_results
        if self.development_mode is False:
-
-
-
-
-
-
-
-
-            # first, don't both if the count is less than 10000, since it's unlikely to be even close
-            if combined_length > 10000:
-                if sum([x.memory_usage(index=True).sum() for x in all_dfs if isinstance(x, pandas.DataFrame)]) > 200000000:
-                    logger.debug(f"Applying criteria deletes queue immediately due to combined dataframe size")
-                    self.apply_results_queue()
+            self._apply_results_if_size_exceeded()
+
+    def mark_stream_started(self, stream_name: str):
+        """
+        Marks a stream as started, this is called automatically per stream when using @managed_inbound_processing.
+        """
+        logger.debug(f"Marking stream {stream_name} as started locally")
+        self._stream_start_times[stream_name] = time.time()

     def mark_stream_complete(self, stream_name: str):
         """
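With this change, delete_by_criteria enqueues a CriteriaDeleteResult onto the same per-stream queue as records and state updates, wrapping a one-row DataFrame with STREAM_NAME and DELETE_CRITERIA columns. A small illustration of the shape of that queued entry (the stream name and criteria values below are placeholders, not from the package):

    import pandas

    # The column names come from the diff above; the values are purely illustrative.
    criteria_entry = pandas.DataFrame(
        [{"STREAM_NAME": "contacts", "DELETE_CRITERIA": {"account_id": "acc-123"}}]
    )
    print(criteria_entry)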
@@ -1357,6 +1391,20 @@
         If @managed_inbound_processing is not used, call this whenever a stream has finished recieving records.
         """
         logger.debug(f"Marking stream {stream_name} as completed locally")
+        if stream_name in self._stream_start_times:
+            start_time = self._stream_start_times[stream_name]
+            duration = time.time() - start_time
+            stream_duration_gauge.set(
+                amount=duration,
+                attributes={
+                    "stream_name": stream_name,
+                    "sync_run_id": str(self._run_id),
+                    "sync_id": str(self._sync_id),
+                    "branch_name": str(self._branch_name) if self._branch_name is not None else 'main',
+                    "sync_direction": "inbound",
+                    "plugin_id": self.plugin_instance.get_manifest().plugin_id,
+                },
+            )
         with self._apply_results_lock:
             self._completed_streams.append(stream_name)
             # dedup just in case it's called twice
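Per-stream timing now lives in the request object itself: mark_stream_started stores a time.time() epoch and mark_stream_complete reports the elapsed seconds through the stream_duration gauge declared near the top of the module, tagged with the sync identifiers. A self-contained sketch of that gauge pattern using the OpenTelemetry metrics API (the meter name, gauge name and attributes below are illustrative, not the package's):

    import time
    from opentelemetry import metrics

    # A gauge is set once per stream when it finishes, carrying identifying attributes.
    meter = metrics.get_meter("example")
    stream_duration_gauge = meter.create_gauge(
        name="example.stream_duration", description="The duration of stream processing", unit="s"
    )

    start_times = {}
    start_times["contacts"] = time.time()      # mark_stream_started equivalent
    time.sleep(0.1)                            # ... stream processing happens here ...
    duration = time.time() - start_times["contacts"]
    stream_duration_gauge.set(amount=duration, attributes={"stream_name": "contacts"})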
@@ -1463,7 +1511,7 @@
            logger.debug(f"Failure to convert inbound data: {str(exception)}")
            return data

-    def _preprocess_results_list(self, stream_name: str, results: List[Dict],is_delete:Union[bool,List[bool]]):
+    def _preprocess_results_list(self, stream_name: str, results: List[Dict],is_delete:Union[bool,List[bool]]) -> pandas.DataFrame:
        """
        Creates a dataframe from the enqueued list, ready to upload.
        The result is a dataframe contain all (and only):
@@ -1608,7 +1656,7 @@
        hash_object = hashlib.sha256(key_string.encode())
        return hash_object.hexdigest()

-    def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame)
+    def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame):
        """
        Applies results for an inbound sync. The results are staged into a temporary
        table in Snowflake, so that we can make an atomic commit at the end.
@@ -1635,7 +1683,6 @@
            raise ValueError(
                f"Failed to write results to table {self._full_results_table_name}"
            )
-        query_id = self._get_query_id_for_now()
        logger.debug(
            f"Wrote {nrows} rows and {nchunks} chunks to table {self._full_results_table_name}"
        )
@@ -1648,7 +1695,6 @@
            # )
            for stream_name in stream_names:
                self._results_exist[stream_name] = True
-            return query_id
        else:
            logger.debug("Results dataframe is empty, not applying")

@@ -2333,12 +2379,8 @@ def __managed_inbound_processing_worker(
        sync_request: InboundSyncRequest = cast(
            InboundSyncRequest, plugin_class_obj._sync_request
        ) # pylint: disable=protected-access
-
-
-            description="The duration of stream processing",
-            unit="s",
-        )
-        start_time = time.time()
+        if stream.stream_name not in sync_request._stream_start_times:
+            sync_request.mark_stream_started(stream.stream_name)
        # restore the first argument, was originally the dataframe/generator but now it's the appropriately sized dataframe
        try:
            with tracer.start_as_current_span("managed_inbound_processing") as managed_inbound_processing_span:
@@ -2370,19 +2412,6 @@
                omnata_plugin_logger.error(f"{type(e).__name__} syncing stream {stream.stream_name}",
                    exc_info=True,
                    extra={'stream_name':stream.stream_name})
-            finally:
-                duration = time.time() - start_time
-                stream_duration_counter.record(
-                    duration,
-                    attributes={
-                        "stream_name": stream.stream_name,
-                        "sync_run_id": str(sync_request._run_id),
-                        "sync_id": str(sync_request._sync_id),
-                        "branch_name": str(sync_request._branch_name) if sync_request._branch_name is not None else 'main',
-                        "sync_direction": "inbound",
-                        "plugin_id": plugin_class_obj.get_manifest().plugin_id,
-                    },
-                )
    except queue.Empty:
        logger.debug("streams queue is empty")
        return
omnata_plugin_runtime-0.11.6a323/PKG-INFO (removed)
@@ -1,55 +0,0 @@
-Metadata-Version: 2.4
-Name: omnata-plugin-runtime
-Version: 0.11.6a323
-Summary: Classes and common runtime components for building and running Omnata Plugins
-License-File: LICENSE
-Author: James Weakley
-Author-email: james.weakley@omnata.com
-Requires-Python: >=3.9,<=3.11
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: annotated-types (<=0.6.0)
-Requires-Dist: certifi (<=2024.8.30)
-Requires-Dist: cffi (<=1.16.0)
-Requires-Dist: charset-normalizer (<=3.3.2)
-Requires-Dist: cryptography (<=43.0.0)
-Requires-Dist: filelock (<=3.13.1)
-Requires-Dist: idna (<=3.7)
-Requires-Dist: jinja2 (>=3.1.2,<=3.1.4)
-Requires-Dist: markupsafe (<=2.1.3)
-Requires-Dist: numpy (<=2.1.3)
-Requires-Dist: opentelemetry-api (<=1.23.0)
-Requires-Dist: packaging (<=24.1)
-Requires-Dist: pandas (<=2.2.3)
-Requires-Dist: platformdirs (<=3.10.0)
-Requires-Dist: protobuf (<=4.25.3)
-Requires-Dist: pyarrow (<=16.1.0)
-Requires-Dist: pycparser (<=2.21)
-Requires-Dist: pydantic (>=2,<=2.8.2)
-Requires-Dist: pydantic-core (<=2.21.0)
-Requires-Dist: pyjwt (<=2.8.0)
-Requires-Dist: pyopenssl (<=24.2.1)
-Requires-Dist: pytz (<=2024.1)
-Requires-Dist: pyyaml (<=6.0.1)
-Requires-Dist: requests (>=2,<=2.32.3)
-Requires-Dist: setuptools (<=72.1.0)
-Requires-Dist: snowflake-connector-python (>=3,<=3.12.0)
-Requires-Dist: snowflake-snowpark-python (>=1.20.0,<=1.24.0)
-Requires-Dist: snowflake-telemetry-python (<=0.5.0)
-Requires-Dist: tenacity (>=8,<=8.2.3)
-Requires-Dist: tomlkit (<=0.11.1)
-Requires-Dist: urllib3 (<=2.2.2)
-Requires-Dist: wheel (<=0.43.0)
-Requires-Dist: wrapt (<=1.14.1)
-Description-Content-Type: text/markdown
-
-# omnata-plugin-runtime
-This package is a runtime dependency for [Omnata Plugins](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins).
-
-It contains data classes, interfaces and application logic used to perform plugin operations.
-
-For instructions on creating plugins, visit our [docs site](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins/creating-plugins).
-
-
omnata_plugin_runtime-0.11.6a323/pyproject.toml (removed)
@@ -1,57 +0,0 @@
-[tool.poetry]
-name = "omnata-plugin-runtime"
-version = "0.11.6-a323"
-description = "Classes and common runtime components for building and running Omnata Plugins"
-authors = ["James Weakley <james.weakley@omnata.com>"]
-readme = "README.md"
-packages = [{include = "omnata_plugin_runtime", from = "src"}]
-
-[tool.poetry.dependencies]
-python = ">=3.9, <=3.11"
-snowflake-snowpark-python = ">=1.20.0,<=1.24.0" # latest version available on Snowflake Anaconda, but allow pinning to 1.20.0 for to_pandas_batches workaround
-snowflake-connector-python = "^3, <=3.12.0" # latest version available on Snowflake Anaconda
-cryptography = "<=43.0.0"
-annotated-types = "<=0.6.0"
-pycparser = "<=2.21"
-filelock = "<=3.13.1"
-pydantic-core = "<=2.21.0"
-# had to relax some of these thanks to snowcli pinning newer versions
-certifi = "<=2024.8.30" # latest version available on Snowflake Anaconda
-charset-normalizer = "<=3.3.2" # latest version available on Snowflake Anaconda
-idna = "<=3.7" # latest version available on Snowflake Anaconda
-jinja2 = ">=3.1.2,<=3.1.4" # 3.1.4 was latest version available on Snowflake Anaconda
-markupsafe = "<=2.1.3" # latest version available on Snowflake Anaconda
-numpy = "<=2.1.3" # latest version available on Snowflake Anaconda
-packaging = "<=24.1" # latest version available on Snowflake Anaconda
-pandas = "<=2.2.3" # latest version available on Snowflake Anaconda
-platformdirs = "<=3.10.0" # latest version available on Snowflake Anaconda
-pydantic = "^2, <=2.8.2" # latest version available on Snowflake Anaconda
-pyjwt = "<=2.8.0" # latest version available on Snowflake Anaconda
-pyopenssl = "<=24.2.1" # latest version available on Snowflake Anaconda
-pytz = "<=2024.1" # latest version available on Snowflake Anaconda
-requests = "^2, <=2.32.3" # latest version available on Snowflake Anaconda
-setuptools = "<=72.1.0" # latest version available on Snowflake Anaconda
-tomlkit = "<=0.11.1" # latest version available on Snowflake Anaconda
-tenacity = "^8, <=8.2.3" # latest version available on Snowflake Anaconda
-urllib3 = "<=2.2.2" # latest version available on Snowflake Anaconda
-wheel = "<=0.43.0" # latest version available on Snowflake Anaconda
-pyyaml = "<=6.0.1" # latest version available on Snowflake Anaconda
-cffi = "<=1.16.0" # latest version available on Snowflake Anaconda
-pyarrow = "<=16.1.0" # latest version available on Snowflake Anaconda
-wrapt = "<=1.14.1" # latest version available on Snowflake Anaconda
-opentelemetry-api = "<=1.23.0" # latest version available on Snowflake Anaconda
-snowflake-telemetry-python = "<=0.5.0" # latest version available on Snowflake Anaconda
-protobuf = "<=4.25.3" # latest version available on Snowflake Anaconda
-
-[tool.poetry.dev-dependencies]
-pytest = "^6.2.4"
-deepdiff = "^6"
-requests-mock = ">=1.9.3"
-
-[tool.pytest.ini_options]
-addopts = ["--import-mode=importlib"]
-testpaths = ["tests"]
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
The remaining renamed files (LICENSE, README.md, __init__.py, api.py, configuration.py, forms.py, json_schema.py, logging.py, plugin_entrypoints.py, rate_limiting.py) have no content changes.