omnata-plugin-runtime 0.11.6a323__tar.gz → 0.12.0__tar.gz

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
@@ -0,0 +1,56 @@
+ Metadata-Version: 2.4
+ Name: omnata-plugin-runtime
+ Version: 0.12.0
+ Summary: Classes and common runtime components for building and running Omnata Plugins
+ License-File: LICENSE
+ Author: James Weakley
+ Author-email: james.weakley@omnata.com
+ Requires-Python: >=3.10,<=3.13
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Dist: annotated-types (<=0.6.0)
+ Requires-Dist: certifi (<=2025.11.12)
+ Requires-Dist: cffi (<=2.0.0)
+ Requires-Dist: charset-normalizer (<=3.4.4)
+ Requires-Dist: cryptography (<=46.0.3)
+ Requires-Dist: filelock (<=3.20.0)
+ Requires-Dist: idna (<=3.11)
+ Requires-Dist: jinja2 (>=3.1.2,<=3.1.6)
+ Requires-Dist: markupsafe (<=3.0.2)
+ Requires-Dist: numpy (<=2.3.5)
+ Requires-Dist: opentelemetry-api (<=1.38.0)
+ Requires-Dist: packaging (<=25.0)
+ Requires-Dist: pandas (<=2.3.3)
+ Requires-Dist: platformdirs (<=4.5.0)
+ Requires-Dist: protobuf (<=6.33.0)
+ Requires-Dist: pyarrow (<=21.0.0)
+ Requires-Dist: pycparser (<=2.23)
+ Requires-Dist: pydantic (>=2,<=2.12.4)
+ Requires-Dist: pydantic-core (<=2.41.5)
+ Requires-Dist: pyjwt (<=2.10.1)
+ Requires-Dist: pyopenssl (<=225.3.0)
+ Requires-Dist: pytz (<=2025.2)
+ Requires-Dist: pyyaml (<=6.0.3)
+ Requires-Dist: requests (>=2,<=2.32.5)
+ Requires-Dist: setuptools (<=80.9.0)
+ Requires-Dist: snowflake-connector-python (>=3,<4)
+ Requires-Dist: snowflake-snowpark-python (>=1.20.0,<=1.43.0)
+ Requires-Dist: snowflake-telemetry-python (<=0.5.0)
+ Requires-Dist: tenacity (>=8,<9)
+ Requires-Dist: tomlkit (<=0.13.3)
+ Requires-Dist: urllib3 (<=2.5.0)
+ Requires-Dist: wheel (<=0.45.1)
+ Requires-Dist: wrapt (<=2.0.1)
+ Description-Content-Type: text/markdown
+
+ # omnata-plugin-runtime
+ This package is a runtime dependency for [Omnata Plugins](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins).
+
+ It contains data classes, interfaces and application logic used to perform plugin operations.
+
+ For instructions on creating plugins, visit our [docs site](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins/creating-plugins).
+
+
@@ -0,0 +1,57 @@
+ [tool.poetry]
+ name = "omnata-plugin-runtime"
+ version = "0.12.0"
+ description = "Classes and common runtime components for building and running Omnata Plugins"
+ authors = ["James Weakley <james.weakley@omnata.com>"]
+ readme = "README.md"
+ packages = [{include = "omnata_plugin_runtime", from = "src"}]
+
+ [tool.poetry.dependencies]
+ python = ">=3.10, <=3.13"
+ snowflake-snowpark-python = ">=1.20.0,<=1.43.0" # latest version available on Snowflake Anaconda, but allow pinning to 1.20.0 for to_pandas_batches workaround
+ snowflake-connector-python = "^3, <=4.1.0" # latest version available on Snowflake Anaconda
+ cryptography = "<=46.0.3"
+ annotated-types = "<=0.6.0"
+ pycparser = "<=2.23"
+ filelock = "<=3.20.0"
+ pydantic-core = "<=2.41.5"
+ # had to relax some of these thanks to snowcli pinning newer versions
+ certifi = "<=2025.11.12" # latest version available on Snowflake Anaconda
+ charset-normalizer = "<=3.4.4" # latest version available on Snowflake Anaconda
+ idna = "<=3.11" # latest version available on Snowflake Anaconda
+ jinja2 = ">=3.1.2,<=3.1.6" # 3.1.6 was latest version available on Snowflake Anaconda
+ markupsafe = "<=3.0.2" # latest version available on Snowflake Anaconda
+ numpy = "<=2.3.5" # latest version available on Snowflake Anaconda
+ packaging = "<=25.0" # latest version available on Snowflake Anaconda
+ pandas = "<=2.3.3" # latest version available on Snowflake Anaconda
+ platformdirs = "<=4.5.0" # latest version available on Snowflake Anaconda
+ pydantic = "^2, <=2.12.4" # latest version available on Snowflake Anaconda
+ pyjwt = "<=2.10.1" # latest version available on Snowflake Anaconda
+ pyopenssl = "<=225.3.0" # latest version available on Snowflake Anaconda
+ pytz = "<=2025.2" # latest version available on Snowflake Anaconda
+ requests = "^2, <=2.32.5" # latest version available on Snowflake Anaconda
+ setuptools = "<=80.9.0" # latest version available on Snowflake Anaconda
+ tomlkit = "<=0.13.3" # latest version available on Snowflake Anaconda
+ tenacity = "^8, <=9.1.2" # latest version available on Snowflake Anaconda
+ urllib3 = "<=2.5.0" # latest version available on Snowflake Anaconda
+ wheel = "<=0.45.1" # latest version available on Snowflake Anaconda
+ pyyaml = "<=6.0.3" # latest version available on Snowflake Anaconda
+ cffi = "<=2.0.0" # latest version available on Snowflake Anaconda
+ pyarrow = "<=21.0.0" # latest version available on Snowflake Anaconda
+ wrapt = "<=2.0.1" # latest version available on Snowflake Anaconda
+ opentelemetry-api = "<=1.38.0" # latest version available on Snowflake Anaconda
+ snowflake-telemetry-python = "<=0.5.0" # latest version available on Snowflake Anaconda
+ protobuf = "<=6.33.0" # latest version available on Snowflake Anaconda
+
+ [tool.poetry.dev-dependencies]
+ pytest = "^6.2.4"
+ deepdiff = "^6"
+ requests-mock = ">=1.9.3"
+
+ [tool.pytest.ini_options]
+ addopts = ["--import-mode=importlib"]
+ testpaths = ["tests"]
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
@@ -15,7 +15,7 @@ if tuple(sys.version_info[:2]) >= (3, 9):
  else:
  # Python 3.8 and below
  from typing_extensions import Annotated
-
+ from dataclasses import dataclass
  import zipfile
  import datetime
  import http
@@ -49,6 +49,11 @@ from snowflake.snowpark.functions import col
  from tenacity import Retrying, stop_after_attempt, wait_fixed, retry_if_exception_message

  from .logging import OmnataPluginLogHandler, logger, tracer, meter
+ stream_duration_gauge = meter.create_gauge(
+ name="omnata.sync_run.stream_duration",
+ description="The duration of stream processing",
+ unit="s",
+ )
  from opentelemetry import context
  import math
  import numpy as np
@@ -265,6 +270,29 @@ def jinja_filter(func):
  func.is_jinja_filter = True
  return func

+ @dataclass
+ class StateResult:
+ """
+ Represents the current cursor state of a stream. This simple wrapper just helps us identify what type of
+ object is in the apply_results list.
+ """
+ new_state: Any
+
+ @dataclass
+ class RecordsToUploadResult:
+ """
+ Represents the records to upload for a stream. This simple wrapper just helps us identify what type of
+ object is in the apply_results list.
+ """
+ records: pandas.DataFrame
+
+ @dataclass
+ class CriteriaDeleteResult:
+ """
+ Represents the result of processing criteria deletes for a stream. This simple wrapper just helps us identify what type of
+ object is in the apply_results list.
+ """
+ criteria_deletes: pandas.DataFrame

  class SyncRequest(ABC):
  """
@@ -1057,7 +1085,6 @@ class InboundSyncRequest(SyncRequest):
  }

  # These are similar to the results, but represent requests to delete records by some criteria
- self._apply_results_criteria_deletes: Dict[str, List[pandas.DataFrame]] = {}
  self._temp_tables = {}
  self._temp_table_lock = threading.Lock()
  self._results_exist: Dict[
@@ -1096,7 +1123,9 @@ class InboundSyncRequest(SyncRequest):
  self._criteria_deletes_table_name = results_table.get_fully_qualified_criteria_deletes_table_name()
  self.state_register_table_name = results_table.get_fully_qualified_state_register_table_name()
  # this is keyed on stream name, each containing a list of dataframes and state updates mixed
- self._apply_results: Dict[str, List[pandas.DataFrame | Dict]] = {}
+ self._apply_results: Dict[str, List[RecordsToUploadResult | StateResult | CriteriaDeleteResult]] = {}
+ # track the start times of each stream, so we can calculate durations. The int is a epoch (time.time()) value
+ self._stream_start_times: Dict[str, int] = {}

  def apply_results_queue(self):
  """
@@ -1105,7 +1134,8 @@ class InboundSyncRequest(SyncRequest):
  logger.debug("InboundSyncRequest apply_results_queue")
  if self._apply_results is not None:
  with self._apply_results_lock:
- results:List[pandas.DataFrame] = []
+ records_to_upload:List[pandas.DataFrame] = []
+ criteria_deletes_to_upload:List[pandas.DataFrame] = []
  stream_states_for_upload:Dict[str, Dict[str, Any]] = {}
  for stream_name, stream_results in self._apply_results.items():
  # the stream results contains an ordered sequence of dataframes and state updates (append only)
@@ -1113,9 +1143,9 @@ class InboundSyncRequest(SyncRequest):
  # so first, we iterate backwards to find the last state update
  last_state_index = -1
  for i in range(len(stream_results) - 1, -1, -1):
- if isinstance(stream_results[i], dict):
+ if isinstance(stream_results[i], StateResult):
  last_state_index = i
- stream_states_for_upload[stream_name] = stream_results[i]
+ stream_states_for_upload[stream_name] = stream_results[i].new_state
  break
  # if there are no state updates, we can't do anything with this stream
  if last_state_index == -1:
@@ -1124,56 +1154,54 @@ class InboundSyncRequest(SyncRequest):
  )
  continue
  assert isinstance(stream_states_for_upload[stream_name], dict), "Latest state must be a dictionary"
- # now we can take the dataframes up to the last state update
- dfs = stream_results[:last_state_index]
- non_empty_dfs = [
- x for x in dfs if x is not None and isinstance(x, pandas.DataFrame) and len(x) > 0
+ # now we can take the record dataframes up to the last state update
+ results_subset = stream_results[:last_state_index]
+ non_empty_record_dfs:List[pandas.DataFrame] = [
+ x.records for x in results_subset
+ if x is not None and isinstance(x, RecordsToUploadResult) and len(x.records) > 0
  ]
  # get the total length of all the dataframes
- total_length = sum([len(x) for x in non_empty_dfs])
+ total_length = sum([len(x) for x in non_empty_record_dfs])
  # add the count of this batch to the total for this stream
  self._stream_record_counts[
  stream_name
  ] = self._stream_record_counts[stream_name] + total_length
- results.extend(non_empty_dfs)
+ records_to_upload.extend(non_empty_record_dfs)
+ # also handle any criteria deletes
+ criteria_deletes_to_upload.extend([
+ x.criteria_deletes for x in results_subset
+ if x is not None and isinstance(x, CriteriaDeleteResult) and len(x.criteria_deletes) > 0
+ ])
  # now remove everything up to the last state update
  # we do this so that we don't apply the same state update multiple times
+ # keep everything after the last state update
  self._apply_results[stream_name] = stream_results[
  last_state_index + 1 :
- ] # keep everything after the last state update
- if len(results) > 0:
- logger.debug(
- f"Applying {len(results)} batches of queued results"
- )
- # upload all cached apply results
- all_dfs = pandas.concat(results)
- query_id = self._apply_results_dataframe(list(stream_states_for_upload.keys()), all_dfs)
- # now that the results have been updated, we need to insert records into the state register table
- # we do this by inserting the latest state for each stream
+ ]
+
+ if len(records_to_upload) > 0 or len(criteria_deletes_to_upload) > 0:
+ if len(records_to_upload) > 0:
+ logger.debug(
+ f"Applying {len(records_to_upload)} batches of queued results"
+ )
+ # upload all cached apply results
+ records_to_upload_combined = pandas.concat(records_to_upload)
+ self._apply_results_dataframe(list(stream_states_for_upload.keys()), records_to_upload_combined)
+ # now that the results have been updated, we need to insert records into the state register table
+ # we do this by inserting the latest state for each stream
+ if len(criteria_deletes_to_upload) > 0:
+ logger.debug(
+ f"Applying {len(criteria_deletes_to_upload)} batches of queued criteria deletes"
+ )
+ # upload all cached apply results
+ all_criteria_deletes = pandas.concat(criteria_deletes_to_upload)
+ self._apply_criteria_deletes_dataframe(all_criteria_deletes)
+
+ query_id = self._get_query_id_for_now()
  self._directly_insert_to_state_register(
  stream_states_for_upload, query_id=query_id
  )

- # also take care of uploading delete requests
- # technically these should be managed along with the state, however there aren't any scenarios where checkpointing is done
- # and deletes have an impact. This is because we only checkpoint in scenarios where the target table is empty first
- if hasattr(self,'_apply_results_criteria_deletes') and self._apply_results_criteria_deletes is not None:
- with self._apply_results_lock:
- results:List[pandas.DataFrame] = []
- for stream_name, stream_results in self._apply_results_criteria_deletes.items():
- results.extend([
- x for x in stream_results if x is not None and len(x) > 0
- ])
- if len(results) > 0:
- logger.debug(
- f"Applying {len(results)} batches of queued criteria deletes"
- )
- # upload all cached apply results
- all_dfs = pandas.concat(results)
- self._apply_criteria_deletes_dataframe(all_dfs)
- # clear the delete requests
- self._apply_results_criteria_deletes = {}
-

  # update the inbound stream record counts, so we can see progress
  # we do this last, because marking a stream as completed will cause the sync engine to process it
@@ -1281,29 +1309,40 @@ class InboundSyncRequest(SyncRequest):
  if stream_name is None or len(stream_name) == 0:
  raise ValueError("Stream name cannot be empty")
  with self._apply_results_lock:
- existing_results: List[pandas.DataFrame] = []
+ existing_results: List[RecordsToUploadResult | StateResult | CriteriaDeleteResult] = []
  if stream_name in self._apply_results:
  existing_results = self._apply_results[stream_name]
- existing_results.append(self._preprocess_results_list(stream_name, results, is_delete))
+ existing_results.append(RecordsToUploadResult(
+ records=self._preprocess_results_list(stream_name, results, is_delete)
+ ))
  if new_state is not None:
- existing_results.append(new_state) # append the new state at the end
+ existing_results.append(
+ StateResult(new_state=new_state)
+ ) # append the new state at the end
  self._apply_results[stream_name] = existing_results
- # if the total size of all the dataframes exceeds 200MB, apply the results immediately
- # we'll use df.memory_usage(index=True) for this
  if self.development_mode is False:
  # note: we want to do it for all values in self._apply_results, not just the new one
- # so first we need to get the list of lists from the dictionary values and flatten it
- # then we can sum the memory usage of each dataframe
- # if the total exceeds 200MB, we apply the results immediately
- all_df_lists:List[List[pandas.DataFrame]] = list(self._apply_results.values())
- # flatten
- all_dfs:List[pandas.DataFrame] = [x for sublist in all_df_lists for x in sublist if isinstance(x, pandas.DataFrame)]
- combined_length = sum([len(x) for x in all_dfs])
- # first, don't bother if the count is less than 10000, since it's unlikely to be even close
- if combined_length > 10000:
- if sum([x.memory_usage(index=True).sum() for x in all_dfs]) > 200000000:
- logger.debug(f"Applying results queue immediately due to combined dataframe size")
- self.apply_results_queue()
+ self._apply_results_if_size_exceeded()
+
+ def _apply_results_if_size_exceeded(self,):
+ # so first we need to get the list of lists from the dictionary values and flatten it
+ # then we can sum the memory usage of each dataframe
+ # if the total exceeds 200MB, we apply the results immediately
+ all_df_lists:List[List[RecordsToUploadResult | StateResult | CriteriaDeleteResult]] = list(self._apply_results.values())
+ # flatten
+ all_dfs:List[pandas.DataFrame] = []
+ for sublist in all_df_lists:
+ for x in sublist:
+ if isinstance(x, RecordsToUploadResult):
+ all_dfs.append(x.records)
+ if isinstance(x, CriteriaDeleteResult):
+ all_dfs.append(x.criteria_deletes)
+ combined_length = sum([len(x) for x in all_dfs])
+ # first, don't bother if the count is less than 10000, since it's unlikely to be even close
+ if combined_length > 10000:
+ if sum([x.memory_usage(index=True).sum() for x in all_dfs]) > 200000000:
+ logger.debug(f"Applying results queue immediately due to combined dataframe size")
+ self.apply_results_queue()

  def delete_by_criteria(self, stream_name: str, criteria: Dict[str, Any]):
  """
@@ -1329,27 +1368,22 @@ class InboundSyncRequest(SyncRequest):
  logger.debug(
  f"Enqueuing {len(criteria)} delete criteria for stream {stream_name} for upload"
  )
- existing_results: List[pandas.DataFrame] = []
- if stream_name in self._apply_results_criteria_deletes:
- existing_results = self._apply_results_criteria_deletes[stream_name]
- existing_results.append(pandas.DataFrame([{"STREAM_NAME":stream_name,"DELETE_CRITERIA": criteria}]))
- self._apply_results_criteria_deletes[stream_name] = existing_results
- # if the total size of all the dataframes exceeds 200MB, apply the results immediately
- # we'll use df.memory_usage(index=True) for this
+ existing_results: List[RecordsToUploadResult | StateResult | CriteriaDeleteResult] = []
+ if stream_name in self._apply_results:
+ existing_results = self._apply_results[stream_name]
+ existing_results.append(
+ CriteriaDeleteResult(
+ criteria_deletes=pandas.DataFrame([{"STREAM_NAME":stream_name,"DELETE_CRITERIA": criteria}])))
+ self._apply_results[stream_name] = existing_results
  if self.development_mode is False:
- # note: we want to do it for all values in self._apply_results_criteria_deletes, not just the new one
- # so first we need to get the list of lists from the dictionary values and flatten it
- # then we can sum the memory usage of each dataframe
- # if the total exceeds 200MB, we apply the results immediately
- all_df_lists:List[List[pandas.DataFrame]] = list(self._apply_results_criteria_deletes.values())
- # flatten
- all_dfs:List[pandas.DataFrame] = [x for sublist in all_df_lists for x in sublist]
- combined_length = sum([len(x) for x in all_dfs])
- # first, don't both if the count is less than 10000, since it's unlikely to be even close
- if combined_length > 10000:
- if sum([x.memory_usage(index=True).sum() for x in all_dfs if isinstance(x, pandas.DataFrame)]) > 200000000:
- logger.debug(f"Applying criteria deletes queue immediately due to combined dataframe size")
- self.apply_results_queue()
+ self._apply_results_if_size_exceeded()
+
+ def mark_stream_started(self, stream_name: str):
+ """
+ Marks a stream as started, this is called automatically per stream when using @managed_inbound_processing.
+ """
+ logger.debug(f"Marking stream {stream_name} as started locally")
+ self._stream_start_times[stream_name] = time.time()

  def mark_stream_complete(self, stream_name: str):
  """
@@ -1357,6 +1391,20 @@ class InboundSyncRequest(SyncRequest):
  If @managed_inbound_processing is not used, call this whenever a stream has finished recieving records.
  """
  logger.debug(f"Marking stream {stream_name} as completed locally")
+ if stream_name in self._stream_start_times:
+ start_time = self._stream_start_times[stream_name]
+ duration = time.time() - start_time
+ stream_duration_gauge.set(
+ amount=duration,
+ attributes={
+ "stream_name": stream_name,
+ "sync_run_id": str(self._run_id),
+ "sync_id": str(self._sync_id),
+ "branch_name": str(self._branch_name) if self._branch_name is not None else 'main',
+ "sync_direction": "inbound",
+ "plugin_id": self.plugin_instance.get_manifest().plugin_id,
+ },
+ )
  with self._apply_results_lock:
  self._completed_streams.append(stream_name)
  # dedup just in case it's called twice
@@ -1463,7 +1511,7 @@ class InboundSyncRequest(SyncRequest):
  logger.debug(f"Failure to convert inbound data: {str(exception)}")
  return data

- def _preprocess_results_list(self, stream_name: str, results: List[Dict],is_delete:Union[bool,List[bool]]):
+ def _preprocess_results_list(self, stream_name: str, results: List[Dict],is_delete:Union[bool,List[bool]]) -> pandas.DataFrame:
  """
  Creates a dataframe from the enqueued list, ready to upload.
  The result is a dataframe contain all (and only):
@@ -1608,7 +1656,7 @@ class InboundSyncRequest(SyncRequest):
  hash_object = hashlib.sha256(key_string.encode())
  return hash_object.hexdigest()

- def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame) -> Optional[str]:
+ def _apply_results_dataframe(self, stream_names: List[str], results_df: pandas.DataFrame):
  """
  Applies results for an inbound sync. The results are staged into a temporary
  table in Snowflake, so that we can make an atomic commit at the end.
@@ -1635,7 +1683,6 @@ class InboundSyncRequest(SyncRequest):
  raise ValueError(
  f"Failed to write results to table {self._full_results_table_name}"
  )
- query_id = self._get_query_id_for_now()
  logger.debug(
  f"Wrote {nrows} rows and {nchunks} chunks to table {self._full_results_table_name}"
  )
@@ -1648,7 +1695,6 @@ class InboundSyncRequest(SyncRequest):
  # )
  for stream_name in stream_names:
  self._results_exist[stream_name] = True
- return query_id
  else:
  logger.debug("Results dataframe is empty, not applying")

@@ -2333,12 +2379,8 @@ def __managed_inbound_processing_worker(
  sync_request: InboundSyncRequest = cast(
  InboundSyncRequest, plugin_class_obj._sync_request
  ) # pylint: disable=protected-access
- stream_duration_counter = meter.create_histogram(
- name="omnata.sync_run.stream_duration",
- description="The duration of stream processing",
- unit="s",
- )
- start_time = time.time()
+ if stream.stream_name not in sync_request._stream_start_times:
+ sync_request.mark_stream_started(stream.stream_name)
  # restore the first argument, was originally the dataframe/generator but now it's the appropriately sized dataframe
  try:
  with tracer.start_as_current_span("managed_inbound_processing") as managed_inbound_processing_span:
@@ -2370,19 +2412,6 @@ def __managed_inbound_processing_worker(
  omnata_plugin_logger.error(f"{type(e).__name__} syncing stream {stream.stream_name}",
  exc_info=True,
  extra={'stream_name':stream.stream_name})
- finally:
- duration = time.time() - start_time
- stream_duration_counter.record(
- duration,
- attributes={
- "stream_name": stream.stream_name,
- "sync_run_id": str(sync_request._run_id),
- "sync_id": str(sync_request._sync_id),
- "branch_name": str(sync_request._branch_name) if sync_request._branch_name is not None else 'main',
- "sync_direction": "inbound",
- "plugin_id": plugin_class_obj.get_manifest().plugin_id,
- },
- )
  except queue.Empty:
  logger.debug("streams queue is empty")
  return
@@ -1,55 +0,0 @@
- Metadata-Version: 2.4
- Name: omnata-plugin-runtime
- Version: 0.11.6a323
- Summary: Classes and common runtime components for building and running Omnata Plugins
- License-File: LICENSE
- Author: James Weakley
- Author-email: james.weakley@omnata.com
- Requires-Python: >=3.9,<=3.11
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Requires-Dist: annotated-types (<=0.6.0)
- Requires-Dist: certifi (<=2024.8.30)
- Requires-Dist: cffi (<=1.16.0)
- Requires-Dist: charset-normalizer (<=3.3.2)
- Requires-Dist: cryptography (<=43.0.0)
- Requires-Dist: filelock (<=3.13.1)
- Requires-Dist: idna (<=3.7)
- Requires-Dist: jinja2 (>=3.1.2,<=3.1.4)
- Requires-Dist: markupsafe (<=2.1.3)
- Requires-Dist: numpy (<=2.1.3)
- Requires-Dist: opentelemetry-api (<=1.23.0)
- Requires-Dist: packaging (<=24.1)
- Requires-Dist: pandas (<=2.2.3)
- Requires-Dist: platformdirs (<=3.10.0)
- Requires-Dist: protobuf (<=4.25.3)
- Requires-Dist: pyarrow (<=16.1.0)
- Requires-Dist: pycparser (<=2.21)
- Requires-Dist: pydantic (>=2,<=2.8.2)
- Requires-Dist: pydantic-core (<=2.21.0)
- Requires-Dist: pyjwt (<=2.8.0)
- Requires-Dist: pyopenssl (<=24.2.1)
- Requires-Dist: pytz (<=2024.1)
- Requires-Dist: pyyaml (<=6.0.1)
- Requires-Dist: requests (>=2,<=2.32.3)
- Requires-Dist: setuptools (<=72.1.0)
- Requires-Dist: snowflake-connector-python (>=3,<=3.12.0)
- Requires-Dist: snowflake-snowpark-python (>=1.20.0,<=1.24.0)
- Requires-Dist: snowflake-telemetry-python (<=0.5.0)
- Requires-Dist: tenacity (>=8,<=8.2.3)
- Requires-Dist: tomlkit (<=0.11.1)
- Requires-Dist: urllib3 (<=2.2.2)
- Requires-Dist: wheel (<=0.43.0)
- Requires-Dist: wrapt (<=1.14.1)
- Description-Content-Type: text/markdown
-
- # omnata-plugin-runtime
- This package is a runtime dependency for [Omnata Plugins](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins).
-
- It contains data classes, interfaces and application logic used to perform plugin operations.
-
- For instructions on creating plugins, visit our [docs site](https://docs.omnata.com/omnata-product-documentation/omnata-sync-for-snowflake/plugins/creating-plugins).
-
-
@@ -1,57 +0,0 @@
- [tool.poetry]
- name = "omnata-plugin-runtime"
- version = "0.11.6-a323"
- description = "Classes and common runtime components for building and running Omnata Plugins"
- authors = ["James Weakley <james.weakley@omnata.com>"]
- readme = "README.md"
- packages = [{include = "omnata_plugin_runtime", from = "src"}]
-
- [tool.poetry.dependencies]
- python = ">=3.9, <=3.11"
- snowflake-snowpark-python = ">=1.20.0,<=1.24.0" # latest version available on Snowflake Anaconda, but allow pinning to 1.20.0 for to_pandas_batches workaround
- snowflake-connector-python = "^3, <=3.12.0" # latest version available on Snowflake Anaconda
- cryptography = "<=43.0.0"
- annotated-types = "<=0.6.0"
- pycparser = "<=2.21"
- filelock = "<=3.13.1"
- pydantic-core = "<=2.21.0"
- # had to relax some of these thanks to snowcli pinning newer versions
- certifi = "<=2024.8.30" # latest version available on Snowflake Anaconda
- charset-normalizer = "<=3.3.2" # latest version available on Snowflake Anaconda
- idna = "<=3.7" # latest version available on Snowflake Anaconda
- jinja2 = ">=3.1.2,<=3.1.4" # 3.1.4 was latest version available on Snowflake Anaconda
- markupsafe = "<=2.1.3" # latest version available on Snowflake Anaconda
- numpy = "<=2.1.3" # latest version available on Snowflake Anaconda
- packaging = "<=24.1" # latest version available on Snowflake Anaconda
- pandas = "<=2.2.3" # latest version available on Snowflake Anaconda
- platformdirs = "<=3.10.0" # latest version available on Snowflake Anaconda
- pydantic = "^2, <=2.8.2" # latest version available on Snowflake Anaconda
- pyjwt = "<=2.8.0" # latest version available on Snowflake Anaconda
- pyopenssl = "<=24.2.1" # latest version available on Snowflake Anaconda
- pytz = "<=2024.1" # latest version available on Snowflake Anaconda
- requests = "^2, <=2.32.3" # latest version available on Snowflake Anaconda
- setuptools = "<=72.1.0" # latest version available on Snowflake Anaconda
- tomlkit = "<=0.11.1" # latest version available on Snowflake Anaconda
- tenacity = "^8, <=8.2.3" # latest version available on Snowflake Anaconda
- urllib3 = "<=2.2.2" # latest version available on Snowflake Anaconda
- wheel = "<=0.43.0" # latest version available on Snowflake Anaconda
- pyyaml = "<=6.0.1" # latest version available on Snowflake Anaconda
- cffi = "<=1.16.0" # latest version available on Snowflake Anaconda
- pyarrow = "<=16.1.0" # latest version available on Snowflake Anaconda
- wrapt = "<=1.14.1" # latest version available on Snowflake Anaconda
- opentelemetry-api = "<=1.23.0" # latest version available on Snowflake Anaconda
- snowflake-telemetry-python = "<=0.5.0" # latest version available on Snowflake Anaconda
- protobuf = "<=4.25.3" # latest version available on Snowflake Anaconda
-
- [tool.poetry.dev-dependencies]
- pytest = "^6.2.4"
- deepdiff = "^6"
- requests-mock = ">=1.9.3"
-
- [tool.pytest.ini_options]
- addopts = ["--import-mode=importlib"]
- testpaths = ["tests"]
-
- [build-system]
- requires = ["poetry-core"]
- build-backend = "poetry.core.masonry.api"
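
A note on the runtime change shown in the source diff above: in 0.12.0 each stream's `_apply_results` list holds typed wrappers (`RecordsToUploadResult`, `StateResult`, `CriteriaDeleteResult`) instead of a mix of DataFrames and dicts, and `apply_results_queue` scans the list backwards for the most recent `StateResult`, uploads everything enqueued before it (record batches and criteria deletes together), and retains anything enqueued after it for the next flush. The sketch below is a simplified, self-contained illustration of that scan only; the wrapper dataclasses are stand-ins named after the ones in the diff, and `flush_stream_queue` is a hypothetical helper, not part of the package's API.

```python
from dataclasses import dataclass
from typing import Any, List, Tuple, Union

import pandas

# Stand-in wrappers mirroring the dataclasses added in 0.12.0 (simplified).
@dataclass
class StateResult:
    new_state: Any

@dataclass
class RecordsToUploadResult:
    records: pandas.DataFrame

@dataclass
class CriteriaDeleteResult:
    criteria_deletes: pandas.DataFrame

QueueItem = Union[StateResult, RecordsToUploadResult, CriteriaDeleteResult]

def flush_stream_queue(
    stream_results: List[QueueItem],
) -> Tuple[List[pandas.DataFrame], List[pandas.DataFrame], Any, List[QueueItem]]:
    """Illustrates the checkpointing scan: everything before the latest
    StateResult is safe to upload; everything after it is retained."""
    # scan backwards for the most recent state update
    last_state_index = -1
    latest_state = None
    for i in range(len(stream_results) - 1, -1, -1):
        item = stream_results[i]
        if isinstance(item, StateResult):
            last_state_index = i
            latest_state = item.new_state
            break
    if last_state_index == -1:
        # no state update yet, so nothing can be checkpointed for this stream
        return [], [], None, stream_results

    uploadable = stream_results[:last_state_index]
    records = [x.records for x in uploadable
               if isinstance(x, RecordsToUploadResult) and len(x.records) > 0]
    deletes = [x.criteria_deletes for x in uploadable
               if isinstance(x, CriteriaDeleteResult) and len(x.criteria_deletes) > 0]
    remainder = stream_results[last_state_index + 1:]
    return records, deletes, latest_state, remainder

# Example: two record batches and a delete request enqueued before the state update,
# plus one batch enqueued after it that must wait for the next state update.
queue: List[QueueItem] = [
    RecordsToUploadResult(pandas.DataFrame({"ID": [1, 2]})),
    CriteriaDeleteResult(pandas.DataFrame([{"STREAM_NAME": "orders", "DELETE_CRITERIA": {"ID": 1}}])),
    RecordsToUploadResult(pandas.DataFrame({"ID": [3]})),
    StateResult(new_state={"cursor": "2024-01-01"}),
    RecordsToUploadResult(pandas.DataFrame({"ID": [4]})),  # retained
]
records, deletes, state, remainder = flush_stream_queue(queue)
assert state == {"cursor": "2024-01-01"}
assert len(records) == 2 and len(deletes) == 1 and len(remainder) == 1
```

In the runtime itself, the uploadable batches are then concatenated and written via `_apply_results_dataframe` and `_apply_criteria_deletes_dataframe`, and the latest state per stream is inserted into the state register using a query id captured after the upload, as the `apply_results_queue` hunk above shows.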