omnata-plugin-runtime 0.11.7a324__tar.gz → 0.11.7a325__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnata-plugin-runtime
3
- Version: 0.11.7a324
3
+ Version: 0.11.7a325
4
4
  Summary: Classes and common runtime components for building and running Omnata Plugins
5
5
  License-File: LICENSE
6
6
  Author: James Weakley
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "omnata-plugin-runtime"
3
- version = "0.11.7-a324"
3
+ version = "0.11.7-a325"
4
4
  description = "Classes and common runtime components for building and running Omnata Plugins"
5
5
  authors = ["James Weakley <james.weakley@omnata.com>"]
6
6
  readme = "README.md"
@@ -49,6 +49,11 @@ from snowflake.snowpark.functions import col
49
49
  from tenacity import Retrying, stop_after_attempt, wait_fixed, retry_if_exception_message
50
50
 
51
51
  from .logging import OmnataPluginLogHandler, logger, tracer, meter
52
+ stream_duration_gauge = meter.create_gauge(
53
+ name="omnata.sync_run.stream_duration",
54
+ description="The duration of stream processing",
55
+ unit="s",
56
+ )
52
57
  from opentelemetry import context
53
58
  import math
54
59
  import numpy as np
@@ -1097,6 +1102,8 @@ class InboundSyncRequest(SyncRequest):
1097
1102
  self.state_register_table_name = results_table.get_fully_qualified_state_register_table_name()
1098
1103
  # this is keyed on stream name, each containing a list of dataframes and state updates mixed
1099
1104
  self._apply_results: Dict[str, List[pandas.DataFrame | Dict]] = {}
1105
+ # track the start time of each stream so we can calculate durations. Each value is an epoch timestamp in seconds, as returned by time.time()
1106
+ self._stream_start_times: Dict[str, int] = {}
1100
1107
 
1101
1108
  def apply_results_queue(self):
1102
1109
  """
@@ -1350,6 +1357,13 @@ class InboundSyncRequest(SyncRequest):
1350
1357
  if sum([x.memory_usage(index=True).sum() for x in all_dfs if isinstance(x, pandas.DataFrame)]) > 200000000:
1351
1358
  logger.debug(f"Applying criteria deletes queue immediately due to combined dataframe size")
1352
1359
  self.apply_results_queue()
1360
+
1361
+ def mark_stream_started(self, stream_name: str):
1362
+ """
1363
+ Marks a stream as started, this is called automatically per stream when using @managed_inbound_processing.
1364
+ """
1365
+ logger.debug(f"Marking stream {stream_name} as started locally")
1366
+ self._stream_start_times[stream_name] = time.time()
1353
1367
 
1354
1368
  def mark_stream_complete(self, stream_name: str):
1355
1369
  """
@@ -1357,6 +1371,20 @@ class InboundSyncRequest(SyncRequest):
1357
1371
  If @managed_inbound_processing is not used, call this whenever a stream has finished receiving records.
1358
1372
  """
1359
1373
  logger.debug(f"Marking stream {stream_name} as completed locally")
1374
+ if stream_name in self._stream_start_times:
1375
+ start_time = self._stream_start_times[stream_name]
1376
+ duration = time.time() - start_time
1377
+ stream_duration_gauge.set(
1378
+ amount=duration,
1379
+ attributes={
1380
+ "stream_name": stream_name,
1381
+ "sync_run_id": str(self._run_id),
1382
+ "sync_id": str(self._sync_id),
1383
+ "branch_name": str(self._branch_name) if self._branch_name is not None else 'main',
1384
+ "sync_direction": "inbound",
1385
+ "plugin_id": self.plugin_instance.get_manifest().plugin_id,
1386
+ },
1387
+ )
1360
1388
  with self._apply_results_lock:
1361
1389
  self._completed_streams.append(stream_name)
1362
1390
  # dedup just in case it's called twice
@@ -2333,12 +2361,8 @@ def __managed_inbound_processing_worker(
2333
2361
  sync_request: InboundSyncRequest = cast(
2334
2362
  InboundSyncRequest, plugin_class_obj._sync_request
2335
2363
  ) # pylint: disable=protected-access
2336
- stream_duration_gauge = meter.create_gauge(
2337
- name="omnata.sync_run.stream_duration",
2338
- description="The duration of stream processing",
2339
- unit="s",
2340
- )
2341
- start_time = time.time()
2364
+ if stream.stream_name not in sync_request._stream_start_times:
2365
+ sync_request.mark_stream_started(stream.stream_name)
2342
2366
  # restore the first argument, was originally the dataframe/generator but now it's the appropriately sized dataframe
2343
2367
  try:
2344
2368
  with tracer.start_as_current_span("managed_inbound_processing") as managed_inbound_processing_span:
@@ -2370,19 +2394,6 @@ def __managed_inbound_processing_worker(
2370
2394
  omnata_plugin_logger.error(f"{type(e).__name__} syncing stream {stream.stream_name}",
2371
2395
  exc_info=True,
2372
2396
  extra={'stream_name':stream.stream_name})
2373
- finally:
2374
- duration = time.time() - start_time
2375
- stream_duration_gauge.set(
2376
- amount=duration,
2377
- attributes={
2378
- "stream_name": stream.stream_name,
2379
- "sync_run_id": str(sync_request._run_id),
2380
- "sync_id": str(sync_request._sync_id),
2381
- "branch_name": str(sync_request._branch_name) if sync_request._branch_name is not None else 'main',
2382
- "sync_direction": "inbound",
2383
- "plugin_id": plugin_class_obj.get_manifest().plugin_id,
2384
- },
2385
- )
2386
2397
  except queue.Empty:
2387
2398
  logger.debug("streams queue is empty")
2388
2399
  return