port-ocean 0.28.2__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (53)
  1. integrations/_infra/Dockerfile.Deb +6 -1
  2. integrations/_infra/Dockerfile.local +1 -0
  3. port_ocean/clients/port/authentication.py +19 -0
  4. port_ocean/clients/port/client.py +3 -0
  5. port_ocean/clients/port/mixins/actions.py +93 -0
  6. port_ocean/clients/port/mixins/blueprints.py +0 -12
  7. port_ocean/clients/port/mixins/entities.py +79 -44
  8. port_ocean/clients/port/mixins/integrations.py +7 -2
  9. port_ocean/config/settings.py +35 -3
  10. port_ocean/context/ocean.py +7 -5
  11. port_ocean/core/defaults/initialize.py +12 -5
  12. port_ocean/core/event_listener/__init__.py +7 -0
  13. port_ocean/core/event_listener/actions_only.py +42 -0
  14. port_ocean/core/event_listener/base.py +4 -1
  15. port_ocean/core/event_listener/factory.py +18 -9
  16. port_ocean/core/event_listener/http.py +4 -3
  17. port_ocean/core/event_listener/kafka.py +3 -2
  18. port_ocean/core/event_listener/once.py +5 -2
  19. port_ocean/core/event_listener/polling.py +4 -3
  20. port_ocean/core/event_listener/webhooks_only.py +3 -2
  21. port_ocean/core/handlers/actions/__init__.py +7 -0
  22. port_ocean/core/handlers/actions/abstract_executor.py +150 -0
  23. port_ocean/core/handlers/actions/execution_manager.py +434 -0
  24. port_ocean/core/handlers/entity_processor/jq_entity_processor.py +479 -17
  25. port_ocean/core/handlers/entity_processor/jq_input_evaluator.py +137 -0
  26. port_ocean/core/handlers/port_app_config/models.py +4 -2
  27. port_ocean/core/handlers/resync_state_updater/updater.py +4 -2
  28. port_ocean/core/handlers/webhook/abstract_webhook_processor.py +16 -0
  29. port_ocean/core/handlers/webhook/processor_manager.py +30 -12
  30. port_ocean/core/integrations/mixins/sync_raw.py +10 -5
  31. port_ocean/core/integrations/mixins/utils.py +250 -29
  32. port_ocean/core/models.py +35 -2
  33. port_ocean/core/utils/utils.py +16 -5
  34. port_ocean/exceptions/execution_manager.py +22 -0
  35. port_ocean/helpers/metric/metric.py +1 -1
  36. port_ocean/helpers/retry.py +4 -40
  37. port_ocean/log/logger_setup.py +2 -2
  38. port_ocean/ocean.py +31 -5
  39. port_ocean/tests/clients/port/mixins/test_entities.py +71 -5
  40. port_ocean/tests/core/event_listener/test_kafka.py +14 -7
  41. port_ocean/tests/core/handlers/actions/test_execution_manager.py +837 -0
  42. port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +932 -1
  43. port_ocean/tests/core/handlers/entity_processor/test_jq_input_evaluator.py +932 -0
  44. port_ocean/tests/core/handlers/webhook/test_processor_manager.py +3 -1
  45. port_ocean/tests/core/utils/test_get_port_diff.py +164 -0
  46. port_ocean/tests/helpers/test_retry.py +241 -1
  47. port_ocean/tests/utils/test_cache.py +240 -0
  48. port_ocean/utils/cache.py +45 -9
  49. {port_ocean-0.28.2.dist-info → port_ocean-0.29.0.dist-info}/METADATA +2 -1
  50. {port_ocean-0.28.2.dist-info → port_ocean-0.29.0.dist-info}/RECORD +53 -43
  51. {port_ocean-0.28.2.dist-info → port_ocean-0.29.0.dist-info}/LICENSE.md +0 -0
  52. {port_ocean-0.28.2.dist-info → port_ocean-0.29.0.dist-info}/WHEEL +0 -0
  53. {port_ocean-0.28.2.dist-info → port_ocean-0.29.0.dist-info}/entry_points.txt +0 -0
port_ocean/core/handlers/actions/execution_manager.py
@@ -0,0 +1,434 @@
+import time
+from typing import Dict, Set
+from loguru import logger
+from port_ocean.core.models import (
+    ActionRun,
+    RunStatus,
+)
+import asyncio
+from port_ocean.core.handlers.actions.abstract_executor import AbstractExecutor
+from port_ocean.core.handlers.queue.abstract_queue import AbstractQueue
+from port_ocean.core.handlers.queue.local_queue import LocalQueue
+from port_ocean.core.handlers.webhook.processor_manager import (
+    LiveEventsProcessorManager,
+)
+from port_ocean.context.ocean import ocean
+from port_ocean.core.models import IntegrationFeatureFlag
+from port_ocean.exceptions.execution_manager import (
+    DuplicateActionExecutorError,
+    PartitionKeyNotFoundError,
+    RunAlreadyAcknowledgedError,
+)
+from port_ocean.utils.signal import SignalHandler
+
+RATE_LIMIT_MAX_BACKOFF_SECONDS = 10
+GLOBAL_SOURCE = "__global__"
+
+
+class ExecutionManager:
+    """
+    Orchestrates action executors, polling, and webhook handlers for integration actions.
+
+    The manager uses a queue-based system with support for:
+    - Global queue for non-partitioned actions
+    - Partition-specific queues for actions requiring sequential execution
+    - Round-robin worker distribution
+    - Deduplication of runs
+    - High watermark-based flow control
+
+    Attributes:
+        _webhook_manager (LiveEventsProcessorManager): Manages webhook processors for async updates
+        _polling_task (asyncio.Task[None] | None): Task that polls for new action runs
+        _workers_pool (set[asyncio.Task[None]]): Pool of worker tasks processing runs
+        _actions_executors (Dict[str, AbstractExecutor]): Registered action executors
+        _is_shutting_down (asyncio.Event): Event flag for graceful shutdown
+        _global_queue (LocalQueue[ActionRun]): Queue for non-partitioned actions
+        _partition_queues (Dict[str, AbstractQueue[ActionRun]]): Queues for partitioned actions
+        _deduplication_set (Set[str]): Set of run IDs for deduplication
+        _queues_locks (Dict[str, asyncio.Lock]): Locks for queue access synchronization
+        _active_sources (AbstractQueue[str]): Queue of active sources (global or partition-specific) used for round-robin distribution of work among workers
+        _workers_count (int): Number of workers to start
+        _high_watermark (int): Maximum total runs in all queues
+        _poll_check_interval_seconds (int): Seconds between polling attempts
+        _visibility_timeout_ms (int): Visibility timeout for runs
+        _max_wait_seconds_before_shutdown (float): Maximum wait time during shutdown
+
+    Example:
+        ```python
+        # Create and configure execution manager
+        manager = ExecutionManager(
+            webhook_manager=webhook_mgr,
+            signal_handler=signal_handler,
+            workers_count=3,
+            runs_buffer_high_watermark=1000,
+            poll_check_interval_seconds=5,
+            visibility_timeout_ms=30000,
+            max_wait_seconds_before_shutdown=30.0
+        )
+
+        # Register action executors
+        manager.register_executor(MyActionExecutor())
+
+        # Start processing
+        await manager.start_processing_action_runs()
+        ```
+    """
+
+    def __init__(
+        self,
+        webhook_manager: LiveEventsProcessorManager,
+        signal_handler: SignalHandler,
+        runs_buffer_high_watermark: int,
+        workers_count: int,
+        poll_check_interval_seconds: int,
+        visibility_timeout_ms: int,
+        max_wait_seconds_before_shutdown: float,
+    ):
+        self._webhook_manager = webhook_manager
+        self._polling_task: asyncio.Task[None] | None = None
+        self._workers_pool: set[asyncio.Task[None]] = set()
+        self._actions_executors: Dict[str, AbstractExecutor] = {}
+        self._is_shutting_down = asyncio.Event()
+        self._global_queue = LocalQueue[ActionRun]()
+        self._partition_queues: Dict[str, AbstractQueue[ActionRun]] = {}
+        self._deduplication_set: Set[str] = set()
+        self._queues_locks: Dict[str, asyncio.Lock] = {GLOBAL_SOURCE: asyncio.Lock()}
+        self._active_sources: AbstractQueue[str] = LocalQueue[str]()
+        self._workers_count: int = workers_count
+        self._high_watermark: int = runs_buffer_high_watermark
+        self._poll_check_interval_seconds: int = poll_check_interval_seconds
+        self._visibility_timeout_ms: int = visibility_timeout_ms
+        self._max_wait_seconds_before_shutdown: float = max_wait_seconds_before_shutdown
+
+        signal_handler.register(self.shutdown)
+
+    def register_executor(self, executor: AbstractExecutor) -> None:
+        """
+        Register an action executor with the execution manager.
+        """
+        action_name = executor.ACTION_NAME
+        if action_name in self._actions_executors:
+            raise DuplicateActionExecutorError(
+                f"Executor for action '{action_name}' is already registered"
+            )
+
+        webhook_processor_cls = executor.WEBHOOK_PROCESSOR_CLASS
+        if webhook_processor_cls:
+            self._webhook_manager.register_processor(
+                executor.WEBHOOK_PATH,
+                webhook_processor_cls,
+            )
+            logger.info(
+                "Registered executor webhook processor",
+                action=action_name,
+                webhook_path=executor.WEBHOOK_PATH,
+            )
+
+        self._actions_executors[action_name] = executor
+        logger.info("Registered action executor", action=action_name)
+
+    async def start_processing_action_runs(self) -> None:
+        """
+        Start polling and processing action runs for all registered actions.
+        """
+        flags = await ocean.port_client.get_organization_feature_flags()
+        if IntegrationFeatureFlag.OCEAN_ACTIONS_PROCESSING_ENABLED not in flags:
+            logger.warning(
+                "Actions processing is not allowed for your organization, skipping actions processing"
+            )
+            return
+
+        if not await ocean.port_client.auth.is_machine_user():
+            logger.warning(
+                "Actions processing is allowed only for machine users, skipping actions processing"
+            )
+            return
+
+        self._polling_task = asyncio.create_task(self._poll_action_runs())
+
+        workers_count = max(1, self._workers_count)
+        for _ in range(workers_count):
+            task = asyncio.create_task(self._process_actions_runs())
+            self._workers_pool.add(task)
+            task.add_done_callback(self._workers_pool.discard)
+
+    async def _poll_action_runs(self) -> None:
+        """
+        Poll action runs for all registered actions.
+        Respects the high watermark for queue size management.
+        """
+        while True:
+            try:
+                # Yield control to the event loop to handle any pending cancellation requests.
+                await asyncio.sleep(0)
+                queues_size = await self._get_queues_size()
+                if queues_size >= self._high_watermark:
+                    logger.info(
+                        "Queue size at high watermark, waiting for processing to catch up",
+                        current_size=queues_size,
+                        high_watermark=self._high_watermark,
+                    )
+                    await asyncio.sleep(self._poll_check_interval_seconds)
+                    continue
+
+                poll_limit = self._high_watermark - queues_size
+                runs: list[ActionRun] = await ocean.port_client.claim_pending_runs(
+                    limit=poll_limit,
+                    visibility_timeout_ms=self._visibility_timeout_ms,
+                )
+
+                if not runs:
+                    logger.debug(
+                        "No runs to process, waiting for next poll",
+                        current_size=queues_size,
+                        high_watermark=self._high_watermark,
+                    )
+                    await asyncio.sleep(self._poll_check_interval_seconds)
+                    continue
+
+                for run in runs:
+                    try:
+                        action_type = run.payload.integrationActionType
+                        if action_type not in self._actions_executors:
+                            logger.warning(
+                                "No Executors registered to handle this action, skipping run...",
+                                action_type=action_type,
+                                run_id=run.id,
+                            )
+                            continue

+                        if run.id in self._deduplication_set:
+                            logger.info(
+                                "Run is already being processed, skipping...",
+                                run_id=run.id,
+                            )
+                            continue
+
+                        partition_key = await self._actions_executors[
+                            action_type
+                        ]._get_partition_key(run)
+
+                        queue_name = (
+                            GLOBAL_SOURCE
+                            if not partition_key
+                            else f"{action_type}:{partition_key}"
+                        )
+                        await self._add_run_to_queue(run, queue_name)
+                    except PartitionKeyNotFoundError as e:
+                        logger.warning(
+                            "Partition key not found in invocation payload, skipping run...",
+                            run_id=run.id,
+                            action_type=action_type,
+                            error=e,
+                        )
+                    except Exception as e:
+                        logger.exception(
+                            "Error adding run to queue",
+                            run_id=run.id,
+                            action_type=action_type,
+                            error=e,
+                        )
+            except Exception as e:
+                logger.exception(
+                    "Unexpected error in poll action runs, will attempt to re-poll",
+                    error=e,
+                )
+
+    async def _get_queues_size(self) -> int:
+        """
+        Get the total size of all queues (global and partition queues).
+        """
+        global_size = await self._global_queue.size()
+        partition_sizes = []
+        for queue in self._partition_queues.values():
+            partition_sizes.append(await queue.size())
+        return global_size + sum(partition_sizes)
+
+    async def _add_run_to_queue(
+        self,
+        run: ActionRun,
+        queue_name: str,
+    ) -> None:
+        """
+        Add a run to the queue; if the queue was empty, add its source to the active sources.
+        """
+        if queue_name != GLOBAL_SOURCE and queue_name not in self._partition_queues:
+            self._partition_queues[queue_name] = LocalQueue()
+            self._queues_locks[queue_name] = asyncio.Lock()
+
+        queue = (
+            self._global_queue
+            if queue_name == GLOBAL_SOURCE
+            else self._partition_queues[queue_name]
+        )
+        async with self._queues_locks[queue_name]:
+            if await queue.size() == 0:
+                await self._active_sources.put(queue_name)
+            self._deduplication_set.add(run.id)
+            logger.info(f"Adding run to queue {queue_name}", run_id=run.id)
+            await queue.put(run)
+
+    async def _add_source_if_not_empty(self, source_name: str) -> None:
+        """
+        Add a source back to the active sources if its queue is not empty.
+        """
+        async with self._queues_locks[source_name]:
+            queue = (
+                self._global_queue
+                if source_name == GLOBAL_SOURCE
+                else self._partition_queues[source_name]
+            )
+            if await queue.size() > 0:
+                await self._active_sources.put(source_name)
+
+    async def _process_actions_runs(self) -> None:
+        """
+        Round-robin worker across the global and partition queues.
+        """
+        while not self._is_shutting_down.is_set():
+            try:
+                # Enable graceful worker shutdown when there are no active sources to process.
+                # Using asyncio.Queue.get without a timeout would block indefinitely if active sources are empty.
+                try:
+                    source = await asyncio.wait_for(
+                        self._active_sources.get(),
+                        timeout=self._max_wait_seconds_before_shutdown / 3,
+                    )
+                except asyncio.TimeoutError:
+                    continue
+
+                if source == GLOBAL_SOURCE:
+                    await self._handle_global_queue_once()
+                else:
+                    await self._handle_partition_queue_once(source)
+            except Exception as e:
+                logger.exception("Worker processing error", source=source, error=e)
+
+    async def _handle_global_queue_once(self) -> None:
+        try:
+            async with self._queues_locks[GLOBAL_SOURCE]:
+                run = await self._global_queue.get()
+                if run.id in self._deduplication_set:
+                    self._deduplication_set.remove(run.id)
+
+            await self._add_source_if_not_empty(GLOBAL_SOURCE)
+            await self._execute_run(run)
+        finally:
+            await self._global_queue.commit()
+
+    async def _handle_partition_queue_once(self, partition_name: str) -> None:
+        """
+        Try to process a single run from the given partition queue.
+        The partition's source is re-added to the active sources after commit if more runs are pending.
+        """
+        queue = self._partition_queues[partition_name]
+        try:
+            async with self._queues_locks[partition_name]:
+                run = await queue.get()
+                if run.id in self._deduplication_set:
+                    self._deduplication_set.remove(run.id)
+                await self._execute_run(run)
+        finally:
+            await queue.commit()
+            await self._add_source_if_not_empty(partition_name)
+
+    async def _execute_run(self, run: ActionRun) -> None:
+        """
+        Execute a run using its registered executor.
+        """
+        with logger.contextualize(
+            run_id=run.id, action=run.payload.integrationActionType
+        ):
+            error_summary: str | None = None
+            try:
+                executor = self._actions_executors[run.payload.integrationActionType]
+                while (
+                    await executor.is_close_to_rate_limit()
+                    and not self._is_shutting_down.is_set()
+                ):
+                    backoff_seconds = min(
+                        RATE_LIMIT_MAX_BACKOFF_SECONDS,
+                        await executor.get_remaining_seconds_until_rate_limit(),
+                    )
+                    logger.info(
+                        "Encountered rate limit, will attempt to re-run in {backoff_seconds} seconds",
+                        backoff_seconds=backoff_seconds,
+                    )
+                    await ocean.port_client.post_run_log(
+                        run.id,
+                        f"Delayed due to low remaining rate limit. Will attempt to re-run in {backoff_seconds} seconds",
+                    )
+                    await asyncio.sleep(backoff_seconds)
+
+                if self._is_shutting_down.is_set():
+                    logger.warning(
+                        "Shutting down execution manager, skipping execution"
+                    )
+                    return
+
+                await ocean.port_client.acknowledge_run(run.id)
+                logger.debug("Run acknowledged successfully")
+            except RunAlreadyAcknowledgedError:
+                logger.warning(
+                    "Run already being processed by another worker, skipping execution",
+                )
+                return
+            except Exception as e:
+                logger.error(
+                    "Error occurred while trying to trigger run execution",
+                    error=e,
+                )
+                error_summary = "Failed to trigger run execution"
+
+            try:
+                start_time = time.monotonic()
+                await executor.execute(run)
+                logger.info(
+                    "Run executed successfully",
+                    elapsed_ms=(time.monotonic() - start_time) * 1000,
+                )
+            except Exception as e:
+                logger.exception("Error executing run", error=e)
+                error_summary = f"Failed to execute run: {str(e)}"
+
+            if error_summary:
+                await ocean.port_client.patch_run(
+                    run.id,
+                    {
+                        "summary": error_summary,
+                        "status": RunStatus.FAILURE,
+                    },
+                    should_raise=False,
+                )
+
+    async def _gracefully_cancel_task(self, task: asyncio.Task[None] | None) -> None:
+        """
+        Gracefully cancel a task.
+        """
+        if task and not task.done():
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+
+    async def shutdown(self) -> None:
+        """
+        Gracefully shut down the poller and all action queue workers.
+        """
+        logger.warning("Shutting down execution manager")
+
+        self._is_shutting_down.set()
+        logger.info("Waiting for workers to complete their current tasks...")
+
+        try:
+            await asyncio.wait_for(
+                asyncio.gather(
+                    self._gracefully_cancel_task(self._polling_task),
+                    *(worker for worker in self._workers_pool),
+                ),
+                timeout=self._max_wait_seconds_before_shutdown,
+            )
+            logger.info("All workers completed gracefully")
+        except asyncio.TimeoutError:
+            logger.warning("Shutdown timed out waiting for workers to complete")