port-ocean 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- integrations/_infra/Dockerfile.Deb +1 -0
- integrations/_infra/Dockerfile.local +1 -0
- port_ocean/clients/port/authentication.py +19 -0
- port_ocean/clients/port/client.py +3 -0
- port_ocean/clients/port/mixins/actions.py +93 -0
- port_ocean/clients/port/mixins/blueprints.py +0 -12
- port_ocean/clients/port/mixins/entities.py +79 -44
- port_ocean/clients/port/mixins/integrations.py +7 -2
- port_ocean/config/settings.py +35 -3
- port_ocean/context/ocean.py +7 -5
- port_ocean/core/defaults/initialize.py +12 -5
- port_ocean/core/event_listener/__init__.py +7 -0
- port_ocean/core/event_listener/actions_only.py +42 -0
- port_ocean/core/event_listener/base.py +4 -1
- port_ocean/core/event_listener/factory.py +18 -9
- port_ocean/core/event_listener/http.py +4 -3
- port_ocean/core/event_listener/kafka.py +3 -2
- port_ocean/core/event_listener/once.py +5 -2
- port_ocean/core/event_listener/polling.py +4 -3
- port_ocean/core/event_listener/webhooks_only.py +3 -2
- port_ocean/core/handlers/actions/__init__.py +7 -0
- port_ocean/core/handlers/actions/abstract_executor.py +150 -0
- port_ocean/core/handlers/actions/execution_manager.py +434 -0
- port_ocean/core/handlers/entity_processor/jq_entity_processor.py +479 -17
- port_ocean/core/handlers/entity_processor/jq_input_evaluator.py +137 -0
- port_ocean/core/handlers/port_app_config/models.py +4 -2
- port_ocean/core/handlers/webhook/abstract_webhook_processor.py +16 -0
- port_ocean/core/handlers/webhook/processor_manager.py +30 -12
- port_ocean/core/integrations/mixins/sync_raw.py +4 -4
- port_ocean/core/integrations/mixins/utils.py +250 -29
- port_ocean/core/models.py +35 -2
- port_ocean/core/utils/utils.py +16 -5
- port_ocean/exceptions/execution_manager.py +22 -0
- port_ocean/helpers/retry.py +4 -40
- port_ocean/log/logger_setup.py +2 -2
- port_ocean/ocean.py +30 -4
- port_ocean/tests/clients/port/mixins/test_entities.py +71 -5
- port_ocean/tests/core/event_listener/test_kafka.py +14 -7
- port_ocean/tests/core/handlers/actions/test_execution_manager.py +837 -0
- port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +932 -1
- port_ocean/tests/core/handlers/entity_processor/test_jq_input_evaluator.py +932 -0
- port_ocean/tests/core/handlers/webhook/test_processor_manager.py +3 -1
- port_ocean/tests/core/utils/test_get_port_diff.py +164 -0
- port_ocean/tests/helpers/test_retry.py +241 -1
- port_ocean/tests/utils/test_cache.py +240 -0
- port_ocean/utils/cache.py +45 -9
- {port_ocean-0.28.5.dist-info → port_ocean-0.29.0.dist-info}/METADATA +2 -1
- {port_ocean-0.28.5.dist-info → port_ocean-0.29.0.dist-info}/RECORD +51 -41
- {port_ocean-0.28.5.dist-info → port_ocean-0.29.0.dist-info}/LICENSE.md +0 -0
- {port_ocean-0.28.5.dist-info → port_ocean-0.29.0.dist-info}/WHEEL +0 -0
- {port_ocean-0.28.5.dist-info → port_ocean-0.29.0.dist-info}/entry_points.txt +0 -0
port_ocean/core/handlers/entity_processor/jq_input_evaluator.py (new file)
@@ -0,0 +1,137 @@
+import re
+from enum import Enum
+
+# This file is used to classify the input that a jq expression to run on.
+# It is used to determine if the jq expression can be executed without providing any JSON input (const expressions)
+# or on a single item (in items to parse situation)
+# or on all the data
+
+
+class InputClassifyingResult(Enum):
+    NONE = 1
+    SINGLE = 2
+    ALL = 3
+
+
+# Functions/filters that (even without ".") still require/assume input
+_INPUT_DEPENDENT_FUNCS = r"""
+\b(
+map|select|reverse|sort|sort_by|unique|unique_by|group_by|flatten|transpose|
+split|explode|join|add|length|has|in|index|indices|contains|
+paths|leaf_paths|keys|keys_unsorted|values|to_entries|with_entries|from_entries|
+del|delpaths|walk|reduce|foreach|input|inputs|limit|first|last|nth|
+while|until|recurse|recurse_down|bsearch|combinations|permutations
+)\b
+"""
+
+_INPUT_DEPENDENT_RE = re.compile(_INPUT_DEPENDENT_FUNCS, re.VERBOSE)
+
+
+# String literal handling (jq uses double quotes for strings)
+_STRING_LITERAL_RE = re.compile(r'"(?:\\.|[^"\\])*"')
+_STRING_ONLY_RE = re.compile(r'^\s*"(?:\\.|[^"\\])*"\s*$')
+_NUMBER_ONLY_RE = re.compile(r"^\s*-?\d+(\.\d+)?\s*$")
+
+
+def _mask_strings(expr: str) -> str:
+    """
+    Replace string literals with 'S' strings so '.' inside quotes don't count.
+    Example:
+    - '"this is a string"' ---> 'S'
+    - '"string" + .field' ---> 'S + .field'
+    """
+    return _STRING_LITERAL_RE.sub("S", expr)
+
+
+def _mask_numbers(expr: str) -> str:
+    """
+    Replace number literals with 'N' so decimal points in numbers don't count as input references.
+    Example:
+    - '3.14' ---> 'N'
+    - '42 + 3.14' ---> 'N + N'
+    """
+    # Pattern to match numbers (integers and decimals, with optional sign)
+    number_pattern = re.compile(r"[-+]?\d+(?:\.\d+)?")
+    return number_pattern.sub("N", expr)
+
+
+def can_expression_run_with_no_input(selector_query: str) -> bool:
+    """
+    Returns True if the jq expression can be executed without providing any JSON input.
+    Rules:
+    - Whitespace-only => No Input Required
+    - A pure string literal => No Input Required (even if it contains '.')
+    - After masking strings, if it contains '.' => Input Required
+    - Disallow known input-dependent functions (functions that require input)
+    - After masking strings, if it contains only operators and numbers and 'S' => No Input Required
+    - Allow null/true/false/number/range/empty, and array/object literals that
+      don't reference input (no '.' after masking strings) => No Input Required
+    Example:
+    - blueprint: '"newRelicService"' in mapping, selector_query param would be '"newRelicService"' => No Input Required
+    """
+    s = selector_query.strip()
+    if s == "":
+        return True  # whitespace-only
+
+    # Pure string literal is nullary
+    if _STRING_ONLY_RE.match(s):
+        return True
+
+    # First mask strings, then mask numbers to prevent decimal points in numbers from being treated as input references
+    masked = _mask_strings(s).strip()
+    masked = _mask_numbers(masked).strip()
+
+    # If it contains any known input-dependent functions, don't shortcut
+    if _INPUT_DEPENDENT_RE.search(masked):
+        return False
+
+    # If it contains only operators and 'S'/'N', it can be executed with no input
+    # Example:
+    # - '"abc" + "def"' ---> 'S + S' => No Input Required
+    # - '3.14 + 2.5' ---> 'N + N' => No Input Required
+    # if re.fullmatch(
+    #     r"(?:S|N)(?:\s*[+\-*/]\s*(?:S|N))*",
+    #     masked,
+    # ):
+    #     return True
+
+    if "." not in masked:
+        return True
+
+    return False
+
+
+def _can_expression_run_on_single_item(expr: str, key: str) -> bool:
+    """
+    Detect `.key` outside of quotes, as a standalone path segment beginning
+    after a non-word boundary (start, space, |, (, [, {, , or :) and not part
+    of `.something.key`.
+    assuming key = 'item'
+    Examples:
+    - .item.yaeli => true
+    - map(.item.yaeli) => true
+    - .body.item => false
+    """
+    if not key:
+        return False
+
+    masked = _mask_strings(expr)
+    masked = _mask_numbers(masked)
+    pattern = re.compile(rf"(?<![A-Za-z0-9_])\.{re.escape(key)}(?![A-Za-z0-9_])")
+    return bool(pattern.search(masked))
+
+
+def classify_input(
+    selector_query: str, single_item_key: str | None = None
+) -> InputClassifyingResult:
+    """
+    Returns the input evaluation result for the jq expression.
+    Conservative: requires NO '.' and must match a known nullary-safe pattern.
+    """
+    if can_expression_run_with_no_input(selector_query):
+        return InputClassifyingResult.NONE
+    if single_item_key and _can_expression_run_on_single_item(
+        selector_query, single_item_key
+    ):
+        return InputClassifyingResult.SINGLE
+    return InputClassifyingResult.ALL
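The new classifier drives how `items_to_parse` mappings are evaluated downstream. A minimal usage sketch (the expressions are illustrative, not taken from the package):

    from port_ocean.core.handlers.entity_processor.jq_input_evaluator import (
        InputClassifyingResult,
        classify_input,
    )

    # Constant expressions need no JSON input at all
    assert classify_input('"newRelicService"') is InputClassifyingResult.NONE
    assert classify_input("1 + 2") is InputClassifyingResult.NONE

    # A reference rooted at the items-to-parse key can be evaluated per item
    assert (
        classify_input(".item.name", single_item_key="item")
        is InputClassifyingResult.SINGLE
    )

    # Anything else falls back to running against the full data
    assert (
        classify_input(".body.item", single_item_key="item")
        is InputClassifyingResult.ALL
    )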
port_ocean/core/handlers/port_app_config/models.py
@@ -29,7 +29,9 @@ class EntityMapping(BaseModel):
 
     @property
     def is_using_search_identifier(self) -> bool:
-        return isinstance(self.identifier, dict)
+        return isinstance(self.identifier, dict) or isinstance(
+            self.identifier, IngestSearchQuery
+        )
 
 
 class MappingsConfig(BaseModel):
@@ -39,7 +41,7 @@ class MappingsConfig(BaseModel):
 class PortResourceConfig(BaseModel):
     entity: MappingsConfig
     items_to_parse: str | None = Field(alias="itemsToParse")
-    items_to_parse_name: str
+    items_to_parse_name: str = Field(alias="itemsToParseName", default="item")
 
 
 class Selector(BaseModel):
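The practical effect of the `items_to_parse_name` change is that mappings may now omit `itemsToParseName` and get `"item"` back. A minimal sketch with a stand-in model (hypothetical, not the real `PortResourceConfig`, which has more required fields):

    from pydantic import BaseModel, Field

    class ParseConfig(BaseModel):  # hypothetical stand-in
        items_to_parse: str | None = Field(alias="itemsToParse", default=None)
        items_to_parse_name: str = Field(alias="itemsToParseName", default="item")

    # Omitted in the mapping -> defaults to "item"
    assert ParseConfig(**{"itemsToParse": ".file.content.items"}).items_to_parse_name == "item"
    # An explicit alias still wins
    assert ParseConfig(**{"itemsToParseName": "row"}).items_to_parse_name == "row"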
port_ocean/core/handlers/webhook/abstract_webhook_processor.py
@@ -1,4 +1,5 @@
 from abc import ABC, abstractmethod
+from enum import StrEnum
 from loguru import logger
 
 from port_ocean.core.handlers.port_app_config.models import ResourceConfig
@@ -12,6 +13,17 @@ from .webhook_event import (
 )
 
 
+class WebhookProcessorType(StrEnum):
+    """Type of webhook processor"""
+
+    # For action-related webhooks
+    # (e.g. update finished action using the workflow runs webhook)
+    ACTION = "action"
+    # For regular webhooks
+    # (e.g. repository events that should be reflected as Entities in Port)
+    WEBHOOK = "webhook"
+
+
 class AbstractWebhookProcessor(ABC):
     """
     Abstract base class for webhook processors
@@ -47,6 +59,10 @@ class AbstractWebhookProcessor(ABC):
         self.event = event
         self.retry_count = 0
 
+    @classmethod
+    def get_processor_type(cls) -> WebhookProcessorType:
+        return WebhookProcessorType.WEBHOOK
+
     async def on_error(self, error: Exception) -> None:
        """Hook to handle errors during processing. Override if needed"""
        delay = self.calculate_retry_delay()
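Integrations opt into the new action flow by overriding this classmethod. A sketch (the subclass name and body are illustrative, and the other required abstract methods are omitted):

    from port_ocean.core.handlers.webhook.abstract_webhook_processor import (
        AbstractWebhookProcessor,
        WebhookProcessorType,
    )

    class WorkflowRunProcessor(AbstractWebhookProcessor):  # hypothetical
        @classmethod
        def get_processor_type(cls) -> WebhookProcessorType:
            # ACTION processors skip kind/resource matching in the manager
            # and are invoked with resource=None (see processor_manager below)
            return WebhookProcessorType.ACTION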
port_ocean/core/handlers/webhook/processor_manager.py
@@ -11,11 +11,17 @@ from port_ocean.core.handlers.queue.abstract_queue import AbstractQueue
 from port_ocean.core.integrations.mixins.events import EventsMixin
 from port_ocean.core.integrations.mixins.live_events import LiveEventsMixin
 from port_ocean.exceptions.webhook_processor import WebhookEventNotSupportedError
-from .webhook_event import
+from port_ocean.core.handlers.webhook.webhook_event import (
+    WebhookEvent,
+    WebhookEventRawResults,
+    LiveEventTimestamp,
+)
 from port_ocean.context.event import event
 
-
-
+from port_ocean.core.handlers.webhook.abstract_webhook_processor import (
+    AbstractWebhookProcessor,
+    WebhookProcessorType,
+)
 from port_ocean.utils.signal import SignalHandler
 from port_ocean.core.handlers.queue import LocalQueue
@@ -56,7 +62,7 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
         while True:
             event = None
             matching_processors: List[
-                Tuple[ResourceConfig, AbstractWebhookProcessor]
+                Tuple[ResourceConfig | None, AbstractWebhookProcessor]
             ] = []
             try:
                 event = await queue.get()
@@ -133,16 +139,22 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
 
     async def _extract_matching_processors(
         self, webhook_event: WebhookEvent, path: str
-    ) -> list[tuple[ResourceConfig, AbstractWebhookProcessor]]:
+    ) -> list[tuple[ResourceConfig | None, AbstractWebhookProcessor]]:
         """Find and extract the matching processor for an event"""
 
-        created_processors: list[
+        created_processors: list[
+            tuple[ResourceConfig | None, AbstractWebhookProcessor]
+        ] = []
         event_processor_names = []
 
         for processor_class in self._processors_classes[path]:
             processor = processor_class(webhook_event.clone())
             if await processor.should_process_event(webhook_event):
                 event_processor_names.append(processor.__class__.__name__)
+                if processor.get_processor_type() == WebhookProcessorType.ACTION:
+                    created_processors.append((None, processor))
+                    continue
+
                 kinds = await processor.get_matching_kinds(webhook_event)
                 for kind in kinds:
                     for resource in event.port_app_config.resources:
@@ -179,7 +191,10 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
         event.set_timestamp(LiveEventTimestamp.FinishedProcessingWithError)
 
     async def _process_single_event(
-        self,
+        self,
+        processor: AbstractWebhookProcessor,
+        path: str,
+        resource: ResourceConfig | None,
     ) -> WebhookEventRawResults:
         """Process a single event with a specific processor"""
         try:
@@ -199,7 +214,7 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
             raise
 
     async def _execute_processor(
-        self, processor: AbstractWebhookProcessor, resource: ResourceConfig
+        self, processor: AbstractWebhookProcessor, resource: ResourceConfig | None
    ) -> WebhookEventRawResults:
        """Execute a single processor within a max processing time"""
        try:
@@ -213,7 +228,7 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
         )
 
     async def _process_webhook_request(
-        self, processor: AbstractWebhookProcessor, resource: ResourceConfig
+        self, processor: AbstractWebhookProcessor, resource: ResourceConfig | None
     ) -> WebhookEventRawResults:
         """Process a webhook request with retry logic
 
@@ -235,9 +250,10 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
         while True:
             try:
                 webhook_event_raw_results = await processor.handle_event(
-                    payload, resource
+                    payload, resource  # type: ignore[arg-type]
                 )
-
+                if resource is not None:
+                    webhook_event_raw_results.resource = resource
                 break
 
             except Exception as e:
@@ -258,7 +274,9 @@ class LiveEventsProcessorManager(LiveEventsMixin, EventsMixin):
         return webhook_event_raw_results
 
     def register_processor(
-        self,
+        self,
+        path: str,
+        processor: Type[AbstractWebhookProcessor],
     ) -> None:
        """Register a webhook processor for a specific path with optional filter
port_ocean/core/integrations/mixins/sync_raw.py
@@ -24,7 +24,7 @@ from port_ocean.core.integrations.mixins.utils import (
     resync_generator_wrapper,
     resync_function_wrapper,
 )
-from port_ocean.core.models import Entity, ProcessExecutionMode
+from port_ocean.core.models import Entity, IntegrationFeatureFlag, ProcessExecutionMode
 from port_ocean.core.ocean_types import (
     RAW_RESULT,
     RESYNC_RESULT,
@@ -117,13 +117,13 @@ class SyncRawMixin(HandlerMixin, EventsMixin):
                 logger.info(
                     f"Found async generator function for {resource_config.kind} name: {task.__qualname__}"
                 )
-                results.append(resync_generator_wrapper(task, resource_config.kind,resource_config.port.items_to_parse))
+                results.append(resync_generator_wrapper(task, resource_config.kind, resource_config.port.items_to_parse_name, resource_config.port.items_to_parse))
             else:
                 logger.info(
                     f"Found sync function for {resource_config.kind} name: {task.__qualname__}"
                 )
                 task = typing.cast(Callable[[str], Awaitable[RAW_RESULT]], task)
-                tasks.append(resync_function_wrapper(task, resource_config.kind))
+                tasks.append(resync_function_wrapper(task, resource_config.kind, resource_config.port.items_to_parse))
 
         logger.info(
             f"Found {len(tasks) + len(results)} resync tasks for {resource_config.kind}"
@@ -478,7 +478,7 @@ class SyncRawMixin(HandlerMixin, EventsMixin):
             bool: True if lakehouse data is enabled, False otherwise
         """
         flags = await ocean.port_client.get_organization_feature_flags()
-        if
+        if IntegrationFeatureFlag.LAKEHOUSE_ELIGIBLE in flags and ocean.config.lakehouse_enabled:
             return True
         return False
port_ocean/core/integrations/mixins/utils.py
@@ -1,13 +1,21 @@
+import asyncio
+import json
+import multiprocessing
+import os
+import re
+import shutil
+import stat
+import subprocess
+import tempfile
 from contextlib import contextmanager
-from typing import
+from typing import Any, AsyncGenerator, Awaitable, Callable, Generator, cast
 
+import ijson
 from loguru import logger
 
-import
-import
-
+from port_ocean.clients.port.utils import _http_client as _port_http_client
+from port_ocean.context.ocean import ocean
 from port_ocean.core.handlers.entity_processor.jq_entity_processor import JQEntityProcessor
-from port_ocean.core.handlers.port_app_config.models import ResourceConfig
 from port_ocean.core.ocean_types import (
     ASYNC_GENERATOR_RESYNC_TYPE,
     RAW_RESULT,
@@ -20,11 +28,58 @@ from port_ocean.exceptions.core import (
     OceanAbortException,
     KindNotImplementedException,
 )
-
-from port_ocean.utils.async_http import _http_client
-from port_ocean.clients.port.utils import _http_client as _port_http_client
 from port_ocean.helpers.metric.metric import MetricType, MetricPhase
-from port_ocean.
+from port_ocean.utils.async_http import _http_client
+
+def _process_path_type_items(
+    result: RAW_RESULT, items_to_parse: str | None = None
+) -> RAW_RESULT:
+    """
+    Process items in the result array to check for "__type": "path" fields.
+    If found, read the file contents and load them into a "content" field.
+    Skip processing if we're on the items_to_parse branch.
+    """
+    if not isinstance(result, list):
+        return result
+
+    # Skip processing if we're on the items_to_parse branch
+    if items_to_parse:
+        return result
+
+    processed_result = []
+    for item in result:
+        if isinstance(item, dict) and item.get("__type") == "path":
+            try:
+                # Read the file content and parse as JSON
+                file_path = item.get("file", {}).get("content", {}).get("path")
+                if file_path and os.path.exists(file_path):
+                    with open(file_path, "r") as f:
+                        content = json.loads(f.read())
+                    # Create a copy of the item with the content field
+                    processed_item = item.copy()
+                    processed_item["file"]["content"] = content
+                    processed_result.append(processed_item)
+                else:
+                    # If file doesn't exist, keep the original item
+                    processed_result.append(item)
+            except (json.JSONDecodeError, IOError, OSError) as e:
+                if isinstance(item, dict) and item.get("file") is not None:
+                    content = item["file"].get("content") if isinstance(item["file"].get("content"), dict) else {}
+                    data_path = content.get("path", None)
+                    logger.warning(
+                        f"Failed to read or parse file content for path {data_path}: {e}"
+                    )
+                else:
+                    logger.warning(
+                        f"Failed to read or parse file content for unknown path: {e}. item: {json.dumps(item)}"
+                    )
+                # Keep the original item if there's an error
+                processed_result.append(item)
+        else:
+            # Keep non-path type items as is
+            processed_result.append(item)
+
+    return processed_result
 
 @contextmanager
 def resync_error_handling() -> Generator[None, None, None]:
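The `__type: "path"` contract that `_process_path_type_items` consumes, as a small self-contained sketch (the file is created by the example itself; the shape of the pointer dict follows the code above):

    import json
    import tempfile

    # An integration can return a pointer to a file instead of its contents:
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"items": [1, 2, 3]}, f)

    raw_item = {"__type": "path", "file": {"content": {"path": f.name}}}

    # _process_path_type_items([raw_item]) would replace the pointer in place:
    # {"__type": "path", "file": {"content": {"items": [1, 2, 3]}}}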
@@ -43,15 +98,16 @@ def resync_error_handling() -> Generator[None, None, None]:
 
 
 async def resync_function_wrapper(
-    fn: Callable[[str], Awaitable[RAW_RESULT]], kind: str
+    fn: Callable[[str], Awaitable[RAW_RESULT]], kind: str, items_to_parse: str | None = None
 ) -> RAW_RESULT:
     with resync_error_handling():
         results = await fn(kind)
-
+        validated_results = validate_result(results)
+        return _process_path_type_items(validated_results, items_to_parse)
 
 
 async def resync_generator_wrapper(
-    fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse: str | None = None
+    fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse_name: str, items_to_parse: str | None = None
 ) -> ASYNC_GENERATOR_RESYNC_TYPE:
     generator = fn(kind)
     errors = []
@@ -61,27 +117,23 @@ async def resync_generator_wrapper(
             with resync_error_handling():
                 result = await anext(generator)
                 if not ocean.config.yield_items_to_parse:
-
+                    validated_result = validate_result(result)
+                    processed_result = _process_path_type_items(validated_result, items_to_parse)
+                    yield processed_result
                 else:
-                    batch_size = ocean.config.yield_items_to_parse_batch_size
                     if items_to_parse:
                         for data in result:
-
-                            if
-
-
-
-
-                            yield
-                            raw_data = [{"item": item, **data} for item in items]
-                            while True:
-                                raw_data_batch = raw_data[:batch_size]
-                                yield raw_data_batch
-                                raw_data = raw_data[batch_size:]
-                                if len(raw_data) == 0:
-                                    break
+                            data_path: str | None = None
+                            if isinstance(data, dict) and data.get("file") is not None:
+                                content = data["file"].get("content") if isinstance(data["file"].get("content"), dict) else {}
+                                data_path = content.get("path", None)
+                            bulks = get_items_to_parse_bulks(data, data_path, items_to_parse, items_to_parse_name, data.get("__base_jq", ".file.content"))
+                            async for bulk in bulks:
+                                yield bulk
                     else:
-
+                        validated_result = validate_result(result)
+                        processed_result = _process_path_type_items(validated_result, items_to_parse)
+                        yield processed_result
         except OceanAbortException as error:
             errors.append(error)
             ocean.metrics.inc_metric(
@@ -101,6 +153,106 @@ def is_resource_supported(
 ) -> bool:
     return bool(resync_event_mapping[kind] or resync_event_mapping[None])
 
+def _validate_jq_expression(expression: str) -> None:
+    """Validate jq expression to prevent command injection."""
+    try:
+        _ = cast(JQEntityProcessor, ocean.app.integration.entity_processor)._compile(expression)
+    except Exception as e:
+        raise ValueError(f"Invalid jq expression: {e}") from e
+    # Basic validation - reject expressions that could be dangerous
+    # Check for dangerous patterns (include, import, module)
+    dangerous_patterns = ['include', 'import', 'module', 'env', 'debug']
+    for pattern in dangerous_patterns:
+        # Use word boundary regex to match only complete words, not substrings
+        if re.search(rf'\b{re.escape(pattern)}\b', expression):
+            raise ValueError(f"Potentially dangerous pattern '{pattern}' found in jq expression")
+
+    # Special handling for 'env' - block environment variable access
+    if re.search(r'(?<!\w)\$ENV(?:\.)?', expression):
+        raise ValueError("Environment variable access '$ENV.' found in jq expression")
+    if re.search(r'\benv\.', expression):
+        raise ValueError("Environment variable access 'env.' found in jq expression")
+
+def _create_secure_temp_file(suffix: str = ".json") -> str:
+    """Create a secure temporary file with restricted permissions."""
+    # Create temp directory if it doesn't exist
+    temp_dir = "/tmp/ocean"
+    os.makedirs(temp_dir, exist_ok=True)
+
+    # Create temporary file with secure permissions
+    fd, temp_path = tempfile.mkstemp(suffix=suffix, dir=temp_dir)
+    try:
+        # Set restrictive permissions (owner read/write only)
+        os.chmod(temp_path, stat.S_IRUSR | stat.S_IWUSR)
+        return temp_path
+    finally:
+        os.close(fd)
+
+async def get_items_to_parse_bulks(raw_data: dict[Any, Any], data_path: str, items_to_parse: str, items_to_parse_name: str, base_jq: str) -> AsyncGenerator[list[dict[str, Any]], None]:
+    # Validate inputs to prevent command injection
+    _validate_jq_expression(items_to_parse)
+    items_to_parse = items_to_parse.replace(base_jq, ".") if data_path else items_to_parse
+
+    temp_data_path = None
+    temp_output_path = None
+
+    try:
+        # Create secure temporary files
+        if not data_path:
+            raw_data_serialized = json.dumps(raw_data)
+            temp_data_path = _create_secure_temp_file("_input.json")
+            with open(temp_data_path, "w") as f:
+                f.write(raw_data_serialized)
+            data_path = temp_data_path
+
+        temp_output_path = _create_secure_temp_file("_parsed.json")
+
+        delete_target = items_to_parse.split('|', 1)[0].strip() if not items_to_parse.startswith('map(') else '.'
+        base_jq_object_string = await _build_base_jq_object_string(raw_data, base_jq, delete_target)
+
+        # Build jq expression safely
+        jq_expression = f""". as $all
+        | ($all | {items_to_parse}) as $items
+        | $items
+        | map({{{items_to_parse_name}: ., {base_jq_object_string}}})"""
+
+        # Use subprocess with list arguments instead of shell=True
+
+        jq_path = shutil.which("jq") or "/bin/jq"
+        jq_args = [jq_path, jq_expression, data_path]
+
+        with open(temp_output_path, "w") as output_file:
+            result = subprocess.run(
+                jq_args,
+                stdout=output_file,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False  # Don't raise exception, handle errors manually
+            )
+
+        if result.returncode != 0:
+            logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {result.stderr}")
+            yield []
+        else:
+            with open(temp_output_path, "r") as f:
+                events_stream = get_events_as_a_stream(f, 'item', ocean.config.yield_items_to_parse_batch_size)
+                for items_bulk in events_stream:
+                    yield items_bulk
+
+    except ValueError as e:
+        logger.error(f"Invalid jq expression: {e}")
+        yield []
+    except Exception as e:
+        logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {e}")
+        yield []
+    finally:
+        # Cleanup temporary files
+        for temp_path in [temp_data_path, temp_output_path]:
+            if temp_path and os.path.exists(temp_path):
+                try:
+                    os.remove(temp_path)
+                except OSError as e:
+                    logger.warning(f"Failed to cleanup temporary file {temp_path}: {e}")
 
 def unsupported_kind_response(
     kind: str, available_resync_kinds: list[str]
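The subprocess invocation is easier to follow with concrete values. A simplified sketch of the call shape (the input path and expression are illustrative, and the base-object merge from `_build_base_jq_object_string` is omitted):

    import shutil
    import subprocess

    jq_path = shutil.which("jq") or "/bin/jq"
    jq_program = (
        '. as $all | ($all | .file.content.items) as $items '
        '| $items | map({item: .})'
    )
    # List arguments with no shell=True, so the expression is never shell-parsed
    completed = subprocess.run(
        [jq_path, jq_program, "/tmp/ocean/example_input.json"],
        capture_output=True,
        text=True,
        check=False,
    )
    # On success, stdout holds a JSON array of {"item": <element>} objects,
    # which get_items_to_parse_bulks then streams back in batches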
@@ -108,6 +260,44 @@ def unsupported_kind_response(
     logger.error(f"Kind {kind} is not supported in this integration")
     return [], [KindNotImplementedException(kind, available_resync_kinds)]
 
+async def _build_base_jq_object_string(raw_data: dict[Any, Any], base_jq: str, delete_target: str) -> str:
+    base_jq_object_before_parsing = await cast(JQEntityProcessor, ocean.app.integration.entity_processor)._search(raw_data, f"{base_jq} = {json.dumps("__all")}")
+    base_jq_object_before_parsing_serialized = json.dumps(base_jq_object_before_parsing)
+    base_jq_object_before_parsing_serialized = base_jq_object_before_parsing_serialized[1:-1] if len(base_jq_object_before_parsing_serialized) >= 2 else base_jq_object_before_parsing_serialized
+    base_jq_object_before_parsing_serialized = base_jq_object_before_parsing_serialized.replace("\"__all\"", f"(($all | del({delete_target})) // {{}})")
+    return base_jq_object_before_parsing_serialized
+
+
+def get_events_as_a_stream(
+    stream: Any,
+    target_items: str = "item",
+    max_buffer_size_mb: int = 1
+) -> Generator[list[dict[str, Any]], None, None]:
+    events = ijson.sendable_list()
+    coro = ijson.items_coro(events, target_items, use_float=True)
+
+    # Convert MB to bytes for the buffer size
+    buffer_size = max_buffer_size_mb * 1024 * 1024
+
+    # Read chunks from the stream until exhausted
+    while True:
+        chunk = stream.read(buffer_size)
+        if not chunk:  # End of stream
+            break
+
+        # Convert string to bytes if necessary (for text mode files)
+        if isinstance(chunk, str):
+            chunk = chunk.encode('utf-8')
+
+        coro.send(chunk)
+        yield events
+        del events[:]
+    try:
+        coro.close()
+    finally:
+        if events:
+            yield events
+            events[:] = []
 
 class ProcessWrapper(multiprocessing.Process):
     def __init__(self, *args, **kwargs):
|
|
|
134
324
|
_port_http_client.pop()
|
|
135
325
|
except (RuntimeError, AttributeError):
|
|
136
326
|
pass
|
|
327
|
+
|
|
328
|
+
class _AiterReader:
|
|
329
|
+
"""
|
|
330
|
+
Wraps an iterable of byte chunks (e.g., response.iter_bytes())
|
|
331
|
+
and exposes a .read(n) method that ijson expects.
|
|
332
|
+
"""
|
|
333
|
+
def __init__(self, iterable):
|
|
334
|
+
self._iter = iter(iterable)
|
|
335
|
+
self._buf = bytearray()
|
|
336
|
+
self._eof = False
|
|
337
|
+
|
|
338
|
+
def read(self, n=-1):
|
|
339
|
+
# If n < 0, return everything until EOF
|
|
340
|
+
if n is None or n < 0:
|
|
341
|
+
chunks = [bytes(self._buf)]
|
|
342
|
+
self._buf.clear()
|
|
343
|
+
chunks.extend(self._iter) # drain the iterator
|
|
344
|
+
return b"".join(chunks)
|
|
345
|
+
|
|
346
|
+
# Fill buffer until we have n bytes or hit EOF
|
|
347
|
+
while len(self._buf) < n and not self._eof:
|
|
348
|
+
try:
|
|
349
|
+
self._buf.extend(next(self._iter))
|
|
350
|
+
except StopIteration:
|
|
351
|
+
self._eof = True
|
|
352
|
+
break
|
|
353
|
+
|
|
354
|
+
# Serve up to n bytes
|
|
355
|
+
out = bytes(self._buf[:n])
|
|
356
|
+
del self._buf[:n]
|
|
357
|
+
return out
|
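`_AiterReader` adapts any iterable of byte chunks to the `.read(n)` interface the streaming helper expects. A sketch (the chunks stand in for something like an HTTP client's `response.iter_bytes()`; the class is private, so importing it directly is for illustration only):

    from port_ocean.core.integrations.mixins.utils import (
        _AiterReader,
        get_events_as_a_stream,
    )

    chunks = [b'[{"id": 1},', b' {"id": 2}]']  # stand-in for a chunked byte stream
    reader = _AiterReader(chunks)
    for batch in get_events_as_a_stream(reader, target_items="item"):
        print(list(batch))  # [{'id': 1}, {'id': 2}]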