port-ocean 0.28.11__py3-none-any.whl → 0.28.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of port-ocean might be problematic.
- integrations/_infra/Dockerfile.Deb +1 -0
- integrations/_infra/Dockerfile.local +1 -0
- port_ocean/clients/port/mixins/integrations.py +1 -1
- port_ocean/core/handlers/entity_processor/jq_entity_processor.py +472 -17
- port_ocean/core/handlers/entity_processor/jq_input_evaluator.py +137 -0
- port_ocean/core/handlers/port_app_config/models.py +1 -1
- port_ocean/core/integrations/mixins/sync_raw.py +1 -1
- port_ocean/core/integrations/mixins/utils.py +241 -23
- port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +932 -1
- port_ocean/tests/core/handlers/entity_processor/test_jq_input_evaluator.py +932 -0
- port_ocean/tests/utils/test_cache.py +240 -0
- port_ocean/utils/cache.py +45 -9
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/METADATA +1 -1
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/RECORD +17 -15
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/LICENSE.md +0 -0
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/WHEEL +0 -0
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/entry_points.txt +0 -0

port_ocean/core/handlers/entity_processor/jq_input_evaluator.py
@@ -0,0 +1,137 @@
+import re
+from enum import Enum
+
+# This file is used to classify the input that a jq expression runs on.
+# It is used to determine if the jq expression can be executed without providing any JSON input (const expressions),
+# or on a single item (in an items-to-parse situation),
+# or on all the data.
+
+
+class InputClassifyingResult(Enum):
+    NONE = 1
+    SINGLE = 2
+    ALL = 3
+
+
+# Functions/filters that (even without ".") still require/assume input
+_INPUT_DEPENDENT_FUNCS = r"""
+\b(
+map|select|reverse|sort|sort_by|unique|unique_by|group_by|flatten|transpose|
+split|explode|join|add|length|has|in|index|indices|contains|
+paths|leaf_paths|keys|keys_unsorted|values|to_entries|with_entries|from_entries|
+del|delpaths|walk|reduce|foreach|input|inputs|limit|first|last|nth|
+while|until|recurse|recurse_down|bsearch|combinations|permutations
+)\b
+"""
+
+_INPUT_DEPENDENT_RE = re.compile(_INPUT_DEPENDENT_FUNCS, re.VERBOSE)
+
+
+# String literal handling (jq uses double quotes for strings)
+_STRING_LITERAL_RE = re.compile(r'"(?:\\.|[^"\\])*"')
+_STRING_ONLY_RE = re.compile(r'^\s*"(?:\\.|[^"\\])*"\s*$')
+_NUMBER_ONLY_RE = re.compile(r"^\s*-?\d+(\.\d+)?\s*$")
+
+
+def _mask_strings(expr: str) -> str:
+    """
+    Replace string literals with 'S' so a '.' inside quotes doesn't count.
+    Example:
+    - '"this is a string"' ---> 'S'
+    - '"string" + .field' ---> 'S + .field'
+    """
+    return _STRING_LITERAL_RE.sub("S", expr)
+
+
+def _mask_numbers(expr: str) -> str:
+    """
+    Replace number literals with 'N' so decimal points in numbers don't count as input references.
+    Example:
+    - '3.14' ---> 'N'
+    - '42 + 3.14' ---> 'N + N'
+    """
+    # Pattern to match numbers (integers and decimals, with optional sign)
+    number_pattern = re.compile(r"[-+]?\d+(?:\.\d+)?")
+    return number_pattern.sub("N", expr)
+
+
+def can_expression_run_with_no_input(selector_query: str) -> bool:
+    """
+    Returns True if the jq expression can be executed without providing any JSON input.
+    Rules:
+    - Whitespace-only => No Input Required
+    - A pure string literal => No Input Required (even if it contains '.')
+    - After masking strings, if it contains '.' => Input Required
+    - Disallow known input-dependent functions (functions that require input)
+    - After masking strings, if it contains only operators and numbers and 'S' => No Input Required
+    - Allow null/true/false/number/range/empty, and array/object literals that
+      don't reference input (no '.' after masking strings) => No Input Required
+    Example:
+    - blueprint: '"newRelicService"' in the mapping; the selector_query param would be '"newRelicService"' => No Input Required
+    """
+    s = selector_query.strip()
+    if s == "":
+        return True  # whitespace-only
+
+    # Pure string literal is nullary
+    if _STRING_ONLY_RE.match(s):
+        return True
+
+    # First mask strings, then mask numbers to prevent decimal points in numbers from being treated as input references
+    masked = _mask_strings(s).strip()
+    masked = _mask_numbers(masked).strip()
+
+    # If it contains any known input-dependent functions, don't shortcut
+    if _INPUT_DEPENDENT_RE.search(masked):
+        return False
+
+    # If it contains only operators and 'S'/'N', it can be executed with no input
+    # Example:
+    # - '"abc" + "def"' ---> 'S + S' => No Input Required
+    # - '3.14 + 2.5' ---> 'N + N' => No Input Required
+    # if re.fullmatch(
+    #     r"(?:S|N)(?:\s*[+\-*/]\s*(?:S|N))*",
+    #     masked,
+    # ):
+    #     return True
+
+    if "." not in masked:
+        return True
+
+    return False
+
+
+def _can_expression_run_on_single_item(expr: str, key: str) -> bool:
+    """
+    Detect `.key` outside of quotes, as a standalone path segment beginning
+    after a non-word boundary (start, space, |, (, [, {, , or :) and not part
+    of `.something.key`.
+    Assuming key = 'item':
+    Examples:
+    - .item.yaeli => true
+    - map(.item.yaeli) => true
+    - .body.item => false
+    """
+    if not key:
+        return False
+
+    masked = _mask_strings(expr)
+    masked = _mask_numbers(masked)
+    pattern = re.compile(rf"(?<![A-Za-z0-9_])\.{re.escape(key)}(?![A-Za-z0-9_])")
+    return bool(pattern.search(masked))
+
+
+def classify_input(
+    selector_query: str, single_item_key: str | None = None
+) -> InputClassifyingResult:
+    """
+    Returns the input evaluation result for the jq expression.
+    Conservative: requires NO '.' and must match a known nullary-safe pattern.
+    """
+    if can_expression_run_with_no_input(selector_query):
+        return InputClassifyingResult.NONE
+    if single_item_key and _can_expression_run_on_single_item(
        selector_query, single_item_key
    ):
+        return InputClassifyingResult.SINGLE
+    return InputClassifyingResult.ALL
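
For orientation, here is how the new classifier behaves on a few representative selector queries. This is a minimal sketch; the import path follows the package layout in the file list above, and the sample queries are hypothetical:

    from port_ocean.core.handlers.entity_processor.jq_input_evaluator import (
        InputClassifyingResult,
        classify_input,
    )

    # A constant expression needs no JSON input at all.
    assert classify_input('"newRelicService"') == InputClassifyingResult.NONE

    # An expression that only touches the items-to-parse key can run per item.
    assert classify_input(".item.name", single_item_key="item") == InputClassifyingResult.SINGLE

    # Anything that reaches outside that key still needs the full data.
    assert classify_input(".body.item", single_item_key="item") == InputClassifyingResult.ALL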

port_ocean/core/handlers/port_app_config/models.py
@@ -41,7 +41,7 @@ class MappingsConfig(BaseModel):
 class PortResourceConfig(BaseModel):
     entity: MappingsConfig
     items_to_parse: str | None = Field(alias="itemsToParse")
-    items_to_parse_name: str
+    items_to_parse_name: str = Field(alias="itemsToParseName", default="item")
 
 
 class Selector(BaseModel):
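
The practical effect of this one-line change is that itemsToParseName becomes optional in a mapping and falls back to "item". A minimal sketch of the Field(alias=..., default=...) behavior, using a simplified stand-in model rather than the real PortResourceConfig:

    from pydantic import BaseModel, Field

    class PortResourceConfigSketch(BaseModel):
        items_to_parse: str | None = Field(alias="itemsToParse", default=None)
        items_to_parse_name: str = Field(alias="itemsToParseName", default="item")

    cfg = PortResourceConfigSketch.parse_obj({"itemsToParse": ".file.content.items"})
    assert cfg.items_to_parse_name == "item"  # default applies when the key is absent

    cfg = PortResourceConfigSketch.parse_obj(
        {"itemsToParse": ".file.content.items", "itemsToParseName": "row"}
    )
    assert cfg.items_to_parse_name == "row"  # the alias key overrides the default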

port_ocean/core/integrations/mixins/sync_raw.py
@@ -117,7 +117,7 @@ class SyncRawMixin(HandlerMixin, EventsMixin):
                 logger.info(
                     f"Found async generator function for {resource_config.kind} name: {task.__qualname__}"
                 )
-                results.append(resync_generator_wrapper(task, resource_config.kind,resource_config.port.items_to_parse))
+                results.append(resync_generator_wrapper(task, resource_config.kind, resource_config.port.items_to_parse_name, resource_config.port.items_to_parse))
             else:
                 logger.info(
                     f"Found sync function for {resource_config.kind} name: {task.__qualname__}"

port_ocean/core/integrations/mixins/utils.py
@@ -2,12 +2,11 @@ from contextlib import contextmanager
 from typing import Awaitable, Generator, Callable, cast
 
 from loguru import logger
-
 import asyncio
 import multiprocessing
-
+import re
+import json
 from port_ocean.core.handlers.entity_processor.jq_entity_processor import JQEntityProcessor
-from port_ocean.core.handlers.port_app_config.models import ResourceConfig
 from port_ocean.core.ocean_types import (
     ASYNC_GENERATOR_RESYNC_TYPE,
     RAW_RESULT,

port_ocean/core/integrations/mixins/utils.py
@@ -20,11 +19,66 @@ from port_ocean.exceptions.core import (
     OceanAbortException,
     KindNotImplementedException,
 )
-
+import os
 from port_ocean.utils.async_http import _http_client
 from port_ocean.clients.port.utils import _http_client as _port_http_client
 from port_ocean.helpers.metric.metric import MetricType, MetricPhase
 from port_ocean.context.ocean import ocean
+import subprocess
+import tempfile
+import stat
+import ijson
+from typing import Any, AsyncGenerator
+
+def _process_path_type_items(
+    result: RAW_RESULT, items_to_parse: str | None = None
+) -> RAW_RESULT:
+    """
+    Process items in the result array to check for "__type": "path" fields.
+    If found, read the file contents and load them into a "content" field.
+    Skip processing if we're on the items_to_parse branch.
+    """
+    if not isinstance(result, list):
+        return result
+
+    # Skip processing if we're on the items_to_parse branch
+    if items_to_parse:
+        return result
+
+    processed_result = []
+    for item in result:
+        if isinstance(item, dict) and item.get("__type") == "path":
+            try:
+                # Read the file content and parse as JSON
+                file_path = item.get("file", {}).get("content", {}).get("path")
+                if file_path and os.path.exists(file_path):
+                    with open(file_path, "r") as f:
+                        content = json.loads(f.read())
+                    # Create a copy of the item with the content field
+                    processed_item = item.copy()
+                    processed_item["content"] = content
+                    processed_result.append(processed_item)
+                else:
+                    # If file doesn't exist, keep the original item
+                    processed_result.append(item)
+            except (json.JSONDecodeError, IOError, OSError) as e:
+                if isinstance(item, dict) and item.get("file") is not None:
+                    content = item["file"].get("content") if isinstance(item["file"].get("content"), dict) else {}
+                    data_path = content.get("path", None)
+                    logger.warning(
+                        f"Failed to read or parse file content for path {data_path}: {e}"
+                    )
+                else:
+                    logger.warning(
+                        f"Failed to read or parse file content for unknown path: {e}. item: {json.dumps(item)}"
+                    )
+                # Keep the original item if there's an error
+                processed_result.append(item)
+        else:
+            # Keep non-path type items as is
+            processed_result.append(item)
+
+    return processed_result
 
 @contextmanager
 def resync_error_handling() -> Generator[None, None, None]:
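
To make the "__type": "path" convention concrete, here is a sketch of what _process_path_type_items does with a raw item that points at a JSON file on disk (the file layout and payload are made up):

    import json
    import tempfile

    # An integration can avoid inlining a large payload by writing it to disk
    # and marking the item as a "path" item.
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump({"services": [{"name": "billing"}]}, f)
        file_path = f.name

    raw_result = [{"__type": "path", "file": {"content": {"path": file_path}}}]

    # _process_path_type_items(raw_result) returns a copy of the item with the
    # parsed JSON loaded into a "content" field:
    # [{"__type": "path",
    #   "file": {"content": {"path": "/tmp/...json"}},
    #   "content": {"services": [{"name": "billing"}]}}]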

port_ocean/core/integrations/mixins/utils.py
@@ -47,11 +101,12 @@ async def resync_function_wrapper(
 ) -> RAW_RESULT:
     with resync_error_handling():
         results = await fn(kind)
-        return validate_result(results)
+        validated_results = validate_result(results)
+        return _process_path_type_items(validated_results)
 
 
 async def resync_generator_wrapper(
-    fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse: str | None = None
+    fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse_name: str, items_to_parse: str | None = None
 ) -> ASYNC_GENERATOR_RESYNC_TYPE:
     generator = fn(kind)
     errors = []

port_ocean/core/integrations/mixins/utils.py
@@ -61,27 +116,23 @@ async def resync_generator_wrapper(
             with resync_error_handling():
                 result = await anext(generator)
                 if not ocean.config.yield_items_to_parse:
-                    yield validate_result(result)
+                    validated_result = validate_result(result)
+                    processed_result = _process_path_type_items(validated_result)
+                    yield processed_result
                 else:
-                    batch_size = ocean.config.yield_items_to_parse_batch_size
                     if items_to_parse:
                         for data in result:
-
-                            if
-
-
-
-
-                            yield
-                            raw_data = [{"item": item, **data} for item in items]
-                            while True:
-                                raw_data_batch = raw_data[:batch_size]
-                                yield raw_data_batch
-                                raw_data = raw_data[batch_size:]
-                                if len(raw_data) == 0:
-                                    break
+                            data_path: str | None = None
+                            if isinstance(data, dict) and data.get("file") is not None:
+                                content = data["file"].get("content") if isinstance(data["file"].get("content"), dict) else {}
+                                data_path = content.get("path", None)
+                            bulks = get_items_to_parse_bulks(data, data_path, items_to_parse, items_to_parse_name, data.get("__base_jq", ".file.content"))
+                            async for bulk in bulks:
+                                yield bulk
                     else:
-                        yield validate_result(result)
+                        validated_result = validate_result(result)
+                        processed_result = _process_path_type_items(validated_result, items_to_parse)
+                        yield processed_result
         except OceanAbortException as error:
             errors.append(error)
             ocean.metrics.inc_metric(
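
In short, with yield_items_to_parse enabled the wrapper no longer runs the jq search in-process and slices batches itself; each yielded element is handed to get_items_to_parse_bulks (defined further down in this diff), pointed either at the file the element references or at a temp-file serialization of the element. A hypothetical element illustrating the dispatch:

    data = {
        "file": {"content": {"path": "/tmp/ocean/raw_input.json"}},  # made-up path
        "__base_jq": ".file.content",  # optional; ".file.content" is the default
    }

    # data carries a file reference, so data_path is extracted and jq reads the
    # payload straight from disk. An element without such a reference would be
    # serialized to a secure temp file first (data_path is None on entry).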

port_ocean/core/integrations/mixins/utils.py
@@ -101,6 +152,104 @@ def is_resource_supported(
 ) -> bool:
     return bool(resync_event_mapping[kind] or resync_event_mapping[None])
 
+def _validate_jq_expression(expression: str) -> None:
+    """Validate jq expression to prevent command injection."""
+    try:
+        _ = cast(JQEntityProcessor, ocean.app.integration.entity_processor)._compile(expression)
+    except Exception as e:
+        raise ValueError(f"Invalid jq expression: {e}") from e
+    # Basic validation - reject expressions that could be dangerous
+    # Check for dangerous patterns (include, import, module)
+    dangerous_patterns = ['include', 'import', 'module', 'env', 'debug']
+    for pattern in dangerous_patterns:
+        # Use word boundary regex to match only complete words, not substrings
+        if re.search(rf'\b{re.escape(pattern)}\b', expression):
+            raise ValueError(f"Potentially dangerous pattern '{pattern}' found in jq expression")
+
+    # Special handling for 'env' - block environment variable access
+    if re.search(r'(?<!\w)\$ENV(?:\.)?', expression):
+        raise ValueError("Environment variable access '$ENV.' found in jq expression")
+    if re.search(r'\benv\.', expression):
+        raise ValueError("Environment variable access 'env.' found in jq expression")
+
+def _create_secure_temp_file(suffix: str = ".json") -> str:
+    """Create a secure temporary file with restricted permissions."""
+    # Create temp directory if it doesn't exist
+    temp_dir = "/tmp/ocean"
+    os.makedirs(temp_dir, exist_ok=True)
+
+    # Create temporary file with secure permissions
+    fd, temp_path = tempfile.mkstemp(suffix=suffix, dir=temp_dir)
+    try:
+        # Set restrictive permissions (owner read/write only)
+        os.chmod(temp_path, stat.S_IRUSR | stat.S_IWUSR)
+        return temp_path
+    finally:
+        os.close(fd)
+
+async def get_items_to_parse_bulks(raw_data: dict[Any, Any], data_path: str, items_to_parse: str, items_to_parse_name: str, base_jq: str) -> AsyncGenerator[list[dict[str, Any]], None]:
+    # Validate inputs to prevent command injection
+    _validate_jq_expression(items_to_parse)
+    items_to_parse = items_to_parse.replace(base_jq, ".") if data_path else items_to_parse
+
+    temp_data_path = None
+    temp_output_path = None
+
+    try:
+        # Create secure temporary files
+        if not data_path:
+            raw_data_serialized = json.dumps(raw_data)
+            temp_data_path = _create_secure_temp_file("_input.json")
+            with open(temp_data_path, "w") as f:
+                f.write(raw_data_serialized)
+            data_path = temp_data_path
+
+        temp_output_path = _create_secure_temp_file("_parsed.json")
+
+        delete_target = items_to_parse.split('|', 1)[0].strip() if not items_to_parse.startswith('map(') else '.'
+        base_jq_object_string = await _build_base_jq_object_string(raw_data, base_jq, delete_target)
+
+        # Build jq expression safely
+        jq_expression = f""". as $all
+| ($all | {items_to_parse}) as $items
+| $items
+| map({{{items_to_parse_name}: ., {base_jq_object_string}}})"""
+
+        # Use subprocess with list arguments instead of shell=True
+        jq_args = ["/bin/jq", jq_expression, data_path]
+
+        with open(temp_output_path, "w") as output_file:
+            result = subprocess.run(
+                jq_args,
+                stdout=output_file,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False  # Don't raise exception, handle errors manually
+            )
+
+        if result.returncode != 0:
+            logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {result.stderr}")
+            yield []
+        else:
+            with open(temp_output_path, "r") as f:
+                events_stream = get_events_as_a_stream(f, 'item', ocean.config.yield_items_to_parse_batch_size)
+                for items_bulk in events_stream:
+                    yield items_bulk
+
+    except ValueError as e:
+        logger.error(f"Invalid jq expression: {e}")
+        yield []
+    except Exception as e:
+        logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {e}")
+        yield []
+    finally:
+        # Cleanup temporary files
+        for temp_path in [temp_data_path, temp_output_path]:
+            if temp_path and os.path.exists(temp_path):
+                try:
+                    os.remove(temp_path)
+                except OSError as e:
+                    logger.warning(f"Failed to cleanup temporary file {temp_path}: {e}")
 
 def unsupported_kind_response(
     kind: str, available_resync_kinds: list[str]
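
To see what actually reaches /bin/jq, assume itemsToParse is ".file.content.items", the default items name "item", the default base_jq ".file.content", and an element with no on-disk payload (so it was serialized to a temp file and data_path was None on entry). Under those assumptions, and with _build_base_jq_object_string producing the serialized remainder shown below, the generated program looks like this sketch:

    items_to_parse = ".file.content.items"  # hypothetical mapping value
    items_to_parse_name = "item"            # default from models.py above

    # For raw data {"file": {"content": {"items": [...]}}} the helper serializes
    # the element with the parsed list deleted from $all:
    base_jq_object_string = '"file": {"content": (($all | del(.file.content.items)) // {})}'

    jq_expression = f""". as $all
    | ($all | {items_to_parse}) as $items
    | $items
    | map({{{items_to_parse_name}: ., {base_jq_object_string}}})"""

    # /bin/jq runs with [jq_expression, data_path]; each array entry comes out as
    # {"item": <one parsed item>, "file": {"content": {...without "items"...}}}.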

port_ocean/core/integrations/mixins/utils.py
@@ -108,6 +257,44 @@ def unsupported_kind_response(
     logger.error(f"Kind {kind} is not supported in this integration")
     return [], [KindNotImplementedException(kind, available_resync_kinds)]
 
+async def _build_base_jq_object_string(raw_data: dict[Any, Any], base_jq: str, delete_target: str) -> str:
+    base_jq_object_before_parsing = await cast(JQEntityProcessor, ocean.app.integration.entity_processor)._search(raw_data, f"{base_jq} = {json.dumps("__all")}")
+    base_jq_object_before_parsing_serialized = json.dumps(base_jq_object_before_parsing)
+    base_jq_object_before_parsing_serialized = base_jq_object_before_parsing_serialized[1:-1] if len(base_jq_object_before_parsing_serialized) >= 2 else base_jq_object_before_parsing_serialized
+    base_jq_object_before_parsing_serialized = base_jq_object_before_parsing_serialized.replace("\"__all\"", f"(($all | del({delete_target})) // {{}})")
+    return base_jq_object_before_parsing_serialized
+
+
+def get_events_as_a_stream(
+    stream: Any,
+    target_items: str = "item",
+    max_buffer_size_mb: int = 1
+) -> Generator[list[dict[str, Any]], None, None]:
+    events = ijson.sendable_list()
+    coro = ijson.items_coro(events, target_items)
+
+    # Convert MB to bytes for the buffer size
+    buffer_size = max_buffer_size_mb * 1024 * 1024
+
+    # Read chunks from the stream until exhausted
+    while True:
+        chunk = stream.read(buffer_size)
+        if not chunk:  # End of stream
+            break
+
+        # Convert string to bytes if necessary (for text mode files)
+        if isinstance(chunk, str):
+            chunk = chunk.encode('utf-8')
+
+        coro.send(chunk)
+        yield events
+        del events[:]
+    try:
+        coro.close()
+    finally:
+        if events:
+            yield events
+            events[:] = []
 
 class ProcessWrapper(multiprocessing.Process):
     def __init__(self, *args, **kwargs):
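
get_events_as_a_stream drives ijson's coroutine interface (ijson.sendable_list plus ijson.items_coro) to emit parsed array elements incrementally instead of loading the whole output file at once. A minimal usage sketch against an in-memory payload:

    import io
    # assumes the helper above; "item" is ijson's prefix for top-level array elements
    payload = io.BytesIO(b'[{"id": 1}, {"id": 2}, {"id": 3}]')

    for batch in get_events_as_a_stream(payload, "item", max_buffer_size_mb=1):
        print(list(batch))  # copy it: the same list object is cleared between yields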

port_ocean/core/integrations/mixins/utils.py
@@ -134,3 +321,34 @@ def clear_http_client_context() -> None:
         _port_http_client.pop()
     except (RuntimeError, AttributeError):
         pass
+
+class _AiterReader:
+    """
+    Wraps an iterable of byte chunks (e.g., response.iter_bytes())
+    and exposes a .read(n) method that ijson expects.
+    """
+    def __init__(self, iterable):
+        self._iter = iter(iterable)
+        self._buf = bytearray()
+        self._eof = False
+
+    def read(self, n=-1):
+        # If n < 0, return everything until EOF
+        if n is None or n < 0:
+            chunks = [bytes(self._buf)]
+            self._buf.clear()
+            chunks.extend(self._iter)  # drain the iterator
+            return b"".join(chunks)
+
+        # Fill buffer until we have n bytes or hit EOF
+        while len(self._buf) < n and not self._eof:
+            try:
+                self._buf.extend(next(self._iter))
+            except StopIteration:
+                self._eof = True
+                break
+
+        # Serve up to n bytes
+        out = bytes(self._buf[:n])
+        del self._buf[:n]
+        return out
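
Finally, _AiterReader adapts an iterable of byte chunks to the blocking read(n) interface that ijson's file-oriented API expects, so a chunked HTTP body can be parsed without buffering it whole. A sketch (the chunk boundaries here are arbitrary):

    import ijson

    chunks = [b'[{"id"', b': 1}, {"id": 2}', b']']  # e.g. response.iter_bytes()

    reader = _AiterReader(chunks)
    for obj in ijson.items(reader, "item"):
        print(obj)  # {'id': 1} then {'id': 2}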