port-ocean 0.30.7__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
@@ -1,22 +1,14 @@
  import asyncio
- import json
  import multiprocessing
- import os
  import re
- import shutil
- import stat
- import subprocess
- import tempfile
  from contextlib import contextmanager
- from typing import Any, AsyncGenerator, Awaitable, Callable, Generator, cast
- import copy
+ from typing import Any, AsyncGenerator, Awaitable, Callable, Generator

  import ijson
  from loguru import logger

  from port_ocean.clients.port.utils import _http_client as _port_http_client
  from port_ocean.context.ocean import ocean
- from port_ocean.core.handlers.entity_processor.jq_entity_processor import JQEntityProcessor
  from port_ocean.core.ocean_types import (
      ASYNC_GENERATOR_RESYNC_TYPE,
      RAW_RESULT,
@@ -32,122 +24,6 @@ from port_ocean.exceptions.core import (
  from port_ocean.helpers.metric.metric import MetricType, MetricPhase
  from port_ocean.utils.async_http import _http_client

- def _process_path_type_items(
-     result: RAW_RESULT, items_to_parse: str | None = None
- ) -> RAW_RESULT:
-     """
-     Process items in the result array to check for "__type": "path" fields.
-     If found, read the file contents and load them into a "content" field.
-     Skip processing if we're on the items_to_parse branch.
-     """
-     if not isinstance(result, list):
-         return result
-
-     # Skip processing if we're on the items_to_parse branch
-     if items_to_parse:
-         return result
-
-     processed_result = []
-     for item in result:
-         if isinstance(item, dict) and item.get("__type") == "path":
-             try:
-                 # Read the file content and parse as JSON
-                 file_path = item.get("file", {}).get("content", {}).get("path")
-                 if file_path and os.path.exists(file_path):
-                     with open(file_path, "r") as f:
-                         content = json.loads(f.read())
-                     # Create a copy of the item with the content field
-                     processed_item = item.copy()
-                     processed_item["file"]["content"] = content
-                     processed_result.append(processed_item)
-                 else:
-                     # If file doesn't exist, keep the original item
-                     processed_result.append(item)
-             except (json.JSONDecodeError, IOError, OSError) as e:
-                 if isinstance(item, dict) and item.get("file") is not None:
-                     content = item["file"].get("content") if isinstance(item["file"].get("content"), dict) else {}
-                     data_path = content.get("path", None)
-                     logger.warning(
-                         f"Failed to read or parse file content for path {data_path}: {e}"
-                     )
-                 else:
-                     logger.warning(
-                         f"Failed to read or parse file content for unknown path: {e}. item: {json.dumps(item)}"
-                     )
-                 # Keep the original item if there's an error
-                 processed_result.append(item)
-         else:
-             # Keep non-path type items as is
-             processed_result.append(item)
-
-     return processed_result
-
- @contextmanager
- def resync_error_handling() -> Generator[None, None, None]:
-     try:
-         yield
-     except RawObjectValidationException as error:
-         err_msg = f"Failed to validate raw data for returned data from resync function, error: {error}"
-         logger.exception(err_msg)
-         raise OceanAbortException(err_msg) from error
-     except StopAsyncIteration:
-         raise
-     except Exception as error:
-         err_msg = f"Failed to execute resync function, error: {error}"
-         logger.exception(err_msg)
-         raise OceanAbortException(err_msg) from error
-
-
- async def resync_function_wrapper(
-     fn: Callable[[str], Awaitable[RAW_RESULT]], kind: str, items_to_parse: str | None = None
- ) -> RAW_RESULT:
-     with resync_error_handling():
-         results = await fn(kind)
-         validated_results = validate_result(results)
-         return _process_path_type_items(validated_results, items_to_parse)
-
-
- async def resync_generator_wrapper(
-     fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse_name: str, items_to_parse: str | None = None
- ) -> ASYNC_GENERATOR_RESYNC_TYPE:
-     generator = fn(kind)
-     errors = []
-     try:
-         while True:
-             try:
-                 with resync_error_handling():
-                     result = await anext(generator)
-                     if not ocean.config.yield_items_to_parse:
-                         validated_result = validate_result(result)
-                         processed_result = _process_path_type_items(validated_result,items_to_parse)
-                         yield processed_result
-                     else:
-                         if items_to_parse:
-                             for data in result:
-                                 data_path: str | None = None
-                                 if isinstance(data, dict) and data.get("__type") == "path":
-                                     content = data.get("file", {}).get("content") if isinstance(data["file"].get("content"), dict) else {}
-                                     data_path = content.get("path", None)
-                                 bulks = get_items_to_parse_bulks(data, data_path, items_to_parse, items_to_parse_name, data.get("__base_jq", ".file.content"))
-                                 async for bulk in bulks:
-                                     yield bulk
-                         else:
-                             validated_result = validate_result(result)
-                             processed_result = _process_path_type_items(validated_result, items_to_parse)
-                             yield processed_result
-             except OceanAbortException as error:
-                 errors.append(error)
-                 ocean.metrics.inc_metric(
-                     name=MetricType.OBJECT_COUNT_NAME,
-                     labels=[ocean.metrics.current_resource_kind(), MetricPhase.EXTRACT , MetricPhase.ExtractResult.FAILED],
-                     value=1
-                 )
-     except StopAsyncIteration:
-         if errors:
-             raise ExceptionGroup(
-                 "At least one of the resync generator iterations failed", errors
-             )
-
  def extract_jq_deletion_path_revised(jq_expression: str) -> str | None:
      """
      Revised function to extract a simple path suitable for del() by analyzing pipe segments.
@@ -204,113 +80,90 @@ def extract_jq_deletion_path_revised(jq_expression: str) -> str | None:
      # Default case: No suitable path found after checking all segments
      return None

-
- def is_resource_supported(
-     kind: str, resync_event_mapping: dict[str | None, list[RESYNC_EVENT_LISTENER]]
- ) -> bool:
-     return bool(resync_event_mapping[kind] or resync_event_mapping[None])
-
- def _validate_jq_expression(expression: str) -> None:
-     """Validate jq expression to prevent command injection."""
+ @contextmanager
+ def resync_error_handling() -> Generator[None, None, None]:
      try:
-         _ = cast(JQEntityProcessor, ocean.app.integration.entity_processor)._compile(expression)
-     except Exception as e:
-         raise ValueError(f"Invalid jq expression: {e}") from e
-     # Basic validation - reject expressions that could be dangerous
-     # Check for dangerous patterns (include, import, module)
-     dangerous_patterns = ['include', 'import', 'module', 'env', 'debug']
-     for pattern in dangerous_patterns:
-         # Use word boundary regex to match only complete words, not substrings
-         if re.search(rf'\b{re.escape(pattern)}\b', expression):
-             raise ValueError(f"Potentially dangerous pattern '{pattern}' found in jq expression")
-
-     # Special handling for 'env' - block environment variable access
-     if re.search(r'(?<!\w)\$ENV(?:\.)?', expression):
-         raise ValueError("Environment variable access '$ENV.' found in jq expression")
-     if re.search(r'\benv\.', expression):
-         raise ValueError("Environment variable access 'env.' found in jq expression")
+         yield
+     except RawObjectValidationException as error:
+         err_msg = f"Failed to validate raw data for returned data from resync function, error: {error}"
+         logger.exception(err_msg)
+         raise OceanAbortException(err_msg) from error
+     except StopAsyncIteration:
+         raise
+     except Exception as error:
+         err_msg = f"Failed to execute resync function, error: {error}"
+         logger.exception(err_msg)
+         raise OceanAbortException(err_msg) from error

- def _create_secure_temp_file(suffix: str = ".json") -> str:
-     """Create a secure temporary file with restricted permissions."""
-     # Create temp directory if it doesn't exist
-     temp_dir = "/tmp/ocean"
-     os.makedirs(temp_dir, exist_ok=True)

-     # Create temporary file with secure permissions
-     fd, temp_path = tempfile.mkstemp(suffix=suffix, dir=temp_dir)
-     try:
-         # Set restrictive permissions (owner read/write only)
-         os.chmod(temp_path, stat.S_IRUSR | stat.S_IWUSR)
-         return temp_path
-     finally:
-         os.close(fd)
+ async def resync_function_wrapper(
+     fn: Callable[[str], Awaitable[RAW_RESULT]], kind: str, items_to_parse: str | None = None
+ ) -> RAW_RESULT:
+     with resync_error_handling():
+         results = await fn(kind)
+         return validate_result(results)

- async def get_items_to_parse_bulks(raw_data: dict[Any, Any], data_path: str, items_to_parse: str, items_to_parse_name: str, base_jq: str) -> AsyncGenerator[list[dict[str, Any]], None]:
-     # Validate inputs to prevent command injection
-     _validate_jq_expression(items_to_parse)
-     items_to_parse = items_to_parse.replace(base_jq, ".") if data_path else items_to_parse
+ async def handle_items_to_parse(result: RAW_RESULT, items_to_parse_name: str, items_to_parse: str | None = None) -> AsyncGenerator[list[dict[str, Any]], None]:
+     delete_target = extract_jq_deletion_path_revised(items_to_parse) or '.'
+     jq_expression = f". | del({delete_target})"
+     batch_size = ocean.config.yield_items_to_parse_batch_size

-     temp_data_path = None
-     temp_output_path = None
+     for item in result:
+         lean_item = await ocean.app.integration.entity_processor._search(item, jq_expression)
+         items_to_parse_data = await ocean.app.integration.entity_processor._search(item, items_to_parse)
+         if not isinstance(items_to_parse_data, list):
+             logger.warning(
+                 f"Failed to parse items for JQ expression {items_to_parse}, Expected list but got {type(items_to_parse_data)}."
+                 f" Skipping..."
+             )
+             continue
+         batch = []
+         while len(items_to_parse_data) > 0:
+             if (len(batch) >= batch_size):
+                 yield batch
+                 batch = []
+             merged_item = {**lean_item}
+             merged_item[items_to_parse_name] = items_to_parse_data.pop(0)
+             batch.append(merged_item)
+         if len(batch) > 0:
+             yield batch

+ async def resync_generator_wrapper(
+     fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse_name: str, items_to_parse: str | None = None
+ ) -> ASYNC_GENERATOR_RESYNC_TYPE:
+     generator = fn(kind)
+     errors = []
      try:
-         is_path_type = True
-         # Create secure temporary files
-         if not data_path:
-             raw_data_serialized = json.dumps(raw_data)
-             temp_data_path = _create_secure_temp_file("_input.json")
-             with open(temp_data_path, "w") as f:
-                 f.write(raw_data_serialized)
-             data_path = temp_data_path
-             is_path_type = False
-
-         temp_output_path = _create_secure_temp_file("_parsed.json")
-
-         delete_target = extract_jq_deletion_path_revised(items_to_parse) or '.'
-
-         # Build jq expression safely
-         jq_expression = f""". as $all
-         | ($all | {items_to_parse}) as $items
-         | $items
-         | {_build_mapping_jq_expression(items_to_parse_name, base_jq, delete_target, is_path_type)}"""
+         while True:
+             try:
+                 with resync_error_handling():
+                     result = validate_result(await anext(generator))

-         # Use subprocess with list arguments instead of shell=True
+                     if items_to_parse:
+                         async for batch in handle_items_to_parse(result, items_to_parse_name, items_to_parse):
+                             yield batch
+                     else:
+                         yield result

-         jq_path = shutil.which("jq") or "/bin/jq"
-         jq_args = [jq_path, jq_expression, data_path]

-         with open(temp_output_path, "w") as output_file:
-             result = subprocess.run(
-                 jq_args,
-                 stdout=output_file,
-                 stderr=subprocess.PIPE,
-                 text=True,
-                 check=False # Don't raise exception, handle errors manually
+             except OceanAbortException as error:
+                 errors.append(error)
+                 ocean.metrics.inc_metric(
+                     name=MetricType.OBJECT_COUNT_NAME,
+                     labels=[ocean.metrics.current_resource_kind(), MetricPhase.EXTRACT , MetricPhase.ExtractResult.FAILED],
+                     value=1
+                 )
+     except StopAsyncIteration:
+         if errors:
+             raise ExceptionGroup(
+                 "At least one of the resync generator iterations failed", errors
              )

-         if result.returncode != 0:
-             logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {result.stderr}")
-             yield []
-         else:
-             with open(temp_output_path, "r") as f:
-                 events_stream = get_events_as_a_stream(f, 'item', ocean.config.yield_items_to_parse_batch_size)
-                 for items_bulk in events_stream:
-                     yield items_bulk if not is_path_type else [merge_raw_data_to_item(item, raw_data) for item in items_bulk]

-     except ValueError as e:
-         logger.error(f"Invalid jq expression: {e}")
-         yield []
-     except Exception as e:
-         logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {e}")
-         yield []
-     finally:
-         # Cleanup temporary files
-         for temp_path in [temp_data_path, temp_output_path]:
-             if temp_path and os.path.exists(temp_path):
-                 try:
-                     os.remove(temp_path)
-                 except OSError as e:
-                     logger.warning(f"Failed to cleanup temporary file {temp_path}: {e}")
+ def is_resource_supported(
+     kind: str, resync_event_mapping: dict[str | None, list[RESYNC_EVENT_LISTENER]]
+ ) -> bool:
+     return bool(resync_event_mapping[kind] or resync_event_mapping[None])

  def unsupported_kind_response(
      kind: str, available_resync_kinds: list[str]
@@ -318,89 +171,6 @@ def unsupported_kind_response(
      logger.error(f"Kind {kind} is not supported in this integration")
      return [], [KindNotImplementedException(kind, available_resync_kinds)]

- def _build_mapping_jq_expression(items_to_parse_name: str, base_jq: str, delete_target: str, is_path_type: bool = False) -> str:
-     if is_path_type:
-         return f"map({{{items_to_parse_name}: . }} | {base_jq} = (($all | del({delete_target})) // {{}}))"
-     return f"map(($all | del({delete_target})) + {{{items_to_parse_name}: . }})"
-
- def merge_raw_data_to_item(item: dict[str, Any], raw_data: dict[str, Any]) -> dict[str, Any]:
-     return recursive_dict_merge(raw_data, item)
-
- def recursive_dict_merge(d1: dict[Any, Any], d2: dict[Any, Any]) -> dict[Any, Any]:
-     """
-     Recursively merges dict d2 into dict d1.
-
-     If a key exists in both dictionaries:
-     1. If the value in d2 is an empty dictionary (e.g., {}), it overwrites the value in d1.
-     2. If both values are non-empty dictionaries, they are merged recursively.
-     3. Otherwise, the value from d2 overwrites the value from d1.
-
-     The original dictionaries are not modified (d1 is copied).
-
-     Args:
-         d1: The base dictionary (will be copied and modified).
-         d2: The dictionary to merge into d1.
-
-     Returns:
-         The merged dictionary.
-     """
-     # Start with a copy of d1 to ensure d1 is not mutated
-     merged_dict = copy.deepcopy(d1)
-
-     for key, value in d2.items():
-         # Condition to trigger recursive deep merge:
-         # 1. Key exists in merged_dict
-         # 2. Both values are dictionaries
-         # 3. The value from d2 is NOT an empty dictionary ({}).
-         # If d2's value is {}, we treat it as an explicit instruction to overwrite/clear.
-         is_deep_merge = (
-             key in merged_dict and
-             isinstance(merged_dict[key], dict) and
-             isinstance(value, dict) and
-             value != {}
-         )
-
-         if is_deep_merge:
-             # If both are dictionaries and d2 is not empty, recurse
-             merged_dict[key] = recursive_dict_merge(merged_dict[key], value)
-         else:
-             # Otherwise (new key, non-dict value, or explicit {} overwrite),
-             # overwrite the value from d1 with the value from d2
-             merged_dict[key] = value
-
-     return merged_dict
-
- def get_events_as_a_stream(
-     stream: Any,
-     target_items: str = "item",
-     max_buffer_size_mb: int = 1
- ) -> Generator[list[dict[str, Any]], None, None]:
-     events = ijson.sendable_list()
-     coro = ijson.items_coro(events, target_items, use_float=True)
-
-     # Convert MB to bytes for the buffer size
-     buffer_size = max_buffer_size_mb * 1024 * 1024
-
-     # Read chunks from the stream until exhausted
-     while True:
-         chunk = stream.read(buffer_size)
-         if not chunk: # End of stream
-             break
-
-         # Convert string to bytes if necessary (for text mode files)
-         if isinstance(chunk, str):
-             chunk = chunk.encode('utf-8')
-
-         coro.send(chunk)
-         yield events
-         del events[:]
-     try:
-         coro.close()
-     finally:
-         if events:
-             yield events
-             events[:] = []
-
  class ProcessWrapper(multiprocessing.Process):
      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)