port-ocean 0.28.4__py3-none-any.whl → 0.28.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of port-ocean might be problematic.
- integrations/_infra/Dockerfile.Deb +6 -1
- integrations/_infra/Dockerfile.local +1 -0
- port_ocean/core/handlers/entity_processor/jq_entity_processor.py +339 -17
- port_ocean/core/handlers/entity_processor/jq_input_evaluator.py +69 -0
- port_ocean/core/handlers/port_app_config/models.py +1 -1
- port_ocean/core/integrations/mixins/sync_raw.py +1 -1
- port_ocean/core/integrations/mixins/utils.py +235 -23
- port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +1 -1
- {port_ocean-0.28.4.dist-info → port_ocean-0.28.7.dist-info}/METADATA +1 -1
- {port_ocean-0.28.4.dist-info → port_ocean-0.28.7.dist-info}/RECORD +13 -12
- {port_ocean-0.28.4.dist-info → port_ocean-0.28.7.dist-info}/LICENSE.md +0 -0
- {port_ocean-0.28.4.dist-info → port_ocean-0.28.7.dist-info}/WHEEL +0 -0
- {port_ocean-0.28.4.dist-info → port_ocean-0.28.7.dist-info}/entry_points.txt +0 -0
integrations/_infra/Dockerfile.Deb

@@ -48,6 +48,7 @@ RUN apt-get update \
     curl \
     acl \
     sudo \
+    jq \
     && apt-get clean

 LABEL INTEGRATION_VERSION=${INTEGRATION_VERSION}
@@ -69,18 +70,22 @@ COPY --from=base /app/.venv /app/.venv
 COPY ./integrations/_infra/init.sh /app/init.sh

 USER root
+
 # Ensure that ocean is available for all in path
 RUN chmod a+x /app/.venv/bin/ocean

 RUN chmod a+x /app/init.sh
 RUN ln -s /app/.venv/bin/ocean /usr/bin/ocean
+
 # Add ocean user to ssl certs group
-RUN setfacl -m u:ocean:rwX /etc/ssl/certs
+RUN setfacl -R -m u:ocean:rwX /etc/ssl/certs \
+    && setfacl -d -m u:ocean:rwX /etc/ssl/certs

 # Allow ocean user to run update-ca-certificates without password (secure, limited sudo)
 RUN echo "ocean ALL=(root) NOPASSWD: /usr/sbin/update-ca-certificates" >> /etc/sudoers.d/ocean-certs \
     && chmod 440 /etc/sudoers.d/ocean-certs

 USER ocean
+
 # Run the application
 CMD ["bash", "/app/init.sh"]
port_ocean/core/handlers/entity_processor/jq_entity_processor.py

@@ -1,12 +1,11 @@
 import asyncio
 from asyncio import Task
 from dataclasses import dataclass, field
-
 from functools import lru_cache
+import json
 from typing import Any, Optional
 import jq  # type: ignore
 from loguru import logger
-
 from port_ocean.context.ocean import ocean
 from port_ocean.core.handlers.entity_processor.base import BaseEntityProcessor
 from port_ocean.core.handlers.port_app_config.models import ResourceConfig
@@ -22,6 +21,11 @@ from port_ocean.core.utils.utils import (
 )
 from port_ocean.exceptions.core import EntityProcessorException
 from port_ocean.utils.queue_utils import process_in_queue
+from port_ocean.core.handlers.entity_processor.jq_input_evaluator import (
+    InputEvaluationResult,
+    evaluate_input,
+    should_shortcut_no_input,
+)


 class ExampleStates:
@@ -76,7 +80,7 @@ class MappedEntity:

     entity: dict[str, Any] = field(default_factory=dict)
     did_entity_pass_selector: bool = False
-    raw_data: Optional[dict[str, Any]] = None
+    raw_data: Optional[dict[str, Any] | tuple[dict[str, Any], str]] = None
     misconfigurations: dict[str, str] = field(default_factory=dict)

@@ -133,17 +137,35 @@ class JQEntityProcessor(BaseEntityProcessor):
             return await loop.run_in_executor(
                 None, self._stop_iterator_handler(func.first)
             )
+        except Exception as exc:
+            logger.error(
+                f"Search failed for pattern '{pattern}' in data: {data}, Error: {exc}"
+            )
+            return None
+
+    @lru_cache
+    async def _search_stringified(self, data: str, pattern: str) -> Any:
+        try:
+            loop = asyncio.get_event_loop()
+            compiled_pattern = self._compile(pattern)
+            func = compiled_pattern.input_text(data)
+            return await loop.run_in_executor(
+                None, self._stop_iterator_handler(func.first)
+            )
         except Exception as exc:
             logger.debug(
                 f"Search failed for pattern '{pattern}' in data: {data}, Error: {exc}"
             )
             return None

-    async def _search_as_bool(self, data: dict[str, Any], pattern: str) -> bool:
+    async def _search_as_bool(self, data: dict[str, Any] | str, pattern: str) -> bool:
         loop = asyncio.get_event_loop()

         compiled_pattern = self._compile(pattern)
-        func = compiled_pattern.input_value(data)
+        if isinstance(data, str):
+            func = compiled_pattern.input_text(data)
+        else:
+            func = compiled_pattern.input_value(data)

         value = await loop.run_in_executor(
             None, self._stop_iterator_handler(func.first)
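
Note: the new `_search_stringified` path and the string branch in `_search_as_bool` lean on the jq bindings' two input modes: `input_value` feeds an already-parsed Python object, while `input_text` hands raw JSON text to jq and lets it do the parsing. A minimal sketch of the difference, using the same `jq` package the processor imports:

    import jq

    program = jq.compile(".name")

    # Structured input: jq receives an already-parsed Python value
    assert program.input_value({"name": "ocean"}).first() == "ocean"

    # Stringified input: jq parses the JSON text itself, which is what
    # _search_stringified uses to avoid re-serializing large payloads
    assert program.input_text('{"name": "ocean"}').first() == "ocean"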
@@ -156,7 +178,7 @@ class JQEntityProcessor(BaseEntityProcessor):

     async def _search_as_object(
         self,
-        data: dict[str, Any],
+        data: dict[str, Any] | str,
         obj: dict[str, Any],
         misconfigurations: dict[str, str] | None = None,
     ) -> dict[str, Any | None]:
@@ -188,7 +210,12 @@ class JQEntityProcessor(BaseEntityProcessor):
                     self._search_as_object(data, value, misconfigurations)
                 )
             else:
-                search_tasks[key] = asyncio.create_task(self._search(data, value))
+                if isinstance(data, str):
+                    search_tasks[key] = asyncio.create_task(
+                        self._search_stringified(data, value)
+                    )
+                else:
+                    search_tasks[key] = asyncio.create_task(self._search(data, value))

         result: dict[str, Any | None] = {}
         for key, task in search_tasks.items():
@@ -212,16 +239,18 @@ class JQEntityProcessor(BaseEntityProcessor):

     async def _get_mapped_entity(
         self,
-        data: dict[str, Any],
+        data: dict[str, Any] | tuple[dict[str, Any], str],
         raw_entity_mappings: dict[str, Any],
+        items_to_parse_key: str | None,
         selector_query: str,
         parse_all: bool = False,
     ) -> MappedEntity:
-        should_run = await self._search_as_bool(data, selector_query)
+        should_run = await self._should_map_entity(
+            data, selector_query, items_to_parse_key
+        )
         if parse_all or should_run:
-            misconfigurations: dict[str, str] = {}
-            mapped_entity = await self._search_as_object(
-                data, raw_entity_mappings, misconfigurations
-            )
+            misconfigurations, mapped_entity = await self._map_entity(
+                data, raw_entity_mappings, items_to_parse_key
+            )
         return MappedEntity(
             mapped_entity,
@@ -237,6 +266,69 @@ class JQEntityProcessor(BaseEntityProcessor):
             misconfigurations={},
         )

+    async def _map_entity(
+        self,
+        data: dict[str, Any] | tuple[dict[str, Any], str],
+        raw_entity_mappings: dict[str, Any],
+        items_to_parse_key: str | None,
+    ) -> tuple[dict[str, str], dict[str, Any]]:
+        if not items_to_parse_key:
+            misconfigurations: dict[str, str] = {}
+            data_to_search = data if isinstance(data, dict) else data[0]
+            mapped_entity = await self._search_as_object(
+                data_to_search, raw_entity_mappings, misconfigurations
+            )
+            return misconfigurations, mapped_entity
+
+        modified_data: tuple[dict[str, Any], str | dict[str, Any]] = (
+            data
+            if isinstance(data, tuple)
+            else (
+                {items_to_parse_key: data[items_to_parse_key]},
+                data,
+            )
+        )
+
+        misconfigurations_item: dict[str, str] = {}
+        misconfigurations_all: dict[str, str] = {}
+        mapped_entity_item = await self._search_as_object(
+            modified_data[0], raw_entity_mappings["item"], misconfigurations_item
+        )
+        if misconfigurations_item:
+            filtered_item_mappings = self._filter_mappings_by_keys(
+                raw_entity_mappings["item"], list(misconfigurations_item.keys())
+            )
+            raw_entity_mappings["all"] = self._deep_merge(
+                raw_entity_mappings["all"], filtered_item_mappings
+            )
+        mapped_entity_all = await self._search_as_object(
+            modified_data[1], raw_entity_mappings["all"], misconfigurations_all
+        )
+        mapped_entity_empty = await self._search_as_object(
+            {}, raw_entity_mappings["empty"], misconfigurations_all
+        )
+        mapped_entity = self._deep_merge(mapped_entity_item, mapped_entity_all)
+        mapped_entity = self._deep_merge(mapped_entity, mapped_entity_empty)
+        return misconfigurations_all, mapped_entity
+
+    async def _should_map_entity(
+        self,
+        data: dict[str, Any] | tuple[dict[str, Any], str],
+        selector_query: str,
+        items_to_parse_key: str | None,
+    ) -> bool:
+        if should_shortcut_no_input(selector_query):
+            return await self._search_as_bool({}, selector_query)
+        if isinstance(data, tuple):
+            return await self._search_as_bool(
+                data[0], selector_query
+            ) or await self._search_as_bool(data[1], selector_query)
+        if items_to_parse_key:
+            return await self._search_as_bool(
+                data[items_to_parse_key], selector_query
+            ) or await self._search_as_bool(data, selector_query)
+        return await self._search_as_bool(data, selector_query)
+
     async def _calculate_entity(
         self,
         data: dict[str, Any],
@@ -246,9 +338,16 @@ class JQEntityProcessor(BaseEntityProcessor):
         selector_query: str,
         parse_all: bool = False,
     ) -> tuple[list[MappedEntity], list[Exception]]:
-        raw_data = [
-            data.copy()
-        ]
+        raw_data: list[dict[str, Any]] | list[tuple[dict[str, Any], str]] = [
+            data.copy()
+        ]
+        items_to_parse_key = None
+        if items_to_parse:
+            items_to_parse_key = items_to_parse_name
+            if not ocean.config.yield_items_to_parse:
+                if data.get("file", {}).get("content", {}).get("path", None):
+                    with open(data["file"]["content"]["path"], "r") as f:
+                        data["file"]["content"] = json.loads(f.read())
             items = await self._search(data, items_to_parse)
             if not isinstance(items, list):
                 logger.warning(
@@ -256,13 +355,28 @@ class JQEntityProcessor(BaseEntityProcessor):
                     f" Skipping..."
                 )
                 return [], []
-            raw_data = [{"item": item, **data} for item in items]
+            raw_all_payload_stringified = json.dumps(data)
+            raw_data = [
+                ({items_to_parse_name: item}, raw_all_payload_stringified)
+                for item in items
+            ]
+            single_item_mappings, all_items_mappings, empty_items_mappings = (
+                self._build_raw_entity_mappings(
+                    raw_entity_mappings, items_to_parse_name
+                )
+            )
+            raw_entity_mappings = {
+                "item": single_item_mappings,
+                "all": all_items_mappings,
+                "empty": empty_items_mappings,
+            }

         entities, errors = await gather_and_split_errors_from_results(
             [
                 self._get_mapped_entity(
                     raw,
                     raw_entity_mappings,
+                    items_to_parse_key,
                     selector_query,
                     parse_all,
                 )
@@ -275,6 +389,201 @@ class JQEntityProcessor(BaseEntityProcessor):
         )
         return entities, errors

+    def _build_raw_entity_mappings(
+        self, raw_entity_mappings: dict[str, Any], items_to_parse_name: str
+    ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
+        """Filter entity mappings to only include values that start with f'.{items_to_parse_name}'"""
+        mappings: dict[InputEvaluationResult, dict[str, Any]] = {
+            InputEvaluationResult.NONE: {},
+            InputEvaluationResult.SINGLE: {},
+            InputEvaluationResult.ALL: {},
+        }
+        pattern = f".{items_to_parse_name}"
+        for key, value in raw_entity_mappings.items():
+            if isinstance(value, str):
+                # Direct string values (identifier, title, icon, blueprint, team)
+                self.group_string_mapping_value(
+                    pattern,
+                    mappings,
+                    key,
+                    value,
+                )
+            elif isinstance(value, dict):
+                # Complex objects (IngestSearchQuery for identifier/team, properties, relations)
+                self.group_complex_mapping_value(
+                    pattern,
+                    mappings,
+                    key,
+                    value,
+                )
+        return (
+            mappings[InputEvaluationResult.SINGLE],
+            mappings[InputEvaluationResult.ALL],
+            mappings[InputEvaluationResult.NONE],
+        )
+
+    def group_complex_mapping_value(
+        self,
+        pattern: str,
+        mappings: dict[InputEvaluationResult, dict[str, Any]],
+        key: str,
+        value: dict[str, Any],
+    ) -> None:
+        mapping_dicts: dict[InputEvaluationResult, dict[str, Any]] = {
+            InputEvaluationResult.SINGLE: {},
+            InputEvaluationResult.ALL: {},
+            InputEvaluationResult.NONE: {},
+        }
+        if key in ["properties", "relations"]:
+            # For properties and relations, filter the dictionary values
+            for dict_key, dict_value in value.items():
+                if isinstance(dict_value, str):
+                    self.group_string_mapping_value(
+                        pattern,
+                        mapping_dicts,
+                        dict_key,
+                        dict_value,
+                    )
+                elif isinstance(dict_value, dict):
+                    # Handle IngestSearchQuery objects
+                    self.group_search_query_mapping_value(
+                        pattern,
+                        mapping_dicts[InputEvaluationResult.SINGLE],
+                        mapping_dicts[InputEvaluationResult.ALL],
+                        dict_key,
+                        dict_value,
+                    )
+        else:
+            # For identifier/team IngestSearchQuery objects
+            self.group_search_query_mapping_value(
+                pattern,
+                mapping_dicts[InputEvaluationResult.SINGLE],
+                mapping_dicts[InputEvaluationResult.ALL],
+                key,
+                value,
+            )
+        if mapping_dicts[InputEvaluationResult.SINGLE]:
+            mappings[InputEvaluationResult.SINGLE][key] = mapping_dicts[
+                InputEvaluationResult.SINGLE
+            ][key]
+        if mapping_dicts[InputEvaluationResult.ALL]:
+            mappings[InputEvaluationResult.ALL][key] = mapping_dicts[
+                InputEvaluationResult.ALL
+            ][key]
+        if mapping_dicts[InputEvaluationResult.NONE]:
+            mappings[InputEvaluationResult.NONE][key] = mapping_dicts[
+                InputEvaluationResult.NONE
+            ][key]
+
+    def group_search_query_mapping_value(
+        self,
+        pattern: str,
+        single_item_dict: dict[str, Any],
+        all_item_dict: dict[str, Any],
+        dict_key: str,
+        dict_value: dict[str, Any],
+    ) -> None:
+        if self._should_keep_ingest_search_query(dict_value, pattern):
+            single_item_dict[dict_key] = dict_value
+        else:
+            all_item_dict[dict_key] = dict_value
+
+    def group_string_mapping_value(
+        self,
+        pattern: str,
+        mappings: dict[InputEvaluationResult, dict[str, Any]],
+        key: str,
+        value: str,
+    ) -> None:
+        input_evaluation_result = evaluate_input(value, pattern)
+        mappings[input_evaluation_result][key] = value
+
+    def _should_keep_ingest_search_query(
+        self, query_dict: dict[str, Any], pattern: str
+    ) -> bool:
+        """Check if an IngestSearchQuery should be kept based on its rules"""
+        if "rules" not in query_dict:
+            return False
+
+        rules = query_dict["rules"]
+        if not isinstance(rules, list):
+            return False
+
+        # Check if any rule contains a value starting with the pattern
+        for rule in rules:
+            if isinstance(rule, dict):
+                if "value" in rule and isinstance(rule["value"], str):
+                    if pattern in rule["value"]:
+                        return True
+                # Recursively check nested IngestSearchQuery objects
+                elif "rules" in rule:
+                    if self._should_keep_ingest_search_query(rule, pattern):
+                        return True
+        return False
+
+    def _filter_mappings_by_keys(
+        self, mappings: dict[str, Any], target_keys: list[str]
+    ) -> dict[str, Any]:
+        """
+        Filter mappings to preserve structure with only the specified keys present.
+        Recursively handles nested dictionaries and lists, searching for keys at any level.
+        """
+        if not target_keys:
+            return {}
+
+        filtered_mappings: dict[str, Any] = {}
+
+        for key, value in mappings.items():
+            filtered_value = self._process_mapping_value(key, value, target_keys)
+
+            # Include if it's a direct match or contains nested target keys
+            if key in target_keys or filtered_value:
+                filtered_mappings[key] = filtered_value
+
+        return filtered_mappings
+
+    def _process_mapping_value(
+        self, key: str, value: Any, target_keys: list[str]
+    ) -> Any:
+        """Process a single mapping value, handling different types recursively."""
+        if isinstance(value, dict):
+            # Recursively filter nested dictionary
+            filtered_dict = self._filter_mappings_by_keys(value, target_keys)
+            return filtered_dict if filtered_dict else None
+        else:
+            # Return simple values as-is
+            return value if key in target_keys else None
+
+    def _deep_merge(
+        self, dict1: dict[str, Any], dict2: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Deep merge two dictionaries, preserving nested structures.
+        Values from dict2 override values from dict1 for the same keys.
+        """
+        result = dict1.copy()
+
+        for key, value in dict2.items():
+            if (
+                key in result
+                and isinstance(result[key], dict)
+                and isinstance(value, dict)
+            ):
+                # Recursively merge nested dictionaries
+                result[key] = self._deep_merge(result[key], value)
+            elif (
+                key in result
+                and isinstance(result[key], list)
+                and isinstance(value, list)
+            ):
+                # Merge lists by extending
+                result[key].extend(value)
+            else:
+                # Override with value from dict2
+                result[key] = value
+
+        return result
+
     @staticmethod
     async def _send_examples(data: list[dict[str, Any]], kind: str) -> None:
         try:
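
Note: `_deep_merge` is what stitches the three partial results (per-item, whole-payload, and empty-input mappings) back into one entity: nested dicts merge key by key, lists concatenate, and scalar conflicts resolve in favor of the second argument. A small illustration of those semantics, with hypothetical inputs and `processor` standing for a JQEntityProcessor instance:

    item_part = {"properties": {"name": ".item.name", "tags": ["from-item"]}}
    all_part = {"properties": {"repo": ".repository.name", "tags": ["from-all"]}}

    merged = processor._deep_merge(item_part, all_part)
    # {"properties": {"name": ".item.name", "repo": ".repository.name",
    #                 "tags": ["from-item", "from-all"]}}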
@@ -329,7 +638,10 @@ class JQEntityProcessor(BaseEntityProcessor):
                 and result.raw_data is not None
             ):
                 examples_to_send.add_example(
-                    result.did_entity_pass_selector, result.raw_data
+                    result.did_entity_pass_selector,
+                    self._get_raw_data_for_example(
+                        result.raw_data, mapping.port.items_to_parse_name
+                    ),
                 )

             if result.entity.get("identifier") and result.entity.get("blueprint"):
@@ -355,3 +667,13 @@ class JQEntityProcessor(BaseEntityProcessor):
             errors,
             misconfigured_entity_keys=entity_misconfigurations,
         )
+
+    def _get_raw_data_for_example(
+        self,
+        data: dict[str, Any] | tuple[dict[str, Any], str],
+        items_to_parse_name: str,
+    ) -> dict[str, Any]:
+        if isinstance(data, tuple):
+            raw_data = json.loads(data[1])
+            return {items_to_parse_name: data[0], **raw_data}
+        return data
port_ocean/core/handlers/entity_processor/jq_input_evaluator.py (new file)

@@ -0,0 +1,69 @@
+import re
+from enum import Enum
+
+
+class InputEvaluationResult(Enum):
+    NONE = 1
+    SINGLE = 2
+    ALL = 3
+
+
+# Conservative allowlist: truly nullary jq expressions
+_ALLOWLIST_PATTERNS = [
+    r"^\s*null\s*$",  # null
+    r"^\s*true\s*$",  # true
+    r"^\s*false\s*$",  # false
+    r"^\s*-?\d+(\.\d+)?\s*$",  # number literal
+    r'^\s*".*"\s*$',  # string literal (simple heuristic)
+    r"^\s*\[.*\]\s*$",  # array literal (includes [])
+    r"^\s*\{.*\}\s*$",  # object literal (includes {})
+    r"^\s*range\s*\(.*\)\s*$",  # range(...)
+    r"^\s*empty\s*$",  # empty
+]
+
+# Functions/filters that (even without ".") still require/assume input
+_INPUT_DEPENDENT_FUNCS = r"""
+\b(
+map|select|reverse|sort|sort_by|unique|unique_by|group_by|flatten|transpose|
+split|explode|join|add|length|has|in|index|indices|contains|
+paths|leaf_paths|keys|keys_unsorted|values|to_entries|with_entries|from_entries|
+del|delpaths|walk|reduce|foreach|input|inputs|limit|first|last|nth|
+while|until|recurse|recurse_down|bsearch|combinations|permutations
+)\b
+"""

+_INPUT_DEPENDENT_RE = re.compile(_INPUT_DEPENDENT_FUNCS, re.VERBOSE)
+
+
+def should_shortcut_no_input(selector_query: str) -> bool:
+    """
+    Returns True if the jq expression can be executed without providing any JSON input.
+    Conservative: requires NO '.' and must match a known nullary-safe pattern.
+    """
+    if "." in selector_query:
+        return False  # explicit JSON reference -> needs input
+
+    # If it contains any known input-dependent functions, don't shortcut
+    if _INPUT_DEPENDENT_RE.search(selector_query):
+        return False
+
+    # Allow only if it matches one of the nullary-safe patterns
+    for pat in _ALLOWLIST_PATTERNS:
+        if re.match(pat, selector_query):
+            return True
+
+    return False
+
+
+def evaluate_input(
+    selector_query: str, single_item_key: str | None = None
+) -> InputEvaluationResult:
+    """
+    Returns the input evaluation result for the jq expression.
+    Conservative: requires NO '.' and must match a known nullary-safe pattern.
+    """
+    if should_shortcut_no_input(selector_query):
+        return InputEvaluationResult.NONE
+    if single_item_key and single_item_key in selector_query:
+        return InputEvaluationResult.SINGLE
+    return InputEvaluationResult.ALL
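
Note: taken together, these helpers classify every mapping expression into one of three buckets, which is how `_build_raw_entity_mappings` decides whether an expression runs against the single parsed item, the full payload, or no input at all. For example, with illustrative values:

    from port_ocean.core.handlers.entity_processor.jq_input_evaluator import (
        InputEvaluationResult,
        evaluate_input,
        should_shortcut_no_input,
    )

    # A bare literal never needs input
    assert should_shortcut_no_input('"static-title"')
    assert evaluate_input('"static-title"') is InputEvaluationResult.NONE

    # Mentions the item pattern -> evaluate against the single parsed item
    assert evaluate_input(".item.name", ".item") is InputEvaluationResult.SINGLE

    # References data outside the item -> needs the full payload
    assert evaluate_input(".repository.name", ".item") is InputEvaluationResult.ALL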
port_ocean/core/handlers/port_app_config/models.py

@@ -39,7 +39,7 @@ class MappingsConfig(BaseModel):
 class PortResourceConfig(BaseModel):
     entity: MappingsConfig
     items_to_parse: str | None = Field(alias="itemsToParse")
-    items_to_parse_name: str
+    items_to_parse_name: str = Field(alias="itemsToParseName", default="item")


 class Selector(BaseModel):
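
Note: the net effect is that `itemsToParseName` becomes optional in the port-app-config and falls back to "item", so existing mappings that reference `.item` keep working. A trimmed sketch of the field behavior (the real model also requires `entity`, and `items_to_parse` has no default there; both simplified here for illustration):

    from pydantic import BaseModel, Field

    class PortResourceConfigSketch(BaseModel):
        items_to_parse: str | None = Field(alias="itemsToParse", default=None)
        items_to_parse_name: str = Field(alias="itemsToParseName", default="item")

    cfg = PortResourceConfigSketch.parse_obj({"itemsToParse": ".file.content.items"})
    assert cfg.items_to_parse_name == "item"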
port_ocean/core/integrations/mixins/sync_raw.py

@@ -117,7 +117,7 @@ class SyncRawMixin(HandlerMixin, EventsMixin):
             logger.info(
                 f"Found async generator function for {resource_config.kind} name: {task.__qualname__}"
             )
-            results.append(resync_generator_wrapper(task, resource_config.kind,resource_config.port.items_to_parse))
+            results.append(resync_generator_wrapper(task, resource_config.kind, resource_config.port.items_to_parse_name, resource_config.port.items_to_parse))
         else:
             logger.info(
                 f"Found sync function for {resource_config.kind} name: {task.__qualname__}"
port_ocean/core/integrations/mixins/utils.py

@@ -2,12 +2,11 @@ from contextlib import contextmanager
 from typing import Awaitable, Generator, Callable, cast

 from loguru import logger
-
 import asyncio
 import multiprocessing
-
+import re
+import json
 from port_ocean.core.handlers.entity_processor.jq_entity_processor import JQEntityProcessor
-from port_ocean.core.handlers.port_app_config.models import ResourceConfig
 from port_ocean.core.ocean_types import (
     ASYNC_GENERATOR_RESYNC_TYPE,
     RAW_RESULT,
@@ -20,11 +19,60 @@ from port_ocean.exceptions.core import (
     OceanAbortException,
     KindNotImplementedException,
 )
-
+import os
 from port_ocean.utils.async_http import _http_client
 from port_ocean.clients.port.utils import _http_client as _port_http_client
 from port_ocean.helpers.metric.metric import MetricType, MetricPhase
 from port_ocean.context.ocean import ocean
+import subprocess
+import tempfile
+import stat
+import ijson
+from typing import Any, AsyncGenerator
+
+def _process_path_type_items(
+    result: RAW_RESULT, items_to_parse: str | None = None
+) -> RAW_RESULT:
+    """
+    Process items in the result array to check for "__type": "path" fields.
+    If found, read the file contents and load them into a "content" field.
+    Skip processing if we're on the items_to_parse branch.
+    """
+    if not isinstance(result, list):
+        return result
+
+    # Skip processing if we're on the items_to_parse branch
+    if items_to_parse:
+        return result
+
+    processed_result = []
+    for item in result:
+        if isinstance(item, dict) and item.get("__type") == "path":
+            try:
+                # Read the file content and parse as JSON
+                file_path = item.get("file", {}).get("content", {}).get("path")
+                if file_path and os.path.exists(file_path):
+                    with open(file_path, "r") as f:
+                        content = json.loads(f.read())
+                    # Create a copy of the item with the content field
+                    processed_item = item.copy()
+                    processed_item["content"] = content
+                    processed_result.append(processed_item)
+                else:
+                    # If file doesn't exist, keep the original item
+                    processed_result.append(item)
+            except (json.JSONDecodeError, IOError, OSError) as e:
+                logger.warning(
+                    f"Failed to read or parse file content for path "
+                    f"{item.get('file', {}).get('content', {}).get('path')}: {e}"
+                )
+                # Keep the original item if there's an error
+                processed_result.append(item)
+        else:
+            # Keep non-path type items as is
+            processed_result.append(item)
+
+    return processed_result

 @contextmanager
 def resync_error_handling() -> Generator[None, None, None]:
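
Note: the shape `_process_path_type_items` looks for is an item that carries a pointer to a payload spilled to disk rather than the payload itself. A hypothetical input/output pair (path and contents invented for illustration):

    # Hypothetical item produced by a file-based integration:
    item = {
        "__type": "path",
        "file": {"content": {"path": "/tmp/ocean/example_input.json"}},
    }
    # If /tmp/ocean/example_input.json holds {"workflows": []}, then
    # _process_path_type_items([item]) returns a copy of the item with
    #     processed_item["content"] == {"workflows": []}
    # On a missing file or a JSON error the original item passes through unchanged.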
@@ -47,11 +95,12 @@ async def resync_function_wrapper(
 ) -> RAW_RESULT:
     with resync_error_handling():
         results = await fn(kind)
-        return validate_result(results)
+        validated_results = validate_result(results)
+        return _process_path_type_items(validated_results)


 async def resync_generator_wrapper(
-    fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse: str | None = None
+    fn: Callable[[str], ASYNC_GENERATOR_RESYNC_TYPE], kind: str, items_to_parse_name: str, items_to_parse: str | None = None
 ) -> ASYNC_GENERATOR_RESYNC_TYPE:
     generator = fn(kind)
     errors = []
@@ -61,27 +110,23 @@ async def resync_generator_wrapper(
             with resync_error_handling():
                 result = await anext(generator)
                 if not ocean.config.yield_items_to_parse:
-                    yield validate_result(result)
+                    validated_result = validate_result(result)
+                    processed_result = _process_path_type_items(validated_result)
+                    yield processed_result
                 else:
-                    batch_size = ocean.config.yield_items_to_parse_batch_size
                     if items_to_parse:
                         for data in result:
-                            items = await cast(JQEntityProcessor, ocean.app.integration.entity_processor)._search(data, items_to_parse)
-                            if not isinstance(items, list):
-                                logger.warning(
-                                    f"Failed to parse items for JQ expression {items_to_parse},"
-                                    f" Skipping..."
-                                )
-                                yield []
-                            raw_data = [{"item": item, **data} for item in items]
-                            while True:
-                                raw_data_batch = raw_data[:batch_size]
-                                yield raw_data_batch
-                                raw_data = raw_data[batch_size:]
-                                if len(raw_data) == 0:
-                                    break
+                            data_path: str | None = None
+                            if isinstance(data, dict) and data.get("file") is not None:
+                                content = data["file"].get("content") if isinstance(data["file"].get("content"), dict) else {}
+                                data_path = content.get("path", None)
+                            bulks = get_items_to_parse_bulks(data, data_path, items_to_parse, items_to_parse_name, data.get("__base_jq", ".file.content"))
+                            async for bulk in bulks:
+                                yield bulk
                     else:
-                        yield validate_result(result)
+                        validated_result = validate_result(result)
+                        processed_result = _process_path_type_items(validated_result, items_to_parse)
+                        yield processed_result
         except OceanAbortException as error:
             errors.append(error)
             ocean.metrics.inc_metric(
@@ -101,6 +146,104 @@ def is_resource_supported(
 ) -> bool:
     return bool(resync_event_mapping[kind] or resync_event_mapping[None])

+def _validate_jq_expression(expression: str) -> None:
+    """Validate jq expression to prevent command injection."""
+    try:
+        _ = cast(JQEntityProcessor, ocean.app.integration.entity_processor)._compile(expression)
+    except Exception as e:
+        raise ValueError(f"Invalid jq expression: {e}") from e
+    # Basic validation - reject expressions that could be dangerous
+    # Check for dangerous patterns (include, import, module)
+    dangerous_patterns = ['include', 'import', 'module', 'env']
+    for pattern in dangerous_patterns:
+        # Use word boundary regex to match only complete words, not substrings
+        if re.search(rf'\b{re.escape(pattern)}\b', expression):
+            raise ValueError(f"Potentially dangerous pattern '{pattern}' found in jq expression")
+
+    # Special handling for 'env' - block environment variable access
+    if re.search(r'(?<!\w)\$ENV(?:\.)?', expression):
+        raise ValueError("Environment variable access '$ENV.' found in jq expression")
+    if re.search(r'\benv\.', expression):
+        raise ValueError("Environment variable access 'env.' found in jq expression")
+
+def _create_secure_temp_file(suffix: str = ".json") -> str:
+    """Create a secure temporary file with restricted permissions."""
+    # Create temp directory if it doesn't exist
+    temp_dir = "/tmp/ocean"
+    os.makedirs(temp_dir, exist_ok=True)
+
+    # Create temporary file with secure permissions
+    fd, temp_path = tempfile.mkstemp(suffix=suffix, dir=temp_dir)
+    try:
+        # Set restrictive permissions (owner read/write only)
+        os.chmod(temp_path, stat.S_IRUSR | stat.S_IWUSR)
+        return temp_path
+    finally:
+        os.close(fd)
+
+async def get_items_to_parse_bulks(raw_data: dict[Any, Any], data_path: str, items_to_parse: str, items_to_parse_name: str, base_jq: str) -> AsyncGenerator[list[dict[str, Any]], None]:
+    # Validate inputs to prevent command injection
+    _validate_jq_expression(items_to_parse)
+    items_to_parse = items_to_parse.replace(base_jq, ".") if data_path else items_to_parse
+
+    temp_data_path = None
+    temp_output_path = None
+
+    try:
+        # Create secure temporary files
+        if not data_path:
+            raw_data_serialized = json.dumps(raw_data)
+            temp_data_path = _create_secure_temp_file("_input.json")
+            with open(temp_data_path, "w") as f:
+                f.write(raw_data_serialized)
+            data_path = temp_data_path
+
+        temp_output_path = _create_secure_temp_file("_parsed.json")
+
+        delete_target = items_to_parse.split('|', 1)[0].strip() if not items_to_parse.startswith('map(') else '.'
+        base_jq_object_string = await _build_base_jq_object_string(raw_data, base_jq, delete_target)
+
+        # Build jq expression safely
+        jq_expression = f""". as $all
+| ($all | {items_to_parse}) as $items
+| $items
+| map({{{items_to_parse_name}: ., {base_jq_object_string}}})"""
+
+        # Use subprocess with list arguments instead of shell=True
+        jq_args = ["/bin/jq", jq_expression, data_path]
+
+        with open(temp_output_path, "w") as output_file:
+            result = subprocess.run(
+                jq_args,
+                stdout=output_file,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False  # Don't raise exception, handle errors manually
+            )
+
+        if result.returncode != 0:
+            logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {result.stderr}")
+            yield []
+        else:
+            with open(temp_output_path, "r") as f:
+                events_stream = get_events_as_a_stream(f, 'item', ocean.config.yield_items_to_parse_batch_size)
+                for items_bulk in events_stream:
+                    yield items_bulk
+
+    except ValueError as e:
+        logger.error(f"Invalid jq expression: {e}")
+        yield []
+    except Exception as e:
+        logger.error(f"Failed to parse items for JQ expression {items_to_parse}, error: {e}")
+        yield []
+    finally:
+        # Cleanup temporary files
+        for temp_path in [temp_data_path, temp_output_path]:
+            if temp_path and os.path.exists(temp_path):
+                try:
+                    os.remove(temp_path)
+                except OSError as e:
+                    logger.warning(f"Failed to cleanup temporary file {temp_path}: {e}")

 def unsupported_kind_response(
     kind: str, available_resync_kinds: list[str]
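
Note: for a concrete sense of the program this builds, assume (hypothetically) `raw_data = {"file": {"content": {"workflows": [...]}}, "kind": "wf"}`, `items_to_parse = ".file.content.workflows"`, `items_to_parse_name = "workflow"`, and `base_jq = ".file.content"`. The generated jq expression would then look roughly like this hand-expanded sketch, where the trailing fields come from `_build_base_jq_object_string` substituting the `del(...)` remainder for the parsed collection:

    . as $all
    | ($all | .file.content.workflows) as $items
    | $items
    | map({workflow: .,
           "file": {"content": (($all | del(.file.content.workflows)) // {})},
           "kind": "wf"})

Each emitted object pairs one parsed item with the rest of the original payload, with the potentially huge parsed collection deleted from the copy.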
@@ -108,6 +251,44 @@ def unsupported_kind_response(
     logger.error(f"Kind {kind} is not supported in this integration")
     return [], [KindNotImplementedException(kind, available_resync_kinds)]

+async def _build_base_jq_object_string(raw_data: dict[Any, Any], base_jq: str, delete_target: str) -> str:
+    base_jq_object_before_parsing = await cast(JQEntityProcessor, ocean.app.integration.entity_processor)._search(raw_data, f"{base_jq} = {json.dumps("__all")}")
+    base_jq_object_before_parsing_serialized = json.dumps(base_jq_object_before_parsing)
+    base_jq_object_before_parsing_serialized = base_jq_object_before_parsing_serialized[1:-1] if len(base_jq_object_before_parsing_serialized) >= 2 else base_jq_object_before_parsing_serialized
+    base_jq_object_before_parsing_serialized = base_jq_object_before_parsing_serialized.replace("\"__all\"", f"(($all | del({delete_target})) // {{}})")
+    return base_jq_object_before_parsing_serialized
+
+
+def get_events_as_a_stream(
+    stream: Any,
+    target_items: str = "item",
+    max_buffer_size_mb: int = 1
+) -> Generator[list[dict[str, Any]], None, None]:
+    events = ijson.sendable_list()
+    coro = ijson.items_coro(events, target_items)
+
+    # Convert MB to bytes for the buffer size
+    buffer_size = max_buffer_size_mb * 1024 * 1024
+
+    # Read chunks from the stream until exhausted
+    while True:
+        chunk = stream.read(buffer_size)
+        if not chunk:  # End of stream
+            break
+
+        # Convert string to bytes if necessary (for text mode files)
+        if isinstance(chunk, str):
+            chunk = chunk.encode('utf-8')
+
+        coro.send(chunk)
+        yield events
+        del events[:]
+    try:
+        coro.close()
+    finally:
+        if events:
+            yield events
+            events[:] = []

 class ProcessWrapper(multiprocessing.Process):
     def __init__(self, *args, **kwargs):
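
Note: `get_events_as_a_stream` incrementally parses a top-level JSON array; with ijson, the prefix "item" matches each element of the root array, so batches are yielded without ever loading the whole document into memory. A small self-contained sketch:

    import io
    import json

    payload = json.dumps([{"n": i} for i in range(3)]).encode("utf-8")
    for batch in get_events_as_a_stream(io.BytesIO(payload), "item", max_buffer_size_mb=1):
        # With a tiny payload everything arrives in one read, so one batch:
        # [{"n": 0}, {"n": 1}, {"n": 2}]
        print(batch)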
@@ -134,3 +315,34 @@ def clear_http_client_context() -> None:
             _port_http_client.pop()
         except (RuntimeError, AttributeError):
             pass
+
+class _AiterReader:
+    """
+    Wraps an iterable of byte chunks (e.g., response.iter_bytes())
+    and exposes a .read(n) method that ijson expects.
+    """
+    def __init__(self, iterable):
+        self._iter = iter(iterable)
+        self._buf = bytearray()
+        self._eof = False
+
+    def read(self, n=-1):
+        # If n < 0, return everything until EOF
+        if n is None or n < 0:
+            chunks = [bytes(self._buf)]
+            self._buf.clear()
+            chunks.extend(self._iter)  # drain the iterator
+            return b"".join(chunks)
+
+        # Fill buffer until we have n bytes or hit EOF
+        while len(self._buf) < n and not self._eof:
+            try:
+                self._buf.extend(next(self._iter))
+            except StopIteration:
+                self._eof = True
+                break
+
+        # Serve up to n bytes
+        out = bytes(self._buf[:n])
+        del self._buf[:n]
+        return out
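
Note: `_AiterReader` adapts chunked byte iterables (for example, an HTTP client's `iter_bytes()` generator) to the file-like `.read(n)` interface the streaming parser above expects. A toy example with an in-memory chunk list standing in for a network stream:

    chunks = [b'[{"n": 0}, ', b'{"n": 1}]']  # stand-in for response.iter_bytes()
    reader = _AiterReader(chunks)

    for batch in get_events_as_a_stream(reader, "item"):
        print(batch)  # [{"n": 0}, {"n": 1}] once the array is fully parsed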
port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py

@@ -50,7 +50,7 @@ class TestJQEntityProcessor:
         raw_entity_mappings = {"foo": ".foo"}
         selector_query = '.foo == "bar"'
         result = await mocked_processor._get_mapped_entity(
-            data, raw_entity_mappings, selector_query
+            data, raw_entity_mappings, None, selector_query
         )
         assert result.entity == {"foo": "bar"}
         assert result.did_entity_pass_selector is True
{port_ocean-0.28.4.dist-info → port_ocean-0.28.7.dist-info}/RECORD

@@ -1,9 +1,9 @@
-integrations/_infra/Dockerfile.Deb,sha256=
+integrations/_infra/Dockerfile.Deb,sha256=ZqAg-p3GbLaneWS0sIcUDHp1FLwLoxHLvsKT5H8sCLc,2562
 integrations/_infra/Dockerfile.alpine,sha256=7E4Sb-8supsCcseerHwTkuzjHZoYcaHIyxiBZ-wewo0,3482
 integrations/_infra/Dockerfile.base.builder,sha256=ESe1PKC6itp_AuXawbLI75k1Kruny6NTANaTinxOgVs,743
 integrations/_infra/Dockerfile.base.runner,sha256=uAcs2IsxrAAUHGXt_qULA5INr-HFguf5a5fCKiqEzbY,384
 integrations/_infra/Dockerfile.dockerignore,sha256=CM1Fxt3I2AvSvObuUZRmy5BNLSGC7ylnbpWzFgD4cso,1163
-integrations/_infra/Dockerfile.local,sha256=
+integrations/_infra/Dockerfile.local,sha256=yLkNs8AB1QMsSXyb2OOo0F8cPXeNF9bb2pzAt2d9fZ8,1663
 integrations/_infra/Makefile,sha256=YgLKvuF_Dw4IA7X98Nus6zIW_3cJ60M1QFGs3imj5c4,2430
 integrations/_infra/README.md,sha256=ZtJFSMCTU5zTeM8ddRuW1ZL1ga8z7Ic2F3mxmgOSjgo,1195
 integrations/_infra/entry_local.sh,sha256=Sn2TexTEpruH2ixIAGsk-fZV6Y7pT3jd2Pi9TxBeFuw,633
@@ -101,11 +101,12 @@ port_ocean/core/handlers/entities_state_applier/port/get_related_entities.py,sha
 port_ocean/core/handlers/entities_state_applier/port/order_by_entities_dependencies.py,sha256=lyv6xKzhYfd6TioUgR3AVRSJqj7JpAaj1LxxU2xAqeo,1720
 port_ocean/core/handlers/entity_processor/__init__.py,sha256=FvFCunFg44wNQoqlybem9MthOs7p1Wawac87uSXz9U8,156
 port_ocean/core/handlers/entity_processor/base.py,sha256=PsnpNRqjHth9xwOvDRe7gKu8cjnVV0XGmTIHGvOelX0,1867
-port_ocean/core/handlers/entity_processor/jq_entity_processor.py,sha256=
+port_ocean/core/handlers/entity_processor/jq_entity_processor.py,sha256=yMymDTiEvIvqA01yB08A6A1_cPH0Pnp1zs0O1v38RJQ,25730
+port_ocean/core/handlers/entity_processor/jq_input_evaluator.py,sha256=fcCt35pPi-Myv27ZI_HNhCoqvntU2sVWM2aGJ7yrkHQ,2300
 port_ocean/core/handlers/port_app_config/__init__.py,sha256=8AAT5OthiVM7KCcM34iEgEeXtn2pRMrT4Dze5r1Ixbk,134
 port_ocean/core/handlers/port_app_config/api.py,sha256=r_Th66NEw38IpRdnXZcRvI8ACfvxW_A6V62WLwjWXlQ,1044
 port_ocean/core/handlers/port_app_config/base.py,sha256=Sup4-X_a7JGa27rMy_OgqGIjFHMlKBpKevicaK3AeHU,2919
-port_ocean/core/handlers/port_app_config/models.py,sha256=
+port_ocean/core/handlers/port_app_config/models.py,sha256=SMeId2M5Po3_NbJi1mGMduz3VaV9DY20O0tIqBAxEZw,3012
 port_ocean/core/handlers/queue/__init__.py,sha256=yzgicE_jAR1wtljFKxgyG6j-HbLcG_Zze5qw1kkALUI,171
 port_ocean/core/handlers/queue/abstract_queue.py,sha256=SaivrYbqg8qsX6wtQlJZyxgcbdMD5B9NZG3byN9AvrI,782
 port_ocean/core/handlers/queue/group_queue.py,sha256=JvvJOwz9z_aI4CjPr7yQX-0rOgqLI5wMdxWk2x5x-34,4989
@@ -123,8 +124,8 @@ port_ocean/core/integrations/mixins/events.py,sha256=2L7P3Jhp8XBqddh2_o9Cn4N261n
 port_ocean/core/integrations/mixins/handler.py,sha256=mZ7-0UlG3LcrwJttFbMe-R4xcOU2H_g33tZar7PwTv8,3771
 port_ocean/core/integrations/mixins/live_events.py,sha256=zM24dhNc7uHx9XYZ6toVhDADPA90EnpOmZxgDegFZbA,4196
 port_ocean/core/integrations/mixins/sync.py,sha256=Vm_898pLKBwfVewtwouDWsXoxcOLicnAy6pzyqqk6U8,4053
-port_ocean/core/integrations/mixins/sync_raw.py,sha256=
-port_ocean/core/integrations/mixins/utils.py,sha256=
+port_ocean/core/integrations/mixins/sync_raw.py,sha256=Zga3fSxALuXmAMKmIS0hZYWRe22lSGhiSVFWUCI4f1U,40972
+port_ocean/core/integrations/mixins/utils.py,sha256=aIIMCGb2_ezc5d8NEbKwh9cJ_E0C1VIkyqO8jVENH-Q,14120
 port_ocean/core/models.py,sha256=DNbKpStMINI2lIekKprTqBevqkw_wFuFayN19w1aDfQ,2893
 port_ocean/core/ocean_types.py,sha256=bkLlTd8XfJK6_JDl0eXUHfE_NygqgiInSMwJ4YJH01Q,1399
 port_ocean/core/utils/entity_topological_sorter.py,sha256=MDUjM6OuDy4Xj68o-7InNN0w1jqjxeDfeY8U02vySNI,3081
@@ -170,7 +171,7 @@ port_ocean/tests/core/conftest.py,sha256=0Oql7R1iTbjPyNdUoO6M21IKknLwnCIgDRz2JQ7
 port_ocean/tests/core/defaults/test_common.py,sha256=sR7RqB3ZYV6Xn6NIg-c8k5K6JcGsYZ2SCe_PYX5vLYM,5560
 port_ocean/tests/core/event_listener/test_kafka.py,sha256=PH90qk2fvdrQOSZD2QrvkGy8w_WoYb_KHGnqJ6PLHAo,2681
 port_ocean/tests/core/handlers/entities_state_applier/test_applier.py,sha256=7XWgwUB9uVYRov4VbIz1A-7n2YLbHTTYT-4rKJxjB0A,10711
-port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py,sha256=
+port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py,sha256=JcDyC7bI4KqIcPpS2S3Hju0mNRfKIoPHdgKVCRBey_E,14105
 port_ocean/tests/core/handlers/mixins/test_live_events.py,sha256=Sbv9IZAGQoZDhf27xDjMMVYxUSie9mHltDtxLSqckmM,12548
 port_ocean/tests/core/handlers/mixins/test_sync_raw.py,sha256=-Jd2rUG63fZM8LuyKtCp1tt4WEqO2m5woESjs1c91sU,44428
 port_ocean/tests/core/handlers/port_app_config/test_api.py,sha256=eJZ6SuFBLz71y4ca3DNqKag6d6HUjNJS0aqQPwiLMTI,1999
@@ -208,8 +209,8 @@ port_ocean/utils/repeat.py,sha256=U2OeCkHPWXmRTVoPV-VcJRlQhcYqPWI5NfmPlb1JIbc,32
 port_ocean/utils/signal.py,sha256=J1sI-e_32VHP_VUa5bskLMFoJjJOAk5isrnewKDikUI,2125
 port_ocean/utils/time.py,sha256=pufAOH5ZQI7gXvOvJoQXZXZJV-Dqktoj9Qp9eiRwmJ4,1939
 port_ocean/version.py,sha256=UsuJdvdQlazzKGD3Hd5-U7N69STh8Dq9ggJzQFnu9fU,177
-port_ocean-0.28.
-port_ocean-0.28.
-port_ocean-0.28.
-port_ocean-0.28.
-port_ocean-0.28.
+port_ocean-0.28.7.dist-info/LICENSE.md,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+port_ocean-0.28.7.dist-info/METADATA,sha256=gvbQSwADuQQrcqf9it-ShnXH-82xm4KUTYvqpn_rf0A,7015
+port_ocean-0.28.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+port_ocean-0.28.7.dist-info/entry_points.txt,sha256=F_DNUmGZU2Kme-8NsWM5LLE8piGMafYZygRYhOVtcjA,54
+port_ocean-0.28.7.dist-info/RECORD,,
LICENSE.md, WHEEL, entry_points.txt: files without changes (only the dist-info directory was renamed).