port-ocean 0.28.11__py3-none-any.whl → 0.28.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of port-ocean might be problematic. Click here for more details.
- integrations/_infra/Dockerfile.Deb +1 -0
- integrations/_infra/Dockerfile.local +1 -0
- port_ocean/clients/port/mixins/integrations.py +1 -1
- port_ocean/core/handlers/entity_processor/jq_entity_processor.py +472 -17
- port_ocean/core/handlers/entity_processor/jq_input_evaluator.py +137 -0
- port_ocean/core/handlers/port_app_config/models.py +1 -1
- port_ocean/core/integrations/mixins/sync_raw.py +1 -1
- port_ocean/core/integrations/mixins/utils.py +241 -23
- port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +932 -1
- port_ocean/tests/core/handlers/entity_processor/test_jq_input_evaluator.py +932 -0
- port_ocean/tests/utils/test_cache.py +240 -0
- port_ocean/utils/cache.py +45 -9
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/METADATA +1 -1
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/RECORD +17 -15
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/LICENSE.md +0 -0
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/WHEEL +0 -0
- {port_ocean-0.28.11.dist-info → port_ocean-0.28.14.dist-info}/entry_points.txt +0 -0
|
@@ -301,7 +301,7 @@ class IntegrationClientMixin:
|
|
|
301
301
|
headers=headers,
|
|
302
302
|
json={
|
|
303
303
|
"items": raw_data,
|
|
304
|
-
"extractionTimestamp": int(datetime.now().timestamp()),
|
|
304
|
+
"extractionTimestamp": int(datetime.now().timestamp() * 1000),
|
|
305
305
|
},
|
|
306
306
|
)
|
|
307
307
|
handle_port_status_code(response, should_log=False)
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from asyncio import Task
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
-
|
|
5
4
|
from functools import lru_cache
|
|
5
|
+
import json
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
import jq # type: ignore
|
|
8
8
|
from loguru import logger
|
|
9
|
-
|
|
10
9
|
from port_ocean.context.ocean import ocean
|
|
11
10
|
from port_ocean.core.handlers.entity_processor.base import BaseEntityProcessor
|
|
12
11
|
from port_ocean.core.handlers.port_app_config.models import ResourceConfig
|
|
@@ -22,6 +21,11 @@ from port_ocean.core.utils.utils import (
|
|
|
22
21
|
)
|
|
23
22
|
from port_ocean.exceptions.core import EntityProcessorException
|
|
24
23
|
from port_ocean.utils.queue_utils import process_in_queue
|
|
24
|
+
from port_ocean.core.handlers.entity_processor.jq_input_evaluator import (
|
|
25
|
+
InputClassifyingResult,
|
|
26
|
+
classify_input,
|
|
27
|
+
can_expression_run_with_no_input,
|
|
28
|
+
)
|
|
25
29
|
|
|
26
30
|
|
|
27
31
|
class ExampleStates:
|
|
@@ -76,7 +80,7 @@ class MappedEntity:
|
|
|
76
80
|
|
|
77
81
|
entity: dict[str, Any] = field(default_factory=dict)
|
|
78
82
|
did_entity_pass_selector: bool = False
|
|
79
|
-
raw_data: Optional[dict[str, Any]] = None
|
|
83
|
+
raw_data: Optional[dict[str, Any] | tuple[dict[str, Any], str]] = None
|
|
80
84
|
misconfigurations: dict[str, str] = field(default_factory=dict)
|
|
81
85
|
|
|
82
86
|
|
|
@@ -133,17 +137,35 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
133
137
|
return await loop.run_in_executor(
|
|
134
138
|
None, self._stop_iterator_handler(func.first)
|
|
135
139
|
)
|
|
140
|
+
except Exception as exc:
|
|
141
|
+
logger.error(
|
|
142
|
+
f"Search failed for pattern '{pattern}' in data: {data}, Error: {exc}"
|
|
143
|
+
)
|
|
144
|
+
return None
|
|
145
|
+
|
|
146
|
+
@lru_cache
|
|
147
|
+
async def _search_stringified(self, data: str, pattern: str) -> Any:
|
|
148
|
+
try:
|
|
149
|
+
loop = asyncio.get_event_loop()
|
|
150
|
+
compiled_pattern = self._compile(pattern)
|
|
151
|
+
func = compiled_pattern.input_text(data)
|
|
152
|
+
return await loop.run_in_executor(
|
|
153
|
+
None, self._stop_iterator_handler(func.first)
|
|
154
|
+
)
|
|
136
155
|
except Exception as exc:
|
|
137
156
|
logger.debug(
|
|
138
157
|
f"Search failed for pattern '{pattern}' in data: {data}, Error: {exc}"
|
|
139
158
|
)
|
|
140
159
|
return None
|
|
141
160
|
|
|
142
|
-
async def _search_as_bool(self, data: dict[str, Any], pattern: str) -> bool:
|
|
161
|
+
async def _search_as_bool(self, data: dict[str, Any] | str, pattern: str) -> bool:
|
|
143
162
|
loop = asyncio.get_event_loop()
|
|
144
163
|
|
|
145
164
|
compiled_pattern = self._compile(pattern)
|
|
146
|
-
|
|
165
|
+
if isinstance(data, str):
|
|
166
|
+
func = compiled_pattern.input_text(data)
|
|
167
|
+
else:
|
|
168
|
+
func = compiled_pattern.input_value(data)
|
|
147
169
|
|
|
148
170
|
value = await loop.run_in_executor(
|
|
149
171
|
None, self._stop_iterator_handler(func.first)
|
|
@@ -156,7 +178,7 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
156
178
|
|
|
157
179
|
async def _search_as_object(
|
|
158
180
|
self,
|
|
159
|
-
data: dict[str, Any],
|
|
181
|
+
data: dict[str, Any] | str,
|
|
160
182
|
obj: dict[str, Any],
|
|
161
183
|
misconfigurations: dict[str, str] | None = None,
|
|
162
184
|
) -> dict[str, Any | None]:
|
|
@@ -188,7 +210,12 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
188
210
|
self._search_as_object(data, value, misconfigurations)
|
|
189
211
|
)
|
|
190
212
|
else:
|
|
191
|
-
|
|
213
|
+
if isinstance(data, str):
|
|
214
|
+
search_tasks[key] = asyncio.create_task(
|
|
215
|
+
self._search_stringified(data, value)
|
|
216
|
+
)
|
|
217
|
+
else:
|
|
218
|
+
search_tasks[key] = asyncio.create_task(self._search(data, value))
|
|
192
219
|
|
|
193
220
|
result: dict[str, Any | None] = {}
|
|
194
221
|
for key, task in search_tasks.items():
|
|
@@ -212,16 +239,18 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
212
239
|
|
|
213
240
|
async def _get_mapped_entity(
|
|
214
241
|
self,
|
|
215
|
-
data: dict[str, Any],
|
|
242
|
+
data: dict[str, Any] | tuple[dict[str, Any], str],
|
|
216
243
|
raw_entity_mappings: dict[str, Any],
|
|
244
|
+
items_to_parse_key: str | None,
|
|
217
245
|
selector_query: str,
|
|
218
246
|
parse_all: bool = False,
|
|
219
247
|
) -> MappedEntity:
|
|
220
|
-
should_run = await self._search_as_bool(data, selector_query)
|
|
248
|
+
should_run = await self._should_map_entity(
|
|
249
|
+
data, selector_query, items_to_parse_key
|
|
250
|
+
)
|
|
221
251
|
if parse_all or should_run:
|
|
222
|
-
misconfigurations: dict[str, str] = {}
|
|
223
|
-
|
|
224
|
-
data, raw_entity_mappings, misconfigurations
|
|
252
|
+
misconfigurations, mapped_entity = await self._map_entity(
|
|
253
|
+
data, raw_entity_mappings, items_to_parse_key
|
|
225
254
|
)
|
|
226
255
|
return MappedEntity(
|
|
227
256
|
mapped_entity,
|
|
@@ -237,6 +266,77 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
237
266
|
misconfigurations={},
|
|
238
267
|
)
|
|
239
268
|
|
|
269
|
+
async def _map_entity(
|
|
270
|
+
self,
|
|
271
|
+
data: dict[str, Any] | tuple[dict[str, Any], str],
|
|
272
|
+
raw_entity_mappings: dict[str, Any],
|
|
273
|
+
items_to_parse_key: str | None,
|
|
274
|
+
) -> tuple[dict[str, str], dict[str, Any]]:
|
|
275
|
+
if not items_to_parse_key:
|
|
276
|
+
# No items to parse, map the entity and return the misconfigurations and the mapped entity
|
|
277
|
+
misconfigurations: dict[str, str] = {}
|
|
278
|
+
data_to_search = data if isinstance(data, dict) else data[0]
|
|
279
|
+
mapped_entity = await self._search_as_object(
|
|
280
|
+
data_to_search, raw_entity_mappings, misconfigurations
|
|
281
|
+
)
|
|
282
|
+
return misconfigurations, mapped_entity
|
|
283
|
+
|
|
284
|
+
modified_data: tuple[dict[str, Any], str | dict[str, Any]] = (
|
|
285
|
+
data
|
|
286
|
+
if isinstance(data, tuple)
|
|
287
|
+
else (
|
|
288
|
+
{items_to_parse_key: data[items_to_parse_key]},
|
|
289
|
+
data,
|
|
290
|
+
)
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
misconfigurations_item: dict[str, str] = {}
|
|
294
|
+
misconfigurations_all: dict[str, str] = {}
|
|
295
|
+
# Map the jq expressions that were classified as single-item expressions, using the single item as input
|
|
296
|
+
mapped_entity_item = await self._search_as_object(
|
|
297
|
+
modified_data[0], raw_entity_mappings["item"], misconfigurations_item
|
|
298
|
+
)
|
|
299
|
+
# To prevent data loss caused by misclassification, we merge the expressions that were classified as single-item expressions but came back misconfigured
|
|
300
|
+
# into the expressions that are classified as "all" (whole-payload) expressions
|
|
301
|
+
if misconfigurations_item:
|
|
302
|
+
# The misconfigurations dict does not contain the mapping expressions themselves, so we filter the original mapping by the misconfigured keys
|
|
303
|
+
filtered_item_mappings = self._filter_mappings_by_keys(
|
|
304
|
+
raw_entity_mappings["item"], list(misconfigurations_item.keys())
|
|
305
|
+
)
|
|
306
|
+
raw_entity_mappings["all"] = self._deep_merge(
|
|
307
|
+
raw_entity_mappings["all"], filtered_item_mappings
|
|
308
|
+
)
|
|
309
|
+
# Map the jq expressions that were classified as "all" expressions, using the whole payload as input
|
|
310
|
+
mapped_entity_all = await self._search_as_object(
|
|
311
|
+
modified_data[1], raw_entity_mappings["all"], misconfigurations_all
|
|
312
|
+
)
|
|
313
|
+
# Map the jq expressions that were classified as requiring no input, using an empty object as input
|
|
314
|
+
mapped_entity_empty = await self._search_as_object(
|
|
315
|
+
{}, raw_entity_mappings["empty"], misconfigurations_all
|
|
316
|
+
)
|
|
317
|
+
# Merge the mapped entities
|
|
318
|
+
mapped_entity = self._deep_merge(mapped_entity_item, mapped_entity_all)
|
|
319
|
+
mapped_entity = self._deep_merge(mapped_entity, mapped_entity_empty)
|
|
320
|
+
return misconfigurations_all, mapped_entity
|
|
321
|
+
|
|
322
|
+
async def _should_map_entity(
|
|
323
|
+
self,
|
|
324
|
+
data: dict[str, Any] | tuple[dict[str, Any], str],
|
|
325
|
+
selector_query: str,
|
|
326
|
+
items_to_parse_key: str | None,
|
|
327
|
+
) -> bool:
|
|
328
|
+
if can_expression_run_with_no_input(selector_query):
|
|
329
|
+
return await self._search_as_bool({}, selector_query)
|
|
330
|
+
if isinstance(data, tuple):
|
|
331
|
+
return await self._search_as_bool(
|
|
332
|
+
data[0], selector_query
|
|
333
|
+
) or await self._search_as_bool(data[1], selector_query)
|
|
334
|
+
if items_to_parse_key:
|
|
335
|
+
return await self._search_as_bool(
|
|
336
|
+
data[items_to_parse_key], selector_query
|
|
337
|
+
) or await self._search_as_bool(data, selector_query)
|
|
338
|
+
return await self._search_as_bool(data, selector_query)
|
|
339
|
+
|
|
240
340
|
async def _calculate_entity(
|
|
241
341
|
self,
|
|
242
342
|
data: dict[str, Any],
|
|
@@ -246,9 +346,17 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
246
346
|
selector_query: str,
|
|
247
347
|
parse_all: bool = False,
|
|
248
348
|
) -> tuple[list[MappedEntity], list[Exception]]:
|
|
249
|
-
raw_data = [
|
|
250
|
-
|
|
251
|
-
|
|
349
|
+
raw_data: list[dict[str, Any]] | list[tuple[dict[str, Any], str]] = [
|
|
350
|
+
data.copy()
|
|
351
|
+
]
|
|
352
|
+
items_to_parse_key = None
|
|
353
|
+
if items_to_parse:
|
|
354
|
+
items_to_parse_key = items_to_parse_name
|
|
355
|
+
if not ocean.config.yield_items_to_parse:
|
|
356
|
+
if isinstance(data, dict) and data.get("__type") == "path":
|
|
357
|
+
file_path = data.get("file", {}).get("content", {}).get("path")
|
|
358
|
+
with open(file_path, "r") as f:
|
|
359
|
+
data["file"]["content"] = json.loads(f.read())
|
|
252
360
|
items = await self._search(data, items_to_parse)
|
|
253
361
|
if not isinstance(items, list):
|
|
254
362
|
logger.warning(
|
|
@@ -256,13 +364,28 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
256
364
|
f" Skipping..."
|
|
257
365
|
)
|
|
258
366
|
return [], []
|
|
259
|
-
|
|
367
|
+
raw_all_payload_stringified = json.dumps(data)
|
|
368
|
+
raw_data = [
|
|
369
|
+
({items_to_parse_name: item}, raw_all_payload_stringified)
|
|
370
|
+
for item in items
|
|
371
|
+
]
|
|
372
|
+
single_item_mappings, all_items_mappings, empty_items_mappings = (
|
|
373
|
+
self._build_raw_entity_mappings(
|
|
374
|
+
raw_entity_mappings, items_to_parse_name
|
|
375
|
+
)
|
|
376
|
+
)
|
|
377
|
+
raw_entity_mappings = {
|
|
378
|
+
"item": single_item_mappings,
|
|
379
|
+
"all": all_items_mappings,
|
|
380
|
+
"empty": empty_items_mappings,
|
|
381
|
+
}
|
|
260
382
|
|
|
261
383
|
entities, errors = await gather_and_split_errors_from_results(
|
|
262
384
|
[
|
|
263
385
|
self._get_mapped_entity(
|
|
264
386
|
raw,
|
|
265
387
|
raw_entity_mappings,
|
|
388
|
+
items_to_parse_key,
|
|
266
389
|
selector_query,
|
|
267
390
|
parse_all,
|
|
268
391
|
)
|
|
@@ -275,6 +398,325 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
275
398
|
)
|
|
276
399
|
return entities, errors
|
|
277
400
|
|
|
401
|
+
def _build_raw_entity_mappings(
|
|
402
|
+
self, raw_entity_mappings: dict[str, Any], items_to_parse_name: str
|
|
403
|
+
) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
|
|
404
|
+
"""
|
|
405
|
+
Build the raw entity mappings for the items to parse.
|
|
406
|
+
The mappings are grouped by the input classifying result.
|
|
407
|
+
There are 3 input classifying results:
|
|
408
|
+
- NONE: The expression can be executed with no input
|
|
409
|
+
- SINGLE: The expression can be executed on a single item
|
|
410
|
+
- ALL: The expression can be executed on all the data
|
|
411
|
+
"""
|
|
412
|
+
mappings: dict[InputClassifyingResult, dict[str, Any]] = {
|
|
413
|
+
InputClassifyingResult.NONE: {},
|
|
414
|
+
InputClassifyingResult.SINGLE: {},
|
|
415
|
+
InputClassifyingResult.ALL: {},
|
|
416
|
+
}
|
|
417
|
+
for key, value in raw_entity_mappings.items():
|
|
418
|
+
if isinstance(value, str):
|
|
419
|
+
# Direct string values (identifier, title, icon, blueprint, team)
|
|
420
|
+
self.group_string_mapping_value(
|
|
421
|
+
items_to_parse_name,
|
|
422
|
+
mappings,
|
|
423
|
+
key,
|
|
424
|
+
value,
|
|
425
|
+
)
|
|
426
|
+
elif isinstance(value, dict):
|
|
427
|
+
# Complex objects (IngestSearchQuery for identifier/team, properties, relations)
|
|
428
|
+
self.group_complex_mapping_value(
|
|
429
|
+
items_to_parse_name,
|
|
430
|
+
mappings,
|
|
431
|
+
key,
|
|
432
|
+
value,
|
|
433
|
+
)
|
|
434
|
+
return (
|
|
435
|
+
mappings[InputClassifyingResult.SINGLE],
|
|
436
|
+
mappings[InputClassifyingResult.ALL],
|
|
437
|
+
mappings[InputClassifyingResult.NONE],
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
def group_complex_mapping_value(
|
|
441
|
+
self,
|
|
442
|
+
pattern: str,
|
|
443
|
+
mappings: dict[InputClassifyingResult, dict[str, Any]],
|
|
444
|
+
key: str,
|
|
445
|
+
value: dict[str, Any],
|
|
446
|
+
) -> None:
|
|
447
|
+
|
|
448
|
+
if key in ["properties", "relations"]:
|
|
449
|
+
mapping_dicts: dict[InputClassifyingResult, dict[str, Any]] = {
|
|
450
|
+
InputClassifyingResult.SINGLE: {},
|
|
451
|
+
InputClassifyingResult.ALL: {},
|
|
452
|
+
InputClassifyingResult.NONE: {},
|
|
453
|
+
}
|
|
454
|
+
# For properties and relations, filter the dictionary values
|
|
455
|
+
for dict_key, dict_value in value.items():
|
|
456
|
+
if isinstance(dict_value, str):
|
|
457
|
+
self.group_string_mapping_value(
|
|
458
|
+
pattern,
|
|
459
|
+
mapping_dicts,
|
|
460
|
+
dict_key,
|
|
461
|
+
dict_value,
|
|
462
|
+
)
|
|
463
|
+
elif isinstance(dict_value, dict):
|
|
464
|
+
# Handle IngestSearchQuery objects
|
|
465
|
+
self.group_search_query_mapping_value(
|
|
466
|
+
pattern,
|
|
467
|
+
mapping_dicts[InputClassifyingResult.SINGLE],
|
|
468
|
+
mapping_dicts[InputClassifyingResult.ALL],
|
|
469
|
+
dict_key,
|
|
470
|
+
dict_value,
|
|
471
|
+
)
|
|
472
|
+
for input_classifying_result, mapping_dict in mapping_dicts.items():
|
|
473
|
+
if mapping_dict:
|
|
474
|
+
mappings[input_classifying_result][key] = mapping_dict
|
|
475
|
+
else:
|
|
476
|
+
# For identifier/team IngestSearchQuery objects
|
|
477
|
+
self.group_search_query_mapping_value(
|
|
478
|
+
pattern,
|
|
479
|
+
mappings[InputClassifyingResult.SINGLE],
|
|
480
|
+
mappings[InputClassifyingResult.ALL],
|
|
481
|
+
key,
|
|
482
|
+
value,
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
def group_search_query_mapping_value(
|
|
486
|
+
self,
|
|
487
|
+
pattern: str,
|
|
488
|
+
single_item_dict: dict[str, Any],
|
|
489
|
+
all_item_dict: dict[str, Any],
|
|
490
|
+
dict_key: str,
|
|
491
|
+
dict_value: dict[str, Any],
|
|
492
|
+
) -> None:
|
|
493
|
+
if self._classify_search_query(dict_value, pattern):
|
|
494
|
+
single_item_dict[dict_key] = dict_value
|
|
495
|
+
else:
|
|
496
|
+
all_item_dict[dict_key] = dict_value
|
|
497
|
+
|
|
498
|
+
def group_string_mapping_value(
|
|
499
|
+
self,
|
|
500
|
+
pattern: str,
|
|
501
|
+
mappings: dict[InputClassifyingResult, dict[str, Any]],
|
|
502
|
+
key: str,
|
|
503
|
+
value: str,
|
|
504
|
+
) -> None:
|
|
505
|
+
input_evaluation_result = classify_input(value, pattern)
|
|
506
|
+
mappings[input_evaluation_result][key] = value
|
|
507
|
+
|
|
508
|
+
def _classify_search_query(self, query_dict: dict[str, Any], pattern: str) -> bool:
|
|
509
|
+
"""
|
|
510
|
+
Classify the input required to run jq expressions of an IngestSearchQuery
|
|
511
|
+
If at least one rule contains a value that includes the pattern, return True
|
|
512
|
+
Otherwise, return False
|
|
513
|
+
Example:
|
|
514
|
+
The pattern is item
|
|
515
|
+
relations:
|
|
516
|
+
someRelation:
|
|
517
|
+
combinator: "and"
|
|
518
|
+
rules:
|
|
519
|
+
- operator: "="
|
|
520
|
+
property: "someProperty"
|
|
521
|
+
value: .field
|
|
522
|
+
- combinator: "or"
|
|
523
|
+
rules:
|
|
524
|
+
- operator: "="
|
|
525
|
+
property: "anotherProperty"
|
|
526
|
+
value: .item.something
|
|
527
|
+
- operator: "="
|
|
528
|
+
property: "yetAnotherProperty"
|
|
529
|
+
value: .yetAnotherValue
|
|
530
|
+
One value is .item.something, which is a single item based expression, so it will be classified as SINGLE
|
|
531
|
+
"""
|
|
532
|
+
if "rules" not in query_dict:
|
|
533
|
+
return False
|
|
534
|
+
|
|
535
|
+
rules = query_dict["rules"]
|
|
536
|
+
if not isinstance(rules, list):
|
|
537
|
+
return False
|
|
538
|
+
|
|
539
|
+
# Check if any rule contains a value that includes the pattern
|
|
540
|
+
for rule in rules:
|
|
541
|
+
if isinstance(rule, dict) and self._is_rule_or_query_contains_pattern(
|
|
542
|
+
rule, pattern
|
|
543
|
+
):
|
|
544
|
+
return True
|
|
545
|
+
return False
|
|
546
|
+
|
|
547
|
+
def _is_rule_or_query_contains_pattern(
|
|
548
|
+
self, rule: dict[str, Any], pattern: str
|
|
549
|
+
) -> bool:
|
|
550
|
+
"""
|
|
551
|
+
Check if a rule or query contains a value that includes the pattern
|
|
552
|
+
If the value is a single item based expression, return True
|
|
553
|
+
Otherwise, return False
|
|
554
|
+
Example:
|
|
555
|
+
The pattern is item
|
|
556
|
+
The rule is:
|
|
557
|
+
- combinator: "or"
|
|
558
|
+
rules:
|
|
559
|
+
- operator: "="
|
|
560
|
+
property: "anotherProperty"
|
|
561
|
+
value: .item.something
|
|
562
|
+
- operator: "="
|
|
563
|
+
property: "yetAnotherProperty"
|
|
564
|
+
value: .yetAnotherValue
|
|
565
|
+
This rule is not a leaf rule (it has no value property) but a nested search query (it has a rules property),
|
|
566
|
+
so we need to recursively check the rules property to check if at least one rule contains a value that includes the pattern
|
|
567
|
+
In this case, one value is .item.something, which is a single-item expression, so it is classified as SINGLE and the method returns True
|
|
568
|
+
"""
|
|
569
|
+
if "value" in rule and isinstance(rule["value"], str):
|
|
570
|
+
# Use classify_input to check whether this value is a single-item expression for the pattern
|
|
571
|
+
input_evaluation_result = classify_input(rule["value"], pattern)
|
|
572
|
+
if input_evaluation_result == InputClassifyingResult.SINGLE:
|
|
573
|
+
return True
|
|
574
|
+
# Recursively check nested IngestSearchQuery objects
|
|
575
|
+
elif "rules" in rule:
|
|
576
|
+
if self._classify_search_query(rule, pattern):
|
|
577
|
+
return True
|
|
578
|
+
return False
|
|
579
|
+
|
|
580
|
+
def _filter_mappings_by_keys(
|
|
581
|
+
self, mappings: dict[str, Any], target_keys: list[str]
|
|
582
|
+
) -> dict[str, Any]:
|
|
583
|
+
"""
|
|
584
|
+
Filter mappings to preserve structure with only the specified keys present.
|
|
585
|
+
Recursively handles nested dictionaries, searching for keys at any level; non-dict values (including lists) are kept only when their own key is a target key.
|
|
586
|
+
|
|
587
|
+
Args:
|
|
588
|
+
mappings: The dictionary containing mapping configurations to filter
|
|
589
|
+
target_keys: List of keys to preserve in the filtered result
|
|
590
|
+
|
|
591
|
+
Returns:
|
|
592
|
+
A filtered dictionary containing only the specified keys and their nested structures
|
|
593
|
+
|
|
594
|
+
Examples:
|
|
595
|
+
# Basic filtering with direct keys
|
|
596
|
+
mappings = {
|
|
597
|
+
"name": "John",
|
|
598
|
+
"age": 30,
|
|
599
|
+
"city": "New York",
|
|
600
|
+
"country": "USA"
|
|
601
|
+
}
|
|
602
|
+
target_keys = ["name", "age"]
|
|
603
|
+
result = self._filter_mappings_by_keys(mappings, target_keys)
|
|
604
|
+
# Returns: {"name": "John", "age": 30}
|
|
605
|
+
|
|
606
|
+
# Nested dictionary filtering
|
|
607
|
+
mappings = {
|
|
608
|
+
"user": {
|
|
609
|
+
"profile": {"name": "John", "email": "john@example.com"},
|
|
610
|
+
"settings": {"theme": "dark", "notifications": True}
|
|
611
|
+
},
|
|
612
|
+
"metadata": {"created_at": "2023-01-01", "version": "1.0"}
|
|
613
|
+
}
|
|
614
|
+
target_keys = ["name", "email", "created_at"]
|
|
615
|
+
result = self._filter_mappings_by_keys(mappings, target_keys)
|
|
616
|
+
# Returns: {
|
|
617
|
+
# "user": {
|
|
618
|
+
# "profile": {"name": "John", "email": "john@example.com"}
|
|
619
|
+
# },
|
|
620
|
+
# "metadata": {"created_at": "2023-01-01"}
|
|
621
|
+
# }
|
|
622
|
+
|
|
623
|
+
# Empty target keys returns empty dict
|
|
624
|
+
result = self._filter_mappings_by_keys(mappings, [])
|
|
625
|
+
# Returns: {}
|
|
626
|
+
"""
|
|
627
|
+
if not target_keys:
|
|
628
|
+
return {}
|
|
629
|
+
|
|
630
|
+
filtered_mappings: dict[str, Any] = {}
|
|
631
|
+
|
|
632
|
+
for key, value in mappings.items():
|
|
633
|
+
filtered_value = self._process_mapping_value(key, value, target_keys)
|
|
634
|
+
|
|
635
|
+
# Include if it's a direct match or contains nested target keys
|
|
636
|
+
if key in target_keys or filtered_value:
|
|
637
|
+
filtered_mappings[key] = filtered_value
|
|
638
|
+
|
|
639
|
+
return filtered_mappings
|
|
640
|
+
|
|
641
|
+
def _process_mapping_value(
|
|
642
|
+
self, key: str, value: Any, target_keys: list[str]
|
|
643
|
+
) -> Any:
|
|
644
|
+
"""
|
|
645
|
+
Process a single mapping value, handling different types recursively.
|
|
646
|
+
|
|
647
|
+
This helper method is used by _filter_mappings_by_keys to process individual
|
|
648
|
+
key-value pairs. It handles both simple values and nested dictionaries,
|
|
649
|
+
applying the filtering logic recursively to maintain the hierarchical structure.
|
|
650
|
+
|
|
651
|
+
Args:
|
|
652
|
+
key: The key of the current mapping item being processed
|
|
653
|
+
value: The value associated with the key (can be any type)
|
|
654
|
+
target_keys: List of keys to preserve in the filtered result
|
|
655
|
+
|
|
656
|
+
Returns:
|
|
657
|
+
The processed value if it should be included, None otherwise.
|
|
658
|
+
For dictionaries, returns the filtered dictionary or None if empty.
|
|
659
|
+
For simple values, returns the value if the key is in target_keys, None otherwise.
|
|
660
|
+
|
|
661
|
+
Examples:
|
|
662
|
+
# Simple value processing - key in target_keys
|
|
663
|
+
result = self._process_mapping_value("name", "John", ["name", "age"])
|
|
664
|
+
# Returns: "John"
|
|
665
|
+
|
|
666
|
+
# Simple value processing - key not in target_keys
|
|
667
|
+
result = self._process_mapping_value("city", "New York", ["name", "age"])
|
|
668
|
+
# Returns: None
|
|
669
|
+
|
|
670
|
+
# Dictionary processing with nested target keys
|
|
671
|
+
nested_dict = {
|
|
672
|
+
"profile": {"name": "John", "email": "john@example.com"},
|
|
673
|
+
"settings": {"theme": "dark"}
|
|
674
|
+
}
|
|
675
|
+
result = self._process_mapping_value("user", nested_dict, ["name", "email"])
|
|
676
|
+
# Returns: {"profile": {"name": "John", "email": "john@example.com"}}
|
|
677
|
+
|
|
678
|
+
# Dictionary processing with no matching target keys
|
|
679
|
+
result = self._process_mapping_value("user", nested_dict, ["version"])
|
|
680
|
+
# Returns: None
|
|
681
|
+
"""
|
|
682
|
+
if isinstance(value, dict):
|
|
683
|
+
# Recursively filter nested dictionary
|
|
684
|
+
filtered_dict = self._filter_mappings_by_keys(value, target_keys)
|
|
685
|
+
return filtered_dict if filtered_dict else None
|
|
686
|
+
else:
|
|
687
|
+
# Return simple values as-is
|
|
688
|
+
return value if key in target_keys else None
|
|
689
|
+
|
|
690
|
+
def _deep_merge(
|
|
691
|
+
self, dict1: dict[str, Any], dict2: dict[str, Any]
|
|
692
|
+
) -> dict[str, Any]:
|
|
693
|
+
"""
|
|
694
|
+
Deep merge two dictionaries, preserving nested structures.
|
|
695
|
+
For the same key, nested dicts are merged recursively and lists are concatenated; any other value from dict2 overrides the value from dict1.
|
|
696
|
+
"""
|
|
697
|
+
result = dict1.copy()
|
|
698
|
+
|
|
699
|
+
for key, value in dict2.items():
|
|
700
|
+
if (
|
|
701
|
+
key in result
|
|
702
|
+
and isinstance(result[key], dict)
|
|
703
|
+
and isinstance(value, dict)
|
|
704
|
+
):
|
|
705
|
+
# Recursively merge nested dictionaries
|
|
706
|
+
result[key] = self._deep_merge(result[key], value)
|
|
707
|
+
elif (
|
|
708
|
+
key in result
|
|
709
|
+
and isinstance(result[key], list)
|
|
710
|
+
and isinstance(value, list)
|
|
711
|
+
):
|
|
712
|
+
# Merge lists by extending
|
|
713
|
+
result[key].extend(value)
|
|
714
|
+
else:
|
|
715
|
+
# Override with value from dict2
|
|
716
|
+
result[key] = value
|
|
717
|
+
|
|
718
|
+
return result
|
|
719
|
+
|
|
278
720
|
@staticmethod
|
|
279
721
|
async def _send_examples(data: list[dict[str, Any]], kind: str) -> None:
|
|
280
722
|
try:
|
|
@@ -329,7 +771,10 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
329
771
|
and result.raw_data is not None
|
|
330
772
|
):
|
|
331
773
|
examples_to_send.add_example(
|
|
332
|
-
result.did_entity_pass_selector, result.raw_data
|
|
774
|
+
result.did_entity_pass_selector,
|
|
775
|
+
self._get_raw_data_for_example(
|
|
776
|
+
result.raw_data, mapping.port.items_to_parse_name
|
|
777
|
+
),
|
|
333
778
|
)
|
|
334
779
|
|
|
335
780
|
if result.entity.get("identifier") and result.entity.get("blueprint"):
|
|
@@ -355,3 +800,13 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
355
800
|
errors,
|
|
356
801
|
misconfigured_entity_keys=entity_misconfigurations,
|
|
357
802
|
)
|
|
803
|
+
|
|
804
|
+
def _get_raw_data_for_example(
|
|
805
|
+
self,
|
|
806
|
+
data: dict[str, Any] | tuple[dict[str, Any], str],
|
|
807
|
+
items_to_parse_name: str,
|
|
808
|
+
) -> dict[str, Any]:
|
|
809
|
+
if isinstance(data, tuple):
|
|
810
|
+
raw_data = json.loads(data[1])
|
|
811
|
+
return {items_to_parse_name: data[0], **raw_data}
|
|
812
|
+
return data
|