port-ocean 0.30.6__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- integrations/_infra/Dockerfile.Deb +1 -1
- integrations/_infra/Dockerfile.local +2 -2
- integrations/_infra/entry_local.sh +2 -1
- integrations/_infra/hosts +4 -0
- port_ocean/config/settings.py +1 -2
- port_ocean/core/handlers/entity_processor/jq_entity_processor.py +16 -482
- port_ocean/core/integrations/mixins/utils.py +72 -302
- port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +2 -933
- port_ocean/tests/core/integrations/mixins/test_integration_utils.py +0 -312
- {port_ocean-0.30.6.dist-info → port_ocean-0.31.0.dist-info}/METADATA +1 -1
- {port_ocean-0.30.6.dist-info → port_ocean-0.31.0.dist-info}/RECORD +14 -13
- {port_ocean-0.30.6.dist-info → port_ocean-0.31.0.dist-info}/LICENSE.md +0 -0
- {port_ocean-0.30.6.dist-info → port_ocean-0.31.0.dist-info}/WHEEL +0 -0
- {port_ocean-0.30.6.dist-info → port_ocean-0.31.0.dist-info}/entry_points.txt +0 -0
|
@@ -26,10 +26,10 @@ RUN apt-get update \
|
|
|
26
26
|
python3-pip \
|
|
27
27
|
python3-poetry \
|
|
28
28
|
build-essential\
|
|
29
|
-
jq \
|
|
30
29
|
git \
|
|
31
30
|
python3-venv \
|
|
32
31
|
acl \
|
|
32
|
+
libyaml-dev \
|
|
33
33
|
&& apt-get clean
|
|
34
34
|
|
|
35
35
|
ARG BUILD_CONTEXT
|
|
@@ -51,7 +51,6 @@ RUN rm -rf .venv-docker ${BUILD_CONTEXT}/.venv-docker
|
|
|
51
51
|
RUN python3 -m venv .venv-docker
|
|
52
52
|
RUN python3 -m venv ${BUILD_CONTEXT}/.venv-docker
|
|
53
53
|
|
|
54
|
-
|
|
55
54
|
WORKDIR /app/${BUILD_CONTEXT}
|
|
56
55
|
|
|
57
56
|
WORKDIR /app
|
|
@@ -60,6 +59,7 @@ RUN chown -R ocean:appgroup /app/${BUILD_CONTEXT} && chmod -R 755 /app/${BUILD_C
|
|
|
60
59
|
RUN chown -R ocean:appgroup /tmp/ocean && chmod -R 755 /tmp/ocean
|
|
61
60
|
# Add ocean user to ssl certs group
|
|
62
61
|
RUN setfacl -m u:ocean:rwX /etc/ssl/certs
|
|
62
|
+
|
|
63
63
|
USER ocean
|
|
64
64
|
|
|
65
65
|
ENTRYPOINT ["./integrations/_infra/entry_local.sh"]
|
port_ocean/config/settings.py
CHANGED
|
@@ -132,8 +132,7 @@ class IntegrationConfiguration(BaseOceanSettings, extra=Extra.allow):
|
|
|
132
132
|
upsert_entities_batch_max_length: int = 20
|
|
133
133
|
upsert_entities_batch_max_size_in_bytes: int = 1024 * 1024
|
|
134
134
|
lakehouse_enabled: bool = False
|
|
135
|
-
|
|
136
|
-
yield_items_to_parse_batch_size: int = 10
|
|
135
|
+
yield_items_to_parse_batch_size: int = 500
|
|
137
136
|
|
|
138
137
|
streaming: StreamingSettings = Field(default_factory=lambda: StreamingSettings())
|
|
139
138
|
actions_processor: ActionsProcessorSettings = Field(
|
|
@@ -11,11 +11,6 @@ from loguru import logger
|
|
|
11
11
|
|
|
12
12
|
from port_ocean.context.ocean import ocean
|
|
13
13
|
from port_ocean.core.handlers.entity_processor.base import BaseEntityProcessor
|
|
14
|
-
from port_ocean.core.handlers.entity_processor.jq_input_evaluator import (
|
|
15
|
-
InputClassifyingResult,
|
|
16
|
-
can_expression_run_with_no_input,
|
|
17
|
-
classify_input,
|
|
18
|
-
)
|
|
19
14
|
from port_ocean.core.handlers.port_app_config.models import ResourceConfig
|
|
20
15
|
from port_ocean.core.models import Entity
|
|
21
16
|
from port_ocean.core.ocean_types import (
|
|
@@ -144,55 +139,32 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
144
139
|
entity_mapping_fault_counter: int,
|
|
145
140
|
) -> None:
|
|
146
141
|
if len(entity_misconfigurations) > 0:
|
|
147
|
-
logger.
|
|
142
|
+
logger.error(
|
|
148
143
|
f"Unable to find valid data for: {entity_misconfigurations} (null, missing, or misconfigured)"
|
|
149
144
|
)
|
|
150
145
|
if missing_required_fields:
|
|
151
|
-
logger.
|
|
146
|
+
logger.error(
|
|
152
147
|
f"{entity_mapping_fault_counter} transformations of batch failed due to empty, null or missing values"
|
|
153
148
|
)
|
|
154
149
|
|
|
155
150
|
async def _search(self, data: dict[str, Any], pattern: str) -> Any:
|
|
156
151
|
try:
|
|
157
|
-
loop = asyncio.get_event_loop()
|
|
158
152
|
compiled_pattern = self._compile(pattern)
|
|
159
153
|
func = compiled_pattern.input_value(data)
|
|
160
|
-
return
|
|
161
|
-
None, self._stop_iterator_handler(func.first)
|
|
162
|
-
)
|
|
154
|
+
return func.first()
|
|
163
155
|
except Exception as exc:
|
|
164
156
|
logger.error(
|
|
165
157
|
f"Search failed for pattern '{pattern}' in data: {data}, Error: {exc}"
|
|
166
158
|
)
|
|
167
159
|
return None
|
|
168
160
|
|
|
169
|
-
@lru_cache
|
|
170
|
-
async def _search_stringified(self, data: str, pattern: str) -> Any:
|
|
171
|
-
try:
|
|
172
|
-
loop = asyncio.get_event_loop()
|
|
173
|
-
compiled_pattern = self._compile(pattern)
|
|
174
|
-
func = compiled_pattern.input_text(data)
|
|
175
|
-
return await loop.run_in_executor(
|
|
176
|
-
None, self._stop_iterator_handler(func.first)
|
|
177
|
-
)
|
|
178
|
-
except Exception as exc:
|
|
179
|
-
logger.debug(
|
|
180
|
-
f"Search failed for pattern '{pattern}' in data: {data}, Error: {exc}"
|
|
181
|
-
)
|
|
182
|
-
return None
|
|
183
|
-
|
|
184
161
|
async def _search_as_bool(self, data: dict[str, Any] | str, pattern: str) -> bool:
|
|
185
|
-
loop = asyncio.get_event_loop()
|
|
186
162
|
|
|
187
163
|
compiled_pattern = self._compile(pattern)
|
|
188
|
-
if isinstance(data, str):
|
|
189
|
-
func = compiled_pattern.input_text(data)
|
|
190
|
-
else:
|
|
191
|
-
func = compiled_pattern.input_value(data)
|
|
192
164
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
)
|
|
165
|
+
func = compiled_pattern.input_value(data)
|
|
166
|
+
|
|
167
|
+
value = func.first()
|
|
196
168
|
if isinstance(value, bool):
|
|
197
169
|
return value
|
|
198
170
|
raise EntityProcessorException(
|
|
@@ -201,7 +173,7 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
201
173
|
|
|
202
174
|
async def _search_as_object(
|
|
203
175
|
self,
|
|
204
|
-
data: dict[str, Any]
|
|
176
|
+
data: dict[str, Any],
|
|
205
177
|
obj: dict[str, Any],
|
|
206
178
|
misconfigurations: dict[str, str] | None = None,
|
|
207
179
|
) -> dict[str, Any | None]:
|
|
@@ -233,12 +205,7 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
233
205
|
self._search_as_object(data, value, misconfigurations)
|
|
234
206
|
)
|
|
235
207
|
else:
|
|
236
|
-
|
|
237
|
-
search_tasks[key] = asyncio.create_task(
|
|
238
|
-
self._search_stringified(data, value)
|
|
239
|
-
)
|
|
240
|
-
else:
|
|
241
|
-
search_tasks[key] = asyncio.create_task(self._search(data, value))
|
|
208
|
+
search_tasks[key] = asyncio.create_task(self._search(data, value))
|
|
242
209
|
|
|
243
210
|
result: dict[str, Any | None] = {}
|
|
244
211
|
for key, task in search_tasks.items():
|
|
@@ -262,153 +229,40 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
262
229
|
|
|
263
230
|
async def _get_mapped_entity(
|
|
264
231
|
self,
|
|
265
|
-
data: dict[str, Any]
|
|
232
|
+
data: dict[str, Any],
|
|
266
233
|
raw_entity_mappings: dict[str, Any],
|
|
267
|
-
items_to_parse_key: str | None,
|
|
268
234
|
selector_query: str,
|
|
269
235
|
parse_all: bool = False,
|
|
270
236
|
) -> MappedEntity:
|
|
271
|
-
should_run = await self.
|
|
272
|
-
data, selector_query, items_to_parse_key
|
|
273
|
-
)
|
|
237
|
+
should_run = await self._search_as_bool(data, selector_query)
|
|
274
238
|
if parse_all or should_run:
|
|
275
|
-
misconfigurations,
|
|
276
|
-
|
|
239
|
+
misconfigurations: dict[str, str] = {}
|
|
240
|
+
mapped_entity = await self._search_as_object(
|
|
241
|
+
data, raw_entity_mappings, misconfigurations
|
|
277
242
|
)
|
|
278
243
|
return MappedEntity(
|
|
279
244
|
mapped_entity,
|
|
280
245
|
did_entity_pass_selector=should_run,
|
|
281
|
-
raw_data=data,
|
|
246
|
+
raw_data=data if should_run else None,
|
|
282
247
|
misconfigurations=misconfigurations,
|
|
283
248
|
)
|
|
284
249
|
|
|
285
|
-
return MappedEntity(
|
|
286
|
-
{},
|
|
287
|
-
did_entity_pass_selector=False,
|
|
288
|
-
raw_data=data,
|
|
289
|
-
misconfigurations={},
|
|
290
|
-
)
|
|
291
|
-
|
|
292
|
-
async def _map_entity(
|
|
293
|
-
self,
|
|
294
|
-
data: dict[str, Any] | tuple[dict[str, Any], str],
|
|
295
|
-
raw_entity_mappings: dict[str, Any],
|
|
296
|
-
items_to_parse_key: str | None,
|
|
297
|
-
) -> tuple[dict[str, str], dict[str, Any]]:
|
|
298
|
-
if not items_to_parse_key:
|
|
299
|
-
# No items to parse, map the entity and return the misconfigurations and the mapped entity
|
|
300
|
-
misconfigurations: dict[str, str] = {}
|
|
301
|
-
data_to_search = data if isinstance(data, dict) else data[0]
|
|
302
|
-
mapped_entity = await self._search_as_object(
|
|
303
|
-
data_to_search, raw_entity_mappings, misconfigurations
|
|
304
|
-
)
|
|
305
|
-
return misconfigurations, mapped_entity
|
|
306
|
-
|
|
307
|
-
modified_data: tuple[dict[str, Any], str | dict[str, Any]] = (
|
|
308
|
-
data
|
|
309
|
-
if isinstance(data, tuple)
|
|
310
|
-
else (
|
|
311
|
-
{items_to_parse_key: data[items_to_parse_key]},
|
|
312
|
-
data,
|
|
313
|
-
)
|
|
314
|
-
)
|
|
315
|
-
|
|
316
|
-
misconfigurations_item: dict[str, str] = {}
|
|
317
|
-
misconfigurations_all: dict[str, str] = {}
|
|
318
|
-
# Map the entity with jq expressions that classified as single item expressions with the single item as input
|
|
319
|
-
mapped_entity_item = await self._search_as_object(
|
|
320
|
-
modified_data[0], raw_entity_mappings["item"], misconfigurations_item
|
|
321
|
-
)
|
|
322
|
-
# To Prevent misclassification data loss, we merge the expressions that classified as single item expressions and resulted as misconfigured
|
|
323
|
-
# into the expressions that classified as all expressions
|
|
324
|
-
if misconfigurations_item:
|
|
325
|
-
# The misconfigurations dict not contains the mapping expressions themselves, so we need to filter the original mapping by the misconfigured keys
|
|
326
|
-
filtered_item_mappings = self._filter_mappings_by_keys(
|
|
327
|
-
raw_entity_mappings["item"], list(misconfigurations_item.keys())
|
|
328
|
-
)
|
|
329
|
-
raw_entity_mappings["all"] = self._deep_merge(
|
|
330
|
-
raw_entity_mappings["all"], filtered_item_mappings
|
|
331
|
-
)
|
|
332
|
-
# Map the entity with jq expressions that classified as all expressions with the whole data as input
|
|
333
|
-
mapped_entity_all = await self._search_as_object(
|
|
334
|
-
modified_data[1], raw_entity_mappings["all"], misconfigurations_all
|
|
335
|
-
)
|
|
336
|
-
# Map the entity with jq expressions that classified as no input required expressions with empty object as input
|
|
337
|
-
mapped_entity_empty = await self._search_as_object(
|
|
338
|
-
{}, raw_entity_mappings["empty"], misconfigurations_all
|
|
339
|
-
)
|
|
340
|
-
# Merge the mapped entities
|
|
341
|
-
mapped_entity = self._deep_merge(mapped_entity_item, mapped_entity_all)
|
|
342
|
-
mapped_entity = self._deep_merge(mapped_entity, mapped_entity_empty)
|
|
343
|
-
return misconfigurations_all, mapped_entity
|
|
344
|
-
|
|
345
|
-
async def _should_map_entity(
|
|
346
|
-
self,
|
|
347
|
-
data: dict[str, Any] | tuple[dict[str, Any], str],
|
|
348
|
-
selector_query: str,
|
|
349
|
-
items_to_parse_key: str | None,
|
|
350
|
-
) -> bool:
|
|
351
|
-
if can_expression_run_with_no_input(selector_query):
|
|
352
|
-
return await self._search_as_bool({}, selector_query)
|
|
353
|
-
if isinstance(data, tuple):
|
|
354
|
-
return await self._search_as_bool(
|
|
355
|
-
data[0], selector_query
|
|
356
|
-
) or await self._search_as_bool(data[1], selector_query)
|
|
357
|
-
if items_to_parse_key:
|
|
358
|
-
return await self._search_as_bool(
|
|
359
|
-
data[items_to_parse_key], selector_query
|
|
360
|
-
) or await self._search_as_bool(data, selector_query)
|
|
361
|
-
return await self._search_as_bool(data, selector_query)
|
|
250
|
+
return MappedEntity()
|
|
362
251
|
|
|
363
252
|
async def _calculate_entity(
|
|
364
253
|
self,
|
|
365
254
|
data: dict[str, Any],
|
|
366
255
|
raw_entity_mappings: dict[str, Any],
|
|
367
|
-
items_to_parse: str | None,
|
|
368
|
-
items_to_parse_name: str,
|
|
369
256
|
selector_query: str,
|
|
370
257
|
parse_all: bool = False,
|
|
371
258
|
) -> tuple[list[MappedEntity], list[Exception]]:
|
|
372
|
-
raw_data
|
|
373
|
-
data.copy()
|
|
374
|
-
]
|
|
375
|
-
items_to_parse_key = None
|
|
376
|
-
if items_to_parse:
|
|
377
|
-
items_to_parse_key = items_to_parse_name
|
|
378
|
-
if not ocean.config.yield_items_to_parse:
|
|
379
|
-
if isinstance(data, dict) and data.get("__type") == "path":
|
|
380
|
-
file_path = data.get("file", {}).get("content", {}).get("path")
|
|
381
|
-
with open(file_path, "r") as f:
|
|
382
|
-
data["file"]["content"] = json.loads(f.read())
|
|
383
|
-
items = await self._search(data, items_to_parse)
|
|
384
|
-
if not isinstance(items, list):
|
|
385
|
-
logger.warning(
|
|
386
|
-
f"Failed to parse items for JQ expression {items_to_parse}, Expected list but got {type(items)}."
|
|
387
|
-
f" Skipping..."
|
|
388
|
-
)
|
|
389
|
-
return [], []
|
|
390
|
-
raw_all_payload_stringified = json.dumps(data)
|
|
391
|
-
raw_data = [
|
|
392
|
-
({items_to_parse_name: item}, raw_all_payload_stringified)
|
|
393
|
-
for item in items
|
|
394
|
-
]
|
|
395
|
-
single_item_mappings, all_items_mappings, empty_items_mappings = (
|
|
396
|
-
self._build_raw_entity_mappings(
|
|
397
|
-
raw_entity_mappings, items_to_parse_name
|
|
398
|
-
)
|
|
399
|
-
)
|
|
400
|
-
raw_entity_mappings = {
|
|
401
|
-
"item": single_item_mappings,
|
|
402
|
-
"all": all_items_mappings,
|
|
403
|
-
"empty": empty_items_mappings,
|
|
404
|
-
}
|
|
259
|
+
raw_data = [data.copy()]
|
|
405
260
|
|
|
406
261
|
entities, errors = await gather_and_split_errors_from_results(
|
|
407
262
|
[
|
|
408
263
|
self._get_mapped_entity(
|
|
409
264
|
raw,
|
|
410
265
|
raw_entity_mappings,
|
|
411
|
-
items_to_parse_key,
|
|
412
266
|
selector_query,
|
|
413
267
|
parse_all,
|
|
414
268
|
)
|
|
@@ -421,324 +275,6 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
421
275
|
)
|
|
422
276
|
return entities, errors
|
|
423
277
|
|
|
424
|
-
def _build_raw_entity_mappings(
|
|
425
|
-
self, raw_entity_mappings: dict[str, Any], items_to_parse_name: str
|
|
426
|
-
) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
|
|
427
|
-
"""
|
|
428
|
-
Build the raw entity mappings for the items to parse.
|
|
429
|
-
The mappings are grouped by the input classifying result.
|
|
430
|
-
There are 3 input classifying results:
|
|
431
|
-
- NONE: The expression can be executed with no input
|
|
432
|
-
- SINGLE: The expression can be executed on a single item
|
|
433
|
-
- ALL: The expression can be executed on all the data
|
|
434
|
-
"""
|
|
435
|
-
mappings: dict[InputClassifyingResult, dict[str, Any]] = {
|
|
436
|
-
InputClassifyingResult.NONE: {},
|
|
437
|
-
InputClassifyingResult.SINGLE: {},
|
|
438
|
-
InputClassifyingResult.ALL: {},
|
|
439
|
-
}
|
|
440
|
-
for key, value in raw_entity_mappings.items():
|
|
441
|
-
if isinstance(value, str):
|
|
442
|
-
# Direct string values (identifier, title, icon, blueprint, team)
|
|
443
|
-
self.group_string_mapping_value(
|
|
444
|
-
items_to_parse_name,
|
|
445
|
-
mappings,
|
|
446
|
-
key,
|
|
447
|
-
value,
|
|
448
|
-
)
|
|
449
|
-
elif isinstance(value, dict):
|
|
450
|
-
# Complex objects (IngestSearchQuery for identifier/team, properties, relations)
|
|
451
|
-
self.group_complex_mapping_value(
|
|
452
|
-
items_to_parse_name,
|
|
453
|
-
mappings,
|
|
454
|
-
key,
|
|
455
|
-
value,
|
|
456
|
-
)
|
|
457
|
-
return (
|
|
458
|
-
mappings[InputClassifyingResult.SINGLE],
|
|
459
|
-
mappings[InputClassifyingResult.ALL],
|
|
460
|
-
mappings[InputClassifyingResult.NONE],
|
|
461
|
-
)
|
|
462
|
-
|
|
463
|
-
def group_complex_mapping_value(
|
|
464
|
-
self,
|
|
465
|
-
pattern: str,
|
|
466
|
-
mappings: dict[InputClassifyingResult, dict[str, Any]],
|
|
467
|
-
key: str,
|
|
468
|
-
value: dict[str, Any],
|
|
469
|
-
) -> None:
|
|
470
|
-
if key in ["properties", "relations"]:
|
|
471
|
-
mapping_dicts: dict[InputClassifyingResult, dict[str, Any]] = {
|
|
472
|
-
InputClassifyingResult.SINGLE: {},
|
|
473
|
-
InputClassifyingResult.ALL: {},
|
|
474
|
-
InputClassifyingResult.NONE: {},
|
|
475
|
-
}
|
|
476
|
-
# For properties and relations, filter the dictionary values
|
|
477
|
-
for dict_key, dict_value in value.items():
|
|
478
|
-
if isinstance(dict_value, str):
|
|
479
|
-
self.group_string_mapping_value(
|
|
480
|
-
pattern,
|
|
481
|
-
mapping_dicts,
|
|
482
|
-
dict_key,
|
|
483
|
-
dict_value,
|
|
484
|
-
)
|
|
485
|
-
elif isinstance(dict_value, dict):
|
|
486
|
-
# Handle IngestSearchQuery objects
|
|
487
|
-
self.group_search_query_mapping_value(
|
|
488
|
-
pattern,
|
|
489
|
-
mapping_dicts[InputClassifyingResult.SINGLE],
|
|
490
|
-
mapping_dicts[InputClassifyingResult.ALL],
|
|
491
|
-
dict_key,
|
|
492
|
-
dict_value,
|
|
493
|
-
)
|
|
494
|
-
for input_classifying_result, mapping_dict in mapping_dicts.items():
|
|
495
|
-
if mapping_dict:
|
|
496
|
-
mappings[input_classifying_result][key] = mapping_dict
|
|
497
|
-
else:
|
|
498
|
-
# For identifier/team IngestSearchQuery objects
|
|
499
|
-
self.group_search_query_mapping_value(
|
|
500
|
-
pattern,
|
|
501
|
-
mappings[InputClassifyingResult.SINGLE],
|
|
502
|
-
mappings[InputClassifyingResult.ALL],
|
|
503
|
-
key,
|
|
504
|
-
value,
|
|
505
|
-
)
|
|
506
|
-
|
|
507
|
-
def group_search_query_mapping_value(
|
|
508
|
-
self,
|
|
509
|
-
pattern: str,
|
|
510
|
-
single_item_dict: dict[str, Any],
|
|
511
|
-
all_item_dict: dict[str, Any],
|
|
512
|
-
dict_key: str,
|
|
513
|
-
dict_value: dict[str, Any],
|
|
514
|
-
) -> None:
|
|
515
|
-
if self._classify_search_query(dict_value, pattern):
|
|
516
|
-
single_item_dict[dict_key] = dict_value
|
|
517
|
-
else:
|
|
518
|
-
all_item_dict[dict_key] = dict_value
|
|
519
|
-
|
|
520
|
-
def group_string_mapping_value(
|
|
521
|
-
self,
|
|
522
|
-
pattern: str,
|
|
523
|
-
mappings: dict[InputClassifyingResult, dict[str, Any]],
|
|
524
|
-
key: str,
|
|
525
|
-
value: str,
|
|
526
|
-
) -> None:
|
|
527
|
-
input_evaluation_result = classify_input(value, pattern)
|
|
528
|
-
mappings[input_evaluation_result][key] = value
|
|
529
|
-
|
|
530
|
-
def _classify_search_query(self, query_dict: dict[str, Any], pattern: str) -> bool:
|
|
531
|
-
"""
|
|
532
|
-
Classify the input required to run jq expressions of an IngestSearchQuery
|
|
533
|
-
If at least one rule contains a value that includes the pattern, return True
|
|
534
|
-
Otherwise, return False
|
|
535
|
-
Example:
|
|
536
|
-
The pattern is item
|
|
537
|
-
relations:
|
|
538
|
-
someRelation:
|
|
539
|
-
combinator: "and"
|
|
540
|
-
rules:
|
|
541
|
-
- operator: "="
|
|
542
|
-
property: "someProperty"
|
|
543
|
-
value: .field
|
|
544
|
-
- combinator: "or"
|
|
545
|
-
rules:
|
|
546
|
-
- operator: "="
|
|
547
|
-
property: "anotherProperty"
|
|
548
|
-
value: .item.something
|
|
549
|
-
- operator: "="
|
|
550
|
-
property: "yetAnotherProperty"
|
|
551
|
-
value: .yetAnotherValue
|
|
552
|
-
One value is .item.something, which is a single item based expression, so it will be classified as SINGLE
|
|
553
|
-
"""
|
|
554
|
-
if "rules" not in query_dict:
|
|
555
|
-
return False
|
|
556
|
-
|
|
557
|
-
rules = query_dict["rules"]
|
|
558
|
-
if not isinstance(rules, list):
|
|
559
|
-
return False
|
|
560
|
-
|
|
561
|
-
# Check if any rule contains a value that includes the pattern
|
|
562
|
-
for rule in rules:
|
|
563
|
-
if isinstance(rule, dict) and self._is_rule_or_query_contains_pattern(
|
|
564
|
-
rule, pattern
|
|
565
|
-
):
|
|
566
|
-
return True
|
|
567
|
-
return False
|
|
568
|
-
|
|
569
|
-
def _is_rule_or_query_contains_pattern(
|
|
570
|
-
self, rule: dict[str, Any], pattern: str
|
|
571
|
-
) -> bool:
|
|
572
|
-
"""
|
|
573
|
-
Check if a rule or query contains a value that includes the pattern
|
|
574
|
-
If the value is a single item based expression, return True
|
|
575
|
-
Otherwise, return False
|
|
576
|
-
Example:
|
|
577
|
-
The pattern is item
|
|
578
|
-
The rule is:
|
|
579
|
-
- combinator: "or"
|
|
580
|
-
rules:
|
|
581
|
-
- operator: "="
|
|
582
|
-
property: "anotherProperty"
|
|
583
|
-
value: .item.something
|
|
584
|
-
- operator: "="
|
|
585
|
-
property: "yetAnotherProperty"
|
|
586
|
-
value: .yetAnotherValue
|
|
587
|
-
This rule is not a single rule (not contains a value property) but a search query rule (contains a rules property)
|
|
588
|
-
so we need to recursively check the rules property to check if at least one rule contains a value that includes the pattern
|
|
589
|
-
In this case, one value is .item.something, which is a single item based expression, so it will be classified as SINGLE ==> true
|
|
590
|
-
"""
|
|
591
|
-
if "value" in rule and isinstance(rule["value"], str):
|
|
592
|
-
# Use evaluate_input to check if the pattern is relevant for this value
|
|
593
|
-
input_evaluation_result = classify_input(rule["value"], pattern)
|
|
594
|
-
if input_evaluation_result == InputClassifyingResult.SINGLE:
|
|
595
|
-
return True
|
|
596
|
-
# Recursively check nested IngestSearchQuery objects
|
|
597
|
-
elif "rules" in rule:
|
|
598
|
-
if self._classify_search_query(rule, pattern):
|
|
599
|
-
return True
|
|
600
|
-
return False
|
|
601
|
-
|
|
602
|
-
def _filter_mappings_by_keys(
|
|
603
|
-
self, mappings: dict[str, Any], target_keys: list[str]
|
|
604
|
-
) -> dict[str, Any]:
|
|
605
|
-
"""
|
|
606
|
-
Filter mappings to preserve structure with only the specified keys present.
|
|
607
|
-
Recursively handles nested dictionaries and lists, searching for keys at any level.
|
|
608
|
-
|
|
609
|
-
Args:
|
|
610
|
-
mappings: The dictionary containing mapping configurations to filter
|
|
611
|
-
target_keys: List of keys to preserve in the filtered result
|
|
612
|
-
|
|
613
|
-
Returns:
|
|
614
|
-
A filtered dictionary containing only the specified keys and their nested structures
|
|
615
|
-
|
|
616
|
-
Examples:
|
|
617
|
-
# Basic filtering with direct keys
|
|
618
|
-
mappings = {
|
|
619
|
-
"name": "John",
|
|
620
|
-
"age": 30,
|
|
621
|
-
"city": "New York",
|
|
622
|
-
"country": "USA"
|
|
623
|
-
}
|
|
624
|
-
target_keys = ["name", "age"]
|
|
625
|
-
result = self._filter_mappings_by_keys(mappings, target_keys)
|
|
626
|
-
# Returns: {"name": "John", "age": 30}
|
|
627
|
-
|
|
628
|
-
# Nested dictionary filtering
|
|
629
|
-
mappings = {
|
|
630
|
-
"user": {
|
|
631
|
-
"profile": {"name": "John", "email": "john@example.com"},
|
|
632
|
-
"settings": {"theme": "dark", "notifications": True}
|
|
633
|
-
},
|
|
634
|
-
"metadata": {"created_at": "2023-01-01", "version": "1.0"}
|
|
635
|
-
}
|
|
636
|
-
target_keys = ["name", "email", "created_at"]
|
|
637
|
-
result = self._filter_mappings_by_keys(mappings, target_keys)
|
|
638
|
-
# Returns: {
|
|
639
|
-
# "user": {
|
|
640
|
-
# "profile": {"name": "John", "email": "john@example.com"}
|
|
641
|
-
# },
|
|
642
|
-
# "metadata": {"created_at": "2023-01-01"}
|
|
643
|
-
# }
|
|
644
|
-
|
|
645
|
-
# Empty target keys returns empty dict
|
|
646
|
-
result = self._filter_mappings_by_keys(mappings, [])
|
|
647
|
-
# Returns: {}
|
|
648
|
-
"""
|
|
649
|
-
if not target_keys:
|
|
650
|
-
return {}
|
|
651
|
-
|
|
652
|
-
filtered_mappings: dict[str, Any] = {}
|
|
653
|
-
|
|
654
|
-
for key, value in mappings.items():
|
|
655
|
-
filtered_value = self._process_mapping_value(key, value, target_keys)
|
|
656
|
-
|
|
657
|
-
# Include if it's a direct match or contains nested target keys
|
|
658
|
-
if key in target_keys or filtered_value:
|
|
659
|
-
filtered_mappings[key] = filtered_value
|
|
660
|
-
|
|
661
|
-
return filtered_mappings
|
|
662
|
-
|
|
663
|
-
def _process_mapping_value(
|
|
664
|
-
self, key: str, value: Any, target_keys: list[str]
|
|
665
|
-
) -> Any:
|
|
666
|
-
"""
|
|
667
|
-
Process a single mapping value, handling different types recursively.
|
|
668
|
-
|
|
669
|
-
This helper method is used by _filter_mappings_by_keys to process individual
|
|
670
|
-
key-value pairs. It handles both simple values and nested dictionaries,
|
|
671
|
-
applying the filtering logic recursively to maintain the hierarchical structure.
|
|
672
|
-
|
|
673
|
-
Args:
|
|
674
|
-
key: The key of the current mapping item being processed
|
|
675
|
-
value: The value associated with the key (can be any type)
|
|
676
|
-
target_keys: List of keys to preserve in the filtered result
|
|
677
|
-
|
|
678
|
-
Returns:
|
|
679
|
-
The processed value if it should be included, None otherwise.
|
|
680
|
-
For dictionaries, returns the filtered dictionary or None if empty.
|
|
681
|
-
For simple values, returns the value if the key is in target_keys, None otherwise.
|
|
682
|
-
|
|
683
|
-
Examples:
|
|
684
|
-
# Simple value processing - key in target_keys
|
|
685
|
-
result = self._process_mapping_value("name", "John", ["name", "age"])
|
|
686
|
-
# Returns: "John"
|
|
687
|
-
|
|
688
|
-
# Simple value processing - key not in target_keys
|
|
689
|
-
result = self._process_mapping_value("city", "New York", ["name", "age"])
|
|
690
|
-
# Returns: None
|
|
691
|
-
|
|
692
|
-
# Dictionary processing with nested target keys
|
|
693
|
-
nested_dict = {
|
|
694
|
-
"profile": {"name": "John", "email": "john@example.com"},
|
|
695
|
-
"settings": {"theme": "dark"}
|
|
696
|
-
}
|
|
697
|
-
result = self._process_mapping_value("user", nested_dict, ["name", "email"])
|
|
698
|
-
# Returns: {"profile": {"name": "John", "email": "john@example.com"}}
|
|
699
|
-
|
|
700
|
-
# Dictionary processing with no matching target keys
|
|
701
|
-
result = self._process_mapping_value("user", nested_dict, ["version"])
|
|
702
|
-
# Returns: None
|
|
703
|
-
"""
|
|
704
|
-
if isinstance(value, dict):
|
|
705
|
-
# Recursively filter nested dictionary
|
|
706
|
-
filtered_dict = self._filter_mappings_by_keys(value, target_keys)
|
|
707
|
-
return filtered_dict if filtered_dict else None
|
|
708
|
-
else:
|
|
709
|
-
# Return simple values as-is
|
|
710
|
-
return value if key in target_keys else None
|
|
711
|
-
|
|
712
|
-
def _deep_merge(
|
|
713
|
-
self, dict1: dict[str, Any], dict2: dict[str, Any]
|
|
714
|
-
) -> dict[str, Any]:
|
|
715
|
-
"""
|
|
716
|
-
Deep merge two dictionaries, preserving nested structures.
|
|
717
|
-
Values from dict2 override values from dict1 for the same keys.
|
|
718
|
-
"""
|
|
719
|
-
result = dict1.copy()
|
|
720
|
-
|
|
721
|
-
for key, value in dict2.items():
|
|
722
|
-
if (
|
|
723
|
-
key in result
|
|
724
|
-
and isinstance(result[key], dict)
|
|
725
|
-
and isinstance(value, dict)
|
|
726
|
-
):
|
|
727
|
-
# Recursively merge nested dictionaries
|
|
728
|
-
result[key] = self._deep_merge(result[key], value)
|
|
729
|
-
elif (
|
|
730
|
-
key in result
|
|
731
|
-
and isinstance(result[key], list)
|
|
732
|
-
and isinstance(value, list)
|
|
733
|
-
):
|
|
734
|
-
# Merge lists by extending
|
|
735
|
-
result[key].extend(value)
|
|
736
|
-
else:
|
|
737
|
-
# Override with value from dict2
|
|
738
|
-
result[key] = value
|
|
739
|
-
|
|
740
|
-
return result
|
|
741
|
-
|
|
742
278
|
@staticmethod
|
|
743
279
|
async def _send_examples(data: list[dict[str, Any]], kind: str) -> None:
|
|
744
280
|
try:
|
|
@@ -768,8 +304,6 @@ class JQEntityProcessor(BaseEntityProcessor):
|
|
|
768
304
|
raw_results,
|
|
769
305
|
self._calculate_entity,
|
|
770
306
|
raw_entity_mappings,
|
|
771
|
-
mapping.port.items_to_parse,
|
|
772
|
-
mapping.port.items_to_parse_name,
|
|
773
307
|
mapping.selector.query,
|
|
774
308
|
parse_all,
|
|
775
309
|
)
|