howler-api 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of howler-api has been flagged as potentially problematic.
- howler/api/__init__.py +1 -1
- howler/api/v1/auth.py +1 -1
- howler/api/v1/{borealis.py → clue.py} +24 -26
- howler/api/v1/dossier.py +4 -28
- howler/api/v1/hit.py +11 -7
- howler/api/v1/search.py +160 -17
- howler/api/v1/user.py +2 -2
- howler/api/v1/utils/etag.py +43 -5
- howler/api/v1/view.py +26 -34
- howler/app.py +4 -4
- howler/cronjobs/view_cleanup.py +88 -0
- howler/datastore/README.md +0 -2
- howler/datastore/collection.py +109 -132
- howler/datastore/howler_store.py +0 -45
- howler/datastore/store.py +25 -6
- howler/odm/base.py +1 -1
- howler/odm/helper.py +9 -6
- howler/odm/models/config.py +168 -8
- howler/odm/models/howler_data.py +2 -1
- howler/odm/models/lead.py +1 -10
- howler/odm/models/pivot.py +2 -11
- howler/odm/random_data.py +1 -1
- howler/security/__init__.py +2 -2
- howler/services/analytic_service.py +31 -0
- howler/services/config_service.py +2 -2
- howler/services/dossier_service.py +140 -7
- howler/services/hit_service.py +317 -72
- howler/services/lucene_service.py +14 -7
- howler/services/overview_service.py +44 -0
- howler/services/template_service.py +45 -0
- howler/utils/lucene.py +22 -2
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/METADATA +5 -5
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/RECORD +35 -32
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/WHEEL +1 -1
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/entry_points.txt +0 -0
howler/services/hit_service.py
CHANGED
@@ -1,3 +1,4 @@
+import functools
 import json
 import re
 import typing
@@ -29,11 +30,11 @@ from howler.odm.models.ecs.event import Event
 from howler.odm.models.hit import Hit
 from howler.odm.models.howler_data import HitOperationType, HitStatus, HitStatusTransition, Log
 from howler.odm.models.user import User
-from howler.services import action_service
+from howler.services import action_service, analytic_service, dossier_service, overview_service, template_service
 from howler.utils.dict_utils import extra_keys, flatten
 from howler.utils.uid import get_random_id
 
-
+logger = get_logger(__file__)
 
 odm_helper = OdmHelper(Hit)
 
@@ -249,20 +250,34 @@ def validate_hit_ids(hit_ids: list[str]) -> bool:
 
 
 def convert_hit(data: dict[str, Any], unique: bool, ignore_extra_values: bool = False) -> tuple[Hit, list[str]]: # noqa: C901
-    """Validate
+    """Validate and convert a dictionary to a Hit ODM object.
 
-
-
-
-
-        emit a warning. Defaults to False.
+    This function performs comprehensive validation on input data to ensure it can be
+    safely converted to a Hit object. It handles hash generation, ID assignment,
+    data normalization, and validation warnings. The function also checks for
+    deprecated fields and enforces naming conventions for analytics and detections.
 
-
-
-
+    Args:
+        data: Dictionary containing hit data to validate and convert
+        unique: Whether to enforce uniqueness by checking if the hit ID already exists
+        ignore_extra_values: Whether to ignore invalid extra fields (True) or raise an exception (False)
 
     Returns:
-
+        Tuple containing:
+        - Hit: The validated and converted ODM object
+        - list[str]: List of validation warnings (unused fields, deprecated fields, naming issues)
+
+    Raises:
+        HowlerValueError: If bundle is specified during creation, invalid parameters are provided,
+            or naming conventions are violated
+        HowlerTypeError: If the data cannot be converted to a Hit ODM object
+        ResourceExists: If unique=True and a hit with the generated ID already exists
+
+    Note:
+        - Automatically generates a hash based on analytic, detection, and raw data
+        - Assigns a random ID if not provided
+        - Normalizes data fields to ensure consistent storage format
+        - Validates analytic and detection names against best practices (letters and spaces only)
     """
     data = flatten(data, odm=Hit)
 
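As a point of reference, a minimal usage sketch of the convert_hit contract documented above; the input fields are illustrative, and the import path simply mirrors this module's location in the diff:

    from howler.services import hit_service

    # Hypothetical input - convert_hit flattens the dict against the Hit ODM,
    # so dotted keys like "howler.analytic" are acceptable.
    raw = {
        "howler.analytic": "Example Analytic",
        "howler.detection": "Example Detection",
    }

    # Returns the validated Hit ODM object plus a list of validation warnings;
    # raises HowlerValueError / HowlerTypeError / ResourceExists as described above.
    hit, warnings = hit_service.convert_hit(raw, unique=True, ignore_extra_values=False)
    for warning in warnings:
        print(warning)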
@@ -340,7 +355,14 @@ def convert_hit(data: dict[str, Any], unique: bool, ignore_extra_values: bool =
 
 
 def exists(id: str):
-    "Check if a hit exists
+    """Check if a hit exists in the datastore.
+
+    Args:
+        id: The unique identifier of the hit to check
+
+    Returns:
+        bool: True if the hit exists, False otherwise
+    """
     return datastore().hit.exists(id)
 
 
@@ -349,7 +371,17 @@ def get_hit(
     as_odm: bool = False,
     version: bool = False,
 ):
-    """
+    """Retrieve a hit from the datastore.
+
+    Args:
+        id: The unique identifier of the hit to retrieve
+        as_odm: Whether to return the hit as an ODM object (True) or dictionary (False)
+        version: Whether to include version information in the response
+
+    Returns:
+        Hit object (if as_odm=True) or dictionary representation of the hit.
+        Returns None if the hit doesn't exist.
+    """
     return datastore().hit.get_if_exists(key=id, as_obj=as_odm, version=version)
 
 
@@ -366,7 +398,23 @@ def create_hit(
     user: Optional[str] = None,
     overwrite: bool = False,
 ) -> bool:
-    """Create a hit in the database
+    """Create a new hit in the database.
+
+    This function saves a hit to the datastore, optionally adding a creation log entry
+    and updating metrics. It will prevent overwriting existing hits unless explicitly allowed.
+
+    Args:
+        id: The unique identifier for the hit
+        hit: The Hit ODM object to save
+        user: Optional username to record in the creation log
+        overwrite: Whether to allow overwriting an existing hit with the same ID
+
+    Returns:
+        bool: True if the hit was successfully created
+
+    Raises:
+        ResourceExists: If a hit with the same ID already exists and overwrite=False
+    """
     if not overwrite and does_hit_exist(id):
         raise ResourceExists("Hit %s already exists in datastore" % id)
 
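Tying the exists and create_hit docstrings together, a hedged sketch of the creation flow (reusing the raw dict and hit_service import from the earlier sketch; "admin" is a placeholder username):

    hit, _warnings = hit_service.convert_hit(raw, unique=True)

    if not hit_service.exists(hit.howler.id):
        # Raises ResourceExists if the id is already taken and overwrite=False.
        hit_service.create_hit(hit.howler.id, hit, user="admin", overwrite=False)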
@@ -383,7 +431,23 @@ def update_hit(
     user: Optional[str] = None,
     version: Optional[str] = None,
 ):
-    """Update one or more properties of a hit in the database.
+    """Update one or more properties of a hit in the database.
+
+    This function applies a list of update operations to modify hit properties.
+    Note that hit status cannot be modified through this function - use transition_hit instead.
+
+    Args:
+        hit_id: The unique identifier of the hit to update
+        operations: List of ODM update operations to apply
+        user: Optional username to record in the update log
+        version: Optional version string for optimistic locking
+
+    Returns:
+        Tuple of (updated_hit_data, new_version)
+
+    Raises:
+        HowlerValueError: If attempting to modify hit status through this function
+    """
     # Status of a hit should only be updated through the transition function
     if _modifies_prop("howler.status", operations):
         raise HowlerValueError(
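A hedged sketch of calling update_hit as documented above; the operations list is a placeholder, since constructing ODM update operations (for example via this module's odm_helper) is not shown in this diff:

    from howler.services import hit_service

    # `hit_id` and `operations` are placeholders; any operation that targets
    # "howler.status" raises HowlerValueError - use transition_hit for status changes.
    updated_hit, new_version = hit_service.update_hit(
        hit_id,
        operations,
        user="admin",   # recorded in the hit worklog
        version=None,   # optional optimistic-locking version
    )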
@@ -395,7 +459,18 @@ def update_hit(
 
 @typing.no_type_check
 def save_hit(hit: Hit, version: Optional[str] = None) -> tuple[Hit, str]:
-    "Save a hit to the datastore
+    """Save a hit to the datastore and emit an event notification.
+
+    This function persists a hit object to the database and emits an event
+    to notify other systems of the change.
+
+    Args:
+        hit: The Hit ODM object to save
+        version: Optional version string for optimistic locking
+
+    Returns:
+        Tuple of (hit_data_dict, version_string)
+    """
     datastore().hit.save(hit.howler.id, hit, version=version)
     data, _version = datastore().hit.get(hit.howler.id, as_obj=False, version=True)
     event_service.emit("hits", {"hit": data, "version": _version})
@@ -409,7 +484,23 @@ def _update_hit(
     user: Optional[str] = None,
     version: Optional[str] = None,
 ) -> tuple[Hit, str]:
-    """
+    """Internal function to update a hit with proper logging and event emission.
+
+    This function applies update operations to a hit, automatically adding worklog entries
+    for non-silent operations and emitting events to notify other systems of changes.
+
+    Args:
+        hit_id: The unique identifier of the hit to update
+        operations: List of ODM update operations to apply
+        user: Optional username to record in operation logs
+        version: Optional version string for optimistic locking
+
+    Returns:
+        Tuple of (updated_hit_data, new_version)
+
+    Raises:
+        HowlerValueError: If user parameter is provided but not a string
+    """
     final_operations = []
 
     if user and not isinstance(user, str):
@@ -444,7 +535,7 @@ def _update_hit(
         else:
             operation_type = HitOperationType.SET
 
-
+        logger.debug("%s - %s - %s -> %s", hit_id, operation.key, previous_value, operation.value)
         final_operations.append(operation)
 
         if not operation.silent:
@@ -486,13 +577,26 @@ def get_transitions(status: HitStatus) -> list[str]:
 
 
 def get_all_children(hit: Hit):
-    "Get a list of all
+    """Get a list of all child hits for a given hit, including nested children.
+
+    This function recursively traverses bundle structures to find all child hits.
+    If a child hit is itself a bundle, it will recursively get its children too.
+
+    Args:
+        hit: The parent hit to get children for
+
+    Returns:
+        List of all child hits (may include None values for missing hits)
+    """
+    # Get immediate child hits from the hit's bundle
     child_hits = [get_hit(hit_id) for hit_id in hit["howler"].get("hits", [])]
 
+    # Recursively process child hits that are themselves bundles
     for entry in child_hits:
         if not entry:
             continue
 
+        # If this child is a bundle, get its children too
        if entry["howler"]["is_bundle"]:
            child_hits.extend(get_all_children(entry))
 
@@ -506,76 +610,96 @@ def transition_hit(
     version: Optional[str] = None,
     **kwargs,
 ):
-    """Transition a hit from one status to another while updating related properties
+    """Transition a hit from one status to another while updating related properties.
+
+    This function handles status transitions for both individual hits and bundles,
+    applying the same transition to all child hits in a bundle. For certain transitions
+    (PROMOTE, DEMOTE, ASSESS, RE_EVALUATE), it also executes bulk actions and emits events.
 
     Args:
-        id
-        transition
-        user
-        version
-
-    hit: Hit = get_hit(id, as_odm=False) if not kwargs.get("hit", None) else kwargs.pop("hit")
+        id: The id of the hit to transition
+        transition: The transition to execute (e.g., ASSIGN_TO_ME, ASSESS, PROMOTE)
+        user: The user running the transition
+        version: Optional version to validate against. The transition will not run if the version doesn't match.
+        **kwargs: Additional arguments including potential 'hit' object and 'assessment' value
 
-
+    Raises:
+        NotFoundException: If the hit does not exist
+    """
+    # Get the primary hit (either provided in kwargs or fetch from database)
+    primary_hit: Hit = kwargs.pop("hit", None) or get_hit(id, as_odm=False)
 
-    if not
+    if not primary_hit:
         raise NotFoundException("Hit does not exist")
 
-
+    workflow: Workflow = get_hit_workflow()
+
+    # Get all child hits that need to be processed along with the primary hit
+    child_hits = get_all_children(primary_hit)
+    primary_hit_status = primary_hit["howler"]["status"]
 
-
-
-
-    )
+    # Log all hits that will be transitioned
+    all_hit_ids = [h["howler"]["id"] for h in ([primary_hit] + [ch for ch in child_hits if ch])]
+    logger.debug("Transitioning (%s)", ", ".join(all_hit_ids))
 
-
-
-
+    # Process each hit (primary + children) with the workflow transition
+    for current_hit in [primary_hit] + [ch for ch in child_hits if ch]:
+        current_hit_status = current_hit["howler"]["status"]
+        current_hit_id = current_hit["howler"]["id"]
 
-
-
+        # Skip hits that don't match the primary hit's status
+        # This ensures consistent state transitions across bundles
+        if current_hit_status != primary_hit_status:
+            logger.debug("Skipping %s (status mismatch)", current_hit_id)
             continue
 
-
+        # Apply the workflow transition to get required updates
+        updates = workflow.transition(current_hit_status, transition, user=user, hit=current_hit, **kwargs)
 
+        # Apply updates if any were generated by the workflow
         if updates:
-
-
-
-                user["uname"],
-                version=(version if (hit_id == hit["howler"]["id"] and version) else None),
-            )
+            # Only apply version validation to the primary hit
+            hit_version = version if (current_hit_id == primary_hit["howler"]["id"] and version) else None
+            _update_hit(current_hit_id, updates, user["uname"], version=hit_version)
 
-
+    # Execute bulk actions for transitions that require them
+    # These transitions need additional processing beyond the workflow
+    transitions_requiring_bulk_actions = [
         HitStatusTransition.PROMOTE,
         HitStatusTransition.DEMOTE,
         HitStatusTransition.ASSESS,
         HitStatusTransition.RE_EVALUATE,
-    ]
+    ]
+
+    if transition in transitions_requiring_bulk_actions:
+        # Determine the trigger action (promote/demote) based on transition type
         trigger: Union[Literal["promote"], Literal["demote"]]
 
         if transition == HitStatusTransition.ASSESS:
+            # For assessments, determine promotion/demotion based on escalation level
             new_escalation = AssessmentEscalationMap[kwargs["assessment"]]
-
-            if new_escalation == Escalation.EVIDENCE:
-                trigger = "promote"
-            else:
-                trigger = "demote"
+            trigger = "promote" if new_escalation == Escalation.EVIDENCE else "demote"
         elif transition == HitStatusTransition.RE_EVALUATE:
+            # Re-evaluation always promotes the hit
            trigger = "promote"
        else:
+            # For direct PROMOTE/DEMOTE transitions, use the transition name
            trigger = cast(Union[Literal["promote"], Literal["demote"]], transition)
 
+        # Commit database changes before executing bulk actions
        datastore().hit.commit()
-        action_service.bulk_execute_on_query(
-            f"howler.id:({' OR '.join(h['howler']['id'] for h in ([hit] + child_hits))})",
-            trigger=trigger,
-            user=user,
-        )
 
-        for
-
-
+        # Build query for all processed hits (primary + children)
+        all_processed_hits = [primary_hit] + child_hits
+        hit_query = f"howler.id:({' OR '.join(h['howler']['id'] for h in all_processed_hits)})"
+
+        # Execute bulk actions on all hits
+        action_service.bulk_execute_on_query(hit_query, trigger=trigger, user=user)
+
+        # Emit events for all processed hits to notify other systems
+        for processed_hit in all_processed_hits:
+            data, hit_version = datastore().hit.get(processed_hit["howler"]["id"], as_obj=False, version=True)
+            event_service.emit("hits", {"hit": data, "version": hit_version})
 
 
 DELETED_HITS = Counter(f"{APP_NAME.replace('-', '_')}_deleted_hits_total", "The number of deleted hits")
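To make the transition flow above concrete, a hedged usage sketch; hit_id, current_user and current_version are placeholders, and the assessment value is illustrative (HitStatusTransition and the assessment keyword are taken from this diff):

    from howler.odm.models.howler_data import HitStatusTransition
    from howler.services import hit_service

    # Assess a hit; if it is a bundle, the same transition cascades to its children.
    hit_service.transition_hit(
        hit_id,                      # placeholder id of the hit (or bundle) to transition
        HitStatusTransition.ASSESS,  # transition to execute
        user=current_user,           # user dict; user["uname"] is written to the worklog
        version=current_version,     # optional optimistic-locking version for the primary hit
        assessment="legitimate",     # consumed via kwargs["assessment"] during ASSESS
    )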
@@ -619,21 +743,25 @@ def search(
     track_total_hits: Optional[Any] = None,
     as_obj: bool = True,
 ) -> HitSearchResult:
-    """Search for
+    """Search for hits in the datastore using a query.
+
+    This function provides a flexible search interface for finding hits based on
+    various criteria. It supports pagination, sorting, field limiting, and other
+    advanced search features.
 
     Args:
-        query
-        offset
-        rows
-        sort
-        fl
-        timeout
-        deep_paging_id
-        track_total_hits
-        as_obj (
+        query: The search query string (supports Lucene syntax)
+        offset: Number of results to skip (for pagination)
+        rows: Maximum number of results to return
+        sort: Sort criteria for the results
+        fl: Field list - which fields to include in results
+        timeout: Query timeout duration
+        deep_paging_id: Identifier for deep pagination
+        track_total_hits: Whether to track the total hit count
+        as_obj: Whether to return results as ODM objects (True) or dictionaries (False)
 
     Returns:
-        HitSearchResult
+        HitSearchResult containing the matching hits and metadata
     """
     return datastore().hit.search(
         query=query,
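A short, hedged sketch of the search interface documented above; the field names used in the query and field list are illustrative rather than taken from this diff:

    from howler.services import hit_service

    results = hit_service.search(
        'howler.analytic:"Example Analytic"',          # Lucene-style query
        offset=0,
        rows=25,
        fl="howler.id,howler.analytic,howler.status",  # restrict returned fields
        as_obj=False,
    )

    # With as_obj=False the datastore returns plain dictionaries under "items".
    for item in results["items"]:
        print(item["howler"]["id"])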
@@ -646,3 +774,120 @@ def search(
         track_total_hits=track_total_hits,
         as_obj=as_obj,
     )
+
+
+TYPE_PRIORITY = {"personal": 2, "readonly": 1, "global": 0, None: 0}
+
+
+def __compare_metadata(object_a: dict[str, Any], object_b: dict[str, Any]) -> int:
+    # Sort priority:
+    # 1. personal > readonly > global
+    # 2. detection > !detection
+
+    if object_a.get("type", None) != object_b.get("type", None):
+        return TYPE_PRIORITY[object_b.get("type", None)] - TYPE_PRIORITY[object_a.get("type", None)]
+
+    if object_a.get("detection", None) and not object_b.get("detection", None):
+        return -1
+
+    if not object_a.get("detection", None) and object_b.get("detection", None):
+        return 1
+
+    return 0
+
+
+def __match_metadata(candidates: list[dict[str, Any]], hit: dict[str, Any]) -> Optional[dict[str, Any]]:
+    matching_candidates: list[dict[str, Any]] = []
+
+    for candidate in candidates:
+        if candidate["analytic"].lower() != hit["howler"]["analytic"].lower():
+            continue
+
+        if not candidate.get("detection", None):
+            matching_candidates.append(candidate)
+            continue
+
+        if not hit["howler"].get("detection", None):
+            continue
+
+        if hit["howler"]["detection"].lower() != candidate["detection"].lower():
+            continue
+
+        matching_candidates.append(candidate)
+
+    if len(matching_candidates) < 1:
+        return None
+
+    return sorted(matching_candidates, key=functools.cmp_to_key(__compare_metadata))[0]
+
+
+def augment_metadata(data: list[dict[str, Any]] | dict[str, Any], metadata: list[str], user: dict[str, Any]): # noqa: C901
+    """Augment hit search results with additional metadata.
+
+    This function enriches hit data by adding related information such as templates,
+    overviews, and matching dossiers. The metadata is added as special fields prefixed
+    with double underscores (e.g., __template, __overview, __dossiers).
+
+    Args:
+        data: Hit data - either a single hit dictionary or list of hit dictionaries
+        metadata: List of metadata types to include ('template', 'overview', 'dossiers')
+        user: User context for determining accessible templates and other user-specific data
+
+    Note:
+        This function modifies the input data in-place, adding metadata fields.
+        Templates are filtered based on user permissions (global or owned by user).
+    """
+    if isinstance(data, list):
+        hits = data
+    elif data is not None:
+        hits = [data]
+    else:
+        hits = []
+
+    if len(hits) < 1:
+        return
+
+    logger.debug("Augmenting %s hits with %s", len(hits), ",".join(metadata))
+
+    if "template" in metadata:
+        template_candidates = template_service.get_matching_templates(hits, user["uname"], as_odm=False)
+
+        logger.debug("\tRetrieved %s matching templates", len(template_candidates))
+
+        for hit in hits:
+            hit["__template"] = __match_metadata(cast(list[dict[str, Any]], template_candidates), hit)
+
+    if "overview" in metadata:
+        overview_candidates = overview_service.get_matching_overviews(hits, as_odm=False)
+
+        logger.debug("\tRetrieved %s matching overviews", len(overview_candidates))
+
+        for hit in hits:
+            hit["__overview"] = __match_metadata(cast(list[dict[str, Any]], overview_candidates), hit)
+
+    if "analytic" in metadata:
+        matched_analytics = analytic_service.get_matching_analytics(hits)
+        logger.debug("\tRetrieved %s matching analytics", len(matched_analytics))
+
+        for hit in hits:
+            matched_analytic = next(
+                (
+                    analytic
+                    for analytic in matched_analytics
+                    if analytic.name.lower() == hit["howler"]["analytic"].lower()
+                ),
+                None,
+            )
+
+            hit["__analytic"] = matched_analytic.as_primitives() if matched_analytic else None
+
+    if "dossiers" in metadata:
+        dossiers: list[dict[str, Any]] = datastore().dossier.search(
+            "dossier_id:*",
+            as_obj=False,
+            # TODO: Eventually implement caching here
+            rows=1000,
+        )["items"]
+
+        for hit in hits:
+            hit["__dossiers"] = dossier_service.get_matching_dossiers(hit, dossiers)
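The TYPE_PRIORITY ordering above can be illustrated standalone; the following restates the comparator outside the module (candidate dicts are made up) to show which candidate __match_metadata would pick first:

    # Standalone illustration of the sort order implemented by __compare_metadata:
    # personal > readonly > global, and detection-specific entries before generic ones.
    import functools

    TYPE_PRIORITY = {"personal": 2, "readonly": 1, "global": 0, None: 0}

    def compare_metadata(a, b):
        if a.get("type") != b.get("type"):
            return TYPE_PRIORITY[b.get("type")] - TYPE_PRIORITY[a.get("type")]
        if a.get("detection") and not b.get("detection"):
            return -1
        if not a.get("detection") and b.get("detection"):
            return 1
        return 0

    candidates = [
        {"type": "global", "detection": None},
        {"type": "personal", "detection": "Example Detection"},
        {"type": "personal", "detection": None},
        {"type": "readonly", "detection": "Example Detection"},
    ]

    best_first = sorted(candidates, key=functools.cmp_to_key(compare_metadata))
    # Order: personal with detection, personal without, readonly, global.
    print(best_first[0])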
howler/services/lucene_service.py
CHANGED
@@ -4,7 +4,6 @@ import re
 import sys
 from datetime import datetime, timedelta
 from hashlib import sha256
-from ipaddress import ip_address
 from typing import Any, Literal, Union, cast
 
 from elasticsearch._sync.client.indices import IndicesClient
@@ -19,7 +18,7 @@ from howler.common.loader import datastore
 from howler.config import redis
 from howler.remote.datatypes.hash import Hash
 from howler.utils.dict_utils import flatten_deep
-from howler.utils.lucene import coerce, normalize_phrase, try_parse_date, try_parse_ip
+from howler.utils.lucene import coerce, normalize_phrase, try_parse_date, try_parse_ip, try_parse_number
 
 logger = get_logger(__file__)
 
@@ -86,6 +85,13 @@ class LuceneProcessor(TreeVisitor):
 
             return low_datetime_result, datetime_result, high_datetime_result
 
+        if number_result := coerce(value, try_parse_number):
+            low_number_result = coerce(low, try_parse_number)
+            high_number_result = coerce(high, try_parse_number)
+
+            if low_number_result is not None and high_number_result is not None:
+                return low_number_result, number_result, high_number_result
+
         try:
             # Check if the value is a simple integer
             return int(low), coerce(value, int), int(high)
@@ -93,10 +99,11 @@ class LuceneProcessor(TreeVisitor):
             pass
 
         if ip_result := coerce(value, try_parse_ip):
-            low_ip_result =
-            high_ip_result =
+            low_ip_result = coerce(low, try_parse_ip)
+            high_ip_result = coerce(high, try_parse_ip)
 
-
+            if low_ip_result is not None and high_ip_result is not None:
+                return low_ip_result, ip_result, high_ip_result
 
         try:
             # Check if the value is a float
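The range handling leans on coerce and the try_parse_* helpers from howler.utils.lucene, whose bodies are not part of this diff. As a rough, self-contained sketch of the pattern (an assumption about their behaviour, not the library code): each helper attempts a parse, and the caller only compares when low, value and high all parse to the same type.

    from ipaddress import ip_address
    from typing import Callable, Optional, TypeVar

    T = TypeVar("T")

    def coerce(value: str, parser: Callable[[str], T]) -> Optional[T]:
        # Return the parsed value, or None if the parser rejects the input.
        try:
            return parser(value)
        except (ValueError, TypeError):
            return None

    # A range like [10 TO 20] is only evaluated numerically when every part parses.
    low, value, high = coerce("10", float), coerce("15", float), coerce("20", float)
    if None not in (low, value, high):
        print(low <= value <= high)  # True

    # The same guard applies to IP ranges such as [10.0.0.1 TO 10.0.0.255].
    print(coerce("10.0.0.42", ip_address))  # 10.0.0.42
    print(coerce("not-an-ip", ip_address))  # None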
@@ -108,7 +115,7 @@ class LuceneProcessor(TreeVisitor):
 
     def visit_range(self, node: Range, context: dict[str, Any]):
        "Handle range queries"
-        low, value, high = self.__parse_range(node.
+        low, value, high = self.__parse_range(node.low.value, context["hit"].get(context["field"]), node.high.value)
 
         if isinstance(value, list):
             values = value
@@ -221,7 +228,7 @@ def match(lucene: str, obj: dict[str, Any]):
     hash_key = sha256(lucene.encode()).hexdigest()
 
     # We cache the results back from ES, since we will frequently run the same validation queries over and over again.
-    if (normalized_query := NORMALIZED_QUERY_CACHE.get(hash_key)) is
+    if (normalized_query := NORMALIZED_QUERY_CACHE.get(hash_key)) is None or "pytest" in sys.modules:
         # This regex checks for lucene phrases (i.e. the "Example Analytic" part of howler.analytic:"Example Analytic")
         # And then escapes them.
         # https://regex101.com/r/8u5F6a/1
howler/services/overview_service.py
ADDED
@@ -0,0 +1,44 @@
+from typing import Any, Union
+
+from howler.common.loader import datastore
+from howler.common.logging import get_logger
+from howler.datastore.exceptions import SearchException
+from howler.odm.models.hit import Hit
+from howler.odm.models.overview import Overview
+from howler.utils.str_utils import sanitize_lucene_query
+
+logger = get_logger(__file__)
+
+
+def get_matching_overviews(
+    hits: Union[list[Hit], list[dict[str, Any]]], as_odm: bool = False
+) -> Union[list[dict[str, Any]], list[Overview]]:
+    """Generate a list of overviews matching a given list of analytic names from the provided hits.
+
+    Args:
+        hits (list[Hit] | list[dict[str, Any]]): A list of Hit objects or dictionaries containing analytic information.
+        as_odm (bool, optional): If True, return Overview objects; otherwise, return dictionaries. Defaults to False.
+
+    Returns:
+        list[dict[str, Any]] | list[Overview]: A list of matching overviews, either as dictionaries or Overview objects.
+    """
+    if len(hits) < 1:
+        return []
+
+    analytic_names: set[str] = set()
+    for hit in hits:
+        analytic_names.add(f'"{sanitize_lucene_query(hit["howler"]["analytic"])}"')
+
+    if len(analytic_names) < 1:
+        return []
+
+    try:
+        overview_candidates = datastore().overview.search(
+            f"analytic:({' OR '.join(analytic_names)})",
+            as_obj=as_odm,
+        )["items"]
+
+        return overview_candidates
+    except SearchException:
+        logger.exception("Exception on analytic matching")
+        return []
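To show the shape of the query get_matching_overviews issues, a standalone sketch of the string construction; the quote-escaping stand-in for sanitize_lucene_query is an assumption, not the howler implementation:

    # Standalone sketch: building the analytic:(...) Lucene query used above.
    def sanitize_lucene_query(value: str) -> str:
        # Naive stand-in for howler.utils.str_utils.sanitize_lucene_query (assumption).
        return value.replace('"', '\\"')

    hits = [
        {"howler": {"analytic": "Example Analytic"}},
        {"howler": {"analytic": "Other Analytic"}},
    ]

    analytic_names = {f'"{sanitize_lucene_query(hit["howler"]["analytic"])}"' for hit in hits}
    query = f"analytic:({' OR '.join(analytic_names)})"
    print(query)  # e.g. analytic:("Example Analytic" OR "Other Analytic")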