howler-api 2.10.0.dev255__py3-none-any.whl → 2.13.0.dev344__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of howler-api might be problematic.
- howler/api/__init__.py +1 -1
- howler/api/v1/auth.py +1 -1
- howler/api/v1/{borealis.py → clue.py} +24 -26
- howler/api/v1/dossier.py +4 -28
- howler/api/v1/hit.py +11 -7
- howler/api/v1/search.py +160 -17
- howler/api/v1/user.py +2 -2
- howler/api/v1/utils/etag.py +43 -5
- howler/api/v1/view.py +26 -34
- howler/app.py +4 -4
- howler/cronjobs/view_cleanup.py +88 -0
- howler/datastore/README.md +0 -2
- howler/datastore/collection.py +109 -132
- howler/datastore/howler_store.py +0 -45
- howler/datastore/store.py +25 -6
- howler/odm/base.py +1 -1
- howler/odm/helper.py +9 -6
- howler/odm/models/config.py +168 -8
- howler/odm/models/howler_data.py +2 -1
- howler/odm/models/lead.py +1 -10
- howler/odm/models/pivot.py +2 -11
- howler/odm/random_data.py +1 -1
- howler/security/__init__.py +2 -2
- howler/services/analytic_service.py +31 -0
- howler/services/config_service.py +2 -2
- howler/services/dossier_service.py +140 -7
- howler/services/hit_service.py +317 -72
- howler/services/lucene_service.py +14 -7
- howler/services/overview_service.py +44 -0
- howler/services/template_service.py +45 -0
- howler/utils/lucene.py +22 -2
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/METADATA +5 -5
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/RECORD +35 -32
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/WHEEL +1 -1
- {howler_api-2.10.0.dev255.dist-info → howler_api-2.13.0.dev344.dist-info}/entry_points.txt +0 -0
howler/datastore/collection.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import json
 import logging
 import re
+import sys
 import time
 import typing
 import warnings
@@ -20,7 +21,6 @@ from howler import odm
 from howler.common.exceptions import HowlerRuntimeError, HowlerValueError, NonRecoverableError
 from howler.common.loader import APP_NAME
 from howler.common.logging.format import HWL_DATE_FORMAT, HWL_LOG_FORMAT
-from howler.datastore.bulk import ElasticBulkPlan
 from howler.datastore.constants import BACK_MAPPING, TYPE_MAPPING
 from howler.datastore.exceptions import (
     DataStoreException,
@@ -66,8 +66,8 @@ console.setFormatter(logging.Formatter(HWL_LOG_FORMAT, HWL_DATE_FORMAT))
 logger.addHandler(console)

 ModelType = TypeVar("ModelType", bound=Model)
-write_block_settings = {"
-write_unblock_settings = {"
+write_block_settings = {"index.blocks.write": True}
+write_unblock_settings = {"index.blocks.write": None}

 # A token value to represent a document not existing. Its a string to match the
 # type used for version values. Any string will do as long as it never matches
@@ -209,11 +209,13 @@ class ESCollection(Generic[ModelType]):
     IGNORE_ENSURE_COLLECTION = False

     def __init__(self, datastore: ESStore, name, model_class=None, validate=True, max_attempts=10):
-        self.replicas =
-
-
+        self.replicas = int(
+            environ.get(
+                f"ELASTIC_{name.upper()}_REPLICAS",
+                environ.get("ELASTIC_DEFAULT_REPLICAS", 0),
+            )
         )
-        self.shards = environ.get(f"ELASTIC_{name.upper()}_SHARDS", environ.get("ELASTIC_DEFAULT_SHARDS", 1))
+        self.shards = int(environ.get(f"ELASTIC_{name.upper()}_SHARDS", environ.get("ELASTIC_DEFAULT_SHARDS", 1)))
         self._index_list: list[str] = []

         self.datastore = datastore
@@ -225,7 +227,7 @@ class ESCollection(Generic[ModelType]):

         if not ESCollection.IGNORE_ENSURE_COLLECTION:
             self._ensure_collection()
-
+        elif "pytest" not in sys.modules:
             logger.warning("Skipping ensure collection! This is dangerous. Waiting five seconds before continuing.")
             time.sleep(5)

@@ -441,6 +443,7 @@ class ESCollection(Generic[ModelType]):
             iter(self.with_retries(self.datastore.client.indices.get_alias, index=index)),
             None,
         )
+
         return None

     def _wait_for_status(self, index, min_status="yellow"):
@@ -456,20 +459,20 @@ class ESCollection(Generic[ModelType]):
         else:
             raise

-    def _safe_index_copy(self, copy_function, src, target,
-        ret = copy_function(index=src, target=target,
+    def _safe_index_copy(self, copy_function, src, target, settings=None, min_status="yellow"):
+        ret = copy_function(index=src, target=target, settings=settings, request_timeout=60)
         if not ret["acknowledged"]:
             raise DataStoreException(f"Failed to create index {target} from {src}.")

         self._wait_for_status(target, min_status=min_status)

-    def _delete_async(self, index,
+    def _delete_async(self, index, query, max_docs=None, sort=None):
         deleted = 0
         while True:
             task = self.with_retries(
                 self.datastore.client.delete_by_query,
                 index=index,
-
+                query=query,
                 wait_for_completion=False,
                 conflicts="proceed",
                 sort=sort,
@@ -483,13 +486,14 @@ class ESCollection(Generic[ModelType]):
             else:
                 deleted += res["deleted"]

-    def _update_async(self, index,
+    def _update_async(self, index, script, query, max_docs=None):
         updated = 0
         while True:
             task = self.with_retries(
                 self.datastore.client.update_by_query,
                 index=index,
-
+                script=script,
+                query=query,
                 wait_for_completion=False,
                 conflicts="proceed",
                 max_docs=max_docs,
@@ -502,23 +506,6 @@ class ESCollection(Generic[ModelType]):
             else:
                 updated += res["updated"]

-    def bulk(self, operations):
-        """Receives a bulk plan and executes the plan.
-
-        :return: Results of the bulk operation
-        """
-        if not isinstance(operations, ElasticBulkPlan):
-            return TypeError("Operations must be of type ElasticBulkPlan")
-
-        return self.with_retries(self.datastore.client.bulk, body=operations.get_plan_data())
-
-    def get_bulk_plan(self):
-        """Creates a BulkPlan tailored for the current datastore
-
-        :return: The BulkPlan object
-        """
-        return ElasticBulkPlan(self.index_list, model=self.model_class)
-
     def commit(self):
         """This function should be overloaded to perform a commit of the index data of all the different hosts
         specified in self.datastore.hosts.
@@ -536,8 +523,8 @@ class ESCollection(Generic[ModelType]):
         :return: Should return True of the fix was successful on all hosts
         """
         replicas = self._get_index_settings()["index"]["number_of_replicas"]
-
-        return self.with_retries(self.datastore.client.indices.put_settings, index=self.index_name,
+        settings = {"number_of_replicas": replicas}
+        return self.with_retries(self.datastore.client.indices.put_settings, index=self.index_name, settings=settings)[
             "acknowledged"
         ]

@@ -547,8 +534,8 @@ class ESCollection(Generic[ModelType]):

         :return: Should return True of the fix was successful on all hosts
         """
-
-
+        settings = self._get_index_settings()
+        clone_settings = {"index.number_of_replicas": 0}
         clone_finish_settings = None
         clone_setup_settings = None
         method = None
@@ -564,12 +551,10 @@ class ESCollection(Generic[ModelType]):

         cur_replicas = int(current_settings["settings"]["index"]["number_of_replicas"])
         cur_shards = int(current_settings["settings"]["index"]["number_of_shards"])
-        target_shards = int(
+        target_shards = int(settings["index"]["number_of_shards"])
         clone_finish_settings = {
-            "
-
-            "index.routing.allocation.require._name": None,
-        }
+            "index.number_of_replicas": cur_replicas,
+            "index.routing.allocation.require._name": None,
         }

         if cur_shards > target_shards:
@@ -583,10 +568,8 @@ class ESCollection(Generic[ModelType]):
         else:
             target_node = self.with_retries(self.datastore.client.cat.nodes, format="json")[0]["name"]
             clone_setup_settings = {
-                "
-
-                "index.routing.allocation.require._name": target_node,
-            }
+                "index.number_of_replicas": 0,
+                "index.routing.allocation.require._name": target_node,
             }
             method = self.datastore.client.indices.shrink
         elif cur_shards < target_shards:
@@ -600,7 +583,7 @@ class ESCollection(Generic[ModelType]):
             else:
                 method = self.datastore.client.indices.split
         else:
-            logger.
+            logger.info(
                 f"Current shards ({cur_shards}) is equal to the target shards ({target_shards}), "
                 "only house keeping operations will be performed."
             )
@@ -612,7 +595,7 @@ class ESCollection(Generic[ModelType]):

         # Block all indexes to be written to
         logger.info("Set a datastore wide write block on Elastic.")
-        self.with_retries(self.datastore.client.indices.put_settings,
+        self.with_retries(self.datastore.client.indices.put_settings, settings=write_block_settings)

         # Clone it onto a temporary index
         if not self.with_retries(self.datastore.client.indices.exists, index=temp_name):
@@ -622,7 +605,7 @@ class ESCollection(Generic[ModelType]):
             self.with_retries(
                 self.datastore.client.indices.put_settings,
                 index=self.index_name,
-
+                settings=clone_setup_settings,
             )

             # Make sure no shard are relocating
@@ -635,7 +618,7 @@ class ESCollection(Generic[ModelType]):
                 self.datastore.client.indices.clone,
                 self.index_name,
                 temp_name,
-
+                settings=clone_settings,
                 min_status="green",
             )

@@ -650,13 +633,11 @@ class ESCollection(Generic[ModelType]):
                 f"and delete {self.index_name.upper()}."
             )
             # Make the hot index the temporary index while deleting the original index
-
-            "
-
-
-
-            }
-            self.with_retries(self.datastore.client.indices.update_aliases, body=alias_body)
+            alias_actions = [
+                {"add": {"index": temp_name, "alias": self.name}},
+                {"remove_index": {"index": self.index_name}},
+            ]
+            self.with_retries(self.datastore.client.indices.update_aliases, actions=alias_actions)

             # Make sure the original index is deleted
             if self.with_retries(self.datastore.client.indices.exists, index=self.index_name):
@@ -665,31 +646,29 @@ class ESCollection(Generic[ModelType]):

         # Shrink/split the temporary index into the original index
         logger.info(f"Perform shard fix operation from {temp_name.upper()} to {self.index_name.upper()}.")
-        self._safe_index_copy(method, temp_name, self.index_name,
+        self._safe_index_copy(method, temp_name, self.index_name, settings=settings)

         # Make the original index the new alias
         logger.info(
             f"Make {self.index_name.upper()} the current alias for {self.name.upper()} "
             f"and delete {temp_name.upper()}."
         )
-
-        "
-
-
-
-        }
-        self.with_retries(self.datastore.client.indices.update_aliases, body=alias_body)
+        alias_actions = [
+            {"add": {"index": self.index_name, "alias": self.name}},
+            {"remove_index": {"index": temp_name}},
+        ]
+        self.with_retries(self.datastore.client.indices.update_aliases, actions=alias_actions)

         # Restore writes
         logger.debug("Restore datastore wide write operation on Elastic.")
-        self.with_retries(self.datastore.client.indices.put_settings,
+        self.with_retries(self.datastore.client.indices.put_settings, settings=write_unblock_settings)

         # Restore normal routing and replicas
         logger.debug(f"Restore original routing table for {self.name.upper()}.")
         self.with_retries(
             self.datastore.client.indices.put_settings,
             index=self.name,
-
+            settings=clone_finish_settings,
         )

     def reindex(self):
@@ -719,31 +698,34 @@ class ESCollection(Generic[ModelType]):
             for alias, alias_data in index_data["aliases"].items():
                 # Make the reindex index the new write index if the original index was
                 if alias_data.get("is_write_index", True):
-
-
-                    {
-                        "
-
-
-
-
-
-                    {
-                        "
-
-
-
-
-
-                    ]
-                }
+                    alias_actions = [
+                        {
+                            "add": {
+                                "index": new_name,
+                                "alias": alias,
+                                "is_write_index": True,
+                            }
+                        },
+                        {
+                            "add": {
+                                "index": index,
+                                "alias": alias,
+                                "is_write_index": False,
+                            }
+                        },
+                    ]
                 else:
-
-
+                    alias_actions = [{"add": {"index": new_name, "alias": alias}}]
+
+                self.with_retries(self.datastore.client.indices.update_aliases, actions=alias_actions)

             # Reindex data into target
-
-
+            r_task = self.with_retries(
+                self.datastore.client.reindex,
+                source={"index": index},
+                dest={"index": new_name},
+                wait_for_completion=False,
+            )
             self._get_task_results(r_task)

             if self.with_retries(self.datastore.client.indices.exists, index=new_name):
@@ -761,38 +743,35 @@ class ESCollection(Generic[ModelType]):
             # Block write to the index
             self.with_retries(
                 self.datastore.client.indices.put_settings,
-
+                settings=write_block_settings,
             )

             # Rename reindexed index
             try:
-                clone_body = {"settings": self._get_index_settings()}
                 self._safe_index_copy(
                     self.datastore.client.indices.clone,
                     new_name,
                     index,
-
+                    settings=self._get_index_settings(),
                 )

                 # Restore original aliases for the index
                 for alias, alias_data in index_data["aliases"].items():
                     # Make the reindex index the new write index if the original index was
                     if alias_data.get("is_write_index", True):
-
-
-                        {
-                            "
-
-
-
-
-
-                        ]
-                    }
+                        alias_actions = [
+                            {
+                                "add": {
+                                    "index": index,
+                                    "alias": alias,
+                                    "is_write_index": True,
+                                }
+                            },
+                            {"remove_index": {"index": new_name}},
+                        ]
                         self.with_retries(
                             self.datastore.client.indices.update_aliases,
-
+                            actions=alias_actions,
                         )

             # Delete the reindex target if it still exists
@@ -802,7 +781,7 @@ class ESCollection(Generic[ModelType]):
             # Unblock write to the index
             self.with_retries(
                 self.datastore.client.indices.put_settings,
-
+                settings=write_unblock_settings,
             )

             return True
@@ -838,7 +817,7 @@ class ESCollection(Generic[ModelType]):
         out = []

         if key_list:
-            data = self.with_retries(self.datastore.client.mget,
+            data = self.with_retries(self.datastore.client.mget, ids=key_list, index=self.name)

             for row in data.get("docs", []):
                 if "found" in row and not row["found"]:
@@ -1055,8 +1034,8 @@ class ESCollection(Generic[ModelType]):
         :param workers: Number of workers used for deletion if basic currency delete is used
         :return: True is delete successful
         """
-
-        info = self._delete_async(self.name,
+        query = {"bool": {"must": {"query_string": {"query": query}}}}
+        info = self._delete_async(self.name, query=query, sort=sort_str(parse_sort(sort)), max_docs=max_docs)
         return info.get("deleted", 0) != 0

     def _create_scripts_from_operations(self, operations):
@@ -1262,18 +1241,18 @@ class ESCollection(Generic[ModelType]):

         script = self._create_scripts_from_operations(operations)

-        query_body = {
-            "script": script,
-            "query": {
-                "bool": {
-                    "must": {"query_string": {"query": query}},
-                    "filter": [{"query_string": {"query": ff}} for ff in filters],
-                }
-            },
-        }
-
         try:
-            res = self._update_async(
+            res = self._update_async(
+                self.name,
+                script=script,
+                query={
+                    "bool": {
+                        "must": {"query_string": {"query": query}},
+                        "filter": [{"query_string": {"query": ff}} for ff in filters],
+                    }
+                },
+                max_docs=max_docs,
+            )
         except Exception:
             return False

@@ -1437,14 +1416,14 @@ class ESCollection(Generic[ModelType]):
             result = self.with_retries(
                 self.datastore.client.scroll,
                 scroll_id=deep_paging_id,
-                params
+                **params,
             )
         else:
             # Run the query
             result = self.with_retries(
                 self.datastore.client.search,
                 index=self.name,
-                params
+                **params,
                 **query_body,
             )

@@ -2223,7 +2202,7 @@ class ESCollection(Generic[ModelType]):
             )
             self.with_retries(
                 self.datastore.client.indices.put_settings,
-
+                settings={"index.mapping.total_fields.limit": current_count + 500},
             )
             self._add_fields({key: model[key] for key in missing})
             handled = True
@@ -2274,21 +2253,21 @@ class ESCollection(Generic[ModelType]):
             self.datastore.client.indices.exists, index=self.index_name
         ) and not self.with_retries(self.datastore.client.indices.exists_alias, name=self.name):
             # Turn on write block
-            self.with_retries(self.datastore.client.indices.put_settings,
+            self.with_retries(self.datastore.client.indices.put_settings, settings=write_block_settings)

             # Create a copy on the result index
             self._safe_index_copy(self.datastore.client.indices.clone, self.name, self.index_name)

             # Make the hot index the new clone
-
-
+            self.with_retries(
+                self.datastore.client.indices.update_aliases,
+                actions=[
                     {"add": {"index": self.index_name, "alias": self.name}},
                     {"remove_index": {"index": self.name}},
-            ]
-
-            self.with_retries(self.datastore.client.indices.update_aliases, body=alias_body)
+                ],
+            )

-            self.with_retries(self.datastore.client.indices.put_settings,
+            self.with_retries(self.datastore.client.indices.put_settings, settings=write_unblock_settings)

             self._check_fields()

@@ -2318,17 +2297,15 @@ class ESCollection(Generic[ModelType]):

         # If we got this far, the missing fields have been described in properties, upload them to the
         # server, and we should be able to move on.
-        mappings = {"properties": properties}
         for index in self.index_list_full:
-            self.with_retries(self.datastore.client.indices.put_mapping, index=index,
+            self.with_retries(self.datastore.client.indices.put_mapping, index=index, properties=properties)

         if self.with_retries(self.datastore.client.indices.exists_template, name=self.name):
             current_template = self.with_retries(self.datastore.client.indices.get_template, name=self.name)[self.name]
-            recursive_update(current_template, {"mappings": mappings})
             self.with_retries(
                 self.datastore.client.indices.put_template,
                 name=self.name,
-
+                **recursive_update(current_template, {"mappings": {"properties": properties}}),
             )

     def wipe(self):
howler/datastore/howler_store.py
CHANGED
@@ -1,9 +1,5 @@
-import time
 from typing import TYPE_CHECKING

-import elasticapm
-import elasticsearch
-
 from howler.common.exceptions import HowlerAttributeError
 from howler.datastore.collection import ESCollection, logger
 from howler.odm.models.action import Action
@@ -107,44 +103,3 @@ class HowlerDatastore(object):
             return getattr(self, collection_name)
         else:
             raise HowlerAttributeError(f"Collection {collection_name} does not exist.")
-
-    @elasticapm.capture_span(span_type="datastore")
-    def multi_index_bulk(self, bulk_plans):
-        max_retry_backoff = 10
-        retries = 0
-        while True:
-            try:
-                plan = "\n".join([p.get_plan_data() for p in bulk_plans])
-                ret_val = self.ds.client.bulk(body=plan)  # type: ignore[call-arg]
-                return ret_val
-            except (
-                elasticsearch.exceptions.ConnectionError,
-                elasticsearch.exceptions.ConnectionTimeout,
-                elasticsearch.exceptions.AuthenticationException,
-            ):
-                logger.warning(
-                    f"No connection to Elasticsearch server(s): "
-                    f"{' | '.join(self.ds.get_hosts(safe=True))}"
-                    f", retrying..."
-                )
-                time.sleep(min(retries, max_retry_backoff))
-                self.ds.connection_reset()
-                retries += 1
-
-            except elasticsearch.exceptions.TransportError as e:
-                err_code, msg, cause = e.args
-                if err_code == 503 or err_code == "503":
-                    logger.warning("Looks like index is not ready yet, retrying...")
-                    time.sleep(min(retries, max_retry_backoff))
-                    self.ds.connection_reset()
-                    retries += 1
-                elif err_code == 429 or err_code == "429":
-                    logger.warning(
-                        "Elasticsearch is too busy to perform the requested task, " "we will wait a bit and retry..."
-                    )
-                    time.sleep(min(retries, max_retry_backoff))
-                    self.ds.connection_reset()
-                    retries += 1
-
-                else:
-                    raise
howler/datastore/store.py
CHANGED
@@ -55,6 +55,8 @@ class ESStore(object):
         config = _config

         self._apikey: Optional[tuple[str, str]] = None
+        self._username: Optional[str] = None
+        self._password: Optional[str] = None
         self._hosts = []

         for host in config.datastore.hosts:
@@ -64,6 +66,9 @@ class ESStore(object):
                     os.environ[f"{host.name.upper()}_HOST_APIKEY_ID"],
                     os.environ[f"{host.name.upper()}_HOST_APIKEY_SECRET"],
                 )
+            elif os.getenv(f"{host.name.upper()}_HOST_USERNAME") is not None:
+                self._username = os.environ[f"{host.name.upper()}_HOST_USERNAME"]
+                self._password = os.environ[f"{host.name.upper()}_HOST_PASSWORD"]

         self._closed = False
         self._collections: dict[str, ESCollection] = {}
@@ -73,12 +78,26 @@ class ESStore(object):
         tracer = logging.getLogger("elasticsearch")
         tracer.setLevel(logging.CRITICAL)

-        self.
-
-
-
-
-
+        if self._apikey is not None:
+            self.client = elasticsearch.Elasticsearch(
+                hosts=self._hosts,  # type: ignore
+                api_key=self._apikey,
+                max_retries=0,
+                request_timeout=TRANSPORT_TIMEOUT,
+            )
+        elif self._username is not None and self._password is not None:
+            self.client = elasticsearch.Elasticsearch(
+                hosts=self._hosts,  # type: ignore
+                basic_auth=(self._username, self._password),
+                max_retries=0,
+                request_timeout=TRANSPORT_TIMEOUT,
+            )
+        else:
+            self.client = elasticsearch.Elasticsearch(
+                hosts=self._hosts,  # type: ignore
+                max_retries=0,
+                request_timeout=TRANSPORT_TIMEOUT,
+            )
         self.eql = elasticsearch.client.EqlClient(self.client)
         self.archive_access = archive_access
         self.url_path = "elastic"
howler/odm/base.py
CHANGED