howler-client 2.4.0.dev178__tar.gz → 2.4.0.dev209__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/PKG-INFO +1 -2
  2. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/__init__.py +2 -2
  3. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/common/dict_utils.py +8 -8
  4. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/connection.py +11 -10
  5. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/bundle.py +9 -9
  6. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/hit.py +30 -25
  7. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/__init__.py +1 -6
  8. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/facet.py +5 -1
  9. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/grouped.py +4 -0
  10. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/histogram.py +1 -1
  11. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/stats.py +5 -1
  12. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/stream.py +2 -1
  13. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/pyproject.toml +2 -3
  14. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/LICENSE +0 -0
  15. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/README.md +0 -0
  16. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/client.py +0 -0
  17. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/common/__init__.py +0 -0
  18. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/common/utils.py +0 -0
  19. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/logger.py +0 -0
  20. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/__init__.py +0 -0
  21. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/comment.py +0 -0
  22. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/help.py +0 -0
  23. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/chunk.py +0 -0
  24. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/search/fields.py +0 -0
  25. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/module/user.py +0 -0
  26. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/utils/__init__.py +0 -0
  27. {howler_client-2.4.0.dev178 → howler_client-2.4.0.dev209}/howler_client/utils/json_encoders.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: howler-client
3
- Version: 2.4.0.dev178
3
+ Version: 2.4.0.dev209
4
4
  Summary: The Howler client library facilitates issuing requests to Howler
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -19,7 +19,6 @@ Classifier: Programming Language :: Python :: 3.13
19
19
  Classifier: Programming Language :: Python :: 3.14
20
20
  Classifier: Topic :: Software Development :: Libraries
21
21
  Requires-Dist: coverage[toml] (>=7.6.1,<8.0.0)
22
- Requires-Dist: diff-cover (>=9.2.0,<10.0.0)
23
22
  Requires-Dist: pycryptodome (>=3.20.0,<4.0.0)
24
23
  Requires-Dist: python-baseconv (>=1.2.2,<2.0.0)
25
24
  Requires-Dist: requests[security] (>=2.32.0,<3.0.0)
@@ -25,7 +25,7 @@ def get_client(
25
25
  timeout=None,
26
26
  throw_on_bad_request=True,
27
27
  throw_on_max_retries=True,
28
- token=None,
28
+ authenticate=None,
29
29
  ):
30
30
  "Initialize a howler client object"
31
31
  connection = Connection(
@@ -41,6 +41,6 @@ def get_client(
41
41
  timeout,
42
42
  throw_on_bad_request,
43
43
  throw_on_max_retries,
44
- token,
44
+ authenticate,
45
45
  )
46
46
  return Client(connection)
@@ -1,5 +1,5 @@
1
1
  from collections.abc import Mapping
2
- from typing import TYPE_CHECKING, Any, AnyStr, Optional, cast
2
+ from typing import TYPE_CHECKING, Any, AnyStr, cast
3
3
  from typing import Mapping as _Mapping
4
4
 
5
5
  if TYPE_CHECKING:
@@ -15,8 +15,8 @@ def strip_nulls(d: Any):
15
15
 
16
16
 
17
17
  def recursive_update(
18
- d: Optional[dict[str, Any]],
19
- u: Optional[_Mapping[str, Any]],
18
+ d: dict[str, Any] | None,
19
+ u: _Mapping[str, Any] | None,
20
20
  stop_keys: list[AnyStr] = [],
21
21
  allow_recursion: bool = True,
22
22
  ) -> dict[str, Any]:
@@ -37,11 +37,11 @@ def recursive_update(
37
37
 
38
38
 
39
39
  def get_recursive_delta(
40
- d1: Optional[_Mapping[str, Any]],
41
- d2: Optional[_Mapping[str, Any]],
40
+ d1: _Mapping[str, Any] | None,
41
+ d2: _Mapping[str, Any] | None,
42
42
  stop_keys: list[AnyStr] = [],
43
43
  allow_recursion: bool = True,
44
- ) -> Optional[dict[str, Any]]:
44
+ ) -> dict[str, Any] | None:
45
45
  "Get the recursive difference between two objects"
46
46
  if d1 is None:
47
47
  return cast(dict, d2)
@@ -73,7 +73,7 @@ def get_recursive_delta(
73
73
  return out
74
74
 
75
75
 
76
- def flatten(data: dict, fields: list[str] = [], parent_key: Optional[str] = None) -> dict:
76
+ def flatten(data: dict, fields: list[str] = [], parent_key: str | None = None) -> dict:
77
77
  "Flatten a nested dict"
78
78
  items: list[tuple[str, Any]] = []
79
79
 
@@ -105,7 +105,7 @@ def unflatten(data: _Mapping) -> _Mapping:
105
105
  return out
106
106
 
107
107
 
108
- def prune(data: _Mapping, keys: list[str], parent_key: Optional[str] = None) -> dict[str, Any]:
108
+ def prune(data: _Mapping, keys: list[str], parent_key: str | None = None) -> dict[str, Any]:
109
109
  "Remove all keys in the given list from the dict if they exist"
110
110
  pruned_items: list[tuple[str, Any]] = []
111
111
 
@@ -3,7 +3,7 @@ import json
3
3
  import sys
4
4
  import time
5
5
  import warnings
6
- from typing import Any, Callable, MutableMapping, Optional, Union
6
+ from typing import Callable, MutableMapping
7
7
 
8
8
  import requests
9
9
 
@@ -36,21 +36,21 @@ class Connection(object):
36
36
  def __init__( # pylint: disable=R0913
37
37
  self: Self,
38
38
  server: str,
39
- auth: Optional[Union[str, tuple[str, str]]],
40
- cert: Optional[Union[str, tuple[str, str]]],
39
+ auth: str | tuple[str, str] | None,
40
+ cert: str | tuple[str, str] | None,
41
41
  debug: Callable[[str], None],
42
- headers: Optional[MutableMapping[str, Union[str, bytes]]],
42
+ headers: MutableMapping[str, str | bytes] | None,
43
43
  retries: int,
44
44
  silence_warnings: bool,
45
- apikey: Optional[tuple[str, str]],
45
+ apikey: tuple[str, str] | None,
46
46
  verify: bool,
47
- timeout: Optional[int],
47
+ timeout: int | None,
48
48
  throw_on_bad_request: bool,
49
49
  throw_on_max_retries: bool,
50
- # TODO: Not sure what this argument is for (if used at all)
51
- token: Optional[Any],
50
+ authenticate: Callable[[str | tuple[str, str] | None, tuple[str, str] | None], str] | None = None,
52
51
  ):
53
52
  self.apikey = apikey
53
+ self.authenticate = authenticate
54
54
  self.debug = debug
55
55
  self.max_retries = retries
56
56
  self.server = server
@@ -58,13 +58,14 @@ class Connection(object):
58
58
  self.default_timeout = timeout
59
59
  self.throw_on_bad_request = throw_on_bad_request
60
60
  self.throw_on_max_retries = throw_on_max_retries
61
- self.token = token
62
61
 
63
62
  session = requests.Session()
64
63
 
65
64
  session.headers.update({"Content-Type": "application/json"})
66
65
 
67
- if auth:
66
+ if self.authenticate:
67
+ session.headers.update({"Authorization": f"Bearer {self.authenticate(auth, apikey)}"})
68
+ elif auth:
68
69
  if not isinstance(auth, str):
69
70
  auth = base64.b64encode(":".join(auth).encode("utf-8")).decode("utf-8")
70
71
 
@@ -1,5 +1,5 @@
1
1
  import sys
2
- from typing import TYPE_CHECKING, Any, Optional, Union
2
+ from typing import TYPE_CHECKING, Any
3
3
 
4
4
  from howler_client.common.utils import api_path
5
5
  from howler_client.logger import get_logger
@@ -50,7 +50,7 @@ class Bundle(object):
50
50
  map: dict[str, list[str]],
51
51
  documents: list[dict[str, Any]],
52
52
  ignore_extra_values: bool = False,
53
- ) -> dict[str, Union[str, list[str], None]]:
53
+ ) -> list[dict[str, str | list[str] | None]]:
54
54
  """Create a bundle using a format similar to the hit.create_from_map function
55
55
 
56
56
  Args:
@@ -63,7 +63,7 @@ class Bundle(object):
63
63
  Defaults to False.
64
64
 
65
65
  Returns:
66
- list[dict[str, Optional[str]]]: The list of IDs of the created hits
66
+ list[dict[str, str | None]]: The list of IDs of the created hits
67
67
  """
68
68
  map = {**map, "bundle": ["howler.is_bundle"]}
69
69
  bundle_hit = {**bundle_hit, "bundle": True}
@@ -74,9 +74,9 @@ class Bundle(object):
74
74
  def create(
75
75
  self: Self,
76
76
  bundle_hit: dict[str, Any],
77
- data: Optional[Union[dict[str, Any], list[dict[str, Any]]]] = [],
77
+ data: dict[str, Any] | list[dict[str, Any]] | None = None,
78
78
  ignore_extra_values: bool = False,
79
- ) -> dict[str, Any]:
79
+ ) -> dict[str, Any] | None:
80
80
  """Create a bundle using a format similar to the hit.create function
81
81
 
82
82
  Args:
@@ -107,24 +107,24 @@ class Bundle(object):
107
107
 
108
108
  return self._connection.post(api_path("hit/bundle"), json={"bundle": bundle_hit, "hits": hit_ids})
109
109
 
110
- def add(self: Self, bundle_id: str, hit_ids: Union[str, list[str]]):
110
+ def add(self: Self, bundle_id: str, hit_ids: str | list[str]):
111
111
  """Add a list of hits to a bundle by their IDs
112
112
 
113
113
  Args:
114
114
  bundle_id (str): The ID of the bundle we want to add the hits to
115
- hit_ids (Union[str, list[str]]): The list of hit IDs to add to the bundle
115
+ hit_ids (str | list[str]): The list of hit IDs to add to the bundle
116
116
  """
117
117
  if not isinstance(hit_ids, list):
118
118
  hit_ids = [hit_ids]
119
119
 
120
120
  return self._connection.put(api_path("hit/bundle", bundle_id), json=hit_ids)
121
121
 
122
- def remove(self: Self, bundle_id: str, hit_ids: Union[str, list[str]]):
122
+ def remove(self: Self, bundle_id: str, hit_ids: str | list[str]):
123
123
  """Remove a list of hits from a bundle by their IDs
124
124
 
125
125
  Args:
126
126
  bundle_id (str): The bundle ID from which to remove the hits
127
- hit_ids (Union[str, list[str]]): A list of hit IDs to remove from the bundle
127
+ hit_ids (str | list[str]): A list of hit IDs to remove from the bundle
128
128
  """
129
129
  if not isinstance(hit_ids, list):
130
130
  hit_ids = [hit_ids]
@@ -1,7 +1,8 @@
1
1
  import json
2
2
  import sys
3
+ import warnings
3
4
  from hashlib import sha256
4
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union
5
+ from typing import TYPE_CHECKING, Any, Literal
5
6
 
6
7
  from howler_client.common.dict_utils import flatten
7
8
  from howler_client.common.utils import ClientError, api_path
@@ -68,7 +69,7 @@ class Hit(object):
68
69
  map: dict[str, list[str]],
69
70
  documents: list[dict[str, Any]],
70
71
  ignore_extra_values: bool = False,
71
- ) -> dict[str, Union[Optional[str], list[str]]]:
72
+ ) -> list[dict[str, str | list[str] | None]]:
72
73
  """Create hits for a given tool using the raw documents and a map of the document fields to howler's fields.
73
74
 
74
75
  Args:
@@ -79,8 +80,17 @@ class Hit(object):
79
80
  ignore_extra_values (bool, optional): Whether to allow extra fields, or raise an error. Defaults to False.
80
81
 
81
82
  Returns:
82
- dict[str, Union[Optional[str], list[str]]]: A list of IDs/Errors in the same order as the original documents
83
+ list[dict[str, str | list[str] | None]]: One entry per document, each with keys ``id``, ``error``,
84
+ and ``warn``.
85
+
86
+ .. deprecated::
87
+ Use the regular create() function instead, mapping the record before ingestion.
83
88
  """
89
+ warnings.warn(
90
+ "create_from_map is deprecated and will be removed in a future version.",
91
+ DeprecationWarning,
92
+ stacklevel=2,
93
+ )
84
94
  data = {"map": map, "hits": documents}
85
95
 
86
96
  try:
@@ -103,9 +113,9 @@ class Hit(object):
103
113
  warn = res["warn"]
104
114
  if isinstance(warn, list):
105
115
  for w in warn:
106
- logger.warn(w)
116
+ logger.warning(w)
107
117
  else:
108
- logger.warn(warn)
118
+ logger.warning(warn)
109
119
 
110
120
  return result
111
121
 
@@ -135,13 +145,13 @@ class Hit(object):
135
145
 
136
146
  def create( # noqa: C901
137
147
  self: Self,
138
- data: Union[dict[str, Any], list[dict[str, Any]]],
148
+ data: dict[str, Any] | list[dict[str, Any]],
139
149
  ignore_extra_values: bool = False,
140
150
  ):
141
151
  """Create one or many hits using the howler schema.
142
152
 
143
153
  Args:
144
- data (Union[dict[str, Any], list[dict[str, Any]]]): The hit or list of hits to create
154
+ data (dict[str, Any] | list[dict[str, Any]]): The hit or list of hits to create
145
155
  ignore_extra_values (bool, optional): Whether to ignore extra values, or throw an exception.
146
156
  Defaults to False.
147
157
 
@@ -178,22 +188,20 @@ class Hit(object):
178
188
 
179
189
  final_hit_list.append(hit)
180
190
 
181
- search_result = self._search.grouped.hit(
191
+ hashes = [hit["howler.hash"] for hit in final_hit_list]
192
+ existing_hashes: dict[str, int] = self._search.facet.hit(
182
193
  "howler.hash",
183
- limit=1,
184
- filters=[f"howler.hash:{' '.join(list_hit['howler.hash'] for list_hit in final_hit_list)}"],
185
- )["items"]
186
-
187
- for hit in final_hit_list:
188
- for match in search_result:
189
- if hit["howler.hash"] == match["value"]:
190
- matched_hit = match["items"][0]
191
-
192
- logger.warning(
193
- f"Hit with hash {hit['howler.hash']} already exists in the DB at "
194
- f"id {matched_hit['howler']['id']}, reusing"
195
- )
196
- final_hit_list.remove(hit)
194
+ query=f"howler.hash:({' OR '.join(hashes)})",
195
+ rows=len(hashes),
196
+ )
197
+
198
+ for hit in list(final_hit_list):
199
+ if hit["howler.hash"] in existing_hashes:
200
+ logger.warning(
201
+ "Hit with hash %s already exists in the DB, reusing",
202
+ hit["howler.hash"],
203
+ )
204
+ final_hit_list.remove(hit)
197
205
 
198
206
  if len(final_hit_list) < 1:
199
207
  logger.info("No hits to submit.")
@@ -212,9 +220,6 @@ class Hit(object):
212
220
  for invalid_hit in result["invalid"]:
213
221
  logger.error(invalid_hit["error"])
214
222
 
215
- for entry in search_result:
216
- result["valid"].append(entry["items"][0])
217
-
218
223
  return result
219
224
 
220
225
  def overwrite(self: Self, hit_id: str, new_hit_data: dict[str, Any]):
@@ -21,7 +21,7 @@ class Search(object):
21
21
  self.stats = Stats(connection)
22
22
  self.stream = Stream(connection, self._do_search)
23
23
 
24
- def _do_search(self, index, query, use_archive=False, track_total_hits=None, **kwargs):
24
+ def _do_search(self, index, query, track_total_hits=None, **kwargs):
25
25
  if index not in SEARCHABLE:
26
26
  raise ClientError("Index %s is not searchable" % index, 400)
27
27
 
@@ -34,8 +34,6 @@ class Search(object):
34
34
 
35
35
  kwargs = {k: v for k, v in kwargs.items() if v is not None}
36
36
  kwargs["query"] = query
37
- if use_archive:
38
- kwargs["use_archive"] = ""
39
37
  if track_total_hits:
40
38
  kwargs["track_total_hits"] = track_total_hits
41
39
  path = api_path("search", index)
@@ -50,7 +48,6 @@ class Search(object):
50
48
  rows=25,
51
49
  sort=None,
52
50
  timeout=None,
53
- use_archive=False,
54
51
  track_total_hits=None,
55
52
  ):
56
53
  """Search hits with a lucene query.
@@ -65,7 +62,6 @@ class Search(object):
65
62
  rows : Number of records to return (integer)
66
63
  sort : Field used for sorting with direction (string: ex. 'id desc')
67
64
  timeout : Max amount of milliseconds the query will run (integer)
68
- use_archive : Also query the archive
69
65
  track_total_hits : Number of hits to track (default: 10k)
70
66
 
71
67
  Returns all results.
@@ -79,6 +75,5 @@ class Search(object):
79
75
  rows=rows,
80
76
  sort=sort,
81
77
  timeout=timeout,
82
- use_archive=use_archive,
83
78
  track_total_hits=track_total_hits,
84
79
  )
@@ -31,11 +31,15 @@ class Facet(object):
31
31
  field : field to extract the facets from
32
32
 
33
33
  Optional:
34
- query : Initial query to filter the data (default: 'id:*')
34
+ query : Initial query to filter the data (default: 'howler.id:*')
35
35
  filters : Additional lucene queries used to filter the data (list of strings)
36
36
  mincount : Minimum amount of hits for the value to be returned
37
37
  rows : The number of different facets to return
38
38
 
39
39
  Returns all results.
40
40
  """
41
+ # Default the query to include all hits if none provided
42
+ if query is None:
43
+ query = "howler.id:*"
44
+
41
45
  return self._do_facet("hit", field, query=query, mincount=mincount, filters=filters, rows=rows)
@@ -53,6 +53,10 @@ class Grouped(object):
53
53
 
54
54
  Returns a generator that transparently and efficiently pages through results.
55
55
  """
56
+ # Default the query to include all hits if none provided
57
+ if query is None:
58
+ query = "howler.id:*"
59
+
56
60
  return self._do_grouped(
57
61
  "hit",
58
62
  field,
@@ -42,7 +42,7 @@ class Histogram(object):
42
42
  field : field to create the histograms with (only work on date or number fields)
43
43
 
44
44
  Optional:
45
- query : Initial query to filter the data (default: 'id:*')
45
+ query : Initial query to filter the data (default: 'howler.id:*')
46
46
  filters : Additional lucene queries used to filter the data (list of strings)
47
47
  mincount : Minimum amount of hits for the value to be returned
48
48
  start : Beginning of the histogram range (Default: now-1d or 0)
@@ -31,9 +31,13 @@ class Stats(object):
31
31
  field : field to create the stats on (only work on number fields)
32
32
 
33
33
  Optional:
34
- query : Initial query to filter the data (default: 'id:*')
34
+ query : Initial query to filter the data (default: 'howler.id:*')
35
35
  filters : Additional lucene queries used to filter the data (list of strings)
36
36
 
37
37
  Returns statistics about the field.
38
38
  """
39
+ # Default the query to include all hits if none provided
40
+ if query is None:
41
+ query = "howler.id:*"
42
+
39
43
  return self._do_stats("hit", field, query=query, filters=filters)
@@ -53,7 +53,8 @@ class Stream(object):
53
53
  items: list[Any] = []
54
54
  lock = threading.Lock()
55
55
  sf_t = threading.Thread(target=self._auto_fill, args=[items, lock, index, query], kwargs=kwargs)
56
- sf_t.setDaemon(True)
56
+ # setDaemon is deprecated; set the daemon attribute instead
57
+ sf_t.daemon = True
57
58
  sf_t.start()
58
59
  while not yield_done:
59
60
  try:
@@ -91,7 +91,7 @@ suppress-none-returning = true
91
91
  [tool.poetry]
92
92
  package-mode = true
93
93
  name = "howler-client"
94
- version = "2.4.0.dev178"
94
+ version = "2.4.0.dev209"
95
95
  description = "The Howler client library facilitates issuing requests to Howler"
96
96
  authors = [
97
97
  "Canadian Centre for Cyber Security <howler@cyber.gc.ca>",
@@ -128,7 +128,6 @@ pycryptodome = "^3.20.0"
128
128
  requests = { extras = ["security"], version = ">=2.32.0,<3.0.0" }
129
129
  python-baseconv = "^1.2.2"
130
130
  coverage = { extras = ["toml"], version = "^7.6.1" }
131
- diff-cover = "^9.2.0"
132
131
 
133
132
  [tool.poetry.group.dev.dependencies]
134
133
  ruff = ">=0.6.8,<0.16.0"
@@ -139,7 +138,7 @@ mypy = "^1.11.2"
139
138
  pytest = ">=8.3.3,<10.0.0"
140
139
  pytest-cov = "^5.0.0"
141
140
  passlib = "^1.7.4"
142
-
141
+ diff-cover = "^10.3.0"
143
142
 
144
143
  [tool.poetry.group.types.dependencies]
145
144
  types-requests = "^2.32.0.20240914"