howler-client 2.4.0.dev37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ import sys
2
+ from typing import TYPE_CHECKING, Any, List
3
+
4
+ from howler_client.common.utils import api_path
5
+
6
+ if sys.version_info >= (3, 11):
7
+ from typing import Self
8
+ else:
9
+ from typing_extensions import Self
10
+
11
+ if TYPE_CHECKING:
12
+ from howler_client import Connection
13
+
14
+
15
class Comment(object):
    """Comment related endpoints for hits."""

    def __init__(self: Self, connection: "Connection"):
        # Every request issued by this module goes through this connection.
        self._connection = connection

    def add(self: Self, hit_id: str, comment: str) -> dict[str, Any]:
        """Post a new comment on a hit.

        Args:
            hit_id (str): ID of the hit to comment on
            comment (str): Content of the comment

        Returns:
            dict[str, Any]: The response of the POST request (annotated as the hit data)
        """
        endpoint = api_path("hit", hit_id, "comments")
        payload = {"value": comment}
        return self._connection.post(endpoint, json=payload)

    def edit(self: Self, hit_id: str, comment: str, comment_id: str) -> dict[str, Any]:
        """Replace the content of an existing comment on a hit.

        Args:
            hit_id (str): ID of the hit the comment belongs to
            comment (str): New content of the comment
            comment_id (str): ID of the comment to update

        Returns:
            dict[str, Any]: The response of the PUT request (annotated as the hit data)
        """
        endpoint = api_path("hit", hit_id, "comments", comment_id)
        payload = {"value": comment}
        return self._connection.put(endpoint, json=payload)

    def delete(self: Self, hit_id: str, comment_ids: List[str]) -> dict[str, Any]:
        """Delete one or more comments from a hit.

        Args:
            hit_id (str): ID of the hit the comments belong to
            comment_ids (List[str]): IDs of all the comments to remove

        Returns:
            dict[str, Any]: The response of the DELETE request (annotated as the hit data)
        """
        endpoint = api_path("hit", hit_id, "comments")
        # The list of IDs is sent as-is as the JSON body of the request.
        return self._connection.delete(endpoint, json=comment_ids)
@@ -0,0 +1,23 @@
1
+ import sys
2
+ from typing import TYPE_CHECKING
3
+
4
+ from howler_client.common.utils import api_path
5
+
6
+ if sys.version_info >= (3, 11):
7
+ from typing import Self
8
+ else:
9
+ from typing_extensions import Self
10
+
11
+ if TYPE_CHECKING:
12
+ from howler_client import Connection
13
+
14
+
15
class Help(object):
    """Help related endpoints"""

    def __init__(self: Self, connection: "Connection"):
        # Connection used for every request issued by this module.
        self._connection = connection

    def classification_definition(self):
        """Fetch the current system classification definition.

        Returns:
            The response of a GET request on the help/classification_definition endpoint.
        """
        endpoint = api_path("help", "classification_definition")
        return self._connection.get(endpoint)
@@ -0,0 +1,299 @@
1
+ import json
2
+ import sys
3
+ from hashlib import sha256
4
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
5
+
6
+ from howler_client.common.dict_utils import flatten
7
+ from howler_client.common.utils import ClientError, api_path
8
+ from howler_client.logger import get_logger
9
+ from howler_client.module.comment import Comment
10
+ from howler_client.utils.json_encoders import BytesDatetimeEncoder, DatetimeEncoder
11
+
12
+ if sys.version_info >= (3, 11):
13
+ from typing import Self
14
+ else:
15
+ from typing_extensions import Self
16
+
17
+ if TYPE_CHECKING:
18
+ from howler_client import Connection
19
+ from howler_client.module.search import Search
20
+
21
+ logger = get_logger("hit")
22
+
23
# Names of the operations that can be used as the first entry of an update tuple.
# Their exact semantics are defined by the API, not by this module.
UPDATE_SET = "SET"
UPDATE_DELETE = "DELETE"

UPDATE_INC = "INC"
UPDATE_DEC = "DEC"
UPDATE_MAX = "MAX"
UPDATE_MIN = "MIN"

UPDATE_APPEND = "APPEND"
UPDATE_APPEND_IF_MISSING = "APPEND_IF_MISSING"
UPDATE_REMOVE = "REMOVE"

# Every operation accepted by the client-side validation. The order is kept
# stable because it is rendered verbatim in the "Invalid update" error message.
UPDATE_OPERATIONS = [
    UPDATE_APPEND,
    UPDATE_APPEND_IF_MISSING,
    UPDATE_DEC,
    UPDATE_INC,
    UPDATE_MAX,
    UPDATE_MIN,
    UPDATE_REMOVE,
    UPDATE_SET,
    UPDATE_DELETE,
]
44
+
45
+
46
class Hit(object):
    """Operations pertaining to ingesting and interacting with Howler hits."""

    def __init__(self: Self, connection: "Connection", search: "Search"):
        self._connection: "Connection" = connection
        self._search: "Search" = search
        # Comment endpoints are exposed as a sub-module sharing the same connection.
        self.comment: "Comment" = Comment(connection)

    def __call__(self: Self, hit_id: str) -> dict[str, Any]:
        """Return the hit for a given ID

        Args:
            hit_id (str): ID of the hit

        Returns:
            dict[str, Any]: The corresponding hit data
        """
        return self._connection.get(api_path("hit", hit_id))

    def create_from_map(
        self: Self,
        tool_name: str,
        map: dict[str, list[str]],
        documents: list[dict[str, Any]],
        ignore_extra_values: bool = False,
    ) -> dict[str, Union[Optional[str], list[str]]]:
        """Create hits for a given tool using the raw documents and a map of the document fields to howler's fields.

        Warnings found in the response are logged. When the request fails with a ClientError whose
        ``api_response`` is a list, the warnings and errors it contains are logged before the
        exception is re-raised.

        Args:
            tool_name (str): Name of the tool the hits will be created for
            map (dict[str, list[str]]): Dictionary where the keys are the flattened path of the tool's raw document and
                the values are a list of flattened path for Howler's fields where the data will be copied into
            documents (list[dict[str, Any]]): The data to ingest into howler, in the tool's raw document format
            ignore_extra_values (bool, optional): Whether to allow extra fields, or raise an error. Defaults to False.

        Returns:
            dict[str, Union[Optional[str], list[str]]]: A list of IDs/Errors in the same order as the original documents

        Raises:
            ClientError: The request failed (re-raised after logging its details)
        """
        # NOTE(review): the response is iterated entry by entry below, so it looks like a list of
        # per-document results rather than the dict the return annotation advertises - confirm.
        data = {"map": map, "hits": documents}

        try:
            result = self._connection.post(
                api_path("tools", tool_name, "hits", ignore_extra_values=ignore_extra_values),
                json=data,
            )
        except ClientError as e:
            if e.api_response and isinstance(e.api_response, list):
                for res in e.api_response:
                    if "warn" in res and res["warn"]:
                        logger.warning(res["warn"])

                    if "error" in res and res["error"]:
                        logger.error(res["error"])  # noqa: TRY400
            raise

        for res in result:
            if "warn" not in res or not res["warn"]:
                continue

            warn = res["warn"]
            # A result may carry a single warning or a list of them; log one line per warning.
            for message in warn if isinstance(warn, list) else [warn]:
                logger.warning(message)

        return result

    def generate_hash(self: Self, hit: dict[str, Any]) -> str:
        """Generate hash value for hit using the analytic, detection, and raw_data values from the hit data.

        Args:
            hit (dict[str, Any]): Flattened hit data. ``howler.analytic`` is required, ``howler.detection``
                defaults to "no_detection" and ``howler.data`` defaults to an empty list.

        Returns:
            str: The hex encoded SHA256 hash value for the hit

        Raises:
            KeyError: The hit has no ``howler.analytic`` key
        """
        howler_data = hit.get("howler.data", [])

        if not isinstance(howler_data, list):
            howler_data = [howler_data]

        hash_contents = {
            "analytic": hit["howler.analytic"],
            "detection": hit.get("howler.detection", "no_detection"),
            # Entries are serialized then sorted so their original order does not affect the hash.
            "raw_data": sorted(
                json.dumps(entry, sort_keys=True, ensure_ascii=True, cls=BytesDatetimeEncoder) for entry in howler_data
            ),
        }

        return sha256(json.dumps(hash_contents, sort_keys=True, ensure_ascii=True).encode("utf-8")).hexdigest()

    def _prepare_hit(self: Self, hit: dict[str, Any]) -> dict[str, Any]:
        """Flatten a hit, make sure it carries a hash and serialize its raw data entries to JSON strings."""
        hit = flatten(hit, fields=["howler"])

        # The hash is computed before howler.data is serialized, generate_hash serializes it itself.
        existing_hash = hit.get("howler.hash", None)
        if existing_hash is None:
            existing_hash = self.generate_hash(hit)

        hit["howler.hash"] = existing_hash

        if "howler.data" in hit:
            howler_data = hit["howler.data"]
            if not isinstance(howler_data, list):
                howler_data = [howler_data]

            hit["howler.data"] = [
                json.dumps(entry, sort_keys=True, ensure_ascii=True, cls=BytesDatetimeEncoder)
                for entry in howler_data
            ]

        return hit

    def create(
        self: Self,
        data: Union[dict[str, Any], list[dict[str, Any]]],
        ignore_extra_values: bool = False,
    ):
        """Create one or many hits using the howler schema.

        Hits whose hash already exists in the datastore are not submitted again: a warning is logged
        and the existing hit is appended to the "valid" list of the result instead.

        Args:
            data (Union[dict[str, Any], list[dict[str, Any]]]): The hit or list of hits to create
            ignore_extra_values (bool, optional): Whether to ignore extra values, or throw an exception.
                Defaults to False.

        Returns:
            dict[str, list[dict[str, Any]]]: A list of valid and invalid hits. None is returned when there is
                nothing to submit (no hits provided, or all of them already exist), and a falsy response
                from the API is returned untouched.
        """
        if not isinstance(data, list):
            data = [data]

        final_hit_list = [self._prepare_hit(hit) for hit in data]

        if not final_hit_list:
            # Nothing was provided: avoid running a search with an empty hash filter.
            logger.info("No hits to submit.")
            return None

        search_result = self._search.grouped.hit(
            "howler.hash",
            limit=1,
            filters=[f"howler.hash:{' '.join(list_hit['howler.hash'] for list_hit in final_hit_list)}"],
        )["items"]

        # Index the already existing hits by hash. The first match wins for a given hash.
        existing_hits: dict[Any, Any] = {}
        for match in search_result:
            existing_hits.setdefault(match["value"], match["items"][0])

        # Build a new list rather than removing entries from the list being iterated, which
        # would skip the element following each removed one and let duplicates through.
        new_hits = []
        for hit in final_hit_list:
            if hit["howler.hash"] not in existing_hits:
                new_hits.append(hit)
                continue

            matched_hit = existing_hits[hit["howler.hash"]]
            logger.warning(
                f"Hit with hash {hit['howler.hash']} already exists in the DB at "
                f"id {matched_hit['howler']['id']}, reusing"
            )

        if not new_hits:
            logger.info("No hits to submit.")
            return None

        result = self._connection.post(
            api_path("hit", ignore_extra_values=ignore_extra_values),
            data=json.dumps(new_hits, cls=DatetimeEncoder),
            headers={"Content-Type": "application/json"},
        )

        if not result:
            logger.warning("No result was returned.")
            return result

        for invalid_hit in result["invalid"]:
            logger.error(invalid_hit["error"])

        # Hits that already existed are reported as valid alongside the newly created ones.
        for entry in search_result:
            result["valid"].append(entry["items"][0])

        return result

    def overwrite(self: Self, hit_id: str, new_hit_data: dict[str, Any]):
        """Overwrite a hit.

        This is different from updating a hit, as you simply provide a partial hit object

        Args:
            hit_id (str): Id of the hit you would like to overwrite
            new_hit_data (dict[str, Any]): A partial hit data object to overwrite the specified hit with

        Raises:
            TypeError: The new hit data is not a dict
        """
        if not isinstance(new_hit_data, dict):
            raise TypeError("New hit data must be of type dict.")

        return self._connection.put(api_path(f"hit/{hit_id}/overwrite"), json=new_hit_data)

    @staticmethod
    def _validate_updates(updates: Any) -> None:
        """Check that updates is a list of tuples each starting with a known update operation."""
        if not isinstance(updates, list):
            raise TypeError("Updates must be of type list.")

        for update in updates:
            if not isinstance(update, tuple):
                raise TypeError("Entries in updates must be of type tuple.")

            # An empty tuple has no operation at all: report it as an invalid update
            # instead of failing with an IndexError.
            if not update or update[0] not in UPDATE_OPERATIONS:
                raise ClientError(
                    f"Invalid update - operation must be one of {','.join(UPDATE_OPERATIONS)}!",
                    400,
                )

    def update(self: Self, hit_id: str, updates: list[tuple[str, str, Any]]):
        """Update a hit.

        Args:
            hit_id (str): Id of the hit you would like to update
            updates (list[tuple[str, str, Any]]): A list of updates to run. The first entry in the tuple must be a valid
                update operation (see UPDATE_OPERATIONS), the second a key for a howler hit, and the third the value
                to use in the operation.

        Raises:
            TypeError: Updates is not a list, or one of its entries is not a tuple
            ClientError: Updates provided were invalid
        """
        self._validate_updates(updates)

        return self._connection.put(api_path(f"hit/{hit_id}/update"), json=updates)

    def update_by_query(self: Self, query: str, updates: list[tuple[str, str, Any]]):
        """Update a set of hits by query.

        Args:
            query (str): Query representing the hits you would like to update
            updates (list[tuple[str, str, Any]]): A list of updates to run. The first entry in the tuple must be a valid
                update operation (see UPDATE_OPERATIONS), the second a key for a howler hit, and the third the value
                to use in the operation.

        Raises:
            TypeError: Updates is not a list, or one of its entries is not a tuple
            ClientError: Updates provided were invalid
        """
        self._validate_updates(updates)

        return self._connection.put(api_path("hit/update"), json={"query": query, "operations": updates})

    def delete(self: Self, hit_ids: list[str]) -> dict[Literal["success"], bool]:
        """Delete a list of hits by id

        Args:
            hit_ids (list[str]): Ids of the hits to delete. A single non-list value is wrapped in a list.

        Returns:
            dict[Literal["success"], bool]: Whether the delete operation was successful
        """
        if not isinstance(hit_ids, list):
            hit_ids = [hit_ids]

        return self._connection.delete(api_path("hit"), json=hit_ids)
@@ -0,0 +1,84 @@
1
+ import json
2
+
3
+ from howler_client.common.utils import SEARCHABLE, ClientError, api_path
4
+ from howler_client.module.search.facet import Facet
5
+ from howler_client.module.search.fields import Fields
6
+ from howler_client.module.search.grouped import Grouped
7
+ from howler_client.module.search.histogram import Histogram
8
+ from howler_client.module.search.stats import Stats
9
+ from howler_client.module.search.stream import Stream
10
+
11
+
12
class Search(object):
    """Module dedicated to searching collections and performing various other operations like group by or faceting"""

    def __init__(self, connection):
        self._connection = connection

        # Specialised search operations, each exposed as its own sub-module.
        self.facet = Facet(connection)
        self.fields = Fields(connection)
        self.grouped = Grouped(connection)
        self.histogram = Histogram(connection)
        self.stats = Stats(connection)
        # The stream module is handed the search function of this module in addition to the connection.
        self.stream = Stream(connection, self._do_search)

    def _do_search(self, index, query, use_archive=False, track_total_hits=None, **kwargs):
        """Post a search request on the given index and return its response.

        Raises a ClientError (400) when the index is not part of SEARCHABLE. Keyword arguments
        set to None are left out of the request body.
        """
        if index not in SEARCHABLE:
            raise ClientError("Index %s is not searchable" % index, 400)

        # A single filter provided as a string is sent as a one item list.
        filters = kwargs.pop("filters", None)
        if isinstance(filters, str):
            filters = [filters]
        if filters is not None:
            kwargs["filters"] = filters

        body = {name: value for name, value in kwargs.items() if value is not None}
        body["query"] = query
        if use_archive:
            # Only the presence of the key is sent, its value is an empty string.
            body["use_archive"] = ""
        if track_total_hits:
            body["track_total_hits"] = track_total_hits

        return self._connection.post(api_path("search", index), data=json.dumps(body))

    def hit(
        self,
        query,
        filters=None,
        fl=None,
        offset=0,
        rows=25,
        sort=None,
        timeout=None,
        use_archive=False,
        track_total_hits=None,
    ):
        """Search hits with a lucene query.

        Required:
        query   : lucene query (string)

        Optional:
        filters : Additional lucene queries used to filter the data (list of strings)
        fl      : List of fields to return (comma separated string of fields)
        offset  : Offset at which the query items should start (integer)
        rows    : Number of records to return (integer)
        sort    : Field used for sorting with direction (string: ex. 'id desc')
        timeout : Max amount of milliseconds the query will run (integer)
        use_archive : Also query the archive
        track_total_hits : Number of hits to track (default: 10k)

        Returns the response of the search request.
        """
        options = {
            "filters": filters,
            "fl": fl,
            "offset": offset,
            "rows": rows,
            "sort": sort,
            "timeout": timeout,
            "use_archive": use_archive,
            "track_total_hits": track_total_hits,
        }
        return self._do_search("hit", query, **options)
@@ -0,0 +1,38 @@
1
+ """Sequence manipulation methods used in parsing raw datastore output."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Generator, Sequence, TypeVar, overload
6
+
7
_T = TypeVar("_T")


@overload
def chunk(items: bytes, n: int) -> Generator[bytes, None, None]: ...


@overload
def chunk(items: str, n: int) -> Generator[str, None, None]: ...


@overload
def chunk(items: Sequence[_T], n: int) -> Generator[Sequence[_T], None, None]: ...


def chunk(items, n: int):
    """Yield successive n-sized slices of a sequence.

    Every slice holds n items, except the last one which holds the remainder.
    Slices have the type produced by slicing ``items`` (list, str, bytes, ...).

    As this is a generator, an invalid ``n`` is only reported once iteration starts.

    Args:
        items: The sequence to split, it must support ``len()`` and slicing
        n (int): The maximum size of each slice, must be at least 1

    Raises:
        ValueError: n is lower than 1

    >>> list(chunk([1, 2, 3, 4, 5, 6, 7], 2))
    [[1, 2], [3, 4], [5, 6], [7]]
    """
    # A negative step would make range() empty and silently drop every item,
    # while a step of zero fails with an unrelated range() error message.
    if n < 1:
        raise ValueError(f"Chunk size must be at least 1, got {n}")

    for start in range(0, len(items), n):
        yield items[start : start + n]
30
+
31
+
32
def chunked_list(items: Sequence[_T], n: int) -> list[Sequence[_T]]:
    """Split a sequence into a list of n-sized chunks.

    This is the eager counterpart of chunk(): all the chunks are computed right away.

    >>> chunked_list([1, 2, 3, 4, 5, 6, 7], 2)
    [[1, 2], [3, 4], [5, 6], [7]]
    """
    chunks = chunk(items, n)
    return list(chunks)
@@ -0,0 +1,41 @@
1
+ from howler_client.common.utils import SEARCHABLE, ClientError, api_path
2
+
3
+
4
class Facet(object):
    """List most frequent values for a field in the given collection"""

    def __init__(self, connection):
        self._connection = connection

    def _do_facet(self, index, field, **kwargs):
        """Run a facet request for a field of the given index and return its response.

        Raises a ClientError (400) when the index is not part of SEARCHABLE. Keyword arguments
        set to None are not forwarded to the path builder.
        """
        if index not in SEARCHABLE:
            raise ClientError("Index %s is not searchable" % index, 400)

        # A single filter provided as a string is handled as a one item list.
        filters = kwargs.pop("filters", None)
        if isinstance(filters, str):
            filters = [filters]

        params = {name: value for name, value in kwargs.items() if value is not None}
        if filters is not None:
            # Filters are passed as one ("filters", value) pair per filter.
            params["params_tuples"] = [("filters", fq) for fq in filters]

        return self._connection.get(api_path("search", "facet", index, field, **params))

    def hit(self, field, query=None, mincount=None, filters=None, rows=None):
        """List most frequent value for a field in the hit collection.

        Required:
        field    : field to extract the facets from

        Optional:
        query    : Initial query to filter the data (default: 'id:*')
        filters  : Additional lucene queries used to filter the data (list of strings)
        mincount : Minimum amount of hits for the value to be returned
        rows     : The number of different facets to return

        Returns the response of the facet request.
        """
        options = {"query": query, "mincount": mincount, "filters": filters, "rows": rows}
        return self._do_facet("hit", field, **options)
@@ -0,0 +1,19 @@
1
+ from howler_client.common.utils import SEARCHABLE, ClientError, api_path
2
+
3
+
4
class Fields(object):
    """List the fields of given indexes"""

    def __init__(self, connection):
        self._connection = connection

    def _do_fields(self, index):
        """Fetch the fields of the given index, raising a ClientError (400) if it is not in SEARCHABLE."""
        if index not in SEARCHABLE:
            raise ClientError("Index %s is not searchable" % index, 400)

        return self._connection.get(api_path("search", "fields", index))

    def hit(self):
        """List all fields details for the hit collection."""
        index = "hit"
        return self._do_fields(index)
@@ -0,0 +1,67 @@
1
+ from howler_client.common.utils import SEARCHABLE, ClientError, api_path
2
+
3
+
4
class Grouped(object):
    """Module for grouping search results from given indexes"""

    def __init__(self, connection):
        self._connection = connection

    def _do_grouped(self, index, field, **kwargs):
        """Run a grouped search on a field of the given index and return its response.

        Raises a ClientError (400) when the index is not part of SEARCHABLE. Keyword arguments
        set to None are not forwarded to the path builder.
        """
        if index not in SEARCHABLE:
            raise ClientError("Index %s is not searchable" % index, 400)

        # A single filter provided as a string is handled as a one item list.
        filters = kwargs.pop("filters", None)
        if isinstance(filters, str):
            filters = [filters]

        params = {name: value for name, value in kwargs.items() if value is not None}
        if filters is not None:
            # Filters are passed as one ("filters", value) pair per filter.
            params["params_tuples"] = [("filters", fq) for fq in filters]

        return self._connection.get(api_path("search", "grouped", index, field, **params))

    def hit(
        self,
        field,
        group_sort=None,
        limit=None,
        query=None,
        filters=None,
        offset=None,
        rows=None,
        sort=None,
        fl=None,
    ):
        """Search hit collection and group result to a given field

        Required:
        field    : Field used to group the results

        Optional:
        group_sort  : Field used for sorting items in the groups with direction (string: ex. 'id desc')
        limit       : Maximum number of items returned per group (integer)
        query       : lucene query (string)
        filters     : Additional lucene queries used to filter the data (list of strings)
        offset      : Offset at which the query items should start (integer)
        rows        : Number of records to return (integer)
        sort        : Field used for sorting with direction (string: ex. 'id desc')
        fl          : List of fields to return (comma separated string of fields)

        Returns the response of a single grouped search request.
        """
        options = {
            "group_sort": group_sort,
            "limit": limit,
            "query": query,
            "filters": filters,
            "offset": offset,
            "rows": rows,
            "sort": sort,
            "fl": fl,
        }
        return self._do_grouped("hit", field, **options)