howler_client-2.4.0.dev37-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- howler_client/__init__.py +46 -0
- howler_client/client.py +32 -0
- howler_client/common/__init__.py +0 -0
- howler_client/common/dict_utils.py +138 -0
- howler_client/common/utils.py +113 -0
- howler_client/connection.py +204 -0
- howler_client/logger.py +14 -0
- howler_client/module/__init__.py +0 -0
- howler_client/module/bundle.py +132 -0
- howler_client/module/comment.py +59 -0
- howler_client/module/help.py +23 -0
- howler_client/module/hit.py +299 -0
- howler_client/module/search/__init__.py +84 -0
- howler_client/module/search/chunk.py +38 -0
- howler_client/module/search/facet.py +41 -0
- howler_client/module/search/fields.py +19 -0
- howler_client/module/search/grouped.py +67 -0
- howler_client/module/search/histogram.py +63 -0
- howler_client/module/search/stats.py +39 -0
- howler_client/module/search/stream.py +81 -0
- howler_client/module/user.py +97 -0
- howler_client/utils/__init__.py +0 -0
- howler_client/utils/json_encoders.py +36 -0
- howler_client-2.4.0.dev37.dist-info/LICENSE +23 -0
- howler_client-2.4.0.dev37.dist-info/METADATA +61 -0
- howler_client-2.4.0.dev37.dist-info/RECORD +28 -0
- howler_client-2.4.0.dev37.dist-info/WHEEL +4 -0
- howler_client-2.4.0.dev37.dist-info/entry_points.txt +5 -0
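
The file listing above shows where the construction API lives (howler_client/__init__.py and client.py), but this diff does not expand those files. The usage sketches added after each hunk below therefore assume an already-built client object; the helper name and arguments here are placeholders, not the package's documented interface:

# Hypothetical setup for the usage sketches that follow. `get_client` and its
# arguments are assumptions -- the real constructor lives in
# howler_client/__init__.py and client.py, which are not expanded in this diff.
from howler_client import get_client  # assumed export

client = get_client("https://howler.example.com", apikey=("username", "api_key"))  # placeholder values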

--- /dev/null
+++ howler_client/module/comment.py
@@ -0,0 +1,59 @@
+import sys
+from typing import TYPE_CHECKING, Any, List
+
+from howler_client.common.utils import api_path
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+if TYPE_CHECKING:
+    from howler_client import Connection
+
+
+class Comment(object):
+    """Comment related endpoints"""
+
+    def __init__(self: Self, connection: "Connection"):
+        self._connection = connection
+
+    def add(self: Self, hit_id: str, comment: str) -> dict[str, Any]:
+        """Add a comment to a hit and return it
+
+        Args:
+            hit_id (str): ID of the hit
+            comment (str): content of the comment
+
+        Returns:
+            dict[str, Any]: The corresponding hit data
+        """
+        return self._connection.post(api_path("hit", hit_id, "comments"), json={"value": comment})
+
+    def edit(self: Self, hit_id: str, comment: str, comment_id: str) -> dict[str, Any]:
+        """Update a comment on a hit and return it
+
+        Args:
+            hit_id (str): ID of the hit
+            comment_id (str): ID of the comment that needs to be updated
+            comment (str): new content of the comment
+
+        Returns:
+            dict[str, Any]: The corresponding hit data
+        """
+        return self._connection.put(
+            api_path("hit", hit_id, "comments", comment_id),
+            json={"value": comment},
+        )
+
+    def delete(self: Self, hit_id: str, comment_ids: List[str]) -> dict[str, Any]:
+        """Delete comments on a hit and return it
+
+        Args:
+            hit_id (str): ID of the hit
+            comment_ids (List[str]): list of all comment IDs that need to be removed
+
+        Returns:
+            dict[str, Any]: The corresponding hit data
+        """
+        return self._connection.delete(api_path("hit", hit_id, "comments"), json=comment_ids)
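
A minimal usage sketch for these comment endpoints, building on the placeholder `client` above and assuming the hit module exposes this class as `client.hit.comment` (that wiring appears in the hit.py hunk further down). The response shape used to pull the comment ID back out is an assumption:

hit_id = "example-hit-id"  # placeholder

# Add a comment; the API above is documented as returning the updated hit data.
updated_hit = client.hit.comment.add(hit_id, "Looks like routine admin activity.")

# Assumed response shape: the hit carries its comment list under howler.comment.
comment_id = updated_hit["howler"]["comment"][-1]["id"]

client.hit.comment.edit(hit_id, "Confirmed benign after review.", comment_id)
client.hit.comment.delete(hit_id, [comment_id])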

--- /dev/null
+++ howler_client/module/help.py
@@ -0,0 +1,23 @@
+import sys
+from typing import TYPE_CHECKING
+
+from howler_client.common.utils import api_path
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+if TYPE_CHECKING:
+    from howler_client import Connection
+
+
+class Help(object):
+    """Help related endpoints"""
+
+    def __init__(self: Self, connection: "Connection"):
+        self._connection = connection
+
+    def classification_definition(self):
+        """Return the current system classification definition"""
+        return self._connection.get(api_path("help", "classification_definition"))
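
A one-line sketch for the help endpoint, assuming the client wires this module up as `client.help`:

# Fetch the server's classification definition (response format is not shown in this diff).
classification = client.help.classification_definition()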

--- /dev/null
+++ howler_client/module/hit.py
@@ -0,0 +1,299 @@
+import json
+import sys
+from hashlib import sha256
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+
+from howler_client.common.dict_utils import flatten
+from howler_client.common.utils import ClientError, api_path
+from howler_client.logger import get_logger
+from howler_client.module.comment import Comment
+from howler_client.utils.json_encoders import BytesDatetimeEncoder, DatetimeEncoder
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+if TYPE_CHECKING:
+    from howler_client import Connection
+    from howler_client.module.search import Search
+
+logger = get_logger("hit")
+
+UPDATE_SET = "SET"
+UPDATE_INC = "INC"
+UPDATE_DEC = "DEC"
+UPDATE_MAX = "MAX"
+UPDATE_MIN = "MIN"
+UPDATE_APPEND = "APPEND"
+UPDATE_APPEND_IF_MISSING = "APPEND_IF_MISSING"
+UPDATE_REMOVE = "REMOVE"
+UPDATE_DELETE = "DELETE"
+
+UPDATE_OPERATIONS = [
+    UPDATE_APPEND,
+    UPDATE_APPEND_IF_MISSING,
+    UPDATE_DEC,
+    UPDATE_INC,
+    UPDATE_MAX,
+    UPDATE_MIN,
+    UPDATE_REMOVE,
+    UPDATE_SET,
+    UPDATE_DELETE,
+]
+
+
+class Hit(object):
+    "Operations pertaining to ingesting and interacting with Howler hits."
+
+    def __init__(self: Self, connection: "Connection", search: "Search"):
+        self._connection: "Connection" = connection
+        self._search: "Search" = search
+        self.comment: "Comment" = Comment(connection)
+
+    def __call__(self: Self, hit_id: str) -> dict[str, Any]:
+        """Return the hit for a given ID
+
+        Args:
+            hit_id (str): ID of the hit
+
+        Returns:
+            dict[str, Any]: The corresponding hit data
+        """
+        return self._connection.get(api_path("hit", hit_id))
+
+    def create_from_map(
+        self: Self,
+        tool_name: str,
+        map: dict[str, list[str]],
+        documents: list[dict[str, Any]],
+        ignore_extra_values: bool = False,
+    ) -> dict[str, Union[Optional[str], list[str]]]:
+        """Create hits for a given tool using the raw documents and a map of the document fields to howler's fields.
+
+        Args:
+            tool_name (str): Name of the tool the hits will be created for
+            map (dict[str, list[str]]): Dictionary where the keys are the flattened path of the tool's raw document and
+                the values are a list of flattened paths for Howler's fields where the data will be copied into
+            documents (list[dict[str, Any]]): The data to ingest into howler, in the tool's raw document format
+            ignore_extra_values (bool, optional): Whether to allow extra fields, or raise an error. Defaults to False.
+
+        Returns:
+            dict[str, Union[Optional[str], list[str]]]: A list of IDs/Errors in the same order as the original documents
+        """
+        data = {"map": map, "hits": documents}
+
+        try:
+            result = self._connection.post(
+                api_path("tools", tool_name, "hits", ignore_extra_values=ignore_extra_values),
+                json=data,
+            )
+        except ClientError as e:
+            if e.api_response and isinstance(e.api_response, list):
+                for res in e.api_response:
+                    if "warn" in res and res["warn"]:
+                        logger.warning(res["warn"])
+
+                    if "error" in res and res["error"]:
+                        logger.error(res["error"])  # noqa: TRY400
+            raise
+
+        for res in result:
+            if "warn" in res and res["warn"]:
+                warn = res["warn"]
+                if isinstance(warn, list):
+                    for w in warn:
+                        logger.warning(w)
+                else:
+                    logger.warning(warn)
+
+        return result
+
+    def generate_hash(self: Self, hit: dict[str, Any]) -> str:
+        """Generate a hash value for a hit using the analytic, detection, and raw_data values from the hit data.
+
+        Args:
+            hit (dict[str, Any]): hit data
+
+        Returns:
+            str: A hash value for the hit
+        """
+        howler_data = hit.get("howler.data", [])
+
+        if not isinstance(howler_data, list):
+            howler_data = [howler_data]
+
+        hash_contents = {
+            "analytic": hit["howler.analytic"],
+            "detection": hit.get("howler.detection", "no_detection"),
+            "raw_data": sorted(
+                json.dumps(entry, sort_keys=True, ensure_ascii=True, cls=BytesDatetimeEncoder) for entry in howler_data
+            ),
+        }
+
+        return sha256(json.dumps(hash_contents, sort_keys=True, ensure_ascii=True).encode("utf-8")).hexdigest()
+
+    def create(  # noqa: C901
+        self: Self,
+        data: Union[dict[str, Any], list[dict[str, Any]]],
+        ignore_extra_values: bool = False,
+    ):
+        """Create one or many hits using the howler schema.
+
+        Args:
+            data (Union[dict[str, Any], list[dict[str, Any]]]): The hit or list of hits to create
+            ignore_extra_values (bool, optional): Whether to ignore extra values, or throw an exception.
+                Defaults to False.
+
+        Returns:
+            dict[str, list[dict[str, Any]]]: A list of valid and invalid hits
+        """
+        if not isinstance(data, list):
+            data = [data]
+
+        final_hit_list = []
+        for hit in data:
+            hit = flatten(hit, fields=["howler"])
+
+            existing_hash = hit.get("howler.hash", None)
+            if existing_hash is None:
+                existing_hash = self.generate_hash(hit)
+
+            hit["howler.hash"] = existing_hash
+
+            if "howler.data" in hit:
+                howler_data = hit["howler.data"]
+                if not isinstance(howler_data, list):
+                    howler_data = [howler_data]
+
+                hit["howler.data"] = [
+                    json.dumps(
+                        entry,
+                        sort_keys=True,
+                        ensure_ascii=True,
+                        cls=BytesDatetimeEncoder,
+                    )
+                    for entry in howler_data
+                ]
+
+            final_hit_list.append(hit)
+
+        search_result = self._search.grouped.hit(
+            "howler.hash",
+            limit=1,
+            filters=[f"howler.hash:{' '.join(list_hit['howler.hash'] for list_hit in final_hit_list)}"],
+        )["items"]
+
+        for hit in list(final_hit_list):  # iterate over a copy, since matched hits are removed below
+            for match in search_result:
+                if hit["howler.hash"] == match["value"]:
+                    matched_hit = match["items"][0]
+
+                    logger.warning(
+                        f"Hit with hash {hit['howler.hash']} already exists in the DB at "
+                        f"id {matched_hit['howler']['id']}, reusing"
+                    )
+                    final_hit_list.remove(hit)
+
+        if len(final_hit_list) < 1:
+            logger.info("No hits to submit.")
+            return None
+
+        result = self._connection.post(
+            api_path("hit", ignore_extra_values=ignore_extra_values),
+            data=json.dumps(final_hit_list, cls=DatetimeEncoder),
+            headers={"Content-Type": "application/json"},
+        )
+
+        if not result:
+            logger.warning("No result was returned.")
+            return result
+
+        for invalid_hit in result["invalid"]:
+            logger.error(invalid_hit["error"])
+
+        for entry in search_result:
+            result["valid"].append(entry["items"][0])
+
+        return result
+
+    def overwrite(self: Self, hit_id: str, new_hit_data: dict[str, Any]):
+        """Overwrite a hit.
+
+        This is different from updating a hit, as you simply provide a partial hit object.
+
+        Args:
+            hit_id (str): ID of the hit you would like to overwrite
+            new_hit_data (dict[str, Any]): A partial hit data object to overwrite the specified hit with
+
+        Raises:
+            ClientError: Updates provided were invalid
+        """
+        if not isinstance(new_hit_data, dict):
+            raise TypeError("New hit data must be of type dict.")
+
+        return self._connection.put(api_path(f"hit/{hit_id}/overwrite"), json=new_hit_data)
+
+    def update(self: Self, hit_id: str, updates: list[tuple[str, str, Any]]):
+        """Update a hit.
+
+        Args:
+            hit_id (str): ID of the hit you would like to update
+            updates (list[tuple[str, str, Any]]): A list of updates to run. The first entry in the tuple must be a valid
+                update operation (see UPDATE_OPERATIONS), the second a key for a howler hit, and the third the value
+                to use in the operation.
+
+        Raises:
+            ClientError: Updates provided were invalid
+        """
+        if not isinstance(updates, list):
+            raise TypeError("Updates must be of type list.")
+
+        for update in updates:
+            if not isinstance(update, tuple):
+                raise TypeError("Entries in updates must be of type tuple.")
+
+            if update[0] not in UPDATE_OPERATIONS:
+                raise ClientError(
+                    f"Invalid update - operation must be one of {','.join(UPDATE_OPERATIONS)}!",
+                    400,
+                )
+
+        return self._connection.put(api_path(f"hit/{hit_id}/update"), json=updates)
+
+    def update_by_query(self: Self, query: str, updates: list[tuple[str, str, Any]]):
+        """Update a set of hits by query.
+
+        Args:
+            query (str): Query representing the hits you would like to update
+            updates (list[tuple[str, str, Any]]): A list of updates to run. The first entry in the tuple must be a valid
+                update operation (see UPDATE_OPERATIONS), the second a key for a howler hit, and the third the value
+                to use in the operation.
+
+        Raises:
+            ClientError: Updates provided were invalid
+        """
+        if not isinstance(updates, list):
+            raise TypeError("Updates must be of type list.")
+
+        for update in updates:
+            if not isinstance(update, tuple):
+                raise TypeError("Entries in updates must be of type tuple.")
+            if update[0] not in UPDATE_OPERATIONS:
+                raise ClientError(
+                    f"Invalid update - operation must be one of {','.join(UPDATE_OPERATIONS)}!",
+                    400,
+                )
+
+        return self._connection.put(api_path("hit/update"), json={"query": query, "operations": updates})
+
+    def delete(self: Self, hit_ids: list[str]) -> dict[Literal["success"], bool]:
+        """Delete a list of hits by ID
+
+        Returns:
+            dict[Literal["success"], bool]: Whether the delete operation was successful
+        """
+        if not isinstance(hit_ids, list):
+            hit_ids = [hit_ids]
+
+        return self._connection.delete(api_path("hit"), json=hit_ids)
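
A usage sketch for the main ingestion and update paths above, using the placeholder `client`. The howler.analytic, howler.detection, and howler.data keys come from the hunk itself; the other field names and the response shape are illustrative assumptions:

from howler_client.module.hit import UPDATE_APPEND, UPDATE_SET

# create() flattens the "howler" section, fills in howler.hash via
# generate_hash() when missing, and skips hits whose hash already exists.
result = client.hit.create(
    {
        "howler": {
            "analytic": "Example Analytic",       # used by generate_hash()
            "detection": "Suspicious Login",
            "data": [{"source_ip": "10.0.0.1"}],  # serialized to JSON strings on ingest
        }
    }
)

if result is not None:
    created_id = result["valid"][0]["howler"]["id"]  # response shape inferred from create()

    # Targeted updates are (operation, key, value) tuples validated against UPDATE_OPERATIONS.
    client.hit.update(
        created_id,
        [
            (UPDATE_SET, "howler.status", "in-progress"),        # field names are illustrative
            (UPDATE_APPEND, "howler.labels.generic", "triaged"),
        ],
    )

    # Bulk updates by lucene query, and deletion by ID, follow the same pattern.
    client.hit.update_by_query('howler.analytic:"Example Analytic"', [(UPDATE_SET, "howler.status", "resolved")])
    client.hit.delete([created_id])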

--- /dev/null
+++ howler_client/module/search/__init__.py
@@ -0,0 +1,84 @@
+import json
+
+from howler_client.common.utils import SEARCHABLE, ClientError, api_path
+from howler_client.module.search.facet import Facet
+from howler_client.module.search.fields import Fields
+from howler_client.module.search.grouped import Grouped
+from howler_client.module.search.histogram import Histogram
+from howler_client.module.search.stats import Stats
+from howler_client.module.search.stream import Stream
+
+
+class Search(object):
+    "Module dedicated to searching collections and performing various other operations like group by or faceting"
+
+    def __init__(self, connection):
+        self._connection = connection
+        self.facet = Facet(connection)
+        self.fields = Fields(connection)
+        self.grouped = Grouped(connection)
+        self.histogram = Histogram(connection)
+        self.stats = Stats(connection)
+        self.stream = Stream(connection, self._do_search)
+
+    def _do_search(self, index, query, use_archive=False, track_total_hits=None, **kwargs):
+        if index not in SEARCHABLE:
+            raise ClientError("Index %s is not searchable" % index, 400)
+
+        filters = kwargs.pop("filters", None)
+        if filters is not None:
+            if isinstance(filters, str):
+                filters = [filters]
+
+            kwargs["filters"] = filters
+
+        kwargs = {k: v for k, v in kwargs.items() if v is not None}
+        kwargs["query"] = query
+        if use_archive:
+            kwargs["use_archive"] = ""
+        if track_total_hits:
+            kwargs["track_total_hits"] = track_total_hits
+        path = api_path("search", index)
+        return self._connection.post(path, data=json.dumps(kwargs))
+
+    def hit(
+        self,
+        query,
+        filters=None,
+        fl=None,
+        offset=0,
+        rows=25,
+        sort=None,
+        timeout=None,
+        use_archive=False,
+        track_total_hits=None,
+    ):
+        """Search hits with a lucene query.
+
+        Required:
+        query            : lucene query (string)
+
+        Optional:
+        filters          : Additional lucene queries used to filter the data (list of strings)
+        fl               : List of fields to return (comma separated string of fields)
+        offset           : Offset at which the query items should start (integer)
+        rows             : Number of records to return (integer)
+        sort             : Field used for sorting with direction (string: ex. 'id desc')
+        timeout          : Max amount of milliseconds the query will run (integer)
+        use_archive      : Also query the archive
+        track_total_hits : Number of hits to track (default: 10k)
+
+        Returns all results.
+        """
+        return self._do_search(
+            "hit",
+            query,
+            filters=filters,
+            fl=fl,
+            offset=offset,
+            rows=rows,
+            sort=sort,
+            timeout=timeout,
+            use_archive=use_archive,
+            track_total_hits=track_total_hits,
+        )
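
A query sketch for search.hit() above, again with the placeholder `client`. Field names in the query, filters, and `fl` list are illustrative, and the "items" response key is an assumption:

# Page through recent hits, newest first, returning only a few fields.
page = client.search.hit(
    "howler.status:open",                       # lucene query; field name is illustrative
    filters=["event.created:[now-7d TO now]"],  # additional lucene filters
    fl="howler.id,howler.analytic,howler.score",
    rows=50,
    sort="event.created desc",
)

for item in page["items"]:  # assumed response shape
    print(item)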

--- /dev/null
+++ howler_client/module/search/chunk.py
@@ -0,0 +1,38 @@
+"""Sequence manipulation methods used in parsing raw datastore output."""
+
+from __future__ import annotations
+
+from typing import Generator, Sequence, TypeVar, overload
+
+_T = TypeVar("_T")
+
+
+@overload
+def chunk(items: bytes, n: int) -> Generator[bytes, None, None]: ...
+
+
+@overload
+def chunk(items: str, n: int) -> Generator[str, None, None]: ...
+
+
+@overload
+def chunk(items: Sequence[_T], n: int) -> Generator[Sequence[_T], None, None]: ...
+
+
+def chunk(items, n: int):
+    """Yield n-sized chunks from list.
+
+    >>> list(chunk([1,2,3,4,5,6,7], 2))
+    [[1, 2], [3, 4], [5, 6], [7]]
+    """
+    for i in range(0, len(items), n):
+        yield items[i : i + n]
+
+
+def chunked_list(items: Sequence[_T], n: int) -> list[Sequence[_T]]:
+    """Create a list of n-sized chunks from list.
+
+    >>> chunked_list([1,2,3,4,5,6,7], 2)
+    [[1, 2], [3, 4], [5, 6], [7]]
+    """
+    return list(chunk(items, n))
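
These helpers are plain sequence utilities, so a sketch needs no client; one natural use is batching hit IDs before bulk calls such as Hit.delete():

from howler_client.module.search.chunk import chunk, chunked_list

ids = [f"hit-{i}" for i in range(7)]

# Lazily yield 3-item batches...
for batch in chunk(ids, 3):
    print(batch)  # ['hit-0', 'hit-1', 'hit-2'], then ['hit-3', 'hit-4', 'hit-5'], then ['hit-6']

# ...or materialize them all at once.
batches = chunked_list(ids, 3)
assert batches[-1] == ["hit-6"]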

--- /dev/null
+++ howler_client/module/search/facet.py
@@ -0,0 +1,41 @@
+from howler_client.common.utils import SEARCHABLE, ClientError, api_path
+
+
+class Facet(object):
+    "List most frequent values for a field in the given collection"
+
+    def __init__(self, connection):
+        self._connection = connection
+
+    def _do_facet(self, index, field, **kwargs):
+        if index not in SEARCHABLE:
+            raise ClientError("Index %s is not searchable" % index, 400)
+
+        filters = kwargs.pop("filters", None)
+        if filters is not None:
+            if isinstance(filters, str):
+                filters = [filters]
+
+            filters = [("filters", fq) for fq in filters]
+
+        kwargs = {k: v for k, v in kwargs.items() if v is not None and k != "filters"}
+        if filters is not None:
+            kwargs["params_tuples"] = filters
+        path = api_path("search", "facet", index, field, **kwargs)
+        return self._connection.get(path)
+
+    def hit(self, field, query=None, mincount=None, filters=None, rows=None):
+        """List the most frequent values for a field in the hit collection.
+
+        Required:
+        field    : field to extract the facets from
+
+        Optional:
+        query    : Initial query to filter the data (default: 'id:*')
+        filters  : Additional lucene queries used to filter the data (list of strings)
+        mincount : Minimum number of hits for the value to be returned
+        rows     : The number of different facets to return
+
+        Returns all results.
+        """
+        return self._do_facet("hit", field, query=query, mincount=mincount, filters=filters, rows=rows)
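
A faceting sketch with the placeholder `client`. The howler.analytic field comes from the hit module above; the time-range query and the assumption that the response maps values to counts are illustrative:

# Count the most common analytics among recent hits.
analytic_counts = client.search.facet.hit(
    "howler.analytic",
    query="event.created:[now-24h TO now]",  # illustrative time-range query
    mincount=5,
    rows=10,
)
print(analytic_counts)  # assumed to map each facet value to its count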

--- /dev/null
+++ howler_client/module/search/fields.py
@@ -0,0 +1,19 @@
+from howler_client.common.utils import SEARCHABLE, ClientError, api_path
+
+
+class Fields(object):
+    "List the fields of given indexes"
+
+    def __init__(self, connection):
+        self._connection = connection
+
+    def _do_fields(self, index):
+        if index not in SEARCHABLE:
+            raise ClientError("Index %s is not searchable" % index, 400)
+
+        path = api_path("search", "fields", index)
+        return self._connection.get(path)
+
+    def hit(self):
+        """List all field details for the hit collection."""
+        return self._do_fields("hit")
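
A short sketch for the fields endpoint, which pairs naturally with the `fl` parameter of search.hit():

# Discover which hit fields exist before building queries or `fl` lists.
hit_fields = client.search.fields.hit()
print(sorted(hit_fields))  # assumes a mapping keyed by field name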

--- /dev/null
+++ howler_client/module/search/grouped.py
@@ -0,0 +1,67 @@
+from howler_client.common.utils import SEARCHABLE, ClientError, api_path
+
+
+class Grouped(object):
+    "Module for grouping search results from given indexes"
+
+    def __init__(self, connection):
+        self._connection = connection
+
+    def _do_grouped(self, index, field, **kwargs):
+        if index not in SEARCHABLE:
+            raise ClientError("Index %s is not searchable" % index, 400)
+
+        filters = kwargs.pop("filters", None)
+        if filters is not None:
+            if isinstance(filters, str):
+                filters = [filters]
+
+            filters = [("filters", fq) for fq in filters]
+
+        kwargs = {k: v for k, v in kwargs.items() if v is not None and k != "filters"}
+        if filters is not None:
+            kwargs["params_tuples"] = filters
+        path = api_path("search", "grouped", index, field, **kwargs)
+        return self._connection.get(path)
+
+    def hit(
+        self,
+        field,
+        group_sort=None,
+        limit=None,
+        query=None,
+        filters=None,
+        offset=None,
+        rows=None,
+        sort=None,
+        fl=None,
+    ):
+        """Search the hit collection and group the results on a given field
+
+        Required:
+        field      : Field used to group the results
+
+        Optional:
+        group_sort : Field used for sorting items in the groups with direction (string: ex. 'id desc')
+        limit      : Maximum number of items returned per group (integer)
+        query      : lucene query (string)
+        filters    : Additional lucene queries used to filter the data (list of strings)
+        offset     : Offset at which the query items should start (integer)
+        rows       : Number of records to return (integer)
+        sort       : Field used for sorting with direction (string: ex. 'id desc')
+        fl         : List of fields to return (comma separated string of fields)
+
+        Returns the grouped search results.
+        """
+        return self._do_grouped(
+            "hit",
+            field,
+            group_sort=group_sort,
+            limit=limit,
+            query=query,
+            filters=filters,
+            offset=offset,
+            rows=rows,
+            sort=sort,
+            fl=fl,
+        )
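
Finally, a grouping sketch with the placeholder `client`. This mirrors what Hit.create() does internally when it groups on howler.hash with limit=1 to detect duplicates; the "items"/"value" response shape is taken from that same code, while the query and `fl` values are illustrative:

# Group open hits by analytic, keeping one example hit per group.
groups = client.search.grouped.hit(
    "howler.analytic",
    limit=1,
    query="howler.status:open",  # illustrative query
    fl="howler.id,howler.hash",
)

for group in groups["items"]:
    print(group["value"], group["items"][0])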