nucliadb 6.2.1.post3139__py3-none-any.whl → 6.2.1.post3165__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
--- a/nucliadb/search/search/exceptions.py
+++ b/nucliadb/search/search/exceptions.py
@@ -17,6 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
+from nucliadb.search.search.query_parser.exceptions import InvalidQueryError as InvalidQueryError
+
 
 class IncompleteFindResultsError(Exception):
     pass
@@ -24,10 +26,3 @@ class IncompleteFindResultsError(Exception):
 
 class ResourceNotFoundError(Exception):
     pass
-
-
-class InvalidQueryError(Exception):
-    def __init__(self, param: str, reason: str):
-        self.param = param
-        self.reason = reason
-        super().__init__(f"Invalid query. Error in {param}: {reason}")
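Note on the hunk above: `InvalidQueryError` moves into the `query_parser` package, and the old module re-exports it, so both import paths resolve to the same class. A minimal sketch of what that compatibility means for callers (the error values are illustrative):

# Both import paths point at the same class after this change, so existing
# `except` clauses keep working. The param/reason values below are made up.
from nucliadb.search.search.exceptions import InvalidQueryError as LegacyInvalidQueryError
from nucliadb.search.search.query_parser.exceptions import InvalidQueryError

assert LegacyInvalidQueryError is InvalidQueryError

try:
    raise InvalidQueryError("vectorset", "unknown vectorset id")
except LegacyInvalidQueryError as exc:
    print(exc.param, exc.reason)  # the structured fields are preserved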
--- a/nucliadb/search/search/find.py
+++ b/nucliadb/search/search/find.py
@@ -260,7 +260,7 @@ async def query_parser_from_find_request(
     # XXX this is becoming the new /find query parsing, this should be moved to
     # a cleaner abstraction
 
-    parsed = parse_find(item)
+    parsed = await parse_find(kbid, item)
 
     rank_fusion = get_rank_fusion(parsed.rank_fusion)
     reranker = get_reranker(parsed.reranker)
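`parse_find` is now a coroutine that also takes the `kbid`, so every call site must await it. A hedged sketch of the updated usage (the surrounding function is hypothetical):

from nucliadb.search.search.query_parser.parser import parse_find
from nucliadb_models.search import FindRequest

async def build_retrieval(kbid: str, item: FindRequest):
    # parse_find is now async and KB-aware: it may hit maindb or the Predict
    # API while parsing, which is why it needs the kbid and an await
    return await parse_find(kbid, item)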
--- a/nucliadb/search/search/query.py
+++ b/nucliadb/search/search/query.py
@@ -23,12 +23,9 @@ import string
 from datetime import datetime
 from typing import Any, Awaitable, Optional, Union
 
-from async_lru import alru_cache
-
 from nucliadb.common import datamanagers
-from nucliadb.common.maindb.utils import get_driver
 from nucliadb.search import logger
-from nucliadb.search.predict import SendToPredictError, convert_relations
+from nucliadb.search.predict import SendToPredictError
 from nucliadb.search.search.filters import (
     convert_to_node_filters,
     flatten_filter_literals,
@@ -39,15 +36,14 @@ from nucliadb.search.search.filters import (
 )
 from nucliadb.search.search.metrics import (
     node_features,
-    query_parse_dependency_observer,
 )
+from nucliadb.search.search.query_parser.fetcher import Fetcher, get_classification_labels
 from nucliadb.search.search.rank_fusion import (
     RankFusionAlgorithm,
 )
 from nucliadb.search.search.rerankers import (
     Reranker,
 )
-from nucliadb.search.utilities import get_predict
 from nucliadb_models.internal.predict import QueryInfo
 from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
 from nucliadb_models.metadata import ResourceProcessingStatus
@@ -63,7 +59,7 @@ from nucliadb_models.search import (
     SuggestOptions,
 )
 from nucliadb_models.security import RequestSecurity
-from nucliadb_protos import knowledgebox_pb2, nodereader_pb2, utils_pb2
+from nucliadb_protos import nodereader_pb2, utils_pb2
 from nucliadb_protos.noderesources_pb2 import Resource
 
 from .exceptions import InvalidQueryError
@@ -87,13 +83,6 @@ class QueryParser:
     """
 
     _query_information_task: Optional[asyncio.Task] = None
-    _get_vectorset_task: Optional[asyncio.Task] = None
-    _detected_entities_task: Optional[asyncio.Task] = None
-    _entities_meta_cache_task: Optional[asyncio.Task] = None
-    _deleted_entities_groups_task: Optional[asyncio.Task] = None
-    _synonyms_task: Optional[asyncio.Task] = None
-    _get_classification_labels_task: Optional[asyncio.Task] = None
-    _get_matryoshka_dimension_task: Optional[asyncio.Task] = None
 
     def __init__(
         self,
@@ -168,6 +157,15 @@ class QueryParser:
         self.max_tokens = max_tokens
         self.rank_fusion = rank_fusion
         self.reranker = reranker
+        self.fetcher = Fetcher(
+            kbid=kbid,
+            query=query,
+            user_vector=user_vector,
+            vectorset=vectorset,
+            rephrase=rephrase,
+            rephrase_prompt=rephrase_prompt,
+            generative_model=generative_model,
+        )
 
     @property
     def has_vector_search(self) -> bool:
@@ -183,78 +181,12 @@ class QueryParser:
         return self._query_information_task
 
     async def _query_information(self) -> QueryInfo:
-        vectorset = await self.select_query_vectorset()
-        return await query_information(
-            self.kbid, self.query, vectorset, self.generative_model, self.rephrase, self.rephrase_prompt
-        )
-
-    def _get_vectorset(self) -> Awaitable[Optional[str]]:
-        if self._get_vectorset_task is None:
-            self._get_vectorset_task = asyncio.create_task(self._select_vectorset())
-        return self._get_vectorset_task
-
-    async def _select_vectorset(self) -> Optional[str]:
-        if self.vectorset:
-            return self.vectorset
-
-        # When vectorset is not provided we get the default from Predict API
-
-        try:
-            query_information = await self._get_query_information()
-        except SendToPredictError:
-            return None
-
-        if query_information.sentence is None:
-            logger.error(
-                "Asking for a vectorset but /query didn't return one", extra={"kbid": self.kbid}
-            )
-            return None
-
-        for vectorset in query_information.sentence.vectors.keys():
-            self.vectorset = vectorset
-            break
-
-        return self.vectorset
-
-    def _get_matryoshka_dimension(self) -> Awaitable[Optional[int]]:
-        if self._get_matryoshka_dimension_task is None:
-            self._get_matryoshka_dimension_task = asyncio.create_task(self._matryoshka_dimension())
-        return self._get_matryoshka_dimension_task
-
-    async def _matryoshka_dimension(self) -> Optional[int]:
-        vectorset = await self._select_vectorset()
-        return await get_matryoshka_dimension_cached(self.kbid, vectorset)
-
-    def _get_detected_entities(self) -> Awaitable[list[utils_pb2.RelationNode]]:
-        if self._detected_entities_task is None:  # pragma: no cover
-            self._detected_entities_task = asyncio.create_task(detect_entities(self.kbid, self.query))
-        return self._detected_entities_task
-
-    def _get_entities_meta_cache(
-        self,
-    ) -> Awaitable[datamanagers.entities.EntitiesMetaCache]:
-        if self._entities_meta_cache_task is None:
-            self._entities_meta_cache_task = asyncio.create_task(get_entities_meta_cache(self.kbid))
-        return self._entities_meta_cache_task
-
-    def _get_deleted_entity_groups(self) -> Awaitable[list[str]]:
-        if self._deleted_entities_groups_task is None:
-            self._deleted_entities_groups_task = asyncio.create_task(
-                get_deleted_entity_groups(self.kbid)
-            )
-        return self._deleted_entities_groups_task
-
-    def _get_synomyns(self) -> Awaitable[Optional[knowledgebox_pb2.Synonyms]]:
-        if self._synonyms_task is None:
-            self._synonyms_task = asyncio.create_task(get_kb_synonyms(self.kbid))
-        return self._synonyms_task
-
-    def _get_classification_labels(self) -> Awaitable[knowledgebox_pb2.Labels]:
-        if self._get_classification_labels_task is None:
-            self._get_classification_labels_task = asyncio.create_task(
-                get_classification_labels(self.kbid)
-            )
-        return self._get_classification_labels_task
+        # HACK: while transitioning to the new query parser, use fetcher under
+        # the hood for a smoother migration
+        query_info = await self.fetcher._predict_query_endpoint()
+        if query_info is None:
+            raise SendToPredictError("Error while using predict's query endpoint")
+        return query_info
 
     async def _schedule_dependency_tasks(self) -> None:
         """
@@ -262,21 +194,22 @@ class QueryParser:
         for the sake of the query being performed
         """
         if len(self.label_filters) > 0 and has_classification_label_filters(self.flat_label_filters):
-            asyncio.ensure_future(self._get_classification_labels())
+            asyncio.ensure_future(self.fetcher.get_classification_labels())
 
         if self.has_vector_search and self.user_vector is None:
            self.query_endpoint_used = True
            asyncio.ensure_future(self._get_query_information())
-            asyncio.ensure_future(self._get_matryoshka_dimension())
+            # XXX: should we also ensure get_vectorset and get_query_vector?
+            asyncio.ensure_future(self.fetcher.get_matryoshka_dimension())
 
         if (self.has_relations_search or self.autofilter) and len(self.query) > 0:
             if not self.query_endpoint_used:
                 # If we only need to detect entities, we don't need the query endpoint
-                asyncio.ensure_future(self._get_detected_entities())
-            asyncio.ensure_future(self._get_entities_meta_cache())
-            asyncio.ensure_future(self._get_deleted_entity_groups())
+                asyncio.ensure_future(self.fetcher.get_detected_entities())
+            asyncio.ensure_future(self.fetcher.get_entities_meta_cache())
+            asyncio.ensure_future(self.fetcher.get_deleted_entity_groups())
         if self.with_synonyms and self.query:
-            asyncio.ensure_future(self._get_synomyns())
+            asyncio.ensure_future(self.fetcher.get_synonyms())
 
     async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str]]:
         """
@@ -309,7 +242,7 @@ class QueryParser:
         field_labels = self.flat_label_filters
         paragraph_labels: list[str] = []
         if has_classification_label_filters(self.flat_label_filters):
-            classification_labels = await self._get_classification_labels()
+            classification_labels = await self.fetcher.get_classification_labels()
             field_labels, paragraph_labels = split_labels_by_type(
                 self.flat_label_filters, classification_labels
             )
@@ -398,19 +331,13 @@ class QueryParser:
             semantic_min_score = self.min_score.semantic
         elif self.has_vector_search and not incomplete:
             query_information = await self._get_query_information()
-            vectorset = await self._select_vectorset()
-            if vectorset is not None:
-                semantic_threshold = query_information.semantic_thresholds.get(vectorset, None)
-                if semantic_threshold is not None:
-                    semantic_min_score = semantic_threshold
-                else:
-                    logger.warning(
-                        "Semantic threshold not found in query information, using default",
-                        extra={"kbid": self.kbid},
-                    )
+            vectorset = await self.fetcher.get_vectorset()
+            semantic_threshold = query_information.semantic_thresholds.get(vectorset, None)
+            if semantic_threshold is not None:
+                semantic_min_score = semantic_threshold
             else:
                 logger.warning(
-                    "Vectorset unset by user or predict, using default semantic threshold",
+                    "Semantic threshold not found in query information, using default",
                     extra={"kbid": self.kbid},
                 )
         self.min_score.semantic = semantic_min_score
@@ -427,70 +354,18 @@ class QueryParser:
             request.paragraph = True
             node_features.inc({"type": "paragraphs"})
 
-    async def select_query_vectorset(self) -> Optional[str]:
-        """Set and return the requested vectorset parameter (if used) validated
-        for the current KB.
-
-        """
-        if not self.vectorset:
-            return None
-
-        # validate vectorset
-        async with datamanagers.with_ro_transaction() as txn:
-            if not await datamanagers.vectorsets.exists(
-                txn, kbid=self.kbid, vectorset_id=self.vectorset
-            ):
-                raise InvalidQueryError(
-                    "vectorset",
-                    f"Vectorset {self.vectorset} doesn't exist in you Knowledge Box",
-                )
-        return self.vectorset
-
     async def parse_vector_search(self, request: nodereader_pb2.SearchRequest) -> bool:
         if not self.has_vector_search:
             return False
 
         node_features.inc({"type": "vectors"})
 
-        incomplete = False
-
-        vectorset = await self._select_vectorset()
-        if vectorset is not None:
-            request.vectorset = vectorset
-
-        query_vector = None
-        if self.user_vector is None:
-            try:
-                query_info = await self._get_query_information()
-            except SendToPredictError as err:
-                logger.warning(f"Errors on predict api trying to embedd query: {err}")
-                incomplete = True
-            else:
-                if query_info and query_info.sentence:
-                    if vectorset:
-                        if vectorset in query_info.sentence.vectors:
-                            query_vector = query_info.sentence.vectors[vectorset]
-                        else:
-                            incomplete = True
-                    else:
-                        for vectorset_id, vector in query_info.sentence.vectors.items():
-                            if vector:
-                                query_vector = vector
-                                break
-                        else:
-                            incomplete = True
-
-                else:
-                    incomplete = True
-        else:
-            query_vector = self.user_vector
+        vectorset = await self.fetcher.get_vectorset()
+        query_vector = await self.fetcher.get_query_vector()
+        incomplete = query_vector is None
 
+        request.vectorset = vectorset
         if query_vector is not None:
-            matryoshka_dimension = await self._get_matryoshka_dimension()
-            if matryoshka_dimension is not None:
-                # KB using a matryoshka embeddings model, cut the query vector
-                # accordingly
-                query_vector = query_vector[:matryoshka_dimension]
             request.vector.extend(query_vector)
 
         return incomplete
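The matryoshka truncation removed here now lives inside `Fetcher.get_query_vector`. The operation itself is just a prefix slice; a worked example with hypothetical sizes (a 768-d embedding cut to a 256-d matryoshka prefix):

# Hypothetical sizes: matryoshka-trained models pack the most informative
# components first, so a prefix of the embedding is still a usable embedding.
full_vector = [0.01 * i for i in range(768)]
matryoshka_dimension = 256

query_vector = full_vector[:matryoshka_dimension]
assert len(query_vector) == 256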
@@ -498,20 +373,15 @@ class QueryParser:
     async def parse_relation_search(self, request: nodereader_pb2.SearchRequest) -> list[str]:
         autofilters = []
         if self.has_relations_search or self.autofilter:
-            if not self.query_endpoint_used:
-                detected_entities = await self._get_detected_entities()
-            else:
-                query_info_result = await self._get_query_information()
-                if query_info_result.entities:
-                    detected_entities = convert_relations(query_info_result.entities.model_dump())
-                else:
-                    detected_entities = []
-            meta_cache = await self._get_entities_meta_cache()
+            detected_entities = await self.fetcher.get_detected_entities()
+            meta_cache = await self.fetcher.get_entities_meta_cache()
             detected_entities = expand_entities(meta_cache, detected_entities)
             if self.has_relations_search:
                 request.relation_subgraph.entry_points.extend(detected_entities)
                 request.relation_subgraph.depth = 1
-                request.relation_subgraph.deleted_groups.extend(await self._get_deleted_entity_groups())
+                request.relation_subgraph.deleted_groups.extend(
+                    await self.fetcher.get_deleted_entity_groups()
+                )
                 for group_id, deleted_entities in meta_cache.deleted_entities.items():
                     request.relation_subgraph.deleted_entities.append(
                         nodereader_pb2.EntitiesSubgraphRequest.DeletedEntities(
@@ -544,7 +414,7 @@ class QueryParser:
                 "Search with custom synonyms is only supported on paragraph and document search",
             )
 
-        synonyms = await self._get_synomyns()
+        synonyms = await self.fetcher.get_synonyms()
         if synonyms is None:
             # No synonyms found
             return
@@ -680,29 +550,6 @@ async def paragraph_query_to_pb(
     return request
 
 
-@query_parse_dependency_observer.wrap({"type": "query_information"})
-async def query_information(
-    kbid: str,
-    query: str,
-    semantic_model: Optional[str],
-    generative_model: Optional[str] = None,
-    rephrase: bool = False,
-    rephrase_prompt: Optional[str] = None,
-) -> QueryInfo:
-    predict = get_predict()
-    return await predict.query(kbid, query, semantic_model, generative_model, rephrase, rephrase_prompt)
-
-
-@query_parse_dependency_observer.wrap({"type": "detect_entities"})
-async def detect_entities(kbid: str, query: str) -> list[utils_pb2.RelationNode]:
-    predict = get_predict()
-    try:
-        return await predict.detect_entities(kbid, query)
-    except SendToPredictError as ex:
-        logger.warning(f"Errors on predict api detecting entities: {ex}")
-        return []
-
-
 def expand_entities(
     meta_cache: datamanagers.entities.EntitiesMetaCache,
     detected_entities: list[utils_pb2.RelationNode],
@@ -833,30 +680,6 @@ PROCESSING_STATUS_TO_PB_MAP = {
 }
 
 
-@query_parse_dependency_observer.wrap({"type": "synonyms"})
-async def get_kb_synonyms(kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
-    async with get_driver().transaction(read_only=True) as txn:
-        return await datamanagers.synonyms.get(txn, kbid=kbid)
-
-
-@query_parse_dependency_observer.wrap({"type": "entities_meta_cache"})
-async def get_entities_meta_cache(kbid: str) -> datamanagers.entities.EntitiesMetaCache:
-    async with get_driver().transaction(read_only=True) as txn:
-        return await datamanagers.entities.get_entities_meta_cache(txn, kbid=kbid)
-
-
-@query_parse_dependency_observer.wrap({"type": "deleted_entities_groups"})
-async def get_deleted_entity_groups(kbid: str) -> list[str]:
-    async with get_driver().transaction(read_only=True) as txn:
-        return list((await datamanagers.entities.get_deleted_groups(txn, kbid=kbid)).entities_groups)
-
-
-@query_parse_dependency_observer.wrap({"type": "classification_labels"})
-async def get_classification_labels(kbid: str) -> knowledgebox_pb2.Labels:
-    async with get_driver().transaction(read_only=True) as txn:
-        return await datamanagers.labels.get_labels(txn, kbid=kbid)
-
-
 def check_supported_filters(filters: dict[str, Any], paragraph_labels: list[str]):
     """
     Check if the provided filters are supported:
@@ -889,28 +712,6 @@ def check_supported_filters(filters: dict[str, Any], paragraph_labels: list[str]
     )
 
 
-@alru_cache(maxsize=None)
-async def get_matryoshka_dimension_cached(kbid: str, vectorset: Optional[str]) -> Optional[int]:
-    # This can be safely cached as the matryoshka dimension is not expected to change
-    return await get_matryoshka_dimension(kbid, vectorset)
-
-
-@query_parse_dependency_observer.wrap({"type": "matryoshka_dimension"})
-async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optional[int]:
-    async with get_driver().transaction(read_only=True) as txn:
-        matryoshka_dimension = None
-        if not vectorset:
-            # XXX this should be migrated once we remove the "default" vectorset
-            # concept
-            matryoshka_dimension = await datamanagers.kb.get_matryoshka_vector_dimension(txn, kbid=kbid)
-        else:
-            vectorset_config = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id=vectorset)
-            if vectorset_config is not None and vectorset_config.vectorset_index_config.vector_dimension:
-                matryoshka_dimension = vectorset_config.vectorset_index_config.vector_dimension
-
-        return matryoshka_dimension
-
-
 def get_sort_field_proto(obj: SortField) -> Optional[nodereader_pb2.OrderBy.OrderField.ValueType]:
     return {
         SortField.SCORE: None,
--- a/nucliadb/search/search/query_parser/exceptions.py
+++ b/nucliadb/search/search/query_parser/exceptions.py
@@ -19,4 +19,14 @@
 #
 
 
-class ParserError(ValueError): ...
+class InternalParserError(ValueError):
+    """Raised when parsing fails due to some internal error"""
+
+
+class InvalidQueryError(Exception):
+    """Raised when parsing a query containing an invalid parameter"""
+
+    def __init__(self, param: str, reason: str):
+        self.param = param
+        self.reason = reason
+        super().__init__(f"Invalid query. Error in {param}: {reason}")
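The split above gives callers two distinct failure modes: `InvalidQueryError` for bad user input (it carries `param` and `reason`) and `InternalParserError` for server-side parsing bugs. A hedged handler sketch; the mapping to status codes is illustrative, not the actual nucliadb endpoint code:

from nucliadb.search.search.query_parser.exceptions import (
    InternalParserError,
    InvalidQueryError,
)

def handle_parse_errors(parse):
    """Illustrative only: map the two failure modes to HTTP-style responses."""
    try:
        return parse()
    except InvalidQueryError as exc:
        # user error: safe to echo the offending parameter back to the client
        return {"status": 412, "detail": f"{exc.param}: {exc.reason}"}
    except InternalParserError:
        # server-side bug: log it, don't leak internals
        return {"status": 500, "detail": "internal error while parsing query"}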
--- /dev/null
+++ b/nucliadb/search/search/query_parser/fetcher.py
@@ -0,0 +1,399 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from typing import Optional, TypeVar, Union
+
+from async_lru import alru_cache
+from typing_extensions import TypeIs
+
+from nucliadb.common import datamanagers
+from nucliadb.common.maindb.utils import get_driver
+from nucliadb.search import logger
+from nucliadb.search.predict import SendToPredictError, convert_relations
+from nucliadb.search.search.metrics import (
+    query_parse_dependency_observer,
+)
+from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
+from nucliadb.search.utilities import get_predict
+from nucliadb_models.internal.predict import QueryInfo
+from nucliadb_protos import knowledgebox_pb2, utils_pb2
+
+
+# We use a class as cache miss marker to allow None values in the cache and to
+# make mypy happy with typing
+class NotCached:
+    pass
+
+
+not_cached = NotCached()
+
+
+T = TypeVar("T")
+
+
+def is_cached(field: Union[T, NotCached]) -> TypeIs[T]:
+    return not isinstance(field, NotCached)
+
+
+class FetcherCache:
+    predict_query_info: Union[Optional[QueryInfo], NotCached] = not_cached
+    predict_detected_entities: Union[list[utils_pb2.RelationNode], NotCached] = not_cached
+
+    # semantic search
+    query_vector: Union[Optional[list[float]], NotCached] = not_cached
+    vectorset: Union[str, NotCached] = not_cached
+    matryoshka_dimension: Union[Optional[int], NotCached] = not_cached
+
+    labels: Union[knowledgebox_pb2.Labels, NotCached] = not_cached
+
+    synonyms: Union[Optional[knowledgebox_pb2.Synonyms], NotCached] = not_cached
+
+    entities_meta_cache: Union[datamanagers.entities.EntitiesMetaCache, NotCached] = not_cached
+    deleted_entity_groups: Union[list[str], NotCached] = not_cached
+    detected_entities: Union[list[utils_pb2.RelationNode], NotCached] = not_cached
+
+
+class Fetcher:
+    """Queries are getting more and more complex and different phases of the
+    query depend on different data, not only from the user but from other parts
+    of the system.
+
+    This class is an encapsulation of data gathering across different parts of
+    the system. Given the user query input, it aims to be as efficient as
+    possible, removing redundant and expensive calls to other parts of the
+    system. An instance of a fetcher caches its results and is meant to be used
+    in the context of a single request. DO NOT use this as a global object!
+
+    """
+
+    def __init__(
+        self,
+        kbid: str,
+        *,
+        query: str,
+        user_vector: Optional[list[float]],
+        vectorset: Optional[str],
+        rephrase: bool,
+        rephrase_prompt: Optional[str],
+        generative_model: Optional[str],
+    ):
+        self.kbid = kbid
+        self.query = query
+        self.user_vector = user_vector
+        self.user_vectorset = vectorset
+        self.rephrase = rephrase
+        self.rephrase_prompt = rephrase_prompt
+        self.generative_model = generative_model
+
+        self.cache = FetcherCache()
+        self._validated = False
+
+    # Validation
+
+    async def initial_validate(self):
+        """Runs validation on the input parameters. It can raise errors if some
+        parameter is wrong.
+
+        This function should always be called if validated input for fetching
+        is desired
+        """
+        if self._validated:
+            return
+
+        self._validated = True
+
+    async def _validate_vectorset(self):
+        if self.user_vectorset is not None:
+            await validate_vectorset(self.kbid, self.user_vectorset)
+
+    # Semantic search
+
+    async def get_matryoshka_dimension(self) -> Optional[int]:
+        if is_cached(self.cache.matryoshka_dimension):
+            return self.cache.matryoshka_dimension
+
+        vectorset = await self.get_vectorset()
+        matryoshka_dimension = await get_matryoshka_dimension_cached(self.kbid, vectorset)
+        self.cache.matryoshka_dimension = matryoshka_dimension
+        return matryoshka_dimension
+
+    async def _get_user_vectorset(self) -> Optional[str]:
+        """Returns the user's requested vectorset and validates that it exists
+        in the KB.
+
+        """
+        vectorset = self.user_vectorset
+        if not self._validated:
+            await self._validate_vectorset()
+        return vectorset
+
+    async def get_vectorset(self) -> str:
+        """Get the vectorset to be used in the search. If not specified by the
+        user, Predict API or the KB itself will provide a default.
+
+        """
+
+        if is_cached(self.cache.vectorset):
+            return self.cache.vectorset
+
+        if self.user_vectorset:
+            # user explicitly asked for a vectorset
+            self.cache.vectorset = self.user_vectorset
+            return self.user_vectorset
+
+        # when it's not provided, we get the default from Predict API
+        query_info = await self._predict_query_endpoint()
+        if query_info is None:
+            vectorset = None
+        else:
+            if query_info.sentence is None:
+                logger.error(
+                    "Asking for a vectorset but /query didn't return one", extra={"kbid": self.kbid}
+                )
+                vectorset = None
+            else:
+                # vectors field is enforced by the data model to have at least one key
+                for vectorset in query_info.sentence.vectors.keys():
+                    vectorset = vectorset
+                    break
+
+        if vectorset is None:
+            # in case predict doesn't answer which vectorset to use, fall back
+            # to the first vectorset of the KB
+            async with datamanagers.with_ro_transaction() as txn:
+                async for vectorset, _ in datamanagers.vectorsets.iter(txn, kbid=self.kbid):
+                    break
+            assert vectorset is not None, "All KBs must have at least one vectorset in maindb"
+
+        self.cache.vectorset = vectorset
+        return vectorset
+
+    async def get_query_vector(self) -> Optional[list[float]]:
+        if is_cached(self.cache.query_vector):
+            return self.cache.query_vector
+
+        if self.user_vector is not None:
+            query_vector = self.user_vector
+        else:
+            query_info = await self._predict_query_endpoint()
+            if query_info is None or query_info.sentence is None:
+                self.cache.query_vector = None
+                return None
+
+            vectorset = await self.get_vectorset()
+            if vectorset not in query_info.sentence.vectors:
+                logger.warning(
+                    "Predict is not responding with a valid vectorset for the query",
+                    extra={
+                        "kbid": self.kbid,
+                        "vectorset": vectorset,
+                        "predict_vectorsets": ",".join(query_info.sentence.vectors.keys()),
+                    },
+                )
+                self.cache.query_vector = None
+                return None
+
+            query_vector = query_info.sentence.vectors[vectorset]
+
+        matryoshka_dimension = await self.get_matryoshka_dimension()
+        if matryoshka_dimension is not None:
+            if self.user_vector is not None and len(query_vector) < matryoshka_dimension:
+                # resolve the vectorset for the error message (it is not bound
+                # in the user-vector branch)
+                vectorset = await self.get_vectorset()
+                raise InvalidQueryError(
+                    "vector",
+                    f"Invalid vector length, please check valid embedding size for {vectorset} model",
+                )
+
+            # KB using a matryoshka embeddings model, cut the query vector
+            # accordingly
+            query_vector = query_vector[:matryoshka_dimension]
+
+        self.cache.query_vector = query_vector
+        return query_vector
+
+    # Labels
+
+    async def get_classification_labels(self) -> knowledgebox_pb2.Labels:
+        if is_cached(self.cache.labels):
+            return self.cache.labels
+
+        labels = await get_classification_labels(self.kbid)
+        self.cache.labels = labels
+        return labels
+
+    # Entities
+
+    async def get_entities_meta_cache(self) -> datamanagers.entities.EntitiesMetaCache:
+        if is_cached(self.cache.entities_meta_cache):
+            return self.cache.entities_meta_cache
+
+        entities_meta_cache = await get_entities_meta_cache(self.kbid)
+        self.cache.entities_meta_cache = entities_meta_cache
+        return entities_meta_cache
+
+    async def get_deleted_entity_groups(self) -> list[str]:
+        if is_cached(self.cache.deleted_entity_groups):
+            return self.cache.deleted_entity_groups
+
+        deleted_entity_groups = await get_deleted_entity_groups(self.kbid)
+        self.cache.deleted_entity_groups = deleted_entity_groups
+        return deleted_entity_groups
+
+    async def get_detected_entities(self) -> list[utils_pb2.RelationNode]:
+        if is_cached(self.cache.detected_entities):
+            return self.cache.detected_entities
+
+        # Optimization to avoid calling predict twice
+        if is_cached(self.cache.predict_query_info):
+            # /query supersets detect entities, so we already have them
+            query_info = self.cache.predict_query_info
+            if query_info is not None and query_info.entities is not None:
+                detected_entities = convert_relations(query_info.entities.model_dump())
+            else:
+                detected_entities = []
+        else:
+            # No call to /query has been done, we'll use detect entities
+            # endpoint instead (as it's faster)
+            detected_entities = await self._predict_detect_entities()
+
+        self.cache.detected_entities = detected_entities
+        return detected_entities
+
+    # Synonyms
+
+    async def get_synonyms(self) -> Optional[knowledgebox_pb2.Synonyms]:
+        if is_cached(self.cache.synonyms):
+            return self.cache.synonyms
+
+        synonyms = await get_kb_synonyms(self.kbid)
+        self.cache.synonyms = synonyms
+        return synonyms
+
+    # Predict API
+
+    async def _predict_query_endpoint(self) -> Optional[QueryInfo]:
+        if is_cached(self.cache.predict_query_info):
+            return self.cache.predict_query_info
+
+        # calling twice should be avoided, as the query endpoint is a superset of detect entities
+        if is_cached(self.cache.predict_detected_entities):
+            logger.warning("Fetcher is not being efficient enough and has called predict twice!")
+
+        # we can't call get_vectorset, as it would create a recursive loop between
+        # functions, so we'll manually parse it
+        vectorset = await self._get_user_vectorset()
+        try:
+            query_info = await query_information(
+                self.kbid,
+                self.query,
+                vectorset,
+                self.generative_model,
+                self.rephrase,
+                self.rephrase_prompt,
+            )
+        except SendToPredictError:
+            query_info = None
+
+        self.cache.predict_query_info = query_info
+        return query_info
+
+    async def _predict_detect_entities(self) -> list[utils_pb2.RelationNode]:
+        if is_cached(self.cache.predict_detected_entities):
+            return self.cache.predict_detected_entities
+
+        try:
+            detected_entities = await detect_entities(self.kbid, self.query)
+        except SendToPredictError as ex:
+            logger.warning(f"Errors on Predict API detecting entities: {ex}", extra={"kbid": self.kbid})
+            detected_entities = []
+
+        self.cache.predict_detected_entities = detected_entities
+        return detected_entities
+
+
+async def validate_vectorset(kbid: str, vectorset: str):
+    async with datamanagers.with_ro_transaction() as txn:
+        if not await datamanagers.vectorsets.exists(txn, kbid=kbid, vectorset_id=vectorset):
+            raise InvalidQueryError(
+                "vectorset", f"Vectorset {vectorset} doesn't exist in your Knowledge Box"
+            )
+
+
+@query_parse_dependency_observer.wrap({"type": "query_information"})
+async def query_information(
+    kbid: str,
+    query: str,
+    semantic_model: Optional[str],
+    generative_model: Optional[str] = None,
+    rephrase: bool = False,
+    rephrase_prompt: Optional[str] = None,
+) -> QueryInfo:
+    predict = get_predict()
+    return await predict.query(kbid, query, semantic_model, generative_model, rephrase, rephrase_prompt)
+
+
+@query_parse_dependency_observer.wrap({"type": "detect_entities"})
+async def detect_entities(kbid: str, query: str) -> list[utils_pb2.RelationNode]:
+    predict = get_predict()
+    return await predict.detect_entities(kbid, query)
+
+
+@alru_cache(maxsize=None)
+async def get_matryoshka_dimension_cached(kbid: str, vectorset: Optional[str]) -> Optional[int]:
+    # This can be safely cached as the matryoshka dimension is not expected to change
+    return await get_matryoshka_dimension(kbid, vectorset)
+
+
+@query_parse_dependency_observer.wrap({"type": "matryoshka_dimension"})
+async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optional[int]:
+    async with get_driver().transaction(read_only=True) as txn:
+        matryoshka_dimension = None
+        if not vectorset:
+            # XXX this should be migrated once we remove the "default" vectorset
+            # concept
+            matryoshka_dimension = await datamanagers.kb.get_matryoshka_vector_dimension(txn, kbid=kbid)
+        else:
+            vectorset_config = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id=vectorset)
+            if vectorset_config is not None and vectorset_config.vectorset_index_config.vector_dimension:
+                matryoshka_dimension = vectorset_config.vectorset_index_config.vector_dimension
+
+        return matryoshka_dimension
+
+
+@query_parse_dependency_observer.wrap({"type": "classification_labels"})
+async def get_classification_labels(kbid: str) -> knowledgebox_pb2.Labels:
+    async with get_driver().transaction(read_only=True) as txn:
+        return await datamanagers.labels.get_labels(txn, kbid=kbid)
+
+
+@query_parse_dependency_observer.wrap({"type": "synonyms"})
+async def get_kb_synonyms(kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
+    async with get_driver().transaction(read_only=True) as txn:
+        return await datamanagers.synonyms.get(txn, kbid=kbid)
+
+
+@query_parse_dependency_observer.wrap({"type": "entities_meta_cache"})
+async def get_entities_meta_cache(kbid: str) -> datamanagers.entities.EntitiesMetaCache:
+    async with get_driver().transaction(read_only=True) as txn:
+        return await datamanagers.entities.get_entities_meta_cache(txn, kbid=kbid)
+
+
+@query_parse_dependency_observer.wrap({"type": "deleted_entities_groups"})
+async def get_deleted_entity_groups(kbid: str) -> list[str]:
+    async with get_driver().transaction(read_only=True) as txn:
+        return list((await datamanagers.entities.get_deleted_groups(txn, kbid=kbid)).entities_groups)
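A usage sketch for the new `Fetcher` (the kbid and query values are placeholders, and actually running it needs a configured nucliadb environment): one instance per request, every getter memoizes its result, and `get_query_vector` reuses the `/query` response that `get_vectorset` may already have fetched:

import asyncio

from nucliadb.search.search.query_parser.fetcher import Fetcher

async def sketch() -> None:
    fetcher = Fetcher(
        "my-kbid",                  # placeholder kbid
        query="what is a matryoshka embedding?",
        user_vector=None,
        vectorset=None,             # let predict or the KB pick the default
        rephrase=False,
        rephrase_prompt=None,
        generative_model=None,
    )
    vectorset = await fetcher.get_vectorset()       # may call predict's /query once
    vector = await fetcher.get_query_vector()       # reuses the cached QueryInfo
    dim = await fetcher.get_matryoshka_dimension()  # cached maindb lookup
    print(vectorset, dim, vector is None)

# asyncio.run(sketch())  # needs a running nucliadb + predict setup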
--- a/nucliadb/search/search/query_parser/parser.py
+++ b/nucliadb/search/search/query_parser/parser.py
@@ -26,7 +26,7 @@ from nucliadb.search.search.filters import (
     convert_to_node_filters,
     translate_label_filters,
 )
-from nucliadb.search.search.query_parser.exceptions import ParserError
+from nucliadb.search.search.query_parser.exceptions import InternalParserError
 from nucliadb.search.search.query_parser.models import (
     CatalogFilters,
     CatalogQuery,
@@ -50,25 +50,26 @@ from nucliadb_models.search import (
 )
 
 
-def parse_find(item: FindRequest) -> UnitRetrieval:
-    parser = _FindParser(item)
-    return parser.parse()
+async def parse_find(kbid: str, item: FindRequest) -> UnitRetrieval:
+    parser = _FindParser(kbid, item)
+    return await parser.parse()
 
 
 class _FindParser:
-    def __init__(self, item: FindRequest):
+    def __init__(self, kbid: str, item: FindRequest):
+        self.kbid = kbid
         self.item = item
 
-    def parse(self) -> UnitRetrieval:
+    async def parse(self) -> UnitRetrieval:
         top_k = self._parse_top_k()
         try:
             rank_fusion = self._parse_rank_fusion()
         except ValidationError as exc:
-            raise ParserError(f"Parsing error in rank fusion: {str(exc)}") from exc
+            raise InternalParserError(f"Parsing error in rank fusion: {str(exc)}") from exc
         try:
             reranker = self._parse_reranker()
         except ValidationError as exc:
-            raise ParserError(f"Parsing error in reranker: {str(exc)}") from exc
+            raise InternalParserError(f"Parsing error in reranker: {str(exc)}") from exc
 
         # Adjust retrieval windows. Our current implementation assumes:
         # `top_k <= reranker.window <= rank_fusion.window`
@@ -98,7 +99,7 @@ class _FindParser:
             if self.item.rank_fusion == search_models.RankFusionName.RECIPROCAL_RANK_FUSION:
                 rank_fusion = ReciprocalRankFusion(window=window)
             else:
-                raise ParserError(f"Unknown rank fusion algorithm: {self.item.rank_fusion}")
+                raise InternalParserError(f"Unknown rank fusion algorithm: {self.item.rank_fusion}")
 
         elif isinstance(self.item.rank_fusion, search_models.ReciprocalRankFusion):
             user_window = self.item.rank_fusion.window
@@ -109,7 +110,7 @@ class _FindParser:
             )
 
         else:
-            raise ParserError(f"Unknown rank fusion {self.item.rank_fusion}")
+            raise InternalParserError(f"Unknown rank fusion {self.item.rank_fusion}")
 
         return rank_fusion
 
@@ -131,14 +132,14 @@ class _FindParser:
                 reranking = PredictReranker(window=min(top_k * 2, 200))
 
             else:
-                raise ParserError(f"Unknown reranker algorithm: {self.item.reranker}")
+                raise InternalParserError(f"Unknown reranker algorithm: {self.item.reranker}")
 
         elif isinstance(self.item.reranker, search_models.PredictReranker):
             user_window = self.item.reranker.window
             reranking = PredictReranker(window=min(max(user_window or 0, top_k), 200))
 
        else:
-            raise ParserError(f"Unknown reranker {self.item.reranker}")
+            raise InternalParserError(f"Unknown reranker {self.item.reranker}")
 
         return reranking
 
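The window adjustment mentioned in the hunks above enforces `top_k <= reranker.window <= rank_fusion.window`. A worked example using the two formulas visible in the diff (the numbers are arbitrary):

top_k = 120

# default predict reranker window, from the diff: min(top_k * 2, 200)
default_window = min(top_k * 2, 200)              # min(240, 200) -> 200

# a user-provided window is raised to top_k and capped at 200:
user_window = 50
clamped = min(max(user_window or 0, top_k), 200)  # min(max(50, 120), 200) -> 120

assert top_k <= clamped <= 200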
--- a/nucliadb-6.2.1.post3139.dist-info/METADATA
+++ b/nucliadb-6.2.1.post3165.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nucliadb
-Version: 6.2.1.post3139
+Version: 6.2.1.post3165
 Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
 Author: NucliaDB Community
 Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9, <4
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3139
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3139
-Requires-Dist: nucliadb-protos>=6.2.1.post3139
-Requires-Dist: nucliadb-models>=6.2.1.post3139
+Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3165
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3165
+Requires-Dist: nucliadb-protos>=6.2.1.post3165
+Requires-Dist: nucliadb-models>=6.2.1.post3165
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn
--- a/nucliadb-6.2.1.post3139.dist-info/RECORD
+++ b/nucliadb-6.2.1.post3165.dist-info/RECORD
@@ -207,10 +207,10 @@ nucliadb/search/requesters/utils.py,sha256=ZTiWDkDihJ7rcvs7itCe8hr6OclVcvu_2EAPF
 nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/cache.py,sha256=n9vkN6Y6Xnr2RBJyoH0WzjzGTJOMfKekU9tfPTWWCPc,6810
 nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
-nucliadb/search/search/exceptions.py,sha256=mbToQ-ghrv8ukLEv8S_-EZrgweWaIZZ5SIpoeuGDk6s,1154
+nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
 nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
 nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
-nucliadb/search/search/find.py,sha256=iJrLR9ML01Z0FZ5FOc80Z9S9IxCjgJiHyTZjwSRREDI,9889
+nucliadb/search/search/find.py,sha256=DaO3CPBQqRAw-iK_DNf_gM-aEipjtuX6oA2TbAplkxs,9901
 nucliadb/search/search/find_merge.py,sha256=5Aqz54E5GG8jw666KNncVHIJcs821ug-YwJ46YL6Br8,17363
 nucliadb/search/search/graph_strategy.py,sha256=Egcq_zn895gTUYmyQTsXj8YaUMa3HBKhcSa1GBvgzAM,31877
 nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
@@ -219,7 +219,7 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
 nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
 nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
-nucliadb/search/search/query.py,sha256=CbCLdkBgD1XRVXN5qgT1rforf28cLNXvKbjbZzQYmUA,38051
+nucliadb/search/search/query.py,sha256=doRdBhM928wB64v271RSyJxsRT5qd6oevImEMz4gpvw,29487
 nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
 nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
 nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
@@ -232,9 +232,10 @@ nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqE
 nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR86qAVa7k,47106
 nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeDncSrEp4,15684
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/search/query_parser/exceptions.py,sha256=tuzl7ZyvVsRz6u0_3zMe60vx39nd3pi641prs-5nC0E,872
+nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
+nucliadb/search/search/query_parser/fetcher.py,sha256=NnzbRIhtg15_N9rw6uNXgPLNOjmO_dv8HMvAskLZ6-g,15496
 nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
-nucliadb/search/search/query_parser/parser.py,sha256=7L7vcEKIum07HeQ6F2EzKCYe3u6CFCgWsu16r91h3S0,6371
+nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
 nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/standalone/api_router.py,sha256=zR03TQ-Pd2kXx1jeV83Puw19112Z8Jhln7p1cAn69kg,6699
 nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -331,9 +332,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.2.1.post3139.dist-info/METADATA,sha256=olyfxfEent03zeFNUA2bZClJgYjD0cPL8ZHVa6xbM50,4603
-nucliadb-6.2.1.post3139.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nucliadb-6.2.1.post3139.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.2.1.post3139.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.2.1.post3139.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-nucliadb-6.2.1.post3139.dist-info/RECORD,,
+nucliadb-6.2.1.post3165.dist-info/METADATA,sha256=9FA7BAbWWQlT3pJKH0iexO3PiSOl7mpz-PAh8W7kdxs,4603
+nucliadb-6.2.1.post3165.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nucliadb-6.2.1.post3165.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.2.1.post3165.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.2.1.post3165.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb-6.2.1.post3165.dist-info/RECORD,,