nucliadb-6.4.2.post4389-py3-none-any.whl → nucliadb-6.5.0.post4404-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. nucliadb/common/exceptions.py +28 -0
  2. nucliadb/{search/search/query_parser → common}/filter_expression.py +1 -1
  3. nucliadb/search/api/v1/catalog.py +1 -1
  4. nucliadb/search/api/v1/find.py +1 -1
  5. nucliadb/search/api/v1/resource/search.py +1 -1
  6. nucliadb/search/api/v1/search.py +1 -1
  7. nucliadb/search/api/v1/suggest.py +1 -1
  8. nucliadb/search/search/chat/ask.py +1 -1
  9. nucliadb/search/search/exceptions.py +0 -2
  10. nucliadb/search/search/filters.py +1 -2
  11. nucliadb/search/search/query.py +2 -2
  12. nucliadb/search/search/query_parser/exceptions.py +0 -9
  13. nucliadb/search/search/query_parser/fetcher.py +1 -1
  14. nucliadb/search/search/query_parser/old_filters.py +1 -1
  15. nucliadb/search/search/query_parser/parsers/catalog.py +2 -2
  16. nucliadb/search/search/query_parser/parsers/common.py +1 -1
  17. nucliadb/search/search/query_parser/parsers/find.py +3 -2
  18. nucliadb/search/search/query_parser/parsers/graph.py +1 -1
  19. nucliadb/search/search/query_parser/parsers/search.py +2 -2
  20. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +1 -1
  21. nucliadb/train/api/utils.py +0 -7
  22. nucliadb/train/api/v1/shards.py +37 -10
  23. nucliadb/train/generator.py +17 -12
  24. nucliadb/train/generators/field_classifier.py +3 -1
  25. nucliadb/train/generators/field_streaming.py +35 -16
  26. nucliadb/train/generators/image_classifier.py +3 -1
  27. nucliadb/train/generators/paragraph_classifier.py +3 -1
  28. nucliadb/train/generators/paragraph_streaming.py +3 -1
  29. nucliadb/train/generators/question_answer_streaming.py +3 -1
  30. nucliadb/train/generators/sentence_classifier.py +3 -1
  31. nucliadb/train/generators/token_classifier.py +3 -1
  32. {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/METADATA +6 -6
  33. {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/RECORD +36 -35
  34. {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/WHEEL +0 -0
  35. {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/entry_points.txt +0 -0
  36. {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,28 @@
+ # Copyright (C) 2021 Bosutech XXI S.L.
+ #
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
+ # For commercial licensing, contact us at info@nuclia.com.
+ #
+ # AGPL:
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Affero General Public License as
+ # published by the Free Software Foundation, either version 3 of the
+ # License, or (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU Affero General Public License for more details.
+ #
+ # You should have received a copy of the GNU Affero General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ #
+
+
+ class InvalidQueryError(Exception):
+     """Raised when parsing a query containing an invalid parameter"""
+
+     def __init__(self, param: str, reason: str):
+         self.param = param
+         self.reason = reason
+         super().__init__(f"Invalid query. Error in {param}: {reason}")
@@ -23,8 +23,8 @@ from typing import Union
  from nidx_protos.nodereader_pb2 import FilterExpression as PBFilterExpression
 
  from nucliadb.common import datamanagers
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.common.ids import FIELD_TYPE_NAME_TO_STR
- from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb_models.filters import (
      And,
      DateCreated,
@@ -26,6 +26,7 @@ from fastapi_versioning import version
  from pydantic import ValidationError
 
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.common.maindb.pg import PGDriver
  from nucliadb.common.maindb.utils import get_driver
  from nucliadb.models.responses import HTTPClientError
@@ -33,7 +34,6 @@ from nucliadb.search import logger
  from nucliadb.search.api.v1.router import KB_PREFIX, api
  from nucliadb.search.api.v1.utils import fastapi_query
  from nucliadb.search.search import cache
- from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb.search.search.merge import fetch_resources
  from nucliadb.search.search.pgcatalog import pgcatalog_search
  from nucliadb.search.search.query_parser.parsers import parse_catalog
@@ -27,12 +27,12 @@ from pydantic import ValidationError
 
  from nucliadb.common import datamanagers
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.models.responses import HTTPClientError
  from nucliadb.search import predict
  from nucliadb.search.api.v1.router import KB_PREFIX, api
  from nucliadb.search.api.v1.utils import fastapi_query
  from nucliadb.search.search import cache
- from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb.search.search.find import find
  from nucliadb.search.search.metrics import Metrics
  from nucliadb.search.search.utils import maybe_log_request_payload, min_score_from_query_params
@@ -24,12 +24,12 @@ from fastapi import Header, Request, Response
  from fastapi_versioning import version
  from pydantic import ValidationError
 
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.models.responses import HTTPClientError
  from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
  from nucliadb.search.api.v1.utils import fastapi_query
  from nucliadb.search.requesters.utils import Method, nidx_query
  from nucliadb.search.search import cache
- from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb.search.search.merge import merge_paragraphs_results
  from nucliadb.search.search.query import paragraph_query_to_pb
  from nucliadb_models.filters import FilterExpression
@@ -27,6 +27,7 @@ from fastapi_versioning import version
  from pydantic import ValidationError
 
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.common.models_utils import to_proto
  from nucliadb.models.responses import HTTPClientError
  from nucliadb.search import predict
@@ -34,7 +35,6 @@ from nucliadb.search.api.v1.router import KB_PREFIX, api
  from nucliadb.search.api.v1.utils import fastapi_query
  from nucliadb.search.requesters.utils import Method, nidx_query
  from nucliadb.search.search import cache
- from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb.search.search.merge import merge_results
  from nucliadb.search.search.query_parser.parsers.search import parse_search
  from nucliadb.search.search.query_parser.parsers.unit_retrieval import legacy_convert_retrieval_to_proto
@@ -25,12 +25,12 @@ from fastapi import Header, Request, Response
  from fastapi_versioning import version
  from pydantic import ValidationError
 
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.models.responses import HTTPClientError
  from nucliadb.search.api.v1.router import KB_PREFIX, api
  from nucliadb.search.api.v1.utils import fastapi_query
  from nucliadb.search.requesters.utils import Method, nidx_query
  from nucliadb.search.search import cache
- from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb.search.search.merge import merge_suggest_results
  from nucliadb.search.search.query import suggest_query_to_pb
  from nucliadb.search.search.utils import filter_hidden_resources
@@ -33,6 +33,7 @@ from nuclia_models.predict.generative_responses import (
  from pydantic_core import ValidationError
 
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.common.external_index_providers.base import ScoredTextBlock
  from nucliadb.common.ids import ParagraphId
  from nucliadb.models.responses import HTTPClientError
@@ -59,7 +60,6 @@ from nucliadb.search.search.chat.query import (
  )
  from nucliadb.search.search.exceptions import (
      IncompleteFindResultsError,
-     InvalidQueryError,
  )
  from nucliadb.search.search.graph_strategy import get_graph_results
  from nucliadb.search.search.metrics import AskMetrics, Metrics
@@ -17,8 +17,6 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
- from nucliadb.search.search.query_parser.exceptions import InvalidQueryError as InvalidQueryError
-
 
  class IncompleteFindResultsError(Exception):
      pass
@@ -20,12 +20,11 @@
  from collections.abc import Iterator
  from typing import Any, Optional, Union
 
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb_models.labels import translate_alias_to_system_label
  from nucliadb_models.search import Filter
  from nucliadb_protos import knowledgebox_pb2
 
- from .exceptions import InvalidQueryError
-
  ENTITY_PREFIX = "/e/"
  CLASSIFICATION_LABEL_PREFIX = "/l/"
 
@@ -24,6 +24,8 @@ from nidx_protos import nodereader_pb2
  from nidx_protos.noderesources_pb2 import Resource
 
  from nucliadb.common import datamanagers
+ from nucliadb.common.exceptions import InvalidQueryError
+ from nucliadb.common.filter_expression import add_and_expression, parse_expression
  from nucliadb.search.search.filters import (
      translate_label,
  )
@@ -38,8 +40,6 @@ from nucliadb_models.search import (
  )
  from nucliadb_protos import utils_pb2
 
- from .exceptions import InvalidQueryError
- from .query_parser.filter_expression import add_and_expression, parse_expression
  from .query_parser.old_filters import OldFilterParams, parse_old_filters
 
 
@@ -21,12 +21,3 @@
 
  class InternalParserError(ValueError):
      """Raised when parsing fails due to some internal error"""
-
-
- class InvalidQueryError(Exception):
-     """Raised when parsing a query containing an invalid parameter"""
-
-     def __init__(self, param: str, reason: str):
-         self.param = param
-         self.reason = reason
-         super().__init__(f"Invalid query. Error in {param}: {reason}")
@@ -24,13 +24,13 @@ from async_lru import alru_cache
  from typing_extensions import TypeIs
 
  from nucliadb.common import datamanagers
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.common.maindb.utils import get_driver
  from nucliadb.search import logger
  from nucliadb.search.predict import SendToPredictError, convert_relations
  from nucliadb.search.search.metrics import (
      query_parse_dependency_observer,
  )
- from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
  from nucliadb.search.utilities import get_predict
  from nucliadb_models.internal.predict import QueryInfo
  from nucliadb_models.search import (
@@ -24,13 +24,13 @@ from typing import Optional, Union
 
  from nidx_protos.nodereader_pb2 import FilterExpression
 
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.search.search.filters import translate_label
  from nucliadb_models.search import (
      Filter,
  )
  from nucliadb_protos import knowledgebox_pb2
 
- from .exceptions import InvalidQueryError
  from .fetcher import Fetcher
 
 
@@ -19,9 +19,9 @@
  #
 
  from nucliadb.common import datamanagers
- from nucliadb.search.search.exceptions import InvalidQueryError
+ from nucliadb.common.exceptions import InvalidQueryError
+ from nucliadb.common.filter_expression import FacetFilterTypes, facet_from_filter
  from nucliadb.search.search.filters import translate_label
- from nucliadb.search.search.query_parser.filter_expression import FacetFilterTypes, facet_from_filter
  from nucliadb.search.search.query_parser.models import (
      CatalogExpression,
      CatalogQuery,
@@ -21,8 +21,8 @@ import re
  import string
  from typing import Optional, Union
 
+ from nucliadb.common.exceptions import InvalidQueryError
  from nucliadb.search import logger
- from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
  from nucliadb.search.search.query_parser.fetcher import Fetcher
  from nucliadb.search.search.query_parser.models import (
      KeywordQuery,
@@ -23,12 +23,13 @@ from typing import Optional
  from nidx_protos import nodereader_pb2
  from pydantic import ValidationError
 
+ from nucliadb.common.exceptions import InvalidQueryError
+ from nucliadb.common.filter_expression import parse_expression
  from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
  from nucliadb.search.search.metrics import query_parser_observer
  from nucliadb.search.search.query import expand_entities
- from nucliadb.search.search.query_parser.exceptions import InternalParserError, InvalidQueryError
+ from nucliadb.search.search.query_parser.exceptions import InternalParserError
  from nucliadb.search.search.query_parser.fetcher import Fetcher
- from nucliadb.search.search.query_parser.filter_expression import parse_expression
  from nucliadb.search.search.query_parser.models import (
      Filters,
      GraphQuery,
@@ -22,8 +22,8 @@ from typing import Optional, Union
 
  from nidx_protos import nodereader_pb2
 
+ from nucliadb.common.filter_expression import add_and_expression, parse_expression
  from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap, RelationTypeMap
- from nucliadb.search.search.query_parser.filter_expression import add_and_expression, parse_expression
  from nucliadb.search.search.query_parser.models import GraphRetrieval
  from nucliadb.search.search.utils import filter_hidden_resources
  from nucliadb_models.graph import requests as graph_requests
@@ -21,11 +21,11 @@ from typing import Optional
 
  from nidx_protos import nodereader_pb2
 
+ from nucliadb.common.exceptions import InvalidQueryError
+ from nucliadb.common.filter_expression import parse_expression
  from nucliadb.search.search.metrics import query_parser_observer
  from nucliadb.search.search.query import expand_entities
- from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
  from nucliadb.search.search.query_parser.fetcher import Fetcher
- from nucliadb.search.search.query_parser.filter_expression import parse_expression
  from nucliadb.search.search.query_parser.models import (
      Filters,
      ParsedQuery,
@@ -22,10 +22,10 @@ from typing import Optional
  from nidx_protos import nodereader_pb2
  from nidx_protos.nodereader_pb2 import SearchRequest
 
+ from nucliadb.common.filter_expression import add_and_expression
  from nucliadb.search.search.filters import translate_label
  from nucliadb.search.search.metrics import node_features, query_parser_observer
  from nucliadb.search.search.query import apply_entities_filter, get_sort_field_proto
- from nucliadb.search.search.query_parser.filter_expression import add_and_expression
  from nucliadb.search.search.query_parser.models import ParsedQuery, PredictReranker, UnitRetrieval
  from nucliadb.search.search.query_parser.parsers.graph import parse_path_query
  from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
@@ -22,7 +22,6 @@
  from typing import Optional
 
  from nucliadb.train.utils import get_shard_manager
- from nucliadb_protos.dataset_pb2 import TrainSet
 
 
  async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
@@ -35,9 +34,3 @@ async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
          if shard.shard.startswith(prefix):
              valid_shards.append(shard.shard)
      return valid_shards
-
-
- def get_train(trainset: bytes) -> TrainSet:
-     train = TrainSet()
-     train.ParseFromString(trainset)
-     return train
@@ -17,16 +17,22 @@
  # You should have received a copy of the GNU Affero General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
+ import json
+ from typing import Optional
 
-
+ import google.protobuf.message
+ import pydantic
  from fastapi import HTTPException, Request
  from fastapi.responses import StreamingResponse
  from fastapi_versioning import version
 
- from nucliadb.train.api.utils import get_kb_partitions, get_train
+ from nucliadb.train.api.utils import get_kb_partitions
  from nucliadb.train.api.v1.router import KB_PREFIX, api
  from nucliadb.train.generator import generate_train_data
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_models.resource import NucliaDBRoles
+ from nucliadb_models.trainset import TrainSet as TrainSetModel
+ from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
  from nucliadb_utils.authentication import requires_one
 
 
@@ -43,14 +49,35 @@ async def object_get_response(
      kbid: str,
      shard: str,
  ) -> StreamingResponse:
-     item: bytes = await request.body()
-     trainset = get_train(item)
-     all_keys = await get_kb_partitions(kbid, shard)
-
-     if len(all_keys) == 0:
-         raise HTTPException(status_code=404)
-
+     partitions = await get_kb_partitions(kbid, shard)
+     if shard not in partitions:
+         raise HTTPException(status_code=404, detail=f"Partition {shard} not found")
+     trainset, filter_expression = await get_trainset(request)
      return StreamingResponse(
-         generate_train_data(kbid, shard, trainset),
+         generate_train_data(kbid, shard, trainset, filter_expression),
          media_type="application/octet-stream",
      )
+
+
+ async def get_trainset(request: Request) -> tuple[TrainSet, Optional[FilterExpression]]:
+     if request.headers.get("Content-Type") == "application/json":
+         try:
+             trainset_model = TrainSetModel.model_validate(await request.json())
+         except (pydantic.ValidationError, json.JSONDecodeError, ValueError) as err:
+             raise HTTPException(status_code=422, detail=str(err))
+         trainset_pb = TrainSet(
+             type=TaskType.ValueType(trainset_model.type.value),
+             batch_size=trainset_model.batch_size,
+             exclude_text=trainset_model.exclude_text,
+         )
+         filter_expression = trainset_model.filter_expression
+     else:
+         # Legacy version of the endpoint where the encoded TrainSet protobuf is passed as request body.
+         trainset_pb = TrainSet()
+         try:
+             trainset_pb.ParseFromString(await request.body())
+         except google.protobuf.message.DecodeError as err:
+             raise HTTPException(status_code=422, detail=str(err))
+         # Filter expressions not supported on legacy version of the endpoint
+         filter_expression = None
+     return trainset_pb, filter_expression
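With this change the endpoint accepts either the legacy protobuf body or a JSON-encoded TrainSet. A hedged sketch of a JSON call follows; the URL, the task-type literal and the httpx client are assumptions for illustration, and only the field names come from get_trainset() above:

    import httpx

    # Hypothetical route; the real prefix is defined in nucliadb.train.api.v1.router.
    url = f"http://localhost:8080/api/v1/kb/{kbid}/trainset/{shard}"
    body = {
        "type": "FIELD_STREAMING",   # assumed literal; must be a task type accepted by TrainSetModel
        "batch_size": 50,
        "exclude_text": False,
        "filter_expression": None,   # or a nucliadb_models.filters.FilterExpression payload
    }
    # The application/json Content-Type is what routes the request through the JSON branch of get_trainset().
    response = httpx.post(url, json=body, headers={"Content-Type": "application/json"})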
@@ -18,7 +18,7 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncIterator, Optional
+ from typing import AsyncIterator, Callable, Optional
 
  from fastapi import HTTPException
 
@@ -48,10 +48,15 @@ from nucliadb.train.generators.token_classifier import (
  from nucliadb.train.settings import settings
  from nucliadb.train.types import TrainBatch
  from nucliadb.train.utils import get_shard_manager
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
 
+ BatchGenerator = Callable[[str, TrainSet, str, Optional[FilterExpression]], AsyncIterator[TrainBatch]]
 
- async def generate_train_data(kbid: str, shard: str, trainset: TrainSet):
+
+ async def generate_train_data(
+     kbid: str, shard: str, trainset: TrainSet, filter_expression: Optional[FilterExpression] = None
+ ):
      # Get the data structure to generate data
      shard_manager = get_shard_manager()
      shard_replica_id = await shard_manager.get_shard_id(kbid, shard)
@@ -59,25 +64,25 @@ async def generate_train_data(kbid: str, shard: str, trainset: TrainSet):
      if trainset.batch_size == 0:
          trainset.batch_size = 50
 
-     batch_generator: Optional[AsyncIterator[TrainBatch]] = None
+     batch_generator: Optional[BatchGenerator] = None
 
      if trainset.type == TaskType.FIELD_CLASSIFICATION:
-         batch_generator = field_classification_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = field_classification_batch_generator
      elif trainset.type == TaskType.IMAGE_CLASSIFICATION:
-         batch_generator = image_classification_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = image_classification_batch_generator
      elif trainset.type == TaskType.PARAGRAPH_CLASSIFICATION:
-         batch_generator = paragraph_classification_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = paragraph_classification_batch_generator
      elif trainset.type == TaskType.TOKEN_CLASSIFICATION:
-         batch_generator = token_classification_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = token_classification_batch_generator
      elif trainset.type == TaskType.SENTENCE_CLASSIFICATION:
-         batch_generator = sentence_classification_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = sentence_classification_batch_generator
      elif trainset.type == TaskType.PARAGRAPH_STREAMING:
-         batch_generator = paragraph_streaming_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = paragraph_streaming_batch_generator
 
      elif trainset.type == TaskType.QUESTION_ANSWER_STREAMING:
-         batch_generator = question_answer_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = question_answer_batch_generator
      elif trainset.type == TaskType.FIELD_STREAMING:
-         batch_generator = field_streaming_batch_generator(kbid, trainset, shard_replica_id)
+         batch_generator = field_streaming_batch_generator
 
      if batch_generator is None:
          raise HTTPException(
@@ -88,7 +93,7 @@ async def generate_train_data(kbid: str, shard: str, trainset: TrainSet):
      # This cache size is an arbitrary number, once we have a metric in place and
      # we analyze memory consumption, we can adjust it with more knoweldge
      with resource_cache(size=settings.resource_cache_size):
-         async for item in batch_generator:
+         async for item in batch_generator(kbid, trainset, shard_replica_id, filter_expression):
              payload = item.SerializeToString()
              yield len(payload).to_bytes(4, byteorder="big", signed=False)
              yield payload
@@ -18,7 +18,7 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncGenerator
+ from typing import AsyncGenerator, Optional
 
  from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -26,6 +26,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
  from nucliadb.common.nidx import get_nidx_searcher_client
  from nucliadb.train import logger
  from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      FieldClassificationBatch,
      Label,
@@ -38,6 +39,7 @@ def field_classification_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[FieldClassificationBatch, None]:
      generator = generate_field_classification_payloads(kbid, trainset, shard_replica_id)
      batch_generator = batchify(generator, trainset.batch_size, FieldClassificationBatch)
@@ -23,11 +23,15 @@ from typing import AsyncGenerator, AsyncIterable, Optional
 
  from nidx_protos.nodereader_pb2 import DocumentItem, StreamRequest
 
+ from nucliadb.common.filter_expression import parse_expression
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
  from nucliadb.common.nidx import get_nidx_searcher_client
  from nucliadb.train import logger
  from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
  from nucliadb.train.settings import settings
+ from nucliadb_models.filters import (
+     FilterExpression,
+ )
  from nucliadb_protos.dataset_pb2 import (
      FieldSplitData,
      FieldStreamingBatch,
@@ -41,32 +45,23 @@ def field_streaming_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[FieldStreamingBatch, None]:
-     generator = generate_field_streaming_payloads(kbid, trainset, shard_replica_id)
+     generator = generate_field_streaming_payloads(kbid, trainset, shard_replica_id, filter_expression)
      batch_generator = batchify(generator, trainset.batch_size, FieldStreamingBatch)
      return batch_generator
 
 
  async def generate_field_streaming_payloads(
-     kbid: str,
-     trainset: TrainSet,
-     shard_replica_id: str,
+     kbid: str, trainset: TrainSet, shard_replica_id: str, filter_expression: Optional[FilterExpression]
  ) -> AsyncGenerator[FieldSplitData, None]:
      request = StreamRequest()
      request.shard_id.id = shard_replica_id
 
-     for label in trainset.filter.labels:
-         request.filter.labels.append(f"/l/{label}")
-     for path in trainset.filter.paths:
-         request.filter.labels.append(f"/p/{path}")
-     for metadata in trainset.filter.metadata:
-         request.filter.labels.append(f"/m/{metadata}")
-     for entity in trainset.filter.entities:
-         request.filter.labels.append(f"/e/{entity}")
-     for field in trainset.filter.fields:
-         request.filter.labels.append(f"/f/{field}")
-     for status in trainset.filter.status:
-         request.filter.labels.append(f"/n/s/{status}")
+     if filter_expression:
+         await parse_filter_expression(kbid, request, filter_expression)
+     else:
+         parse_legacy_filters(request, trainset)
 
      resources = set()
      fields = set()
@@ -107,6 +102,30 @@ async def generate_field_streaming_payloads(
  )
 
 
+ async def parse_filter_expression(
+     kbid: str, request: StreamRequest, filter_expression: FilterExpression
+ ):
+     if filter_expression.field:
+         expr = await parse_expression(filter_expression.field, kbid)
+         if expr:
+             request.filter_expression.CopyFrom(expr)
+
+
+ def parse_legacy_filters(request: StreamRequest, trainset: TrainSet):
+     for label in trainset.filter.labels:
+         request.filter.labels.append(f"/l/{label}")
+     for path in trainset.filter.paths:
+         request.filter.labels.append(f"/p/{path}")
+     for metadata in trainset.filter.metadata:
+         request.filter.labels.append(f"/m/{metadata}")
+     for entity in trainset.filter.entities:
+         request.filter.labels.append(f"/e/{entity}")
+     for field in trainset.filter.fields:
+         request.filter.labels.append(f"/f/{field}")
+     for status in trainset.filter.status:
+         request.filter.labels.append(f"/n/s/{status}")
+
+
  async def iter_field_split_data(
      request: StreamRequest, kbid: str, trainset: TrainSet, max_parallel: int = 5
  ) -> AsyncIterable[FieldSplitData]:
@@ -18,9 +18,10 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncGenerator
+ from typing import AsyncGenerator, Optional
 
  from nucliadb.train.generators.utils import batchify
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      ImageClassification,
      ImageClassificationBatch,
@@ -32,6 +33,7 @@ def image_classification_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[ImageClassificationBatch, None]:
      generator = generate_image_classification_payloads(kbid, trainset, shard_replica_id)
      batch_generator = batchify(generator, trainset.batch_size, ImageClassificationBatch)
@@ -18,13 +18,14 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncGenerator
+ from typing import AsyncGenerator, Optional
 
  from fastapi import HTTPException
  from nidx_protos.nodereader_pb2 import StreamRequest
 
  from nucliadb.common.nidx import get_nidx_searcher_client
  from nucliadb.train.generators.utils import batchify, get_paragraph
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      Label,
      ParagraphClassificationBatch,
@@ -37,6 +38,7 @@ def paragraph_classification_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[ParagraphClassificationBatch, None]:
      if len(trainset.filter.labels) != 1:
          raise HTTPException(
@@ -18,7 +18,7 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncGenerator
+ from typing import AsyncGenerator, Optional
 
  from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -26,6 +26,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
  from nucliadb.common.nidx import get_nidx_searcher_client
  from nucliadb.train import logger
  from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      ParagraphStreamingBatch,
      ParagraphStreamItem,
@@ -37,6 +38,7 @@ def paragraph_streaming_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[ParagraphStreamingBatch, None]:
      generator = generate_paragraph_streaming_payloads(kbid, trainset, shard_replica_id)
      batch_generator = batchify(generator, trainset.batch_size, ParagraphStreamingBatch)
@@ -18,7 +18,7 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncGenerator
+ from typing import AsyncGenerator, Optional
 
  from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -30,6 +30,7 @@ from nucliadb.train.generators.utils import (
      get_paragraph,
      get_resource_from_cache_or_db,
  )
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      QuestionAnswerStreamingBatch,
      QuestionAnswerStreamItem,
@@ -46,6 +47,7 @@ def question_answer_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]:
      generator = generate_question_answer_streaming_payloads(kbid, trainset, shard_replica_id)
      batch_generator = batchify(generator, trainset.batch_size, QuestionAnswerStreamingBatch)
@@ -18,7 +18,7 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  #
 
- from typing import AsyncGenerator
+ from typing import AsyncGenerator, Optional
 
  from fastapi import HTTPException
  from nidx_protos.nodereader_pb2 import StreamRequest
@@ -27,6 +27,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
  from nucliadb.common.nidx import get_nidx_searcher_client
  from nucliadb.train import logger
  from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      Label,
      MultipleTextSameLabels,
@@ -39,6 +40,7 @@ def sentence_classification_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[SentenceClassificationBatch, None]:
      if len(trainset.filter.labels) == 0:
          raise HTTPException(
@@ -19,7 +19,7 @@
  #
 
  from collections import OrderedDict
- from typing import AsyncGenerator, cast
+ from typing import AsyncGenerator, Optional, cast
 
  from nidx_protos.nodereader_pb2 import StreamFilter, StreamRequest
 
@@ -27,6 +27,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
  from nucliadb.common.nidx import get_nidx_searcher_client
  from nucliadb.train import logger
  from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
+ from nucliadb_models.filters import FilterExpression
  from nucliadb_protos.dataset_pb2 import (
      TokenClassificationBatch,
      TokensClassification,
@@ -42,6 +43,7 @@ def token_classification_batch_generator(
      kbid: str,
      trainset: TrainSet,
      shard_replica_id: str,
+     filter_expression: Optional[FilterExpression],
  ) -> AsyncGenerator[TokenClassificationBatch, None]:
      generator = generate_token_classification_payloads(kbid, trainset, shard_replica_id)
      batch_generator = batchify(generator, trainset.batch_size, TokenClassificationBatch)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nucliadb
- Version: 6.4.2.post4389
+ Version: 6.5.0.post4404
  Summary: NucliaDB
  Author-email: Nuclia <nucliadb@nuclia.com>
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3 :: Only
  Requires-Python: <4,>=3.9
  Description-Content-Type: text/markdown
- Requires-Dist: nucliadb-telemetry[all]>=6.4.2.post4389
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.2.post4389
- Requires-Dist: nucliadb-protos>=6.4.2.post4389
- Requires-Dist: nucliadb-models>=6.4.2.post4389
- Requires-Dist: nidx-protos>=6.4.2.post4389
+ Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4404
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4404
+ Requires-Dist: nucliadb-protos>=6.5.0.post4404
+ Requires-Dist: nucliadb-models>=6.5.0.post4404
+ Requires-Dist: nidx-protos>=6.5.0.post4404
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
  Requires-Dist: nuclia-models>=0.24.2
  Requires-Dist: uvicorn[standard]
@@ -57,6 +57,8 @@ nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,8
  nucliadb/common/cache.py,sha256=NM69CVvNjlh58jiVUF1JeYPmBO7_L4rB3tffxK0k_vI,6549
  nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
  nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
+ nucliadb/common/exceptions.py,sha256=_PJk_NfAhZBFBvmgAfvsJKZ9KuRt5Y1cNsH3-cXE07w,1120
+ nucliadb/common/filter_expression.py,sha256=aRbGfg2pci7sUo1KgSqcm72Eu-3Ordy8knnW6DYId5o,6568
  nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
  nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
  nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
@@ -213,30 +215,30 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
  nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
  nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
- nucliadb/search/api/v1/catalog.py,sha256=W0cPWuC27Y4bO7Ifl1VQp8OPYfF5gv5yeWZBsuJMxUU,7721
+ nucliadb/search/api/v1/catalog.py,sha256=mVAPPf6CXimVOsBpbhPo63KXf8eXps--cifZOEQAIyk,7714
  nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
- nucliadb/search/api/v1/find.py,sha256=C4sTGFRS9tQFF8v1zhnHQvnExJoGDYi78bZTRfwhGrc,10831
+ nucliadb/search/api/v1/find.py,sha256=iMjyq4y0JOMC_x1B8kUfVdkCoc9G9Ark58kPLLY4HDw,10824
  nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
  nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
  nucliadb/search/api/v1/predict_proxy.py,sha256=Q03ZTvWp7Sq0x71t5Br4LHxTiYsRd6-GCb4YuKqhynM,3131
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
- nucliadb/search/api/v1/search.py,sha256=bp2JfBO_wiPl7vG3-MXJfqdFfIGwJM3L25UqqGWj4V4,12304
- nucliadb/search/api/v1/suggest.py,sha256=GJ7DveD6c9_h0m6NbI7IAvfO2j82TtrGuLg6UF3GBh4,6350
+ nucliadb/search/api/v1/search.py,sha256=eqlrvRE7IlMpunNwD1RJwt6RgMV01sIDJLgxxE7CFcE,12297
+ nucliadb/search/api/v1/suggest.py,sha256=gaJE60r8-z6TVO05mQRKBITwXn2_ofM3B4-OtpOgZEk,6343
  nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
  nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
  nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=AZ5_cH1jbf7d5wh_gz6EHLEKAzEOMrQZwEZAu1Q_3FE,4846
- nucliadb/search/api/v1/resource/search.py,sha256=Gnn4CY5NO4AK5ZWwrSIRJqBDm16u8k0XtpUwDXEBeYY,4930
+ nucliadb/search/api/v1/resource/search.py,sha256=PZR7fs5oYD0RKqKoD38NZMAnOJzBv35NB2YOr2xy1ck,4923
  nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
  nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
  nucliadb/search/requesters/utils.py,sha256=Ne5fweSWk9hettQKyUZAMZrw_MTjPE5W_EVqj4p5XiI,6109
  nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  nucliadb/search/search/cache.py,sha256=-6l3i2Qi8ig2SM_FCgOLIaQ48XVj7L5ctd5PdQRY5mY,4458
  nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
- nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
+ nucliadb/search/search/exceptions.py,sha256=q6IKlajYRGLx_AVc2DI6gIZLpOY7ydf4EevMr5_2Krw,940
  nucliadb/search/search/fetch.py,sha256=eiljOKim-4OOEZn-3fyVZSYxztCH156BXYdqlIwVdN4,6181
- nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
+ nucliadb/search/search/filters.py,sha256=vZnbf3BjYuDkEQcBeLX_GDkq3Ahbbb7pLJ6DJU9z-QE,6490
  nucliadb/search/search/find.py,sha256=ZocoQNN28OHOmMaroGVFCnce3YHPZbFb1-9jxLNHSFM,7805
  nucliadb/search/search/find_merge.py,sha256=c-7IlfjfdmWAvQOyM7IO3bKS1EQpnR4oi6pN6mwrQKw,19815
  nucliadb/search/search/graph_merge.py,sha256=y5V7X-BhjHsKDXE69tzQLIIKGm4XuaFrZXw0odcHVNM,3402
@@ -248,32 +250,31 @@ nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCi
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
  nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
  nucliadb/search/search/predict_proxy.py,sha256=JwgBeEg1j4LnCjPCvTUrnmOd9LceJAt3iAu4m9cmJBo,3390
- nucliadb/search/search/query.py,sha256=-gvKsyGmKYpsoEVzKkq3HJUMcs_3LD3TYUueOcJsTec,11511
+ nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
  nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
  nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
  nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
- nucliadb/search/search/chat/ask.py,sha256=aaNj0MeAbx9dyeKpQJdm3VsHMq9OmcCESxahbgSxvCk,37805
+ nucliadb/search/search/chat/ask.py,sha256=GFxUh6KvqbidXmtvzgA7trVwF9xNPLcPDCD4IlqvTmI,37839
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
  nucliadb/search/search/chat/prompt.py,sha256=e8C7_MPr6Cn3nJHA4hWpeW3629KVI1ZUQA_wZf9Kiu4,48503
  nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
- nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
- nucliadb/search/search/query_parser/fetcher.py,sha256=SkvBRDfSKmuz-QygNKLAU4AhZhhDo1dnOZmt1zA28RA,16851
- nucliadb/search/search/query_parser/filter_expression.py,sha256=fZI8qFRF3h2sa01gwPmDlA5c16mq7ShMOBk-rDaA_fE,6575
+ nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
+ nucliadb/search/search/query_parser/fetcher.py,sha256=nP4EySj2BvH10QgCvgzvp13Nf22wwfHsdLbDoPlH2cQ,16831
  nucliadb/search/search/query_parser/models.py,sha256=k9cCjTpndP9ynr8A9J8MBmDYmjLBKL1UM4L0GXVuJw0,5031
- nucliadb/search/search/query_parser/old_filters.py,sha256=0NKjRdzAn2bH6veG0M-xM9BNKEYwa4U6WXtZzJAWRvo,9068
+ nucliadb/search/search/query_parser/old_filters.py,sha256=HircRqYEac_90bNCtFIJZ2RKA90kjbpNOQcp_ArBqR0,9083
  nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
  nucliadb/search/search/query_parser/parsers/ask.py,sha256=eTz8wS-EJHuAagR384h6TT64itymFZRpfZJGX8r6aZM,2771
- nucliadb/search/search/query_parser/parsers/catalog.py,sha256=XdBiTweGTQkj8m_V_i2xbwp7P5pPO8K1Tud692XKhMw,7149
- nucliadb/search/search/query_parser/parsers/common.py,sha256=o3028wUnK78lOmFK0jtmpvx2Y1Jh_atBYBoO5VD-qJ4,6359
- nucliadb/search/search/query_parser/parsers/find.py,sha256=Fo4lXOnCbP0AKEc1mKLNINJBv63B4DPlix0vlhyesck,12717
- nucliadb/search/search/query_parser/parsers/graph.py,sha256=lDRJO_JvOe7yytNgXZyMogyPMgB5xc8obNY2kqz3yGU,9405
- nucliadb/search/search/query_parser/parsers/search.py,sha256=yEebeMOXJza7HMK3TdIPO6UGQbe79maSDg-GgohQIMk,10517
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=rW3YHDWLkI2Hhznl_1oOMhC01bwZMAjv-Wu3iHPIaiU,11475
+ nucliadb/search/search/query_parser/parsers/catalog.py,sha256=e89kh3nGV9JT9wjdWB8JbC2HPydn0rVk7WsKBo6q3gw,7122
+ nucliadb/search/search/query_parser/parsers/common.py,sha256=mJMPOKurBK7-A7s3oNlPLxHP_yIn4j5Uw8rh_OQtzS4,6339
+ nucliadb/search/search/query_parser/parsers/find.py,sha256=lHVspg-i_eWXvu7BT9WfuFVGVKYhr380y4tDX5yfTD4,12735
+ nucliadb/search/search/query_parser/parsers/graph.py,sha256=zyqdUg5Afmhb2_-hvj9FUCaoLh026MUP1fgY2j-lD7c,9385
+ nucliadb/search/search/query_parser/parsers/search.py,sha256=huhz3lk6y4n7fcHU2XB-90Q34sXIwP5-cwp2rqFtPh8,10477
+ nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=xiOQ7_X6MkcZs3W_0DjdVfyk-G1AY6RBx3oG5hsq7ig,11455
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
  nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -301,7 +302,7 @@ nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
  nucliadb/tests/vectors.py,sha256=CcNKx-E8LPpyvRyljbmb-Tn_wST9Juw2CBoogWrKiTk,62843
  nucliadb/train/__init__.py,sha256=NVwe5yULoHXb80itIJT8YJYEz2xbiOPQ7_OMys6XJw8,1301
  nucliadb/train/app.py,sha256=z6xlGVVVaJmZZmLPIVTgkjD-wIz5b0NYlXAQp7hBHYw,2652
- nucliadb/train/generator.py,sha256=UHcvBtzNIGtqFR_Rnn3zH4yQkp0koXiC2hFFTgKgRco,4155
+ nucliadb/train/generator.py,sha256=fwFYal7VsV0EP7J_g3IOJ-WLpjwqrVo0gEP7vxIlxGs,4152
  nucliadb/train/lifecycle.py,sha256=3HadM4GRsYb2m-v4jtdr9C-KBEBx8GlrJDArPYi3SWQ,1960
  nucliadb/train/models.py,sha256=BmgmMjDsu_1Ih5JDAqo6whhume90q0ASJcDP9dkMQm8,1198
  nucliadb/train/nodes.py,sha256=6QD9ZnAacPyFCzs1jxJzsL3CkVNyADvsfnPjjrvn-NU,5610
@@ -315,20 +316,20 @@ nucliadb/train/upload.py,sha256=fTjH1KEL-0ogf3LV0T6ODO0QdPGwdZShSUtFUCAcUlA,3256
  nucliadb/train/uploader.py,sha256=xdLGz1ToDue9Q_M8A-_KYkO-V6fWKYOZQ6IGM4FuwWA,6424
  nucliadb/train/utils.py,sha256=OxQ8No19nxOxUhwNYDICNt8n2H-gowkAgu2Vt3Hatzk,3163
  nucliadb/train/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
- nucliadb/train/api/utils.py,sha256=xxTZml7yGstWwnEtjEtdus4iZXX9G1WtK6Z6Bf2f59Y,1478
+ nucliadb/train/api/utils.py,sha256=1E5h1dS3dLppXD4k6qH6jmaY0WYa_ChaNHiTW9y336Q,1307
  nucliadb/train/api/v1/__init__.py,sha256=P4vCIv93r_Cq1WFDDNjy_Wg7zBkzx0S4euXwfPy1LA4,928
  nucliadb/train/api/v1/router.py,sha256=ukdxn5q1oMar6NSPobgJczWsSxLCHw6DYKlb3zwCiSo,910
- nucliadb/train/api/v1/shards.py,sha256=GJRnQe8P-7_VTIN1oxVmxlrDA08qVN7opEZdbF4WxDk,1892
+ nucliadb/train/api/v1/shards.py,sha256=olgjQFKeYSSCbpYhglCRq6Q9WsM9hxOZgk6mOBb4g0o,3367
  nucliadb/train/api/v1/trainset.py,sha256=kpnpDgiMWr1FKHZJgwH7hue5kzilA8-i9X0YHlNeHuU,2113
  nucliadb/train/generators/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
- nucliadb/train/generators/field_classifier.py,sha256=xUA10o9CtBtilbP3uc-8Wn_zQ0oK3BrqYGqZgxh4ZLk,3428
- nucliadb/train/generators/field_streaming.py,sha256=fq4XxHf5fPLccPjO722tA9Xcw6smmceVqSme0fY2_NA,7631
- nucliadb/train/generators/image_classifier.py,sha256=BDXgyd5TGZRnzDnVRvp-qsRCuoTbTYwui3JiDIjuiDc,1736
- nucliadb/train/generators/paragraph_classifier.py,sha256=4sH3IQc7yJrlDs1C76SxFzL9N5mXWRZzJzoiF7y4dSQ,2703
- nucliadb/train/generators/paragraph_streaming.py,sha256=1xsc_IqP-1M0TzYTqu5qCvWBNp_J3Kyvnx8HVbToXmQ,3532
- nucliadb/train/generators/question_answer_streaming.py,sha256=yZZD0GpuHdV-BT4O8CV1sYVDk8ri8yGPVhfjSp3FTBQ,5626
- nucliadb/train/generators/sentence_classifier.py,sha256=bp-UeIbZ0vm4ujbTbZnqdI0JWijuMi53cmeLbP7dofI,5063
- nucliadb/train/generators/token_classifier.py,sha256=DdyMbrpxIVGWdTcz3SEN_3HwxKffUV3JGyTZzlCET8c,9503
+ nucliadb/train/generators/field_classifier.py,sha256=UcA5snqLNjIHw0VBzXo9ZtSua6o7wBU3tV9_d5qWpRA,3542
+ nucliadb/train/generators/field_streaming.py,sha256=p0xu39D5gaSQc-LagKwpgsVaxm2ULTkWZDPi-Ad1lHc,8378
+ nucliadb/train/generators/image_classifier.py,sha256=46YShcl7nf1_iLXZklWTAFvUNIIPulBwHlc9Y1ZJHZU,1850
+ nucliadb/train/generators/paragraph_classifier.py,sha256=Jk3B8a2zkAf-2-59RECiNmNP1Nz7f0-hfnS4rIE69xA,2817
+ nucliadb/train/generators/paragraph_streaming.py,sha256=axGNYjOTgxGsOcAAvCz_rTYzZCdZf0f1s-Hzn-VvIX0,3646
+ nucliadb/train/generators/question_answer_streaming.py,sha256=YXjWQc_SJ_TyXXJRg8tGkC9IeIzbRDcn0spQw9AlRNY,5740
+ nucliadb/train/generators/sentence_classifier.py,sha256=4JBGtInnWUQrM9wBd-P7Z2lTK1Dka6U3PPDqnLbGnFM,5177
+ nucliadb/train/generators/token_classifier.py,sha256=T8JOVR1vv5g7rn7HtcQcIZ3O5TFqh15uv5rOselLBVo,9617
  nucliadb/train/generators/utils.py,sha256=ZNwvEVPZr-eP0MW3ABN7a11hPQKaa0NdVaRcgBcTp5w,3601
  nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
  nucliadb/writer/app.py,sha256=ABBO8-u4pDAa61b3mCdD0TFhuHAYcxMkgpZSGgWARuE,2736
@@ -368,8 +369,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
- nucliadb-6.4.2.post4389.dist-info/METADATA,sha256=aeKKQWQ-B_KzWiVM1ZY8kXfO6wa0uJTQI0OPUiaNQPk,4152
- nucliadb-6.4.2.post4389.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nucliadb-6.4.2.post4389.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
- nucliadb-6.4.2.post4389.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
- nucliadb-6.4.2.post4389.dist-info/RECORD,,
+ nucliadb-6.5.0.post4404.dist-info/METADATA,sha256=5sqpnNr9UzsK7TlG4IxnM1bXMwocq2mCwi-dDOBvb4M,4152
+ nucliadb-6.5.0.post4404.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nucliadb-6.5.0.post4404.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+ nucliadb-6.5.0.post4404.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+ nucliadb-6.5.0.post4404.dist-info/RECORD,,