nucliadb 6.4.2.post4389__py3-none-any.whl → 6.5.0.post4404__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/exceptions.py +28 -0
- nucliadb/{search/search/query_parser → common}/filter_expression.py +1 -1
- nucliadb/search/api/v1/catalog.py +1 -1
- nucliadb/search/api/v1/find.py +1 -1
- nucliadb/search/api/v1/resource/search.py +1 -1
- nucliadb/search/api/v1/search.py +1 -1
- nucliadb/search/api/v1/suggest.py +1 -1
- nucliadb/search/search/chat/ask.py +1 -1
- nucliadb/search/search/exceptions.py +0 -2
- nucliadb/search/search/filters.py +1 -2
- nucliadb/search/search/query.py +2 -2
- nucliadb/search/search/query_parser/exceptions.py +0 -9
- nucliadb/search/search/query_parser/fetcher.py +1 -1
- nucliadb/search/search/query_parser/old_filters.py +1 -1
- nucliadb/search/search/query_parser/parsers/catalog.py +2 -2
- nucliadb/search/search/query_parser/parsers/common.py +1 -1
- nucliadb/search/search/query_parser/parsers/find.py +3 -2
- nucliadb/search/search/query_parser/parsers/graph.py +1 -1
- nucliadb/search/search/query_parser/parsers/search.py +2 -2
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +1 -1
- nucliadb/train/api/utils.py +0 -7
- nucliadb/train/api/v1/shards.py +37 -10
- nucliadb/train/generator.py +17 -12
- nucliadb/train/generators/field_classifier.py +3 -1
- nucliadb/train/generators/field_streaming.py +35 -16
- nucliadb/train/generators/image_classifier.py +3 -1
- nucliadb/train/generators/paragraph_classifier.py +3 -1
- nucliadb/train/generators/paragraph_streaming.py +3 -1
- nucliadb/train/generators/question_answer_streaming.py +3 -1
- nucliadb/train/generators/sentence_classifier.py +3 -1
- nucliadb/train/generators/token_classifier.py +3 -1
- {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/METADATA +6 -6
- {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/RECORD +36 -35
- {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/WHEEL +0 -0
- {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.4.2.post4389.dist-info → nucliadb-6.5.0.post4404.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
|
22
|
+
class InvalidQueryError(Exception):
|
23
|
+
"""Raised when parsing a query containing an invalid parameter"""
|
24
|
+
|
25
|
+
def __init__(self, param: str, reason: str):
|
26
|
+
self.param = param
|
27
|
+
self.reason = reason
|
28
|
+
super().__init__(f"Invalid query. Error in {param}: {reason}")
|
@@ -23,8 +23,8 @@ from typing import Union
|
|
23
23
|
from nidx_protos.nodereader_pb2 import FilterExpression as PBFilterExpression
|
24
24
|
|
25
25
|
from nucliadb.common import datamanagers
|
26
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
26
27
|
from nucliadb.common.ids import FIELD_TYPE_NAME_TO_STR
|
27
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
28
28
|
from nucliadb_models.filters import (
|
29
29
|
And,
|
30
30
|
DateCreated,
|
@@ -26,6 +26,7 @@ from fastapi_versioning import version
|
|
26
26
|
from pydantic import ValidationError
|
27
27
|
|
28
28
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
29
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
29
30
|
from nucliadb.common.maindb.pg import PGDriver
|
30
31
|
from nucliadb.common.maindb.utils import get_driver
|
31
32
|
from nucliadb.models.responses import HTTPClientError
|
@@ -33,7 +34,6 @@ from nucliadb.search import logger
|
|
33
34
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
34
35
|
from nucliadb.search.api.v1.utils import fastapi_query
|
35
36
|
from nucliadb.search.search import cache
|
36
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
37
37
|
from nucliadb.search.search.merge import fetch_resources
|
38
38
|
from nucliadb.search.search.pgcatalog import pgcatalog_search
|
39
39
|
from nucliadb.search.search.query_parser.parsers import parse_catalog
|
nucliadb/search/api/v1/find.py
CHANGED
@@ -27,12 +27,12 @@ from pydantic import ValidationError
|
|
27
27
|
|
28
28
|
from nucliadb.common import datamanagers
|
29
29
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
30
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
30
31
|
from nucliadb.models.responses import HTTPClientError
|
31
32
|
from nucliadb.search import predict
|
32
33
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
33
34
|
from nucliadb.search.api.v1.utils import fastapi_query
|
34
35
|
from nucliadb.search.search import cache
|
35
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
36
36
|
from nucliadb.search.search.find import find
|
37
37
|
from nucliadb.search.search.metrics import Metrics
|
38
38
|
from nucliadb.search.search.utils import maybe_log_request_payload, min_score_from_query_params
|
@@ -24,12 +24,12 @@ from fastapi import Header, Request, Response
|
|
24
24
|
from fastapi_versioning import version
|
25
25
|
from pydantic import ValidationError
|
26
26
|
|
27
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
27
28
|
from nucliadb.models.responses import HTTPClientError
|
28
29
|
from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
|
29
30
|
from nucliadb.search.api.v1.utils import fastapi_query
|
30
31
|
from nucliadb.search.requesters.utils import Method, nidx_query
|
31
32
|
from nucliadb.search.search import cache
|
32
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
33
33
|
from nucliadb.search.search.merge import merge_paragraphs_results
|
34
34
|
from nucliadb.search.search.query import paragraph_query_to_pb
|
35
35
|
from nucliadb_models.filters import FilterExpression
|
nucliadb/search/api/v1/search.py
CHANGED
@@ -27,6 +27,7 @@ from fastapi_versioning import version
|
|
27
27
|
from pydantic import ValidationError
|
28
28
|
|
29
29
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
30
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
30
31
|
from nucliadb.common.models_utils import to_proto
|
31
32
|
from nucliadb.models.responses import HTTPClientError
|
32
33
|
from nucliadb.search import predict
|
@@ -34,7 +35,6 @@ from nucliadb.search.api.v1.router import KB_PREFIX, api
|
|
34
35
|
from nucliadb.search.api.v1.utils import fastapi_query
|
35
36
|
from nucliadb.search.requesters.utils import Method, nidx_query
|
36
37
|
from nucliadb.search.search import cache
|
37
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
38
38
|
from nucliadb.search.search.merge import merge_results
|
39
39
|
from nucliadb.search.search.query_parser.parsers.search import parse_search
|
40
40
|
from nucliadb.search.search.query_parser.parsers.unit_retrieval import legacy_convert_retrieval_to_proto
|
@@ -25,12 +25,12 @@ from fastapi import Header, Request, Response
|
|
25
25
|
from fastapi_versioning import version
|
26
26
|
from pydantic import ValidationError
|
27
27
|
|
28
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
28
29
|
from nucliadb.models.responses import HTTPClientError
|
29
30
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
30
31
|
from nucliadb.search.api.v1.utils import fastapi_query
|
31
32
|
from nucliadb.search.requesters.utils import Method, nidx_query
|
32
33
|
from nucliadb.search.search import cache
|
33
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
34
34
|
from nucliadb.search.search.merge import merge_suggest_results
|
35
35
|
from nucliadb.search.search.query import suggest_query_to_pb
|
36
36
|
from nucliadb.search.search.utils import filter_hidden_resources
|
@@ -33,6 +33,7 @@ from nuclia_models.predict.generative_responses import (
|
|
33
33
|
from pydantic_core import ValidationError
|
34
34
|
|
35
35
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
36
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
36
37
|
from nucliadb.common.external_index_providers.base import ScoredTextBlock
|
37
38
|
from nucliadb.common.ids import ParagraphId
|
38
39
|
from nucliadb.models.responses import HTTPClientError
|
@@ -59,7 +60,6 @@ from nucliadb.search.search.chat.query import (
|
|
59
60
|
)
|
60
61
|
from nucliadb.search.search.exceptions import (
|
61
62
|
IncompleteFindResultsError,
|
62
|
-
InvalidQueryError,
|
63
63
|
)
|
64
64
|
from nucliadb.search.search.graph_strategy import get_graph_results
|
65
65
|
from nucliadb.search.search.metrics import AskMetrics, Metrics
|
@@ -17,8 +17,6 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
|
20
|
-
from nucliadb.search.search.query_parser.exceptions import InvalidQueryError as InvalidQueryError
|
21
|
-
|
22
20
|
|
23
21
|
class IncompleteFindResultsError(Exception):
|
24
22
|
pass
|
@@ -20,12 +20,11 @@
|
|
20
20
|
from collections.abc import Iterator
|
21
21
|
from typing import Any, Optional, Union
|
22
22
|
|
23
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
23
24
|
from nucliadb_models.labels import translate_alias_to_system_label
|
24
25
|
from nucliadb_models.search import Filter
|
25
26
|
from nucliadb_protos import knowledgebox_pb2
|
26
27
|
|
27
|
-
from .exceptions import InvalidQueryError
|
28
|
-
|
29
28
|
ENTITY_PREFIX = "/e/"
|
30
29
|
CLASSIFICATION_LABEL_PREFIX = "/l/"
|
31
30
|
|
nucliadb/search/search/query.py
CHANGED
@@ -24,6 +24,8 @@ from nidx_protos import nodereader_pb2
|
|
24
24
|
from nidx_protos.noderesources_pb2 import Resource
|
25
25
|
|
26
26
|
from nucliadb.common import datamanagers
|
27
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
28
|
+
from nucliadb.common.filter_expression import add_and_expression, parse_expression
|
27
29
|
from nucliadb.search.search.filters import (
|
28
30
|
translate_label,
|
29
31
|
)
|
@@ -38,8 +40,6 @@ from nucliadb_models.search import (
|
|
38
40
|
)
|
39
41
|
from nucliadb_protos import utils_pb2
|
40
42
|
|
41
|
-
from .exceptions import InvalidQueryError
|
42
|
-
from .query_parser.filter_expression import add_and_expression, parse_expression
|
43
43
|
from .query_parser.old_filters import OldFilterParams, parse_old_filters
|
44
44
|
|
45
45
|
|
@@ -21,12 +21,3 @@
|
|
21
21
|
|
22
22
|
class InternalParserError(ValueError):
|
23
23
|
"""Raised when parsing fails due to some internal error"""
|
24
|
-
|
25
|
-
|
26
|
-
class InvalidQueryError(Exception):
|
27
|
-
"""Raised when parsing a query containing an invalid parameter"""
|
28
|
-
|
29
|
-
def __init__(self, param: str, reason: str):
|
30
|
-
self.param = param
|
31
|
-
self.reason = reason
|
32
|
-
super().__init__(f"Invalid query. Error in {param}: {reason}")
|
@@ -24,13 +24,13 @@ from async_lru import alru_cache
|
|
24
24
|
from typing_extensions import TypeIs
|
25
25
|
|
26
26
|
from nucliadb.common import datamanagers
|
27
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
27
28
|
from nucliadb.common.maindb.utils import get_driver
|
28
29
|
from nucliadb.search import logger
|
29
30
|
from nucliadb.search.predict import SendToPredictError, convert_relations
|
30
31
|
from nucliadb.search.search.metrics import (
|
31
32
|
query_parse_dependency_observer,
|
32
33
|
)
|
33
|
-
from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
|
34
34
|
from nucliadb.search.utilities import get_predict
|
35
35
|
from nucliadb_models.internal.predict import QueryInfo
|
36
36
|
from nucliadb_models.search import (
|
@@ -24,13 +24,13 @@ from typing import Optional, Union
|
|
24
24
|
|
25
25
|
from nidx_protos.nodereader_pb2 import FilterExpression
|
26
26
|
|
27
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
27
28
|
from nucliadb.search.search.filters import translate_label
|
28
29
|
from nucliadb_models.search import (
|
29
30
|
Filter,
|
30
31
|
)
|
31
32
|
from nucliadb_protos import knowledgebox_pb2
|
32
33
|
|
33
|
-
from .exceptions import InvalidQueryError
|
34
34
|
from .fetcher import Fetcher
|
35
35
|
|
36
36
|
|
@@ -19,9 +19,9 @@
|
|
19
19
|
#
|
20
20
|
|
21
21
|
from nucliadb.common import datamanagers
|
22
|
-
from nucliadb.
|
22
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
23
|
+
from nucliadb.common.filter_expression import FacetFilterTypes, facet_from_filter
|
23
24
|
from nucliadb.search.search.filters import translate_label
|
24
|
-
from nucliadb.search.search.query_parser.filter_expression import FacetFilterTypes, facet_from_filter
|
25
25
|
from nucliadb.search.search.query_parser.models import (
|
26
26
|
CatalogExpression,
|
27
27
|
CatalogQuery,
|
@@ -21,8 +21,8 @@ import re
|
|
21
21
|
import string
|
22
22
|
from typing import Optional, Union
|
23
23
|
|
24
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
24
25
|
from nucliadb.search import logger
|
25
|
-
from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
|
26
26
|
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
27
27
|
from nucliadb.search.search.query_parser.models import (
|
28
28
|
KeywordQuery,
|
@@ -23,12 +23,13 @@ from typing import Optional
|
|
23
23
|
from nidx_protos import nodereader_pb2
|
24
24
|
from pydantic import ValidationError
|
25
25
|
|
26
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
27
|
+
from nucliadb.common.filter_expression import parse_expression
|
26
28
|
from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
|
27
29
|
from nucliadb.search.search.metrics import query_parser_observer
|
28
30
|
from nucliadb.search.search.query import expand_entities
|
29
|
-
from nucliadb.search.search.query_parser.exceptions import InternalParserError
|
31
|
+
from nucliadb.search.search.query_parser.exceptions import InternalParserError
|
30
32
|
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
31
|
-
from nucliadb.search.search.query_parser.filter_expression import parse_expression
|
32
33
|
from nucliadb.search.search.query_parser.models import (
|
33
34
|
Filters,
|
34
35
|
GraphQuery,
|
@@ -22,8 +22,8 @@ from typing import Optional, Union
|
|
22
22
|
|
23
23
|
from nidx_protos import nodereader_pb2
|
24
24
|
|
25
|
+
from nucliadb.common.filter_expression import add_and_expression, parse_expression
|
25
26
|
from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap, RelationTypeMap
|
26
|
-
from nucliadb.search.search.query_parser.filter_expression import add_and_expression, parse_expression
|
27
27
|
from nucliadb.search.search.query_parser.models import GraphRetrieval
|
28
28
|
from nucliadb.search.search.utils import filter_hidden_resources
|
29
29
|
from nucliadb_models.graph import requests as graph_requests
|
@@ -21,11 +21,11 @@ from typing import Optional
|
|
21
21
|
|
22
22
|
from nidx_protos import nodereader_pb2
|
23
23
|
|
24
|
+
from nucliadb.common.exceptions import InvalidQueryError
|
25
|
+
from nucliadb.common.filter_expression import parse_expression
|
24
26
|
from nucliadb.search.search.metrics import query_parser_observer
|
25
27
|
from nucliadb.search.search.query import expand_entities
|
26
|
-
from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
|
27
28
|
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
28
|
-
from nucliadb.search.search.query_parser.filter_expression import parse_expression
|
29
29
|
from nucliadb.search.search.query_parser.models import (
|
30
30
|
Filters,
|
31
31
|
ParsedQuery,
|
@@ -22,10 +22,10 @@ from typing import Optional
|
|
22
22
|
from nidx_protos import nodereader_pb2
|
23
23
|
from nidx_protos.nodereader_pb2 import SearchRequest
|
24
24
|
|
25
|
+
from nucliadb.common.filter_expression import add_and_expression
|
25
26
|
from nucliadb.search.search.filters import translate_label
|
26
27
|
from nucliadb.search.search.metrics import node_features, query_parser_observer
|
27
28
|
from nucliadb.search.search.query import apply_entities_filter, get_sort_field_proto
|
28
|
-
from nucliadb.search.search.query_parser.filter_expression import add_and_expression
|
29
29
|
from nucliadb.search.search.query_parser.models import ParsedQuery, PredictReranker, UnitRetrieval
|
30
30
|
from nucliadb.search.search.query_parser.parsers.graph import parse_path_query
|
31
31
|
from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
|
nucliadb/train/api/utils.py
CHANGED
@@ -22,7 +22,6 @@
|
|
22
22
|
from typing import Optional
|
23
23
|
|
24
24
|
from nucliadb.train.utils import get_shard_manager
|
25
|
-
from nucliadb_protos.dataset_pb2 import TrainSet
|
26
25
|
|
27
26
|
|
28
27
|
async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
|
@@ -35,9 +34,3 @@ async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
|
|
35
34
|
if shard.shard.startswith(prefix):
|
36
35
|
valid_shards.append(shard.shard)
|
37
36
|
return valid_shards
|
38
|
-
|
39
|
-
|
40
|
-
def get_train(trainset: bytes) -> TrainSet:
|
41
|
-
train = TrainSet()
|
42
|
-
train.ParseFromString(trainset)
|
43
|
-
return train
|
nucliadb/train/api/v1/shards.py
CHANGED
@@ -17,16 +17,22 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
import json
|
21
|
+
from typing import Optional
|
20
22
|
|
21
|
-
|
23
|
+
import google.protobuf.message
|
24
|
+
import pydantic
|
22
25
|
from fastapi import HTTPException, Request
|
23
26
|
from fastapi.responses import StreamingResponse
|
24
27
|
from fastapi_versioning import version
|
25
28
|
|
26
|
-
from nucliadb.train.api.utils import get_kb_partitions
|
29
|
+
from nucliadb.train.api.utils import get_kb_partitions
|
27
30
|
from nucliadb.train.api.v1.router import KB_PREFIX, api
|
28
31
|
from nucliadb.train.generator import generate_train_data
|
32
|
+
from nucliadb_models.filters import FilterExpression
|
29
33
|
from nucliadb_models.resource import NucliaDBRoles
|
34
|
+
from nucliadb_models.trainset import TrainSet as TrainSetModel
|
35
|
+
from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
|
30
36
|
from nucliadb_utils.authentication import requires_one
|
31
37
|
|
32
38
|
|
@@ -43,14 +49,35 @@ async def object_get_response(
|
|
43
49
|
kbid: str,
|
44
50
|
shard: str,
|
45
51
|
) -> StreamingResponse:
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
if len(all_keys) == 0:
|
51
|
-
raise HTTPException(status_code=404)
|
52
|
-
|
52
|
+
partitions = await get_kb_partitions(kbid, shard)
|
53
|
+
if shard not in partitions:
|
54
|
+
raise HTTPException(status_code=404, detail=f"Partition {shard} not found")
|
55
|
+
trainset, filter_expression = await get_trainset(request)
|
53
56
|
return StreamingResponse(
|
54
|
-
generate_train_data(kbid, shard, trainset),
|
57
|
+
generate_train_data(kbid, shard, trainset, filter_expression),
|
55
58
|
media_type="application/octet-stream",
|
56
59
|
)
|
60
|
+
|
61
|
+
|
62
|
+
async def get_trainset(request: Request) -> tuple[TrainSet, Optional[FilterExpression]]:
|
63
|
+
if request.headers.get("Content-Type") == "application/json":
|
64
|
+
try:
|
65
|
+
trainset_model = TrainSetModel.model_validate(await request.json())
|
66
|
+
except (pydantic.ValidationError, json.JSONDecodeError, ValueError) as err:
|
67
|
+
raise HTTPException(status_code=422, detail=str(err))
|
68
|
+
trainset_pb = TrainSet(
|
69
|
+
type=TaskType.ValueType(trainset_model.type.value),
|
70
|
+
batch_size=trainset_model.batch_size,
|
71
|
+
exclude_text=trainset_model.exclude_text,
|
72
|
+
)
|
73
|
+
filter_expression = trainset_model.filter_expression
|
74
|
+
else:
|
75
|
+
# Legacy version of the endpoint where the encoded TrainSet protobuf is passed as request body.
|
76
|
+
trainset_pb = TrainSet()
|
77
|
+
try:
|
78
|
+
trainset_pb.ParseFromString(await request.body())
|
79
|
+
except google.protobuf.message.DecodeError as err:
|
80
|
+
raise HTTPException(status_code=422, detail=str(err))
|
81
|
+
# Filter expressions not supported on legacy version of the endpoint
|
82
|
+
filter_expression = None
|
83
|
+
return trainset_pb, filter_expression
|
nucliadb/train/generator.py
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncIterator, Optional
|
21
|
+
from typing import AsyncIterator, Callable, Optional
|
22
22
|
|
23
23
|
from fastapi import HTTPException
|
24
24
|
|
@@ -48,10 +48,15 @@ from nucliadb.train.generators.token_classifier import (
|
|
48
48
|
from nucliadb.train.settings import settings
|
49
49
|
from nucliadb.train.types import TrainBatch
|
50
50
|
from nucliadb.train.utils import get_shard_manager
|
51
|
+
from nucliadb_models.filters import FilterExpression
|
51
52
|
from nucliadb_protos.dataset_pb2 import TaskType, TrainSet
|
52
53
|
|
54
|
+
BatchGenerator = Callable[[str, TrainSet, str, Optional[FilterExpression]], AsyncIterator[TrainBatch]]
|
53
55
|
|
54
|
-
|
56
|
+
|
57
|
+
async def generate_train_data(
|
58
|
+
kbid: str, shard: str, trainset: TrainSet, filter_expression: Optional[FilterExpression] = None
|
59
|
+
):
|
55
60
|
# Get the data structure to generate data
|
56
61
|
shard_manager = get_shard_manager()
|
57
62
|
shard_replica_id = await shard_manager.get_shard_id(kbid, shard)
|
@@ -59,25 +64,25 @@ async def generate_train_data(kbid: str, shard: str, trainset: TrainSet):
|
|
59
64
|
if trainset.batch_size == 0:
|
60
65
|
trainset.batch_size = 50
|
61
66
|
|
62
|
-
batch_generator: Optional[
|
67
|
+
batch_generator: Optional[BatchGenerator] = None
|
63
68
|
|
64
69
|
if trainset.type == TaskType.FIELD_CLASSIFICATION:
|
65
|
-
batch_generator = field_classification_batch_generator
|
70
|
+
batch_generator = field_classification_batch_generator
|
66
71
|
elif trainset.type == TaskType.IMAGE_CLASSIFICATION:
|
67
|
-
batch_generator = image_classification_batch_generator
|
72
|
+
batch_generator = image_classification_batch_generator
|
68
73
|
elif trainset.type == TaskType.PARAGRAPH_CLASSIFICATION:
|
69
|
-
batch_generator = paragraph_classification_batch_generator
|
74
|
+
batch_generator = paragraph_classification_batch_generator
|
70
75
|
elif trainset.type == TaskType.TOKEN_CLASSIFICATION:
|
71
|
-
batch_generator = token_classification_batch_generator
|
76
|
+
batch_generator = token_classification_batch_generator
|
72
77
|
elif trainset.type == TaskType.SENTENCE_CLASSIFICATION:
|
73
|
-
batch_generator = sentence_classification_batch_generator
|
78
|
+
batch_generator = sentence_classification_batch_generator
|
74
79
|
elif trainset.type == TaskType.PARAGRAPH_STREAMING:
|
75
|
-
batch_generator = paragraph_streaming_batch_generator
|
80
|
+
batch_generator = paragraph_streaming_batch_generator
|
76
81
|
|
77
82
|
elif trainset.type == TaskType.QUESTION_ANSWER_STREAMING:
|
78
|
-
batch_generator = question_answer_batch_generator
|
83
|
+
batch_generator = question_answer_batch_generator
|
79
84
|
elif trainset.type == TaskType.FIELD_STREAMING:
|
80
|
-
batch_generator = field_streaming_batch_generator
|
85
|
+
batch_generator = field_streaming_batch_generator
|
81
86
|
|
82
87
|
if batch_generator is None:
|
83
88
|
raise HTTPException(
|
@@ -88,7 +93,7 @@ async def generate_train_data(kbid: str, shard: str, trainset: TrainSet):
|
|
88
93
|
# This cache size is an arbitrary number, once we have a metric in place and
|
89
94
|
# we analyze memory consumption, we can adjust it with more knoweldge
|
90
95
|
with resource_cache(size=settings.resource_cache_size):
|
91
|
-
async for item in batch_generator:
|
96
|
+
async for item in batch_generator(kbid, trainset, shard_replica_id, filter_expression):
|
92
97
|
payload = item.SerializeToString()
|
93
98
|
yield len(payload).to_bytes(4, byteorder="big", signed=False)
|
94
99
|
yield payload
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncGenerator
|
21
|
+
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
from nidx_protos.nodereader_pb2 import StreamRequest
|
24
24
|
|
@@ -26,6 +26,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
|
|
26
26
|
from nucliadb.common.nidx import get_nidx_searcher_client
|
27
27
|
from nucliadb.train import logger
|
28
28
|
from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
|
29
|
+
from nucliadb_models.filters import FilterExpression
|
29
30
|
from nucliadb_protos.dataset_pb2 import (
|
30
31
|
FieldClassificationBatch,
|
31
32
|
Label,
|
@@ -38,6 +39,7 @@ def field_classification_batch_generator(
|
|
38
39
|
kbid: str,
|
39
40
|
trainset: TrainSet,
|
40
41
|
shard_replica_id: str,
|
42
|
+
filter_expression: Optional[FilterExpression],
|
41
43
|
) -> AsyncGenerator[FieldClassificationBatch, None]:
|
42
44
|
generator = generate_field_classification_payloads(kbid, trainset, shard_replica_id)
|
43
45
|
batch_generator = batchify(generator, trainset.batch_size, FieldClassificationBatch)
|
@@ -23,11 +23,15 @@ from typing import AsyncGenerator, AsyncIterable, Optional
|
|
23
23
|
|
24
24
|
from nidx_protos.nodereader_pb2 import DocumentItem, StreamRequest
|
25
25
|
|
26
|
+
from nucliadb.common.filter_expression import parse_expression
|
26
27
|
from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
|
27
28
|
from nucliadb.common.nidx import get_nidx_searcher_client
|
28
29
|
from nucliadb.train import logger
|
29
30
|
from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
|
30
31
|
from nucliadb.train.settings import settings
|
32
|
+
from nucliadb_models.filters import (
|
33
|
+
FilterExpression,
|
34
|
+
)
|
31
35
|
from nucliadb_protos.dataset_pb2 import (
|
32
36
|
FieldSplitData,
|
33
37
|
FieldStreamingBatch,
|
@@ -41,32 +45,23 @@ def field_streaming_batch_generator(
|
|
41
45
|
kbid: str,
|
42
46
|
trainset: TrainSet,
|
43
47
|
shard_replica_id: str,
|
48
|
+
filter_expression: Optional[FilterExpression],
|
44
49
|
) -> AsyncGenerator[FieldStreamingBatch, None]:
|
45
|
-
generator = generate_field_streaming_payloads(kbid, trainset, shard_replica_id)
|
50
|
+
generator = generate_field_streaming_payloads(kbid, trainset, shard_replica_id, filter_expression)
|
46
51
|
batch_generator = batchify(generator, trainset.batch_size, FieldStreamingBatch)
|
47
52
|
return batch_generator
|
48
53
|
|
49
54
|
|
50
55
|
async def generate_field_streaming_payloads(
|
51
|
-
kbid: str,
|
52
|
-
trainset: TrainSet,
|
53
|
-
shard_replica_id: str,
|
56
|
+
kbid: str, trainset: TrainSet, shard_replica_id: str, filter_expression: Optional[FilterExpression]
|
54
57
|
) -> AsyncGenerator[FieldSplitData, None]:
|
55
58
|
request = StreamRequest()
|
56
59
|
request.shard_id.id = shard_replica_id
|
57
60
|
|
58
|
-
|
59
|
-
request
|
60
|
-
|
61
|
-
request
|
62
|
-
for metadata in trainset.filter.metadata:
|
63
|
-
request.filter.labels.append(f"/m/{metadata}")
|
64
|
-
for entity in trainset.filter.entities:
|
65
|
-
request.filter.labels.append(f"/e/{entity}")
|
66
|
-
for field in trainset.filter.fields:
|
67
|
-
request.filter.labels.append(f"/f/{field}")
|
68
|
-
for status in trainset.filter.status:
|
69
|
-
request.filter.labels.append(f"/n/s/{status}")
|
61
|
+
if filter_expression:
|
62
|
+
await parse_filter_expression(kbid, request, filter_expression)
|
63
|
+
else:
|
64
|
+
parse_legacy_filters(request, trainset)
|
70
65
|
|
71
66
|
resources = set()
|
72
67
|
fields = set()
|
@@ -107,6 +102,30 @@ async def generate_field_streaming_payloads(
|
|
107
102
|
)
|
108
103
|
|
109
104
|
|
105
|
+
async def parse_filter_expression(
|
106
|
+
kbid: str, request: StreamRequest, filter_expression: FilterExpression
|
107
|
+
):
|
108
|
+
if filter_expression.field:
|
109
|
+
expr = await parse_expression(filter_expression.field, kbid)
|
110
|
+
if expr:
|
111
|
+
request.filter_expression.CopyFrom(expr)
|
112
|
+
|
113
|
+
|
114
|
+
def parse_legacy_filters(request: StreamRequest, trainset: TrainSet):
|
115
|
+
for label in trainset.filter.labels:
|
116
|
+
request.filter.labels.append(f"/l/{label}")
|
117
|
+
for path in trainset.filter.paths:
|
118
|
+
request.filter.labels.append(f"/p/{path}")
|
119
|
+
for metadata in trainset.filter.metadata:
|
120
|
+
request.filter.labels.append(f"/m/{metadata}")
|
121
|
+
for entity in trainset.filter.entities:
|
122
|
+
request.filter.labels.append(f"/e/{entity}")
|
123
|
+
for field in trainset.filter.fields:
|
124
|
+
request.filter.labels.append(f"/f/{field}")
|
125
|
+
for status in trainset.filter.status:
|
126
|
+
request.filter.labels.append(f"/n/s/{status}")
|
127
|
+
|
128
|
+
|
110
129
|
async def iter_field_split_data(
|
111
130
|
request: StreamRequest, kbid: str, trainset: TrainSet, max_parallel: int = 5
|
112
131
|
) -> AsyncIterable[FieldSplitData]:
|
@@ -18,9 +18,10 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncGenerator
|
21
|
+
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
from nucliadb.train.generators.utils import batchify
|
24
|
+
from nucliadb_models.filters import FilterExpression
|
24
25
|
from nucliadb_protos.dataset_pb2 import (
|
25
26
|
ImageClassification,
|
26
27
|
ImageClassificationBatch,
|
@@ -32,6 +33,7 @@ def image_classification_batch_generator(
|
|
32
33
|
kbid: str,
|
33
34
|
trainset: TrainSet,
|
34
35
|
shard_replica_id: str,
|
36
|
+
filter_expression: Optional[FilterExpression],
|
35
37
|
) -> AsyncGenerator[ImageClassificationBatch, None]:
|
36
38
|
generator = generate_image_classification_payloads(kbid, trainset, shard_replica_id)
|
37
39
|
batch_generator = batchify(generator, trainset.batch_size, ImageClassificationBatch)
|
@@ -18,13 +18,14 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncGenerator
|
21
|
+
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
from fastapi import HTTPException
|
24
24
|
from nidx_protos.nodereader_pb2 import StreamRequest
|
25
25
|
|
26
26
|
from nucliadb.common.nidx import get_nidx_searcher_client
|
27
27
|
from nucliadb.train.generators.utils import batchify, get_paragraph
|
28
|
+
from nucliadb_models.filters import FilterExpression
|
28
29
|
from nucliadb_protos.dataset_pb2 import (
|
29
30
|
Label,
|
30
31
|
ParagraphClassificationBatch,
|
@@ -37,6 +38,7 @@ def paragraph_classification_batch_generator(
|
|
37
38
|
kbid: str,
|
38
39
|
trainset: TrainSet,
|
39
40
|
shard_replica_id: str,
|
41
|
+
filter_expression: Optional[FilterExpression],
|
40
42
|
) -> AsyncGenerator[ParagraphClassificationBatch, None]:
|
41
43
|
if len(trainset.filter.labels) != 1:
|
42
44
|
raise HTTPException(
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncGenerator
|
21
|
+
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
from nidx_protos.nodereader_pb2 import StreamRequest
|
24
24
|
|
@@ -26,6 +26,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
|
|
26
26
|
from nucliadb.common.nidx import get_nidx_searcher_client
|
27
27
|
from nucliadb.train import logger
|
28
28
|
from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
|
29
|
+
from nucliadb_models.filters import FilterExpression
|
29
30
|
from nucliadb_protos.dataset_pb2 import (
|
30
31
|
ParagraphStreamingBatch,
|
31
32
|
ParagraphStreamItem,
|
@@ -37,6 +38,7 @@ def paragraph_streaming_batch_generator(
|
|
37
38
|
kbid: str,
|
38
39
|
trainset: TrainSet,
|
39
40
|
shard_replica_id: str,
|
41
|
+
filter_expression: Optional[FilterExpression],
|
40
42
|
) -> AsyncGenerator[ParagraphStreamingBatch, None]:
|
41
43
|
generator = generate_paragraph_streaming_payloads(kbid, trainset, shard_replica_id)
|
42
44
|
batch_generator = batchify(generator, trainset.batch_size, ParagraphStreamingBatch)
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncGenerator
|
21
|
+
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
from nidx_protos.nodereader_pb2 import StreamRequest
|
24
24
|
|
@@ -30,6 +30,7 @@ from nucliadb.train.generators.utils import (
|
|
30
30
|
get_paragraph,
|
31
31
|
get_resource_from_cache_or_db,
|
32
32
|
)
|
33
|
+
from nucliadb_models.filters import FilterExpression
|
33
34
|
from nucliadb_protos.dataset_pb2 import (
|
34
35
|
QuestionAnswerStreamingBatch,
|
35
36
|
QuestionAnswerStreamItem,
|
@@ -46,6 +47,7 @@ def question_answer_batch_generator(
|
|
46
47
|
kbid: str,
|
47
48
|
trainset: TrainSet,
|
48
49
|
shard_replica_id: str,
|
50
|
+
filter_expression: Optional[FilterExpression],
|
49
51
|
) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]:
|
50
52
|
generator = generate_question_answer_streaming_payloads(kbid, trainset, shard_replica_id)
|
51
53
|
batch_generator = batchify(generator, trainset.batch_size, QuestionAnswerStreamingBatch)
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from typing import AsyncGenerator
|
21
|
+
from typing import AsyncGenerator, Optional
|
22
22
|
|
23
23
|
from fastapi import HTTPException
|
24
24
|
from nidx_protos.nodereader_pb2 import StreamRequest
|
@@ -27,6 +27,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
|
|
27
27
|
from nucliadb.common.nidx import get_nidx_searcher_client
|
28
28
|
from nucliadb.train import logger
|
29
29
|
from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
|
30
|
+
from nucliadb_models.filters import FilterExpression
|
30
31
|
from nucliadb_protos.dataset_pb2 import (
|
31
32
|
Label,
|
32
33
|
MultipleTextSameLabels,
|
@@ -39,6 +40,7 @@ def sentence_classification_batch_generator(
|
|
39
40
|
kbid: str,
|
40
41
|
trainset: TrainSet,
|
41
42
|
shard_replica_id: str,
|
43
|
+
filter_expression: Optional[FilterExpression],
|
42
44
|
) -> AsyncGenerator[SentenceClassificationBatch, None]:
|
43
45
|
if len(trainset.filter.labels) == 0:
|
44
46
|
raise HTTPException(
|
@@ -19,7 +19,7 @@
|
|
19
19
|
#
|
20
20
|
|
21
21
|
from collections import OrderedDict
|
22
|
-
from typing import AsyncGenerator, cast
|
22
|
+
from typing import AsyncGenerator, Optional, cast
|
23
23
|
|
24
24
|
from nidx_protos.nodereader_pb2 import StreamFilter, StreamRequest
|
25
25
|
|
@@ -27,6 +27,7 @@ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
|
|
27
27
|
from nucliadb.common.nidx import get_nidx_searcher_client
|
28
28
|
from nucliadb.train import logger
|
29
29
|
from nucliadb.train.generators.utils import batchify, get_resource_from_cache_or_db
|
30
|
+
from nucliadb_models.filters import FilterExpression
|
30
31
|
from nucliadb_protos.dataset_pb2 import (
|
31
32
|
TokenClassificationBatch,
|
32
33
|
TokensClassification,
|
@@ -42,6 +43,7 @@ def token_classification_batch_generator(
|
|
42
43
|
kbid: str,
|
43
44
|
trainset: TrainSet,
|
44
45
|
shard_replica_id: str,
|
46
|
+
filter_expression: Optional[FilterExpression],
|
45
47
|
) -> AsyncGenerator[TokenClassificationBatch, None]:
|
46
48
|
generator = generate_token_classification_payloads(kbid, trainset, shard_replica_id)
|
47
49
|
batch_generator = batchify(generator, trainset.batch_size, TokenClassificationBatch)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.
|
3
|
+
Version: 6.5.0.post4404
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
20
20
|
Requires-Python: <4,>=3.9
|
21
21
|
Description-Content-Type: text/markdown
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.
|
24
|
-
Requires-Dist: nucliadb-protos>=6.
|
25
|
-
Requires-Dist: nucliadb-models>=6.
|
26
|
-
Requires-Dist: nidx-protos>=6.
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.5.0.post4404
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.0.post4404
|
24
|
+
Requires-Dist: nucliadb-protos>=6.5.0.post4404
|
25
|
+
Requires-Dist: nucliadb-models>=6.5.0.post4404
|
26
|
+
Requires-Dist: nidx-protos>=6.5.0.post4404
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
28
28
|
Requires-Dist: nuclia-models>=0.24.2
|
29
29
|
Requires-Dist: uvicorn[standard]
|
@@ -57,6 +57,8 @@ nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,8
|
|
57
57
|
nucliadb/common/cache.py,sha256=NM69CVvNjlh58jiVUF1JeYPmBO7_L4rB3tffxK0k_vI,6549
|
58
58
|
nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
|
59
59
|
nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
|
60
|
+
nucliadb/common/exceptions.py,sha256=_PJk_NfAhZBFBvmgAfvsJKZ9KuRt5Y1cNsH3-cXE07w,1120
|
61
|
+
nucliadb/common/filter_expression.py,sha256=aRbGfg2pci7sUo1KgSqcm72Eu-3Ordy8knnW6DYId5o,6568
|
60
62
|
nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
|
61
63
|
nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
|
62
64
|
nucliadb/common/nidx.py,sha256=3EeQGjM_gxK0l_Rb54fspFWVNnzUiKF-_GMxTiiDC8Q,9116
|
@@ -213,30 +215,30 @@ nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,
|
|
213
215
|
nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
214
216
|
nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
|
215
217
|
nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
|
216
|
-
nucliadb/search/api/v1/catalog.py,sha256=
|
218
|
+
nucliadb/search/api/v1/catalog.py,sha256=mVAPPf6CXimVOsBpbhPo63KXf8eXps--cifZOEQAIyk,7714
|
217
219
|
nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
|
218
|
-
nucliadb/search/api/v1/find.py,sha256=
|
220
|
+
nucliadb/search/api/v1/find.py,sha256=iMjyq4y0JOMC_x1B8kUfVdkCoc9G9Ark58kPLLY4HDw,10824
|
219
221
|
nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
|
220
222
|
nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
|
221
223
|
nucliadb/search/api/v1/predict_proxy.py,sha256=Q03ZTvWp7Sq0x71t5Br4LHxTiYsRd6-GCb4YuKqhynM,3131
|
222
224
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
223
|
-
nucliadb/search/api/v1/search.py,sha256=
|
224
|
-
nucliadb/search/api/v1/suggest.py,sha256=
|
225
|
+
nucliadb/search/api/v1/search.py,sha256=eqlrvRE7IlMpunNwD1RJwt6RgMV01sIDJLgxxE7CFcE,12297
|
226
|
+
nucliadb/search/api/v1/suggest.py,sha256=gaJE60r8-z6TVO05mQRKBITwXn2_ofM3B4-OtpOgZEk,6343
|
225
227
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
226
228
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
227
229
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
228
230
|
nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
|
229
231
|
nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=AZ5_cH1jbf7d5wh_gz6EHLEKAzEOMrQZwEZAu1Q_3FE,4846
|
230
|
-
nucliadb/search/api/v1/resource/search.py,sha256=
|
232
|
+
nucliadb/search/api/v1/resource/search.py,sha256=PZR7fs5oYD0RKqKoD38NZMAnOJzBv35NB2YOr2xy1ck,4923
|
231
233
|
nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
|
232
234
|
nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
|
233
235
|
nucliadb/search/requesters/utils.py,sha256=Ne5fweSWk9hettQKyUZAMZrw_MTjPE5W_EVqj4p5XiI,6109
|
234
236
|
nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
235
237
|
nucliadb/search/search/cache.py,sha256=-6l3i2Qi8ig2SM_FCgOLIaQ48XVj7L5ctd5PdQRY5mY,4458
|
236
238
|
nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
|
237
|
-
nucliadb/search/search/exceptions.py,sha256=
|
239
|
+
nucliadb/search/search/exceptions.py,sha256=q6IKlajYRGLx_AVc2DI6gIZLpOY7ydf4EevMr5_2Krw,940
|
238
240
|
nucliadb/search/search/fetch.py,sha256=eiljOKim-4OOEZn-3fyVZSYxztCH156BXYdqlIwVdN4,6181
|
239
|
-
nucliadb/search/search/filters.py,sha256=
|
241
|
+
nucliadb/search/search/filters.py,sha256=vZnbf3BjYuDkEQcBeLX_GDkq3Ahbbb7pLJ6DJU9z-QE,6490
|
240
242
|
nucliadb/search/search/find.py,sha256=ZocoQNN28OHOmMaroGVFCnce3YHPZbFb1-9jxLNHSFM,7805
|
241
243
|
nucliadb/search/search/find_merge.py,sha256=c-7IlfjfdmWAvQOyM7IO3bKS1EQpnR4oi6pN6mwrQKw,19815
|
242
244
|
nucliadb/search/search/graph_merge.py,sha256=y5V7X-BhjHsKDXE69tzQLIIKGm4XuaFrZXw0odcHVNM,3402
|
@@ -248,32 +250,31 @@ nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCi
|
|
248
250
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
249
251
|
nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
|
250
252
|
nucliadb/search/search/predict_proxy.py,sha256=JwgBeEg1j4LnCjPCvTUrnmOd9LceJAt3iAu4m9cmJBo,3390
|
251
|
-
nucliadb/search/search/query.py,sha256
|
253
|
+
nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
|
252
254
|
nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
|
253
255
|
nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
|
254
256
|
nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
|
255
257
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
256
258
|
nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
|
257
259
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
258
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
260
|
+
nucliadb/search/search/chat/ask.py,sha256=GFxUh6KvqbidXmtvzgA7trVwF9xNPLcPDCD4IlqvTmI,37839
|
259
261
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
260
262
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
261
263
|
nucliadb/search/search/chat/prompt.py,sha256=e8C7_MPr6Cn3nJHA4hWpeW3629KVI1ZUQA_wZf9Kiu4,48503
|
262
264
|
nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
|
263
265
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
264
|
-
nucliadb/search/search/query_parser/exceptions.py,sha256=
|
265
|
-
nucliadb/search/search/query_parser/fetcher.py,sha256=
|
266
|
-
nucliadb/search/search/query_parser/filter_expression.py,sha256=fZI8qFRF3h2sa01gwPmDlA5c16mq7ShMOBk-rDaA_fE,6575
|
266
|
+
nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
|
267
|
+
nucliadb/search/search/query_parser/fetcher.py,sha256=nP4EySj2BvH10QgCvgzvp13Nf22wwfHsdLbDoPlH2cQ,16831
|
267
268
|
nucliadb/search/search/query_parser/models.py,sha256=k9cCjTpndP9ynr8A9J8MBmDYmjLBKL1UM4L0GXVuJw0,5031
|
268
|
-
nucliadb/search/search/query_parser/old_filters.py,sha256=
|
269
|
+
nucliadb/search/search/query_parser/old_filters.py,sha256=HircRqYEac_90bNCtFIJZ2RKA90kjbpNOQcp_ArBqR0,9083
|
269
270
|
nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
|
270
271
|
nucliadb/search/search/query_parser/parsers/ask.py,sha256=eTz8wS-EJHuAagR384h6TT64itymFZRpfZJGX8r6aZM,2771
|
271
|
-
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=
|
272
|
-
nucliadb/search/search/query_parser/parsers/common.py,sha256=
|
273
|
-
nucliadb/search/search/query_parser/parsers/find.py,sha256=
|
274
|
-
nucliadb/search/search/query_parser/parsers/graph.py,sha256=
|
275
|
-
nucliadb/search/search/query_parser/parsers/search.py,sha256=
|
276
|
-
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=
|
272
|
+
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=e89kh3nGV9JT9wjdWB8JbC2HPydn0rVk7WsKBo6q3gw,7122
|
273
|
+
nucliadb/search/search/query_parser/parsers/common.py,sha256=mJMPOKurBK7-A7s3oNlPLxHP_yIn4j5Uw8rh_OQtzS4,6339
|
274
|
+
nucliadb/search/search/query_parser/parsers/find.py,sha256=lHVspg-i_eWXvu7BT9WfuFVGVKYhr380y4tDX5yfTD4,12735
|
275
|
+
nucliadb/search/search/query_parser/parsers/graph.py,sha256=zyqdUg5Afmhb2_-hvj9FUCaoLh026MUP1fgY2j-lD7c,9385
|
276
|
+
nucliadb/search/search/query_parser/parsers/search.py,sha256=huhz3lk6y4n7fcHU2XB-90Q34sXIwP5-cwp2rqFtPh8,10477
|
277
|
+
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=xiOQ7_X6MkcZs3W_0DjdVfyk-G1AY6RBx3oG5hsq7ig,11455
|
277
278
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
278
279
|
nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
|
279
280
|
nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
|
@@ -301,7 +302,7 @@ nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
|
|
301
302
|
nucliadb/tests/vectors.py,sha256=CcNKx-E8LPpyvRyljbmb-Tn_wST9Juw2CBoogWrKiTk,62843
|
302
303
|
nucliadb/train/__init__.py,sha256=NVwe5yULoHXb80itIJT8YJYEz2xbiOPQ7_OMys6XJw8,1301
|
303
304
|
nucliadb/train/app.py,sha256=z6xlGVVVaJmZZmLPIVTgkjD-wIz5b0NYlXAQp7hBHYw,2652
|
304
|
-
nucliadb/train/generator.py,sha256=
|
305
|
+
nucliadb/train/generator.py,sha256=fwFYal7VsV0EP7J_g3IOJ-WLpjwqrVo0gEP7vxIlxGs,4152
|
305
306
|
nucliadb/train/lifecycle.py,sha256=3HadM4GRsYb2m-v4jtdr9C-KBEBx8GlrJDArPYi3SWQ,1960
|
306
307
|
nucliadb/train/models.py,sha256=BmgmMjDsu_1Ih5JDAqo6whhume90q0ASJcDP9dkMQm8,1198
|
307
308
|
nucliadb/train/nodes.py,sha256=6QD9ZnAacPyFCzs1jxJzsL3CkVNyADvsfnPjjrvn-NU,5610
|
@@ -315,20 +316,20 @@ nucliadb/train/upload.py,sha256=fTjH1KEL-0ogf3LV0T6ODO0QdPGwdZShSUtFUCAcUlA,3256
|
|
315
316
|
nucliadb/train/uploader.py,sha256=xdLGz1ToDue9Q_M8A-_KYkO-V6fWKYOZQ6IGM4FuwWA,6424
|
316
317
|
nucliadb/train/utils.py,sha256=OxQ8No19nxOxUhwNYDICNt8n2H-gowkAgu2Vt3Hatzk,3163
|
317
318
|
nucliadb/train/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
318
|
-
nucliadb/train/api/utils.py,sha256=
|
319
|
+
nucliadb/train/api/utils.py,sha256=1E5h1dS3dLppXD4k6qH6jmaY0WYa_ChaNHiTW9y336Q,1307
|
319
320
|
nucliadb/train/api/v1/__init__.py,sha256=P4vCIv93r_Cq1WFDDNjy_Wg7zBkzx0S4euXwfPy1LA4,928
|
320
321
|
nucliadb/train/api/v1/router.py,sha256=ukdxn5q1oMar6NSPobgJczWsSxLCHw6DYKlb3zwCiSo,910
|
321
|
-
nucliadb/train/api/v1/shards.py,sha256=
|
322
|
+
nucliadb/train/api/v1/shards.py,sha256=olgjQFKeYSSCbpYhglCRq6Q9WsM9hxOZgk6mOBb4g0o,3367
|
322
323
|
nucliadb/train/api/v1/trainset.py,sha256=kpnpDgiMWr1FKHZJgwH7hue5kzilA8-i9X0YHlNeHuU,2113
|
323
324
|
nucliadb/train/generators/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
324
|
-
nucliadb/train/generators/field_classifier.py,sha256=
|
325
|
-
nucliadb/train/generators/field_streaming.py,sha256=
|
326
|
-
nucliadb/train/generators/image_classifier.py,sha256=
|
327
|
-
nucliadb/train/generators/paragraph_classifier.py,sha256=
|
328
|
-
nucliadb/train/generators/paragraph_streaming.py,sha256=
|
329
|
-
nucliadb/train/generators/question_answer_streaming.py,sha256=
|
330
|
-
nucliadb/train/generators/sentence_classifier.py,sha256=
|
331
|
-
nucliadb/train/generators/token_classifier.py,sha256=
|
325
|
+
nucliadb/train/generators/field_classifier.py,sha256=UcA5snqLNjIHw0VBzXo9ZtSua6o7wBU3tV9_d5qWpRA,3542
|
326
|
+
nucliadb/train/generators/field_streaming.py,sha256=p0xu39D5gaSQc-LagKwpgsVaxm2ULTkWZDPi-Ad1lHc,8378
|
327
|
+
nucliadb/train/generators/image_classifier.py,sha256=46YShcl7nf1_iLXZklWTAFvUNIIPulBwHlc9Y1ZJHZU,1850
|
328
|
+
nucliadb/train/generators/paragraph_classifier.py,sha256=Jk3B8a2zkAf-2-59RECiNmNP1Nz7f0-hfnS4rIE69xA,2817
|
329
|
+
nucliadb/train/generators/paragraph_streaming.py,sha256=axGNYjOTgxGsOcAAvCz_rTYzZCdZf0f1s-Hzn-VvIX0,3646
|
330
|
+
nucliadb/train/generators/question_answer_streaming.py,sha256=YXjWQc_SJ_TyXXJRg8tGkC9IeIzbRDcn0spQw9AlRNY,5740
|
331
|
+
nucliadb/train/generators/sentence_classifier.py,sha256=4JBGtInnWUQrM9wBd-P7Z2lTK1Dka6U3PPDqnLbGnFM,5177
|
332
|
+
nucliadb/train/generators/token_classifier.py,sha256=T8JOVR1vv5g7rn7HtcQcIZ3O5TFqh15uv5rOselLBVo,9617
|
332
333
|
nucliadb/train/generators/utils.py,sha256=ZNwvEVPZr-eP0MW3ABN7a11hPQKaa0NdVaRcgBcTp5w,3601
|
333
334
|
nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
|
334
335
|
nucliadb/writer/app.py,sha256=ABBO8-u4pDAa61b3mCdD0TFhuHAYcxMkgpZSGgWARuE,2736
|
@@ -368,8 +369,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
368
369
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
369
370
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
370
371
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
371
|
-
nucliadb-6.
|
372
|
-
nucliadb-6.
|
373
|
-
nucliadb-6.
|
374
|
-
nucliadb-6.
|
375
|
-
nucliadb-6.
|
372
|
+
nucliadb-6.5.0.post4404.dist-info/METADATA,sha256=5sqpnNr9UzsK7TlG4IxnM1bXMwocq2mCwi-dDOBvb4M,4152
|
373
|
+
nucliadb-6.5.0.post4404.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
374
|
+
nucliadb-6.5.0.post4404.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
375
|
+
nucliadb-6.5.0.post4404.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
376
|
+
nucliadb-6.5.0.post4404.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|