nucliadb 6.3.7.post4114__py3-none-any.whl → 6.3.7.post4119__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. migrations/0017_multiple_writable_shards.py +16 -13
  2. migrations/0025_assign_models_to_kbs_v2.py +3 -6
  3. nucliadb/common/cluster/base.py +6 -4
  4. nucliadb/common/cluster/grpc_node_dummy.py +5 -4
  5. nucliadb/common/cluster/manager.py +7 -9
  6. nucliadb/common/cluster/rebalance.py +2 -1
  7. nucliadb/common/cluster/rollover.py +2 -2
  8. nucliadb/common/cluster/utils.py +2 -1
  9. nucliadb/common/datamanagers/rollover.py +2 -3
  10. nucliadb/common/external_index_providers/base.py +2 -2
  11. nucliadb/common/external_index_providers/pinecone.py +2 -2
  12. nucliadb/common/nidx.py +3 -3
  13. nucliadb/common/vector_index_config.py +39 -0
  14. nucliadb/ingest/consumer/auditing.py +3 -1
  15. nucliadb/ingest/consumer/shard_creator.py +3 -1
  16. nucliadb/ingest/orm/brain.py +10 -9
  17. nucliadb/ingest/orm/brain_v2.py +10 -9
  18. nucliadb/ingest/orm/entities.py +8 -7
  19. nucliadb/ingest/orm/index_message.py +2 -1
  20. nucliadb/ingest/orm/knowledgebox.py +4 -3
  21. nucliadb/ingest/orm/processor/pgcatalog.py +2 -1
  22. nucliadb/ingest/orm/processor/processor.py +2 -3
  23. nucliadb/metrics_exporter.py +2 -1
  24. nucliadb/search/api/v1/knowledgebox.py +1 -1
  25. nucliadb/search/requesters/utils.py +8 -8
  26. nucliadb/search/search/chat/query.py +5 -4
  27. nucliadb/search/search/fetch.py +2 -1
  28. nucliadb/search/search/find_merge.py +9 -8
  29. nucliadb/search/search/graph_merge.py +2 -1
  30. nucliadb/search/search/graph_strategy.py +1 -1
  31. nucliadb/search/search/merge.py +12 -11
  32. nucliadb/search/search/query.py +4 -2
  33. nucliadb/search/search/query_parser/filter_expression.py +2 -1
  34. nucliadb/search/search/query_parser/models.py +2 -1
  35. nucliadb/search/search/query_parser/old_filters.py +2 -1
  36. nucliadb/search/search/query_parser/parsers/find.py +2 -1
  37. nucliadb/search/search/query_parser/parsers/graph.py +3 -1
  38. nucliadb/search/search/query_parser/parsers/search.py +3 -1
  39. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -2
  40. nucliadb/search/search/shards.py +4 -4
  41. nucliadb/train/generators/field_classifier.py +2 -1
  42. nucliadb/train/generators/field_streaming.py +2 -1
  43. nucliadb/train/generators/paragraph_classifier.py +1 -1
  44. nucliadb/train/generators/paragraph_streaming.py +2 -1
  45. nucliadb/train/generators/question_answer_streaming.py +2 -1
  46. nucliadb/train/generators/sentence_classifier.py +1 -1
  47. nucliadb/train/generators/token_classifier.py +2 -1
  48. nucliadb/writer/back_pressure.py +0 -24
  49. {nucliadb-6.3.7.post4114.dist-info → nucliadb-6.3.7.post4119.dist-info}/METADATA +6 -6
  50. {nucliadb-6.3.7.post4114.dist-info → nucliadb-6.3.7.post4119.dist-info}/RECORD +53 -52
  51. {nucliadb-6.3.7.post4114.dist-info → nucliadb-6.3.7.post4119.dist-info}/WHEEL +0 -0
  52. {nucliadb-6.3.7.post4114.dist-info → nucliadb-6.3.7.post4119.dist-info}/entry_points.txt +0 -0
  53. {nucliadb-6.3.7.post4114.dist-info → nucliadb-6.3.7.post4119.dist-info}/top_level.txt +0 -0
@@ -20,6 +20,15 @@
20
20
  import asyncio
21
21
  from typing import Iterable, Optional, Union
22
22
 
23
+ from nidx_protos.nodereader_pb2 import (
24
+ DocumentScored,
25
+ GraphSearchResponse,
26
+ ParagraphResult,
27
+ ParagraphSearchResponse,
28
+ SearchResponse,
29
+ VectorSearchResponse,
30
+ )
31
+
23
32
  from nucliadb.common.external_index_providers.base import TextBlockMatch
24
33
  from nucliadb.common.ids import ParagraphId, VectorId
25
34
  from nucliadb.search import SERVICE_NAME, logger
@@ -50,14 +59,6 @@ from nucliadb_models.search import (
50
59
  ResourceProperties,
51
60
  TextPosition,
52
61
  )
53
- from nucliadb_protos.nodereader_pb2 import (
54
- DocumentScored,
55
- GraphSearchResponse,
56
- ParagraphResult,
57
- ParagraphSearchResponse,
58
- SearchResponse,
59
- VectorSearchResponse,
60
- )
61
62
  from nucliadb_telemetry import metrics
62
63
 
63
64
  from .metrics import merge_observer
@@ -19,6 +19,8 @@
19
19
  #
20
20
 
21
21
 
22
+ from nidx_protos import nodereader_pb2
23
+
22
24
  from nucliadb.common.models_utils.from_proto import RelationNodeTypePbMap, RelationTypePbMap
23
25
  from nucliadb_models.graph import responses as graph_responses
24
26
  from nucliadb_models.graph.responses import (
@@ -26,7 +28,6 @@ from nucliadb_models.graph.responses import (
26
28
  GraphRelationsSearchResponse,
27
29
  GraphSearchResponse,
28
30
  )
29
- from nucliadb_protos import nodereader_pb2
30
31
 
31
32
 
32
33
  def build_graph_response(results: list[nodereader_pb2.GraphSearchResponse]) -> GraphSearchResponse:
@@ -21,6 +21,7 @@ import json
21
21
  from collections import defaultdict
22
22
  from typing import Any, Collection, Iterable, Optional, Union
23
23
 
24
+ from nidx_protos import nodereader_pb2
24
25
  from nuclia_models.predict.generative_responses import (
25
26
  JSONGenerativeResponse,
26
27
  MetaGenerativeResponse,
@@ -73,7 +74,6 @@ from nucliadb_models.search import (
73
74
  TextPosition,
74
75
  UserPrompt,
75
76
  )
76
- from nucliadb_protos import nodereader_pb2
77
77
  from nucliadb_protos.utils_pb2 import RelationNode
78
78
 
79
79
  SCHEMA = {
@@ -22,6 +22,18 @@ import datetime
22
22
  import math
23
23
  from typing import Any, Iterable, Optional, Set, Union
24
24
 
25
+ from nidx_protos.nodereader_pb2 import (
26
+ DocumentResult,
27
+ DocumentScored,
28
+ DocumentSearchResponse,
29
+ GraphSearchResponse,
30
+ ParagraphResult,
31
+ ParagraphSearchResponse,
32
+ SearchResponse,
33
+ SuggestResponse,
34
+ VectorSearchResponse,
35
+ )
36
+
25
37
  from nucliadb.common.ids import FieldId, ParagraphId
26
38
  from nucliadb.common.models_utils import from_proto
27
39
  from nucliadb.common.models_utils.from_proto import RelationTypePbMap
@@ -61,17 +73,6 @@ from nucliadb_models.search import (
61
73
  SortOrder,
62
74
  TextPosition,
63
75
  )
64
- from nucliadb_protos.nodereader_pb2 import (
65
- DocumentResult,
66
- DocumentScored,
67
- DocumentSearchResponse,
68
- GraphSearchResponse,
69
- ParagraphResult,
70
- ParagraphSearchResponse,
71
- SearchResponse,
72
- SuggestResponse,
73
- VectorSearchResponse,
74
- )
75
76
  from nucliadb_protos.utils_pb2 import RelationNode
76
77
 
77
78
  from .metrics import merge_observer
@@ -20,6 +20,9 @@
20
20
  from datetime import datetime
21
21
  from typing import Any, Optional
22
22
 
23
+ from nidx_protos import nodereader_pb2
24
+ from nidx_protos.noderesources_pb2 import Resource
25
+
23
26
  from nucliadb.common import datamanagers
24
27
  from nucliadb.search.search.filters import (
25
28
  translate_label,
@@ -33,8 +36,7 @@ from nucliadb_models.search import (
33
36
  SortOrder,
34
37
  SuggestOptions,
35
38
  )
36
- from nucliadb_protos import nodereader_pb2, utils_pb2
37
- from nucliadb_protos.noderesources_pb2 import Resource
39
+ from nucliadb_protos import utils_pb2
38
40
 
39
41
  from .exceptions import InvalidQueryError
40
42
  from .query_parser.filter_expression import add_and_expression, parse_expression
@@ -20,6 +20,8 @@
20
20
 
21
21
  from typing import Union
22
22
 
23
+ from nidx_protos.nodereader_pb2 import FilterExpression as PBFilterExpression
24
+
23
25
  from nucliadb.common import datamanagers
24
26
  from nucliadb.common.ids import FIELD_TYPE_NAME_TO_STR
25
27
  from nucliadb.search.search.exceptions import InvalidQueryError
@@ -48,7 +50,6 @@ from nucliadb_models.filters import (
48
50
  ResourceMimetype,
49
51
  Status,
50
52
  )
51
- from nucliadb_protos.nodereader_pb2 import FilterExpression as PBFilterExpression
52
53
 
53
54
  # Filters that end up as a facet
54
55
  FacetFilter = Union[
@@ -21,11 +21,12 @@ from dataclasses import dataclass
21
21
  from datetime import datetime
22
22
  from typing import Literal, Optional, Union
23
23
 
24
+ from nidx_protos import nodereader_pb2
24
25
  from pydantic import BaseModel, ConfigDict, Field
25
26
 
26
27
  from nucliadb.search.search.query_parser.fetcher import Fetcher
27
28
  from nucliadb_models import search as search_models
28
- from nucliadb_protos import nodereader_pb2, utils_pb2
29
+ from nucliadb_protos import utils_pb2
29
30
 
30
31
  ### Retrieval
31
32
 
@@ -22,12 +22,13 @@ from dataclasses import dataclass
22
22
  from datetime import datetime
23
23
  from typing import Optional, Union
24
24
 
25
+ from nidx_protos.nodereader_pb2 import FilterExpression
26
+
25
27
  from nucliadb.search.search.filters import translate_label
26
28
  from nucliadb_models.search import (
27
29
  Filter,
28
30
  )
29
31
  from nucliadb_protos import knowledgebox_pb2
30
- from nucliadb_protos.nodereader_pb2 import FilterExpression
31
32
 
32
33
  from .exceptions import InvalidQueryError
33
34
  from .fetcher import Fetcher
@@ -20,6 +20,7 @@
20
20
 
21
21
  from typing import Optional
22
22
 
23
+ from nidx_protos import nodereader_pb2
23
24
  from pydantic import ValidationError
24
25
 
25
26
  from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
@@ -47,7 +48,7 @@ from nucliadb_models.filters import FilterExpression
47
48
  from nucliadb_models.search import (
48
49
  FindRequest,
49
50
  )
50
- from nucliadb_protos import nodereader_pb2, utils_pb2
51
+ from nucliadb_protos import utils_pb2
51
52
 
52
53
  from .common import (
53
54
  parse_keyword_query,
@@ -20,13 +20,15 @@
20
20
 
21
21
  from typing import Optional, Union
22
22
 
23
+ from nidx_protos import nodereader_pb2
24
+
23
25
  from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap, RelationTypeMap
24
26
  from nucliadb.search.search.query_parser.filter_expression import add_and_expression, parse_expression
25
27
  from nucliadb.search.search.query_parser.models import GraphRetrieval
26
28
  from nucliadb.search.search.utils import filter_hidden_resources
27
29
  from nucliadb_models.graph import requests as graph_requests
28
30
  from nucliadb_models.labels import LABEL_HIDDEN
29
- from nucliadb_protos import nodereader_pb2, utils_pb2
31
+ from nucliadb_protos import utils_pb2
30
32
 
31
33
 
32
34
  async def parse_graph_search(kbid: str, item: graph_requests.GraphSearchRequest) -> GraphRetrieval:
@@ -19,6 +19,8 @@
19
19
  #
20
20
  from typing import Optional
21
21
 
22
+ from nidx_protos import nodereader_pb2
23
+
22
24
  from nucliadb.search.search.metrics import query_parser_observer
23
25
  from nucliadb.search.search.query import expand_entities
24
26
  from nucliadb.search.search.query_parser.exceptions import InvalidQueryError
@@ -42,7 +44,7 @@ from nucliadb_models.search import (
42
44
  SortOptions,
43
45
  SortOrder,
44
46
  )
45
- from nucliadb_protos import nodereader_pb2, utils_pb2
47
+ from nucliadb_protos import utils_pb2
46
48
 
47
49
  from .common import (
48
50
  parse_keyword_query,
@@ -19,6 +19,9 @@
19
19
  #
20
20
  from typing import Optional
21
21
 
22
+ from nidx_protos import nodereader_pb2
23
+ from nidx_protos.nodereader_pb2 import SearchRequest
24
+
22
25
  from nucliadb.search.search.filters import translate_label
23
26
  from nucliadb.search.search.metrics import node_features, query_parser_observer
24
27
  from nucliadb.search.search.query import apply_entities_filter, get_sort_field_proto
@@ -26,8 +29,7 @@ from nucliadb.search.search.query_parser.filter_expression import add_and_expres
26
29
  from nucliadb.search.search.query_parser.models import ParsedQuery, PredictReranker, UnitRetrieval
27
30
  from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
28
31
  from nucliadb_models.search import SortOrderMap
29
- from nucliadb_protos import nodereader_pb2, utils_pb2
30
- from nucliadb_protos.nodereader_pb2 import SearchRequest
32
+ from nucliadb_protos import utils_pb2
31
33
 
32
34
 
33
35
  @query_parser_observer.wrap({"type": "convert_retrieval_to_proto"})
@@ -22,9 +22,7 @@ import asyncio
22
22
  import backoff
23
23
  from grpc import StatusCode
24
24
  from grpc.aio import AioRpcError
25
-
26
- from nucliadb.common.cluster.base import AbstractIndexNode
27
- from nucliadb_protos.nodereader_pb2 import (
25
+ from nidx_protos.nodereader_pb2 import (
28
26
  GetShardRequest,
29
27
  GraphSearchRequest,
30
28
  GraphSearchResponse,
@@ -33,7 +31,9 @@ from nucliadb_protos.nodereader_pb2 import (
33
31
  SuggestRequest,
34
32
  SuggestResponse,
35
33
  )
36
- from nucliadb_protos.noderesources_pb2 import Shard
34
+ from nidx_protos.noderesources_pb2 import Shard
35
+
36
+ from nucliadb.common.cluster.base import AbstractIndexNode
37
37
  from nucliadb_telemetry import metrics
38
38
 
39
39
  node_observer = metrics.Observer(
@@ -20,6 +20,8 @@
20
20
 
21
21
  from typing import AsyncGenerator
22
22
 
23
+ from nidx_protos.nodereader_pb2 import StreamRequest
24
+
23
25
  from nucliadb.common.cluster.base import AbstractIndexNode
24
26
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
25
27
  from nucliadb.train import logger
@@ -30,7 +32,6 @@ from nucliadb_protos.dataset_pb2 import (
30
32
  TextLabel,
31
33
  TrainSet,
32
34
  )
33
- from nucliadb_protos.nodereader_pb2 import StreamRequest
34
35
 
35
36
 
36
37
  def field_classification_batch_generator(
@@ -20,6 +20,8 @@
20
20
 
21
21
  from typing import AsyncGenerator, Optional
22
22
 
23
+ from nidx_protos.nodereader_pb2 import StreamRequest
24
+
23
25
  from nucliadb.common.cluster.base import AbstractIndexNode
24
26
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
25
27
  from nucliadb.train import logger
@@ -29,7 +31,6 @@ from nucliadb_protos.dataset_pb2 import (
29
31
  FieldStreamingBatch,
30
32
  TrainSet,
31
33
  )
32
- from nucliadb_protos.nodereader_pb2 import StreamRequest
33
34
  from nucliadb_protos.resources_pb2 import Basic, FieldComputedMetadata
34
35
  from nucliadb_protos.utils_pb2 import ExtractedText
35
36
 
@@ -21,6 +21,7 @@
21
21
  from typing import AsyncGenerator
22
22
 
23
23
  from fastapi import HTTPException
24
+ from nidx_protos.nodereader_pb2 import StreamRequest
24
25
 
25
26
  from nucliadb.common.cluster.base import AbstractIndexNode
26
27
  from nucliadb.train.generators.utils import batchify, get_paragraph
@@ -30,7 +31,6 @@ from nucliadb_protos.dataset_pb2 import (
30
31
  TextLabel,
31
32
  TrainSet,
32
33
  )
33
- from nucliadb_protos.nodereader_pb2 import StreamRequest
34
34
 
35
35
 
36
36
  def paragraph_classification_batch_generator(
@@ -20,6 +20,8 @@
20
20
 
21
21
  from typing import AsyncGenerator
22
22
 
23
+ from nidx_protos.nodereader_pb2 import StreamRequest
24
+
23
25
  from nucliadb.common.cluster.base import AbstractIndexNode
24
26
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
25
27
  from nucliadb.train import logger
@@ -29,7 +31,6 @@ from nucliadb_protos.dataset_pb2 import (
29
31
  ParagraphStreamItem,
30
32
  TrainSet,
31
33
  )
32
- from nucliadb_protos.nodereader_pb2 import StreamRequest
33
34
 
34
35
 
35
36
  def paragraph_streaming_batch_generator(
@@ -20,6 +20,8 @@
20
20
 
21
21
  from typing import AsyncGenerator
22
22
 
23
+ from nidx_protos.nodereader_pb2 import StreamRequest
24
+
23
25
  from nucliadb.common.cluster.base import AbstractIndexNode
24
26
  from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR, FIELD_TYPE_STR_TO_PB
25
27
  from nucliadb.train import logger
@@ -33,7 +35,6 @@ from nucliadb_protos.dataset_pb2 import (
33
35
  QuestionAnswerStreamItem,
34
36
  TrainSet,
35
37
  )
36
- from nucliadb_protos.nodereader_pb2 import StreamRequest
37
38
  from nucliadb_protos.resources_pb2 import (
38
39
  FieldID,
39
40
  QuestionAnswer,
@@ -21,6 +21,7 @@
21
21
  from typing import AsyncGenerator
22
22
 
23
23
  from fastapi import HTTPException
24
+ from nidx_protos.nodereader_pb2 import StreamRequest
24
25
 
25
26
  from nucliadb.common.cluster.base import AbstractIndexNode
26
27
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
@@ -32,7 +33,6 @@ from nucliadb_protos.dataset_pb2 import (
32
33
  SentenceClassificationBatch,
33
34
  TrainSet,
34
35
  )
35
- from nucliadb_protos.nodereader_pb2 import StreamRequest
36
36
 
37
37
 
38
38
  def sentence_classification_batch_generator(
@@ -21,6 +21,8 @@
21
21
  from collections import OrderedDict
22
22
  from typing import AsyncGenerator, cast
23
23
 
24
+ from nidx_protos.nodereader_pb2 import StreamFilter, StreamRequest
25
+
24
26
  from nucliadb.common.cluster.base import AbstractIndexNode
25
27
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
26
28
  from nucliadb.train import logger
@@ -30,7 +32,6 @@ from nucliadb_protos.dataset_pb2 import (
30
32
  TokensClassification,
31
33
  TrainSet,
32
34
  )
33
- from nucliadb_protos.nodereader_pb2 import StreamFilter, StreamRequest
34
35
 
35
36
  NERS_DICT = dict[str, dict[str, list[tuple[int, int]]]]
36
37
  POSITION_DICT = OrderedDict[tuple[int, int], tuple[str, str]]
@@ -25,7 +25,6 @@ from dataclasses import dataclass
25
25
  from datetime import datetime, timedelta
26
26
  from typing import Optional
27
27
 
28
- from async_lru import alru_cache
29
28
  from cachetools import TTLCache
30
29
  from fastapi import HTTPException, Request
31
30
 
@@ -442,29 +441,6 @@ def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
442
441
  return datetime.utcnow() + timedelta(seconds=delta_seconds)
443
442
 
444
443
 
445
- @alru_cache(maxsize=1024, ttl=60 * 15)
446
- async def get_nodes_for_kb_active_shards(context: ApplicationContext, kbid: str) -> list[str]:
447
- with back_pressure_observer({"type": "get_kb_active_shard"}):
448
- active_shard = await get_kb_active_shard(context, kbid)
449
- if active_shard is None:
450
- # KB doesn't exist or has been deleted
451
- logger.debug("No active shard found for KB", extra={"kbid": kbid})
452
- return []
453
- return [replica.node for replica in active_shard.replicas]
454
-
455
-
456
- @alru_cache(maxsize=1024, ttl=60 * 60)
457
- async def get_nodes_for_resource_shard(
458
- context: ApplicationContext, kbid: str, resource_uuid: str
459
- ) -> list[str]:
460
- with back_pressure_observer({"type": "get_resource_shard"}):
461
- resource_shard = await get_resource_shard(context, kbid, resource_uuid)
462
- if resource_shard is None:
463
- # Resource doesn't exist or KB has been deleted
464
- return []
465
- return [replica.node for replica in resource_shard.replicas]
466
-
467
-
468
444
  async def get_nats_consumer_pending_messages(
469
445
  nats_manager: NatsConnectionManager, *, stream: str, consumer: str
470
446
  ) -> int:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.7.post4114
3
+ Version: 6.3.7.post4119
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.7.post4114
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.7.post4114
25
- Requires-Dist: nucliadb-protos>=6.3.7.post4114
26
- Requires-Dist: nucliadb-models>=6.3.7.post4114
27
- Requires-Dist: nidx-protos>=6.3.7.post4114
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.7.post4119
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.7.post4119
25
+ Requires-Dist: nucliadb-protos>=6.3.7.post4119
26
+ Requires-Dist: nucliadb-models>=6.3.7.post4119
27
+ Requires-Dist: nidx-protos>=6.3.7.post4119
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]