nucliadb 6.9.1.post5180__py3-none-any.whl → 6.9.2.post5282__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb might be problematic. Click here for more details.

Files changed (35)
  1. migrations/pg/0010_shards_index.py +34 -0
  2. nucliadb/common/cluster/manager.py +3 -19
  3. nucliadb/common/cluster/rebalance.py +484 -110
  4. nucliadb/common/cluster/rollover.py +29 -0
  5. nucliadb/common/cluster/utils.py +26 -0
  6. nucliadb/common/datamanagers/atomic.py +6 -0
  7. nucliadb/common/filter_expression.py +15 -32
  8. nucliadb/ingest/consumer/service.py +1 -2
  9. nucliadb/ingest/consumer/shard_creator.py +16 -5
  10. nucliadb/ingest/fields/base.py +0 -17
  11. nucliadb/ingest/orm/knowledgebox.py +78 -29
  12. nucliadb/ingest/orm/processor/processor.py +21 -16
  13. nucliadb/ingest/service/writer.py +12 -5
  14. nucliadb/migrator/datamanager.py +1 -7
  15. nucliadb/purge/__init__.py +2 -7
  16. nucliadb/reader/api/v1/learning_config.py +21 -0
  17. nucliadb/search/api/v1/find.py +1 -4
  18. nucliadb/search/api/v1/resource/ask.py +21 -1
  19. nucliadb/search/api/v1/search.py +1 -4
  20. nucliadb/search/search/chat/ask.py +0 -1
  21. nucliadb/search/search/chat/prompt.py +45 -13
  22. nucliadb/search/search/chat/query.py +0 -1
  23. nucliadb/search/search/find.py +1 -6
  24. nucliadb/search/search/query.py +0 -23
  25. nucliadb/search/search/query_parser/models.py +0 -1
  26. nucliadb/search/search/query_parser/parsers/catalog.py +2 -2
  27. nucliadb/search/search/query_parser/parsers/find.py +0 -8
  28. nucliadb/search/search/query_parser/parsers/search.py +0 -8
  29. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -11
  30. nucliadb/writer/api/v1/knowledgebox.py +15 -22
  31. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/METADATA +8 -9
  32. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/RECORD +35 -34
  33. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/WHEEL +0 -0
  34. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/entry_points.txt +0 -0
  35. {nucliadb-6.9.1.post5180.dist-info → nucliadb-6.9.2.post5282.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,34 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from nucliadb.common.maindb.pg import PGTransaction
22
+
23
+
24
+ async def migrate(txn: PGTransaction) -> None:
25
+ # Concurrent index must be created outside of a transaction but psycopg automatically
26
+ # creates transactions. We temporarily disable this for building indexes.
27
+ await txn.connection.commit()
28
+ try:
29
+ await txn.connection.set_autocommit(True)
30
+ await txn.connection.execute(
31
+ "CREATE INDEX CONCURRENTLY ON resources (key, value) WHERE key ~ '/kbs/[^/]*/r/[^/]*/shard$';"
32
+ )
33
+ finally:
34
+ await txn.connection.set_autocommit(False)
@@ -43,8 +43,6 @@ from nucliadb_protos import knowledgebox_pb2, writer_pb2
43
43
  from nucliadb_telemetry import errors
44
44
  from nucliadb_utils.utilities import get_storage
45
45
 
46
- from .settings import settings
47
-
48
46
  logger = logging.getLogger(__name__)
49
47
 
50
48
 
@@ -113,6 +111,8 @@ class KBShardManager:
113
111
  self,
114
112
  txn: Transaction,
115
113
  kbid: str,
114
+ *,
115
+ prewarm_enabled: bool,
116
116
  ) -> writer_pb2.ShardObject:
117
117
  kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
118
118
  if kb_shards is None:
@@ -133,6 +133,7 @@ class KBShardManager:
133
133
  req = NewShardRequest(
134
134
  kbid=kbid,
135
135
  vectorsets_configs=vectorsets,
136
+ prewarm_enabled=prewarm_enabled,
136
137
  )
137
138
 
138
139
  resp = await nidx_api.NewShard(req) # type: ignore
@@ -232,23 +233,6 @@ class KBShardManager:
232
233
  indexpb.shard = shard.nidx_shard_id
233
234
  await nidx.index(indexpb)
234
235
 
235
- def should_create_new_shard(self, num_paragraphs: int) -> bool:
236
- return num_paragraphs > settings.max_shard_paragraphs
237
-
238
- async def maybe_create_new_shard(
239
- self,
240
- kbid: str,
241
- num_paragraphs: int,
242
- ):
243
- if not self.should_create_new_shard(num_paragraphs):
244
- return
245
-
246
- logger.info({"message": "Adding shard", "kbid": kbid})
247
-
248
- async with datamanagers.with_transaction() as txn:
249
- await self.create_shard_by_kbid(txn, kbid)
250
- await txn.commit()
251
-
252
236
  async def create_vectorset(self, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
253
237
  """Create a new vectorset in all KB shards."""
254
238