nucliadb 6.3.5.post3865__py3-none-any.whl → 6.3.5.post3886__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,16 @@ async def get_kb_shards(
38
38
  return await get_kv_pb(txn, key, writer_pb2.Shards, for_update=for_update)
39
39
 
40
40
 
41
+ async def is_kb_shard(txn: Transaction, *, kbid: str, shard_id: str, for_update: bool = False) -> bool:
42
+ shards = await get_kb_shards(txn, kbid=kbid, for_update=for_update)
43
+ if shards is None:
44
+ return False
45
+ for shard in shards.shards:
46
+ if shard.shard == shard_id:
47
+ return True
48
+ return False
49
+
50
+
41
51
  async def update_kb_shards(txn: Transaction, *, kbid: str, shards: writer_pb2.Shards) -> None:
42
52
  key = KB_SHARDS.format(kbid=kbid)
43
53
  await txn.set(key, shards.SerializeToString())
@@ -20,7 +20,6 @@
20
20
  import argparse
21
21
  import asyncio
22
22
  import importlib.metadata
23
- from dataclasses import dataclass
24
23
  from typing import Optional
25
24
 
26
25
  from grpc.aio import AioRpcError
@@ -28,8 +27,6 @@ from grpc.aio import AioRpcError
28
27
  from nucliadb.common import datamanagers
29
28
  from nucliadb.common.cluster import manager
30
29
  from nucliadb.common.cluster.base import AbstractIndexNode
31
- from nucliadb.common.cluster.exceptions import ShardsNotFound
32
- from nucliadb.common.cluster.manager import KBShardManager
33
30
  from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
34
31
  from nucliadb.common.maindb.driver import Driver
35
32
  from nucliadb.common.maindb.utils import setup_driver, teardown_driver
@@ -38,17 +35,13 @@ from nucliadb.ingest import logger
38
35
  from nucliadb_telemetry import errors
39
36
  from nucliadb_telemetry.logs import setup_logging
40
37
 
41
-
42
- @dataclass
43
- class ShardLocation:
44
- kbid: str
45
- node_id: str
38
+ ShardKb = str
46
39
 
47
40
 
48
41
  UNKNOWN_KB = "unknown"
49
42
 
50
43
 
51
- async def detect_orphan_shards(driver: Driver) -> dict[str, ShardLocation]:
44
+ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardKb]:
52
45
  """Detect orphan shards in the system. An orphan shard is one indexed but
53
46
  not referenced for any stored KB.
54
47
 
@@ -58,59 +51,63 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardLocation]:
58
51
  indexed_shards = await _get_indexed_shards()
59
52
  stored_shards = await _get_stored_shards(driver)
60
53
 
61
- # Log an error in case we found a shard stored but not indexed, this should
62
- # never happen as shards are created in the index node and then stored in
63
- # maindb
54
+ # In normal conditions, this should never happen as shards are created first
55
+ # in the index and then in maindb. However, if a new shard has been created
56
+ # between index and maindb scans, we can also see it here
64
57
  not_indexed_shards = stored_shards.keys() - indexed_shards.keys()
65
58
  for shard_id in not_indexed_shards:
66
- location = stored_shards[shard_id]
67
- logger.error(
68
- "Found a shard on maindb not indexed in the index nodes",
59
+ kbid = stored_shards[shard_id]
60
+ logger.info(
61
+ "Found a shard on maindb not indexed in the index nodes. "
62
+ "This can be either a shard not indexed (error) or a brand new shard. "
63
+ "If you run purge and find it again, it's probably an error",
69
64
  extra={
70
65
  "shard_id": shard_id,
71
- "kbid": location.kbid,
72
- "node_id": location.node_id,
66
+ "kbid": kbid,
73
67
  },
74
68
  )
75
69
 
76
70
  orphan_shard_ids = indexed_shards.keys() - stored_shards.keys()
77
- orphan_shards: dict[str, ShardLocation] = {}
71
+ orphan_shards: dict[str, ShardKb] = {}
78
72
  node = manager.get_nidx_fake_node()
79
- async with datamanagers.with_ro_transaction() as txn:
80
- for shard_id in orphan_shard_ids:
81
- kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
82
- # Shards with knwon KB ids can be checked and ignore those comming from
83
- # an ongoing migration/rollover
84
- if kbid != UNKNOWN_KB:
85
- skip = await datamanagers.rollover.is_rollover_shard(txn, kbid=kbid, shard_id=shard_id)
73
+ for shard_id in orphan_shard_ids:
74
+ kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
75
+ # Shards with knwon KB ids can be checked and ignore those comming from
76
+ # an ongoing migration/rollover (ongoing or finished)
77
+ if kbid != UNKNOWN_KB:
78
+ async with datamanagers.with_ro_transaction() as txn:
79
+ skip = await datamanagers.rollover.is_rollover_shard(
80
+ txn, kbid=kbid, shard_id=shard_id
81
+ ) or await datamanagers.cluster.is_kb_shard(txn, kbid=kbid, shard_id=shard_id)
86
82
  if skip:
87
83
  continue
88
- orphan_shards[shard_id] = ShardLocation(kbid=kbid, node_id="nidx")
84
+ orphan_shards[shard_id] = kbid
85
+
86
+ for shard_id in orphan_shard_ids:
87
+ kbid = await _get_kbid(node, shard_id) or UNKNOWN_KB
88
+ orphan_shards[shard_id] = kbid
89
89
  return orphan_shards
90
90
 
91
91
 
92
- async def _get_indexed_shards() -> dict[str, ShardLocation]:
92
+ async def _get_indexed_shards() -> dict[str, ShardKb]:
93
93
  nidx = manager.get_nidx_fake_node()
94
94
  shards = await nidx.list_shards()
95
- return {shard_id: ShardLocation(kbid=UNKNOWN_KB, node_id="nidx") for shard_id in shards}
95
+ return {shard_id: UNKNOWN_KB for shard_id in shards}
96
96
 
97
97
 
98
- async def _get_stored_shards(driver: Driver) -> dict[str, ShardLocation]:
99
- stored_shards: dict[str, ShardLocation] = {}
100
- shards_manager = KBShardManager()
98
+ async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
99
+ stored_shards: dict[str, ShardKb] = {}
101
100
 
102
101
  async with driver.transaction(read_only=True) as txn:
103
102
  async for kbid, _ in datamanagers.kb.get_kbs(txn):
104
- try:
105
- kb_shards = await shards_manager.get_shards_by_kbid(kbid)
106
- except ShardsNotFound:
103
+ kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
104
+ if kb_shards is None:
107
105
  logger.warning("KB not found while looking for orphan shards", extra={"kbid": kbid})
108
106
  continue
109
- else:
110
- for shard_object_pb in kb_shards:
111
- stored_shards[shard_object_pb.nidx_shard_id] = ShardLocation(
112
- kbid=kbid, node_id="nidx"
113
- )
107
+
108
+ for shard_object_pb in kb_shards.shards:
109
+ stored_shards[shard_object_pb.nidx_shard_id] = kbid
110
+
114
111
  return stored_shards
115
112
 
116
113
 
@@ -120,7 +117,7 @@ async def _get_kbid(node: AbstractIndexNode, shard_id: str) -> Optional[str]:
120
117
  shard_pb = await node.get_shard(shard_id)
121
118
  except AioRpcError as grpc_error:
122
119
  logger.error(
123
- "Can't get shard while looking for orphans in index nodes, is it broken?",
120
+ "Can't get shard while looking for orphans in nidx, is there something broken?",
124
121
  exc_info=grpc_error,
125
122
  extra={
126
123
  "node_id": node.id,
@@ -136,11 +133,11 @@ async def report_orphan_shards(driver: Driver):
136
133
  orphan_shards = await detect_orphan_shards(driver)
137
134
  logger.info(f"Found {len(orphan_shards)} orphan shards")
138
135
  async with driver.transaction(read_only=True) as txn:
139
- for shard_id, location in orphan_shards.items():
140
- if location.kbid == UNKNOWN_KB:
136
+ for shard_id, kbid in orphan_shards.items():
137
+ if kbid == UNKNOWN_KB:
141
138
  msg = "Found orphan shard but could not get KB info"
142
139
  else:
143
- kb_exists = await datamanagers.kb.exists_kb(txn, kbid=location.kbid)
140
+ kb_exists = await datamanagers.kb.exists_kb(txn, kbid=kbid)
144
141
  if kb_exists:
145
142
  msg = "Found orphan shard for existing KB"
146
143
  else:
@@ -150,8 +147,7 @@ async def report_orphan_shards(driver: Driver):
150
147
  msg,
151
148
  extra={
152
149
  "shard_id": shard_id,
153
- "kbid": location.kbid,
154
- "node_id": location.node_id,
150
+ "kbid": kbid,
155
151
  },
156
152
  )
157
153
 
@@ -161,13 +157,12 @@ async def purge_orphan_shards(driver: Driver):
161
157
  logger.info(f"Found {len(orphan_shards)} orphan shards. Purge starts...")
162
158
 
163
159
  node = manager.get_nidx_fake_node()
164
- for shard_id, location in orphan_shards.items():
160
+ for shard_id, kbid in orphan_shards.items():
165
161
  logger.info(
166
162
  "Deleting orphan shard from index node",
167
163
  extra={
168
164
  "shard_id": shard_id,
169
- "kbid": location.kbid,
170
- "node_id": location.node_id,
165
+ "kbid": kbid,
171
166
  },
172
167
  )
173
168
  await node.delete_shard(shard_id)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.5.post3865
3
+ Version: 6.3.5.post3886
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,14 +20,14 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3865
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3865
25
- Requires-Dist: nucliadb-protos>=6.3.5.post3865
26
- Requires-Dist: nucliadb-models>=6.3.5.post3865
27
- Requires-Dist: nidx-protos>=6.3.5.post3865
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3886
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3886
25
+ Requires-Dist: nucliadb-protos>=6.3.5.post3886
26
+ Requires-Dist: nucliadb-models>=6.3.5.post3886
27
+ Requires-Dist: nidx-protos>=6.3.5.post3886
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
- Requires-Dist: uvicorn
30
+ Requires-Dist: uvicorn[standard]
31
31
  Requires-Dist: argdantic
32
32
  Requires-Dist: aiohttp>=3.11.11
33
33
  Requires-Dist: lru-dict>=1.1.7
@@ -72,7 +72,7 @@ nucliadb/common/context/__init__.py,sha256=ZLUvKuIPaolKeA3aeZa2JcHwCIaEauNu8WpdK
72
72
  nucliadb/common/context/fastapi.py,sha256=j3HZ3lne6mIfw1eEar2het8RWzv6UruUZpXaKieSLOs,1527
73
73
  nucliadb/common/datamanagers/__init__.py,sha256=jksw4pXyXb05SG3EN-BPBrhc1u1Ge_m21PYqD7NYQEs,2118
74
74
  nucliadb/common/datamanagers/atomic.py,sha256=WihdtBWQIAuElZQjh1xQ--q5dJowwlkovqsW-OB_t2k,3230
75
- nucliadb/common/datamanagers/cluster.py,sha256=psTwAWSLj83vhFnC1iJJ6holrolAI4nKos9PuEWspYY,1500
75
+ nucliadb/common/datamanagers/cluster.py,sha256=iU0b7AESm1Yi8Wp3pIKgqixZGNMjeBrxSpvEKsaZKgY,1831
76
76
  nucliadb/common/datamanagers/entities.py,sha256=gI-0mbMlqrr9FiyhexEh6czhgYcMxE2s9m4o866EK9o,5340
77
77
  nucliadb/common/datamanagers/exceptions.py,sha256=Atz_PP_GGq4jgJaWcAkcRbHBoBaGcC9yJvFteylKtTE,883
78
78
  nucliadb/common/datamanagers/fields.py,sha256=9KqBzTssAT68FR5hd17Xu_CSwAYdKFuYic1ITnrfFNc,3971
@@ -170,7 +170,7 @@ nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,28
170
170
  nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
171
171
  nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
172
172
  nucliadb/purge/__init__.py,sha256=UXbto56EWYLwZj6uEc-flQVe3gDDNFtM6EV-aIkryPU,12353
173
- nucliadb/purge/orphan_shards.py,sha256=AU1Jfc4qtQFasB6dkuGz0a_Zjs-i7liUTf7Xpl8R8ng,7939
173
+ nucliadb/purge/orphan_shards.py,sha256=WSQmVQJQ-rAoQAmypOUJBpDP-9VJchWao3ZLTzHdJ1U,7764
174
174
  nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
175
175
  nucliadb/reader/app.py,sha256=Se-BFTE6d1v1msLzQn4q5XIhjnSxa2ckDSHdvm7NRf8,3096
176
176
  nucliadb/reader/lifecycle.py,sha256=5jYyzMD1tpIh-OYbQoNMjKZ0-3D9KFnULa3B_Vf2xyY,1740
@@ -355,8 +355,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
355
355
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
356
356
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
357
357
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
358
- nucliadb-6.3.5.post3865.dist-info/METADATA,sha256=Rc22hxCQhijcEqGOWxspP2qpzON-fTJFhZjRg9VAxzY,4291
359
- nucliadb-6.3.5.post3865.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
360
- nucliadb-6.3.5.post3865.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
361
- nucliadb-6.3.5.post3865.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
362
- nucliadb-6.3.5.post3865.dist-info/RECORD,,
358
+ nucliadb-6.3.5.post3886.dist-info/METADATA,sha256=fshbhRod9yW4otVjd60tvgPUwOAklTc0Wh8VTcp-lfo,4301
359
+ nucliadb-6.3.5.post3886.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
360
+ nucliadb-6.3.5.post3886.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
361
+ nucliadb-6.3.5.post3886.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
362
+ nucliadb-6.3.5.post3886.dist-info/RECORD,,