nucliadb 6.2.1.post2954__py3-none-any.whl → 6.2.1.post2972__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. nucliadb/common/cluster/manager.py +33 -331
  2. nucliadb/common/cluster/rebalance.py +2 -2
  3. nucliadb/common/cluster/rollover.py +12 -71
  4. nucliadb/common/cluster/standalone/utils.py +0 -43
  5. nucliadb/common/cluster/utils.py +0 -16
  6. nucliadb/common/nidx.py +21 -23
  7. nucliadb/health.py +0 -7
  8. nucliadb/ingest/app.py +0 -8
  9. nucliadb/ingest/consumer/auditing.py +1 -1
  10. nucliadb/ingest/consumer/shard_creator.py +1 -1
  11. nucliadb/ingest/orm/entities.py +3 -6
  12. nucliadb/purge/orphan_shards.py +6 -4
  13. nucliadb/search/api/v1/knowledgebox.py +1 -5
  14. nucliadb/search/predict.py +4 -4
  15. nucliadb/search/requesters/utils.py +1 -2
  16. nucliadb/search/search/chat/ask.py +18 -11
  17. nucliadb/search/search/chat/query.py +1 -1
  18. nucliadb/search/search/shards.py +19 -0
  19. nucliadb/standalone/introspect.py +0 -25
  20. nucliadb/train/lifecycle.py +0 -6
  21. nucliadb/train/nodes.py +1 -5
  22. nucliadb/writer/back_pressure.py +17 -46
  23. nucliadb/writer/settings.py +2 -2
  24. {nucliadb-6.2.1.post2954.dist-info → nucliadb-6.2.1.post2972.dist-info}/METADATA +5 -7
  25. {nucliadb-6.2.1.post2954.dist-info → nucliadb-6.2.1.post2972.dist-info}/RECORD +29 -39
  26. nucliadb/common/cluster/discovery/__init__.py +0 -19
  27. nucliadb/common/cluster/discovery/base.py +0 -178
  28. nucliadb/common/cluster/discovery/k8s.py +0 -301
  29. nucliadb/common/cluster/discovery/manual.py +0 -57
  30. nucliadb/common/cluster/discovery/single.py +0 -51
  31. nucliadb/common/cluster/discovery/types.py +0 -32
  32. nucliadb/common/cluster/discovery/utils.py +0 -67
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
  34. nucliadb/common/cluster/standalone/index_node.py +0 -123
  35. nucliadb/common/cluster/standalone/service.py +0 -84
  36. {nucliadb-6.2.1.post2954.dist-info → nucliadb-6.2.1.post2972.dist-info}/WHEEL +0 -0
  37. {nucliadb-6.2.1.post2954.dist-info → nucliadb-6.2.1.post2972.dist-info}/entry_points.txt +0 -0
  38. {nucliadb-6.2.1.post2954.dist-info → nucliadb-6.2.1.post2972.dist-info}/top_level.txt +0 -0
  39. {nucliadb-6.2.1.post2954.dist-info → nucliadb-6.2.1.post2972.dist-info}/zip-safe +0 -0
nucliadb/common/cluster/discovery/k8s.py
@@ -1,301 +0,0 @@
- # Copyright (C) 2021 Bosutech XXI S.L.
- #
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
- # For commercial licensing, contact us at info@nuclia.com.
- #
- # AGPL:
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- import asyncio
- import concurrent.futures
- import logging
- import os
- import time
- from typing import TypedDict
-
- import kubernetes_asyncio.client # type: ignore
- import kubernetes_asyncio.client.models.v1_container_status # type: ignore
- import kubernetes_asyncio.client.models.v1_object_meta # type: ignore
- import kubernetes_asyncio.client.models.v1_pod # type: ignore
- import kubernetes_asyncio.client.models.v1_pod_status # type: ignore
- import kubernetes_asyncio.config # type: ignore
- import kubernetes_asyncio.watch # type: ignore
-
- from nucliadb.common.cluster import manager
- from nucliadb.common.cluster.discovery.base import (
-     AVAILABLE_NODES,
-     AbstractClusterDiscovery,
- )
- from nucliadb.common.cluster.discovery.types import IndexNodeMetadata
- from nucliadb.common.cluster.exceptions import NodeConnectionError
- from nucliadb.common.cluster.settings import Settings
-
- logger = logging.getLogger(__name__)
-
-
- class EventType(TypedDict):
-     type: str
-     object: kubernetes_asyncio.client.models.v1_pod.V1Pod
-
-
- class KubernetesDiscovery(AbstractClusterDiscovery):
-     """
-     Load cluster members from kubernetes.
-     """
-
-     node_heartbeat_interval = 10
-     cluster_task: asyncio.Task
-     update_node_data_cache_task: asyncio.Task
-
-     def __init__(self, settings: Settings) -> None:
-         super().__init__(settings)
-         self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-         self.node_id_cache: dict[str, IndexNodeMetadata] = {}
-         self.update_lock = asyncio.Lock()
-
-     async def get_node_metadata(
-         self, pod_name: str, node_ip: str, read_replica: bool
-     ) -> IndexNodeMetadata:
-         async with self.update_lock:
-             if pod_name not in self.node_id_cache:
-                 self.node_id_cache[pod_name] = await self._query_node_metadata(node_ip, read_replica)
-             else:
-                 self.node_id_cache[pod_name].address = node_ip
-                 self.node_id_cache[pod_name].updated_at = time.time()
-             return self.node_id_cache[pod_name]
-
-     async def update_node(self, event: EventType) -> None:
-         """
-         Update node metadata when a pod is updated.
-
-         This method will update global node state by utilizing the cluster manager
-         to add or remove nodes.
-         """
-         status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event["object"].status
-         event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = event[
-             "object"
-         ].metadata
-
-         ready = status.container_statuses is not None
-         if event["type"] == "DELETED":
-             ready = False
-         elif status.container_statuses is not None:
-             container_statuses: list[
-                 kubernetes_asyncio.client.models.v1_container_status.V1ContainerStatus
-             ] = status.container_statuses
-             for container_status in container_statuses:
-                 if container_status.name not in ("reader", "writer"):
-                     continue
-                 if not container_status.ready or status.pod_ip is None:
-                     ready = False
-                     break
-
-         pod_name = event_metadata.name
-         read_replica = event_metadata.labels.get("readReplica", "") == "true"
-         if not ready:
-             if pod_name not in self.node_id_cache:
-                 logger.debug(
-                     "Node not ready and not in cache, ignore",
-                     extra={"pod_name": pod_name},
-                 )
-                 return
-             else:
-                 node_data = self.node_id_cache[pod_name]
-         else:
-             try:
-                 node_data = await self.get_node_metadata(
-                     pod_name,
-                     status.pod_ip,
-                     read_replica=read_replica,
-                 )
-             except NodeConnectionError: # pragma: no cover
-                 logger.warning(
-                     "Error connecting to node",
-                     extra={
-                         "pod_name": pod_name,
-                         "node_ip": status.pod_ip,
-                         "read_replica": read_replica,
-                     },
-                 )
-                 raise
-
-         if ready:
-             node = manager.get_index_node(node_data.node_id)
-             if node is None:
-                 logger.info(
-                     "Adding node",
-                     extra={
-                         "node_id": node_data.node_id,
-                         "pod_name": pod_name,
-                         "address": node_data.address,
-                     },
-                 )
-                 manager.add_index_node(
-                     id=node_data.node_id,
-                     address=node_data.address,
-                     shard_count=node_data.shard_count,
-                     available_disk=node_data.available_disk,
-                     primary_id=node_data.primary_id,
-                 )
-             else:
-                 logger.debug(
-                     "Update node",
-                     extra={"pod_name": pod_name, "node_id": node_data.node_id},
-                 )
-                 node.address = node_data.address
-                 node.shard_count = node_data.shard_count
-         else:
-             node = manager.get_index_node(node_data.node_id)
-             if node is not None:
-                 logger.info(
-                     f"Remove node",
-                     extra={
-                         "node_id": node_data.node_id,
-                         "pod_name": pod_name,
-                         "address": node.address,
-                     },
-                 )
-                 manager.remove_index_node(node_data.node_id, node_data.primary_id)
-             if pod_name in self.node_id_cache:
-                 del self.node_id_cache[pod_name]
-
-         AVAILABLE_NODES.set(len(manager.get_index_nodes()))
-
-     async def watch_k8s_for_updates(self) -> None:
-         if os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount/token"):
-             kubernetes_asyncio.config.load_incluster_config()
-         else:
-             await kubernetes_asyncio.config.load_kube_config()
-
-         async with kubernetes_asyncio.client.ApiClient() as api:
-             v1 = kubernetes_asyncio.client.CoreV1Api(api)
-             watch = kubernetes_asyncio.watch.Watch()
-             try:
-                 while True:
-                     try:
-                         async for event in watch.stream(
-                             v1.list_namespaced_pod,
-                             namespace=self.settings.cluster_discovery_kubernetes_namespace,
-                             label_selector=self.settings.cluster_discovery_kubernetes_selector,
-                             timeout_seconds=30,
-                         ):
-                             try:
-                                 await self.update_node(event)
-                             except NodeConnectionError: # pragma: no cover
-                                 pass
-                             except Exception: # pragma: no cover
-                                 logger.exception("Error while updating node", exc_info=True)
-                     except (
-                         asyncio.CancelledError,
-                         KeyboardInterrupt,
-                         SystemExit,
-                         RuntimeError,
-                     ): # pragma: no cover
-                         return
-                     except Exception: # pragma: no cover
-                         logger.exception(
-                             "Error while watching kubernetes. Trying again in 5 seconds.",
-                             exc_info=True,
-                         )
-                         await asyncio.sleep(5)
-             finally:
-                 watch.stop()
-                 await watch.close()
-
-     def _maybe_remove_stale_node(self, pod_name: str) -> None:
-         """
-         This is rare but possible to reproduce under contrived API usage scenarios to
-         get in a situation where we do not remove a node from a cluster because we missed
-         a removal event.
-
-         It seems to be possible that we miss events from kubernetes.
-
-         We should view getting node metadata as a health check just in case.
-         """
-         if pod_name not in self.node_id_cache:
-             return
-
-         node_data = self.node_id_cache[pod_name]
-         if time.time() - node_data.updated_at > (self.node_heartbeat_interval * 2):
-             node = manager.get_index_node(node_data.node_id)
-             if node is not None:
-                 logger.warning(
-                     f"Removing stale node {pod_name} {node_data.address}",
-                     extra={
-                         "node_id": node_data.node_id,
-                         "pod_name": pod_name,
-                         "address": node_data.address,
-                     },
-                 )
-                 manager.remove_index_node(node_data.node_id, node_data.primary_id)
-             del self.node_id_cache[pod_name]
-
-     async def update_node_data_cache(self) -> None:
-         while True:
-             await asyncio.sleep(self.node_heartbeat_interval)
-             try:
-                 for pod_name in list(self.node_id_cache.keys()):
-                     # force updating cache
-                     async with self.update_lock:
-                         if pod_name not in self.node_id_cache:
-                             # could change in the meantime since we're waiting for lock
-                             continue
-                         existing = self.node_id_cache[pod_name]
-                         try:
-                             self.node_id_cache[pod_name] = await self._query_node_metadata(
-                                 existing.address,
-                                 read_replica=existing.primary_id is not None,
-                             )
-                         except NodeConnectionError: # pragma: no cover
-                             self._maybe_remove_stale_node(pod_name)
-             except (
-                 asyncio.CancelledError,
-                 KeyboardInterrupt,
-                 SystemExit,
-                 RuntimeError,
-             ): # pragma: no cover
-                 return
-             except Exception: # pragma: no cover
-                 logger.exception("Error while updating shard info.")
-
-     async def _wait_ready(self, max_wait: int = 60) -> None:
-         """
-         Attempt to wait for the cluster to be ready.
-         Since we don't know the number of nodes that the cluster will have, we assume
-         that the cluster is ready when the number of nodes is stable for 3 consecutive checks.
-         """
-         ready = False
-         success = 0
-         start = time.monotonic()
-         logger.info("Waiting for cluster to be ready.")
-         while time.monotonic() - start < max_wait:
-             await asyncio.sleep(0.25)
-             if len(manager.get_index_nodes()) > 0:
-                 success += 1
-             else:
-                 success = 0
-             if success >= 3:
-                 ready = True
-                 break
-         if not ready:
-             logger.warning(f"Cluster not ready after {max_wait} seconds.")
-
-     async def initialize(self) -> None:
-         self.cluster_task = asyncio.create_task(self.watch_k8s_for_updates())
-         self.update_node_data_cache_task = asyncio.create_task(self.update_node_data_cache())
-         await self._wait_ready()
-
-     async def finalize(self) -> None:
-         self.cluster_task.cancel()
-         self.update_node_data_cache_task.cancel()
nucliadb/common/cluster/discovery/manual.py
@@ -1,57 +0,0 @@
- # Copyright (C) 2021 Bosutech XXI S.L.
- #
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
- # For commercial licensing, contact us at info@nuclia.com.
- #
- # AGPL:
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- import asyncio
- import logging
-
- from nucliadb.common.cluster.discovery.base import (
-     AbstractClusterDiscovery,
-     update_members,
- )
-
- logger = logging.getLogger(__name__)
-
-
- class ManualDiscovery(AbstractClusterDiscovery):
-     """
-     Manual provide all cluster members addresses to load information from.
-     """
-
-     async def discover(self) -> None:
-         members = []
-         for address in self.settings.cluster_discovery_manual_addresses:
-             members.append(await self._query_node_metadata(address))
-         update_members(members)
-
-     async def watch(self) -> None:
-         while True:
-             try:
-                 await self.discover()
-             except asyncio.CancelledError:
-                 return
-             except Exception:
-                 logger.exception("Error while watching cluster members. Will retry at started interval")
-             finally:
-                 await asyncio.sleep(15)
-
-     async def initialize(self) -> None:
-         self.task = asyncio.create_task(self.watch())
-
-     async def finalize(self) -> None:
-         self.task.cancel()
nucliadb/common/cluster/discovery/single.py
@@ -1,51 +0,0 @@
- # Copyright (C) 2021 Bosutech XXI S.L.
- #
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
- # For commercial licensing, contact us at info@nuclia.com.
- #
- # AGPL:
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- import logging
-
- from nucliadb.common.cluster.discovery.base import (
-     AbstractClusterDiscovery,
-     update_members,
- )
- from nucliadb.common.cluster.discovery.types import IndexNodeMetadata
- from nucliadb.common.cluster.standalone.utils import get_self
-
- logger = logging.getLogger(__name__)
-
-
- class SingleNodeDiscovery(AbstractClusterDiscovery):
-     """
-     When there is no cluster and ndb is running as a single node.
-     """
-
-     async def initialize(self) -> None:
-         self_node = get_self()
-         update_members(
-             [
-                 IndexNodeMetadata(
-                     node_id=self_node.id,
-                     name=self_node.id,
-                     address=self_node.address,
-                     shard_count=self_node.shard_count,
-                     available_disk=self_node.available_disk,
-                 )
-             ]
-         )
-
-     async def finalize(self) -> None: ...
nucliadb/common/cluster/discovery/types.py
@@ -1,32 +0,0 @@
- # Copyright (C) 2021 Bosutech XXI S.L.
- #
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
- # For commercial licensing, contact us at info@nuclia.com.
- #
- # AGPL:
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- import time
- from dataclasses import dataclass, field
- from typing import Optional
-
-
- @dataclass
- class IndexNodeMetadata:
-     node_id: str
-     name: str
-     address: str
-     shard_count: int
-     available_disk: int
-     primary_id: Optional[str] = None
-     updated_at: float = field(default_factory=time.time)
nucliadb/common/cluster/discovery/utils.py
@@ -1,67 +0,0 @@
- # Copyright (C) 2021 Bosutech XXI S.L.
- #
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
- # For commercial licensing, contact us at info@nuclia.com.
- #
- # AGPL:
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
-
- import asyncio
- from typing import Type, Union
-
- from nucliadb.common.cluster.discovery.base import AbstractClusterDiscovery
- from nucliadb.common.cluster.discovery.k8s import KubernetesDiscovery
- from nucliadb.common.cluster.discovery.manual import ManualDiscovery
- from nucliadb.common.cluster.discovery.single import SingleNodeDiscovery
- from nucliadb.common.cluster.settings import ClusterDiscoveryMode, settings
- from nucliadb_utils.utilities import clean_utility, get_utility, set_utility
-
- UTIL_NAME = "cluster-discovery"
-
-
- _setup_lock = asyncio.Lock()
-
-
- async def setup_cluster_discovery() -> None:
-     async with _setup_lock:
-         util = get_utility(UTIL_NAME)
-         if util is not None:
-             # already loaded
-             return util
-
-         klass: Union[Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]]
-         if settings.cluster_discovery_mode == ClusterDiscoveryMode.MANUAL:
-             klass = ManualDiscovery
-         elif settings.cluster_discovery_mode == ClusterDiscoveryMode.KUBERNETES:
-             klass = KubernetesDiscovery
-         elif settings.cluster_discovery_mode == ClusterDiscoveryMode.SINGLE_NODE:
-             klass = SingleNodeDiscovery
-         else:
-             raise NotImplementedError(
-                 f"Cluster discovery mode {settings.cluster_discovery_mode} not implemented"
-             )
-
-         disc = klass(settings)
-         await disc.initialize()
-         set_utility(UTIL_NAME, disc)
-
-
- async def teardown_cluster_discovery() -> None:
-     util: AbstractClusterDiscovery = get_utility(UTIL_NAME)
-     if util is None:
-         # already loaded
-         return util
-
-     await util.finalize()
-     clean_utility(UTIL_NAME)