nucliadb 6.7.2.post4884__py3-none-any.whl → 6.7.2.post4889__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +4 -0
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/ingest/consumer/pull.py +7 -0
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/settings.py +26 -12
- {nucliadb-6.7.2.post4884.dist-info → nucliadb-6.7.2.post4889.dist-info}/METADATA +6 -6
- {nucliadb-6.7.2.post4884.dist-info → nucliadb-6.7.2.post4889.dist-info}/RECORD +11 -11
- {nucliadb-6.7.2.post4884.dist-info → nucliadb-6.7.2.post4889.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4884.dist-info → nucliadb-6.7.2.post4889.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4884.dist-info → nucliadb-6.7.2.post4889.dist-info}/top_level.txt +0 -0
|
@@ -21,6 +21,10 @@ class ClientException(Exception):
|
|
|
21
21
|
pass
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
class ServerException(Exception):
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
|
|
24
28
|
class NotFoundException(ClientException):
|
|
25
29
|
pass
|
|
26
30
|
|
|
@@ -35,3 +39,7 @@ class RateLimitException(ClientException):
|
|
|
35
39
|
|
|
36
40
|
class AccountLimitException(ClientException):
|
|
37
41
|
pass
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ServiceUnavailableException(ServerException):
|
|
45
|
+
pass
|
|
@@ -209,6 +209,10 @@ class ProcessingHTTPClient:
|
|
|
209
209
|
async def close(self):
|
|
210
210
|
await self.session.close()
|
|
211
211
|
|
|
212
|
+
async def reset_session(self):
|
|
213
|
+
await self.close()
|
|
214
|
+
self.session = aiohttp.ClientSession()
|
|
215
|
+
|
|
212
216
|
async def in_progress(self, ack_token: str):
|
|
213
217
|
url = self.base_url_v2 + "/pull/in_progress"
|
|
214
218
|
request = InProgressRequest(ack=[ack_token])
|
|
@@ -33,5 +33,8 @@ def check_status(resp: aiohttp.ClientResponse, resp_text: str) -> None:
|
|
|
33
33
|
raise exceptions.AuthorizationException(f"Unauthorized to access: {resp.status}")
|
|
34
34
|
elif resp.status == 429:
|
|
35
35
|
raise exceptions.RateLimitException("Rate limited")
|
|
36
|
+
elif resp.status in (502, 503):
|
|
37
|
+
# Service unavailable, can be retried
|
|
38
|
+
raise exceptions.ServiceUnavailableException(f"Service unavailable: {resp.status} - {resp_text}")
|
|
36
39
|
else:
|
|
37
40
|
raise exceptions.ClientException(f"Unknown error: {resp.status} - {resp_text}")
|
nucliadb/ingest/consumer/pull.py
CHANGED
|
@@ -31,6 +31,7 @@ from opentelemetry.trace import (
|
|
|
31
31
|
Link,
|
|
32
32
|
)
|
|
33
33
|
|
|
34
|
+
from nucliadb.common.http_clients.exceptions import ServiceUnavailableException
|
|
34
35
|
from nucliadb.common.http_clients.processing import (
|
|
35
36
|
ProcessingHTTPClient,
|
|
36
37
|
ProcessingPullMessageProgressUpdater,
|
|
@@ -209,6 +210,12 @@ class PullV2Worker:
|
|
|
209
210
|
payload_length = len(base64.b64decode(data.payload))
|
|
210
211
|
logger.error(f"Message too big for transaction: {payload_length}")
|
|
211
212
|
raise e
|
|
213
|
+
|
|
214
|
+
except ServiceUnavailableException as ex:
|
|
215
|
+
logger.warning(f"Processing api is unavailable, will retry shortly: {ex}")
|
|
216
|
+
await processing_http_client.reset_session()
|
|
217
|
+
await asyncio.sleep(self.pull_time_error_backoff)
|
|
218
|
+
|
|
212
219
|
except Exception:
|
|
213
220
|
logger.exception("Unhandled error pulling messages from processing")
|
|
214
221
|
await asyncio.sleep(self.pull_time_error_backoff)
|
nucliadb/ingest/partitions.py
CHANGED
|
@@ -25,12 +25,17 @@ from nucliadb.ingest.settings import Settings
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def assign_partitions(settings: Settings):
|
|
28
|
+
"""
|
|
29
|
+
This function dynamically assigns the partitions to the current ingest sts
|
|
30
|
+
replica based on its hostname, typically (ingest-0, ingest-1, etc).
|
|
31
|
+
"""
|
|
28
32
|
# partitions start from 1, instead of 0
|
|
29
33
|
all_partitions = [str(part + 1) for part in range(settings.nuclia_partitions)]
|
|
30
34
|
|
|
31
35
|
# get replica number and total replicas from environment
|
|
32
36
|
logger.info(f"PARTITIONS: Total Replicas = {settings.total_replicas}")
|
|
33
37
|
if settings.replica_number == -1:
|
|
38
|
+
# Get replica number from hostname
|
|
34
39
|
hostname = os.environ.get("HOSTNAME")
|
|
35
40
|
if hostname is not None:
|
|
36
41
|
sts_values = hostname.split("-")
|
|
@@ -39,10 +44,16 @@ def assign_partitions(settings: Settings):
|
|
|
39
44
|
settings.replica_number = int(sts_values[-1])
|
|
40
45
|
except Exception:
|
|
41
46
|
logger.error(f"Could not extract replica number from hostname: {hostname}")
|
|
42
|
-
|
|
47
|
+
else:
|
|
48
|
+
logger.warning(f"Could not determine replica number from hostname: {hostname}")
|
|
49
|
+
else:
|
|
50
|
+
logger.warning(f"Could not determine replica number from hostname.")
|
|
43
51
|
|
|
44
52
|
if settings.replica_number == -1:
|
|
45
53
|
settings.replica_number = 0
|
|
54
|
+
else:
|
|
55
|
+
# We assume that replica numbers are set manually via env variables
|
|
56
|
+
pass
|
|
46
57
|
logger.info(f"PARTITIONS: Replica Number = {settings.replica_number}")
|
|
47
58
|
|
|
48
59
|
# calculate assigned partitions based on total replicas and own replica number
|
nucliadb/ingest/settings.py
CHANGED
|
@@ -75,26 +75,40 @@ class ProcessingPullMode(Enum):
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
class Settings(DriverSettings):
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
partitions: list[str] = ["1"]
|
|
81
|
-
|
|
78
|
+
# Pull worker settings
|
|
82
79
|
pull_time_error_backoff: int = 30
|
|
83
80
|
pull_api_timeout: int = 60
|
|
84
|
-
disable_pull_worker: bool = False
|
|
81
|
+
disable_pull_worker: bool = Field(
|
|
82
|
+
default=False, description="Set to true to disable the pull worker task"
|
|
83
|
+
)
|
|
85
84
|
|
|
86
|
-
#
|
|
87
|
-
replica_number: int = -1
|
|
88
|
-
|
|
89
|
-
|
|
85
|
+
# Ingest consumer sts replica settings
|
|
86
|
+
replica_number: int = Field(
|
|
87
|
+
default=-1,
|
|
88
|
+
description="The replica number of this ingest statefulset instance. Leave to -1 to auto-assign based on hostname.",
|
|
89
|
+
)
|
|
90
|
+
total_replicas: int = Field(default=1, description="Number of ingest statefulset replicas deployed")
|
|
91
|
+
nuclia_partitions: int = Field(
|
|
92
|
+
default=50, description="Total number of partitions of the nats stream."
|
|
93
|
+
)
|
|
94
|
+
partitions: list[str] = Field(
|
|
95
|
+
default=["1"],
|
|
96
|
+
description="List of partitions assigned to this ingest statefulset instance. This is automatically assigned based on the replica number and total replicas.",
|
|
97
|
+
)
|
|
98
|
+
max_concurrent_ingest_processing: int = Field(
|
|
99
|
+
default=5,
|
|
100
|
+
description="Controls the number of concurrent messages from different partitions that can be processed at the same time by ingest statefulset consumers.",
|
|
101
|
+
)
|
|
90
102
|
|
|
91
|
-
|
|
103
|
+
# Grpc server settings
|
|
104
|
+
grpc_port: int = 8030
|
|
105
|
+
max_receive_message_length: int = Field(
|
|
106
|
+
default=500, description="Maximum receive grpc message length in MB."
|
|
107
|
+
)
|
|
92
108
|
|
|
93
109
|
# Search query timeouts
|
|
94
110
|
relation_search_timeout: float = 10.0
|
|
95
111
|
relation_types_timeout: float = 10.0
|
|
96
112
|
|
|
97
|
-
max_concurrent_ingest_processing: int = 5
|
|
98
|
-
|
|
99
113
|
|
|
100
114
|
settings = Settings()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.7.2.post4884
|
|
3
|
+
Version: 6.7.2.post4889
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: <4,>=3.9
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4884
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4884
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.7.2.post4884
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.7.2.post4884
|
|
26
|
-
Requires-Dist: nidx-protos>=6.7.2.post4884
|
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4889
|
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4889
|
|
24
|
+
Requires-Dist: nucliadb-protos>=6.7.2.post4889
|
|
25
|
+
Requires-Dist: nucliadb-models>=6.7.2.post4889
|
|
26
|
+
Requires-Dist: nidx-protos>=6.7.2.post4889
|
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
28
|
Requires-Dist: nuclia-models>=0.46.0
|
|
29
29
|
Requires-Dist: uvicorn[standard]
|
|
@@ -109,10 +109,10 @@ nucliadb/common/external_index_providers/pinecone.py,sha256=PB0lUBBZyI9qcyRxtoi9
|
|
|
109
109
|
nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXTlKO3AgUxNa-6GeAZG25Njis,2002
|
|
110
110
|
nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
111
111
|
nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
|
|
112
|
-
nucliadb/common/http_clients/exceptions.py,sha256=
|
|
113
|
-
nucliadb/common/http_clients/processing.py,sha256=
|
|
112
|
+
nucliadb/common/http_clients/exceptions.py,sha256=HniqLZEZN9BNfVv-AaBLpRyb8wpXzMpZNP5oANJYE6M,1208
|
|
113
|
+
nucliadb/common/http_clients/processing.py,sha256=lIcR-Z9rqSUnTw0x8SjbIfyPWgV0nTQDr4o027GFmww,9086
|
|
114
114
|
nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
|
|
115
|
-
nucliadb/common/http_clients/utils.py,sha256=
|
|
115
|
+
nucliadb/common/http_clients/utils.py,sha256=j1jikzrqPzIWQnckvQ1ANM-PkAaQAao2P94p5-PvyGM,1717
|
|
116
116
|
nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
117
117
|
nucliadb/common/maindb/driver.py,sha256=y_puOqyZj-aq2indAVbFtMPnNNc2u2MShO4SVKr5FFE,2994
|
|
118
118
|
nucliadb/common/maindb/exceptions.py,sha256=u6ZSQW6jk5QM_IL5XmQ_dF-vZ-JkuWEqZbNJ-S6FG_g,988
|
|
@@ -132,18 +132,18 @@ nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3
|
|
|
132
132
|
nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp2Q,21626
|
|
133
133
|
nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
|
|
134
134
|
nucliadb/ingest/app.py,sha256=qiPad2eWgudRdLq0tB0MQZOxOezXO7QBK_ZpPNKQZO0,7378
|
|
135
|
-
nucliadb/ingest/partitions.py,sha256=
|
|
135
|
+
nucliadb/ingest/partitions.py,sha256=c1OWrFWgadNtvghY3Fl-xlurdyV5hZpVJPEoRAsBt1k,2903
|
|
136
136
|
nucliadb/ingest/processing.py,sha256=IKXMZXIPuuojKQiXR2T5-5NwMvmUnIQIhBXUGgzyFFo,21551
|
|
137
137
|
nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
138
|
nucliadb/ingest/serialize.py,sha256=hiddxbV5gxVk8uY8-Q1AEq2DhJx5fOBP34zq5ONGgcs,16240
|
|
139
|
-
nucliadb/ingest/settings.py,sha256=
|
|
139
|
+
nucliadb/ingest/settings.py,sha256=8OJMjVVbI3OWIbZLrXBqpB79zHbbLkCSb9VJA0IzRss,4269
|
|
140
140
|
nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
|
|
141
141
|
nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
142
142
|
nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
|
|
143
143
|
nucliadb/ingest/consumer/consumer.py,sha256=1OetpJXp6glaAe4kKqUA_L46BS-ZyEccTkwt7TGf0Zw,11658
|
|
144
144
|
nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
|
|
145
145
|
nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
|
|
146
|
-
nucliadb/ingest/consumer/pull.py,sha256=
|
|
146
|
+
nucliadb/ingest/consumer/pull.py,sha256=Ki_aHi72W83yD03lPt6Yz2l_uCeu62fd4upEMcOZcm4,9201
|
|
147
147
|
nucliadb/ingest/consumer/service.py,sha256=8AD41mMN7EUeUtk4ZNy14zfvxzwmVjIX6Mwe05-bomA,6543
|
|
148
148
|
nucliadb/ingest/consumer/shard_creator.py,sha256=UKIk0yaS_jC_nGQqymn9NGJWzwZEqhIr0gznJYorlAE,4348
|
|
149
149
|
nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
|
|
@@ -376,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
|
376
376
|
nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
|
|
377
377
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
|
378
378
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
|
379
|
-
nucliadb-6.7.2.
|
|
380
|
-
nucliadb-6.7.2.
|
|
381
|
-
nucliadb-6.7.2.
|
|
382
|
-
nucliadb-6.7.2.
|
|
383
|
-
nucliadb-6.7.2.
|
|
379
|
+
nucliadb-6.7.2.post4889.dist-info/METADATA,sha256=n5UTXqF3fg0n3X1orrwnWa9yaPykhsM2ntFhyJm-Z8E,4158
|
|
380
|
+
nucliadb-6.7.2.post4889.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
381
|
+
nucliadb-6.7.2.post4889.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
|
382
|
+
nucliadb-6.7.2.post4889.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
|
383
|
+
nucliadb-6.7.2.post4889.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|