nucliadb 6.7.2.post4882__py3-none-any.whl → 6.7.2.post4889__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb might be problematic. Click here for more details.

@@ -21,6 +21,10 @@ class ClientException(Exception):
21
21
  pass
22
22
 
23
23
 
24
+ class ServerException(Exception):
25
+ pass
26
+
27
+
24
28
  class NotFoundException(ClientException):
25
29
  pass
26
30
 
@@ -35,3 +39,7 @@ class RateLimitException(ClientException):
35
39
 
36
40
  class AccountLimitException(ClientException):
37
41
  pass
42
+
43
+
44
+ class ServiceUnavailableException(ServerException):
45
+ pass
@@ -209,6 +209,10 @@ class ProcessingHTTPClient:
209
209
  async def close(self):
210
210
  await self.session.close()
211
211
 
212
+ async def reset_session(self):
213
+ await self.close()
214
+ self.session = aiohttp.ClientSession()
215
+
212
216
  async def in_progress(self, ack_token: str):
213
217
  url = self.base_url_v2 + "/pull/in_progress"
214
218
  request = InProgressRequest(ack=[ack_token])
@@ -33,5 +33,8 @@ def check_status(resp: aiohttp.ClientResponse, resp_text: str) -> None:
33
33
  raise exceptions.AuthorizationException(f"Unauthorized to access: {resp.status}")
34
34
  elif resp.status == 429:
35
35
  raise exceptions.RateLimitException("Rate limited")
36
+ elif resp.status in (502, 503):
37
+ # Service unavailable, can be retried
38
+ raise exceptions.ServiceUnavailableException(f"Service unavailable: {resp.status} - {resp_text}")
36
39
  else:
37
40
  raise exceptions.ClientException(f"Unknown error: {resp.status} - {resp_text}")
@@ -31,6 +31,7 @@ from opentelemetry.trace import (
31
31
  Link,
32
32
  )
33
33
 
34
+ from nucliadb.common.http_clients.exceptions import ServiceUnavailableException
34
35
  from nucliadb.common.http_clients.processing import (
35
36
  ProcessingHTTPClient,
36
37
  ProcessingPullMessageProgressUpdater,
@@ -209,6 +210,12 @@ class PullV2Worker:
209
210
  payload_length = len(base64.b64decode(data.payload))
210
211
  logger.error(f"Message too big for transaction: {payload_length}")
211
212
  raise e
213
+
214
+ except ServiceUnavailableException as ex:
215
+ logger.warning(f"Processing api is unavailable, will retry shortly: {ex}")
216
+ await processing_http_client.reset_session()
217
+ await asyncio.sleep(self.pull_time_error_backoff)
218
+
212
219
  except Exception:
213
220
  logger.exception("Unhandled error pulling messages from processing")
214
221
  await asyncio.sleep(self.pull_time_error_backoff)
@@ -25,12 +25,17 @@ from nucliadb.ingest.settings import Settings
25
25
 
26
26
 
27
27
  def assign_partitions(settings: Settings):
28
+ """
29
+ This function dynamically assigns the partitions to the current ingest sts
30
+ replica based on its hostname, typically (ingest-0, ingest-1, etc).
31
+ """
28
32
  # partitions start from 1, instead of 0
29
33
  all_partitions = [str(part + 1) for part in range(settings.nuclia_partitions)]
30
34
 
31
35
  # get replica number and total replicas from environment
32
36
  logger.info(f"PARTITIONS: Total Replicas = {settings.total_replicas}")
33
37
  if settings.replica_number == -1:
38
+ # Get replica number from hostname
34
39
  hostname = os.environ.get("HOSTNAME")
35
40
  if hostname is not None:
36
41
  sts_values = hostname.split("-")
@@ -39,10 +44,16 @@ def assign_partitions(settings: Settings):
39
44
  settings.replica_number = int(sts_values[-1])
40
45
  except Exception:
41
46
  logger.error(f"Could not extract replica number from hostname: {hostname}")
42
- pass
47
+ else:
48
+ logger.warning(f"Could not determine replica number from hostname: {hostname}")
49
+ else:
50
+ logger.warning(f"Could not determine replica number from hostname.")
43
51
 
44
52
  if settings.replica_number == -1:
45
53
  settings.replica_number = 0
54
+ else:
55
+ # We assume that replica numbers are set manually via env variables
56
+ pass
46
57
  logger.info(f"PARTITIONS: Replica Number = {settings.replica_number}")
47
58
 
48
59
  # calculate assigned partitions based on total replicas and own replica number
@@ -75,26 +75,40 @@ class ProcessingPullMode(Enum):
75
75
 
76
76
 
77
77
  class Settings(DriverSettings):
78
- grpc_port: int = 8030
79
-
80
- partitions: list[str] = ["1"]
81
-
78
+ # Pull worker settings
82
79
  pull_time_error_backoff: int = 30
83
80
  pull_api_timeout: int = 60
84
- disable_pull_worker: bool = False
81
+ disable_pull_worker: bool = Field(
82
+ default=False, description="Set to true to disable the pull worker task"
83
+ )
85
84
 
86
- # ingest consumer sts replica settings
87
- replica_number: int = -1
88
- total_replicas: int = 1 # number of ingest processor replicas in the cluster
89
- nuclia_partitions: int = 50
85
+ # Ingest consumer sts replica settings
86
+ replica_number: int = Field(
87
+ default=-1,
88
+ description="The replica number of this ingest statefulset instance. Leave to -1 to auto-assign based on hostname.",
89
+ )
90
+ total_replicas: int = Field(default=1, description="Number of ingest statefulset replicas deployed")
91
+ nuclia_partitions: int = Field(
92
+ default=50, description="Total number of partitions of the nats stream."
93
+ )
94
+ partitions: list[str] = Field(
95
+ default=["1"],
96
+ description="List of partitions assigned to this ingest statefulset instance. This is automatically assigned based on the replica number and total replicas.",
97
+ )
98
+ max_concurrent_ingest_processing: int = Field(
99
+ default=5,
100
+ description="Controls the number of concurrent messages from different partitions that can be processed at the same time by ingest statefulset consumers.",
101
+ )
90
102
 
91
- max_receive_message_length: int = 500 # In MB
103
+ # Grpc server settings
104
+ grpc_port: int = 8030
105
+ max_receive_message_length: int = Field(
106
+ default=500, description="Maximum receive grpc message length in MB."
107
+ )
92
108
 
93
109
  # Search query timeouts
94
110
  relation_search_timeout: float = 10.0
95
111
  relation_types_timeout: float = 10.0
96
112
 
97
- max_concurrent_ingest_processing: int = 5
98
-
99
113
 
100
114
  settings = Settings()
@@ -116,6 +116,9 @@ def run():
116
116
  if nuclia_settings.nuclia_service_account:
117
117
  settings_to_output["NUA API key"] = "Configured ✔"
118
118
  settings_to_output["NUA API zone"] = nuclia_settings.nuclia_zone
119
+ settings_to_output["NUA API url"] = (
120
+ nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone) + "/api"
121
+ )
119
122
 
120
123
  settings_to_output_fmted = "\n".join(
121
124
  [f"|| - {k}:{' ' * (27 - len(k))}{v}" for k, v in settings_to_output.items()]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.7.2.post4882
3
+ Version: 6.7.2.post4889
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4882
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4882
24
- Requires-Dist: nucliadb-protos>=6.7.2.post4882
25
- Requires-Dist: nucliadb-models>=6.7.2.post4882
26
- Requires-Dist: nidx-protos>=6.7.2.post4882
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4889
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4889
24
+ Requires-Dist: nucliadb-protos>=6.7.2.post4889
25
+ Requires-Dist: nucliadb-models>=6.7.2.post4889
26
+ Requires-Dist: nidx-protos>=6.7.2.post4889
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.46.0
29
29
  Requires-Dist: uvicorn[standard]
@@ -109,10 +109,10 @@ nucliadb/common/external_index_providers/pinecone.py,sha256=PB0lUBBZyI9qcyRxtoi9
109
109
  nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXTlKO3AgUxNa-6GeAZG25Njis,2002
110
110
  nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
111
111
  nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
112
- nucliadb/common/http_clients/exceptions.py,sha256=47Y8OjkaGV_F18G07FpJhOzgWKUIexhlILyuVtICz8s,1100
113
- nucliadb/common/http_clients/processing.py,sha256=mKd9vRK-Wb71UG2LCoGu47wmnN5krqA0D1Z8vitsBPE,8976
112
+ nucliadb/common/http_clients/exceptions.py,sha256=HniqLZEZN9BNfVv-AaBLpRyb8wpXzMpZNP5oANJYE6M,1208
113
+ nucliadb/common/http_clients/processing.py,sha256=lIcR-Z9rqSUnTw0x8SjbIfyPWgV0nTQDr4o027GFmww,9086
114
114
  nucliadb/common/http_clients/pypi.py,sha256=VHIUjwJEJVntVUo_FRoXIo8sLmluy7sa9-iXSITcrMY,1540
115
- nucliadb/common/http_clients/utils.py,sha256=yGUkHNS41abHiBoHqo_Mg3QSqGsS7rUtbfGftbEC57U,1529
115
+ nucliadb/common/http_clients/utils.py,sha256=j1jikzrqPzIWQnckvQ1ANM-PkAaQAao2P94p5-PvyGM,1717
116
116
  nucliadb/common/maindb/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
117
117
  nucliadb/common/maindb/driver.py,sha256=y_puOqyZj-aq2indAVbFtMPnNNc2u2MShO4SVKr5FFE,2994
118
118
  nucliadb/common/maindb/exceptions.py,sha256=u6ZSQW6jk5QM_IL5XmQ_dF-vZ-JkuWEqZbNJ-S6FG_g,988
@@ -132,18 +132,18 @@ nucliadb/export_import/tasks.py,sha256=DWbdqY97ffoyfipelGXz3Jqz1iam6JCjQSh367Fc3
132
132
  nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp2Q,21626
133
133
  nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
134
134
  nucliadb/ingest/app.py,sha256=qiPad2eWgudRdLq0tB0MQZOxOezXO7QBK_ZpPNKQZO0,7378
135
- nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
135
+ nucliadb/ingest/partitions.py,sha256=c1OWrFWgadNtvghY3Fl-xlurdyV5hZpVJPEoRAsBt1k,2903
136
136
  nucliadb/ingest/processing.py,sha256=IKXMZXIPuuojKQiXR2T5-5NwMvmUnIQIhBXUGgzyFFo,21551
137
137
  nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
138
  nucliadb/ingest/serialize.py,sha256=hiddxbV5gxVk8uY8-Q1AEq2DhJx5fOBP34zq5ONGgcs,16240
139
- nucliadb/ingest/settings.py,sha256=LskYx8Eefv5qdHkpcsMKHgkaVJuMhC9XnDHRS6s6BAc,3392
139
+ nucliadb/ingest/settings.py,sha256=8OJMjVVbI3OWIbZLrXBqpB79zHbbLkCSb9VJA0IzRss,4269
140
140
  nucliadb/ingest/utils.py,sha256=l1myURu3r8oA11dx3GpHw-gNTUc1AFX8xdPm9Lgl2rA,2275
141
141
  nucliadb/ingest/consumer/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
142
142
  nucliadb/ingest/consumer/auditing.py,sha256=xK21DIa_ZAiOJVVbnkmT4jgCRGshNGyPyxsqhE6kROE,7204
143
143
  nucliadb/ingest/consumer/consumer.py,sha256=1OetpJXp6glaAe4kKqUA_L46BS-ZyEccTkwt7TGf0Zw,11658
144
144
  nucliadb/ingest/consumer/materializer.py,sha256=tgD_rDI2twQzcz8kKNiW_L4YIth16IGh9mUfD5wiSD4,3858
145
145
  nucliadb/ingest/consumer/metrics.py,sha256=ji1l_4cKiHJthQd8YNem1ft4iMbw9KThmVvJmLcv3Xg,1075
146
- nucliadb/ingest/consumer/pull.py,sha256=vAOu2Zum-1e4RipoHvzzIha5PoNV28_C0nciQ2UFphc,8831
146
+ nucliadb/ingest/consumer/pull.py,sha256=Ki_aHi72W83yD03lPt6Yz2l_uCeu62fd4upEMcOZcm4,9201
147
147
  nucliadb/ingest/consumer/service.py,sha256=8AD41mMN7EUeUtk4ZNy14zfvxzwmVjIX6Mwe05-bomA,6543
148
148
  nucliadb/ingest/consumer/shard_creator.py,sha256=UKIk0yaS_jC_nGQqymn9NGJWzwZEqhIr0gznJYorlAE,4348
149
149
  nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
@@ -291,7 +291,7 @@ nucliadb/standalone/lifecycle.py,sha256=rdKLG-oOLN4rfd2VGG_2vlDUWYneWSCiuEhoeiFK
291
291
  nucliadb/standalone/migrations.py,sha256=s9-3RSZ-O3bjEw2TnBe_YWLUEKbub0bARUxi1gA3yuY,1950
292
292
  nucliadb/standalone/purge.py,sha256=ZY-cebb214FFiPG7OFmXZGg0G3CK5Amw0FLLm9WJhKE,1343
293
293
  nucliadb/standalone/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
294
- nucliadb/standalone/run.py,sha256=0QKEAT6pCaLvnuxTG3RltTlhE2g5-HI21KbOWfusBGE,5425
294
+ nucliadb/standalone/run.py,sha256=gbJcociwaQVKHR5C0HEq0cpWuxgOLcmVjCzlE0S2ZWw,5577
295
295
  nucliadb/standalone/settings.py,sha256=fbgqVT37XB2cJHJARnR19MO_dz6NLbkuIC2okH7J80o,5714
296
296
  nucliadb/standalone/versions.py,sha256=8CxNMNt2NgWM8ct50UsR4d44-ae7wtQI-sV-yGiFqyI,3508
297
297
  nucliadb/standalone/static/favicon.ico,sha256=96pKGp6Sx457JkTfjy1dtApMhkitixfU6invCUGAYOU,2285
@@ -376,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
376
376
  nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
377
377
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
378
378
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
379
- nucliadb-6.7.2.post4882.dist-info/METADATA,sha256=4iQdvdQQ2eCmmWGGgUH-zAT708gy9kMu8W6_DQnrw4Y,4158
380
- nucliadb-6.7.2.post4882.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
381
- nucliadb-6.7.2.post4882.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
382
- nucliadb-6.7.2.post4882.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
383
- nucliadb-6.7.2.post4882.dist-info/RECORD,,
379
+ nucliadb-6.7.2.post4889.dist-info/METADATA,sha256=n5UTXqF3fg0n3X1orrwnWa9yaPykhsM2ntFhyJm-Z8E,4158
380
+ nucliadb-6.7.2.post4889.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
381
+ nucliadb-6.7.2.post4889.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
382
+ nucliadb-6.7.2.post4889.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
383
+ nucliadb-6.7.2.post4889.dist-info/RECORD,,