nucliadb 6.7.2.post4862__py3-none-any.whl → 6.9.2.post5282__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- migrations/0016_upgrade_to_paragraphs_v2.py +1 -1
- migrations/0017_multiple_writable_shards.py +1 -1
- migrations/0018_purge_orphan_kbslugs.py +1 -1
- migrations/0019_upgrade_to_paragraphs_v3.py +1 -1
- migrations/0021_overwrite_vectorsets_key.py +1 -1
- migrations/0023_backfill_pg_catalog.py +7 -3
- migrations/0025_assign_models_to_kbs_v2.py +3 -3
- migrations/0027_rollover_texts3.py +1 -1
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +1 -1
- migrations/0032_remove_old_relations.py +1 -1
- migrations/0036_backfill_catalog_slug.py +1 -1
- migrations/0037_backfill_catalog_facets.py +1 -1
- migrations/0038_backfill_catalog_field_labels.py +7 -3
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/backups/create.py +3 -3
- nucliadb/backups/restore.py +3 -3
- nucliadb/common/cache.py +1 -1
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +294 -208
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +3 -19
- nucliadb/common/cluster/rebalance.py +484 -110
- nucliadb/common/cluster/rollover.py +29 -0
- nucliadb/common/cluster/settings.py +1 -1
- nucliadb/common/cluster/utils.py +26 -0
- nucliadb/common/datamanagers/atomic.py +6 -0
- nucliadb/common/datamanagers/utils.py +2 -2
- nucliadb/common/external_index_providers/manager.py +1 -29
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +16 -33
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +4 -0
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +77 -55
- nucliadb/common/locking.py +4 -4
- nucliadb/common/maindb/driver.py +11 -1
- nucliadb/common/maindb/local.py +1 -1
- nucliadb/common/maindb/pg.py +1 -1
- nucliadb/common/nidx.py +19 -1
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +3 -3
- nucliadb/ingest/consumer/pull.py +7 -0
- nucliadb/ingest/consumer/service.py +2 -27
- nucliadb/ingest/consumer/shard_creator.py +17 -6
- nucliadb/ingest/fields/base.py +9 -17
- nucliadb/ingest/fields/conversation.py +47 -1
- nucliadb/ingest/orm/brain_v2.py +21 -3
- nucliadb/ingest/orm/index_message.py +126 -111
- nucliadb/ingest/orm/knowledgebox.py +84 -43
- nucliadb/ingest/orm/processor/auditing.py +1 -1
- nucliadb/ingest/orm/processor/processor.py +95 -149
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +10 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/serialize.py +2 -2
- nucliadb/ingest/service/writer.py +26 -19
- nucliadb/ingest/settings.py +33 -11
- nucliadb/learning_proxy.py +12 -15
- nucliadb/metrics_exporter.py +17 -4
- nucliadb/migrator/datamanager.py +11 -17
- nucliadb/migrator/migrator.py +2 -2
- nucliadb/purge/__init__.py +12 -17
- nucliadb/purge/orphan_shards.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +40 -12
- nucliadb/reader/api/v1/learning_config.py +30 -10
- nucliadb/reader/api/v1/resource.py +2 -2
- nucliadb/reader/api/v1/services.py +1 -1
- nucliadb/reader/reader/notifications.py +1 -1
- nucliadb/search/api/v1/__init__.py +1 -0
- nucliadb/search/api/v1/catalog.py +4 -4
- nucliadb/search/api/v1/find.py +1 -4
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/resource/ask.py +21 -1
- nucliadb/search/api/v1/search.py +1 -4
- nucliadb/search/predict.py +9 -2
- nucliadb/search/search/cache.py +1 -20
- nucliadb/search/search/chat/ask.py +50 -8
- nucliadb/search/search/chat/prompt.py +47 -15
- nucliadb/search/search/chat/query.py +8 -1
- nucliadb/search/search/fetch.py +1 -1
- nucliadb/search/search/find.py +1 -6
- nucliadb/search/search/{hydrator.py → hydrator/__init__.py} +5 -4
- nucliadb/search/search/hydrator/fields.py +175 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +307 -0
- nucliadb/search/search/hydrator/resources.py +56 -0
- nucliadb/search/search/metrics.py +16 -0
- nucliadb/search/search/predict_proxy.py +33 -11
- nucliadb/search/search/query.py +0 -23
- nucliadb/search/search/query_parser/fetcher.py +5 -5
- nucliadb/search/search/query_parser/models.py +1 -30
- nucliadb/search/search/query_parser/parsers/ask.py +1 -1
- nucliadb/search/search/query_parser/parsers/catalog.py +4 -7
- nucliadb/search/search/query_parser/parsers/common.py +16 -7
- nucliadb/search/search/query_parser/parsers/find.py +0 -11
- nucliadb/search/search/query_parser/parsers/graph.py +5 -5
- nucliadb/search/search/query_parser/parsers/search.py +0 -11
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +4 -11
- nucliadb/search/search/rerankers.py +1 -1
- nucliadb/search/search/summarize.py +1 -1
- nucliadb/standalone/run.py +3 -0
- nucliadb/tasks/retries.py +4 -4
- nucliadb/train/generators/sentence_classifier.py +2 -8
- nucliadb/train/generators/utils.py +1 -1
- nucliadb/train/nodes.py +4 -4
- nucliadb/train/servicer.py +1 -1
- nucliadb/train/uploader.py +1 -1
- nucliadb/writer/api/v1/field.py +14 -9
- nucliadb/writer/api/v1/knowledgebox.py +15 -52
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +2 -2
- nucliadb/writer/resource/field.py +38 -2
- nucliadb/writer/tus/azure.py +4 -4
- nucliadb/writer/tus/gcs.py +11 -17
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/METADATA +9 -10
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/RECORD +124 -114
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4862.dist-info → nucliadb-6.9.2.post5282.dist-info}/top_level.txt +0 -0
|
@@ -36,10 +36,6 @@ from nucliadb.writer import logger
|
|
|
36
36
|
from nucliadb.writer.api.utils import only_for_onprem
|
|
37
37
|
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX, api
|
|
38
38
|
from nucliadb.writer.utilities import get_processing
|
|
39
|
-
from nucliadb_models.external_index_providers import (
|
|
40
|
-
ExternalIndexProviderType,
|
|
41
|
-
PineconeServerlessCloud,
|
|
42
|
-
)
|
|
43
39
|
from nucliadb_models.resource import (
|
|
44
40
|
KnowledgeBoxConfig,
|
|
45
41
|
KnowledgeBoxObj,
|
|
@@ -118,20 +114,6 @@ async def create_kb(item: KnowledgeBoxConfig) -> tuple[str, str]:
|
|
|
118
114
|
external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
|
|
119
115
|
type=knowledgebox_pb2.ExternalIndexProviderType.UNSET,
|
|
120
116
|
)
|
|
121
|
-
if (
|
|
122
|
-
item.external_index_provider
|
|
123
|
-
and item.external_index_provider.type == ExternalIndexProviderType.PINECONE
|
|
124
|
-
):
|
|
125
|
-
pinecone_api_key = item.external_index_provider.api_key
|
|
126
|
-
serverless_pb = to_pinecone_serverless_cloud_pb(item.external_index_provider.serverless_cloud)
|
|
127
|
-
external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
|
|
128
|
-
type=knowledgebox_pb2.ExternalIndexProviderType.PINECONE,
|
|
129
|
-
pinecone_config=knowledgebox_pb2.CreatePineconeConfig(
|
|
130
|
-
api_key=pinecone_api_key,
|
|
131
|
-
serverless_cloud=serverless_pb,
|
|
132
|
-
),
|
|
133
|
-
)
|
|
134
|
-
|
|
135
117
|
try:
|
|
136
118
|
(kbid, slug) = await KnowledgeBox.create(
|
|
137
119
|
driver,
|
|
@@ -165,8 +147,6 @@ async def create_kb(item: KnowledgeBoxConfig) -> tuple[str, str]:
|
|
|
165
147
|
@requires(NucliaDBRoles.MANAGER)
|
|
166
148
|
@version(1)
|
|
167
149
|
async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> KnowledgeBoxObjID:
|
|
168
|
-
driver = get_driver()
|
|
169
|
-
config = None
|
|
170
150
|
if (
|
|
171
151
|
item.slug
|
|
172
152
|
or item.title
|
|
@@ -174,29 +154,24 @@ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> Kn
|
|
|
174
154
|
or item.hidden_resources_enabled
|
|
175
155
|
or item.hidden_resources_hide_on_creation
|
|
176
156
|
):
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
title=item.title or "",
|
|
180
|
-
description=item.description or "",
|
|
181
|
-
hidden_resources_enabled=item.hidden_resources_enabled,
|
|
182
|
-
hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
|
|
183
|
-
)
|
|
184
|
-
try:
|
|
185
|
-
async with driver.transaction() as txn:
|
|
157
|
+
try:
|
|
158
|
+
driver = get_driver()
|
|
186
159
|
await KnowledgeBox.update(
|
|
187
|
-
|
|
188
|
-
|
|
160
|
+
driver,
|
|
161
|
+
kbid=kbid,
|
|
189
162
|
slug=item.slug,
|
|
190
|
-
|
|
163
|
+
title=item.title,
|
|
164
|
+
description=item.description,
|
|
165
|
+
hidden_resources_enabled=item.hidden_resources_enabled,
|
|
166
|
+
hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
|
|
191
167
|
)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return KnowledgeBoxObjID(uuid=kbid)
|
|
168
|
+
except datamanagers.exceptions.KnowledgeBoxNotFound:
|
|
169
|
+
raise HTTPException(status_code=404, detail="Knowledge box does not exist")
|
|
170
|
+
except Exception as exc:
|
|
171
|
+
logger.exception("Could not update KB", exc_info=exc, extra={"kbid": kbid})
|
|
172
|
+
raise HTTPException(status_code=500, detail="Error updating knowledge box")
|
|
173
|
+
|
|
174
|
+
return KnowledgeBoxObjID(uuid=kbid)
|
|
200
175
|
|
|
201
176
|
|
|
202
177
|
@only_for_onprem
|
|
@@ -236,15 +211,3 @@ async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
|
236
211
|
asyncio.create_task(processing.delete_from_processing(kbid=kbid))
|
|
237
212
|
|
|
238
213
|
return KnowledgeBoxObj(uuid=kbid)
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def to_pinecone_serverless_cloud_pb(
|
|
242
|
-
serverless: PineconeServerlessCloud,
|
|
243
|
-
) -> knowledgebox_pb2.PineconeServerlessCloud.ValueType:
|
|
244
|
-
return {
|
|
245
|
-
PineconeServerlessCloud.AWS_EU_WEST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_EU_WEST_1,
|
|
246
|
-
PineconeServerlessCloud.AWS_US_EAST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_EAST_1,
|
|
247
|
-
PineconeServerlessCloud.AWS_US_WEST_2: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_WEST_2,
|
|
248
|
-
PineconeServerlessCloud.AZURE_EASTUS2: knowledgebox_pb2.PineconeServerlessCloud.AZURE_EASTUS2,
|
|
249
|
-
PineconeServerlessCloud.GCP_US_CENTRAL1: knowledgebox_pb2.PineconeServerlessCloud.GCP_US_CENTRAL1,
|
|
250
|
-
}[serverless]
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from fastapi import Request
|
|
20
|
+
from fastapi import Header, Request
|
|
21
21
|
from fastapi_versioning import version
|
|
22
22
|
from nuclia_models.config.proto import ExtractConfig, SplitConfiguration
|
|
23
23
|
|
|
@@ -55,10 +55,11 @@ async def set_configuration(
|
|
|
55
55
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
|
|
56
56
|
@version(1)
|
|
57
57
|
async def patch_configuration(
|
|
58
|
-
request: Request,
|
|
59
|
-
kbid: str,
|
|
58
|
+
request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
|
|
60
59
|
):
|
|
61
|
-
return await learning_config_proxy(
|
|
60
|
+
return await learning_config_proxy(
|
|
61
|
+
request, "PATCH", f"/config/{kbid}", headers={"account-id": x_nucliadb_account}
|
|
62
|
+
)
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
@api.post(
|
|
@@ -373,7 +373,7 @@ async def update_resource_slug(
|
|
|
373
373
|
rid: str,
|
|
374
374
|
new_slug: str,
|
|
375
375
|
):
|
|
376
|
-
async with driver.
|
|
376
|
+
async with driver.rw_transaction() as txn:
|
|
377
377
|
old_slug = await datamanagers.resources.modify_slug(txn, kbid=kbid, rid=rid, new_slug=new_slug)
|
|
378
378
|
await txn.commit()
|
|
379
379
|
return old_slug
|
|
@@ -460,7 +460,7 @@ async def _reprocess_resource(
|
|
|
460
460
|
driver = get_driver()
|
|
461
461
|
|
|
462
462
|
writer = BrokerMessage()
|
|
463
|
-
async with driver.
|
|
463
|
+
async with driver.ro_transaction() as txn:
|
|
464
464
|
kb = KnowledgeBox(txn, storage, kbid)
|
|
465
465
|
|
|
466
466
|
resource = await kb.get(rid)
|
|
@@ -21,13 +21,14 @@ import dataclasses
|
|
|
21
21
|
from datetime import datetime
|
|
22
22
|
from typing import Optional, Union
|
|
23
23
|
|
|
24
|
+
from fastapi import HTTPException
|
|
24
25
|
from google.protobuf.json_format import MessageToDict
|
|
25
26
|
|
|
26
27
|
import nucliadb_models as models
|
|
27
28
|
from nucliadb.common import datamanagers
|
|
28
29
|
from nucliadb.common.maindb.driver import Transaction
|
|
29
30
|
from nucliadb.common.models_utils import from_proto, to_proto
|
|
30
|
-
from nucliadb.ingest.fields.conversation import Conversation
|
|
31
|
+
from nucliadb.ingest.fields.conversation import MAX_CONVERSATION_MESSAGES, Conversation
|
|
31
32
|
from nucliadb.ingest.orm.resource import Resource as ORMResource
|
|
32
33
|
from nucliadb.models.internal import processing as processing_models
|
|
33
34
|
from nucliadb.models.internal.processing import ClassificationLabel, PushConversation, PushPayload
|
|
@@ -227,6 +228,7 @@ async def parse_fields(
|
|
|
227
228
|
kbid,
|
|
228
229
|
uuid,
|
|
229
230
|
resource_classifications,
|
|
231
|
+
replace_field=True,
|
|
230
232
|
)
|
|
231
233
|
|
|
232
234
|
|
|
@@ -430,11 +432,15 @@ async def parse_conversation_field(
|
|
|
430
432
|
kbid: str,
|
|
431
433
|
uuid: str,
|
|
432
434
|
resource_classifications: ResourceClassifications,
|
|
435
|
+
replace_field: bool,
|
|
433
436
|
) -> None:
|
|
437
|
+
if not replace_field:
|
|
438
|
+
# Appending messages to conversation
|
|
439
|
+
await _conversation_append_checks(kbid, uuid, key, conversation_field)
|
|
434
440
|
classif_labels = resource_classifications.for_field(key, resources_pb2.FieldType.CONVERSATION)
|
|
435
441
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
436
442
|
processing = get_processing()
|
|
437
|
-
field_value = resources_pb2.Conversation()
|
|
443
|
+
field_value = resources_pb2.Conversation(replace_field=replace_field)
|
|
438
444
|
convs = processing_models.PushConversation()
|
|
439
445
|
for message in conversation_field.messages:
|
|
440
446
|
cm = resources_pb2.Message()
|
|
@@ -543,3 +549,33 @@ async def get_stored_resource_classifications(
|
|
|
543
549
|
classif = ClassificationLabel(labelset=f_classif.labelset, label=f_classif.label)
|
|
544
550
|
rc.field_level.setdefault(fid, set()).add(classif)
|
|
545
551
|
return rc
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
async def _conversation_append_checks(
|
|
555
|
+
kbid: str, rid: str, field_id: str, input: models.InputConversationField
|
|
556
|
+
):
|
|
557
|
+
async with datamanagers.with_ro_transaction() as txn:
|
|
558
|
+
resource_obj = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
|
|
559
|
+
if resource_obj is None:
|
|
560
|
+
return
|
|
561
|
+
conv: Conversation = await resource_obj.get_field(
|
|
562
|
+
field_id, resources_pb2.FieldType.CONVERSATION, load=False
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
# Make sure that the max number of messages is not exceeded
|
|
566
|
+
current_message_count = (await conv.get_metadata()).total
|
|
567
|
+
if len(input.messages) + current_message_count > MAX_CONVERSATION_MESSAGES:
|
|
568
|
+
raise HTTPException(
|
|
569
|
+
status_code=422,
|
|
570
|
+
detail=f"Conversation fields cannot have more than {MAX_CONVERSATION_MESSAGES} messages.",
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
# Make sure input messages use unique idents
|
|
574
|
+
existing_message_ids = set((await conv.get_splits_metadata()).metadata.keys())
|
|
575
|
+
input_message_ids = {message.ident for message in input.messages}
|
|
576
|
+
intersection = input_message_ids.intersection(existing_message_ids)
|
|
577
|
+
if intersection != set():
|
|
578
|
+
raise HTTPException(
|
|
579
|
+
status_code=422,
|
|
580
|
+
detail=f"Message identifiers must be unique field={field_id}: {list(intersection)[:50]}",
|
|
581
|
+
)
|
nucliadb/writer/tus/azure.py
CHANGED
|
@@ -27,7 +27,6 @@ from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
|
|
|
27
27
|
from nucliadb_protos.resources_pb2 import CloudFile
|
|
28
28
|
from nucliadb_utils.storages import CHUNK_SIZE
|
|
29
29
|
from nucliadb_utils.storages.azure import AzureObjectStore
|
|
30
|
-
from nucliadb_utils.storages.exceptions import ObjectNotFoundError
|
|
31
30
|
from nucliadb_utils.storages.utils import ObjectMetadata
|
|
32
31
|
|
|
33
32
|
|
|
@@ -63,7 +62,7 @@ class AzureBlobStore(BlobStore):
|
|
|
63
62
|
class AzureFileStorageManager(FileStorageManager):
|
|
64
63
|
storage: AzureBlobStore
|
|
65
64
|
chunk_size = CHUNK_SIZE
|
|
66
|
-
min_upload_size =
|
|
65
|
+
min_upload_size = CHUNK_SIZE
|
|
67
66
|
|
|
68
67
|
@property
|
|
69
68
|
def object_store(self) -> AzureObjectStore:
|
|
@@ -87,7 +86,7 @@ class AzureFileStorageManager(FileStorageManager):
|
|
|
87
86
|
bucket = self.storage.get_bucket_name(kbid)
|
|
88
87
|
try:
|
|
89
88
|
await self.object_store.delete(bucket, uri)
|
|
90
|
-
except ObjectNotFoundError:
|
|
89
|
+
except KeyError:
|
|
91
90
|
logger.warning(
|
|
92
91
|
"Attempt to delete an upload but not found",
|
|
93
92
|
extra={"uri": uri, "kbid": kbid, "bucket": bucket},
|
|
@@ -108,4 +107,5 @@ class AzureFileStorageManager(FileStorageManager):
|
|
|
108
107
|
return path
|
|
109
108
|
|
|
110
109
|
def validate_intermediate_chunk(self, uploaded_bytes: int):
|
|
111
|
-
|
|
110
|
+
if uploaded_bytes < self.min_upload_size:
|
|
111
|
+
raise ValueError(f"Intermediate chunks cannot be smaller than {self.min_upload_size} bytes")
|
nucliadb/writer/tus/gcs.py
CHANGED
|
@@ -74,7 +74,7 @@ RETRIABLE_EXCEPTIONS = (
|
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
class GCloudBlobStore(BlobStore):
|
|
77
|
-
|
|
77
|
+
_session: Optional[aiohttp.ClientSession] = None
|
|
78
78
|
loop = None
|
|
79
79
|
upload_url: str
|
|
80
80
|
object_base_url: str
|
|
@@ -84,6 +84,12 @@ class GCloudBlobStore(BlobStore):
|
|
|
84
84
|
project: str
|
|
85
85
|
executor = ThreadPoolExecutor(max_workers=5)
|
|
86
86
|
|
|
87
|
+
@property
|
|
88
|
+
def session(self) -> aiohttp.ClientSession:
|
|
89
|
+
if self._session is None: # pragma: no cover
|
|
90
|
+
raise AttributeError("Session not initialized")
|
|
91
|
+
return self._session
|
|
92
|
+
|
|
87
93
|
async def get_access_headers(self):
|
|
88
94
|
if self._credentials is None:
|
|
89
95
|
return {}
|
|
@@ -106,8 +112,9 @@ class GCloudBlobStore(BlobStore):
|
|
|
106
112
|
return access_token.access_token
|
|
107
113
|
|
|
108
114
|
async def finalize(self):
|
|
109
|
-
if self.
|
|
110
|
-
await self.
|
|
115
|
+
if self._session is not None:
|
|
116
|
+
await self._session.close()
|
|
117
|
+
self._session = None
|
|
111
118
|
|
|
112
119
|
async def initialize(
|
|
113
120
|
self,
|
|
@@ -143,12 +150,9 @@ class GCloudBlobStore(BlobStore):
|
|
|
143
150
|
self._credentials = None
|
|
144
151
|
|
|
145
152
|
loop = asyncio.get_event_loop()
|
|
146
|
-
self.
|
|
153
|
+
self._session = aiohttp.ClientSession(loop=loop, timeout=TIMEOUT)
|
|
147
154
|
|
|
148
155
|
async def check_exists(self, bucket_name: str):
|
|
149
|
-
if self.session is None:
|
|
150
|
-
raise AttributeError()
|
|
151
|
-
|
|
152
156
|
headers = await self.get_access_headers()
|
|
153
157
|
# Using object access url instead of bucket access to avoid
|
|
154
158
|
# giving admin permission to the SA, needed to GET a bucket
|
|
@@ -163,8 +167,6 @@ class GCloudBlobStore(BlobStore):
|
|
|
163
167
|
return False
|
|
164
168
|
|
|
165
169
|
async def create_bucket(self, bucket_name: str):
|
|
166
|
-
if self.session is None:
|
|
167
|
-
raise AttributeError()
|
|
168
170
|
headers = await self.get_access_headers()
|
|
169
171
|
url = f"{self.object_base_url}?project={self.project}"
|
|
170
172
|
|
|
@@ -199,10 +201,6 @@ class GCloudFileStorageManager(FileStorageManager):
|
|
|
199
201
|
_resumable_uri : uri to resumable upload
|
|
200
202
|
_uri : finished uploaded image
|
|
201
203
|
"""
|
|
202
|
-
|
|
203
|
-
if self.storage.session is None:
|
|
204
|
-
raise AttributeError()
|
|
205
|
-
|
|
206
204
|
upload_file_id = dm.get("upload_file_id")
|
|
207
205
|
if upload_file_id is not None:
|
|
208
206
|
await self.delete_upload(upload_file_id, kbid)
|
|
@@ -287,8 +285,6 @@ class GCloudFileStorageManager(FileStorageManager):
|
|
|
287
285
|
|
|
288
286
|
@backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=4)
|
|
289
287
|
async def _append(self, dm: FileDataManager, data, offset):
|
|
290
|
-
if self.storage.session is None:
|
|
291
|
-
raise AttributeError()
|
|
292
288
|
if dm.size:
|
|
293
289
|
size = str(dm.size)
|
|
294
290
|
else:
|
|
@@ -353,8 +349,6 @@ class GCloudFileStorageManager(FileStorageManager):
|
|
|
353
349
|
@backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=4)
|
|
354
350
|
async def finish(self, dm: FileDataManager):
|
|
355
351
|
if dm.size == 0:
|
|
356
|
-
if self.storage.session is None:
|
|
357
|
-
raise AttributeError()
|
|
358
352
|
# In case of empty file, we need to send a PUT request with empty body
|
|
359
353
|
# and Content-Range header set to "bytes */0"
|
|
360
354
|
headers = {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.7.2.post4862
|
|
3
|
+
Version: 6.9.2.post5282
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -12,20 +12,19 @@ Classifier: Development Status :: 4 - Beta
|
|
|
12
12
|
Classifier: Intended Audience :: Developers
|
|
13
13
|
Classifier: Intended Audience :: Information Technology
|
|
14
14
|
Classifier: Programming Language :: Python
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
|
-
Requires-Python: <4,>=3.9
|
|
19
|
+
Requires-Python: <4,>=3.10
|
|
21
20
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.
|
|
26
|
-
Requires-Dist: nidx-protos>=6.
|
|
21
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.9.2.post5282
|
|
22
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.2.post5282
|
|
23
|
+
Requires-Dist: nucliadb-protos>=6.9.2.post5282
|
|
24
|
+
Requires-Dist: nucliadb-models>=6.9.2.post5282
|
|
25
|
+
Requires-Dist: nidx-protos>=6.9.2.post5282
|
|
27
26
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
|
-
Requires-Dist: nuclia-models>=0.
|
|
27
|
+
Requires-Dist: nuclia-models>=0.50.0
|
|
29
28
|
Requires-Dist: uvicorn[standard]
|
|
30
29
|
Requires-Dist: argdantic
|
|
31
30
|
Requires-Dist: aiohttp>=3.11.11
|
|
@@ -57,7 +56,7 @@ Requires-Dist: jwcrypto>=1.5.6
|
|
|
57
56
|
Requires-Dist: pyyaml>=5.1
|
|
58
57
|
Requires-Dist: fastapi-versioning>=0.10.0
|
|
59
58
|
Requires-Dist: fastapi>=0.95.2
|
|
60
|
-
Requires-Dist: sentry-sdk>=2.8.0
|
|
59
|
+
Requires-Dist: sentry-sdk[fastapi]>=2.8.0
|
|
61
60
|
Requires-Dist: pyjwt>=2.4.0
|
|
62
61
|
Requires-Dist: mmh3>=3.0.0
|
|
63
62
|
Requires-Dist: httpx>=0.23.0
|