nucliadb 6.7.2.post4868__py3-none-any.whl → 6.7.2.post4874__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- migrations/0016_upgrade_to_paragraphs_v2.py +1 -1
- migrations/0017_multiple_writable_shards.py +1 -1
- migrations/0018_purge_orphan_kbslugs.py +1 -1
- migrations/0019_upgrade_to_paragraphs_v3.py +1 -1
- migrations/0021_overwrite_vectorsets_key.py +1 -1
- migrations/0023_backfill_pg_catalog.py +1 -1
- migrations/0025_assign_models_to_kbs_v2.py +3 -3
- migrations/0027_rollover_texts3.py +1 -1
- migrations/0029_backfill_field_status.py +1 -1
- migrations/0032_remove_old_relations.py +1 -1
- migrations/0036_backfill_catalog_slug.py +1 -1
- migrations/0037_backfill_catalog_facets.py +1 -1
- migrations/0038_backfill_catalog_field_labels.py +1 -1
- nucliadb/backups/create.py +3 -3
- nucliadb/backups/restore.py +3 -3
- nucliadb/common/cache.py +1 -1
- nucliadb/common/datamanagers/utils.py +2 -2
- nucliadb/common/locking.py +4 -4
- nucliadb/common/maindb/driver.py +11 -1
- nucliadb/common/maindb/local.py +1 -1
- nucliadb/common/maindb/pg.py +1 -1
- nucliadb/export_import/datamanager.py +3 -3
- nucliadb/ingest/consumer/shard_creator.py +1 -1
- nucliadb/ingest/orm/knowledgebox.py +4 -4
- nucliadb/ingest/orm/processor/auditing.py +1 -1
- nucliadb/ingest/orm/processor/processor.py +4 -4
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/serialize.py +2 -2
- nucliadb/ingest/service/writer.py +15 -15
- nucliadb/metrics_exporter.py +1 -1
- nucliadb/migrator/datamanager.py +11 -11
- nucliadb/migrator/migrator.py +2 -2
- nucliadb/purge/__init__.py +10 -10
- nucliadb/purge/orphan_shards.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +3 -3
- nucliadb/reader/api/v1/resource.py +2 -2
- nucliadb/reader/api/v1/services.py +1 -1
- nucliadb/reader/reader/notifications.py +1 -1
- nucliadb/search/search/cache.py +1 -1
- nucliadb/search/search/chat/prompt.py +2 -2
- nucliadb/search/search/fetch.py +1 -1
- nucliadb/search/search/hydrator.py +2 -2
- nucliadb/search/search/query_parser/fetcher.py +5 -5
- nucliadb/search/search/summarize.py +1 -1
- nucliadb/tasks/retries.py +4 -4
- nucliadb/train/generators/utils.py +1 -1
- nucliadb/train/nodes.py +4 -4
- nucliadb/train/servicer.py +1 -1
- nucliadb/train/uploader.py +1 -1
- nucliadb/writer/api/v1/field.py +1 -1
- nucliadb/writer/api/v1/knowledgebox.py +1 -1
- nucliadb/writer/api/v1/resource.py +2 -2
- {nucliadb-6.7.2.post4868.dist-info → nucliadb-6.7.2.post4874.dist-info}/METADATA +6 -6
- {nucliadb-6.7.2.post4868.dist-info → nucliadb-6.7.2.post4874.dist-info}/RECORD +57 -57
- {nucliadb-6.7.2.post4868.dist-info → nucliadb-6.7.2.post4874.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4868.dist-info → nucliadb-6.7.2.post4874.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4868.dist-info → nucliadb-6.7.2.post4874.dist-info}/top_level.txt +0 -0
nucliadb/migrator/datamanager.py
CHANGED
|
@@ -48,36 +48,36 @@ class MigrationsDataManager:
|
|
|
48
48
|
|
|
49
49
|
async def schedule_all_kbs(self, target_version: int) -> None:
|
|
50
50
|
# Get all kb ids
|
|
51
|
-
async with self.driver.
|
|
51
|
+
async with self.driver.ro_transaction() as txn:
|
|
52
52
|
kbids = [kbid async for kbid, _ in datamanagers.kb.get_kbs(txn)]
|
|
53
53
|
# Schedule the migrations
|
|
54
|
-
async with self.driver.
|
|
54
|
+
async with self.driver.rw_transaction() as txn:
|
|
55
55
|
for kbid in kbids:
|
|
56
56
|
await txn.set(MIGRATIONS_KEY.format(kbid=kbid), str(target_version).encode())
|
|
57
57
|
await txn.commit()
|
|
58
58
|
|
|
59
59
|
async def get_kb_migrations(self) -> list[str]:
|
|
60
60
|
keys = []
|
|
61
|
-
async with self.driver.
|
|
61
|
+
async with self.driver.ro_transaction() as txn:
|
|
62
62
|
async for key in txn.keys(MIGRATIONS_CONTAINER_KEY):
|
|
63
63
|
keys.append(key.split("/")[-1])
|
|
64
64
|
|
|
65
65
|
return keys
|
|
66
66
|
|
|
67
67
|
async def delete_kb_migration(self, *, kbid: str) -> None:
|
|
68
|
-
async with self.driver.
|
|
68
|
+
async with self.driver.rw_transaction() as txn:
|
|
69
69
|
await txn.delete(MIGRATIONS_KEY.format(kbid=kbid))
|
|
70
70
|
await txn.commit()
|
|
71
71
|
|
|
72
72
|
async def get_kb_info(self, kbid: str) -> Optional[KnowledgeBoxInfo]:
|
|
73
|
-
async with self.driver.
|
|
73
|
+
async with self.driver.ro_transaction() as txn:
|
|
74
74
|
kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
|
|
75
75
|
if kb_config is None:
|
|
76
76
|
return None
|
|
77
77
|
return KnowledgeBoxInfo(current_version=kb_config.migration_version)
|
|
78
78
|
|
|
79
79
|
async def update_kb_info(self, *, kbid: str, current_version: int) -> None:
|
|
80
|
-
async with self.driver.
|
|
80
|
+
async with self.driver.rw_transaction() as txn:
|
|
81
81
|
kb_config = await datamanagers.kb.get_config(txn, kbid=kbid, for_update=True)
|
|
82
82
|
if kb_config is None:
|
|
83
83
|
raise Exception(f"KB {kbid} does not exist")
|
|
@@ -86,7 +86,7 @@ class MigrationsDataManager:
|
|
|
86
86
|
await txn.commit()
|
|
87
87
|
|
|
88
88
|
async def get_global_info(self) -> GlobalInfo:
|
|
89
|
-
async with self.driver.
|
|
89
|
+
async with self.driver.ro_transaction() as txn:
|
|
90
90
|
raw_pb = await txn.get(MIGRATION_INFO_KEY)
|
|
91
91
|
if raw_pb is None:
|
|
92
92
|
return GlobalInfo(current_version=0, target_version=None)
|
|
@@ -100,7 +100,7 @@ class MigrationsDataManager:
|
|
|
100
100
|
current_version: Union[int, _Unset] = _UNSET,
|
|
101
101
|
target_version: Union[int, None, _Unset] = _UNSET,
|
|
102
102
|
) -> None:
|
|
103
|
-
async with self.driver.
|
|
103
|
+
async with self.driver.rw_transaction() as txn:
|
|
104
104
|
raw_pb = await txn.get(MIGRATION_INFO_KEY, for_update=True)
|
|
105
105
|
pb = migrations_pb2.MigrationInfo()
|
|
106
106
|
if raw_pb is not None:
|
|
@@ -116,18 +116,18 @@ class MigrationsDataManager:
|
|
|
116
116
|
|
|
117
117
|
async def get_kbs_to_rollover(self) -> list[str]:
|
|
118
118
|
keys = []
|
|
119
|
-
async with self.driver.
|
|
119
|
+
async with self.driver.ro_transaction() as txn:
|
|
120
120
|
async for key in txn.keys(ROLLOVER_CONTAINER_KEY):
|
|
121
121
|
keys.append(key.split("/")[-1])
|
|
122
122
|
|
|
123
123
|
return keys
|
|
124
124
|
|
|
125
125
|
async def add_kb_rollover(self, kbid: str) -> None:
|
|
126
|
-
async with self.driver.
|
|
126
|
+
async with self.driver.rw_transaction() as txn:
|
|
127
127
|
await txn.set(ROLLOVER_KEY.format(kbid=kbid), b"")
|
|
128
128
|
await txn.commit()
|
|
129
129
|
|
|
130
130
|
async def delete_kb_rollover(self, kbid: str) -> None:
|
|
131
|
-
async with self.driver.
|
|
131
|
+
async with self.driver.rw_transaction() as txn:
|
|
132
132
|
await txn.delete(ROLLOVER_KEY.format(kbid=kbid))
|
|
133
133
|
await txn.commit()
|
nucliadb/migrator/migrator.py
CHANGED
|
@@ -211,7 +211,7 @@ async def run_pg_schema_migrations(driver: PGDriver):
|
|
|
211
211
|
|
|
212
212
|
# The migration uses two transactions. The former is only used to get a lock (pg_advisory_lock)
|
|
213
213
|
# without having to worry about correctly unlocking it (postgres unlocks it when the transaction ends)
|
|
214
|
-
async with driver.
|
|
214
|
+
async with driver.rw_transaction() as tx_lock, tx_lock.connection.cursor() as cur_lock: # type: ignore[attr-defined]
|
|
215
215
|
await cur_lock.execute(
|
|
216
216
|
"CREATE TABLE IF NOT EXISTS migrations (version INT PRIMARY KEY, migrated_at TIMESTAMP NOT NULL DEFAULT NOW())"
|
|
217
217
|
)
|
|
@@ -227,7 +227,7 @@ async def run_pg_schema_migrations(driver: PGDriver):
|
|
|
227
227
|
|
|
228
228
|
# Gets a new transaction for each migration, so if they get interrupted we at least
|
|
229
229
|
# save the state of the last finished transaction
|
|
230
|
-
async with driver.
|
|
230
|
+
async with driver.rw_transaction() as tx, tx.connection.cursor() as cur: # type: ignore[attr-defined]
|
|
231
231
|
await migration.migrate(tx)
|
|
232
232
|
await cur.execute("INSERT INTO migrations (version) VALUES (%s)", (version,))
|
|
233
233
|
await tx.commit()
|
nucliadb/purge/__init__.py
CHANGED
|
@@ -47,7 +47,7 @@ from nucliadb_utils.utilities import get_storage
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
async def _iter_keys(driver: Driver, match: str) -> AsyncGenerator[str, None]:
|
|
50
|
-
async with driver.
|
|
50
|
+
async with driver.ro_transaction() as keys_txn:
|
|
51
51
|
async for key in keys_txn.keys(match=match):
|
|
52
52
|
yield key
|
|
53
53
|
|
|
@@ -81,9 +81,9 @@ async def purge_kb(driver: Driver):
|
|
|
81
81
|
)
|
|
82
82
|
continue
|
|
83
83
|
|
|
84
|
-
# Now delete the
|
|
84
|
+
# Now delete the delete mark
|
|
85
85
|
try:
|
|
86
|
-
async with driver.
|
|
86
|
+
async with driver.rw_transaction() as txn:
|
|
87
87
|
key_to_purge = KB_TO_DELETE.format(kbid=kbid)
|
|
88
88
|
await txn.delete(key_to_purge)
|
|
89
89
|
await txn.commit()
|
|
@@ -126,7 +126,7 @@ async def purge_kb_storage(driver: Driver, storage: Storage):
|
|
|
126
126
|
|
|
127
127
|
if delete_marker:
|
|
128
128
|
try:
|
|
129
|
-
async with driver.
|
|
129
|
+
async with driver.rw_transaction() as txn:
|
|
130
130
|
await txn.delete(key)
|
|
131
131
|
await txn.commit()
|
|
132
132
|
logger.info(f" √ Deleted storage deletion marker {key}")
|
|
@@ -163,7 +163,7 @@ async def _count_resources_storage_to_purge(driver: Driver) -> int:
|
|
|
163
163
|
"""
|
|
164
164
|
Count the number of resources marked as deleted in storage.
|
|
165
165
|
"""
|
|
166
|
-
async with driver.
|
|
166
|
+
async with driver.ro_transaction() as txn:
|
|
167
167
|
return await txn.count(match=RESOURCE_TO_DELETE_STORAGE_BASE)
|
|
168
168
|
|
|
169
169
|
|
|
@@ -174,7 +174,7 @@ async def _purge_resources_storage_batch(driver: Driver, storage: Storage, batch
|
|
|
174
174
|
"""
|
|
175
175
|
# Get the keys of the resources to delete in batches of 100
|
|
176
176
|
to_delete_batch = []
|
|
177
|
-
async with driver.
|
|
177
|
+
async with driver.ro_transaction() as txn:
|
|
178
178
|
async for key in txn.keys(match=RESOURCE_TO_DELETE_STORAGE_BASE, count=batch_size):
|
|
179
179
|
to_delete_batch.append(key)
|
|
180
180
|
|
|
@@ -194,7 +194,7 @@ async def _purge_resources_storage_batch(driver: Driver, storage: Storage, batch
|
|
|
194
194
|
await asyncio.gather(*tasks)
|
|
195
195
|
|
|
196
196
|
# Delete the schedule-to-delete keys
|
|
197
|
-
async with driver.
|
|
197
|
+
async with driver.rw_transaction() as txn:
|
|
198
198
|
for key in to_delete_batch:
|
|
199
199
|
await txn.delete(key)
|
|
200
200
|
await txn.commit()
|
|
@@ -220,14 +220,14 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
|
|
|
220
220
|
continue
|
|
221
221
|
|
|
222
222
|
try:
|
|
223
|
-
async with driver.
|
|
223
|
+
async with driver.ro_transaction() as txn:
|
|
224
224
|
value = await txn.get(key)
|
|
225
225
|
assert value is not None, "Key must exist or we wouldn't had fetch it iterating keys"
|
|
226
226
|
purge_payload = VectorSetPurge()
|
|
227
227
|
purge_payload.ParseFromString(value)
|
|
228
228
|
|
|
229
229
|
fields: list[Field] = []
|
|
230
|
-
async with driver.
|
|
230
|
+
async with driver.ro_transaction() as txn:
|
|
231
231
|
kb = KnowledgeBox(txn, storage, kbid)
|
|
232
232
|
async for resource in kb.iterate_resources():
|
|
233
233
|
fields.extend((await resource.get_fields(force=True)).values())
|
|
@@ -259,7 +259,7 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
|
|
|
259
259
|
await asyncio.gather(*tasks)
|
|
260
260
|
|
|
261
261
|
# Finally, delete the key
|
|
262
|
-
async with driver.
|
|
262
|
+
async with driver.rw_transaction() as txn:
|
|
263
263
|
await txn.delete(key)
|
|
264
264
|
await txn.commit()
|
|
265
265
|
|
nucliadb/purge/orphan_shards.py
CHANGED
|
@@ -100,7 +100,7 @@ async def _get_indexed_shards() -> dict[str, ShardKb]:
|
|
|
100
100
|
async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
|
|
101
101
|
stored_shards: dict[str, ShardKb] = {}
|
|
102
102
|
|
|
103
|
-
async with driver.
|
|
103
|
+
async with driver.ro_transaction() as txn:
|
|
104
104
|
async for kbid, _ in datamanagers.kb.get_kbs(txn):
|
|
105
105
|
kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
|
|
106
106
|
if kb_shards is None:
|
|
@@ -135,7 +135,7 @@ async def _get_kbid(shard_id: str) -> Optional[str]:
|
|
|
135
135
|
async def report_orphan_shards(driver: Driver):
|
|
136
136
|
orphan_shards = await detect_orphan_shards(driver)
|
|
137
137
|
logger.info(f"Found {len(orphan_shards)} orphan shards")
|
|
138
|
-
async with driver.
|
|
138
|
+
async with driver.ro_transaction() as txn:
|
|
139
139
|
for shard_id, kbid in orphan_shards.items():
|
|
140
140
|
if kbid == UNKNOWN_KB:
|
|
141
141
|
msg = "Found orphan shard but could not get KB info"
|
|
@@ -46,7 +46,7 @@ from nucliadb_utils.authentication import requires, requires_one
|
|
|
46
46
|
@version(1)
|
|
47
47
|
async def get_kbs(request: Request, prefix: str = "") -> KnowledgeBoxList:
|
|
48
48
|
driver = get_driver()
|
|
49
|
-
async with driver.
|
|
49
|
+
async with driver.ro_transaction() as txn:
|
|
50
50
|
response = KnowledgeBoxList()
|
|
51
51
|
async for kbid, slug in datamanagers.kb.get_kbs(txn, prefix=prefix):
|
|
52
52
|
response.kbs.append(KnowledgeBoxObjSummary(slug=slug or None, uuid=kbid))
|
|
@@ -64,7 +64,7 @@ async def get_kbs(request: Request, prefix: str = "") -> KnowledgeBoxList:
|
|
|
64
64
|
@version(1)
|
|
65
65
|
async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
66
66
|
driver = get_driver()
|
|
67
|
-
async with driver.
|
|
67
|
+
async with driver.ro_transaction() as txn:
|
|
68
68
|
kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
|
|
69
69
|
if kb_config is None:
|
|
70
70
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
|
@@ -87,7 +87,7 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
|
87
87
|
@version(1)
|
|
88
88
|
async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
|
|
89
89
|
driver = get_driver()
|
|
90
|
-
async with driver.
|
|
90
|
+
async with driver.ro_transaction() as txn:
|
|
91
91
|
kbid = await datamanagers.kb.get_kb_uuid(txn, slug=slug)
|
|
92
92
|
if kbid is None:
|
|
93
93
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
|
@@ -77,7 +77,7 @@ async def list_resources(
|
|
|
77
77
|
|
|
78
78
|
# Get counters from maindb
|
|
79
79
|
driver = get_driver()
|
|
80
|
-
async with driver.
|
|
80
|
+
async with driver.ro_transaction() as txn:
|
|
81
81
|
# Filter parameters for serializer
|
|
82
82
|
show: list[ResourceProperties] = [ResourceProperties.BASIC]
|
|
83
83
|
field_types: list[FieldTypeName] = []
|
|
@@ -335,7 +335,7 @@ async def _get_resource_field(
|
|
|
335
335
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
336
336
|
driver = get_driver()
|
|
337
337
|
pb_field_id = to_proto.field_type_name(field_type)
|
|
338
|
-
async with driver.
|
|
338
|
+
async with driver.ro_transaction() as txn:
|
|
339
339
|
kb = ORMKnowledgeBox(txn, storage, kbid)
|
|
340
340
|
|
|
341
341
|
if rid is None:
|
|
@@ -287,7 +287,7 @@ async def processing_status(
|
|
|
287
287
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
288
288
|
driver = get_driver()
|
|
289
289
|
|
|
290
|
-
async with driver.
|
|
290
|
+
async with driver.ro_transaction() as txn:
|
|
291
291
|
kb = KnowledgeBox(txn, storage, kbid)
|
|
292
292
|
|
|
293
293
|
max_simultaneous = asyncio.Semaphore(10)
|
|
@@ -201,7 +201,7 @@ async def get_resource_title_cached(
|
|
|
201
201
|
|
|
202
202
|
|
|
203
203
|
async def get_resource_title(kv_driver: Driver, kbid: str, resource_uuid: str) -> Optional[str]:
|
|
204
|
-
async with kv_driver.
|
|
204
|
+
async with kv_driver.ro_transaction() as txn:
|
|
205
205
|
basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=resource_uuid)
|
|
206
206
|
if basic is None:
|
|
207
207
|
return None
|
nucliadb/search/search/cache.py
CHANGED
|
@@ -54,7 +54,7 @@ async def get_resource(kbid: str, uuid: str) -> Optional[ResourceORM]:
|
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
async def _orm_get_resource(kbid: str, uuid: str) -> Optional[ResourceORM]:
|
|
57
|
-
async with get_driver().
|
|
57
|
+
async with get_driver().ro_transaction() as txn:
|
|
58
58
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
59
59
|
kb = KnowledgeBoxORM(txn, storage, kbid)
|
|
60
60
|
return await kb.get(uuid)
|
|
@@ -246,7 +246,7 @@ async def default_prompt_context(
|
|
|
246
246
|
- Using an dict prevents from duplicates pulled in through conversation expansion.
|
|
247
247
|
"""
|
|
248
248
|
# Sort retrieved paragraphs by decreasing order (most relevant first)
|
|
249
|
-
async with get_driver().
|
|
249
|
+
async with get_driver().ro_transaction() as txn:
|
|
250
250
|
storage = await get_storage()
|
|
251
251
|
kb = KnowledgeBoxORM(txn, storage, kbid)
|
|
252
252
|
for paragraph in ordered_paragraphs:
|
|
@@ -779,7 +779,7 @@ async def conversation_prompt_context(
|
|
|
779
779
|
):
|
|
780
780
|
analyzed_fields: List[str] = []
|
|
781
781
|
ops = 0
|
|
782
|
-
async with get_driver().
|
|
782
|
+
async with get_driver().ro_transaction() as txn:
|
|
783
783
|
storage = await get_storage()
|
|
784
784
|
kb = KnowledgeBoxORM(txn, storage, kbid)
|
|
785
785
|
for paragraph in ordered_paragraphs:
|
nucliadb/search/search/fetch.py
CHANGED
|
@@ -79,7 +79,7 @@ async def hydrate_resource_text(
|
|
|
79
79
|
return []
|
|
80
80
|
|
|
81
81
|
# Schedule the extraction of the text of each field in the resource
|
|
82
|
-
async with get_driver().
|
|
82
|
+
async with get_driver().ro_transaction() as txn:
|
|
83
83
|
resource.txn = txn
|
|
84
84
|
runner = ConcurrentRunner(max_tasks=max_concurrent_tasks)
|
|
85
85
|
for field_type, field_key in await resource.get_fields(force=True):
|
|
@@ -120,7 +120,7 @@ async def hydrate_resource_metadata(
|
|
|
120
120
|
if concurrency_control is not None:
|
|
121
121
|
await stack.enter_async_context(concurrency_control)
|
|
122
122
|
|
|
123
|
-
async with get_driver().
|
|
123
|
+
async with get_driver().ro_transaction() as ro_txn:
|
|
124
124
|
serialized_resource = await managed_serialize(
|
|
125
125
|
txn=ro_txn,
|
|
126
126
|
kbid=kbid,
|
|
@@ -393,7 +393,7 @@ async def get_matryoshka_dimension_cached(kbid: str, vectorset: str) -> Optional
|
|
|
393
393
|
|
|
394
394
|
@query_parse_dependency_observer.wrap({"type": "matryoshka_dimension"})
|
|
395
395
|
async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optional[int]:
|
|
396
|
-
async with get_driver().
|
|
396
|
+
async with get_driver().ro_transaction() as txn:
|
|
397
397
|
matryoshka_dimension = None
|
|
398
398
|
if not vectorset:
|
|
399
399
|
# XXX this should be migrated once we remove the "default" vectorset
|
|
@@ -409,23 +409,23 @@ async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optio
|
|
|
409
409
|
|
|
410
410
|
@query_parse_dependency_observer.wrap({"type": "classification_labels"})
|
|
411
411
|
async def get_classification_labels(kbid: str) -> knowledgebox_pb2.Labels:
|
|
412
|
-
async with get_driver().
|
|
412
|
+
async with get_driver().ro_transaction() as txn:
|
|
413
413
|
return await datamanagers.labels.get_labels(txn, kbid=kbid)
|
|
414
414
|
|
|
415
415
|
|
|
416
416
|
@query_parse_dependency_observer.wrap({"type": "synonyms"})
|
|
417
417
|
async def get_kb_synonyms(kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
|
|
418
|
-
async with get_driver().
|
|
418
|
+
async with get_driver().ro_transaction() as txn:
|
|
419
419
|
return await datamanagers.synonyms.get(txn, kbid=kbid)
|
|
420
420
|
|
|
421
421
|
|
|
422
422
|
@query_parse_dependency_observer.wrap({"type": "entities_meta_cache"})
|
|
423
423
|
async def get_entities_meta_cache(kbid: str) -> datamanagers.entities.EntitiesMetaCache:
|
|
424
|
-
async with get_driver().
|
|
424
|
+
async with get_driver().ro_transaction() as txn:
|
|
425
425
|
return await datamanagers.entities.get_entities_meta_cache(txn, kbid=kbid)
|
|
426
426
|
|
|
427
427
|
|
|
428
428
|
@query_parse_dependency_observer.wrap({"type": "deleted_entities_groups"})
|
|
429
429
|
async def get_deleted_entity_groups(kbid: str) -> list[str]:
|
|
430
|
-
async with get_driver().
|
|
430
|
+
async with get_driver().ro_transaction() as txn:
|
|
431
431
|
return list((await datamanagers.entities.get_deleted_groups(txn, kbid=kbid)).entities_groups)
|
|
@@ -77,7 +77,7 @@ async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) ->
|
|
|
77
77
|
tasks = []
|
|
78
78
|
|
|
79
79
|
# Schedule getting extracted text for each field of each resource
|
|
80
|
-
async with driver.
|
|
80
|
+
async with driver.ro_transaction() as txn:
|
|
81
81
|
if not await datamanagers.kb.exists_kb(txn, kbid=kbid):
|
|
82
82
|
raise datamanagers.exceptions.KnowledgeBoxNotFound(kbid)
|
|
83
83
|
|
nucliadb/tasks/retries.py
CHANGED
|
@@ -151,7 +151,7 @@ class TaskRetryHandler:
|
|
|
151
151
|
|
|
152
152
|
|
|
153
153
|
async def _get_metadata(kv_driver: Driver, metadata_key: str) -> Optional[TaskMetadata]:
|
|
154
|
-
async with kv_driver.
|
|
154
|
+
async with kv_driver.ro_transaction() as txn:
|
|
155
155
|
metadata = await txn.get(metadata_key)
|
|
156
156
|
if metadata is None:
|
|
157
157
|
return None
|
|
@@ -159,7 +159,7 @@ async def _get_metadata(kv_driver: Driver, metadata_key: str) -> Optional[TaskMe
|
|
|
159
159
|
|
|
160
160
|
|
|
161
161
|
async def _set_metadata(kv_driver: Driver, metadata_key: str, metadata: TaskMetadata) -> None:
|
|
162
|
-
async with kv_driver.
|
|
162
|
+
async with kv_driver.rw_transaction() as txn:
|
|
163
163
|
await txn.set(metadata_key, metadata.model_dump_json().encode())
|
|
164
164
|
await txn.commit()
|
|
165
165
|
|
|
@@ -188,7 +188,7 @@ async def purge_batch(
|
|
|
188
188
|
"""
|
|
189
189
|
Returns the next start key and the number of purged records. If start is None, it means there are no more records to purge.
|
|
190
190
|
"""
|
|
191
|
-
async with kv_driver.
|
|
191
|
+
async with kv_driver.rw_transaction() as txn:
|
|
192
192
|
txn = cast(PGTransaction, txn)
|
|
193
193
|
async with txn.connection.cursor() as cur:
|
|
194
194
|
await cur.execute(
|
|
@@ -226,7 +226,7 @@ async def purge_batch(
|
|
|
226
226
|
while len(to_delete) > 0:
|
|
227
227
|
batch = to_delete[:delete_batch_size]
|
|
228
228
|
to_delete = to_delete[delete_batch_size:]
|
|
229
|
-
async with kv_driver.
|
|
229
|
+
async with kv_driver.rw_transaction() as txn:
|
|
230
230
|
for key in batch:
|
|
231
231
|
logger.info("Purging task metadata", extra={"key": key})
|
|
232
232
|
await txn.delete(key)
|
|
@@ -41,7 +41,7 @@ async def get_resource_from_cache_or_db(kbid: str, uuid: str) -> Optional[Resour
|
|
|
41
41
|
|
|
42
42
|
async def _get_resource_from_db(kbid: str, uuid: str) -> Optional[ResourceORM]:
|
|
43
43
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
44
|
-
async with get_driver().
|
|
44
|
+
async with get_driver().ro_transaction() as transaction:
|
|
45
45
|
kb = KnowledgeBoxORM(transaction, storage, kbid)
|
|
46
46
|
return await kb.get(uuid)
|
|
47
47
|
|
nucliadb/train/nodes.py
CHANGED
|
@@ -81,7 +81,7 @@ class TrainShardManager(manager.KBShardManager):
|
|
|
81
81
|
return manager
|
|
82
82
|
|
|
83
83
|
async def kb_sentences(self, request: GetSentencesRequest) -> AsyncIterator[TrainSentence]:
|
|
84
|
-
async with self.driver.
|
|
84
|
+
async with self.driver.ro_transaction() as txn:
|
|
85
85
|
kb = KnowledgeBox(txn, self.storage, request.kb.uuid)
|
|
86
86
|
if request.uuid != "":
|
|
87
87
|
# Filter by uuid
|
|
@@ -95,7 +95,7 @@ class TrainShardManager(manager.KBShardManager):
|
|
|
95
95
|
yield sentence
|
|
96
96
|
|
|
97
97
|
async def kb_paragraphs(self, request: GetParagraphsRequest) -> AsyncIterator[TrainParagraph]:
|
|
98
|
-
async with self.driver.
|
|
98
|
+
async with self.driver.ro_transaction() as txn:
|
|
99
99
|
kb = KnowledgeBox(txn, self.storage, request.kb.uuid)
|
|
100
100
|
if request.uuid != "":
|
|
101
101
|
# Filter by uuid
|
|
@@ -109,7 +109,7 @@ class TrainShardManager(manager.KBShardManager):
|
|
|
109
109
|
yield paragraph
|
|
110
110
|
|
|
111
111
|
async def kb_fields(self, request: GetFieldsRequest) -> AsyncIterator[TrainField]:
|
|
112
|
-
async with self.driver.
|
|
112
|
+
async with self.driver.ro_transaction() as txn:
|
|
113
113
|
kb = KnowledgeBox(txn, self.storage, request.kb.uuid)
|
|
114
114
|
if request.uuid != "":
|
|
115
115
|
# Filter by uuid
|
|
@@ -123,7 +123,7 @@ class TrainShardManager(manager.KBShardManager):
|
|
|
123
123
|
yield field
|
|
124
124
|
|
|
125
125
|
async def kb_resources(self, request: GetResourcesRequest) -> AsyncIterator[TrainResource]:
|
|
126
|
-
async with self.driver.
|
|
126
|
+
async with self.driver.ro_transaction() as txn:
|
|
127
127
|
kb = KnowledgeBox(txn, self.storage, request.kb.uuid)
|
|
128
128
|
base = KB_RESOURCE_SLUG_BASE.format(kbid=request.kb.uuid)
|
|
129
129
|
async for key in txn.keys(match=base):
|
nucliadb/train/servicer.py
CHANGED
|
@@ -89,7 +89,7 @@ class TrainServicer(train_pb2_grpc.TrainServicer):
|
|
|
89
89
|
) -> GetEntitiesResponse:
|
|
90
90
|
kbid = request.kb.uuid
|
|
91
91
|
response = GetEntitiesResponse()
|
|
92
|
-
async with self.proc.driver.
|
|
92
|
+
async with self.proc.driver.ro_transaction() as txn:
|
|
93
93
|
entities_manager = await self.proc.get_kb_entities_manager(txn, kbid)
|
|
94
94
|
if entities_manager is None:
|
|
95
95
|
await txn.abort()
|
nucliadb/train/uploader.py
CHANGED
|
@@ -75,7 +75,7 @@ class UploadServicer:
|
|
|
75
75
|
) -> GetEntitiesResponse:
|
|
76
76
|
kbid = request.kb.uuid
|
|
77
77
|
response = GetEntitiesResponse()
|
|
78
|
-
async with self.proc.driver.
|
|
78
|
+
async with self.proc.driver.ro_transaction() as txn:
|
|
79
79
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
|
80
80
|
if kbobj is None:
|
|
81
81
|
response.status = GetEntitiesResponse.Status.NOTFOUND
|
nucliadb/writer/api/v1/field.py
CHANGED
|
@@ -572,7 +572,7 @@ async def reprocess_file_field(
|
|
|
572
572
|
storage = await get_storage(service_name=SERVICE_NAME)
|
|
573
573
|
driver = get_driver()
|
|
574
574
|
|
|
575
|
-
async with driver.
|
|
575
|
+
async with driver.ro_transaction() as txn:
|
|
576
576
|
kb = KnowledgeBox(txn, storage, kbid)
|
|
577
577
|
|
|
578
578
|
resource = await kb.get(rid)
|
|
@@ -182,7 +182,7 @@ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> Kn
|
|
|
182
182
|
hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
|
|
183
183
|
)
|
|
184
184
|
try:
|
|
185
|
-
async with driver.
|
|
185
|
+
async with driver.rw_transaction() as txn:
|
|
186
186
|
await KnowledgeBox.update(
|
|
187
187
|
txn,
|
|
188
188
|
uuid=kbid,
|
|
@@ -373,7 +373,7 @@ async def update_resource_slug(
|
|
|
373
373
|
rid: str,
|
|
374
374
|
new_slug: str,
|
|
375
375
|
):
|
|
376
|
-
async with driver.
|
|
376
|
+
async with driver.rw_transaction() as txn:
|
|
377
377
|
old_slug = await datamanagers.resources.modify_slug(txn, kbid=kbid, rid=rid, new_slug=new_slug)
|
|
378
378
|
await txn.commit()
|
|
379
379
|
return old_slug
|
|
@@ -460,7 +460,7 @@ async def _reprocess_resource(
|
|
|
460
460
|
driver = get_driver()
|
|
461
461
|
|
|
462
462
|
writer = BrokerMessage()
|
|
463
|
-
async with driver.
|
|
463
|
+
async with driver.ro_transaction() as txn:
|
|
464
464
|
kb = KnowledgeBox(txn, storage, kbid)
|
|
465
465
|
|
|
466
466
|
resource = await kb.get(rid)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.7.2.post4868
|
|
3
|
+
Version: 6.7.2.post4874
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: <4,>=3.9
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.7.2.
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.7.2.
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.7.2.
|
|
26
|
-
Requires-Dist: nidx-protos>=6.7.2.
|
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.7.2.post4874
|
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.7.2.post4874
|
|
24
|
+
Requires-Dist: nucliadb-protos>=6.7.2.post4874
|
|
25
|
+
Requires-Dist: nucliadb-models>=6.7.2.post4874
|
|
26
|
+
Requires-Dist: nidx-protos>=6.7.2.post4874
|
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
28
|
Requires-Dist: nuclia-models>=0.46.0
|
|
29
29
|
Requires-Dist: uvicorn[standard]
|