nucliadb 6.2.1.post2751__py3-none-any.whl → 6.2.1.post2755__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/external_index_providers/pinecone.py +1 -0
- nucliadb/ingest/orm/brain.py +12 -2
- nucliadb/ingest/orm/processor/processor.py +5 -2
- nucliadb/ingest/orm/resource.py +4 -0
- nucliadb/writer/api/v1/knowledgebox.py +1 -0
- {nucliadb-6.2.1.post2751.dist-info → nucliadb-6.2.1.post2755.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post2751.dist-info → nucliadb-6.2.1.post2755.dist-info}/RECORD +11 -11
- {nucliadb-6.2.1.post2751.dist-info → nucliadb-6.2.1.post2755.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post2751.dist-info → nucliadb-6.2.1.post2755.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post2751.dist-info → nucliadb-6.2.1.post2755.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post2751.dist-info → nucliadb-6.2.1.post2755.dist-info}/zip-safe +0 -0
@@ -441,6 +441,7 @@ class PineconeIndexManager(ExternalIndexManager):
|
|
441
441
|
|
442
442
|
def get_prefixes_to_delete(self, index_data: Resource) -> set[str]:
|
443
443
|
prefixes_to_delete = set()
|
444
|
+
# TODO: migrate to vector_prefixes_to_delete
|
444
445
|
for field_id in index_data.sentences_to_delete:
|
445
446
|
try:
|
446
447
|
delete_vid = VectorId.from_string(field_id)
|
nucliadb/ingest/orm/brain.py
CHANGED
@@ -100,6 +100,8 @@ class ResourceBrain:
|
|
100
100
|
page_positions: Optional[FilePagePositions],
|
101
101
|
extracted_text: Optional[ExtractedText],
|
102
102
|
basic_user_field_metadata: Optional[UserFieldMetadata] = None,
|
103
|
+
*,
|
104
|
+
replace_field: bool = False,
|
103
105
|
):
|
104
106
|
# To check for duplicate paragraphs
|
105
107
|
unique_paragraphs: set[str] = set()
|
@@ -224,6 +226,11 @@ class ResourceBrain:
|
|
224
226
|
|
225
227
|
self.brain.paragraphs[field_key].paragraphs[key].CopyFrom(p)
|
226
228
|
|
229
|
+
if replace_field:
|
230
|
+
field_type, field_name = field_key.split("/")
|
231
|
+
full_field_id = ids.FieldId(rid=self.rid, type=field_type, key=field_name).full()
|
232
|
+
self.brain.paragraphs_to_delete.append(full_field_id)
|
233
|
+
|
227
234
|
for relations in metadata.metadata.relations:
|
228
235
|
for relation in relations.relations:
|
229
236
|
self.brain.relations.append(relation)
|
@@ -301,8 +308,11 @@ class ResourceBrain:
|
|
301
308
|
|
302
309
|
if replace_field:
|
303
310
|
full_field_id = ids.FieldId(rid=self.rid, type=fid.type, key=fid.key).full()
|
304
|
-
|
305
|
-
|
311
|
+
if vectorset is None:
|
312
|
+
# DEPRECATED
|
313
|
+
self.brain.sentences_to_delete.append(full_field_id)
|
314
|
+
else:
|
315
|
+
self.brain.vector_prefixes_to_delete[vectorset].items.append(full_field_id)
|
306
316
|
|
307
317
|
def _apply_field_vector(
|
308
318
|
self,
|
@@ -275,7 +275,6 @@ class Processor:
|
|
275
275
|
|
276
276
|
if message.source == writer_pb2.BrokerMessage.MessageSource.WRITER:
|
277
277
|
resource = await kb.get(uuid)
|
278
|
-
|
279
278
|
if resource is None:
|
280
279
|
# It's a new resource
|
281
280
|
resource = await kb.add_resource(uuid, message.slug, message.basic)
|
@@ -737,7 +736,11 @@ def has_vectors_operation(index_message: PBBrainResource) -> bool:
|
|
737
736
|
"""
|
738
737
|
Returns True if the index message has any vectors to index or to delete.
|
739
738
|
"""
|
740
|
-
if
|
739
|
+
if (
|
740
|
+
len(index_message.sentences_to_delete) > 0
|
741
|
+
or len(index_message.paragraphs_to_delete) > 0
|
742
|
+
or any([len(deletions.items) for deletions in index_message.vector_prefixes_to_delete.values()])
|
743
|
+
):
|
741
744
|
return True
|
742
745
|
for field_paragraphs in index_message.paragraphs.values():
|
743
746
|
for paragraph in field_paragraphs.paragraphs.values():
|
nucliadb/ingest/orm/resource.py
CHANGED
@@ -226,6 +226,7 @@ class Resource:
|
|
226
226
|
page_positions=page_positions,
|
227
227
|
extracted_text=await field_obj.get_extracted_text(),
|
228
228
|
basic_user_field_metadata=user_field_metadata,
|
229
|
+
replace_field=True,
|
229
230
|
)
|
230
231
|
|
231
232
|
# Some basic fields are computed off field metadata.
|
@@ -336,6 +337,7 @@ class Resource:
|
|
336
337
|
page_positions=page_positions,
|
337
338
|
extracted_text=await field.get_extracted_text(),
|
338
339
|
basic_user_field_metadata=user_field_metadata,
|
340
|
+
replace_field=reindex,
|
339
341
|
)
|
340
342
|
|
341
343
|
if self.disable_vectors is False:
|
@@ -584,6 +586,7 @@ class Resource:
|
|
584
586
|
# Upload to binary storage
|
585
587
|
# Vector indexing
|
586
588
|
if self.disable_vectors is False:
|
589
|
+
await self.get_fields(force=True)
|
587
590
|
for field_vectors in message.field_vectors:
|
588
591
|
await self._apply_extracted_vectors(field_vectors)
|
589
592
|
|
@@ -723,6 +726,7 @@ class Resource:
|
|
723
726
|
page_positions=page_positions,
|
724
727
|
extracted_text=extracted_text,
|
725
728
|
basic_user_field_metadata=user_field_metadata,
|
729
|
+
replace_field=True,
|
726
730
|
)
|
727
731
|
loop = asyncio.get_running_loop()
|
728
732
|
await loop.run_in_executor(_executor, apply_field_metadata)
|
@@ -68,6 +68,7 @@ async def create_kb_endpoint(request: Request, item: KnowledgeBoxConfig) -> Know
|
|
68
68
|
except ExternalIndexCreationError as exc:
|
69
69
|
raise HTTPException(status_code=502, detail=str(exc))
|
70
70
|
except Exception:
|
71
|
+
logger.exception("Could not create KB")
|
71
72
|
raise HTTPException(status_code=500, detail="Error creating knowledge box")
|
72
73
|
else:
|
73
74
|
return KnowledgeBoxObj(uuid=kbid, slug=slug)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post2751
|
3
|
+
Version: 6.2.1.post2755
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2751
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2751
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post2751
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.post2751
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2755
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2755
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post2755
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post2755
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: nuclia-models>=0.24.2
|
@@ -82,7 +82,7 @@ nucliadb/common/external_index_providers/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A
|
|
82
82
|
nucliadb/common/external_index_providers/base.py,sha256=jijdPxWNLUUO3xZISpO7vvlmm8rOxNY56z8s6QVys6o,8809
|
83
83
|
nucliadb/common/external_index_providers/exceptions.py,sha256=nDhhOIkb66hjCrBk4Spvl2vN1SuW5gbwrMCDmrdjHHE,1209
|
84
84
|
nucliadb/common/external_index_providers/manager.py,sha256=aFSrrKKYG1ydpTSyq4zYD0LOxFS7P-CO6rcKC0hiF4I,4267
|
85
|
-
nucliadb/common/external_index_providers/pinecone.py,sha256=
|
85
|
+
nucliadb/common/external_index_providers/pinecone.py,sha256=afglJq6FfifFNTONGrFBCe5yuEL2h3pDFwZkKiA4_6o,39802
|
86
86
|
nucliadb/common/external_index_providers/settings.py,sha256=EGHnIkwxqe6aypwKegXTlKO3AgUxNa-6GeAZG25Njis,2002
|
87
87
|
nucliadb/common/http_clients/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
88
88
|
nucliadb/common/http_clients/auth.py,sha256=srfpgAbs2wmqA9u_l-HxsV4YoO77Tse4y3gm3q2YvYM,2112
|
@@ -134,19 +134,19 @@ nucliadb/ingest/fields/generic.py,sha256=elgtqv15aJUq3zY7X_g0bli_2BpcwPArVvzhe54
|
|
134
134
|
nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJs,4172
|
135
135
|
nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
|
136
136
|
nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
137
|
-
nucliadb/ingest/orm/brain.py,sha256=
|
137
|
+
nucliadb/ingest/orm/brain.py,sha256=Hzq-3aarKaUCiUoa8H83unRUfduRE9TsQH1dEq0mvZY,28841
|
138
138
|
nucliadb/ingest/orm/broker_message.py,sha256=JYYUJIZEL_EqovQuw6u-FmEkjyoYlxIXJq9hFekOiks,6441
|
139
139
|
nucliadb/ingest/orm/entities.py,sha256=2PslT1FZ6yCvJtjR0UpKTSzxJrtS-C_gZx4ZTWHunTc,15759
|
140
140
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
141
141
|
nucliadb/ingest/orm/knowledgebox.py,sha256=dBetjoJBYT6JuGmMHiqjcfJeD8qJrK3MQt9X03IrHRA,23228
|
142
142
|
nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
|
143
|
-
nucliadb/ingest/orm/resource.py,sha256=
|
143
|
+
nucliadb/ingest/orm/resource.py,sha256=ZiCWa-ayznr3XRnYN7v7_RoYpwvoKddky5N4QRBTjhY,54029
|
144
144
|
nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
|
145
145
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
146
146
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
147
147
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=ZF66gfHHMHCh9n9XXoTSdTXhETnzJlcylfQqhFq54Pw,5775
|
148
148
|
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=f32PIEXWktWzGDws6Ffife37OAfrseP5IOti_Cb4ir8,3012
|
149
|
-
nucliadb/ingest/orm/processor/processor.py,sha256=
|
149
|
+
nucliadb/ingest/orm/processor/processor.py,sha256=2FxAetUvtHvg6l-24xYrmBdsyqc0RU1zmliel44581g,30945
|
150
150
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
|
151
151
|
nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
|
152
152
|
nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -313,7 +313,7 @@ nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,
|
|
313
313
|
nucliadb/writer/api/v1/__init__.py,sha256=FVn7N9VJ6bsEoy4TRnkclr4Umd5hECiwPXVqRnJ8BME,1095
|
314
314
|
nucliadb/writer/api/v1/export_import.py,sha256=6_gn0-emCjmK6bCUX5kgMvG0qkZr4HlfGmBXhhngsxo,8243
|
315
315
|
nucliadb/writer/api/v1/field.py,sha256=PT5NfnD6TmX0oh-ehGiMJ7YiEtduIrMalqysQANjciY,17213
|
316
|
-
nucliadb/writer/api/v1/knowledgebox.py,sha256=
|
316
|
+
nucliadb/writer/api/v1/knowledgebox.py,sha256=Mr1vJSWOtiraDdtoTqQ1V2rSirMdojL4wN0Q3cOiX4k,10929
|
317
317
|
nucliadb/writer/api/v1/learning_config.py,sha256=GaYaagjBrVG9ZxrWQyVQfqGMQV3tAJjqJ5CStaKhktU,2058
|
318
318
|
nucliadb/writer/api/v1/resource.py,sha256=clUu0SZryLIZqvM9URM3fcDNnh6mhRoO7dt4SlRVE8I,18313
|
319
319
|
nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
|
@@ -335,9 +335,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
335
335
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
336
336
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
337
337
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
338
|
-
nucliadb-6.2.1.
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
341
|
-
nucliadb-6.2.1.
|
342
|
-
nucliadb-6.2.1.
|
343
|
-
nucliadb-6.2.1.
|
338
|
+
nucliadb-6.2.1.post2755.dist-info/METADATA,sha256=v8quLHM4YNOOMHBTcxio0COtXxx-fpjNfLhiMRLRQFU,4689
|
339
|
+
nucliadb-6.2.1.post2755.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
340
|
+
nucliadb-6.2.1.post2755.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
341
|
+
nucliadb-6.2.1.post2755.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
342
|
+
nucliadb-6.2.1.post2755.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
343
|
+
nucliadb-6.2.1.post2755.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|