nucliadb 6.2.1.post3253__py3-none-any.whl → 6.2.1.post3256__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/search/query_parser/models.py +0 -3
- nucliadb/search/search/query_parser/parser.py +0 -4
- nucliadb/search/search/rerankers.py +1 -42
- nucliadb/writer/resource/basic.py +2 -1
- {nucliadb-6.2.1.post3253.dist-info → nucliadb-6.2.1.post3256.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post3253.dist-info → nucliadb-6.2.1.post3256.dist-info}/RECORD +10 -10
- {nucliadb-6.2.1.post3253.dist-info → nucliadb-6.2.1.post3256.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3253.dist-info → nucliadb-6.2.1.post3256.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3253.dist-info → nucliadb-6.2.1.post3256.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post3253.dist-info → nucliadb-6.2.1.post3256.dist-info}/zip-safe +0 -0
@@ -31,7 +31,6 @@ from nucliadb.search.search.query_parser.models import (
|
|
31
31
|
CatalogFilters,
|
32
32
|
CatalogQuery,
|
33
33
|
DateTimeFilter,
|
34
|
-
MultiMatchBoosterReranker,
|
35
34
|
NoopReranker,
|
36
35
|
PredictReranker,
|
37
36
|
RankFusion,
|
@@ -123,9 +122,6 @@ class _FindParser:
|
|
123
122
|
if self.item.reranker == search_models.RerankerName.NOOP:
|
124
123
|
reranking = NoopReranker()
|
125
124
|
|
126
|
-
elif self.item.reranker == search_models.RerankerName.MULTI_MATCH_BOOSTER:
|
127
|
-
reranking = MultiMatchBoosterReranker()
|
128
|
-
|
129
125
|
elif self.item.reranker == search_models.RerankerName.PREDICT_RERANKER:
|
130
126
|
# for predict rearnker, by default, we want a x2 factor with a
|
131
127
|
# top of 200 results
|
@@ -169,58 +169,17 @@ class PredictReranker(Reranker):
|
|
169
169
|
return best
|
170
170
|
|
171
171
|
|
172
|
-
class MultiMatchBoosterReranker(Reranker):
|
173
|
-
"""This reranker gives more value to items that come from different indices"""
|
174
|
-
|
175
|
-
@property
|
176
|
-
def window(self) -> Optional[int]:
|
177
|
-
return None
|
178
|
-
|
179
|
-
@reranker_observer.wrap({"type": "multi_match_booster"})
|
180
|
-
async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
|
181
|
-
"""Given a list of rerankable items, boost matches that appear multiple
|
182
|
-
times. The returned list can be smaller than the initial, as repeated
|
183
|
-
matches are deduplicated.
|
184
|
-
"""
|
185
|
-
reranked_by_id = {}
|
186
|
-
for item in items:
|
187
|
-
if item.id not in reranked_by_id:
|
188
|
-
reranked_by_id[item.id] = RankedItem(
|
189
|
-
id=item.id,
|
190
|
-
score=item.score,
|
191
|
-
score_type=item.score_type,
|
192
|
-
)
|
193
|
-
else:
|
194
|
-
# it's a mutiple match, boost the score
|
195
|
-
if reranked_by_id[item.id].score < item.score:
|
196
|
-
# previous implementation noted that we are using vector
|
197
|
-
# score x2 when we find a multiple match. However, this may
|
198
|
-
# not be true, as the same paragraph could come in any
|
199
|
-
# position in the rank fusioned result list
|
200
|
-
reranked_by_id[item.id].score = item.score * 2
|
201
|
-
|
202
|
-
reranked_by_id[item.id].score_type = SCORE_TYPE.BOTH
|
203
|
-
|
204
|
-
reranked = list(reranked_by_id.values())
|
205
|
-
sort_by_score(reranked)
|
206
|
-
return reranked
|
207
|
-
|
208
|
-
|
209
172
|
def get_reranker(reranker: parser_models.Reranker) -> Reranker:
|
210
173
|
algorithm: Reranker
|
211
174
|
|
212
175
|
if isinstance(reranker, parser_models.NoopReranker):
|
213
176
|
algorithm = NoopReranker()
|
214
177
|
|
215
|
-
elif isinstance(reranker, parser_models.MultiMatchBoosterReranker):
|
216
|
-
algorithm = MultiMatchBoosterReranker()
|
217
|
-
|
218
178
|
elif isinstance(reranker, parser_models.PredictReranker):
|
219
179
|
algorithm = PredictReranker(reranker.window)
|
220
180
|
|
221
181
|
else:
|
222
|
-
|
223
|
-
algorithm = MultiMatchBoosterReranker()
|
182
|
+
raise ValueError(f"Unknown reranker requested: {reranker}")
|
224
183
|
|
225
184
|
return algorithm
|
226
185
|
|
@@ -95,7 +95,8 @@ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess
|
|
95
95
|
if item.metadata.language:
|
96
96
|
bm.basic.metadata.language = item.metadata.language
|
97
97
|
if item.metadata.languages:
|
98
|
-
|
98
|
+
unique_languages = list(set(item.metadata.languages))
|
99
|
+
bm.basic.metadata.languages.extend(unique_languages)
|
99
100
|
|
100
101
|
if item.fieldmetadata is not None:
|
101
102
|
for fieldmetadata in item.fieldmetadata:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3253
|
3
|
+
Version: 6.2.1.post3256
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3253
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3253
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post3253
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.post3253
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3256
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3256
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3256
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3256
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nuclia-models>=0.24.2
|
31
31
|
Requires-Dist: uvicorn
|
@@ -221,7 +221,7 @@ nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3Le
|
|
221
221
|
nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
|
222
222
|
nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
|
223
223
|
nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
|
224
|
-
nucliadb/search/search/rerankers.py,sha256=
|
224
|
+
nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
|
225
225
|
nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
|
226
226
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
227
227
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
@@ -234,8 +234,8 @@ nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeD
|
|
234
234
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
235
235
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
236
236
|
nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
|
237
|
-
nucliadb/search/search/query_parser/models.py,sha256
|
238
|
-
nucliadb/search/search/query_parser/parser.py,sha256=
|
237
|
+
nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
|
238
|
+
nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
|
239
239
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
240
240
|
nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
|
241
241
|
nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
|
@@ -319,7 +319,7 @@ nucliadb/writer/api/v1/upload.py,sha256=VOeqNTrZx1_z8iaKjM7p8fVlVcIYMtnQNK1dm72c
|
|
319
319
|
nucliadb/writer/api/v1/vectorsets.py,sha256=mESaXkkI9f-jWWMW61ZZgv7E5YWXKemyc6vwT0lFXns,6747
|
320
320
|
nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
321
321
|
nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
|
322
|
-
nucliadb/writer/resource/basic.py,sha256=
|
322
|
+
nucliadb/writer/resource/basic.py,sha256=wo5VDyp9VBekoJaQAbhE7uPFkpB5auAoRnsdrh3Ny3s,11222
|
323
323
|
nucliadb/writer/resource/field.py,sha256=HsOERELyAsb9e0dx2IkSQ9lk0SThALFRcDKCVBw8ifU,15478
|
324
324
|
nucliadb/writer/resource/origin.py,sha256=pvhUDdU0mlWPUcpoQi4LDUJaRtfjzVVrA8XcGVI_N8k,2021
|
325
325
|
nucliadb/writer/tus/__init__.py,sha256=huWpKnDnjsrKlBBJk30ta5vamlA-4x0TbPs_2Up8hyM,5443
|
@@ -331,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
331
331
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
332
332
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
333
333
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
334
|
-
nucliadb-6.2.1.
|
335
|
-
nucliadb-6.2.1.post3253.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
336
|
-
nucliadb-6.2.1.post3253.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
337
|
-
nucliadb-6.2.1.post3253.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
338
|
-
nucliadb-6.2.1.post3253.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
339
|
-
nucliadb-6.2.1.post3253.dist-info/RECORD,,
|
334
|
+
nucliadb-6.2.1.post3256.dist-info/METADATA,sha256=HVAp6MZqKVjVZ9e5gpnJqSro67IFGw9tyJ8Ay3oYHzk,4603
|
335
|
+
nucliadb-6.2.1.post3256.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
336
|
+
nucliadb-6.2.1.post3256.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
337
|
+
nucliadb-6.2.1.post3256.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
338
|
+
nucliadb-6.2.1.post3256.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
339
|
+
nucliadb-6.2.1.post3256.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|