nucliadb 6.2.1.post3253__py3-none-any.whl → 6.2.1.post3256__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,9 +62,6 @@ class Reranker(BaseModel): ...
62
62
  class NoopReranker(Reranker): ...
63
63
 
64
64
 
65
- class MultiMatchBoosterReranker(Reranker): ...
66
-
67
-
68
65
  class PredictReranker(Reranker):
69
66
  window: int = Field(le=200)
70
67
 
@@ -31,7 +31,6 @@ from nucliadb.search.search.query_parser.models import (
31
31
  CatalogFilters,
32
32
  CatalogQuery,
33
33
  DateTimeFilter,
34
- MultiMatchBoosterReranker,
35
34
  NoopReranker,
36
35
  PredictReranker,
37
36
  RankFusion,
@@ -123,9 +122,6 @@ class _FindParser:
123
122
  if self.item.reranker == search_models.RerankerName.NOOP:
124
123
  reranking = NoopReranker()
125
124
 
126
- elif self.item.reranker == search_models.RerankerName.MULTI_MATCH_BOOSTER:
127
- reranking = MultiMatchBoosterReranker()
128
-
129
125
  elif self.item.reranker == search_models.RerankerName.PREDICT_RERANKER:
130
126
  # for predict rearnker, by default, we want a x2 factor with a
131
127
  # top of 200 results
@@ -169,58 +169,17 @@ class PredictReranker(Reranker):
169
169
  return best
170
170
 
171
171
 
172
- class MultiMatchBoosterReranker(Reranker):
173
- """This reranker gives more value to items that come from different indices"""
174
-
175
- @property
176
- def window(self) -> Optional[int]:
177
- return None
178
-
179
- @reranker_observer.wrap({"type": "multi_match_booster"})
180
- async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
181
- """Given a list of rerankable items, boost matches that appear multiple
182
- times. The returned list can be smaller than the initial, as repeated
183
- matches are deduplicated.
184
- """
185
- reranked_by_id = {}
186
- for item in items:
187
- if item.id not in reranked_by_id:
188
- reranked_by_id[item.id] = RankedItem(
189
- id=item.id,
190
- score=item.score,
191
- score_type=item.score_type,
192
- )
193
- else:
194
- # it's a mutiple match, boost the score
195
- if reranked_by_id[item.id].score < item.score:
196
- # previous implementation noted that we are using vector
197
- # score x2 when we find a multiple match. However, this may
198
- # not be true, as the same paragraph could come in any
199
- # position in the rank fusioned result list
200
- reranked_by_id[item.id].score = item.score * 2
201
-
202
- reranked_by_id[item.id].score_type = SCORE_TYPE.BOTH
203
-
204
- reranked = list(reranked_by_id.values())
205
- sort_by_score(reranked)
206
- return reranked
207
-
208
-
209
172
  def get_reranker(reranker: parser_models.Reranker) -> Reranker:
210
173
  algorithm: Reranker
211
174
 
212
175
  if isinstance(reranker, parser_models.NoopReranker):
213
176
  algorithm = NoopReranker()
214
177
 
215
- elif isinstance(reranker, parser_models.MultiMatchBoosterReranker):
216
- algorithm = MultiMatchBoosterReranker()
217
-
218
178
  elif isinstance(reranker, parser_models.PredictReranker):
219
179
  algorithm = PredictReranker(reranker.window)
220
180
 
221
181
  else:
222
- logger.warning(f"Unknown reranker requested: {reranker}. Using default instead")
223
- algorithm = MultiMatchBoosterReranker()
182
+ raise ValueError(f"Unknown reranker requested: {reranker}")
224
183
 
225
184
  return algorithm
226
185
 
@@ -95,7 +95,8 @@ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess
95
95
  if item.metadata.language:
96
96
  bm.basic.metadata.language = item.metadata.language
97
97
  if item.metadata.languages:
98
- bm.basic.metadata.languages.extend(item.metadata.languages)
98
+ unique_languages = list(set(item.metadata.languages))
99
+ bm.basic.metadata.languages.extend(unique_languages)
99
100
 
100
101
  if item.fieldmetadata is not None:
101
102
  for fieldmetadata in item.fieldmetadata:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3253
3
+ Version: 6.2.1.post3256
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3253
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3253
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3253
28
- Requires-Dist: nucliadb-models>=6.2.1.post3253
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3256
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3256
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3256
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3256
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -221,7 +221,7 @@ nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3Le
221
221
  nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
222
222
  nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
223
223
  nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
224
- nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
224
+ nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
225
225
  nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
226
226
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
227
227
  nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
@@ -234,8 +234,8 @@ nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeD
234
234
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
235
235
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
236
236
  nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
237
- nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
238
- nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
237
+ nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
238
+ nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
239
239
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
240
240
  nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
241
241
  nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -319,7 +319,7 @@ nucliadb/writer/api/v1/upload.py,sha256=VOeqNTrZx1_z8iaKjM7p8fVlVcIYMtnQNK1dm72c
319
319
  nucliadb/writer/api/v1/vectorsets.py,sha256=mESaXkkI9f-jWWMW61ZZgv7E5YWXKemyc6vwT0lFXns,6747
320
320
  nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
321
321
  nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
322
- nucliadb/writer/resource/basic.py,sha256=l9zD-Qiq4eUkHezMf0w1Ksx2izKYLYuNoMIlXcNxxpM,11163
322
+ nucliadb/writer/resource/basic.py,sha256=wo5VDyp9VBekoJaQAbhE7uPFkpB5auAoRnsdrh3Ny3s,11222
323
323
  nucliadb/writer/resource/field.py,sha256=HsOERELyAsb9e0dx2IkSQ9lk0SThALFRcDKCVBw8ifU,15478
324
324
  nucliadb/writer/resource/origin.py,sha256=pvhUDdU0mlWPUcpoQi4LDUJaRtfjzVVrA8XcGVI_N8k,2021
325
325
  nucliadb/writer/tus/__init__.py,sha256=huWpKnDnjsrKlBBJk30ta5vamlA-4x0TbPs_2Up8hyM,5443
@@ -331,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
331
331
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
332
332
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
333
333
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
334
- nucliadb-6.2.1.post3253.dist-info/METADATA,sha256=MVwir1nyj-58meH0zdQmizihGvR_1k_EIKxl3NxEZpg,4603
335
- nucliadb-6.2.1.post3253.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
336
- nucliadb-6.2.1.post3253.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
- nucliadb-6.2.1.post3253.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
- nucliadb-6.2.1.post3253.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
- nucliadb-6.2.1.post3253.dist-info/RECORD,,
334
+ nucliadb-6.2.1.post3256.dist-info/METADATA,sha256=HVAp6MZqKVjVZ9e5gpnJqSro67IFGw9tyJ8Ay3oYHzk,4603
335
+ nucliadb-6.2.1.post3256.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
336
+ nucliadb-6.2.1.post3256.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
+ nucliadb-6.2.1.post3256.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
+ nucliadb-6.2.1.post3256.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
+ nucliadb-6.2.1.post3256.dist-info/RECORD,,