nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,22 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+
22
+ class ParserError(ValueError): ...
@@ -0,0 +1,101 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from dataclasses import dataclass
22
+ from datetime import datetime
23
+ from typing import Any, Optional
24
+
25
+ from pydantic import (
26
+ BaseModel,
27
+ Field,
28
+ )
29
+
30
+ from nucliadb_models import search as search_models
31
+
32
+ ### Retrieval
33
+
34
+ # filters
35
+
36
+
37
+ class DateTimeFilter(BaseModel):
38
+ after: Optional[datetime] = None # aka, start
39
+ before: Optional[datetime] = None # aka, end
40
+
41
+
42
+ # rank fusion
43
+
44
+
45
+ class RankFusion(BaseModel):
46
+ window: int = Field(le=500)
47
+
48
+
49
+ class ReciprocalRankFusion(RankFusion):
50
+ k: float = Field(default=60.0)
51
+ boosting: search_models.ReciprocalRankFusionWeights = Field(
52
+ default_factory=search_models.ReciprocalRankFusionWeights
53
+ )
54
+
55
+
56
+ # reranking
57
+
58
+
59
+ class Reranker(BaseModel): ...
60
+
61
+
62
+ class NoopReranker(Reranker): ...
63
+
64
+
65
+ class MultiMatchBoosterReranker(Reranker): ...
66
+
67
+
68
+ class PredictReranker(Reranker):
69
+ window: int = Field(le=200)
70
+
71
+
72
+ # retrieval operation
73
+
74
+
75
+ @dataclass
76
+ class UnitRetrieval:
77
+ top_k: int
78
+ rank_fusion: RankFusion
79
+ reranker: Reranker
80
+
81
+
82
+ ### Catalog
83
+
84
+
85
+ class CatalogFilters(BaseModel):
86
+ labels: dict[str, Any] = Field(
87
+ default_factory=dict, description="Labels filter expression, like, `{and: {not: ...}, ...}`"
88
+ )
89
+ creation: DateTimeFilter
90
+ modification: DateTimeFilter
91
+ with_status: Optional[search_models.ResourceProcessingStatus] = None
92
+
93
+
94
+ class CatalogQuery(BaseModel):
95
+ kbid: str
96
+ query: str
97
+ filters: CatalogFilters
98
+ sort: search_models.SortOptions
99
+ faceted: list[str]
100
+ page_size: int
101
+ page_number: int
@@ -0,0 +1,183 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from typing import Any
22
+
23
+ from pydantic import ValidationError
24
+
25
+ from nucliadb.search.search.filters import (
26
+ convert_to_node_filters,
27
+ translate_label_filters,
28
+ )
29
+ from nucliadb.search.search.query_parser.exceptions import ParserError
30
+ from nucliadb.search.search.query_parser.models import (
31
+ CatalogFilters,
32
+ CatalogQuery,
33
+ DateTimeFilter,
34
+ MultiMatchBoosterReranker,
35
+ NoopReranker,
36
+ PredictReranker,
37
+ RankFusion,
38
+ ReciprocalRankFusion,
39
+ Reranker,
40
+ UnitRetrieval,
41
+ )
42
+ from nucliadb_models import search as search_models
43
+ from nucliadb_models.labels import LABEL_HIDDEN
44
+ from nucliadb_models.search import (
45
+ Filter,
46
+ FindRequest,
47
+ SortField,
48
+ SortOptions,
49
+ SortOrder,
50
+ )
51
+
52
+
53
+ def parse_find(item: FindRequest) -> UnitRetrieval:
54
+ parser = _FindParser(item)
55
+ return parser.parse()
56
+
57
+
58
+ class _FindParser:
59
+ def __init__(self, item: FindRequest):
60
+ self.item = item
61
+
62
+ def parse(self) -> UnitRetrieval:
63
+ top_k = self._parse_top_k()
64
+ try:
65
+ rank_fusion = self._parse_rank_fusion()
66
+ except ValidationError as exc:
67
+ raise ParserError(f"Parsing error in rank fusion: {str(exc)}") from exc
68
+ try:
69
+ reranker = self._parse_reranker()
70
+ except ValidationError as exc:
71
+ raise ParserError(f"Parsing error in reranker: {str(exc)}") from exc
72
+
73
+ # Adjust retrieval windows. Our current implementation assumes:
74
+ # `top_k <= reranker.window <= rank_fusion.window`
75
+ # and as rank fusion is done before reranking, we must ensure rank
76
+ # fusion window is at least the reranker window
77
+ if isinstance(reranker, PredictReranker):
78
+ rank_fusion.window = max(rank_fusion.window, reranker.window)
79
+
80
+ return UnitRetrieval(
81
+ top_k=top_k,
82
+ rank_fusion=rank_fusion,
83
+ reranker=reranker,
84
+ )
85
+
86
+ def _parse_top_k(self) -> int:
87
+ assert self.item.top_k is not None, "top_k must have an int value"
88
+ top_k = self.item.top_k
89
+ return top_k
90
+
91
+ def _parse_rank_fusion(self) -> RankFusion:
92
+ rank_fusion: RankFusion
93
+
94
+ top_k = self._parse_top_k()
95
+ window = min(top_k, 500)
96
+
97
+ if isinstance(self.item.rank_fusion, search_models.RankFusionName):
98
+ if self.item.rank_fusion == search_models.RankFusionName.RECIPROCAL_RANK_FUSION:
99
+ rank_fusion = ReciprocalRankFusion(window=window)
100
+ else:
101
+ raise ParserError(f"Unknown rank fusion algorithm: {self.item.rank_fusion}")
102
+
103
+ elif isinstance(self.item.rank_fusion, search_models.ReciprocalRankFusion):
104
+ user_window = self.item.rank_fusion.window
105
+ rank_fusion = ReciprocalRankFusion(
106
+ k=self.item.rank_fusion.k,
107
+ boosting=self.item.rank_fusion.boosting,
108
+ window=min(max(user_window or 0, top_k), 500),
109
+ )
110
+
111
+ else:
112
+ raise ParserError(f"Unknown rank fusion {self.item.rank_fusion}")
113
+
114
+ return rank_fusion
115
+
116
+ def _parse_reranker(self) -> Reranker:
117
+ reranking: Reranker
118
+
119
+ top_k = self._parse_top_k()
120
+
121
+ if isinstance(self.item.reranker, search_models.RerankerName):
122
+ if self.item.reranker == search_models.RerankerName.NOOP:
123
+ reranking = NoopReranker()
124
+
125
+ elif self.item.reranker == search_models.RerankerName.MULTI_MATCH_BOOSTER:
126
+ reranking = MultiMatchBoosterReranker()
127
+
128
+ elif self.item.reranker == search_models.RerankerName.PREDICT_RERANKER:
129
+ # for predict reranker, by default, we want an x2 factor with a
130
+ # top of 200 results
131
+ reranking = PredictReranker(window=min(top_k * 2, 200))
132
+
133
+ else:
134
+ raise ParserError(f"Unknown reranker algorithm: {self.item.reranker}")
135
+
136
+ elif isinstance(self.item.reranker, search_models.PredictReranker):
137
+ user_window = self.item.reranker.window
138
+ reranking = PredictReranker(window=min(max(user_window or 0, top_k), 200))
139
+
140
+ else:
141
+ raise ParserError(f"Unknown reranker {self.item.reranker}")
142
+
143
+ return reranking
144
+
145
+
146
+ def parse_catalog(kbid: str, item: search_models.CatalogRequest) -> CatalogQuery:
147
+ if item.hidden:
148
+ hidden_filter = Filter(all=[LABEL_HIDDEN])
149
+ else:
150
+ hidden_filter = Filter(none=[LABEL_HIDDEN])
151
+ label_filters: dict[str, Any] = convert_to_node_filters(item.filters + [hidden_filter]) # type: ignore
152
+ if len(label_filters) > 0:
153
+ label_filters = translate_label_filters(label_filters)
154
+
155
+ sort = item.sort
156
+ if sort is None:
157
+ # By default we sort by creation date (most recent first)
158
+ sort = SortOptions(
159
+ field=SortField.CREATED,
160
+ order=SortOrder.DESC,
161
+ limit=None,
162
+ )
163
+
164
+ return CatalogQuery(
165
+ kbid=kbid,
166
+ query=item.query,
167
+ filters=CatalogFilters(
168
+ labels=label_filters,
169
+ creation=DateTimeFilter(
170
+ after=item.range_creation_start,
171
+ before=item.range_creation_end,
172
+ ),
173
+ modification=DateTimeFilter(
174
+ after=item.range_modification_start,
175
+ before=item.range_modification_end,
176
+ ),
177
+ with_status=item.with_status,
178
+ ),
179
+ sort=sort,
180
+ faceted=item.faceted,
181
+ page_number=item.page_number,
182
+ page_size=item.page_size,
183
+ )
@@ -0,0 +1,204 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ import logging
21
+ from abc import ABC, abstractmethod
22
+ from typing import Iterable
23
+
24
+ from nucliadb.common.external_index_providers.base import TextBlockMatch
25
+ from nucliadb.common.ids import ParagraphId
26
+ from nucliadb.search.search.query_parser import models as parser_models
27
+ from nucliadb_models.search import SCORE_TYPE
28
+ from nucliadb_telemetry.metrics import Observer
29
+
30
logger = logging.getLogger(__name__)

# Observer wrapped around each rank fusion `_fuse` implementation in this
# module; every algorithm supplies its own value for the "type" label when it
# applies the wrapper.
# NOTE(review): bucket boundaries are presumably in seconds -- confirm against
# the Observer implementation.
rank_fusion_observer = Observer(
    "rank_fusion",
    labels={"type": ""},
    buckets=[0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0],
)
48
+
49
+
50
class RankFusionAlgorithm(ABC):
    """Abstract base for algorithms that merge keyword and semantic results.

    Concrete algorithms implement `_fuse`; callers go through `fuse`.
    """

    def __init__(self, window: int):
        self._window = window

    @property
    def window(self) -> int:
        """Phony number used to compute how many elements to retrieve and feed
        to the rank fusion algorithm.

        It lives here for convenience, but a query plan should be the way to
        go.

        """
        return self._window

    @abstractmethod
    def _fuse(
        self, keyword: Iterable[TextBlockMatch], semantic: Iterable[TextBlockMatch]
    ) -> list[TextBlockMatch]:
        """Algorithm-specific merge of keyword and semantic results."""
        ...

    def fuse(
        self, keyword: Iterable[TextBlockMatch], semantic: Iterable[TextBlockMatch]
    ) -> list[TextBlockMatch]:
        """Merge keyword and semantic results and return them as a single
        list.

        """
        return self._fuse(keyword, semantic)
78
+
79
+
80
class LegacyRankFusion(RankFusionAlgorithm):
    """Legacy algorithm that interleaves keyword and semantic results:

    - 1st result comes from keyword search
    - 2nd result comes from semantic search
    - then 2 keyword results followed by 1 semantic result (and repeat)

    Each input is ordered by descending score before interleaving. Results
    are not deduplicated.

    """

    @rank_fusion_observer.wrap({"type": "legacy"})
    def _fuse(
        self, keyword: Iterable[TextBlockMatch], semantic: Iterable[TextBlockMatch]
    ) -> list[TextBlockMatch]:
        # Keyword matches, best first, are the backbone of the merged list
        merged: list[TextBlockMatch] = sorted(
            keyword, key=lambda match: match.score, reverse=True
        )
        ranked_semantic = sorted(semantic, key=lambda match: match.score, reverse=True)

        # Semantic matches land at positions 1, 4, 7, ... of the growing list.
        # Once keyword results run out, inserting past the end simply appends.
        for ordinal, match in enumerate(ranked_semantic):
            merged.insert(1 + 3 * ordinal, match)

        return merged
108
+
109
+
110
class ReciprocalRankFusion(RankFusionAlgorithm):
    """Rank-based fusion algorithm. Matches found deep in a retriever's list
    contribute less, following a reciprocal distribution. Each retriever can
    be boosted with a weight.

    RRF(d) = Σ(r ∈ R) (1 / (k + r(d)) · w(r))

    where:
    - d is a document
    - R is the set of retrievers
    - k is a constant
    - r(d) is the rank of document d in retriever r
    - w(r) is the weight (boost) for retriever r

    Matches returned by several retrievers accumulate score and are
    deduplicated into a single result.

    """

    def __init__(
        self,
        k: float = 60.0,
        *,
        window: int,
        keyword_weight: float = 1.0,
        semantic_weight: float = 1.0,
    ):
        super().__init__(window)
        # RRF constant. Studies agree on 60 as a good default across many
        # datasets. k allows a bigger score difference among the best results
        # and a smaller one among bad results.
        self._k = k
        self._keyword_boost = keyword_weight
        self._semantic_boost = semantic_weight

    @rank_fusion_observer.wrap({"type": "reciprocal_rank_fusion"})
    def _fuse(
        self, keyword: Iterable[TextBlockMatch], semantic: Iterable[TextBlockMatch]
    ) -> list[TextBlockMatch]:
        # Every retriever's matches, best first, paired with its boost
        retrievers = (
            (sorted(keyword, key=lambda match: match.score, reverse=True), self._keyword_boost),
            (sorted(semantic, key=lambda match: match.score, reverse=True), self._semantic_boost),
        )

        fused_scores: dict[ParagraphId, float] = {}
        fused_types: dict[ParagraphId, SCORE_TYPE] = {}
        # First match seen for each paragraph. It is the object returned for
        # that paragraph, which deduplicates repeated matches of a text block.
        first_seen: dict[ParagraphId, TextBlockMatch] = {}

        for ranked, boost in retrievers:
            for rank, match in enumerate(ranked):
                paragraph_id = match.paragraph_id
                first_seen.setdefault(paragraph_id, match)

                # A paragraph matched by both BM25 and vector search is
                # reported as matched by BOTH
                known_type = fused_types.setdefault(paragraph_id, match.score_type)
                if {known_type, match.score_type} == {SCORE_TYPE.BM25, SCORE_TYPE.VECTOR}:
                    fused_types[paragraph_id] = SCORE_TYPE.BOTH

                # NOTE(review): rank is 0-based here, so k == 0 would divide
                # by zero on the top result of a retriever -- confirm callers
                # never pass k == 0.
                accumulated = fused_scores.get(paragraph_id, 0)
                fused_scores[paragraph_id] = accumulated + 1 / (self._k + rank) * boost

        merged = []
        for paragraph_id, match in first_seen.items():
            # The kept match is updated in place with the fused score and type
            match.score = fused_scores[paragraph_id]
            match.score_type = fused_types[paragraph_id]
            merged.append(match)

        merged.sort(key=lambda match: match.score, reverse=True)
        return merged
185
+
186
+
187
def get_rank_fusion(rank_fusion: parser_models.RankFusion) -> RankFusionAlgorithm:
    """Return the rank fusion algorithm instance matching a parsed rank
    fusion model.

    Any model other than a parsed reciprocal rank fusion is logged as an error
    and replaced by a reciprocal rank fusion with default parameters, keeping
    the requested window.
    """
    window = rank_fusion.window

    if not isinstance(rank_fusion, parser_models.ReciprocalRankFusion):
        logger.error(f"Unknown rank fusion algorithm {type(rank_fusion)}: {rank_fusion}. Using default")
        fallback: RankFusionAlgorithm = ReciprocalRankFusion(window=window)
        return fallback

    algorithm: RankFusionAlgorithm = ReciprocalRankFusion(
        k=rank_fusion.k,
        window=window,
        keyword_weight=rank_fusion.boosting.keyword,
        semantic_weight=rank_fusion.boosting.semantic,
    )
    return algorithm