nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,157 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- import random
22
- from unittest.mock import AsyncMock, MagicMock, patch
23
-
24
- import pytest
25
- from nucliadb_protos.utils_pb2 import ExtractedText
26
-
27
- from nucliadb.search.search import paragraphs
28
-
29
-
30
- @pytest.fixture()
31
- def extracted_text():
32
- yield ExtractedText(
33
- text=b"Hello World!",
34
- split_text={"1": b"Hello", "2": b"World!"},
35
- )
36
-
37
-
38
- @pytest.fixture()
39
- def storage_field(extracted_text):
40
- mock = MagicMock()
41
-
42
- data = extracted_text.SerializeToString()
43
-
44
- async def _read_range(start, end):
45
- yield data[start:end]
46
-
47
- mock.read_range = _read_range
48
- yield mock
49
-
50
-
51
- @pytest.fixture()
52
- def field(storage_field, extracted_text):
53
- mock = MagicMock()
54
- mock.get_storage_field.return_value = storage_field
55
- mock.get_extracted_text = AsyncMock(return_value=extracted_text)
56
- yield mock
57
-
58
-
59
- async def test_get_paragraph_from_full_text(field, extracted_text: ExtractedText):
60
- assert (
61
- await paragraphs.get_paragraph_from_full_text(
62
- field=field, start=0, end=12, split=None
63
- )
64
- == extracted_text.text
65
- )
66
-
67
-
68
- async def test_get_paragraph_from_full_text_with_split(
69
- field, extracted_text: ExtractedText
70
- ):
71
- assert (
72
- await paragraphs.get_paragraph_from_full_text(
73
- field=field, start=0, end=6, split="1"
74
- )
75
- == extracted_text.split_text["1"]
76
- )
77
-
78
-
79
- class TestGetParagraphText:
80
- @pytest.fixture()
81
- def orm_resource(self, field):
82
- mock = AsyncMock()
83
- mock.get_field.return_value = field
84
- with patch(
85
- "nucliadb.search.search.paragraphs.get_resource_from_cache",
86
- return_value=mock,
87
- ):
88
- yield mock
89
-
90
- async def test_get_paragraph_text(self, orm_resource):
91
- assert (
92
- await paragraphs.get_paragraph_text(
93
- kbid="kbid",
94
- rid="rid",
95
- field="/t/text",
96
- start=0,
97
- end=12,
98
- split=None,
99
- highlight=True,
100
- ematches=None,
101
- matches=None,
102
- )
103
- == "Hello World!"
104
- )
105
-
106
- orm_resource.get_field.assert_called_once_with("text", 4, load=False)
107
-
108
-
109
- async def fake_get_extracted_text_from_gcloud(*args, **kwargs):
110
- await asyncio.sleep(random.uniform(0, 1))
111
- return ExtractedText(text=b"Hello World!")
112
-
113
-
114
- async def test_get_field_extracted_text_is_cached(field):
115
- field.kbid = "kbid"
116
- field.uuid = "rid"
117
- field.id = "fid"
118
- # Simulate a slow response from GCloud
119
- field.get_extracted_text = AsyncMock(
120
- side_effect=fake_get_extracted_text_from_gcloud
121
- )
122
-
123
- # Run 10 times in parallel to check that the cache is working
124
- etcache = paragraphs.ExtractedTextCache()
125
- futures = [
126
- paragraphs.get_field_extracted_text(field, cache=etcache) for _ in range(10)
127
- ]
128
- await asyncio.gather(*futures)
129
-
130
- field.get_extracted_text.assert_awaited_once()
131
-
132
-
133
- async def test_get_field_extracted_text_is_not_cached_when_none(field):
134
- field.get_extracted_text = AsyncMock(return_value=None)
135
-
136
- await paragraphs.get_field_extracted_text(field)
137
- await paragraphs.get_field_extracted_text(field)
138
-
139
- assert field.get_extracted_text.await_count == 2
140
-
141
-
142
- def test_extracted_text_cache():
143
- etcache = paragraphs.ExtractedTextCache()
144
- assert etcache.get_value("foo") is None
145
-
146
- assert isinstance(etcache.get_lock("foo"), asyncio.Lock)
147
- assert len(etcache.locks) == 1
148
-
149
- etcache.set_value("foo", "bar")
150
- assert len(etcache.values) == 1
151
-
152
- assert etcache.get_value("foo") == "bar"
153
-
154
- etcache.clear()
155
-
156
- assert len(etcache.values) == 0
157
- assert len(etcache.locks) == 0
@@ -1,106 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from unittest.mock import AsyncMock, Mock, patch
22
-
23
- import pytest
24
- from fastapi.datastructures import QueryParams
25
- from fastapi.responses import JSONResponse, StreamingResponse
26
-
27
- from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
28
- from nucliadb.search.search.predict_proxy import PredictProxiedEndpoints, predict_proxy
29
-
30
- MODULE = "nucliadb.search.search.predict_proxy"
31
-
32
-
33
- @pytest.fixture(scope="function")
34
- def exists_kb():
35
- with patch(f"{MODULE}.exists_kb", return_value=True) as mock:
36
- yield mock
37
-
38
-
39
- @pytest.fixture(scope="function")
40
- def predict_response():
41
- async def iter_any():
42
- for i in range(3):
43
- yield i.to_bytes(i, "big")
44
-
45
- resp = Mock()
46
- resp.status = 200
47
- resp.headers = {}
48
- resp.content = Mock(iter_any=iter_any)
49
- resp.json = AsyncMock(return_value={"answer": "foo"})
50
- yield resp
51
-
52
-
53
- @pytest.fixture(scope="function")
54
- def predict(predict_response):
55
- predict_engine = Mock()
56
- predict_engine.get_predict_headers = Mock(return_value={})
57
- predict_engine.make_request = AsyncMock(return_value=predict_response)
58
- with patch(f"{MODULE}.get_predict", return_value=predict_engine):
59
- yield predict_engine
60
-
61
-
62
- async def test_raises_error_on_non_existing_kb(exists_kb):
63
- exists_kb.return_value = False
64
- with pytest.raises(KnowledgeBoxNotFound):
65
- await predict_proxy(
66
- "foo",
67
- PredictProxiedEndpoints.CHAT,
68
- "GET",
69
- QueryParams(),
70
- )
71
-
72
-
73
- async def test_stream_response(exists_kb, predict, predict_response):
74
- predict_response.headers["Transfer-Encoding"] = "chunked"
75
- predict_response.headers["NUCLIA-LEARNING-ID"] = "foo"
76
-
77
- resp = await predict_proxy(
78
- "foo",
79
- PredictProxiedEndpoints.CHAT,
80
- "GET",
81
- QueryParams(),
82
- )
83
-
84
- assert isinstance(resp, StreamingResponse)
85
- assert resp.status_code == 200
86
- assert resp.headers["NUCLIA-LEARNING-ID"] == "foo"
87
- assert resp.headers["Access-Control-Expose-Headers"] == "NUCLIA-LEARNING-ID"
88
- body = [chunk async for chunk in resp.body_iterator]
89
- assert list(map(lambda x: x.to_bytes(x, "big"), range(3))) == body
90
-
91
-
92
- async def test_json_response(exists_kb, predict, predict_response):
93
- predict_response.headers["NUCLIA-LEARNING-ID"] = "foo"
94
-
95
- resp = await predict_proxy(
96
- "foo",
97
- PredictProxiedEndpoints.CHAT,
98
- "GET",
99
- QueryParams(),
100
- )
101
-
102
- assert isinstance(resp, JSONResponse)
103
- assert resp.status_code == 200
104
- assert resp.headers["NUCLIA-LEARNING-ID"] == "foo"
105
- assert resp.headers["Access-Control-Expose-Headers"] == "NUCLIA-LEARNING-ID"
106
- assert resp.body == b'{"answer":"foo"}'
@@ -1,201 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import unittest
21
- from unittest.mock import AsyncMock, Mock, patch
22
-
23
- import pytest
24
- from nucliadb_protos.knowledgebox_pb2 import SemanticModelMetadata, Synonyms
25
- from nucliadb_protos.nodereader_pb2 import SearchRequest
26
- from nucliadb_protos.utils_pb2 import RelationNode, VectorSimilarity
27
-
28
- from nucliadb.search.search.exceptions import InvalidQueryError
29
- from nucliadb.search.search.query import (
30
- QueryParser,
31
- check_supported_filters,
32
- get_default_semantic_min_score,
33
- get_kb_model_default_min_score,
34
- parse_entities_to_filters,
35
- )
36
- from nucliadb_models.search import MinScore
37
-
38
- QUERY_MODULE = "nucliadb.search.search.query"
39
-
40
-
41
- def test_parse_entities_to_filters():
42
- detected_entities = [
43
- RelationNode(value="John", ntype=RelationNode.NodeType.ENTITY, subtype="person")
44
- ]
45
-
46
- request = SearchRequest()
47
- assert parse_entities_to_filters(request, detected_entities) == ["/e/person/John"]
48
- assert request.filter.field_labels == ["/e/person/John"]
49
-
50
- assert parse_entities_to_filters(request, detected_entities) == []
51
- assert request.filter.field_labels == ["/e/person/John"]
52
-
53
-
54
- @pytest.fixture()
55
- def get_kb_model_default_min_score_mock():
56
- with unittest.mock.patch(f"{QUERY_MODULE}.get_kb_model_default_min_score") as mock:
57
- yield mock
58
-
59
-
60
- async def test_get_default_semantic_min_score(get_kb_model_default_min_score_mock):
61
- get_kb_model_default_min_score_mock.return_value = 1.5
62
-
63
- assert await get_default_semantic_min_score("kbid") == 1.5
64
-
65
- get_default_semantic_min_score.cache_clear()
66
-
67
-
68
- async def test_get_default_semantic_min_score_default_value(
69
- get_kb_model_default_min_score_mock,
70
- ):
71
- get_kb_model_default_min_score_mock.return_value = None
72
-
73
- assert await get_default_semantic_min_score("kbid") == 0.7
74
-
75
- get_default_semantic_min_score.cache_clear()
76
-
77
-
78
- async def test_get_default_semantic_min_score_is_cached(
79
- get_kb_model_default_min_score_mock,
80
- ):
81
- await get_default_semantic_min_score("kbid1")
82
- await get_default_semantic_min_score("kbid1")
83
- await get_default_semantic_min_score("kbid1")
84
-
85
- await get_default_semantic_min_score("kbid2")
86
-
87
- assert get_kb_model_default_min_score_mock.call_count == 2
88
-
89
- get_default_semantic_min_score.cache_clear()
90
-
91
-
92
- @pytest.fixture()
93
- def read_only_txn():
94
- txn = unittest.mock.AsyncMock()
95
- with unittest.mock.patch(
96
- f"{QUERY_MODULE}.get_read_only_transaction", return_value=txn
97
- ):
98
- yield txn
99
-
100
-
101
- @pytest.fixture()
102
- def kbdm(read_only_txn):
103
- kbdm = unittest.mock.AsyncMock()
104
- with unittest.mock.patch(f"{QUERY_MODULE}.datamanagers.kb", kbdm):
105
- yield kbdm
106
-
107
-
108
- async def test_get_kb_model_default_min_score(kbdm):
109
- # If min_score is set, it should return it
110
- kbdm.get_model_metadata.return_value = SemanticModelMetadata(
111
- similarity_function=VectorSimilarity.COSINE,
112
- default_min_score=1.5,
113
- )
114
- assert await get_kb_model_default_min_score("kbid") == 1.5
115
-
116
-
117
- async def test_get_kb_model_default_min_score_backward_compatible(kbdm):
118
- # If min_score is not set yet, it should return None
119
- kbdm.get_model_metadata.return_value = SemanticModelMetadata(
120
- similarity_function=VectorSimilarity.COSINE
121
- )
122
- assert await get_kb_model_default_min_score("kbid") is None
123
-
124
-
125
- class TestApplySynonymsToRequest:
126
- @pytest.fixture
127
- def get_synonyms(self):
128
- get_kb_synonyms = AsyncMock()
129
- synonyms = Synonyms()
130
- synonyms.terms["planet"].synonyms.extend(["earth", "globe"])
131
- get_kb_synonyms.return_value = synonyms
132
- yield get_kb_synonyms
133
-
134
- @pytest.fixture
135
- def query_parser(self, get_synonyms):
136
- qp = QueryParser(
137
- kbid="kbid",
138
- features=[],
139
- query="query",
140
- filters=[],
141
- faceted=[],
142
- page_number=0,
143
- page_size=10,
144
- min_score=MinScore(vector=0.5),
145
- with_synonyms=True,
146
- )
147
- with patch("nucliadb.search.search.query.get_kb_synonyms", get_synonyms):
148
- yield qp
149
-
150
- @pytest.mark.asyncio
151
- async def test_not_applies_if_empty_body(
152
- self, query_parser: QueryParser, get_synonyms
153
- ):
154
- query_parser.query = ""
155
- search_request = Mock()
156
- await query_parser.parse_synonyms(search_request)
157
-
158
- get_synonyms.assert_not_awaited()
159
- search_request.ClearField.assert_not_called()
160
-
161
- @pytest.mark.asyncio
162
- async def test_not_applies_if_synonyms_object_not_found(
163
- self, query_parser: QueryParser, get_synonyms
164
- ):
165
- query_parser.query = "planet"
166
- get_synonyms.return_value = None
167
- request = Mock()
168
-
169
- await query_parser.parse_synonyms(Mock())
170
-
171
- request.ClearField.assert_not_called()
172
- get_synonyms.assert_awaited_once_with("kbid")
173
-
174
- @pytest.mark.asyncio
175
- async def test_not_applies_if_synonyms_not_found_for_query(
176
- self, query_parser: QueryParser, get_synonyms
177
- ):
178
- query_parser.query = "foobar"
179
- request = Mock()
180
-
181
- await query_parser.parse_synonyms(request)
182
-
183
- request.ClearField.assert_not_called()
184
-
185
- query_parser.query = "planet"
186
- await query_parser.parse_synonyms(request)
187
-
188
- request.ClearField.assert_called_once_with("body")
189
- assert request.advanced_query == "planet OR earth OR globe"
190
-
191
-
192
- def test_check_supported_filters():
193
- check_supported_filters({"literal": "a"}, ["a"])
194
- check_supported_filters({"or": [{"literal": "a"}, {"literal": "b"}]}, [])
195
- with pytest.raises(InvalidQueryError):
196
- check_supported_filters({"or": [{"literal": "a"}, {"literal": "b"}]}, ["b"])
197
- with pytest.raises(InvalidQueryError):
198
- check_supported_filters(
199
- {"and": [{"literal": "a"}, {"and": [{"literal": "c"}, {"literal": "b"}]}]},
200
- ["b"],
201
- )
@@ -1,79 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import json
21
- from unittest.mock import patch
22
-
23
- import pytest
24
-
25
- from nucliadb.common.cluster.index_node import IndexNode
26
- from nucliadb.search import app
27
-
28
- pytestmark = pytest.mark.asyncio
29
-
30
-
31
- async def test_alive():
32
- with patch.object(app.manager, "get_index_nodes", return_value=[{"id": "node1"}]):
33
- resp = await app.alive(None)
34
- assert resp.status_code == 200
35
-
36
-
37
- async def test_not_alive():
38
- with patch.object(app.manager, "get_index_nodes", return_value=[]):
39
- resp = await app.alive(None)
40
- assert resp.status_code == 503
41
-
42
-
43
- async def test_ready():
44
- with patch.object(app.manager, "get_index_nodes", return_value=[{"id": "node1"}]):
45
- resp = await app.ready(None)
46
- assert resp.status_code == 200
47
-
48
-
49
- async def test_not_ready():
50
- with patch.object(app.manager, "get_index_nodes", return_value=[]):
51
- resp = await app.ready(None)
52
- assert resp.status_code == 503
53
-
54
-
55
- async def test_node_members():
56
- nodes = [
57
- IndexNode(
58
- id="node1", address="node1", shard_count=0, available_disk=100, dummy=True
59
- ),
60
- IndexNode(
61
- id="node2",
62
- address="node2",
63
- shard_count=0,
64
- available_disk=50,
65
- dummy=True,
66
- primary_id="node1",
67
- ),
68
- ]
69
- with patch.object(app.manager, "get_index_nodes", return_value=nodes):
70
- resp = await app.node_members(None)
71
- assert resp.status_code == 200
72
- members = json.loads(resp.body)
73
- sorted(members, key=lambda x: x["id"])
74
- assert members[0]["id"] == "node1"
75
- assert members[0]["primary_id"] is None
76
- assert members[0]["available_disk"] == 100
77
- assert members[1]["id"] == "node2"
78
- assert members[1]["primary_id"] == "node1"
79
- assert members[1]["available_disk"] == 50
@@ -1,112 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import random
22
-
23
- from nucliadb_protos.nodereader_pb2 import DocumentScored, ParagraphResult
24
-
25
- from nucliadb.search.search.find_merge import Orderer, merge_paragraphs_vectors
26
- from nucliadb_models.search import SCORE_TYPE
27
-
28
-
29
- def test_orderer():
30
- orderer = Orderer()
31
-
32
- items = {}
33
- for i in range(30):
34
- key = str(i)
35
- score = random.random() * 25
36
- items[key] = score
37
-
38
- boosted = {4, 10, 28}
39
-
40
- boosted_items = []
41
- regular_items = []
42
-
43
- for i, (key, score) in enumerate(items.items()):
44
- if i in boosted:
45
- boosted_items.append(key)
46
- orderer.add_boosted(key)
47
- else:
48
- regular_items.append(key)
49
- orderer.add(key)
50
-
51
- sorted_items = list(orderer.sorted_by_insertion())
52
- assert sorted_items == boosted_items + regular_items
53
-
54
-
55
- def test_orderer_handles_duplicate_insertions():
56
- orderer = Orderer()
57
- orderer.add_boosted("a")
58
- orderer.add_boosted("b")
59
- orderer.add_boosted("a")
60
- orderer.add_boosted("c")
61
- orderer.add("a")
62
- assert list(orderer.sorted_by_insertion()) == ["a", "b", "c"]
63
-
64
-
65
- def test_merge_paragraphs_vectors():
66
- paragraphs = []
67
- for i in range(5):
68
- pr = ParagraphResult()
69
- pr.uuid = "foo"
70
- pr.score.bm25 = i
71
- pr.score.booster = 0
72
- pr.paragraph = f"id/text/paragraph/{i}/0-10"
73
- pr.start = 0
74
- pr.end = 10
75
- pr.field = "/a/title"
76
- paragraphs.append(pr)
77
-
78
- vectors = []
79
- for i in range(5):
80
- score = max(5 / float(i + 1), 1)
81
- vr = DocumentScored()
82
- vr.doc_id.id = f"id/vector/paragraph/{i}/0-2"
83
- vr.score = score
84
- vr.metadata.position.start = 0
85
- vr.metadata.position.start = 2
86
- vectors.append(vr)
87
-
88
- paragraphs, next_page = merge_paragraphs_vectors(
89
- [paragraphs], [vectors], 20, 0, min_score=1
90
- )
91
- assert not next_page
92
- assert len(paragraphs) == 10
93
-
94
- vector_scores = set()
95
- for index, score_type in [
96
- (0, SCORE_TYPE.BM25),
97
- (1, SCORE_TYPE.VECTOR),
98
- (2, SCORE_TYPE.BM25),
99
- (3, SCORE_TYPE.BM25),
100
- (4, SCORE_TYPE.VECTOR),
101
- (5, SCORE_TYPE.BM25),
102
- (6, SCORE_TYPE.BM25),
103
- (7, SCORE_TYPE.VECTOR),
104
- (8, SCORE_TYPE.VECTOR),
105
- (9, SCORE_TYPE.VECTOR),
106
- ]:
107
- assert paragraphs[index].paragraph.score_type == score_type
108
- if score_type == SCORE_TYPE.VECTOR:
109
- vector_scores.add(paragraphs[index].paragraph.score)
110
-
111
- # Check that the vector scores are different
112
- assert len(vector_scores) == 5
@@ -1,34 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import patch
21
-
22
- from nucliadb.search.search.merge import ResourceSearchResults, merge_paragraphs_results
23
-
24
-
25
- async def test_str_model():
26
- # make sure __str__ works as advertised
27
- res = await merge_paragraphs_results([], 1, 1, "kbid", [], [], [], False, 1)
28
- assert str(res) == res.json()
29
-
30
-
31
- async def test_str_model_fallback():
32
- with patch.object(ResourceSearchResults, "json", side_effect=Exception("ERROR")):
33
- res = await merge_paragraphs_results([], 1, 1, "kbid", [], [], [], False, 1)
34
- assert "sentences=None" in str(res)