nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,45 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import asyncio
21
- from unittest.mock import AsyncMock, Mock
22
-
23
- import pytest
24
- from nucliadb_protos.nodereader_pb2 import SearchRequest
25
-
26
- from nucliadb.search.search.shards import node_observer, query_shard
27
-
28
-
29
- async def test_node_observer_records_timeout_errors():
30
- node = Mock(id="node-1")
31
- # When waiting for a task to finish with asyncio, if it times out asyncio will
32
- # cancell the task throwing a CancelledError on that task
33
- node.reader.Search = AsyncMock(side_effect=asyncio.CancelledError)
34
- query = SearchRequest(body="foo")
35
-
36
- node_observer.counter.clear()
37
-
38
- with pytest.raises(asyncio.CancelledError):
39
- await query_shard(node, "shard", query)
40
-
41
- sample = node_observer.counter.collect()[0].samples[0]
42
- assert sample.name == "node_client_count_total"
43
- assert sample.labels["type"] == "search"
44
- assert sample.labels["node_id"] == "node-1"
45
- assert sample.labels["status"] == "timeout"
@@ -1,82 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
-
22
- from nucliadb.search.search.utils import (
23
- has_user_vectors,
24
- is_empty_query,
25
- is_exact_match_only_query,
26
- should_disable_vector_search,
27
- )
28
- from nucliadb_models.search import SearchRequest
29
-
30
-
31
- @pytest.mark.parametrize(
32
- "item,empty",
33
- [
34
- (SearchRequest(query=""), True),
35
- (SearchRequest(query="foo"), False),
36
- ],
37
- )
38
- def test_is_empty_query(item, empty):
39
- assert is_empty_query(item) is empty
40
-
41
-
42
- @pytest.mark.parametrize(
43
- "query,exact_match",
44
- [
45
- ("some", False),
46
- ("some query terms", False),
47
- ('"something"', True),
48
- (' "something"', True),
49
- ('"something" ', True),
50
- ('"something exact"', True),
51
- ('"something exact" and something else', False),
52
- ],
53
- )
54
- def test_is_exact_match_only_query(query, exact_match):
55
- item = SearchRequest(query=query)
56
- assert is_exact_match_only_query(item) is exact_match
57
-
58
-
59
- @pytest.mark.parametrize(
60
- "item,has_vectors",
61
- [
62
- (SearchRequest(query=""), False),
63
- (SearchRequest(vector=[]), False),
64
- (SearchRequest(vector=[1.0]), True),
65
- ],
66
- )
67
- def test_has_user_vectors(item, has_vectors):
68
- assert has_user_vectors(item) is has_vectors
69
-
70
-
71
- @pytest.mark.parametrize(
72
- "item,disable_vectors",
73
- [
74
- (SearchRequest(query=""), True),
75
- (SearchRequest(query='"exact match"'), True),
76
- (SearchRequest(query="foo"), False),
77
- (SearchRequest(query="", vector=[1.0, 2.0]), False),
78
- (SearchRequest(query='"exact match"', vector=[1.0, 2.0]), False),
79
- ],
80
- )
81
- def test_should_disable_vectors(item, disable_vectors):
82
- assert should_disable_vector_search(item) is disable_vectors
@@ -1,270 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- from unittest import mock
20
- from unittest.mock import AsyncMock, patch
21
-
22
- import pytest
23
-
24
- from nucliadb.ingest.orm.resource import KB_REVERSE
25
- from nucliadb.search.search.chat import prompt as chat_prompt
26
- from nucliadb_models.search import (
27
- SCORE_TYPE,
28
- FindField,
29
- FindParagraph,
30
- FindResource,
31
- KnowledgeboxFindResults,
32
- MinScore,
33
- )
34
- from nucliadb_protos import resources_pb2
35
-
36
-
37
- @pytest.fixture()
38
- def messages():
39
- msgs = [
40
- resources_pb2.Message(
41
- ident="1", content=resources_pb2.MessageContent(text="Message 1")
42
- ),
43
- resources_pb2.Message(
44
- ident="2", content=resources_pb2.MessageContent(text="Message 2")
45
- ),
46
- resources_pb2.Message(
47
- ident="3",
48
- who="1",
49
- content=resources_pb2.MessageContent(text="Message 3"),
50
- type=resources_pb2.Message.MessageType.QUESTION,
51
- ),
52
- resources_pb2.Message(
53
- ident="4",
54
- content=resources_pb2.MessageContent(text="Message 4"),
55
- type=resources_pb2.Message.MessageType.ANSWER,
56
- to=["1"],
57
- ),
58
- resources_pb2.Message(
59
- ident="5", content=resources_pb2.MessageContent(text="Message 5")
60
- ),
61
- ]
62
- yield msgs
63
-
64
-
65
- @pytest.fixture()
66
- def field_obj(messages):
67
- mock = AsyncMock()
68
- mock.get_metadata.return_value = resources_pb2.FieldConversation(pages=1, total=5)
69
- mock.db_get_value.return_value = resources_pb2.Conversation(messages=messages)
70
-
71
- yield mock
72
-
73
-
74
- @pytest.fixture()
75
- def kb(field_obj):
76
- mock = AsyncMock()
77
- mock.get.return_value.get_field.return_value = field_obj
78
- yield mock
79
-
80
-
81
- @pytest.mark.asyncio
82
- async def test_get_next_conversation_messages(field_obj, messages):
83
- assert (
84
- len(
85
- await chat_prompt.get_next_conversation_messages(
86
- field_obj=field_obj, page=1, start_idx=0, num_messages=5
87
- )
88
- )
89
- == 5
90
- )
91
- assert (
92
- len(
93
- await chat_prompt.get_next_conversation_messages(
94
- field_obj=field_obj, page=1, start_idx=0, num_messages=1
95
- )
96
- )
97
- == 1
98
- )
99
-
100
- assert await chat_prompt.get_next_conversation_messages(
101
- field_obj=field_obj,
102
- page=1,
103
- start_idx=0,
104
- num_messages=1,
105
- message_type=resources_pb2.Message.MessageType.ANSWER,
106
- msg_to="1",
107
- ) == [messages[3]]
108
-
109
-
110
- @pytest.mark.asyncio
111
- async def test_find_conversation_message(field_obj, messages):
112
- assert await chat_prompt.find_conversation_message(
113
- field_obj=field_obj, mident="3"
114
- ) == (messages[2], 1, 2)
115
-
116
-
117
- @pytest.mark.asyncio
118
- async def test_get_expanded_conversation_messages(kb, messages):
119
- assert await chat_prompt.get_expanded_conversation_messages(
120
- kb=kb, rid="rid", field_id="field_id", mident="3"
121
- ) == [messages[3]]
122
-
123
-
124
- @pytest.mark.asyncio
125
- async def test_get_expanded_conversation_messages_question(kb, messages):
126
- assert (
127
- await chat_prompt.get_expanded_conversation_messages(
128
- kb=kb, rid="rid", field_id="field_id", mident="1"
129
- )
130
- == messages[1:]
131
- )
132
-
133
- kb.get.assert_called_with("rid")
134
- kb.get.return_value.get_field.assert_called_with(
135
- "field_id", KB_REVERSE["c"], load=True
136
- )
137
-
138
-
139
- @pytest.mark.asyncio
140
- async def test_get_expanded_conversation_messages_missing(kb, messages):
141
- assert (
142
- await chat_prompt.get_expanded_conversation_messages(
143
- kb=kb, rid="rid", field_id="field_id", mident="missing"
144
- )
145
- == []
146
- )
147
-
148
-
149
- def _create_find_result(
150
- _id: str, result_text: str, score_type: SCORE_TYPE = SCORE_TYPE.BM25, order=1
151
- ):
152
- return FindResource(
153
- id=_id.split("/")[0],
154
- fields={
155
- "c/conv": FindField(
156
- paragraphs={
157
- _id: FindParagraph(
158
- id=_id,
159
- score=1.0,
160
- score_type=score_type,
161
- order=order,
162
- text=result_text,
163
- )
164
- }
165
- )
166
- },
167
- )
168
-
169
-
170
- @pytest.mark.asyncio
171
- async def test_default_prompt_context(kb):
172
- result_text = " ".join(["text"] * 10)
173
- with (
174
- patch("nucliadb.search.search.chat.prompt.get_read_only_transaction"),
175
- patch("nucliadb.search.search.chat.prompt.get_storage"),
176
- patch("nucliadb.search.search.chat.prompt.KnowledgeBoxORM", return_value=kb),
177
- ):
178
- context = chat_prompt.CappedPromptContext(max_size=int(1e6))
179
- find_results = KnowledgeboxFindResults(
180
- facets={},
181
- resources={
182
- "bmid": _create_find_result(
183
- "bmid/c/conv/ident", result_text, SCORE_TYPE.BM25, order=1
184
- ),
185
- "vecid": _create_find_result(
186
- "vecid/c/conv/ident", result_text, SCORE_TYPE.VECTOR, order=2
187
- ),
188
- "both_id": _create_find_result(
189
- "both_id/c/conv/ident", result_text, SCORE_TYPE.BOTH, order=0
190
- ),
191
- },
192
- )
193
- ordered_paragraphs = chat_prompt.get_ordered_paragraphs(find_results)
194
-
195
- await chat_prompt.default_prompt_context(
196
- context,
197
- "kbid",
198
- ordered_paragraphs,
199
- )
200
- prompt_result = context.output
201
- # Check that the results are sorted by increasing order and that the extra
202
- # context is added at the beginning, indicating that it has the most priority
203
- paragraph_ids = [pid for pid in prompt_result.keys()]
204
- assert paragraph_ids == [
205
- "both_id/c/conv/ident",
206
- "bmid/c/conv/ident",
207
- "vecid/c/conv/ident",
208
- ]
209
-
210
-
211
- @pytest.fixture(scope="function")
212
- def find_results():
213
- return KnowledgeboxFindResults(
214
- facets={},
215
- resources={
216
- "resource1": _create_find_result(
217
- "resource1/a/title", "Resource 1", SCORE_TYPE.BOTH, order=1
218
- ),
219
- "resource2": _create_find_result(
220
- "resource2/a/title", "Resource 2", SCORE_TYPE.VECTOR, order=2
221
- ),
222
- },
223
- min_score=MinScore(semantic=-1),
224
- )
225
-
226
-
227
- @pytest.mark.asyncio
228
- async def test_prompt_context_builder_prepends_user_context(
229
- find_results: KnowledgeboxFindResults,
230
- ):
231
- builder = chat_prompt.PromptContextBuilder(
232
- kbid="kbid", find_results=find_results, user_context=["Carrots are orange"]
233
- )
234
-
235
- async def _mock_build_context(context, *args, **kwargs):
236
- context["resource1/a/title"] = "Resource 1"
237
- context["resource2/a/title"] = "Resource 2"
238
-
239
- with mock.patch.object(builder, "_build_context", new=_mock_build_context):
240
- context, context_order, image_context = await builder.build()
241
- assert len(context) == 3
242
- assert len(context_order) == 3
243
- assert len(image_context) == 0
244
- assert context["USER_CONTEXT_0"] == "Carrots are orange"
245
- assert context["resource1/a/title"] == "Resource 1"
246
- assert context["resource2/a/title"] == "Resource 2"
247
- assert context_order["USER_CONTEXT_0"] == 0
248
- assert context_order["resource1/a/title"] == 1
249
- assert context_order["resource2/a/title"] == 2
250
-
251
-
252
- def test_capped_prompt_context():
253
- context = chat_prompt.CappedPromptContext(max_size=2)
254
-
255
- # Check that output is trimmed
256
- context["key1"] = "123"
257
-
258
- assert context.output == {"key1": "12"}
259
- assert context.size == 2
260
-
261
- # Update existing value
262
- context["key1"] = "foobar"
263
- assert context.output == {"key1": "fo"}
264
- assert context.size == 2
265
-
266
- # Check without limits
267
- context = chat_prompt.CappedPromptContext(max_size=None)
268
- context["key1"] = "foo" * int(1e6)
269
-
270
- assert context.output == {"key1": "foo" * int(1e6)}
@@ -1,108 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import time
21
-
22
- import pytest
23
- from pytest_benchmark.fixture import BenchmarkFixture # type: ignore
24
-
25
- from nucliadb.search.search.paragraphs import highlight_paragraph as highlight
26
-
27
-
28
- @pytest.mark.benchmark(
29
- group="highlight",
30
- min_time=0.1,
31
- max_time=0.5,
32
- min_rounds=5,
33
- timer=time.time,
34
- disable_gc=True,
35
- warmup=False,
36
- )
37
- def test_highligh_error(benchmark: BenchmarkFixture):
38
- text = "bu kimlik belgelerinin geçerlilik sürelerinin standartlara aykırı olmadığını, fotoğraftaki yakın alan iletişim çipindeki bilgilerin tutarlı ve geçerli olmadığını ve İçişleri Bakanlığı'nın ortasında kimlik değişimine erişebilenleri onaylar. sistem" # noqa
39
- ematch = ["kimlik", "sistem"]
40
- res = benchmark(highlight, text, [], ematch)
41
- assert res.count("mark") == 6
42
- assert (
43
- res
44
- == "bu <mark>kimlik</mark> belgelerinin geçerlilik sürelerinin standartlara aykırı olmadığını, fotoğraftaki yakın alan iletişim çipindeki bilgilerin tutarlı ve geçerli olmadığını ve İçişleri Bakanlığı'nın ortasında <mark>kimlik</mark> değişimine erişebilenleri onaylar. <mark>sistem</mark>" # noqa
45
- )
46
-
47
-
48
- def test_highlight():
49
- res = highlight(
50
- "Query whatever you want my to make it work my query with this",
51
- ["this", "is", "my", "query"],
52
- )
53
- assert (
54
- res
55
- == "<mark>Query</mark> whatever you want <mark>my</mark> to make it work <mark>my</mark> <mark>query</mark> with <mark>this</mark>" # noqa
56
- )
57
-
58
- res = highlight(
59
- "Query whatever you want to make it work my query with this",
60
- ["this", "is"],
61
- ["my query"],
62
- )
63
-
64
- assert (
65
- res
66
- == "Query whatever you want to make it work <mark>my query</mark> with <mark>this</mark>"
67
- )
68
-
69
- res = highlight(
70
- "Query whatever you redis want to make it work my query with this",
71
- ["this", "is"],
72
- ["my query"],
73
- )
74
-
75
- assert (
76
- res
77
- == "Query whatever you redis want to make it work <mark>my query</mark> with <mark>this</mark>"
78
- )
79
-
80
- res = highlight(
81
- "Plone offers superior security controls, often without cost, of course!",
82
- ["use", "cases", "of", "plone"],
83
- )
84
-
85
- assert (
86
- res
87
- == "<mark>Plone</mark> offers superior security controls, often without cost, <mark>of</mark> course!"
88
- )
89
-
90
- res = highlight(
91
- "In contrast, traditional companies often make it impossible",
92
- ["of", "market"],
93
- ["of", "market"],
94
- )
95
- assert res == "In contrast, traditional companies often make it impossible"
96
-
97
- # sc-3067: Unbalanced parenthesis or brackets in query should not make highlight fail
98
- res = highlight(
99
- "Some sentence here",
100
- [
101
- "Some).",
102
- ],
103
- [
104
- "sent)ence",
105
- "(here",
106
- ],
107
- )
108
- assert res == "Some sentence here"
@@ -1,125 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- from unittest import mock
20
-
21
- import jsonschema # type: ignore
22
- import pytest
23
-
24
- from nucliadb.search.search.filters import (
25
- INDEX_NODE_FILTERS_SCHEMA,
26
- convert_filter_to_node_schema,
27
- convert_to_node_filters,
28
- iter_filter_labels_expression,
29
- translate_label_filters,
30
- )
31
- from nucliadb_models.search import Filter
32
-
33
-
34
- @pytest.fixture(scope="function")
35
- def is_paragraph_labelset_kind_mock():
36
- with mock.patch(
37
- "nucliadb.search.search.filters.is_paragraph_labelset_kind"
38
- ) as mocked:
39
- yield mocked
40
-
41
-
42
- @pytest.mark.parametrize(
43
- "original,converted",
44
- [
45
- ("foo", {"literal": "foo"}),
46
- (Filter(all=["foo"]), {"literal": "foo"}),
47
- (Filter(all=["foo", "bar"]), {"and": [{"literal": "foo"}, {"literal": "bar"}]}),
48
- (Filter(any=["foo"]), {"literal": "foo"}),
49
- (Filter(any=["foo", "bar"]), {"or": [{"literal": "foo"}, {"literal": "bar"}]}),
50
- (Filter(none=["foo"]), {"not": {"literal": "foo"}}),
51
- (
52
- Filter(none=["foo", "bar"]),
53
- {"not": {"or": [{"literal": "foo"}, {"literal": "bar"}]}},
54
- ),
55
- (Filter(not_all=["foo"]), {"not": {"literal": "foo"}}),
56
- (
57
- Filter(not_all=["foo", "bar"]),
58
- {"not": {"and": [{"literal": "foo"}, {"literal": "bar"}]}},
59
- ),
60
- ],
61
- )
62
- def test_convert_filter_to_node_schema(original, converted):
63
- assert convert_filter_to_node_schema(original) == converted
64
- jsonschema.validate(converted, INDEX_NODE_FILTERS_SCHEMA)
65
-
66
-
67
- def test_convert_to_node_filters():
68
- assert convert_to_node_filters([]) == {}
69
- assert convert_to_node_filters(["foo"]) == {"literal": "foo"}
70
- assert convert_to_node_filters(["foo", "bar"]) == {
71
- "and": [{"literal": "foo"}, {"literal": "bar"}]
72
- }
73
- assert convert_to_node_filters([Filter(all=["foo"])]) == {"literal": "foo"}
74
- assert convert_to_node_filters([Filter(all=["foo"]), Filter(any=["bar"])]) == {
75
- "and": [{"literal": "foo"}, {"literal": "bar"}]
76
- }
77
-
78
-
79
- def test_translate_label_filters():
80
- literal = {"literal": "/classification.labels/foo/bar"}
81
- translated = {"literal": "/l/foo/bar"}
82
-
83
- assert translate_label_filters(literal) == translated
84
- assert translate_label_filters({"not": literal}) == {"not": translated}
85
- assert translate_label_filters({"and": [literal, literal]}) == {
86
- "and": [translated, translated]
87
- }
88
- assert translate_label_filters({"or": [literal, literal]}) == {
89
- "or": [translated, translated]
90
- }
91
- assert translate_label_filters(
92
- {"and": [{"or": [literal, literal]}, {"not": literal}]}
93
- ) == {
94
- "and": [
95
- {"or": [translated, translated]},
96
- {"not": translated},
97
- ]
98
- }
99
-
100
-
101
- def test_iter_filter_labels_expression():
102
- literal = {"literal": "foo"}
103
- assert list(iter_filter_labels_expression(literal)) == ["foo"]
104
- assert list(iter_filter_labels_expression({"and": [literal, literal]})) == [
105
- "foo",
106
- "foo",
107
- ]
108
- assert list(iter_filter_labels_expression({"or": [literal, literal]})) == [
109
- "foo",
110
- "foo",
111
- ]
112
- assert list(
113
- iter_filter_labels_expression({"not": {"and": [literal, literal]}})
114
- ) == ["foo", "foo"]
115
-
116
-
117
- def test_filters_model():
118
- f = Filter(all=["foo", "bar"], any=None)
119
- assert f.all == ["foo", "bar"]
120
- assert f.any is None
121
- assert f.none is None
122
- assert f.not_all is None
123
-
124
- with pytest.raises(ValueError):
125
- Filter(all=["foo"], any=["bar"])