nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,263 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import base64
21
- import json
22
- from typing import Any, Optional, Union
23
-
24
- import pydantic
25
- from fastapi import Body, Header, Request, Response
26
- from fastapi.openapi.models import Example
27
- from fastapi_versioning import version
28
- from starlette.responses import StreamingResponse
29
-
30
- from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
31
- from nucliadb.models.responses import HTTPClientError
32
- from nucliadb.search import logger, predict
33
- from nucliadb.search.api.v1.router import KB_PREFIX, api
34
- from nucliadb.search.predict import AnswerStatusCode
35
- from nucliadb.search.search.chat.query import (
36
- START_OF_CITATIONS,
37
- chat,
38
- get_relations_results,
39
- )
40
- from nucliadb.search.search.exceptions import (
41
- IncompleteFindResultsError,
42
- InvalidQueryError,
43
- )
44
- from nucliadb_models.resource import NucliaDBRoles
45
- from nucliadb_models.search import (
46
- ChatOptions,
47
- ChatRequest,
48
- KnowledgeboxFindResults,
49
- NucliaDBClientType,
50
- PromptContext,
51
- PromptContextOrder,
52
- Relations,
53
- parse_max_tokens,
54
- )
55
- from nucliadb_telemetry.errors import capture_exception
56
- from nucliadb_utils.authentication import requires
57
- from nucliadb_utils.exceptions import LimitsExceededError
58
-
59
- END_OF_STREAM = "_END_"
60
-
61
-
62
- class SyncChatResponse(pydantic.BaseModel):
63
- answer: str
64
- relations: Optional[Relations] = None
65
- results: KnowledgeboxFindResults
66
- status: AnswerStatusCode
67
- citations: dict[str, Any] = {}
68
- prompt_context: Optional[PromptContext] = None
69
- prompt_context_order: Optional[PromptContextOrder] = None
70
-
71
-
72
- CHAT_EXAMPLES = {
73
- "search_and_chat": Example(
74
- summary="Ask who won the league final",
75
- description="You can ask a question to your knowledge box", # noqa
76
- value={
77
- "query": "Who won the league final?",
78
- },
79
- ),
80
- "search_and_chat_with_custom_prompt": Example(
81
- summary="Ask for the gold price evolution in 2023 in a very conscise way",
82
- description="You can ask a question and specify a custom prompt to tweak the tone of the response", # noqa
83
- value={
84
- "query": "How has the price of gold evolved during 2023?",
85
- "prompt": "Given this context: {context}. Answer this {question} in a concise way using the provided context", # noqa
86
- },
87
- ),
88
- }
89
-
90
-
91
- @api.post(
92
- f"/{KB_PREFIX}/{{kbid}}/chat",
93
- status_code=200,
94
- summary="Chat on a Knowledge Box",
95
- description="Chat on a Knowledge Box",
96
- tags=["Search"],
97
- response_model=None,
98
- )
99
- @requires(NucliaDBRoles.READER)
100
- @version(1)
101
- async def chat_knowledgebox_endpoint(
102
- request: Request,
103
- kbid: str,
104
- item: ChatRequest = Body(openapi_examples=CHAT_EXAMPLES),
105
- x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
106
- x_nucliadb_user: str = Header(""),
107
- x_forwarded_for: str = Header(""),
108
- x_synchronous: bool = Header(
109
- False,
110
- description="When set to true, outputs response as JSON in a non-streaming way. "
111
- "This is slower and requires waiting for entire answer to be ready.",
112
- ),
113
- ) -> Union[StreamingResponse, HTTPClientError, Response]:
114
- try:
115
- return await create_chat_response(
116
- kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
117
- )
118
- except KnowledgeBoxNotFound:
119
- return HTTPClientError(
120
- status_code=404,
121
- detail=f"Knowledge Box '{kbid}' not found.",
122
- )
123
- except LimitsExceededError as exc:
124
- return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
125
- except predict.ProxiedPredictAPIError as err:
126
- return HTTPClientError(
127
- status_code=err.status,
128
- detail=err.detail,
129
- )
130
- except IncompleteFindResultsError:
131
- return HTTPClientError(
132
- status_code=529,
133
- detail="Temporary error on information retrieval. Please try again.",
134
- )
135
- except predict.RephraseMissingContextError:
136
- return HTTPClientError(
137
- status_code=412,
138
- detail="Unable to rephrase the query with the provided context.",
139
- )
140
- except predict.RephraseError as err:
141
- return HTTPClientError(
142
- status_code=529,
143
- detail=f"Temporary error while rephrasing the query. Please try again later. Error: {err}",
144
- )
145
- except InvalidQueryError as exc:
146
- return HTTPClientError(status_code=412, detail=str(exc))
147
-
148
-
149
- async def create_chat_response(
150
- kbid: str,
151
- chat_request: ChatRequest,
152
- user_id: str,
153
- client_type: NucliaDBClientType,
154
- origin: str,
155
- x_synchronous: bool,
156
- resource: Optional[str] = None,
157
- ) -> Response:
158
- chat_request.max_tokens = parse_max_tokens(chat_request.max_tokens)
159
- chat_result = await chat(
160
- kbid,
161
- chat_request,
162
- user_id,
163
- client_type,
164
- origin,
165
- resource=resource,
166
- )
167
- if x_synchronous:
168
- streamed_answer = b""
169
- async for chunk in chat_result.answer_stream:
170
- streamed_answer += chunk
171
-
172
- answer, citations = parse_streamed_answer(
173
- streamed_answer, chat_request.citations
174
- )
175
-
176
- relations_results = None
177
- if ChatOptions.RELATIONS in chat_request.features:
178
- # XXX should use query parser here
179
- relations_results = await get_relations_results(
180
- kbid=kbid, text_answer=answer, target_shard_replicas=chat_request.shards
181
- )
182
-
183
- sync_chat_resp = SyncChatResponse(
184
- answer=answer,
185
- relations=relations_results,
186
- results=chat_result.find_results,
187
- status=chat_result.status_code.value,
188
- citations=citations,
189
- )
190
- if chat_request.debug:
191
- sync_chat_resp.prompt_context = chat_result.prompt_context
192
- sync_chat_resp.prompt_context_order = chat_result.prompt_context_order
193
- return Response(
194
- content=sync_chat_resp.json(exclude_unset=True),
195
- headers={
196
- "NUCLIA-LEARNING-ID": chat_result.nuclia_learning_id or "unknown",
197
- "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
198
- "Content-Type": "application/json",
199
- },
200
- )
201
- else:
202
-
203
- async def _streaming_response():
204
- bytes_results = base64.b64encode(chat_result.find_results.json().encode())
205
- yield len(bytes_results).to_bytes(length=4, byteorder="big", signed=False)
206
- yield bytes_results
207
-
208
- streamed_answer = b""
209
- async for chunk in chat_result.answer_stream:
210
- streamed_answer += chunk
211
- yield chunk
212
-
213
- answer, _ = parse_streamed_answer(streamed_answer, chat_request.citations)
214
-
215
- yield END_OF_STREAM.encode()
216
- if ChatOptions.RELATIONS in chat_request.features:
217
- # XXX should use query parser here
218
- relations_results = await get_relations_results(
219
- kbid=kbid,
220
- text_answer=answer,
221
- target_shard_replicas=chat_request.shards,
222
- )
223
- yield base64.b64encode(relations_results.json().encode())
224
-
225
- return StreamingResponse(
226
- _streaming_response(),
227
- media_type="application/octet-stream",
228
- headers={
229
- "NUCLIA-LEARNING-ID": chat_result.nuclia_learning_id or "unknown",
230
- "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
231
- },
232
- )
233
-
234
-
235
- def parse_streamed_answer(
236
- streamed_bytes: bytes, requested_citations: bool
237
- ) -> tuple[str, dict[str, Any]]:
238
- try:
239
- text_answer, tail = streamed_bytes.split(START_OF_CITATIONS, 1)
240
- except ValueError:
241
- if requested_citations:
242
- logger.warning(
243
- "Citations were requested but not found in the answer. "
244
- "Returning the answer without citations."
245
- )
246
- return streamed_bytes.decode("utf-8"), {}
247
- if not requested_citations:
248
- logger.warning(
249
- "Citations were not requested but found in the answer. "
250
- "Returning the answer without citations."
251
- )
252
- return text_answer.decode("utf-8"), {}
253
- try:
254
- citations_length = int.from_bytes(tail[:4], byteorder="big", signed=False)
255
- citations_bytes = tail[4 : 4 + citations_length]
256
- citations = json.loads(base64.b64decode(citations_bytes).decode())
257
- return text_answer.decode("utf-8"), citations
258
- except Exception as exc:
259
- capture_exception(exc)
260
- logger.exception(
261
- "Error parsing citations. Returning the answer without citations."
262
- )
263
- return text_answer.decode("utf-8"), {}
@@ -1,174 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from typing import Optional, Union
21
-
22
- from fastapi import Header, Request, Response
23
- from fastapi_versioning import version
24
- from starlette.responses import StreamingResponse
25
-
26
- from nucliadb.common import datamanagers
27
- from nucliadb.models.responses import HTTPClientError
28
- from nucliadb.search import predict
29
- from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_SLUG_PREFIX, api
30
- from nucliadb.search.search.exceptions import (
31
- IncompleteFindResultsError,
32
- InvalidQueryError,
33
- )
34
- from nucliadb_models.resource import NucliaDBRoles
35
- from nucliadb_models.search import ChatRequest, NucliaDBClientType
36
- from nucliadb_utils.authentication import requires
37
- from nucliadb_utils.exceptions import LimitsExceededError
38
-
39
- from ..chat import create_chat_response
40
-
41
-
42
- @api.post(
43
- f"/{KB_PREFIX}/{{kbid}}/resource/{{rid}}/chat",
44
- status_code=200,
45
- summary="Chat with a resource (by id)",
46
- description="Chat with a resource",
47
- tags=["Search"],
48
- response_model=None,
49
- )
50
- @requires(NucliaDBRoles.READER)
51
- @version(1)
52
- async def resource_chat_endpoint_by_uuid(
53
- request: Request,
54
- kbid: str,
55
- rid: str,
56
- item: ChatRequest,
57
- x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
58
- x_nucliadb_user: str = Header(""),
59
- x_forwarded_for: str = Header(""),
60
- x_synchronous: bool = Header(
61
- False,
62
- description="When set to true, outputs response as JSON in a non-streaming way. "
63
- "This is slower and requires waiting for entire answer to be ready.",
64
- ),
65
- ) -> Union[StreamingResponse, HTTPClientError, Response]:
66
- return await resource_chat_endpoint(
67
- request,
68
- kbid,
69
- item,
70
- x_ndb_client,
71
- x_nucliadb_user,
72
- x_forwarded_for,
73
- x_synchronous,
74
- resource_id=rid,
75
- )
76
-
77
-
78
- @api.post(
79
- f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_SLUG_PREFIX}/{{slug}}/chat",
80
- status_code=200,
81
- summary="Chat with a resource (by slug)",
82
- description="Chat with a resource",
83
- tags=["Search"],
84
- response_model=None,
85
- )
86
- @requires(NucliaDBRoles.READER)
87
- @version(1)
88
- async def resource_chat_endpoint_by_slug(
89
- request: Request,
90
- kbid: str,
91
- slug: str,
92
- item: ChatRequest,
93
- x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
94
- x_nucliadb_user: str = Header(""),
95
- x_forwarded_for: str = Header(""),
96
- x_synchronous: bool = Header(
97
- False,
98
- description="When set to true, outputs response as JSON in a non-streaming way. "
99
- "This is slower and requires waiting for entire answer to be ready.",
100
- ),
101
- ) -> Union[StreamingResponse, HTTPClientError, Response]:
102
- return await resource_chat_endpoint(
103
- request,
104
- kbid,
105
- item,
106
- x_ndb_client,
107
- x_nucliadb_user,
108
- x_forwarded_for,
109
- x_synchronous,
110
- resource_slug=slug,
111
- )
112
-
113
-
114
- async def resource_chat_endpoint(
115
- request: Request,
116
- kbid: str,
117
- item: ChatRequest,
118
- x_ndb_client: NucliaDBClientType,
119
- x_nucliadb_user: str,
120
- x_forwarded_for: str,
121
- x_synchronous: bool,
122
- resource_id: Optional[str] = None,
123
- resource_slug: Optional[str] = None,
124
- ) -> Union[StreamingResponse, HTTPClientError, Response]:
125
-
126
- if resource_id is None:
127
- if resource_slug is None:
128
- raise ValueError("Either resource_id or resource_slug must be provided")
129
-
130
- resource_id = await get_resource_uuid_by_slug(kbid, resource_slug)
131
- if resource_id is None:
132
- return HTTPClientError(status_code=404, detail="Resource not found")
133
-
134
- try:
135
- return await create_chat_response(
136
- kbid,
137
- item,
138
- x_nucliadb_user,
139
- x_ndb_client,
140
- x_forwarded_for,
141
- x_synchronous,
142
- resource=resource_id,
143
- )
144
- except LimitsExceededError as exc:
145
- return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
146
- except predict.ProxiedPredictAPIError as err:
147
- return HTTPClientError(
148
- status_code=err.status,
149
- detail=err.detail,
150
- )
151
- except IncompleteFindResultsError:
152
- return HTTPClientError(
153
- status_code=529,
154
- detail="Temporary error on information retrieval. Please try again.",
155
- )
156
- except predict.RephraseMissingContextError:
157
- return HTTPClientError(
158
- status_code=412,
159
- detail="Unable to rephrase the query with the provided context.",
160
- )
161
- except predict.RephraseError as err:
162
- return HTTPClientError(
163
- status_code=529,
164
- detail=f"Temporary error while rephrasing the query. Please try again later. Error: {err}",
165
- )
166
- except InvalidQueryError as exc:
167
- return HTTPClientError(status_code=412, detail=str(exc))
168
-
169
-
170
- async def get_resource_uuid_by_slug(kbid: str, slug: str) -> Optional[str]:
171
- async with datamanagers.with_transaction() as txn:
172
- return await datamanagers.resources.get_resource_uuid_from_slug(
173
- txn, kbid=kbid, slug=slug
174
- )
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,33 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- pytest_plugins = [
21
- "pytest_docker_fixtures",
22
- "nucliadb.tests.fixtures",
23
- "nucliadb.tests.tikv",
24
- "nucliadb.ingest.tests.fixtures", # should be refactored out
25
- "nucliadb.search.tests.node",
26
- "nucliadb.search.tests.fixtures",
27
- "nucliadb_utils.tests.conftest",
28
- "nucliadb_utils.tests.gcs",
29
- "nucliadb_utils.tests.s3",
30
- "nucliadb_utils.tests.nats",
31
- "nucliadb_utils.tests.asyncbenchmark",
32
- "nucliadb_utils.tests.indexing",
33
- ]
@@ -1,199 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- from enum import Enum
22
- from typing import Optional
23
-
24
- import pytest
25
- from httpx import AsyncClient
26
- from nucliadb_protos.nodereader_pb2 import GetShardRequest
27
- from nucliadb_protos.noderesources_pb2 import Shard
28
- from redis import asyncio as aioredis
29
-
30
- from nucliadb.common.cluster.manager import KBShardManager, get_index_node
31
- from nucliadb.common.maindb.utils import get_driver
32
- from nucliadb.ingest.cache import clear_ingest_cache
33
- from nucliadb.ingest.tests.fixtures import broker_resource
34
- from nucliadb.search import API_PREFIX
35
- from nucliadb_utils.tests import free_port
36
- from nucliadb_utils.utilities import clear_global_cache
37
-
38
-
39
- @pytest.fixture(scope="function")
40
- def test_settings_search(storage, natsd, node, maindb_driver): # type: ignore
41
- from nucliadb.ingest.settings import settings as ingest_settings
42
- from nucliadb_utils.cache.settings import settings as cache_settings
43
- from nucliadb_utils.settings import (
44
- nuclia_settings,
45
- nucliadb_settings,
46
- running_settings,
47
- )
48
-
49
- cache_settings.cache_pubsub_nats_url = [natsd]
50
-
51
- running_settings.debug = False
52
-
53
- ingest_settings.disable_pull_worker = True
54
-
55
- ingest_settings.nuclia_partitions = 1
56
-
57
- nuclia_settings.dummy_processing = True
58
- nuclia_settings.dummy_predict = True
59
- nuclia_settings.dummy_learning_services = True
60
-
61
- ingest_settings.grpc_port = free_port()
62
-
63
- nucliadb_settings.nucliadb_ingest = f"localhost:{ingest_settings.grpc_port}"
64
-
65
-
66
- @pytest.mark.asyncio
67
- @pytest.fixture(scope="function")
68
- async def search_api(test_settings_search, transaction_utility, redis): # type: ignore
69
- from nucliadb.common.cluster import manager
70
- from nucliadb.search.app import application
71
-
72
- driver = aioredis.from_url(f"redis://{redis[0]}:{redis[1]}")
73
- await driver.flushall()
74
-
75
- await application.router.startup()
76
-
77
- # Make sure is clean
78
- await asyncio.sleep(1)
79
- count = 0
80
- while len(manager.INDEX_NODES) < 2:
81
- print("awaiting cluster nodes - search fixtures.py")
82
- await asyncio.sleep(1)
83
- if count == 40:
84
- raise Exception("No cluster")
85
- count += 1
86
-
87
- def make_client_fixture(
88
- roles: Optional[list[Enum]] = None,
89
- user: str = "",
90
- version: str = "1",
91
- root: bool = False,
92
- extra_headers: Optional[dict[str, str]] = None,
93
- ) -> AsyncClient:
94
- roles = roles or []
95
- client_base_url = "http://test"
96
-
97
- if root is False:
98
- client_base_url = f"{client_base_url}/{API_PREFIX}/v{version}"
99
-
100
- client = AsyncClient(app=application, base_url=client_base_url) # type: ignore
101
- client.headers["X-NUCLIADB-ROLES"] = ";".join([role.value for role in roles])
102
- client.headers["X-NUCLIADB-USER"] = user
103
-
104
- extra_headers = extra_headers or {}
105
- if len(extra_headers) == 0:
106
- return client
107
-
108
- for header, value in extra_headers.items():
109
- client.headers[f"{header}"] = value
110
-
111
- return client
112
-
113
- yield make_client_fixture
114
- await application.router.shutdown()
115
- # Make sure nodes can sync
116
- await asyncio.sleep(1)
117
- await driver.flushall()
118
- await driver.close(close_connection_pool=True)
119
- clear_ingest_cache()
120
- clear_global_cache()
121
- manager.INDEX_NODES.clear()
122
-
123
-
124
- @pytest.fixture(scope="function")
125
- async def test_search_resource(
126
- indexing_utility_registered,
127
- processor,
128
- knowledgebox_ingest,
129
- ):
130
- """
131
- Create a resource that has every possible bit of information
132
- """
133
- message1 = broker_resource(knowledgebox_ingest, rid="foobar", slug="foobar-slug")
134
- kbid = await inject_message(processor, knowledgebox_ingest, message1)
135
- resource_field_count = 3
136
- await wait_for_shard(knowledgebox_ingest, resource_field_count)
137
- yield kbid
138
-
139
-
140
- @pytest.fixture(scope="function")
141
- async def multiple_search_resource(
142
- indexing_utility_registered,
143
- processor,
144
- knowledgebox_ingest,
145
- ):
146
- """
147
- Create 100 resources that have every possible bit of information
148
- """
149
- n_resources = 100
150
- fields_per_resource = 3
151
- for count in range(1, n_resources + 1):
152
- message = broker_resource(knowledgebox_ingest)
153
- await processor.process(message=message, seqid=count)
154
-
155
- await wait_for_shard(knowledgebox_ingest, n_resources * fields_per_resource)
156
- return knowledgebox_ingest
157
-
158
-
159
- async def inject_message(
160
- processor, knowledgebox_ingest, message, count: int = 1
161
- ) -> str:
162
- await processor.process(message=message, seqid=count)
163
- await wait_for_shard(knowledgebox_ingest, count)
164
- return knowledgebox_ingest
165
-
166
-
167
- async def wait_for_shard(knowledgebox_ingest: str, count: int) -> str:
168
- # Make sure is indexed
169
- driver = get_driver()
170
- txn = await driver.begin()
171
- shard_manager = KBShardManager()
172
- shard = await shard_manager.get_current_active_shard(txn, knowledgebox_ingest)
173
- if shard is None:
174
- raise Exception("Could not find shard")
175
- await txn.abort()
176
-
177
- checks: dict[str, bool] = {}
178
- for replica in shard.replicas:
179
- if replica.shard.id not in checks:
180
- checks[replica.shard.id] = False
181
-
182
- for i in range(30):
183
- for replica in shard.replicas:
184
- node_obj = get_index_node(replica.node)
185
- if node_obj is not None:
186
- req = GetShardRequest()
187
- req.shard_id.id = replica.shard.id
188
- count_shard: Shard = await node_obj.reader.GetShard(req) # type: ignore
189
- if count_shard.fields >= count:
190
- checks[replica.shard.id] = True
191
- else:
192
- checks[replica.shard.id] = False
193
-
194
- if all(checks.values()):
195
- break
196
- await asyncio.sleep(1)
197
-
198
- assert all(checks.values())
199
- return knowledgebox_ingest