nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,258 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import base64
21
- import json
22
- from typing import Any, Optional, Union
23
-
24
- import pydantic
25
- from fastapi import Body, Header, Request, Response
26
- from fastapi.openapi.models import Example
27
- from fastapi_versioning import version
28
- from starlette.responses import StreamingResponse
29
-
30
- from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
31
- from nucliadb.models.responses import HTTPClientError
32
- from nucliadb.search import logger, predict
33
- from nucliadb.search.api.v1.router import KB_PREFIX, api
34
- from nucliadb.search.predict import AnswerStatusCode
35
- from nucliadb.search.search.chat.query import (
36
- START_OF_CITATIONS,
37
- chat,
38
- get_relations_results,
39
- )
40
- from nucliadb.search.search.exceptions import (
41
- IncompleteFindResultsError,
42
- InvalidQueryError,
43
- )
44
- from nucliadb_models.resource import NucliaDBRoles
45
- from nucliadb_models.search import (
46
- ChatOptions,
47
- ChatRequest,
48
- KnowledgeboxFindResults,
49
- NucliaDBClientType,
50
- PromptContext,
51
- PromptContextOrder,
52
- Relations,
53
- )
54
- from nucliadb_telemetry.errors import capture_exception
55
- from nucliadb_utils.authentication import requires
56
- from nucliadb_utils.exceptions import LimitsExceededError
57
-
58
- END_OF_STREAM = "_END_"
59
-
60
-
61
- class SyncChatResponse(pydantic.BaseModel):
62
- answer: str
63
- relations: Optional[Relations]
64
- results: KnowledgeboxFindResults
65
- status: AnswerStatusCode
66
- citations: dict[str, Any] = {}
67
- prompt_context: Optional[PromptContext] = None
68
- prompt_context_order: Optional[PromptContextOrder] = None
69
-
70
-
71
- CHAT_EXAMPLES = {
72
- "search_and_chat": Example(
73
- summary="Ask who won the league final",
74
- description="You can ask a question to your knowledge box", # noqa
75
- value={
76
- "query": "Who won the league final?",
77
- },
78
- ),
79
- "search_and_chat_with_custom_prompt": Example(
80
- summary="Ask for the gold price evolution in 2023 in a very conscise way",
81
- description="You can ask a question and specify a custom prompt to tweak the tone of the response", # noqa
82
- value={
83
- "query": "How has the price of gold evolved during 2023?",
84
- "prompt": "Given this context: {context}. Answer this {question} in a concise way using the provided context", # noqa
85
- },
86
- ),
87
- }
88
-
89
-
90
- @api.post(
91
- f"/{KB_PREFIX}/{{kbid}}/chat",
92
- status_code=200,
93
- name="Chat Knowledge Box",
94
- summary="Chat on a Knowledge Box",
95
- description="Chat on a Knowledge Box",
96
- tags=["Search"],
97
- response_model=None,
98
- )
99
- @requires(NucliaDBRoles.READER)
100
- @version(1)
101
- async def chat_knowledgebox_endpoint(
102
- request: Request,
103
- kbid: str,
104
- item: ChatRequest = Body(openapi_examples=CHAT_EXAMPLES),
105
- x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
106
- x_nucliadb_user: str = Header(""),
107
- x_forwarded_for: str = Header(""),
108
- x_synchronous: bool = Header(
109
- False,
110
- description="When set to true, outputs response as JSON in a non-streaming way. "
111
- "This is slower and requires waiting for entire answer to be ready.",
112
- ),
113
- ) -> Union[StreamingResponse, HTTPClientError, Response]:
114
- try:
115
- return await create_chat_response(
116
- kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
117
- )
118
- except KnowledgeBoxNotFound:
119
- return HTTPClientError(
120
- status_code=404,
121
- detail=f"Knowledge Box '{kbid}' not found.",
122
- )
123
- except LimitsExceededError as exc:
124
- return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
125
- except predict.ProxiedPredictAPIError as err:
126
- return HTTPClientError(
127
- status_code=503,
128
- detail=f"Chat service unavailable. {err.status}: {err.detail}",
129
- )
130
- except IncompleteFindResultsError:
131
- return HTTPClientError(
132
- status_code=529,
133
- detail="Temporary error on information retrieval. Please try again.",
134
- )
135
- except predict.RephraseMissingContextError:
136
- return HTTPClientError(
137
- status_code=412,
138
- detail="Unable to rephrase the query with the provided context.",
139
- )
140
- except predict.RephraseError as err:
141
- return HTTPClientError(
142
- status_code=529,
143
- detail=f"Temporary error while rephrasing the query. Please try again later. Error: {err}",
144
- )
145
- except InvalidQueryError as exc:
146
- return HTTPClientError(status_code=412, detail=str(exc))
147
-
148
-
149
- async def create_chat_response(
150
- kbid: str,
151
- chat_request: ChatRequest,
152
- user_id: str,
153
- client_type: NucliaDBClientType,
154
- origin: str,
155
- x_synchronous: bool,
156
- ) -> Response:
157
- chat_result = await chat(
158
- kbid,
159
- chat_request,
160
- user_id,
161
- client_type,
162
- origin,
163
- )
164
- if x_synchronous:
165
- streamed_answer = b""
166
- async for chunk in chat_result.answer_stream:
167
- streamed_answer += chunk
168
-
169
- answer, citations = parse_streamed_answer(
170
- streamed_answer, chat_request.citations
171
- )
172
-
173
- relations_results = None
174
- if ChatOptions.RELATIONS in chat_request.features:
175
- # XXX should use query parser here
176
- relations_results = await get_relations_results(
177
- kbid=kbid, chat_request=chat_request, text_answer=answer
178
- )
179
-
180
- sync_chat_resp = SyncChatResponse(
181
- answer=answer,
182
- relations=relations_results,
183
- results=chat_result.find_results,
184
- status=chat_result.status_code.value,
185
- citations=citations,
186
- )
187
- if chat_request.debug:
188
- sync_chat_resp.prompt_context = chat_result.prompt_context
189
- sync_chat_resp.prompt_context_order = chat_result.prompt_context_order
190
- return Response(
191
- content=sync_chat_resp.json(exclude_unset=True),
192
- headers={
193
- "NUCLIA-LEARNING-ID": chat_result.nuclia_learning_id or "unknown",
194
- "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
195
- "Content-Type": "application/json",
196
- },
197
- )
198
- else:
199
-
200
- async def _streaming_response():
201
- bytes_results = base64.b64encode(chat_result.find_results.json().encode())
202
- yield len(bytes_results).to_bytes(length=4, byteorder="big", signed=False)
203
- yield bytes_results
204
-
205
- streamed_answer = b""
206
- async for chunk in chat_result.answer_stream:
207
- streamed_answer += chunk
208
- yield chunk
209
-
210
- answer, _ = parse_streamed_answer(streamed_answer, chat_request.citations)
211
-
212
- yield END_OF_STREAM.encode()
213
- if ChatOptions.RELATIONS in chat_request.features:
214
- # XXX should use query parser here
215
- relations_results = await get_relations_results(
216
- kbid=kbid, chat_request=chat_request, text_answer=answer
217
- )
218
- yield base64.b64encode(relations_results.json().encode())
219
-
220
- return StreamingResponse(
221
- _streaming_response(),
222
- media_type="application/octet-stream",
223
- headers={
224
- "NUCLIA-LEARNING-ID": chat_result.nuclia_learning_id or "unknown",
225
- "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
226
- },
227
- )
228
-
229
-
230
- def parse_streamed_answer(
231
- streamed_bytes: bytes, requested_citations: bool
232
- ) -> tuple[str, dict[str, Any]]:
233
- try:
234
- text_answer, tail = streamed_bytes.split(START_OF_CITATIONS, 1)
235
- except ValueError:
236
- if requested_citations:
237
- logger.warning(
238
- "Citations were requested but not found in the answer. "
239
- "Returning the answer without citations."
240
- )
241
- return streamed_bytes.decode("utf-8"), {}
242
- if not requested_citations:
243
- logger.warning(
244
- "Citations were not requested but found in the answer. "
245
- "Returning the answer without citations."
246
- )
247
- return text_answer.decode("utf-8"), {}
248
- try:
249
- citations_length = int.from_bytes(tail[:4], byteorder="big", signed=False)
250
- citations_bytes = tail[4 : 4 + citations_length]
251
- citations = json.loads(base64.b64decode(citations_bytes).decode())
252
- return text_answer.decode("utf-8"), citations
253
- except Exception as exc:
254
- capture_exception(exc)
255
- logger.exception(
256
- "Error parsing citations. Returning the answer without citations."
257
- )
258
- return text_answer.decode("utf-8"), {}
@@ -1,94 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from typing import Union
21
-
22
- from fastapi import Header, Request, Response
23
- from fastapi_versioning import version
24
- from starlette.responses import StreamingResponse
25
-
26
- from nucliadb.models.responses import HTTPClientError
27
- from nucliadb.search import predict
28
- from nucliadb.search.api.v1.router import KB_PREFIX, api
29
- from nucliadb.search.search.exceptions import (
30
- IncompleteFindResultsError,
31
- InvalidQueryError,
32
- )
33
- from nucliadb_models.resource import NucliaDBRoles
34
- from nucliadb_models.search import ChatRequest, NucliaDBClientType
35
- from nucliadb_utils.authentication import requires
36
- from nucliadb_utils.exceptions import LimitsExceededError
37
-
38
- from ..chat import create_chat_response
39
-
40
-
41
- @api.post(
42
- f"/{KB_PREFIX}/{{kbid}}/resource/{{rid}}/chat",
43
- status_code=200,
44
- name="Chat with a Resource (by id)",
45
- summary="Chat with a resource",
46
- description="Chat with a resource",
47
- tags=["Search"],
48
- response_model=None,
49
- )
50
- @requires(NucliaDBRoles.READER)
51
- @version(1)
52
- async def resource_chat_endpoint(
53
- request: Request,
54
- kbid: str,
55
- rid: str,
56
- item: ChatRequest,
57
- x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
58
- x_nucliadb_user: str = Header(""),
59
- x_forwarded_for: str = Header(""),
60
- x_synchronous: bool = Header(
61
- False,
62
- description="When set to true, outputs response as JSON in a non-streaming way. "
63
- "This is slower and requires waiting for entire answer to be ready.",
64
- ),
65
- ) -> Union[StreamingResponse, HTTPClientError, Response]:
66
- try:
67
- item.resource_filters = [rid]
68
- return await create_chat_response(
69
- kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
70
- )
71
- except LimitsExceededError as exc:
72
- return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
73
- except predict.ProxiedPredictAPIError as err:
74
- return HTTPClientError(
75
- status_code=503,
76
- detail=f"Chat service unavailable. {err.status}: {err.detail}",
77
- )
78
- except IncompleteFindResultsError:
79
- return HTTPClientError(
80
- status_code=529,
81
- detail="Temporary error on information retrieval. Please try again.",
82
- )
83
- except predict.RephraseMissingContextError:
84
- return HTTPClientError(
85
- status_code=412,
86
- detail="Unable to rephrase the query with the provided context.",
87
- )
88
- except predict.RephraseError as err:
89
- return HTTPClientError(
90
- status_code=529,
91
- detail=f"Temporary error while rephrasing the query. Please try again later. Error: {err}",
92
- )
93
- except InvalidQueryError as exc:
94
- return HTTPClientError(status_code=412, detail=str(exc))
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,33 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- pytest_plugins = [
21
- "pytest_docker_fixtures",
22
- "nucliadb.tests.fixtures",
23
- "nucliadb.tests.tikv",
24
- "nucliadb.ingest.tests.fixtures", # should be refactored out
25
- "nucliadb.search.tests.node",
26
- "nucliadb.search.tests.fixtures",
27
- "nucliadb_utils.tests.conftest",
28
- "nucliadb_utils.tests.gcs",
29
- "nucliadb_utils.tests.s3",
30
- "nucliadb_utils.tests.nats",
31
- "nucliadb_utils.tests.asyncbenchmark",
32
- "nucliadb_utils.tests.indexing",
33
- ]
@@ -1,199 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- from enum import Enum
22
- from typing import Optional
23
-
24
- import pytest
25
- from httpx import AsyncClient
26
- from nucliadb_protos.nodereader_pb2 import GetShardRequest
27
- from nucliadb_protos.noderesources_pb2 import Shard
28
- from redis import asyncio as aioredis
29
-
30
- from nucliadb.common.cluster.manager import KBShardManager, get_index_node
31
- from nucliadb.common.maindb.utils import get_driver
32
- from nucliadb.ingest.cache import clear_ingest_cache
33
- from nucliadb.ingest.tests.fixtures import broker_resource
34
- from nucliadb.search import API_PREFIX
35
- from nucliadb_utils.tests import free_port
36
- from nucliadb_utils.utilities import clear_global_cache
37
-
38
-
39
- @pytest.fixture(scope="function")
40
- def test_settings_search(storage, natsd, node, maindb_driver): # type: ignore
41
- from nucliadb.ingest.settings import settings as ingest_settings
42
- from nucliadb_utils.cache.settings import settings as cache_settings
43
- from nucliadb_utils.settings import (
44
- nuclia_settings,
45
- nucliadb_settings,
46
- running_settings,
47
- )
48
-
49
- cache_settings.cache_pubsub_nats_url = [natsd]
50
-
51
- running_settings.debug = False
52
-
53
- ingest_settings.disable_pull_worker = True
54
-
55
- ingest_settings.nuclia_partitions = 1
56
-
57
- nuclia_settings.dummy_processing = True
58
- nuclia_settings.dummy_predict = True
59
- nuclia_settings.dummy_learning_services = True
60
-
61
- ingest_settings.grpc_port = free_port()
62
-
63
- nucliadb_settings.nucliadb_ingest = f"localhost:{ingest_settings.grpc_port}"
64
-
65
-
66
- @pytest.mark.asyncio
67
- @pytest.fixture(scope="function")
68
- async def search_api(test_settings_search, transaction_utility, redis): # type: ignore
69
- from nucliadb.common.cluster import manager
70
- from nucliadb.search.app import application
71
-
72
- driver = aioredis.from_url(f"redis://{redis[0]}:{redis[1]}")
73
- await driver.flushall()
74
-
75
- await application.router.startup()
76
-
77
- # Make sure is clean
78
- await asyncio.sleep(1)
79
- count = 0
80
- while len(manager.INDEX_NODES) < 2:
81
- print("awaiting cluster nodes - search fixtures.py")
82
- await asyncio.sleep(1)
83
- if count == 40:
84
- raise Exception("No cluster")
85
- count += 1
86
-
87
- def make_client_fixture(
88
- roles: Optional[list[Enum]] = None,
89
- user: str = "",
90
- version: str = "1",
91
- root: bool = False,
92
- extra_headers: Optional[dict[str, str]] = None,
93
- ) -> AsyncClient:
94
- roles = roles or []
95
- client_base_url = "http://test"
96
-
97
- if root is False:
98
- client_base_url = f"{client_base_url}/{API_PREFIX}/v{version}"
99
-
100
- client = AsyncClient(app=application, base_url=client_base_url) # type: ignore
101
- client.headers["X-NUCLIADB-ROLES"] = ";".join([role.value for role in roles])
102
- client.headers["X-NUCLIADB-USER"] = user
103
-
104
- extra_headers = extra_headers or {}
105
- if len(extra_headers) == 0:
106
- return client
107
-
108
- for header, value in extra_headers.items():
109
- client.headers[f"{header}"] = value
110
-
111
- return client
112
-
113
- yield make_client_fixture
114
- await application.router.shutdown()
115
- # Make sure nodes can sync
116
- await asyncio.sleep(1)
117
- await driver.flushall()
118
- await driver.close(close_connection_pool=True)
119
- clear_ingest_cache()
120
- clear_global_cache()
121
- manager.INDEX_NODES.clear()
122
-
123
-
124
- @pytest.fixture(scope="function")
125
- async def test_search_resource(
126
- indexing_utility_registered,
127
- processor,
128
- knowledgebox_ingest,
129
- ):
130
- """
131
- Create a resource that has every possible bit of information
132
- """
133
- message1 = broker_resource(knowledgebox_ingest, rid="foobar", slug="foobar-slug")
134
- kbid = await inject_message(processor, knowledgebox_ingest, message1)
135
- resource_field_count = 3
136
- await wait_for_shard(knowledgebox_ingest, resource_field_count)
137
- yield kbid
138
-
139
-
140
- @pytest.fixture(scope="function")
141
- async def multiple_search_resource(
142
- indexing_utility_registered,
143
- processor,
144
- knowledgebox_ingest,
145
- ):
146
- """
147
- Create 100 resources that have every possible bit of information
148
- """
149
- n_resources = 100
150
- fields_per_resource = 3
151
- for count in range(1, n_resources + 1):
152
- message = broker_resource(knowledgebox_ingest)
153
- await processor.process(message=message, seqid=count)
154
-
155
- await wait_for_shard(knowledgebox_ingest, n_resources * fields_per_resource)
156
- return knowledgebox_ingest
157
-
158
-
159
- async def inject_message(
160
- processor, knowledgebox_ingest, message, count: int = 1
161
- ) -> str:
162
- await processor.process(message=message, seqid=count)
163
- await wait_for_shard(knowledgebox_ingest, count)
164
- return knowledgebox_ingest
165
-
166
-
167
- async def wait_for_shard(knowledgebox_ingest: str, count: int) -> str:
168
- # Make sure is indexed
169
- driver = get_driver()
170
- txn = await driver.begin()
171
- shard_manager = KBShardManager()
172
- shard = await shard_manager.get_current_active_shard(txn, knowledgebox_ingest)
173
- if shard is None:
174
- raise Exception("Could not find shard")
175
- await txn.abort()
176
-
177
- checks: dict[str, bool] = {}
178
- for replica in shard.replicas:
179
- if replica.shard.id not in checks:
180
- checks[replica.shard.id] = False
181
-
182
- for i in range(30):
183
- for replica in shard.replicas:
184
- node_obj = get_index_node(replica.node)
185
- if node_obj is not None:
186
- req = GetShardRequest()
187
- req.shard_id.id = replica.shard.id
188
- count_shard: Shard = await node_obj.reader.GetShard(req) # type: ignore
189
- if count_shard.fields >= count:
190
- checks[replica.shard.id] = True
191
- else:
192
- checks[replica.shard.id] = False
193
-
194
- if all(checks.values()):
195
- break
196
- await asyncio.sleep(1)
197
-
198
- assert all(checks.values())
199
- return knowledgebox_ingest