nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from datetime import datetime
21
- from time import time
22
21
  from typing import Optional, Union
23
22
 
24
23
  from fastapi import Header, Request, Response
@@ -28,9 +27,11 @@ from nucliadb.models.responses import HTTPClientError
28
27
  from nucliadb.search.api.v1.router import KB_PREFIX, api
29
28
  from nucliadb.search.api.v1.utils import fastapi_query
30
29
  from nucliadb.search.requesters.utils import Method, node_query
30
+ from nucliadb.search.search import cache
31
31
  from nucliadb.search.search.exceptions import InvalidQueryError
32
32
  from nucliadb.search.search.merge import merge_suggest_results
33
33
  from nucliadb.search.search.query import suggest_query_to_pb
34
+ from nucliadb.search.search.utils import filter_hidden_resources
34
35
  from nucliadb_models.common import FieldTypeName
35
36
  from nucliadb_models.resource import NucliaDBRoles
36
37
  from nucliadb_models.search import (
@@ -40,13 +41,14 @@ from nucliadb_models.search import (
40
41
  SearchParamDefaults,
41
42
  SuggestOptions,
42
43
  )
44
+ from nucliadb_models.utils import DateTime
43
45
  from nucliadb_utils.authentication import requires
44
- from nucliadb_utils.utilities import get_audit
45
46
 
46
47
 
47
48
  @api.get(
48
49
  f"/{KB_PREFIX}/{{kbid}}/suggest",
49
50
  status_code=200,
51
+ summary="Suggest on a knowledge box",
50
52
  description="Suggestions on a knowledge box",
51
53
  response_model=KnowledgeboxSuggestResults,
52
54
  response_model_exclude_unset=True,
@@ -62,21 +64,15 @@ async def suggest_knowledgebox(
62
64
  fields: list[str] = fastapi_query(SearchParamDefaults.fields),
63
65
  filters: list[str] = fastapi_query(SearchParamDefaults.filters),
64
66
  faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
65
- range_creation_start: Optional[datetime] = fastapi_query(
66
- SearchParamDefaults.range_creation_start
67
- ),
68
- range_creation_end: Optional[datetime] = fastapi_query(
69
- SearchParamDefaults.range_creation_end
70
- ),
71
- range_modification_start: Optional[datetime] = fastapi_query(
67
+ range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
68
+ range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
69
+ range_modification_start: Optional[DateTime] = fastapi_query(
72
70
  SearchParamDefaults.range_modification_start
73
71
  ),
74
- range_modification_end: Optional[datetime] = fastapi_query(
72
+ range_modification_end: Optional[DateTime] = fastapi_query(
75
73
  SearchParamDefaults.range_modification_end
76
74
  ),
77
- features: list[SuggestOptions] = fastapi_query(
78
- SearchParamDefaults.suggest_features
79
- ),
75
+ features: list[SuggestOptions] = fastapi_query(SearchParamDefaults.suggest_features),
80
76
  show: list[ResourceProperties] = fastapi_query(SearchParamDefaults.show),
81
77
  field_type_filter: list[FieldTypeName] = fastapi_query(
82
78
  SearchParamDefaults.field_type_filter, alias="field_type"
@@ -86,6 +82,7 @@ async def suggest_knowledgebox(
86
82
  x_forwarded_for: str = Header(""),
87
83
  debug: bool = fastapi_query(SearchParamDefaults.debug),
88
84
  highlight: bool = fastapi_query(SearchParamDefaults.highlight),
85
+ show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
89
86
  ) -> Union[KnowledgeboxSuggestResults, HTTPClientError]:
90
87
  try:
91
88
  return await suggest(
@@ -107,6 +104,7 @@ async def suggest_knowledgebox(
107
104
  x_forwarded_for,
108
105
  debug,
109
106
  highlight,
107
+ show_hidden,
110
108
  )
111
109
  except InvalidQueryError as exc:
112
110
  return HTTPClientError(status_code=412, detail=str(exc))
@@ -131,49 +129,37 @@ async def suggest(
131
129
  x_forwarded_for: str,
132
130
  debug: bool,
133
131
  highlight: bool,
132
+ show_hidden: bool,
134
133
  ) -> KnowledgeboxSuggestResults:
135
- # We need the nodes/shards that are connected to the KB
136
- audit = get_audit()
137
- start_time = time()
138
-
139
- # We need to query all nodes
140
- pb_query = suggest_query_to_pb(
141
- features,
142
- query,
143
- fields,
144
- filters,
145
- faceted,
146
- range_creation_start,
147
- range_creation_end,
148
- range_modification_start,
149
- range_modification_end,
150
- )
151
- results, incomplete_results, queried_nodes = await node_query(
152
- kbid, Method.SUGGEST, pb_query
153
- )
154
-
155
- # We need to merge
156
- search_results = await merge_suggest_results(
157
- results,
158
- kbid=kbid,
159
- show=show,
160
- field_type_filter=field_type_filter,
161
- highlight=highlight,
162
- )
134
+ with cache.request_caches():
135
+ hidden = await filter_hidden_resources(kbid, show_hidden)
136
+ pb_query = suggest_query_to_pb(
137
+ features,
138
+ query,
139
+ fields,
140
+ filters,
141
+ faceted,
142
+ range_creation_start,
143
+ range_creation_end,
144
+ range_modification_start,
145
+ range_modification_end,
146
+ hidden,
147
+ )
148
+ results, incomplete_results, queried_nodes = await node_query(kbid, Method.SUGGEST, pb_query)
163
149
 
164
- response.status_code = 206 if incomplete_results else 200
150
+ # We need to merge
151
+ search_results = await merge_suggest_results(
152
+ results,
153
+ kbid=kbid,
154
+ show=show,
155
+ field_type_filter=field_type_filter,
156
+ highlight=highlight,
157
+ )
165
158
 
166
- queried_shards = [shard_id for _, shard_id in queried_nodes]
167
- if debug and queried_shards:
168
- search_results.shards = queried_shards
159
+ response.status_code = 206 if incomplete_results else 200
169
160
 
170
- if audit is not None:
171
- await audit.suggest(
172
- kbid,
173
- x_nucliadb_user,
174
- x_ndb_client.to_proto(),
175
- x_forwarded_for,
176
- time() - start_time,
177
- )
161
+ queried_shards = [shard_id for _, shard_id in queried_nodes]
162
+ if debug and queried_shards:
163
+ search_results.shards = queried_shards
178
164
 
179
- return search_results
165
+ return search_results
@@ -26,7 +26,7 @@ from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
26
26
  from nucliadb.models.responses import HTTPClientError
27
27
  from nucliadb.search import predict
28
28
  from nucliadb.search.api.v1.router import KB_PREFIX, api
29
- from nucliadb.search.search.summarize import summarize
29
+ from nucliadb.search.search.summarize import NoResourcesToSummarize, summarize
30
30
  from nucliadb_models.resource import NucliaDBRoles
31
31
  from nucliadb_models.search import SummarizedResponse, SummarizeRequest
32
32
  from nucliadb_utils.authentication import requires
@@ -36,8 +36,7 @@ from nucliadb_utils.exceptions import LimitsExceededError
36
36
  @api.post(
37
37
  f"/{KB_PREFIX}/{{kbid}}/summarize",
38
38
  status_code=200,
39
- name="Summarize Your Documents",
40
- summary="Summarize Your Documents",
39
+ summary="Summarize your documents",
41
40
  description="Summarize Your Documents",
42
41
  tags=["Search"],
43
42
  response_model=SummarizedResponse,
@@ -53,10 +52,15 @@ async def summarize_endpoint(
53
52
  return await summarize(kbid, item)
54
53
  except KnowledgeBoxNotFound:
55
54
  return HTTPClientError(status_code=404, detail="Knowledge box not found")
55
+ except NoResourcesToSummarize:
56
+ return HTTPClientError(
57
+ status_code=412,
58
+ detail="Could not summarize: No resources or extracted text found.",
59
+ )
56
60
  except LimitsExceededError as exc:
57
61
  return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
58
62
  except predict.ProxiedPredictAPIError as err:
59
63
  return HTTPClientError(
60
- status_code=503,
61
- detail=f"Summarize service unavailable. {err.status}: {err.detail}",
64
+ status_code=err.status,
65
+ detail=err.detail,
62
66
  )
@@ -36,7 +36,8 @@ def fastapi_query(param: ParamDefault, default: Optional[Any] = _NOT_SET, **kw)
36
36
  default=default_value,
37
37
  title=param.title,
38
38
  description=param.description,
39
+ le=param.le,
39
40
  gt=param.gt,
40
41
  max_length=param.max_items,
41
- **kw
42
+ **kw,
42
43
  )
nucliadb/search/app.py CHANGED
@@ -17,83 +17,52 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import pkg_resources
20
+ import importlib.metadata
21
+
21
22
  from fastapi import FastAPI
22
23
  from fastapi.responses import JSONResponse
23
24
  from starlette.middleware import Middleware
24
25
  from starlette.middleware.authentication import AuthenticationMiddleware
25
- from starlette.middleware.cors import CORSMiddleware
26
26
  from starlette.requests import ClientDisconnect, Request
27
27
  from starlette.responses import HTMLResponse
28
28
 
29
29
  from nucliadb.common.cluster import manager
30
+ from nucliadb.ingest.settings import DriverConfig
30
31
  from nucliadb.middleware import ProcessTimeHeaderMiddleware
31
- from nucliadb.middleware.transaction import ReadOnlyTransactionMiddleware
32
32
  from nucliadb.search import API_PREFIX
33
33
  from nucliadb.search.api.v1.router import api as api_v1
34
- from nucliadb.search.lifecycle import finalize, initialize
34
+ from nucliadb.search.lifecycle import lifespan
35
35
  from nucliadb.search.settings import settings
36
36
  from nucliadb_telemetry import errors
37
- from nucliadb_utils import const
37
+ from nucliadb_telemetry.fastapi.utils import (
38
+ client_disconnect_handler,
39
+ global_exception_handler,
40
+ )
41
+ from nucliadb_utils.audit.stream import AuditMiddleware
38
42
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
39
43
  from nucliadb_utils.fastapi.openapi import extend_openapi
40
44
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
41
- from nucliadb_utils.settings import http_settings, running_settings
42
- from nucliadb_utils.utilities import has_feature
45
+ from nucliadb_utils.settings import running_settings
46
+ from nucliadb_utils.utilities import get_audit
43
47
 
44
48
  middleware = []
45
-
46
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
47
- middleware.append(
48
- Middleware(
49
- CORSMiddleware,
50
- allow_origins=http_settings.cors_origins,
51
- allow_methods=["*"],
52
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
53
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
54
- allow_headers=["*", "Authorization"],
55
- )
56
- )
57
-
58
49
  middleware.extend(
59
50
  [
60
- Middleware(
61
- AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()
62
- ),
63
- Middleware(ReadOnlyTransactionMiddleware),
51
+ Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
52
+ Middleware(AuditMiddleware, audit_utility_getter=get_audit),
64
53
  ]
65
54
  )
66
55
 
67
56
  if running_settings.debug:
68
57
  middleware.append(Middleware(ProcessTimeHeaderMiddleware))
69
58
 
70
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
71
-
72
-
73
- on_startup = [initialize]
74
- on_shutdown = [finalize]
75
-
76
-
77
- async def global_exception_handler(request: Request, exc: Exception):
78
- errors.capture_exception(exc)
79
- return JSONResponse(
80
- status_code=500,
81
- content={"detail": "Something went wrong, please contact your administrator"},
82
- )
83
-
84
-
85
- async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
86
- return JSONResponse(
87
- status_code=200,
88
- content={"detail": "Client disconnected while an operation was in course"},
89
- )
59
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
90
60
 
91
61
 
92
62
  fastapi_settings = dict(
93
63
  debug=running_settings.debug,
94
64
  middleware=middleware,
95
- on_startup=on_startup,
96
- on_shutdown=on_shutdown,
65
+ lifespan=lifespan,
97
66
  exception_handlers={
98
67
  Exception: global_exception_handler,
99
68
  ClientDisconnect: client_disconnect_handler,
@@ -102,7 +71,6 @@ fastapi_settings = dict(
102
71
 
103
72
 
104
73
  base_app = FastAPI(title="NucliaDB Search API", **fastapi_settings) # type: ignore
105
-
106
74
  base_app.include_router(api_v1)
107
75
 
108
76
  extend_openapi(base_app)
@@ -139,7 +107,7 @@ async def node_members(request: Request) -> JSONResponse:
139
107
 
140
108
 
141
109
  async def alive(request: Request) -> JSONResponse:
142
- if len(manager.get_index_nodes()) == 0 and settings.driver != "local":
110
+ if len(manager.get_index_nodes()) == 0 and settings.driver != DriverConfig.LOCAL:
143
111
  return JSONResponse({"status": "error"}, status_code=503)
144
112
  else:
145
113
  return JSONResponse({"status": "ok"})
@@ -17,8 +17,13 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ from contextlib import asynccontextmanager
21
+
22
+ from fastapi import FastAPI
23
+
20
24
  from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
21
- from nucliadb.common.maindb.utils import setup_driver # type: ignore
25
+ from nucliadb.common.maindb.utils import setup_driver
26
+ from nucliadb.common.nidx import start_nidx_utility
22
27
  from nucliadb.ingest.utils import start_ingest, stop_ingest
23
28
  from nucliadb.search import SERVICE_NAME
24
29
  from nucliadb.search.predict import start_predict_engine
@@ -33,7 +38,8 @@ from nucliadb_utils.utilities import (
33
38
  )
34
39
 
35
40
 
36
- async def initialize() -> None:
41
+ @asynccontextmanager
42
+ async def lifespan(app: FastAPI):
37
43
  await setup_telemetry(SERVICE_NAME)
38
44
 
39
45
  await start_ingest(SERVICE_NAME)
@@ -41,11 +47,12 @@ async def initialize() -> None:
41
47
 
42
48
  await setup_driver()
43
49
  await setup_cluster()
50
+ await start_nidx_utility()
44
51
 
45
52
  await start_audit_utility(SERVICE_NAME)
46
53
 
54
+ yield
47
55
 
48
- async def finalize() -> None:
49
56
  await stop_ingest()
50
57
  if get_utility(Utility.PARTITION):
51
58
  clean_utility(Utility.PARTITION)