nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,270 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ import logging
22
+ from abc import ABC, abstractmethod, abstractproperty
23
+ from dataclasses import dataclass
24
+ from typing import Optional
25
+
26
+ from nucliadb.search.predict import ProxiedPredictAPIError, SendToPredictError
27
+ from nucliadb.search.search.query_parser import models as parser_models
28
+ from nucliadb.search.utilities import get_predict
29
+ from nucliadb_models.internal.predict import RerankModel
30
+ from nucliadb_models.search import (
31
+ SCORE_TYPE,
32
+ KnowledgeboxFindResults,
33
+ )
34
+ from nucliadb_telemetry.metrics import Observer
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ reranker_observer = Observer("reranker", labels={"type": ""})
39
+
40
+
41
+ @dataclass
42
+ class RerankableItem:
43
+ id: str
44
+ score: float
45
+ score_type: SCORE_TYPE
46
+ content: str
47
+
48
+
49
+ @dataclass
50
+ class RankedItem:
51
+ id: str
52
+ score: float
53
+ score_type: SCORE_TYPE
54
+
55
+
56
+ @dataclass
57
+ class RerankingOptions:
58
+ kbid: str
59
+
60
+ # Query used to retrieve the results to be reranked. Smart rerankers will use it
61
+ query: str
62
+
63
+
64
+ class Reranker(ABC):
65
+ @abstractproperty
66
+ def window(self) -> Optional[int]:
67
+ """Number of elements the reranker requests. `None` means no specific
68
+ window is enforced."""
69
+ ...
70
+
71
+ @property
72
+ def needs_extra_results(self) -> bool:
73
+ return self.window is not None
74
+
75
+ async def rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
76
+ """Given a query and a set of resources, rerank elements and return the
77
+ list of reranked items sorted by decreasing score. The list will contain
78
+ at most, `window` elements.
79
+
80
+ """
81
+ # Enforce reranker window and drop the rest
82
+ # XXX: other search engines allow a mix of reranked and not reranked
83
+ # results, there's no technical reason we can't do it
84
+ items = items[: self.window]
85
+ reranked = await self._rerank(items, options)
86
+ return reranked
87
+
88
+ @abstractmethod
89
+ async def _rerank(
90
+ self, items: list[RerankableItem], options: RerankingOptions
91
+ ) -> list[RankedItem]: ...
92
+
93
+
94
+ class NoopReranker(Reranker):
95
+ """No-operation reranker. Given a list of items to rerank, it does nothing
96
+ with them and returns the items in the same order. It can be used to not alter
97
+ the previous ordering.
98
+
99
+ """
100
+
101
+ @property
102
+ def window(self) -> Optional[int]:
103
+ return None
104
+
105
+ @reranker_observer.wrap({"type": "noop"})
106
+ async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
107
+ return [
108
+ RankedItem(
109
+ id=item.id,
110
+ score=item.score,
111
+ score_type=item.score_type,
112
+ )
113
+ for item in items
114
+ ]
115
+
116
+
117
+ class PredictReranker(Reranker):
118
+ """Rerank using a reranking model.
119
+
120
+ It uses the Predict API to rerank elements using a model trained for this purpose.
121
+
122
+ """
123
+
124
+ def __init__(self, window: int):
125
+ self._window = window
126
+
127
+ @property
128
+ def window(self) -> int:
129
+ return self._window
130
+
131
+ @reranker_observer.wrap({"type": "predict"})
132
+ async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
133
+ if len(items) == 0:
134
+ return []
135
+
136
+ predict = get_predict()
137
+
138
+ # Conversion to format expected by predict. At the same time,
139
+ # deduplicates paragraphs found in different indices
140
+ context = {item.id: item.content for item in items}
141
+ request = RerankModel(
142
+ question=options.query,
143
+ user_id="", # TODO
144
+ context=context,
145
+ )
146
+ try:
147
+ response = await predict.rerank(options.kbid, request)
148
+ except (SendToPredictError, ProxiedPredictAPIError):
149
+ # predict failed, we can't rerank
150
+ reranked = [
151
+ RankedItem(
152
+ id=item.id,
153
+ score=item.score,
154
+ score_type=item.score_type,
155
+ )
156
+ for item in items
157
+ ]
158
+ else:
159
+ reranked = [
160
+ RankedItem(
161
+ id=id,
162
+ score=score,
163
+ score_type=SCORE_TYPE.RERANKER,
164
+ )
165
+ for id, score in response.context_scores.items()
166
+ ]
167
+ sort_by_score(reranked)
168
+ best = reranked
169
+ return best
170
+
171
+
172
+ class MultiMatchBoosterReranker(Reranker):
173
+ """This reranker gives more value to items that come from different indices"""
174
+
175
+ @property
176
+ def window(self) -> Optional[int]:
177
+ return None
178
+
179
+ @reranker_observer.wrap({"type": "multi_match_booster"})
180
+ async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
181
+ """Given a list of rerankable items, boost matches that appear multiple
182
+ times. The returned list can be smaller than the initial, as repeated
183
+ matches are deduplicated.
184
+ """
185
+ reranked_by_id = {}
186
+ for item in items:
187
+ if item.id not in reranked_by_id:
188
+ reranked_by_id[item.id] = RankedItem(
189
+ id=item.id,
190
+ score=item.score,
191
+ score_type=item.score_type,
192
+ )
193
+ else:
194
+ # it's a multiple match, boost the score
195
+ if reranked_by_id[item.id].score < item.score:
196
+ # previous implementation noted that we are using vector
197
+ # score x2 when we find a multiple match. However, this may
198
+ # not be true, as the same paragraph could come in any
199
+ # position in the rank fusioned result list
200
+ reranked_by_id[item.id].score = item.score * 2
201
+
202
+ reranked_by_id[item.id].score_type = SCORE_TYPE.BOTH
203
+
204
+ reranked = list(reranked_by_id.values())
205
+ sort_by_score(reranked)
206
+ return reranked
207
+
208
+
209
+ def get_reranker(reranker: parser_models.Reranker) -> Reranker:
210
+ algorithm: Reranker
211
+
212
+ if isinstance(reranker, parser_models.NoopReranker):
213
+ algorithm = NoopReranker()
214
+
215
+ elif isinstance(reranker, parser_models.MultiMatchBoosterReranker):
216
+ algorithm = MultiMatchBoosterReranker()
217
+
218
+ elif isinstance(reranker, parser_models.PredictReranker):
219
+ algorithm = PredictReranker(reranker.window)
220
+
221
+ else:
222
+ logger.warning(f"Unknown reranker requested: {reranker}. Using default instead")
223
+ algorithm = MultiMatchBoosterReranker()
224
+
225
+ return algorithm
226
+
227
+
228
+ def sort_by_score(items: list[RankedItem]):
229
+ """Sort `items` in place by decreasing score"""
230
+ items.sort(key=lambda item: item.score, reverse=True)
231
+
232
+
233
+ def apply_reranking(results: KnowledgeboxFindResults, reranked: list[RankedItem]):
234
+ """Given a list of reranked items, update the find results payload.
235
+
236
+ *ATTENTION* we assume `reranked` is an ordered list of decreasing relevance
237
+ and contains *only* the items relevant for this response. Any paragraph not
238
+ found in `reranked` will be removed from the `results`
239
+
240
+ """
241
+ inverted_results = {}
242
+ for rid, resource in results.resources.items():
243
+ for field_id, field in resource.fields.items():
244
+ for paragraph_id, paragraph in field.paragraphs.items():
245
+ inverted_results[paragraph_id] = (
246
+ paragraph,
247
+ (field_id, field),
248
+ (rid, resource),
249
+ )
250
+
251
+ # update results and best matches according to new scores
252
+ results.best_matches.clear()
253
+ for order, item in enumerate(reranked):
254
+ paragraph_id = item.id
255
+ paragraph = inverted_results[paragraph_id][0]
256
+ paragraph.score = item.score
257
+ paragraph.score_type = item.score_type
258
+ paragraph.order = order
259
+ results.best_matches.append(paragraph_id)
260
+
261
+ # prune unneeded results (not appearing in `reranked`)
262
+ extra = set(inverted_results.keys()) - set(results.best_matches)
263
+ for paragraph_id in extra:
264
+ _, (field_id, field), (rid, resource) = inverted_results[paragraph_id]
265
+ field.paragraphs.pop(paragraph_id)
266
+ if len(field.paragraphs) == 0:
267
+ resource.fields.pop(field_id)
268
+
269
+ if len(resource.fields) == 0:
270
+ results.resources.pop(rid)
@@ -19,20 +19,15 @@
19
19
  #
20
20
  import asyncio
21
21
 
22
+ from nucliadb.common.cluster.base import AbstractIndexNode
22
23
  from nucliadb_protos.nodereader_pb2 import (
23
24
  GetShardRequest,
24
- ParagraphSearchRequest,
25
- ParagraphSearchResponse,
26
- RelationSearchRequest,
27
- RelationSearchResponse,
28
25
  SearchRequest,
29
26
  SearchResponse,
30
27
  SuggestRequest,
31
28
  SuggestResponse,
32
29
  )
33
30
  from nucliadb_protos.noderesources_pb2 import Shard
34
-
35
- from nucliadb.common.cluster.base import AbstractIndexNode
36
31
  from nucliadb_telemetry import metrics
37
32
 
38
33
  node_observer = metrics.Observer(
@@ -44,9 +39,7 @@ node_observer = metrics.Observer(
44
39
  )
45
40
 
46
41
 
47
- async def query_shard(
48
- node: AbstractIndexNode, shard: str, query: SearchRequest
49
- ) -> SearchResponse:
42
+ async def query_shard(node: AbstractIndexNode, shard: str, query: SearchRequest) -> SearchResponse:
50
43
  req = SearchRequest()
51
44
  req.CopyFrom(query)
52
45
  req.shard = shard
@@ -61,31 +54,9 @@ async def get_shard(node: AbstractIndexNode, shard_id: str) -> Shard:
61
54
  return await node.reader.GetShard(req) # type: ignore
62
55
 
63
56
 
64
- async def query_paragraph_shard(
65
- node: AbstractIndexNode, shard: str, query: ParagraphSearchRequest
66
- ) -> ParagraphSearchResponse:
67
- req = ParagraphSearchRequest()
68
- req.CopyFrom(query)
69
- req.id = shard
70
- with node_observer({"type": "paragraph_search", "node_id": node.id}):
71
- return await node.reader.ParagraphSearch(req) # type: ignore
72
-
73
-
74
- async def suggest_shard(
75
- node: AbstractIndexNode, shard: str, query: SuggestRequest
76
- ) -> SuggestResponse:
57
async def suggest_shard(node: AbstractIndexNode, shard: str, query: SuggestRequest) -> SuggestResponse:
    """Run a suggest query against one shard of an index node.

    `query` is left untouched: it is copied into a fresh `SuggestRequest`
    whose `shard` field is set to `shard`. The reader call is wrapped in
    `node_observer`, labelled with type "suggest" and the node id.
    """
    request = SuggestRequest()
    request.CopyFrom(query)
    request.shard = shard

    labels = {"type": "suggest", "node_id": node.id}
    with node_observer(labels):
        response = await node.reader.Suggest(request)  # type: ignore
    return response
82
-
83
-
84
- async def relations_shard(
85
- node: AbstractIndexNode, shard: str, query: RelationSearchRequest
86
- ) -> RelationSearchResponse:
87
- req = RelationSearchRequest()
88
- req.CopyFrom(query)
89
- req.shard_id = shard
90
- with node_observer({"type": "relation_search", "node_id": node.id}):
91
- return await node.reader.RelationSearch(req) # type: ignore
@@ -20,8 +20,6 @@
20
20
  import asyncio
21
21
  from typing import Optional
22
22
 
23
- from nucliadb_protos.utils_pb2 import ExtractedText
24
-
25
23
  from nucliadb.common import datamanagers
26
24
  from nucliadb.common.maindb.utils import get_driver
27
25
  from nucliadb.ingest.fields.base import Field
@@ -35,6 +33,7 @@ from nucliadb_models.search import (
35
33
  SummarizeRequest,
36
34
  SummarizeResourceModel,
37
35
  )
36
+ from nucliadb_protos.utils_pb2 import ExtractedText
38
37
  from nucliadb_utils.utilities import get_storage
39
38
 
40
39
  ExtractedTexts = list[tuple[str, str, Optional[ExtractedText]]]
@@ -52,15 +51,11 @@ async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
52
51
  predict_request.user_prompt = request.user_prompt
53
52
  predict_request.summary_kind = request.summary_kind
54
53
 
55
- for uuid_or_slug, field_id, extracted_text in await get_extracted_texts(
56
- kbid, request.resources
57
- ):
54
+ for uuid_or_slug, field_id, extracted_text in await get_extracted_texts(kbid, request.resources):
58
55
  if extracted_text is None:
59
56
  continue
60
57
 
61
- fields = predict_request.resources.setdefault(
62
- uuid_or_slug, SummarizeResourceModel()
63
- ).fields
58
+ fields = predict_request.resources.setdefault(uuid_or_slug, SummarizeResourceModel()).fields
64
59
  fields[field_id] = extracted_text.text
65
60
 
66
61
  if len(predict_request.resources) == 0:
@@ -70,9 +65,7 @@ async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
70
65
  return await predict.summarize(kbid, predict_request)
71
66
 
72
67
 
73
- async def get_extracted_texts(
74
- kbid: str, resource_uuids_or_slugs: list[str]
75
- ) -> ExtractedTexts:
68
+ async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) -> ExtractedTexts:
76
69
  results: ExtractedTexts = []
77
70
 
78
71
  driver = get_driver()
@@ -82,7 +75,7 @@ async def get_extracted_texts(
82
75
  tasks = []
83
76
 
84
77
  # Schedule getting extracted text for each field of each resource
85
- async with driver.transaction() as txn:
78
+ async with driver.transaction(read_only=True) as txn:
86
79
  if not await datamanagers.kb.exists_kb(txn, kbid=kbid):
87
80
  raise datamanagers.exceptions.KnowledgeBoxNotFound(kbid)
88
81
 
@@ -90,16 +83,12 @@ async def get_extracted_texts(
90
83
  for uuid_or_slug in set(resource_uuids_or_slugs):
91
84
  uuid = await get_resource_uuid(kb_orm, uuid_or_slug)
92
85
  if uuid is None:
93
- logger.warning(
94
- f"Resource {uuid_or_slug} not found in KB", extra={"kbid": kbid}
95
- )
86
+ logger.warning(f"Resource {uuid_or_slug} not found in KB", extra={"kbid": kbid})
96
87
  continue
97
88
  resource_orm = Resource(txn=txn, storage=storage, kb=kb_orm, uuid=uuid)
98
89
  fields = await resource_orm.get_fields(force=True)
99
90
  for _, field in fields.items():
100
- task = asyncio.create_task(
101
- get_extracted_text(uuid_or_slug, field, max_tasks)
102
- )
91
+ task = asyncio.create_task(get_extracted_text(uuid_or_slug, field, max_tasks))
103
92
  tasks.append(task)
104
93
 
105
94
  if len(tasks) == 0:
@@ -17,9 +17,26 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import logging
20
21
  from typing import Optional, Union
21
22
 
23
+ from pydantic import BaseModel
24
+
25
+ from nucliadb.common.datamanagers.atomic import kb
22
26
  from nucliadb_models.search import BaseSearchRequest, MinScore
27
+ from nucliadb_utils import const
28
+ from nucliadb_utils.utilities import has_feature
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
async def filter_hidden_resources(kbid: str, show_hidden: bool) -> Optional[bool]:
    """Compute the hidden-resources filter value for a search on `kbid`.

    The KB configuration is always fetched. Returns `False` when the KB has
    hidden resources enabled and the caller did not ask to see them. In every
    other case (no config, feature disabled, or `show_hidden` set) returns
    `None`, meaning no filtering: all resources are shown.
    """
    config = await kb.get_config(kbid=kbid)
    hiding_active = config and config.hidden_resources_enabled
    if show_hidden or not hiding_active:
        return None  # None = No filtering, show all resources
    return False
23
40
 
24
41
 
25
42
  def is_empty_query(request: BaseSearchRequest) -> bool:
@@ -36,7 +53,7 @@ def is_exact_match_only_query(request: BaseSearchRequest) -> bool:
36
53
  'foo "something" else' -> False
37
54
  """
38
55
  query = request.query.strip()
39
- return len(query) > 0 and query[0] == '"' and query[-1] == '"'
56
+ return len(query) > 0 and query.startswith('"') and query.endswith('"')
40
57
 
41
58
 
42
59
  def should_disable_vector_search(request: BaseSearchRequest) -> bool:
@@ -58,9 +75,7 @@ def min_score_from_query_params(
58
75
  deprecated_min_score: Optional[float],
59
76
  ) -> MinScore:
60
77
  # Keep backward compatibility with the deprecated min_score parameter
61
- semantic = (
62
- deprecated_min_score if min_score_semantic is None else min_score_semantic
63
- )
78
+ semantic = deprecated_min_score if min_score_semantic is None else min_score_semantic
64
79
  return MinScore(bm25=min_score_bm25, semantic=semantic)
65
80
 
66
81
 
@@ -72,3 +87,11 @@ def min_score_from_payload(min_score: Optional[Union[float, MinScore]]) -> MinSc
72
87
  elif isinstance(min_score, float):
73
88
  return MinScore(bm25=0, semantic=min_score)
74
89
  return min_score
90
+
91
+
92
def maybe_log_request_payload(kbid: str, endpoint: str, item: BaseModel):
    """Log the request payload when payload logging is enabled for this KB.

    Does nothing unless the `LOG_REQUEST_PAYLOADS` feature flag (evaluated
    with the `kbid` as context, off by default) is on. When it is, an INFO
    record is emitted carrying the kbid, the endpoint and the JSON dump of
    `item` as structured `extra` fields.
    """
    enabled = has_feature(const.Features.LOG_REQUEST_PAYLOADS, context={"kbid": kbid}, default=False)
    if not enabled:
        return

    details = {"kbid": kbid, "endpoint": endpoint, "payload": item.model_dump_json()}
    logger.info("Request payload", extra=details)
@@ -18,6 +18,8 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
+ from typing import Optional
22
+
21
23
  from pydantic import Field
22
24
 
23
25
  from nucliadb.ingest.settings import DriverSettings
@@ -28,8 +30,20 @@ class Settings(DriverSettings):
28
30
  slow_find_log_threshold: float = Field(
29
31
  default=3.0,
30
32
  title="Slow query log threshold",
31
- description="The threshold in seconds for logging slow queries",
33
+ description="The threshold in seconds for logging slow find queries",
34
+ )
35
+
36
+ slow_node_query_log_threshold: float = Field(
37
+ default=2.0,
38
+ title="Slow node query log threshold",
39
+ description="The threshold in seconds for logging slow node queries",
40
+ )
41
+ prequeries_max_parallel: int = Field(
42
+ default=2,
43
+ title="Prequeries max parallel",
44
+ description="The maximum number of prequeries to run in parallel per /ask request",
32
45
  )
46
+ nidx_address: Optional[str] = Field(default=None)
33
47
 
34
48
 
35
49
  settings = Settings()
@@ -98,9 +98,7 @@ def get_temp_access_token(request: Request):
98
98
  logger.warning(
99
99
  "Dynamically generating JWK key. Please set JWK_KEY env variable to avoid this message."
100
100
  )
101
- settings.jwk_key = orjson.dumps(
102
- jwk.JWK.generate(kty="oct", size=256, kid="dyn")
103
- ).decode("utf-8")
101
+ settings.jwk_key = orjson.dumps(jwk.JWK.generate(kty="oct", size=256, kid="dyn")).decode("utf-8")
104
102
  jwetoken.add_recipient(jwk.JWK(**orjson.loads(settings.jwk_key)))
105
103
  token = jwetoken.serialize(compact=True)
106
104
  return JSONResponse({"token": token})
@@ -154,16 +152,14 @@ def introspect_endpoint(request: Request) -> StreamingResponse:
154
152
  return StreamingResponse(
155
153
  content=introspect.stream_tar(request.app),
156
154
  status_code=200,
157
- headers={
158
- "Content-Disposition": f"attachment; filename=introspect_{introspect_id}.tar.gz"
159
- },
155
+ headers={"Content-Disposition": f"attachment; filename=introspect_{introspect_id}.tar.gz"},
160
156
  media_type="application/octet-stream",
161
157
  )
162
158
 
163
159
 
164
160
  @standalone_api_router.get("/pull/position")
165
161
  async def pull_status(request: Request) -> JSONResponse:
166
- async with datamanagers.with_transaction() as txn:
162
+ async with datamanagers.with_ro_transaction() as txn:
167
163
  # standalone assumes 1 partition
168
164
  current_offset = await datamanagers.processing.get_pull_offset(
169
165
  txn, pull_type_id=processing.get_nua_api_id(), partition="1"
@@ -180,9 +176,7 @@ class UpdatePullPosition(pydantic.BaseModel):
180
176
 
181
177
 
182
178
  @standalone_api_router.patch("/pull/position")
183
- async def update_pull_position(
184
- request: Request, item: UpdatePullPosition
185
- ) -> JSONResponse:
179
+ async def update_pull_position(request: Request, item: UpdatePullPosition) -> JSONResponse:
186
180
  async with datamanagers.with_transaction() as txn:
187
181
  # standalone assumes 1 partition
188
182
  await datamanagers.processing.set_pull_offset(
@@ -20,7 +20,6 @@
20
20
  import logging
21
21
  import os
22
22
 
23
- import nucliadb_admin_assets # type: ignore
24
23
  from fastapi import FastAPI
25
24
  from fastapi.responses import RedirectResponse
26
25
  from fastapi.staticfiles import StaticFiles
@@ -31,13 +30,12 @@ from starlette.requests import ClientDisconnect
31
30
  from starlette.responses import HTMLResponse
32
31
  from starlette.routing import Mount
33
32
 
34
- from nucliadb.common.context.fastapi import set_app_context
33
+ import nucliadb_admin_assets # type: ignore
35
34
  from nucliadb.middleware import ProcessTimeHeaderMiddleware
36
- from nucliadb.middleware.transaction import ReadOnlyTransactionMiddleware
37
35
  from nucliadb.reader import API_PREFIX
38
36
  from nucliadb.reader.api.v1.router import api as api_reader_v1
39
37
  from nucliadb.search.api.v1.router import api as api_search_v1
40
- from nucliadb.standalone.lifecycle import finalize, initialize
38
+ from nucliadb.standalone.lifecycle import lifespan
41
39
  from nucliadb.train.api.v1.router import api as api_train_v1
42
40
  from nucliadb.writer.api.v1.router import api as api_writer_v1
43
41
  from nucliadb_telemetry.fastapi import metrics_endpoint
@@ -45,9 +43,11 @@ from nucliadb_telemetry.fastapi.utils import (
45
43
  client_disconnect_handler,
46
44
  global_exception_handler,
47
45
  )
46
+ from nucliadb_utils.audit.stream import AuditMiddleware
48
47
  from nucliadb_utils.fastapi.openapi import extend_openapi
49
48
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
50
49
  from nucliadb_utils.settings import http_settings, running_settings
50
+ from nucliadb_utils.utilities import get_audit
51
51
 
52
52
  from .api_router import standalone_api_router
53
53
  from .auth import get_auth_backend
@@ -71,7 +71,7 @@ HOMEPAGE_HTML = """
71
71
  <h2>Quick Links</h2>
72
72
  <ul>
73
73
  <li><a href="/admin">Admin UI</a></li>
74
- <li><a href="https://docs.nuclia.dev/docs/guides/nucliadb/deploy/basics">NucliaDB Deployment Documentation</a></li>
74
+ <li><a href="https://docs.nuclia.dev/docs/management/nucliadb/deploy/basics">NucliaDB Deployment Documentation</a></li>
75
75
  <li><a href="https://docs.nuclia.dev/docs/api">API Reference</a></li>
76
76
  <li><a href="/api/v1/docs">API Explorer</a></li>
77
77
  <li><a href="/metrics">Metrics</a></li>
@@ -94,7 +94,7 @@ def application_factory(settings: Settings) -> FastAPI:
94
94
  AuthenticationMiddleware,
95
95
  backend=get_auth_backend(settings),
96
96
  ),
97
- Middleware(ReadOnlyTransactionMiddleware),
97
+ Middleware(AuditMiddleware, audit_utility_getter=get_audit),
98
98
  ]
99
99
  if running_settings.debug:
100
100
  middleware.append(Middleware(ProcessTimeHeaderMiddleware))
@@ -102,8 +102,7 @@ def application_factory(settings: Settings) -> FastAPI:
102
102
  fastapi_settings = dict(
103
103
  debug=running_settings.debug,
104
104
  middleware=middleware,
105
- on_startup=[initialize],
106
- on_shutdown=[finalize],
105
+ lifespan=lifespan,
107
106
  exception_handlers={
108
107
  Exception: global_exception_handler,
109
108
  ClientDisconnect: client_disconnect_handler,
@@ -140,9 +139,7 @@ def application_factory(settings: Settings) -> FastAPI:
140
139
  # mount admin app assets
141
140
  application.mount(
142
141
  "/admin",
143
- StaticFiles(
144
- directory=os.path.dirname(nucliadb_admin_assets.__file__), html=True
145
- ),
142
+ StaticFiles(directory=os.path.dirname(nucliadb_admin_assets.__file__), html=True),
146
143
  name="static",
147
144
  )
148
145
  # redirect /contributor -> /admin
@@ -158,7 +155,4 @@ def application_factory(settings: Settings) -> FastAPI:
158
155
  if isinstance(route, Mount):
159
156
  route.app.settings = settings # type: ignore
160
157
 
161
- # Inject application context into the fastapi app's state
162
- set_app_context(application)
163
-
164
158
  return application