nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -22,19 +22,10 @@ import datetime
22
22
  import math
23
23
  from typing import Any, Optional, Set, Union
24
24
 
25
- from nucliadb_protos.nodereader_pb2 import (
26
- DocumentResult,
27
- DocumentScored,
28
- DocumentSearchResponse,
29
- EntitiesSubgraphRequest,
30
- ParagraphResult,
31
- ParagraphSearchResponse,
32
- RelationSearchResponse,
33
- SearchResponse,
34
- SuggestResponse,
35
- VectorSearchResponse,
36
- )
37
-
25
+ from nucliadb.common.ids import FieldId, ParagraphId
26
+ from nucliadb.common.models_utils.from_proto import RelationTypePbMap
27
+ from nucliadb.search.search import cache
28
+ from nucliadb.search.search.cut import cut_page
38
29
  from nucliadb.search.search.fetch import (
39
30
  fetch_resources,
40
31
  get_labels_paragraph,
@@ -43,11 +34,11 @@ from nucliadb.search.search.fetch import (
43
34
  )
44
35
  from nucliadb_models.common import FieldTypeName
45
36
  from nucliadb_models.labels import translate_system_to_alias_label
46
- from nucliadb_models.metadata import RelationTypePbMap
47
37
  from nucliadb_models.resource import ExtractedDataTypeName
48
38
  from nucliadb_models.search import (
49
39
  DirectionalRelation,
50
40
  EntitySubgraph,
41
+ EntityType,
51
42
  KnowledgeboxSearchResults,
52
43
  KnowledgeboxSuggestResults,
53
44
  MinScore,
@@ -56,7 +47,6 @@ from nucliadb_models.search import (
56
47
  RelatedEntities,
57
48
  RelatedEntity,
58
49
  RelationDirection,
59
- RelationNodeTypeMap,
60
50
  Relations,
61
51
  ResourceProperties,
62
52
  ResourceResult,
@@ -69,38 +59,59 @@ from nucliadb_models.search import (
69
59
  SortOrder,
70
60
  TextPosition,
71
61
  )
62
+ from nucliadb_protos.nodereader_pb2 import (
63
+ DocumentResult,
64
+ DocumentScored,
65
+ DocumentSearchResponse,
66
+ EntitiesSubgraphRequest,
67
+ ParagraphResult,
68
+ ParagraphSearchResponse,
69
+ RelationSearchResponse,
70
+ SearchResponse,
71
+ SuggestResponse,
72
+ VectorSearchResponse,
73
+ )
74
+ from nucliadb_protos.utils_pb2 import RelationNode
72
75
 
73
- from .cache import get_resource_cache, get_resource_from_cache
74
76
  from .metrics import merge_observer
75
- from .paragraphs import ExtractedTextCache, get_paragraph_text, get_text_sentence
77
+ from .paragraphs import get_paragraph_text, get_text_sentence
76
78
 
77
79
  Bm25Score = tuple[float, float]
78
80
  TimestampScore = datetime.datetime
79
81
  TitleScore = str
80
- Score = Union[Bm25Score, TimestampScore, TitleScore]
82
+ SortValue = Union[Bm25Score, TimestampScore, TitleScore]
83
+
84
+
85
+ def relation_node_type_to_entity_type(node_type: RelationNode.NodeType.ValueType) -> EntityType:
86
+ return {
87
+ RelationNode.NodeType.ENTITY: EntityType.ENTITY,
88
+ RelationNode.NodeType.LABEL: EntityType.LABEL,
89
+ RelationNode.NodeType.RESOURCE: EntityType.RESOURCE,
90
+ RelationNode.NodeType.USER: EntityType.USER,
91
+ }[node_type]
81
92
 
82
93
 
83
94
  def sort_results_by_score(results: Union[list[ParagraphResult], list[DocumentResult]]):
84
95
  results.sort(key=lambda x: (x.score.bm25, x.score.booster), reverse=True)
85
96
 
86
97
 
87
- async def text_score(
98
+ async def get_sort_value(
88
99
  item: Union[DocumentResult, ParagraphResult],
89
100
  sort_field: SortField,
90
101
  kbid: str,
91
- ) -> Optional[Score]:
102
+ ) -> Optional[SortValue]:
92
103
  """Returns the score for given `item` and `sort_field`. If the resource is being
93
104
  deleted, it might appear on search results but not in maindb. In this
94
105
  specific case, return None.
95
-
96
106
  """
97
107
  if sort_field == SortField.SCORE:
98
108
  return (item.score.bm25, item.score.booster)
99
109
 
100
110
  score: Any = None
101
- resource = await get_resource_from_cache(kbid, item.uuid)
111
+ resource = await cache.get_resource(kbid, item.uuid)
102
112
  if resource is None:
103
113
  return score
114
+
104
115
  basic = await resource.get_basic()
105
116
  if basic is None:
106
117
  return score
@@ -118,13 +129,12 @@ async def text_score(
118
129
  async def merge_documents_results(
119
130
  document_responses: list[DocumentSearchResponse],
120
131
  resources: list[str],
121
- count: int,
122
- page: int,
132
+ top_k: int,
123
133
  kbid: str,
124
134
  sort: SortOptions,
125
135
  min_score: float,
126
136
  ) -> Resources:
127
- raw_resource_list: list[tuple[DocumentResult, Score]] = []
137
+ raw_resource_list: list[tuple[DocumentResult, SortValue]] = []
128
138
  facets: dict[str, Any] = {}
129
139
  query = None
130
140
  total = 0
@@ -143,24 +153,18 @@ async def merge_documents_results(
143
153
  if document_response.next_page:
144
154
  next_page = True
145
155
  for result in document_response.results:
146
- score = await text_score(result, sort.field, kbid)
147
- if score is not None:
148
- raw_resource_list.append((result, score))
156
+ sort_value = await get_sort_value(result, sort.field, kbid)
157
+ if sort_value is not None:
158
+ raw_resource_list.append((result, sort_value))
149
159
  total += document_response.total
150
160
 
161
+ # We need to cut first and then sort, otherwise the page will be wrong if the order is DESC
162
+ raw_resource_list, has_more = cut_page(raw_resource_list, top_k)
163
+ next_page = next_page or has_more
151
164
  raw_resource_list.sort(key=lambda x: x[1], reverse=(sort.order == SortOrder.DESC))
152
165
 
153
- skip = page * count
154
- end = skip + count
155
- length = len(raw_resource_list)
156
-
157
- if length > end:
158
- next_page = True
159
-
160
166
  result_resource_list: list[ResourceResult] = []
161
- for result, _ in raw_resource_list[min(skip, length) : min(end, length)]:
162
- # /f/file
163
-
167
+ for result, _ in raw_resource_list:
164
168
  labels = await get_labels_resource(result, kbid)
165
169
  _, field_type, field = result.field.split("/")
166
170
 
@@ -181,8 +185,8 @@ async def merge_documents_results(
181
185
  results=result_resource_list,
182
186
  query=query,
183
187
  total=total,
184
- page_number=page,
185
- page_size=count,
188
+ page_number=0, # Bw/c with pagination
189
+ page_size=top_k,
186
190
  next_page=next_page,
187
191
  min_score=min_score,
188
192
  )
@@ -207,65 +211,58 @@ async def merge_suggest_paragraph_results(
207
211
  if len(suggest_responses) > 1:
208
212
  sort_results_by_score(raw_paragraph_list)
209
213
 
210
- rcache = get_resource_cache(clear=True)
211
- etcache = ExtractedTextCache()
212
- try:
213
- result_paragraph_list: list[Paragraph] = []
214
- for result in raw_paragraph_list[:10]:
215
- _, field_type, field = result.field.split("/")
216
- text = await get_paragraph_text(
217
- kbid=kbid,
218
- rid=result.uuid,
219
- field=result.field,
220
- start=result.start,
221
- end=result.end,
222
- split=result.split,
223
- highlight=highlight,
224
- ematches=ematches, # type: ignore
225
- matches=result.matches, # type: ignore
226
- extracted_text_cache=etcache,
227
- )
228
- labels = await get_labels_paragraph(result, kbid)
229
- new_paragraph = Paragraph(
230
- score=result.score.bm25,
231
- rid=result.uuid,
232
- field_type=field_type,
233
- field=field,
234
- text=text,
235
- labels=labels,
236
- position=TextPosition(
237
- index=result.metadata.position.index,
238
- start=result.metadata.position.start,
239
- end=result.metadata.position.end,
240
- page_number=result.metadata.position.page_number,
214
+ result_paragraph_list: list[Paragraph] = []
215
+ for result in raw_paragraph_list[:10]:
216
+ _, field_type, field = result.field.split("/")
217
+ text = await get_paragraph_text(
218
+ kbid=kbid,
219
+ paragraph_id=ParagraphId(
220
+ field_id=FieldId(
221
+ rid=result.uuid,
222
+ type=field_type,
223
+ key=field,
224
+ subfield_id=result.split,
241
225
  ),
242
- )
243
- if len(result.metadata.position.start_seconds) or len(
244
- result.metadata.position.end_seconds
245
- ):
246
- new_paragraph.start_seconds = list(
247
- result.metadata.position.start_seconds
248
- )
249
- new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
250
- else:
251
- # TODO: Remove once we are sure all data has been migrated!
252
- seconds_positions = await get_seconds_paragraph(result, kbid)
253
- if seconds_positions is not None:
254
- new_paragraph.start_seconds = seconds_positions[0]
255
- new_paragraph.end_seconds = seconds_positions[1]
256
- result_paragraph_list.append(new_paragraph)
257
- return Paragraphs(results=result_paragraph_list, query=query, min_score=0)
258
- finally:
259
- etcache.clear()
260
- rcache.clear()
226
+ paragraph_start=result.start,
227
+ paragraph_end=result.end,
228
+ ),
229
+ highlight=highlight,
230
+ ematches=ematches, # type: ignore
231
+ matches=result.matches, # type: ignore
232
+ )
233
+ labels = await get_labels_paragraph(result, kbid)
234
+ new_paragraph = Paragraph(
235
+ score=result.score.bm25,
236
+ rid=result.uuid,
237
+ field_type=field_type,
238
+ field=field,
239
+ text=text,
240
+ labels=labels,
241
+ position=TextPosition(
242
+ index=result.metadata.position.index,
243
+ start=result.metadata.position.start,
244
+ end=result.metadata.position.end,
245
+ page_number=result.metadata.position.page_number,
246
+ ),
247
+ )
248
+ if len(result.metadata.position.start_seconds) or len(result.metadata.position.end_seconds):
249
+ new_paragraph.start_seconds = list(result.metadata.position.start_seconds)
250
+ new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
251
+ else:
252
+ # TODO: Remove once we are sure all data has been migrated!
253
+ seconds_positions = await get_seconds_paragraph(result, kbid)
254
+ if seconds_positions is not None:
255
+ new_paragraph.start_seconds = seconds_positions[0]
256
+ new_paragraph.end_seconds = seconds_positions[1]
257
+ result_paragraph_list.append(new_paragraph)
258
+ return Paragraphs(results=result_paragraph_list, query=query, min_score=0)
261
259
 
262
260
 
263
261
  async def merge_vectors_results(
264
262
  vector_responses: list[VectorSearchResponse],
265
263
  resources: list[str],
266
264
  kbid: str,
267
- count: int,
268
- page: int,
265
+ top_k: int,
269
266
  min_score: Optional[float] = None,
270
267
  ):
271
268
  facets: dict[str, Any] = {}
@@ -282,12 +279,10 @@ async def merge_vectors_results(
282
279
  if len(vector_responses) > 1:
283
280
  raw_vectors_list.sort(key=lambda x: x.score, reverse=True)
284
281
 
285
- skip = page * count
286
- end_element = skip + count
287
- length = len(raw_vectors_list)
282
+ raw_vectors_list, _ = cut_page(raw_vectors_list, top_k)
288
283
 
289
284
  result_sentence_list: list[Sentence] = []
290
- for result in raw_vectors_list[min(skip, length) : min(end_element, length)]:
285
+ for result in raw_vectors_list:
291
286
  id_count = result.doc_id.id.count("/")
292
287
  if id_count == 4:
293
288
  rid, field_type, field, index, position = result.doc_id.id.split("/")
@@ -335,8 +330,8 @@ async def merge_vectors_results(
335
330
  return Sentences(
336
331
  results=result_sentence_list,
337
332
  facets=facets,
338
- page_number=page,
339
- page_size=count,
333
+ page_number=0, # Bw/c with pagination
334
+ page_size=top_k,
340
335
  min_score=round(min_score or 0, ndigits=3),
341
336
  )
342
337
 
@@ -345,13 +340,12 @@ async def merge_paragraph_results(
345
340
  paragraph_responses: list[ParagraphSearchResponse],
346
341
  resources: list[str],
347
342
  kbid: str,
348
- count: int,
349
- page: int,
343
+ top_k: int,
350
344
  highlight: bool,
351
345
  sort: SortOptions,
352
346
  min_score: float,
353
- ):
354
- raw_paragraph_list: list[tuple[ParagraphResult, Score]] = []
347
+ ) -> Paragraphs:
348
+ raw_paragraph_list: list[tuple[ParagraphResult, SortValue]] = []
355
349
  facets: dict[str, Any] = {}
356
350
  query = None
357
351
  next_page = False
@@ -373,83 +367,75 @@ async def merge_paragraph_results(
373
367
  if paragraph_response.next_page:
374
368
  next_page = True
375
369
  for result in paragraph_response.results:
376
- score = await text_score(result, sort.field, kbid)
370
+ score = await get_sort_value(result, sort.field, kbid)
377
371
  if score is not None:
378
372
  raw_paragraph_list.append((result, score))
379
373
  total += paragraph_response.total
380
374
 
381
375
  raw_paragraph_list.sort(key=lambda x: x[1], reverse=(sort.order == SortOrder.DESC))
382
376
 
383
- skip = page * count
384
- end = skip + count
385
- length = len(raw_paragraph_list)
386
-
387
- if length > end:
388
- next_page = True
377
+ raw_paragraph_list, has_more = cut_page(raw_paragraph_list, top_k)
378
+ next_page = next_page or has_more
389
379
 
390
380
  result_paragraph_list: list[Paragraph] = []
391
- etcache = ExtractedTextCache()
392
- try:
393
- for result, _ in raw_paragraph_list[min(skip, length) : min(end, length)]:
394
- _, field_type, field = result.field.split("/")
395
- text = await get_paragraph_text(
396
- kbid=kbid,
397
- rid=result.uuid,
398
- field=result.field,
399
- start=result.start,
400
- end=result.end,
401
- split=result.split,
402
- highlight=highlight,
403
- ematches=ematches,
404
- matches=result.matches, # type: ignore
405
- extracted_text_cache=etcache,
406
- )
407
- labels = await get_labels_paragraph(result, kbid)
408
- fuzzy_result = len(result.matches) > 0
409
- new_paragraph = Paragraph(
410
- score=result.score.bm25,
411
- rid=result.uuid,
412
- field_type=field_type,
413
- field=field,
414
- text=text,
415
- labels=labels,
416
- position=TextPosition(
417
- index=result.metadata.position.index,
418
- start=result.metadata.position.start,
419
- end=result.metadata.position.end,
420
- page_number=result.metadata.position.page_number,
381
+ for result, _ in raw_paragraph_list:
382
+ _, field_type, field = result.field.split("/")
383
+ text = await get_paragraph_text(
384
+ kbid=kbid,
385
+ paragraph_id=ParagraphId(
386
+ field_id=FieldId(
387
+ rid=result.uuid,
388
+ type=field_type,
389
+ key=field,
390
+ subfield_id=result.split,
421
391
  ),
422
- fuzzy_result=fuzzy_result,
423
- )
424
- if len(result.metadata.position.start_seconds) or len(
425
- result.metadata.position.end_seconds
426
- ):
427
- new_paragraph.start_seconds = list(
428
- result.metadata.position.start_seconds
429
- )
430
- new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
431
- else:
432
- # TODO: Remove once we are sure all data has been migrated!
433
- seconds_positions = await get_seconds_paragraph(result, kbid)
434
- if seconds_positions is not None:
435
- new_paragraph.start_seconds = seconds_positions[0]
436
- new_paragraph.end_seconds = seconds_positions[1]
437
-
438
- result_paragraph_list.append(new_paragraph)
439
- if new_paragraph.rid not in resources:
440
- resources.append(new_paragraph.rid)
441
- return Paragraphs(
442
- results=result_paragraph_list,
443
- facets=facets,
444
- query=query,
445
- total=total,
446
- page_number=page,
447
- page_size=count,
448
- next_page=next_page,
449
- min_score=min_score,
392
+ paragraph_start=result.start,
393
+ paragraph_end=result.end,
394
+ ),
395
+ highlight=highlight,
396
+ ematches=ematches,
397
+ matches=result.matches, # type: ignore
450
398
  )
451
- finally:
452
- etcache.clear()
399
+ labels = await get_labels_paragraph(result, kbid)
400
+ fuzzy_result = len(result.matches) > 0
401
+ new_paragraph = Paragraph(
402
+ score=result.score.bm25,
403
+ rid=result.uuid,
404
+ field_type=field_type,
405
+ field=field,
406
+ text=text,
407
+ labels=labels,
408
+ position=TextPosition(
409
+ index=result.metadata.position.index,
410
+ start=result.metadata.position.start,
411
+ end=result.metadata.position.end,
412
+ page_number=result.metadata.position.page_number,
413
+ ),
414
+ fuzzy_result=fuzzy_result,
415
+ )
416
+ if len(result.metadata.position.start_seconds) or len(result.metadata.position.end_seconds):
417
+ new_paragraph.start_seconds = list(result.metadata.position.start_seconds)
418
+ new_paragraph.end_seconds = list(result.metadata.position.end_seconds)
419
+ else:
420
+ # TODO: Remove once we are sure all data has been migrated!
421
+ seconds_positions = await get_seconds_paragraph(result, kbid)
422
+ if seconds_positions is not None:
423
+ new_paragraph.start_seconds = seconds_positions[0]
424
+ new_paragraph.end_seconds = seconds_positions[1]
425
+
426
+ result_paragraph_list.append(new_paragraph)
427
+ if new_paragraph.rid not in resources:
428
+ resources.append(new_paragraph.rid)
429
+ return Paragraphs(
430
+ results=result_paragraph_list,
431
+ facets=facets,
432
+ query=query,
433
+ total=total,
434
+ page_number=0, # Bw/c with pagination
435
+ page_size=top_k,
436
+ next_page=next_page,
437
+ min_score=min_score,
438
+ )
453
439
 
454
440
 
455
441
  @merge_observer.wrap({"type": "merge_relations"})
@@ -458,9 +444,7 @@ async def merge_relations_results(
458
444
  query: EntitiesSubgraphRequest,
459
445
  ) -> Relations:
460
446
  loop = asyncio.get_event_loop()
461
- return await loop.run_in_executor(
462
- None, _merge_relations_results, relations_responses, query
463
- )
447
+ return await loop.run_in_executor(None, _merge_relations_results, relations_responses, query)
464
448
 
465
449
 
466
450
  def _merge_relations_results(
@@ -483,7 +467,7 @@ def _merge_relations_results(
483
467
  relations.entities[origin.value].related_to.append(
484
468
  DirectionalRelation(
485
469
  entity=destination.value,
486
- entity_type=RelationNodeTypeMap[destination.ntype],
470
+ entity_type=relation_node_type_to_entity_type(destination.ntype),
487
471
  relation=relation_type,
488
472
  relation_label=relation_label,
489
473
  direction=RelationDirection.OUT,
@@ -493,7 +477,7 @@ def _merge_relations_results(
493
477
  relations.entities[destination.value].related_to.append(
494
478
  DirectionalRelation(
495
479
  entity=origin.value,
496
- entity_type=RelationNodeTypeMap[origin.ntype],
480
+ entity_type=relation_node_type_to_entity_type(origin.ntype),
497
481
  relation=relation_type,
498
482
  relation_label=relation_label,
499
483
  direction=RelationDirection.IN,
@@ -506,8 +490,7 @@ def _merge_relations_results(
506
490
  @merge_observer.wrap({"type": "merge"})
507
491
  async def merge_results(
508
492
  search_responses: list[SearchResponse],
509
- count: int,
510
- page: int,
493
+ top_k: int,
511
494
  kbid: str,
512
495
  show: list[ResourceProperties],
513
496
  field_type_filter: list[FieldTypeName],
@@ -530,77 +513,59 @@ async def merge_results(
530
513
 
531
514
  api_results = KnowledgeboxSearchResults()
532
515
 
533
- rcache = get_resource_cache(clear=True)
534
- try:
535
- resources: list[str] = list()
536
- api_results.fulltext = await merge_documents_results(
537
- documents, resources, count, page, kbid, sort, min_score=min_score.bm25
538
- )
516
+ resources: list[str] = list()
517
+ api_results.fulltext = await merge_documents_results(
518
+ documents, resources, top_k, kbid, sort, min_score=min_score.bm25
519
+ )
539
520
 
540
- api_results.paragraphs = await merge_paragraph_results(
541
- paragraphs,
542
- resources,
543
- kbid,
544
- count,
545
- page,
546
- highlight,
547
- sort,
548
- min_score=min_score.bm25,
549
- )
521
+ api_results.paragraphs = await merge_paragraph_results(
522
+ paragraphs,
523
+ resources,
524
+ kbid,
525
+ top_k,
526
+ highlight,
527
+ sort,
528
+ min_score=min_score.bm25,
529
+ )
550
530
 
551
- api_results.sentences = await merge_vectors_results(
552
- vectors, resources, kbid, count, page, min_score=min_score.semantic
553
- )
531
+ api_results.sentences = await merge_vectors_results(
532
+ vectors, resources, kbid, top_k, min_score=min_score.semantic
533
+ )
554
534
 
555
- api_results.relations = await merge_relations_results(
556
- relations, requested_relations
557
- )
535
+ api_results.relations = await merge_relations_results(relations, requested_relations)
558
536
 
559
- api_results.resources = await fetch_resources(
560
- resources, kbid, show, field_type_filter, extracted
561
- )
562
- return api_results
563
- finally:
564
- rcache.clear()
537
+ api_results.resources = await fetch_resources(resources, kbid, show, field_type_filter, extracted)
538
+ return api_results
565
539
 
566
540
 
567
541
  async def merge_paragraphs_results(
568
- paragraph_responses: list[ParagraphSearchResponse],
569
- count: int,
570
- page: int,
542
+ responses: list[SearchResponse],
543
+ top_k: int,
571
544
  kbid: str,
572
- show: list[ResourceProperties],
573
- field_type_filter: list[FieldTypeName],
574
- extracted: list[ExtractedDataTypeName],
575
545
  highlight_split: bool,
576
546
  min_score: float,
577
547
  ) -> ResourceSearchResults:
578
548
  paragraphs = []
579
- for result in paragraph_responses:
580
- paragraphs.append(result)
549
+ for result in responses:
550
+ paragraphs.append(result.paragraph)
581
551
 
582
552
  api_results = ResourceSearchResults()
583
553
 
584
- rcache = get_resource_cache(clear=True)
585
- try:
586
- resources: list[str] = list()
587
- api_results.paragraphs = await merge_paragraph_results(
588
- paragraphs,
589
- resources,
590
- kbid,
591
- count,
592
- page,
593
- highlight=highlight_split,
594
- sort=SortOptions(
595
- field=SortField.SCORE,
596
- order=SortOrder.DESC,
597
- limit=None,
598
- ),
599
- min_score=min_score,
600
- )
601
- return api_results
602
- finally:
603
- rcache.clear()
554
+ resources: list[str] = list()
555
+ api_results.paragraphs = await merge_paragraph_results(
556
+ paragraphs,
557
+ resources,
558
+ kbid,
559
+ top_k,
560
+ highlight=highlight_split,
561
+ sort=SortOptions(
562
+ field=SortField.SCORE,
563
+ order=SortOrder.DESC,
564
+ limit=None,
565
+ ),
566
+ min_score=min_score,
567
+ )
568
+ return api_results
604
569
 
605
570
 
606
571
  async def merge_suggest_entities_results(
@@ -609,8 +574,7 @@ async def merge_suggest_entities_results(
609
574
  unique_entities: Set[RelatedEntity] = set()
610
575
  for response in suggest_responses:
611
576
  response_entities = (
612
- RelatedEntity(family=e.subtype, value=e.value)
613
- for e in response.entity_results.nodes
577
+ RelatedEntity(family=e.subtype, value=e.value) for e in response.entity_results.nodes
614
578
  )
615
579
  unique_entities.update(response_entities)
616
580