nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,465 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import dataclasses
21
- import logging
22
- import os
23
- import time
24
- from typing import Union
25
-
26
- import backoff
27
- import docker # type: ignore
28
- import pytest
29
- from grpc import insecure_channel
30
- from grpc_health.v1 import health_pb2_grpc
31
- from grpc_health.v1.health_pb2 import HealthCheckRequest
32
- from nucliadb_protos.nodewriter_pb2 import EmptyQuery, ShardId
33
- from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
34
- from pytest_docker_fixtures import images # type: ignore
35
- from pytest_docker_fixtures.containers._base import BaseImage # type: ignore
36
- from pytest_lazy_fixtures import lazy_fixture
37
-
38
- from nucliadb.common.cluster.settings import settings as cluster_settings
39
- from nucliadb_utils.tests.conftest import get_testing_storage_backend
40
-
41
- logger = logging.getLogger(__name__)
42
-
43
- images.settings["nucliadb_node_reader"] = {
44
- "image": "europe-west4-docker.pkg.dev/nuclia-internal/nuclia/node",
45
- "version": "latest",
46
- "env": {
47
- "HOST_KEY_PATH": "/data/node.key",
48
- "DATA_PATH": "/data",
49
- "READER_LISTEN_ADDRESS": "0.0.0.0:4445",
50
- "NUCLIADB_DISABLE_ANALYTICS": "True",
51
- "RUST_BACKTRACE": "full",
52
- "DEBUG": "1",
53
- "RUST_LOG": "nucliadb_*=DEBUG",
54
- },
55
- "options": {
56
- "command": [
57
- "/usr/local/bin/node_reader",
58
- ],
59
- "ports": {"4445": None},
60
- "mem_limit": "3g", # default is 1g, need to override
61
- "platform": "linux/amd64",
62
- },
63
- }
64
-
65
- images.settings["nucliadb_node_writer"] = {
66
- "image": "europe-west4-docker.pkg.dev/nuclia-internal/nuclia/node",
67
- "version": "latest",
68
- "env": {
69
- "HOST_KEY_PATH": "/data/node.key",
70
- "DATA_PATH": "/data",
71
- "WRITER_LISTEN_ADDRESS": "0.0.0.0:4446",
72
- "NUCLIADB_DISABLE_ANALYTICS": "True",
73
- "RUST_BACKTRACE": "full",
74
- "DEBUG": "1",
75
- "RUST_LOG": "nucliadb_*=DEBUG",
76
- },
77
- "options": {
78
- "command": [
79
- "/usr/local/bin/node_writer",
80
- ],
81
- "ports": {"4446": None},
82
- "mem_limit": "3g", # default is 1g, need to override
83
- "platform": "linux/amd64",
84
- },
85
- }
86
-
87
- images.settings["nucliadb_node_sidecar"] = {
88
- "image": "europe-west4-docker.pkg.dev/nuclia-internal/nuclia/node_sidecar",
89
- "version": "latest",
90
- "env": {
91
- "INDEX_JETSTREAM_SERVERS": "[]",
92
- "CACHE_PUBSUB_NATS_URL": "",
93
- "HOST_KEY_PATH": "/data/node.key",
94
- "DATA_PATH": "/data",
95
- "SIDECAR_LISTEN_ADDRESS": "0.0.0.0:4447",
96
- "READER_LISTEN_ADDRESS": "0.0.0.0:4445",
97
- "WRITER_LISTEN_ADDRESS": "0.0.0.0:4446",
98
- "PYTHONUNBUFFERED": "1",
99
- "LOG_LEVEL": "DEBUG",
100
- "DEBUG": "1",
101
- },
102
- "options": {
103
- "command": [
104
- "node_sidecar",
105
- ],
106
- "ports": {"4447": None},
107
- "platform": "linux/amd64",
108
- },
109
- }
110
-
111
-
112
- def get_container_host(container_obj):
113
- return container_obj.attrs["NetworkSettings"]["IPAddress"]
114
-
115
-
116
- class nucliadbNodeReader(BaseImage):
117
- name = "nucliadb_node_reader"
118
- port = 4445
119
-
120
- def run(self, volume):
121
- self._volume = volume
122
- self._mount = "/data"
123
- return super(nucliadbNodeReader, self).run()
124
-
125
- def get_image_options(self):
126
- options = super(nucliadbNodeReader, self).get_image_options()
127
- options["volumes"] = {self._volume.name: {"bind": "/data"}}
128
- return options
129
-
130
- def check(self):
131
- channel = insecure_channel(f"{self.host}:{self.get_port()}")
132
- stub = health_pb2_grpc.HealthStub(channel)
133
- pb = HealthCheckRequest(service="nodereader.NodeReader")
134
- try:
135
- result = stub.Check(pb)
136
- return result.status == 1
137
- except: # noqa
138
- return False
139
-
140
-
141
- class nucliadbNodeWriter(BaseImage):
142
- name = "nucliadb_node_writer"
143
- port = 4446
144
-
145
- def run(self, volume):
146
- self._volume = volume
147
- self._mount = "/data"
148
- return super(nucliadbNodeWriter, self).run()
149
-
150
- def get_image_options(self):
151
- options = super(nucliadbNodeWriter, self).get_image_options()
152
- options["volumes"] = {self._volume.name: {"bind": "/data"}}
153
- return options
154
-
155
- def check(self):
156
- channel = insecure_channel(f"{self.host}:{self.get_port()}")
157
- stub = health_pb2_grpc.HealthStub(channel)
158
- pb = HealthCheckRequest(service="nodewriter.NodeWriter")
159
- try:
160
- result = stub.Check(pb)
161
- return result.status == 1
162
- except: # noqa
163
- return False
164
-
165
-
166
- class nucliadbNodeSidecar(BaseImage):
167
- name = "nucliadb_node_sidecar"
168
- port = 4447
169
-
170
- def run(self, volume):
171
- self._volume = volume
172
- self._mount = "/data"
173
- return super(nucliadbNodeSidecar, self).run()
174
-
175
- def get_image_options(self):
176
- options = super(nucliadbNodeSidecar, self).get_image_options()
177
- options["volumes"] = {self._volume.name: {"bind": "/data"}}
178
- return options
179
-
180
- def check(self):
181
- channel = insecure_channel(f"{self.host}:{self.get_port()}")
182
- stub = health_pb2_grpc.HealthStub(channel)
183
- pb = HealthCheckRequest(service="")
184
- try:
185
- result = stub.Check(pb)
186
- return result.status == 1
187
- except: # noqa
188
- return False
189
-
190
-
191
- nucliadb_node_1_reader = nucliadbNodeReader()
192
- nucliadb_node_1_writer = nucliadbNodeWriter()
193
- nucliadb_node_1_sidecar = nucliadbNodeSidecar()
194
-
195
- nucliadb_node_2_reader = nucliadbNodeReader()
196
- nucliadb_node_2_writer = nucliadbNodeWriter()
197
- nucliadb_node_2_sidecar = nucliadbNodeSidecar()
198
-
199
-
200
- @dataclasses.dataclass
201
- class NodeS3Storage:
202
- server: str
203
-
204
- def envs(self):
205
- return {
206
- "FILE_BACKEND": "s3",
207
- "S3_CLIENT_ID": "",
208
- "S3_CLIENT_SECRET": "",
209
- "S3_BUCKET": "test",
210
- "S3_INDEXING_BUCKET": "indexing",
211
- "S3_DEADLETTER_BUCKET": "deadletter",
212
- "S3_ENDPOINT": self.server,
213
- }
214
-
215
-
216
- @dataclasses.dataclass
217
- class NodeGCSStorage:
218
- server: str
219
-
220
- def envs(self):
221
- return {
222
- "FILE_BACKEND": "gcs",
223
- "GCS_BUCKET": "test",
224
- "GCS_INDEXING_BUCKET": "indexing",
225
- "GCS_DEADLETTER_BUCKET": "deadletter",
226
- "GCS_ENDPOINT_URL": self.server,
227
- }
228
-
229
-
230
- NodeStorage = Union[NodeGCSStorage, NodeS3Storage]
231
-
232
-
233
- class _NodeRunner:
234
- def __init__(self, natsd, storage: NodeStorage):
235
- self.docker_client = docker.from_env(version=BaseImage.docker_version)
236
- self.natsd = natsd
237
- self.storage = storage
238
- self.data = {} # type: ignore
239
-
240
- def start(self):
241
- docker_platform_name = self.docker_client.api.version()["Platform"][
242
- "Name"
243
- ].upper()
244
- if "GITHUB_ACTION" not in os.environ and (
245
- "DESKTOP" in docker_platform_name
246
- # newer versions use community
247
- or "DOCKER ENGINE - COMMUNITY" == docker_platform_name
248
- ):
249
- # Valid when using Docker desktop
250
- docker_internal_host = "host.docker.internal"
251
- else:
252
- # Valid when using github actions
253
- docker_internal_host = "172.17.0.1"
254
-
255
- self.volume_node_1 = self.docker_client.volumes.create(driver="local")
256
- self.volume_node_2 = self.docker_client.volumes.create(driver="local")
257
-
258
- writer1_host, writer1_port = nucliadb_node_1_writer.run(self.volume_node_1)
259
- writer2_host, writer2_port = nucliadb_node_2_writer.run(self.volume_node_2)
260
-
261
- reader1_host, reader1_port = nucliadb_node_1_reader.run(self.volume_node_1)
262
- reader2_host, reader2_port = nucliadb_node_2_reader.run(self.volume_node_2)
263
-
264
- natsd_server = self.natsd.replace("localhost", docker_internal_host)
265
- images.settings["nucliadb_node_sidecar"]["env"].update(
266
- {
267
- "INDEX_JETSTREAM_SERVERS": f'["{natsd_server}"]',
268
- "CACHE_PUBSUB_NATS_URL": f'["{natsd_server}"]',
269
- "READER_LISTEN_ADDRESS": f"{docker_internal_host}:{reader1_port}",
270
- "WRITER_LISTEN_ADDRESS": f"{docker_internal_host}:{writer1_port}",
271
- }
272
- )
273
- self.storage.server = self.storage.server.replace(
274
- "localhost", docker_internal_host
275
- )
276
- images.settings["nucliadb_node_sidecar"]["env"].update(self.storage.envs())
277
-
278
- sidecar1_host, sidecar1_port = nucliadb_node_1_sidecar.run(self.volume_node_1)
279
-
280
- images.settings["nucliadb_node_sidecar"]["env"][
281
- "READER_LISTEN_ADDRESS"
282
- ] = f"{docker_internal_host}:{reader2_port}"
283
- images.settings["nucliadb_node_sidecar"]["env"][
284
- "WRITER_LISTEN_ADDRESS"
285
- ] = f"{docker_internal_host}:{writer2_port}"
286
-
287
- sidecar2_host, sidecar2_port = nucliadb_node_2_sidecar.run(self.volume_node_2)
288
-
289
- writer1_internal_host = get_container_host(nucliadb_node_1_writer.container_obj)
290
- writer2_internal_host = get_container_host(nucliadb_node_2_writer.container_obj)
291
-
292
- self.data.update(
293
- {
294
- "writer1_internal_host": writer1_internal_host,
295
- "writer2_internal_host": writer2_internal_host,
296
- "writer1": {
297
- "host": writer1_host,
298
- "port": writer1_port,
299
- },
300
- "writer2": {
301
- "host": writer2_host,
302
- "port": writer2_port,
303
- },
304
- "reader1": {
305
- "host": reader1_host,
306
- "port": reader1_port,
307
- },
308
- "reader2": {
309
- "host": reader2_host,
310
- "port": reader2_port,
311
- },
312
- "sidecar1": {
313
- "host": sidecar1_host,
314
- "port": sidecar1_port,
315
- },
316
- "sidecar2": {
317
- "host": sidecar2_host,
318
- "port": sidecar2_port,
319
- },
320
- }
321
- )
322
- return self.data
323
-
324
- def stop(self):
325
- container_ids = [
326
- nucliadb_node_1_reader.container_obj.id,
327
- nucliadb_node_1_writer.container_obj.id,
328
- nucliadb_node_1_sidecar.container_obj.id,
329
- nucliadb_node_2_writer.container_obj.id,
330
- nucliadb_node_2_reader.container_obj.id,
331
- nucliadb_node_2_sidecar.container_obj.id,
332
- ]
333
- nucliadb_node_1_reader.stop()
334
- nucliadb_node_1_writer.stop()
335
- nucliadb_node_1_sidecar.stop()
336
- nucliadb_node_2_writer.stop()
337
- nucliadb_node_2_reader.stop()
338
- nucliadb_node_2_sidecar.stop()
339
-
340
- for container_id in container_ids:
341
- for _ in range(5):
342
- try:
343
- self.docker_client.containers.get(container_id) # type: ignore
344
- except docker.errors.NotFound:
345
- break
346
- time.sleep(2)
347
-
348
- self.volume_node_1.remove()
349
- self.volume_node_2.remove()
350
-
351
- def setup_env(self):
352
- # reset on every test run in case something touches it
353
- cluster_settings.writer_port_map = {
354
- self.data["writer1_internal_host"]: self.data["writer1"]["port"],
355
- self.data["writer2_internal_host"]: self.data["writer2"]["port"],
356
- }
357
- cluster_settings.reader_port_map = {
358
- self.data["writer1_internal_host"]: self.data["reader1"]["port"],
359
- self.data["writer2_internal_host"]: self.data["reader2"]["port"],
360
- }
361
-
362
- cluster_settings.node_writer_port = None # type: ignore
363
- cluster_settings.node_reader_port = None # type: ignore
364
-
365
- cluster_settings.cluster_discovery_mode = "manual"
366
- cluster_settings.cluster_discovery_manual_addresses = [
367
- self.data["writer1_internal_host"],
368
- self.data["writer2_internal_host"],
369
- ]
370
-
371
-
372
- @pytest.fixture(scope="session")
373
- def gcs_node_storage(gcs):
374
- return NodeGCSStorage(server=gcs)
375
-
376
-
377
- @pytest.fixture(scope="session")
378
- def s3_node_storage(s3):
379
- return NodeS3Storage(server=s3)
380
-
381
-
382
- def lazy_load_storage_backend():
383
- backend = get_testing_storage_backend()
384
- if backend == "gcs":
385
- return [lazy_fixture.lf("gcs_node_storage")]
386
- elif backend == "s3":
387
- return [lazy_fixture.lf("s3_node_storage")]
388
- else:
389
- print(f"Unknown storage backend {backend}, using gcs")
390
- return [lazy_fixture.lf("gcs_node_storage")]
391
-
392
-
393
- @pytest.fixture(scope="session", params=lazy_load_storage_backend())
394
- def node_storage(request):
395
- return request.param
396
-
397
-
398
- @pytest.fixture(scope="session", autouse=False)
399
- def _node(natsd: str, node_storage):
400
- nr = _NodeRunner(natsd, node_storage)
401
- try:
402
- cluster_info = nr.start()
403
- except Exception:
404
- nr.stop()
405
- raise
406
- nr.setup_env()
407
- yield cluster_info
408
- nr.stop()
409
-
410
-
411
- @pytest.fixture(scope="function")
412
- def node(_node, request):
413
- # clean up all shard data before each test
414
- channel1 = insecure_channel(
415
- f"{_node['writer1']['host']}:{_node['writer1']['port']}"
416
- )
417
- channel2 = insecure_channel(
418
- f"{_node['writer2']['host']}:{_node['writer2']['port']}"
419
- )
420
- writer1 = NodeWriterStub(channel1)
421
- writer2 = NodeWriterStub(channel2)
422
-
423
- logger.debug("cleaning up shards data")
424
- try:
425
- cleanup_node(writer1)
426
- cleanup_node(writer2)
427
- except Exception:
428
- logger.error(
429
- "Error cleaning up shards data. Maybe the node fixture could not start properly?",
430
- exc_info=True,
431
- )
432
-
433
- client = docker.client.from_env()
434
- containers_by_port = {}
435
- for container in client.containers.list():
436
- name = container.name
437
- command = container.attrs["Config"]["Cmd"]
438
- ports = container.ports
439
- print(f"container {name} executing {command} is using ports: {ports}")
440
-
441
- for internal_port in container.ports:
442
- for host in container.ports[internal_port]:
443
- port = host["HostPort"]
444
- port_containers = containers_by_port.setdefault(port, [])
445
- if container not in port_containers:
446
- port_containers.append(container)
447
-
448
- for port, containers in containers_by_port.items():
449
- if len(containers) > 1:
450
- names = ", ".join([container.name for container in containers])
451
- print(f"ATENTION! Containers {names} share port {port}!")
452
- raise
453
- finally:
454
- channel1.close()
455
- channel2.close()
456
-
457
- yield _node
458
-
459
-
460
- @backoff.on_exception(
461
- backoff.expo, Exception, jitter=backoff.random_jitter, max_tries=5
462
- )
463
- def cleanup_node(writer: NodeWriterStub):
464
- for shard in writer.ListShards(EmptyQuery()).ids:
465
- writer.DeleteShard(ShardId(id=shard.id))
@@ -1,18 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,67 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from nucliadb_protos.resources_pb2 import FieldComputedMetadata, Paragraph
21
- from nucliadb_protos.utils_pb2 import ExtractedText
22
-
23
- from nucliadb.search.api.v1.resource.ask import (
24
- get_field_blocks,
25
- get_field_blocks_split_by_paragraphs,
26
- )
27
-
28
-
29
- def test_get_field_blocks():
30
- etxt = ExtractedText(text="Hello World")
31
- assert get_field_blocks(etxt) == ["Hello World"]
32
-
33
- # split text
34
- etxt = ExtractedText()
35
- etxt.split_text["foo"] = "Hello World"
36
- etxt.split_text["bar"] = "I am here"
37
- assert get_field_blocks(etxt) == ["I am here", "Hello World"]
38
-
39
-
40
- def test_get_field_blocks_split_by_paragraphs():
41
- etxt = ExtractedText(text="Hello World")
42
- fcm = FieldComputedMetadata()
43
- p1 = Paragraph(start=0, end=5)
44
- p2 = Paragraph(start=6, end=11)
45
- fcm.metadata.paragraphs.append(p1)
46
- fcm.metadata.paragraphs.append(p2)
47
- assert get_field_blocks_split_by_paragraphs(etxt, fcm) == ["Hello", "World"]
48
-
49
- # split text
50
- etxt = ExtractedText()
51
- etxt.split_text["foo"] = "Hello World"
52
- etxt.split_text["bar"] = "I am here"
53
- fcm = FieldComputedMetadata()
54
- p1 = Paragraph(start=0, end=5)
55
- p2 = Paragraph(start=6, end=11)
56
- p3 = Paragraph(start=0, end=4)
57
- p4 = Paragraph(start=5, end=9)
58
- fcm.split_metadata["foo"].paragraphs.append(p1)
59
- fcm.split_metadata["foo"].paragraphs.append(p2)
60
- fcm.split_metadata["bar"].paragraphs.append(p3)
61
- fcm.split_metadata["bar"].paragraphs.append(p4)
62
- assert get_field_blocks_split_by_paragraphs(etxt, fcm) == [
63
- "I am",
64
- "here",
65
- "Hello",
66
- "World",
67
- ]