nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,20 +17,16 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
|
20
|
-
import asyncio
|
21
20
|
import logging
|
22
21
|
import re
|
23
22
|
import string
|
24
23
|
from typing import Optional
|
25
24
|
|
26
|
-
from
|
27
|
-
|
25
|
+
from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB, ParagraphId
|
28
26
|
from nucliadb.ingest.fields.base import Field
|
29
|
-
from nucliadb.ingest.orm.resource import KB_REVERSE
|
30
27
|
from nucliadb.ingest.orm.resource import Resource as ResourceORM
|
31
|
-
from
|
32
|
-
|
33
|
-
from .cache import get_resource_from_cache
|
28
|
+
from nucliadb.search.search import cache
|
29
|
+
from nucliadb_telemetry import errors, metrics
|
34
30
|
|
35
31
|
logger = logging.getLogger(__name__)
|
36
32
|
PRE_WORD = string.punctuation + " "
|
@@ -56,62 +52,6 @@ GET_PARAGRAPH_LATENCY = metrics.Observer(
|
|
56
52
|
)
|
57
53
|
|
58
54
|
|
59
|
-
EXTRACTED_CACHE_OPS = metrics.Counter(
|
60
|
-
"nucliadb_extracted_text_cache_ops", labels={"type": ""}
|
61
|
-
)
|
62
|
-
|
63
|
-
|
64
|
-
class ExtractedTextCache:
|
65
|
-
"""
|
66
|
-
Used to cache extracted text from a resource in memory during
|
67
|
-
the process of search results serialization.
|
68
|
-
"""
|
69
|
-
|
70
|
-
def __init__(self):
|
71
|
-
self.locks = {}
|
72
|
-
self.values = {}
|
73
|
-
|
74
|
-
def get_value(self, key: str) -> Optional[ExtractedText]:
|
75
|
-
return self.values.get(key)
|
76
|
-
|
77
|
-
def get_lock(self, key: str) -> asyncio.Lock:
|
78
|
-
return self.locks.setdefault(key, asyncio.Lock())
|
79
|
-
|
80
|
-
def set_value(self, key: str, value: ExtractedText) -> None:
|
81
|
-
self.values[key] = value
|
82
|
-
|
83
|
-
def clear(self):
|
84
|
-
self.values.clear()
|
85
|
-
self.locks.clear()
|
86
|
-
|
87
|
-
|
88
|
-
async def get_field_extracted_text(
|
89
|
-
field: Field, cache: Optional[ExtractedTextCache] = None
|
90
|
-
) -> Optional[ExtractedText]:
|
91
|
-
if cache is None:
|
92
|
-
return await field.get_extracted_text()
|
93
|
-
|
94
|
-
key = f"{field.kbid}/{field.uuid}/{field.id}"
|
95
|
-
extracted_text = cache.get_value(key)
|
96
|
-
if extracted_text is not None:
|
97
|
-
EXTRACTED_CACHE_OPS.inc({"type": "hit"})
|
98
|
-
return extracted_text
|
99
|
-
|
100
|
-
async with cache.get_lock(key):
|
101
|
-
# Check again in case another task already fetched it
|
102
|
-
extracted_text = cache.get_value(key)
|
103
|
-
if extracted_text is not None:
|
104
|
-
EXTRACTED_CACHE_OPS.inc({"type": "hit"})
|
105
|
-
return extracted_text
|
106
|
-
|
107
|
-
EXTRACTED_CACHE_OPS.inc({"type": "miss"})
|
108
|
-
extracted_text = await field.get_extracted_text()
|
109
|
-
if extracted_text is not None:
|
110
|
-
# Only cache if we actually have extracted text
|
111
|
-
cache.set_value(key, extracted_text)
|
112
|
-
return extracted_text
|
113
|
-
|
114
|
-
|
115
55
|
@GET_PARAGRAPH_LATENCY.wrap({"type": "full"})
|
116
56
|
async def get_paragraph_from_full_text(
|
117
57
|
*,
|
@@ -119,16 +59,23 @@ async def get_paragraph_from_full_text(
|
|
119
59
|
start: int,
|
120
60
|
end: int,
|
121
61
|
split: Optional[str] = None,
|
122
|
-
|
62
|
+
log_on_missing_field: bool = True,
|
123
63
|
) -> str:
|
124
64
|
"""
|
125
65
|
Pull paragraph from full text stored in database.
|
126
66
|
|
127
67
|
This requires downloading the full text and then slicing it.
|
128
68
|
"""
|
129
|
-
extracted_text = await get_field_extracted_text(field
|
69
|
+
extracted_text = await cache.get_field_extracted_text(field)
|
130
70
|
if extracted_text is None:
|
131
|
-
|
71
|
+
if log_on_missing_field:
|
72
|
+
logger.warning(
|
73
|
+
"Extracted_text for field does not exist on DB. This should not happen.",
|
74
|
+
extra={
|
75
|
+
"field_id": field.resource_unique_id,
|
76
|
+
"kbid": field.kbid,
|
77
|
+
},
|
78
|
+
)
|
132
79
|
return ""
|
133
80
|
|
134
81
|
if split not in (None, ""):
|
@@ -141,39 +88,46 @@ async def get_paragraph_from_full_text(
|
|
141
88
|
async def get_paragraph_text(
|
142
89
|
*,
|
143
90
|
kbid: str,
|
144
|
-
|
145
|
-
field: str,
|
146
|
-
start: int,
|
147
|
-
end: int,
|
148
|
-
split: Optional[str] = None,
|
91
|
+
paragraph_id: ParagraphId,
|
149
92
|
highlight: bool = False,
|
150
93
|
ematches: Optional[list[str]] = None,
|
151
94
|
matches: Optional[list[str]] = None,
|
152
95
|
orm_resource: Optional[
|
153
96
|
ResourceORM
|
154
97
|
] = None, # allow passing in orm_resource to avoid extra DB calls or txn issues
|
155
|
-
|
98
|
+
log_on_missing_field: bool = True,
|
156
99
|
) -> str:
|
100
|
+
rid = paragraph_id.rid
|
101
|
+
field_type = paragraph_id.field_id.type
|
102
|
+
field_key = paragraph_id.field_id.key
|
103
|
+
|
157
104
|
if orm_resource is None:
|
158
|
-
orm_resource = await
|
105
|
+
orm_resource = await cache.get_resource(kbid, rid)
|
159
106
|
if orm_resource is None:
|
160
|
-
|
107
|
+
if log_on_missing_field:
|
108
|
+
logger.warning(
|
109
|
+
"Resource does not exist on DB. This should not happen.",
|
110
|
+
extra={"resource_id": rid, "kbid": kbid, "field": f"{field_type}/{field_key}"},
|
111
|
+
)
|
161
112
|
return ""
|
162
113
|
|
163
|
-
|
164
|
-
field_type_int =
|
165
|
-
field_obj = await orm_resource.get_field(field, field_type_int, load=False)
|
114
|
+
field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
|
115
|
+
field_obj = await orm_resource.get_field(field_key, field_type_int, load=False)
|
166
116
|
|
167
117
|
text = await get_paragraph_from_full_text(
|
168
118
|
field=field_obj,
|
169
|
-
start=
|
170
|
-
end=
|
171
|
-
split=
|
172
|
-
|
119
|
+
start=paragraph_id.paragraph_start,
|
120
|
+
end=paragraph_id.paragraph_end,
|
121
|
+
split=paragraph_id.field_id.subfield_id,
|
122
|
+
log_on_missing_field=log_on_missing_field,
|
173
123
|
)
|
174
124
|
|
175
125
|
if highlight:
|
176
|
-
|
126
|
+
try:
|
127
|
+
text = highlight_paragraph(text, words=matches, ematches=ematches)
|
128
|
+
except Exception as ex:
|
129
|
+
errors.capture_exception(ex)
|
130
|
+
logger.exception("Error highlighting paragraph", extra={"kbid": kbid})
|
177
131
|
return text
|
178
132
|
|
179
133
|
|
@@ -191,19 +145,17 @@ async def get_text_sentence(
|
|
191
145
|
Leave separated from get paragraph for now until we understand the differences
|
192
146
|
better.
|
193
147
|
"""
|
194
|
-
orm_resource = await
|
148
|
+
orm_resource = await cache.get_resource(kbid, rid)
|
195
149
|
|
196
150
|
if orm_resource is None:
|
197
151
|
logger.warning(f"{rid} does not exist on DB")
|
198
152
|
return ""
|
199
153
|
|
200
|
-
field_type_int =
|
154
|
+
field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
|
201
155
|
field_obj = await orm_resource.get_field(field, field_type_int, load=False)
|
202
156
|
extracted_text = await field_obj.get_extracted_text()
|
203
157
|
if extracted_text is None:
|
204
|
-
logger.info(
|
205
|
-
f"{rid} {field} {field_type_int} extracted_text does not exist on DB"
|
206
|
-
)
|
158
|
+
logger.info(f"{rid} {field} {field_type_int} extracted_text does not exist on DB")
|
207
159
|
return ""
|
208
160
|
start = start - 1
|
209
161
|
if start < 0:
|
@@ -216,36 +168,42 @@ async def get_text_sentence(
|
|
216
168
|
return splitted_text
|
217
169
|
|
218
170
|
|
219
|
-
def get_regex(some_string: str) -> str:
|
220
|
-
return r"\b" + some_string.lower() + r"\b"
|
221
|
-
|
222
|
-
|
223
171
|
def highlight_paragraph(
|
224
172
|
text: str, words: Optional[list[str]] = None, ematches: Optional[list[str]] = None
|
225
173
|
) -> str:
|
174
|
+
"""
|
175
|
+
Highlight `text` with <mark></mark> tags around the words in `words` and `ematches`.
|
176
|
+
|
177
|
+
Parameters:
|
178
|
+
- text: The text to highlight.
|
179
|
+
- words: A list of words to highlight.
|
180
|
+
- ematches: A list of exact matches to highlight.
|
181
|
+
|
182
|
+
Returns:
|
183
|
+
- The highlighted text.
|
184
|
+
"""
|
185
|
+
REGEX_TEMPLATE = r"(^|\s)({text})(\s|$)"
|
226
186
|
text_lower = text.lower()
|
227
187
|
|
228
188
|
marks = [0] * (len(text_lower) + 1)
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
)
|
241
|
-
continue
|
189
|
+
ematches = ematches or []
|
190
|
+
for quote in ematches:
|
191
|
+
quote_regex = REGEX_TEMPLATE.format(text=re.escape(quote.lower()))
|
192
|
+
try:
|
193
|
+
for match in re.finditer(quote_regex, text_lower):
|
194
|
+
start, end = match.span(2)
|
195
|
+
marks[start] = 1
|
196
|
+
marks[end] = 2
|
197
|
+
except re.error:
|
198
|
+
logger.warning(f"Regex errors while highlighting text. Regex: {quote_regex}")
|
199
|
+
continue
|
242
200
|
|
243
201
|
words = words or []
|
244
202
|
for word in words:
|
245
|
-
word_regex =
|
203
|
+
word_regex = REGEX_TEMPLATE.format(text=re.escape(word.lower()))
|
246
204
|
try:
|
247
205
|
for match in re.finditer(word_regex, text_lower):
|
248
|
-
start, end = match.span()
|
206
|
+
start, end = match.span(2)
|
249
207
|
if marks[start] == 0 and marks[end] == 0:
|
250
208
|
marks[start] = 1
|
251
209
|
marks[end] = 2
|
@@ -0,0 +1,233 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
import logging
|
22
|
+
from collections import defaultdict
|
23
|
+
from typing import Any, cast
|
24
|
+
|
25
|
+
from psycopg.rows import dict_row
|
26
|
+
|
27
|
+
from nucliadb.common.maindb.pg import PGDriver
|
28
|
+
from nucliadb.common.maindb.utils import get_driver
|
29
|
+
from nucliadb.search.search.query_parser.models import CatalogQuery
|
30
|
+
from nucliadb_models.labels import translate_system_to_alias_label
|
31
|
+
from nucliadb_models.metadata import ResourceProcessingStatus
|
32
|
+
from nucliadb_models.search import (
|
33
|
+
ResourceResult,
|
34
|
+
Resources,
|
35
|
+
SortField,
|
36
|
+
SortOrder,
|
37
|
+
)
|
38
|
+
from nucliadb_telemetry import metrics
|
39
|
+
|
40
|
+
from .filters import translate_label
|
41
|
+
|
42
|
+
# Metrics observer for the catalog search; the "op" label defaults to an empty
# string and is set per operation by the code below ("search", "facets",
# "totals", "query").
observer = metrics.Observer("pg_catalog_search", labels={"op": ""})
|
43
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
44
|
+
|
45
|
+
|
46
|
+
def _filter_operands(operands):
|
47
|
+
literals = []
|
48
|
+
nonliterals = []
|
49
|
+
for operand in operands:
|
50
|
+
op, params = next(iter(operand.items()))
|
51
|
+
if op == "literal":
|
52
|
+
literals.append(params)
|
53
|
+
else:
|
54
|
+
nonliterals.append(operand)
|
55
|
+
|
56
|
+
return literals, nonliterals
|
57
|
+
|
58
|
+
|
59
|
+
def _convert_filter(filter, filter_params):
|
60
|
+
op, operands = next(iter(filter.items()))
|
61
|
+
if op == "literal":
|
62
|
+
param_name = f"param{len(filter_params)}"
|
63
|
+
filter_params[param_name] = [operands]
|
64
|
+
return f"labels @> %({param_name})s"
|
65
|
+
elif op in ("and", "or"):
|
66
|
+
array_op = "@>" if op == "and" else "&&"
|
67
|
+
sql = []
|
68
|
+
literals, nonliterals = _filter_operands(operands)
|
69
|
+
if literals:
|
70
|
+
param_name = f"param{len(filter_params)}"
|
71
|
+
filter_params[param_name] = literals
|
72
|
+
sql.append(f"labels {array_op} %({param_name})s")
|
73
|
+
for nonlit in nonliterals:
|
74
|
+
sql.append(_convert_filter(nonlit, filter_params))
|
75
|
+
return "(" + f" {op.upper()} ".join(sql) + ")"
|
76
|
+
elif op == "not":
|
77
|
+
return f"(NOT {_convert_filter(operands, filter_params)})"
|
78
|
+
else:
|
79
|
+
raise ValueError(f"Invalid operator {op}")
|
80
|
+
|
81
|
+
|
82
|
+
def _prepare_query(catalog_query: CatalogQuery):
    """Build the base ``SELECT`` over the ``catalog`` table for a catalog query.

    Returns:
        A ``(sql, params)`` tuple: the SQL text with ``%(name)s`` placeholders
        (filters plus optional ``ORDER BY``, without ``LIMIT``/``OFFSET``) and
        the mapping of values for those placeholders.
    """
    filters = catalog_query.filters
    params: dict[str, Any] = {"kbid": catalog_query.kbid}
    conditions = ["kbid = %(kbid)s"]

    if catalog_query.query:
        # Tokenization happens inside the SQL server (to keep the index updated). It
        # could move to the python code at update/query time if it ever becomes a
        # problem, but for now a single regex executed per query is not a problem.
        conditions.append(
            "regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
        )
        params["query"] = catalog_query.query

    # (condition, parameter name, bound value) for each optional date limit.
    date_bounds = (
        ("created_at > %(created_at_start)s", "created_at_start", filters.creation.after),
        ("created_at < %(created_at_end)s", "created_at_end", filters.creation.before),
        ("modified_at > %(modified_at_start)s", "modified_at_start", filters.modification.after),
        ("modified_at < %(modified_at_end)s", "modified_at_end", filters.modification.before),
    )
    for condition, param_name, bound in date_bounds:
        if bound:
            conditions.append(condition)
            params[param_name] = bound

    # Must run after the parameters above are registered: the label filter
    # names its parameters after the current size of the mapping.
    if filters.labels:
        conditions.append(_convert_filter(filters.labels, params))

    order_clause = ""
    sort = catalog_query.sort
    if sort:
        sortable_columns = (
            (SortField.CREATED, "created_at"),
            (SortField.MODIFIED, "modified_at"),
            (SortField.TITLE, "title"),
        )
        for candidate, column in sortable_columns:
            if sort.field == candidate:
                order_column = column
                break
        else:
            # Deprecated order by score, use created_at instead
            order_column = "created_at"

        direction = "ASC" if sort.order == SortOrder.ASC else "DESC"
        order_clause = f" ORDER BY {order_column} {direction}"

    status = filters.with_status
    if status:
        conditions.append("labels && %(status)s")
        if status == ResourceProcessingStatus.PROCESSED:
            params["status"] = ["/n/s/PROCESSED", "/n/s/ERROR"]
        else:
            params["status"] = ["/n/s/PENDING"]

    where_clause = " AND ".join(conditions)
    return f"SELECT * FROM catalog WHERE {where_clause}{order_clause}", params
|
144
|
+
|
145
|
+
|
146
|
+
def _pg_driver() -> PGDriver:
    """Return the driver given by ``get_driver()``, typed as a ``PGDriver``.

    ``cast`` only informs the type checker; no runtime check is performed here.
    """
    driver = get_driver()
    return cast(PGDriver, driver)
|
148
|
+
|
149
|
+
|
150
|
+
@observer.wrap({"op": "search"})
async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
    """Run a catalog search against the ``catalog`` table.

    Up to three statements are executed on the same cursor: an optional facet
    count (only when ``catalog_query.faceted`` is set), the total number of
    matches, and the requested page of results.

    Args:
        catalog_query: Parsed catalog query (filters, sort, facets, paging).

    Returns:
        A ``Resources`` object with the page of results, the facet counts, the
        total number of matches and whether a next page exists. Scores are
        always 0 and only ``/l/`` labels are reported on each result.
    """
    # Prepare SQL query
    query, query_params = _prepare_query(catalog_query)

    async with _pg_driver()._get_connection() as conn, conn.cursor(row_factory=dict_row) as cur:
        facets = {}

        # Faceted search
        if catalog_query.faceted:
            with observer({"op": "facets"}):
                tmp_facets: dict[str, dict[str, int]] = {
                    translate_label(f): defaultdict(int) for f in catalog_query.faceted
                }

                # Facet prefixes come from the request, so they are bound as query
                # parameters instead of being interpolated into the SQL text.
                facet_conditions = []
                facet_params: dict[str, Any] = {}
                for index, facet in enumerate(tmp_facets):
                    if not facet.startswith(("/n/s", "/n/i", "/l")):
                        logger.warning(
                            f"Unexpected facet used at catalog: {facet}, kbid={catalog_query.kbid}"
                        )
                    param_name = f"facet_prefix_{index}"
                    facet_conditions.append(f"label LIKE %({param_name})s")
                    # LIKE wildcards inside the facet are not escaped (same as before);
                    # spurious matches are dropped by the parent/grandparent checks below.
                    facet_params[param_name] = f"{facet}/%"
                facet_filters = " OR ".join(facet_conditions)

                await cur.execute(
                    f"SELECT label, COUNT(*) FROM (SELECT unnest(labels) AS label FROM ({query}) fc) nl WHERE ({facet_filters}) GROUP BY 1 ORDER BY 1",
                    {**query_params, **facet_params},
                )

                for row in await cur.fetchall():
                    label = row["label"]
                    label_parts = label.split("/")
                    parent = "/".join(label_parts[:-1])
                    count = row["count"]
                    if parent in tmp_facets:
                        tmp_facets[parent][translate_system_to_alias_label(label)] = count

                    # No need to get recursive because our facets are at most 3 levels deep (e.g: /l/set/label)
                    if len(label_parts) >= 3:
                        grandparent = "/".join(label_parts[:-2])
                        if grandparent in tmp_facets:
                            tmp_facets[grandparent][translate_system_to_alias_label(parent)] += count

                facets = {translate_system_to_alias_label(k): v for k, v in tmp_facets.items()}

        # Totals
        with observer({"op": "totals"}):
            await cur.execute(
                f"SELECT COUNT(*) FROM ({query}) fc",
                query_params,
            )
            total = (await cur.fetchone())["count"]  # type: ignore

        # Query
        with observer({"op": "query"}):
            offset = catalog_query.page_size * catalog_query.page_number
            await cur.execute(
                f"{query} LIMIT %(page_size)s OFFSET %(offset)s",
                {
                    **query_params,
                    "page_size": catalog_query.page_size,
                    "offset": offset,
                },
            )
            data = await cur.fetchall()

    return Resources(
        facets=facets,
        results=[
            ResourceResult(
                # The resource id is reported without dashes.
                rid=str(r["rid"]).replace("-", ""),
                field="title",
                field_type="a",
                labels=[label for label in r["labels"] if label.startswith("/l/")],
                score=0,
            )
            for r in data
        ],
        query=catalog_query.query,
        total=total,
        page_number=catalog_query.page_number,
        page_size=catalog_query.page_size,
        next_page=(offset + len(data) < total),
        min_score=0,
    )
|