PyPI - nucliadb - Versions diffs - 6.2.1.post3251__py3-none-any.whl → 6.2.1.post3254__py3-none-any.whl - Mend

nucliadb 6.2.1.post3251__py3-none-any.whl → 6.2.1.post3254__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

nucliadb/search/search/query_parser/models.py CHANGED Viewed

@@ -62,9 +62,6 @@ class Reranker(BaseModel): ...
 class NoopReranker(Reranker): ...
-class MultiMatchBoosterReranker(Reranker): ...
 class PredictReranker(Reranker):
     window: int = Field(le=200)

nucliadb/search/search/query_parser/parser.py CHANGED Viewed

@@ -31,7 +31,6 @@ from nucliadb.search.search.query_parser.models import (
     CatalogFilters,
     CatalogQuery,
     DateTimeFilter,
-    MultiMatchBoosterReranker,
     NoopReranker,
     PredictReranker,
     RankFusion,
@@ -123,9 +122,6 @@ class _FindParser:
             if self.item.reranker == search_models.RerankerName.NOOP:
                 reranking = NoopReranker()
-            elif self.item.reranker == search_models.RerankerName.MULTI_MATCH_BOOSTER:
-                reranking = MultiMatchBoosterReranker()
             elif self.item.reranker == search_models.RerankerName.PREDICT_RERANKER:
                 # for predict rearnker, by default, we want a x2 factor with a
                 # top of 200 results

nucliadb/search/search/rerankers.py CHANGED Viewed

@@ -169,58 +169,17 @@ class PredictReranker(Reranker):
         return best
-class MultiMatchBoosterReranker(Reranker):
-    """This reranker gives more value to items that come from different indices"""
-    @property
-    def window(self) -> Optional[int]:
-        return None
-    @reranker_observer.wrap({"type": "multi_match_booster"})
-    async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
-        """Given a list of rerankable items, boost matches that appear multiple
-        times. The returned list can be smaller than the initial, as repeated
-        matches are deduplicated.
-        """
-        reranked_by_id = {}
-        for item in items:
-            if item.id not in reranked_by_id:
-                reranked_by_id[item.id] = RankedItem(
-                    id=item.id,
-                    score=item.score,
-                    score_type=item.score_type,
-                )
-            else:
-                # it's a mutiple match, boost the score
-                if reranked_by_id[item.id].score < item.score:
-                    # previous implementation noted that we are using vector
-                    # score x2 when we find a multiple match. However, this may
-                    # not be true, as the same paragraph could come in any
-                    # position in the rank fusioned result list
-                    reranked_by_id[item.id].score = item.score * 2
-                reranked_by_id[item.id].score_type = SCORE_TYPE.BOTH
-        reranked = list(reranked_by_id.values())
-        sort_by_score(reranked)
-        return reranked
 def get_reranker(reranker: parser_models.Reranker) -> Reranker:
     algorithm: Reranker
     if isinstance(reranker, parser_models.NoopReranker):
         algorithm = NoopReranker()
-    elif isinstance(reranker, parser_models.MultiMatchBoosterReranker):
-        algorithm = MultiMatchBoosterReranker()
     elif isinstance(reranker, parser_models.PredictReranker):
         algorithm = PredictReranker(reranker.window)
     else:
-        logger.warning(f"Unknown reranker requested: {reranker}. Using default instead")
-        algorithm = MultiMatchBoosterReranker()
+        raise ValueError(f"Unknown reranker requested: {reranker}")
     return algorithm

nucliadb/standalone/api_router.py CHANGED Viewed

@@ -17,14 +17,13 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-import datetime
 import logging
 import time
 import orjson
 import pydantic
 from fastapi import Request
-from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.responses import JSONResponse
 from fastapi.routing import APIRouter
 from fastapi_versioning import version
 from jwcrypto import jwe, jwk  # type: ignore
@@ -33,7 +32,7 @@ from nucliadb.common import datamanagers
 from nucliadb.common.cluster import manager
 from nucliadb.common.http_clients import processing
 from nucliadb.common.http_clients.auth import NucliaAuthHTTPClient
-from nucliadb.standalone import introspect, versions
+from nucliadb.standalone import versions
 from nucliadb_models.resource import NucliaDBRoles
 from nucliadb_utils.authentication import requires
 from nucliadb_utils.settings import nuclia_settings
@@ -146,17 +145,6 @@ async def versions_endpoint(request: Request) -> JSONResponse:
     )
-@standalone_api_router.get("/introspect")
-def introspect_endpoint(request: Request) -> StreamingResponse:
-    introspect_id = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-    return StreamingResponse(
-        content=introspect.stream_tar(request.app),
-        status_code=200,
-        headers={"Content-Disposition": f"attachment; filename=introspect_{introspect_id}.tar.gz"},
-        media_type="application/octet-stream",
-    )
 @standalone_api_router.get("/pull/position")
 async def pull_status(request: Request) -> JSONResponse:
     async with datamanagers.with_ro_transaction() as txn:

{nucliadb-6.2.1.post3251.dist-info → nucliadb-6.2.1.post3254.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nucliadb
-Version: 6.2.1.post3251
+Version: 6.2.1.post3254
 Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
 Author: NucliaDB Community
 Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9, <4
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3251
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3251
-Requires-Dist: nucliadb-protos>=6.2.1.post3251
-Requires-Dist: nucliadb-models>=6.2.1.post3251
+Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3254
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3254
+Requires-Dist: nucliadb-protos>=6.2.1.post3254
+Requires-Dist: nucliadb-models>=6.2.1.post3254
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn

{nucliadb-6.2.1.post3251.dist-info → nucliadb-6.2.1.post3254.dist-info}/RECORD RENAMED Viewed

@@ -221,7 +221,7 @@ nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3Le
 nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
 nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
 nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
-nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
+nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
 nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
 nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
 nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
@@ -234,14 +234,13 @@ nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeD
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
 nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
-nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
-nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
+nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
+nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
 nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/standalone/api_router.py,sha256=zR03TQ-Pd2kXx1jeV83Puw19112Z8Jhln7p1cAn69kg,6699
+nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
 nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
 nucliadb/standalone/auth.py,sha256=UwMv-TywhMZabvVg3anQLeCRdoHDnWf2o3luvnoNBjs,7670
 nucliadb/standalone/config.py,sha256=g9JBJQfyw87TYZ3yuy0O9WFVLd_MmCJxSRSI0E8FwZE,5396
-nucliadb/standalone/introspect.py,sha256=xHdHV-CB0Vy5cp1MQAodu0Pc8izpzl_lX2ARJJwL3RI,6083
 nucliadb/standalone/lifecycle.py,sha256=rdKLG-oOLN4rfd2VGG_2vlDUWYneWSCiuEhoeiFKfnM,2343
 nucliadb/standalone/migrations.py,sha256=s9-3RSZ-O3bjEw2TnBe_YWLUEKbub0bARUxi1gA3yuY,1950
 nucliadb/standalone/purge.py,sha256=ZY-cebb214FFiPG7OFmXZGg0G3CK5Amw0FLLm9WJhKE,1343
@@ -332,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.2.1.post3251.dist-info/METADATA,sha256=ykfBfM1MB4gZsChTKAkP6hzOw0DAlxBmAFMqMwQfh3Y,4603
-nucliadb-6.2.1.post3251.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nucliadb-6.2.1.post3251.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.2.1.post3251.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.2.1.post3251.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-nucliadb-6.2.1.post3251.dist-info/RECORD,,
+nucliadb-6.2.1.post3254.dist-info/METADATA,sha256=KxrWjVFc1AQo6-OO61isHxc49B03BWRNesKyXN_TugY,4603
+nucliadb-6.2.1.post3254.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nucliadb-6.2.1.post3254.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.2.1.post3254.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.2.1.post3254.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+nucliadb-6.2.1.post3254.dist-info/RECORD,,

nucliadb/standalone/introspect.py DELETED Viewed

@@ -1,183 +0,0 @@
-# Copyright (C) 2021 Bosutech XXI S.L.
-#
-# nucliadb is offered under the AGPL v3.0 and as commercial software.
-# For commercial licensing, contact us at info@nuclia.com.
-#
-# AGPL:
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-import asyncio
-import os
-import platform
-import sys
-import tarfile
-import tempfile
-from collections.abc import AsyncGenerator
-from typing import Optional
-import pkg_resources
-import psutil
-from fastapi import FastAPI
-from pydantic import BaseModel
-from nucliadb.standalone.settings import Settings
-from nucliadb_telemetry.settings import LogOutputType, LogSettings
-MB = 1024 * 1024
-CHUNK_SIZE = 2 * MB
-SYSTEM_INFO_TEMPLATE = """System info
-===========
-Python
-------
-    - Version: {python_version}
-Operative system
-----------------
-    - Name: {os_name}
-    - Release: {os_release}
-    - Version: {os_version}
-    - Machine: {os_machine}
-    - File System Encoding: {os_file_system_encoding}
-CPU information
----------------
-    - Number of CPUs: {cpu_count}
-Memory information
-------------------
-    - Total: {memory_total:.2f} MB
-    - Available: {memory_available:.2f} MB
-    - Used: {memory_used:.2f} MB
-    - Used %: {memory_used_percent:.2f}%
-"""
-class NodeInfo(BaseModel):
-    id: str
-    address: str
-    shard_count: int
-    primary_id: Optional[str] = None
-class ClusterInfo(BaseModel):
-    nodes: list[NodeInfo]
-async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
-    with tempfile.TemporaryDirectory() as temp_dir:
-        tar_file = os.path.join(temp_dir, "introspect.tar.gz")
-        with tarfile.open(tar_file, mode="w:gz") as tar:
-            await add_system_info(temp_dir, tar)
-            await add_dependencies(temp_dir, tar)
-            settings: Settings = app.settings.copy()  # type: ignore
-            await add_settings(temp_dir, tar, settings)
-            if settings.log_output_type == LogOutputType.FILE:
-                await add_logs(tar)
-        async for chunk in stream_out_tar(tar_file):
-            yield chunk
-async def stream_out_tar(tar_file: str) -> AsyncGenerator[bytes, None]:
-    loop = asyncio.get_event_loop()
-    with open(tar_file, "rb") as f:
-        chunk = await loop.run_in_executor(None, f.read, CHUNK_SIZE)
-        while chunk:
-            yield chunk
-            chunk = await loop.run_in_executor(None, f.read, CHUNK_SIZE)
-async def add_system_info(temp_dir: str, tar: tarfile.TarFile):
-    loop = asyncio.get_event_loop()
-    await loop.run_in_executor(None, _add_system_info_to_tar, temp_dir, tar)
-def _add_system_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
-    system_info_file = os.path.join(temp_dir, "system_info.txt")
-    with open(system_info_file, "w") as f:
-        memory = psutil.virtual_memory()
-        f.write(
-            SYSTEM_INFO_TEMPLATE.format(
-                python_version=sys.version,
-                os_name=os.uname().sysname,
-                os_release=platform.release(),
-                os_version=platform.version(),
-                os_machine=platform.machine(),
-                os_file_system_encoding=os.sys.getfilesystemencoding(),  # type: ignore
-                cpu_count=psutil.cpu_count(),
-                memory_total=memory.total / MB,
-                memory_available=memory.available / MB,
-                memory_used=memory.used / MB,
-                memory_used_percent=memory.percent,
-            )
-        )
-    tar.add(system_info_file, arcname="system_info.txt")
-async def add_dependencies(temp_dir: str, tar: tarfile.TarFile):
-    loop = asyncio.get_event_loop()
-    await loop.run_in_executor(None, _add_dependencies_to_tar, temp_dir, tar)
-def _add_dependencies_to_tar(temp_dir: str, tar: tarfile.TarFile):
-    dependendies_file = os.path.join(temp_dir, "dependencies.txt")
-    with open(dependendies_file, "w") as f:
-        installed_packages = [pkg for pkg in pkg_resources.working_set]
-        lines = []
-        for pkg in sorted(installed_packages, key=lambda p: p.key):
-            lines.append(f"{pkg.key}=={pkg.version}\n")
-        f.writelines(lines)
-    tar.add(dependendies_file, arcname="dependencies.txt")
-async def add_settings(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
-    loop = asyncio.get_event_loop()
-    await loop.run_in_executor(None, _add_settings_to_tar, temp_dir, tar, settings)
-def _add_settings_to_tar(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
-    remove_sensitive_settings(settings)
-    settings_file = os.path.join(temp_dir, "settings.json")
-    with open(settings_file, "w") as f:
-        f.write(settings.model_dump_json(indent=4))
-    tar.add(settings_file, arcname="settings.json")
-def remove_sensitive_settings(settings: Settings):
-    for sensitive_setting in [
-        "nua_api_key",
-        "jwk_key",
-        "gcs_base64_creds",
-        "s3_client_secret",
-        "driver_pg_url",
-    ]:
-        if hasattr(settings, sensitive_setting):
-            setattr(settings, sensitive_setting, "********")
-async def add_logs(tar):
-    loop = asyncio.get_event_loop()
-    await loop.run_in_executor(None, _add_logs_to_tar, tar)
-def _add_logs_to_tar(tar: tarfile.TarFile):
-    log_settings = LogSettings()
-    access_log = os.path.realpath(log_settings.access_log)
-    tar.add(access_log, arcname="logs/access.log")
-    error_log = os.path.realpath(log_settings.error_log)
-    tar.add(error_log, arcname="logs/error.log")
-    info_log = os.path.realpath(log_settings.info_log)
-    tar.add(info_log, arcname="logs/info.log")

{nucliadb-6.2.1.post3251.dist-info → nucliadb-6.2.1.post3254.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb-6.2.1.post3251.dist-info → nucliadb-6.2.1.post3254.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nucliadb-6.2.1.post3251.dist-info → nucliadb-6.2.1.post3254.dist-info}/top_level.txt RENAMED Viewed

File without changes

{nucliadb-6.2.1.post3251.dist-info → nucliadb-6.2.1.post3254.dist-info}/zip-safe RENAMED Viewed

File without changes

nucliadb 6.2.1.post3251__py3-none-any.whl → 6.2.1.post3254__py3-none-any.whl

nucliadb 6.2.1.post3251__py3-none-any.whl → 6.2.1.post3254__py3-none-any.whl