PyPI - ckanext-search-tweaks - Versions diffs - 0.6.3__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

ckanext-search-tweaks 0.6.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

ckanext/__init__.py CHANGED Viewed

@@ -1,4 +1,3 @@
 # this is a namespace package
 try:
     import pkg_resources

ckanext/search_tweaks/advanced_search/assets/advanced-search.css CHANGED Viewed

@@ -24,7 +24,3 @@
 .search-tweaks-advanced-search.enabled.use-solr-query .advanced-search-submit {
   display: none;
 }
-.advanced-toggles label:after {
-  content: none;
-}

ckanext/search_tweaks/advanced_search/plugin.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 import json
 from typing import Any
+from ckan import types
 import ckan.plugins as p
 import ckan.plugins.toolkit as tk
 from ckan.exceptions import CkanConfigurationException
@@ -55,6 +56,16 @@ def form_config():
     }
+@tk.side_effect_free
+def advanced_search_config(
+    context: types.Context,
+    data_dict: dict[str, Any],
+) -> dict[str, Any]:
+    """Configuration for advanced search fields."""
+    return tk.h.advanced_search_form_config()
+@tk.blanket.actions({"advanced_search_config": advanced_search_config})
 class AdvancedSearchPlugin(p.SingletonPlugin):
     p.implements(p.IConfigurer, inherit=True)
     p.implements(p.IConfigurable)

ckanext/search_tweaks/advanced_search/templates/advanced_search/search_form.html CHANGED Viewed

@@ -39,7 +39,8 @@ solr_syntax_blank - open SOLR syntax article in the new tab
 <div class="input-group search-input-group">
     <input type="text" class="form-control input-lg"
 	   name="q" value="{{ query }}" autocomplete="off"
-	   placeholder="{{ placeholder }}">
+	   placeholder="{{ placeholder }}"
+	   aria-label="Search data">
     {{ search_button }}
 </div>
 {% endset %}
@@ -50,7 +51,8 @@ solr_syntax_blank - open SOLR syntax article in the new tab
     <input type="text" class="form-control input-lg"
 	   value="{{ request.args.ext_solr_q }}" autocomplete="off"
 	   name="ext_solr_q" disabled
-	   placeholder="{{ labels.solr_placeholder or _('metadata_created:[NOW/YEAR TO *] -tags:Health') }}" >
+	   placeholder="{{ labels.solr_placeholder or _('metadata_created:[NOW/YEAR TO *] -tags:Health') }}"
+	   aria-label="Search using Solr query">
     {{ search_button }}
 </div>
 {% endset %}
@@ -95,7 +97,7 @@ solr_syntax_blank - open SOLR syntax article in the new tab
 					{{ labels.solr_toggle or _('Add query syntax to search') }}
 					</label>
 					{% block solr_syntax_url %}
-						<a {% if solr_syntax_blank %}target="_blank"{% endif %} href="{{ solr_syntax_url }}">
+						<a {% if solr_syntax_blank %}target="_blank"{% endif %} href="{{ solr_syntax_url }}" aria-label="Learn more about SOLR query parameters">
 						<i class="fa fa-info-circle" data-placement="top" data-toggle="tooltip"
 						title="{{ labels.solr_explanation or _('This adds SOLR query language, for more information on how to use click here') }}"></i>
 						</a>

ckanext/search_tweaks/field_relevance/plugin.py CHANGED Viewed

@@ -8,8 +8,6 @@ import ckan.plugins.toolkit as tk
 from ckanext.search_tweaks.interfaces import ISearchTweaks
 from ckanext.search_tweaks.shared import feature_disabled
-from . import views
 CONFIG_BOOST_FN = "ckanext.search_tweaks.field_relevance.boost_function"
 DEFAULT_BOOST_FN = None

ckanext/search_tweaks/field_relevance/views.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any
 from flask import Blueprint
 from flask.views import MethodView
+import ckan.types as types
 import ckan.model as model
 import ckan.plugins.toolkit as tk
@@ -46,7 +47,8 @@ class PromoteView(MethodView):
                 tk.get_validator("convert_int"),
                 tk.get_validator("int_validator"),
                 tk.get_validator("limit_to_configured_maximum")(
-                    CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION,
+                    CONFIG_MAX_PROMOTION,
+                    DEFAULT_MAX_PROMOTION,
                 ),
             ],
         }
@@ -54,14 +56,15 @@ class PromoteView(MethodView):
         data, errors = tk.navl_validate(
             dict(tk.request.form),
             schema,
-            {"model": model, "session": model.Session},
+            types.Context(model=model, session=model.Session),  # type: ignore
         )
         if errors:
             return self.get(id, data, errors)
         try:
             pkg_dict = tk.get_action("package_patch")(
-                {}, {"id": id, field: data[field]},
+                {},
+                {"id": id, field: data[field]},
             )
         except tk.ValidationError as e:
             for k, v in e.error_summary.items():
@@ -86,7 +89,6 @@ class PromoteView(MethodView):
             "min_promotion": tk.asint(
                 tk.config.get(CONFIG_MIN_PROMOTION, DEFAULT_MIN_PROMOTION),
             ),
             "max_promotion": tk.asint(
                 tk.config.get(CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION),
             ),
@@ -99,4 +101,7 @@ class PromoteView(MethodView):
 if tk.asbool(
     tk.config.get(CONFIG_ENABLE_PROMOTION_ROUTE, DEFAULT_ENABLE_PROMOTION_ROUTE),
 ):
-    field_relevance.add_url_rule("/dataset/promote/<id>", view_func=PromoteView.as_view("promote"))
+    field_relevance.add_url_rule(
+        "/dataset/promote/<id>",
+        view_func=PromoteView.as_view("promote"),
+    )

ckanext/search_tweaks/interfaces.py CHANGED Viewed

@@ -24,9 +24,7 @@ class ISearchTweaks(Interface):
         return None
 class IQueryPopularity(Interface):
     def skip_query_popularity(self, params: dict[str, Any]) -> bool:
-        """Do not index search query.
-        """
+        """Do not index search query."""
         return False

ckanext/search_tweaks/plugin.py CHANGED Viewed

@@ -33,7 +33,6 @@ class SearchTweaksPlugin(plugins.SingletonPlugin):
         if config.prefer_boost() and search_params["defType"] == "edismax":
             _set_boost(search_params)
         else:
             _set_bf(search_params)

ckanext/search_tweaks/query_popularity/logic/schema.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 from ckan.logic.schema import validator_args
 @validator_args
 def query_popularity_import(not_empty, boolean_validator, convert_to_json_if_string):
     return {

ckanext/search_tweaks/query_popularity/score.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from __future__ import annotations
 import logging
-from typing import Any, Iterable
+from typing import Any
+from collections.abc import Iterable
 from ckanext.toolbelt.utils.tracking import DateTracker

ckanext/search_tweaks/query_relevance/__init__.py CHANGED Viewed

@@ -13,38 +13,63 @@ from .score import QueryScore, normalize_query
 __all__ = ["QueryScore", "normalize_query", "update_score_by_url"]
-def update_score_by_url(pkg: model.Package, ref: str | None = None) -> bool:
-    """Make given package more relevant for the current search query."""
-    if tk.request:
-        ref = ref or tk.request.referrer
+def update_score_by_url(pkg: model.Package, referrer: str | None = None) -> bool:
+    """Boost the relevance of the given package for the current search query
-    if not ref:
+    Args:
+        pkg: the package to boost
+        referrer: the URL of the current request
+    Returns:
+        True if the package was boosted, False otherwise
+    """
+    referrer = referrer or (tk.request.referrer if tk.request else None)
+    if not referrer:
         return False
-    url = urlparse(ref)
-    if not _path_has_score_for(url.path, pkg):
+    url = urlparse(referrer)
+    if not _is_scoring_enabled_for_path(url.path, pkg):
         return False
     query = parse_qs(url.query.lstrip("?"))
     if "q" not in query:
         return False
-    q = query["q"][0]
-    score = QueryScore(pkg.id, q)
-    score.increase(1)
+    QueryScore(pkg.id, query["q"][0]).increase(1)
     return True
-def _path_has_score_for(path: str, pkg: model.Package) -> bool:
+def _is_scoring_enabled_for_path(path: str, package: model.Package) -> bool:
+    """
+    Determine if a given URL path should have scoring enabled.
+    Checks if the provided path matches any of the following URL patterns that
+    support scoring functionality:
+    Args:
+        path: The URL path to check
+        package: The package object containing type and owner_org info
+    Returns:
+        True if the path should have scoring enabled, False otherwise
+    """
     path = path.rstrip("/")
     if path == tk.h.url_for("dataset.search").rstrip("/"):
         return True
     with contextlib.suppress(BuildError):
-        if path == tk.h.url_for(pkg.type + ".search").rstrip("/"):
+        if path == tk.h.url_for(package.type + ".search").rstrip("/"):
             return True
-    org = model.Group.get(pkg.owner_org)
+    org = model.Group.get(package.owner_org)
     if not org:
         return False

ckanext/search_tweaks/query_relevance/boost.py ADDED Viewed

@@ -0,0 +1,75 @@
+from __future__ import annotations
+from ckanext.search_tweaks.config import prefer_boost
+from . import QueryScore
+from .config import get_min_boost, get_max_boost, get_max_boost_count
+def build_boost_query_function(search_query: str) -> str | None:
+    """Build boost query function for given search query.
+    Args:
+        search_query: normalized query
+    Returns:
+        Boost function
+    """
+    boosts, max_score = get_boost_values(search_query)
+    min_boost = get_min_boost()
+    max_boost = get_max_boost()
+    if prefer_boost():
+        boost_expr = "1"
+        for pkg_id, raw_score in sorted(boosts.items(), reverse=True):
+            scaled = scale_score(raw_score, max_score, min_boost, max_boost)
+            boost_expr = f'if(eq(id,"{pkg_id}"),{scaled},{boost_expr})'
+        return f"sum(0, {boost_expr})"
+    else:
+        boost_parts = []
+        for pkg_id, raw_score in boosts.items():
+            score = scale_score(raw_score, max_score, min_boost, max_boost)
+            boost_parts.append(f'if(eq(id,"{pkg_id}"),{score},0)')
+        return f"sum(1,{','.join(boost_parts)})"
+def get_boost_values(search_query: str) -> tuple[dict[str, float], float]:
+    boosts = {}
+    max_score = 0
+    for entry in QueryScore.get_for_query(search_query):
+        package_id, score = entry
+        if score > max_score:
+            max_score = score
+        boosts[package_id.decode("utf-8")] = score
+    return boosts, max_score
+def scale_score(
+    value: float,
+    max_value: float,
+    min_boost: float,
+    max_boost: float,
+) -> float:
+    """
+    Linearly scales a value to the range [min_boost, max_boost].
+    This prevents datasets with high scores
+    from overpowering search relevance, ensuring more balanced results.
+    """
+    if max_value == 0:
+        return min_boost
+    value = max(0, min(value, max_value))
+    scaled = min_boost + (value / max_value) * (max_boost - min_boost)
+    return round(scaled, 4)

ckanext/search_tweaks/query_relevance/cli.py CHANGED Viewed

@@ -3,16 +3,15 @@ from __future__ import annotations
 import csv
 import datetime
 import logging
+from typing import TextIO
 import click
 import freezegun
 import ckan.model as model
-from ckan.lib.redis import connect_to_redis
-from ckan.lib.search import rebuild
 from . import QueryScore
-_search_csv_headers = ["package_id", "search_query", "count_of_hits"]
 log = logging.getLogger(__name__)
@@ -25,84 +24,47 @@ def query():
 @query.command("import")
 @click.argument("source", type=click.File())
 @click.option("--date", type=datetime.date.fromisoformat)
-def import_source(source, date):
+def import_source(source: TextIO, date) -> None:
     """Import search stats from source"""
     if not date:
         date = datetime.date.today()
     with freezegun.freeze_time(date):
         reader = csv.DictReader(source)
         for row in reader:
             pkg = model.Package.get(row["package_id"])
             if not pkg:
                 click.secho(f"Package {row['package_id']} does not exists", fg="red")
                 continue
             score = QueryScore(pkg.id, row["search_query"])
             score.reset()
             score.increase(int(row["count_of_hits"]))
     click.secho("Done", fg="green")
 @query.command()
 @click.argument("output", type=click.File("w"), required=False)
-def export(output):
+def export(output: TextIO | None) -> None:
     """Export search stats into specified file."""
     rows = QueryScore.get_all()
     if output:
         writer = csv.writer(output)
-        writer.writerow(_search_csv_headers)
+        writer.writerow(["package_id", "search_query", "count_of_hits"])
         writer.writerows(rows)
     else:
         for row in rows:
-            click.echo("Id: %s, query: %s, count: %d" % row)
-    click.secho("Done", fg="green")
+            click.echo("ID: {}, query: {}, count: {}".format(*row))
-@query.command()
-def align():
-    """Remove old records."""
-    rows = QueryScore.get_all()
-    for id_, query, _ in rows:
-        score = QueryScore(id_, query)
-        score.align()
+    click.secho("Done", fg="green")
 @query.command()
-@click.option("--days", "-d", type=int, default=1)
-@click.argument("file")
-@click.pass_context
-def safe_export(ctx, days, file):
-    """Export stats if redis haven't been reloaded recently.
-    If redis runs less than N days, it was reloaded recently and contains no
-    stats. We have to import old snapshot into it.
-    If redis is up for N days and more, it contains relevant stats. We can
-    safely export them and overwrite old snapshot.
-    """
-    conn = connect_to_redis()
-    uptime = conn.info()["uptime_in_days"]
-    if uptime >= days:
-        click.secho(f"Redis runs for {uptime} days. Creating snapshot..", fg="green")
-        ctx.invoke(export, output=click.File("w")(file))
-    else:
-        click.secho(
-            f"Redis runs for {uptime} days. Restore stats from snapshot..",
-            fg="red",
-        )
-        ctx.invoke(import_source, source=click.File()(file))
+def reset() -> None:
+    """Reset query relevance scores"""
+    QueryScore.reset_all()
-@query.command()
-def index():
-    """Re-index datasets that have query relevance scores.
-    """
-    storage = QueryScore.default_storage_class()
-    ids = {id for id, _, _ in storage.scan()}
-    with click.progressbar(ids) as bar:
-        for id in bar:
-            try:
-                rebuild(id)
-            except Exception:
-                log.exception("Cannot index %s", id)
+    click.secho("Done", fg="green")

ckanext/search_tweaks/query_relevance/config.py ADDED Viewed

@@ -0,0 +1,29 @@
+import ckan.plugins.toolkit as tk
+CONF_MIN_BOOST = "ckanext.search_tweaks.query_relevance.min_boost"
+CONF_MAX_BOOST = "ckanext.search_tweaks.query_relevance.max_boost"
+CONF_MAX_BOOST_COUNT = "ckanext.search_tweaks.query_relevance.max_boost_count"
+def get_min_boost() -> float:
+    return as_float(tk.config[CONF_MIN_BOOST])
+def get_max_boost() -> float:
+    return as_float(tk.config[CONF_MAX_BOOST])
+def get_max_boost_count() -> int:
+    return tk.config[CONF_MAX_BOOST_COUNT]
+def as_float(number: str) -> float:
+    """Convert a string into a float.
+    Example:
+        assert as_float("1.5") == 1.5
+    """
+    try:
+        return float(number)
+    except (TypeError, ValueError):
+        raise ValueError("Bad float value: {}".format(number))

ckanext/search_tweaks/query_relevance/config_declaration.yaml ADDED Viewed

@@ -0,0 +1,16 @@
+version: 1
+groups:
+  - annotation: "ckanext-search-tweaks:query_relevance"
+    options:
+      - key: ckanext.search_tweaks.query_relevance.min_boost
+        default: 1
+        description: Minimum boost to apply to a query. Use float values
+      - key: ckanext.search_tweaks.query_relevance.max_boost
+        default: 1.5
+        description: Maximum boost to apply to a query. Use float values
+      - key: ckanext.search_tweaks.query_relevance.max_boost_count
+        type: int
+        default: 60
+        description: Maximum number of boosts to apply to a query

ckanext/search_tweaks/query_relevance/plugin.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from __future__ import annotations
-from string import Template
 from typing import Any
 import ckan.plugins as plugins
@@ -10,15 +9,11 @@ from ckanext.search_tweaks.cli import attach_relevance_command
 from ckanext.search_tweaks.interfaces import ISearchTweaks
 from ckanext.search_tweaks.shared import feature_disabled
-from . import QueryScore, cli, normalize_query, update_score_by_url
-CONFIG_BOOST_STRING = "ckanext.search_tweaks.query_relevance.boost_function"
-CONFIG_RELEVANCE_PREFIX = "ckanext.search_tweaks.query_relevance.field_prefix"
-DEFAULT_BOOST_STRING = "scale(def($field,0),1,1.2)"
-DEFAULT_RELEVANCE_PREFIX = "query_relevance_"
+from . import cli, normalize_query, update_score_by_url
+from .boost import build_boost_query_function
+@tk.blanket.config_declarations
 class QueryRelevancePlugin(plugins.SingletonPlugin):
     plugins.implements(plugins.IConfigurable)
     plugins.implements(plugins.IPackageController, inherit=True)
@@ -31,15 +26,6 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
     # IPackageController
-    def before_dataset_index(self, pkg_dict):
-        prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
-        for _, query, score in QueryScore.get_for(pkg_dict["id"]):
-            query = query.replace(" ", "_")
-            pkg_dict[prefix + query] = score
-        return pkg_dict
     def read(self, entity):
         # update search relevance only for WEB-requests. Any kind of
         # CLI/search-index manipulations has no effect on it
@@ -49,17 +35,12 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
     # ISearchTweaks
     def get_search_boost_fn(self, search_params: dict[str, Any]) -> str | None:
-        if feature_disabled("query_boost", search_params) or not search_params.get("q"):
-            return None
+        q = search_params.get("q")
-        normalized = normalize_query(search_params["q"]).replace(" ", "_")
-        if not normalized:
+        if feature_disabled("query_boost", search_params) or not q:
             return None
-        prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
-        field = prefix + normalized
-        boost_string = Template(
-            tk.config.get(CONFIG_BOOST_STRING, DEFAULT_BOOST_STRING),
-        )
+        if normalized := normalize_query(q).replace(" ", "_"):
+            return build_boost_query_function(normalized)
-        return boost_string.safe_substitute({"field": field})
+        return None

ckanext/search_tweaks/query_relevance/score.py CHANGED Viewed

@@ -1,20 +1,5 @@
-from typing import Optional, Type
-import ckan.plugins.toolkit as tk
-from .storage import (
-    PermanentRedisScoreStorage,
-    DailyRedisScoreStorage,
-    ScoreStorage,
-)
-_backends = {
-    "redis-permanent": PermanentRedisScoreStorage,
-    "redis-daily": DailyRedisScoreStorage,
-}
-CONFIG_BACKEND = "ckanext.search_tweaks.query_relevance.backend"
-DEFAULT_BACKEND = "redis-daily"
-DEFAULT_SCORE_STORAGE_CLASS = DailyRedisScoreStorage
+from .storage import QueryHitTracker
+from .config import get_max_boost_count
 def normalize_query(query: str) -> str:
@@ -26,50 +11,32 @@ def normalize_query(query: str) -> str:
 class QueryScore:
-    storage_class: Type[ScoreStorage]
-    def __init__(
-        self,
-        id_: str,
-        query: str,
-        *,
-        normalize: bool = True,
-        storage_class: Optional[Type[ScoreStorage]] = None,
-    ):
+    def __init__(self, entity_id: str, query: str, normalize: bool = True):
         if normalize:
             query = normalize_query(query)
-        if storage_class:
-            self.storage_class = storage_class
-        else:
-            self.storage_class = self.default_storage_class()
-        self.storage = self.storage_class(id_, query)
+        self.entity_id = entity_id
+        self.query = query
+        self.storage = QueryHitTracker(self.entity_id, self.query)
     def __int__(self):
         return self.storage.get()
-    @staticmethod
-    def default_storage_class() -> Type[ScoreStorage]:
-        return _backends[tk.config.get(CONFIG_BACKEND, DEFAULT_BACKEND)]
-    @property
-    def query(self):
-        return self.storage.query
-    def increase(self, n: int) -> None:
-        self.storage.inc(n)
-    def align(self):
-        self.storage.align()
+    def increase(self, amount: int) -> None:
+        self.storage.increase(amount)
     def reset(self):
-        self.storage.reset()
+        self.storage.reset(self.query)
+    @classmethod
+    def get_for_query(cls, query: str, limit: int | None = None) -> list[tuple[bytes, float]]:
+        return QueryHitTracker.top(query, limit or get_max_boost_count())
     @classmethod
     def get_all(cls):
-        storage = cls.default_storage_class()
-        return storage.scan()
+        return QueryHitTracker.get_all()
     @classmethod
-    def get_for(cls, id_: str):
-        return cls.default_storage_class().scan(id_)
+    def reset_all(cls):
+        return QueryHitTracker.reset_all()

ckanext-search-tweaks 0.6.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

ckanext-search-tweaks 0.6.3__py3-none-any.whl → 1.0.0__py3-none-any.whl