ckanext-search-tweaks 0.6.3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. ckanext/__init__.py +0 -1
  2. ckanext/search_tweaks/advanced_search/assets/advanced-search.css +0 -4
  3. ckanext/search_tweaks/advanced_search/plugin.py +11 -0
  4. ckanext/search_tweaks/advanced_search/templates/advanced_search/search_form.html +5 -3
  5. ckanext/search_tweaks/field_relevance/plugin.py +0 -2
  6. ckanext/search_tweaks/field_relevance/views.py +10 -5
  7. ckanext/search_tweaks/interfaces.py +1 -3
  8. ckanext/search_tweaks/plugin.py +0 -1
  9. ckanext/search_tweaks/query_popularity/logic/schema.py +1 -0
  10. ckanext/search_tweaks/query_popularity/score.py +2 -1
  11. ckanext/search_tweaks/query_relevance/__init__.py +38 -13
  12. ckanext/search_tweaks/query_relevance/boost.py +75 -0
  13. ckanext/search_tweaks/query_relevance/cli.py +16 -54
  14. ckanext/search_tweaks/query_relevance/config.py +29 -0
  15. ckanext/search_tweaks/query_relevance/config_declaration.yaml +16 -0
  16. ckanext/search_tweaks/query_relevance/plugin.py +8 -27
  17. ckanext/search_tweaks/query_relevance/score.py +17 -50
  18. ckanext/search_tweaks/query_relevance/storage.py +79 -137
  19. ckanext/search_tweaks/spellcheck/helpers.py +6 -2
  20. ckanext/search_tweaks/tests/conftest.py +13 -0
  21. ckanext/search_tweaks/tests/query_relevance/test_plugin.py +5 -5
  22. ckanext/search_tweaks/tests/query_relevance/test_search.py +84 -0
  23. ckanext/search_tweaks/tests/query_relevance/test_storage.py +23 -99
  24. ckanext/search_tweaks/tests/spellcheck/test_plugin.py +4 -2
  25. {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/METADATA +121 -123
  26. {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/RECORD +30 -26
  27. {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/WHEEL +1 -1
  28. {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/entry_points.txt +0 -0
  29. {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info/licenses}/LICENSE +0 -0
  30. {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/top_level.txt +0 -0
ckanext/__init__.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  # this is a namespace package
3
2
  try:
4
3
  import pkg_resources
@@ -24,7 +24,3 @@
24
24
  .search-tweaks-advanced-search.enabled.use-solr-query .advanced-search-submit {
25
25
  display: none;
26
26
  }
27
-
28
- .advanced-toggles label:after {
29
- content: none;
30
- }
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import json
4
4
  from typing import Any
5
5
 
6
+ from ckan import types
6
7
  import ckan.plugins as p
7
8
  import ckan.plugins.toolkit as tk
8
9
  from ckan.exceptions import CkanConfigurationException
@@ -55,6 +56,16 @@ def form_config():
55
56
  }
56
57
 
57
58
 
59
@tk.side_effect_free
def advanced_search_config(
    context: types.Context,
    data_dict: dict[str, Any],
) -> dict[str, Any]:
    """Return the advanced search form configuration as an API action.

    Neither ``context`` nor ``data_dict`` is consulted; the action simply
    exposes the result of the ``advanced_search_form_config`` template helper.
    """
    form_config = tk.h.advanced_search_form_config()
    return form_config
66
+
67
+
68
+ @tk.blanket.actions({"advanced_search_config": advanced_search_config})
58
69
  class AdvancedSearchPlugin(p.SingletonPlugin):
59
70
  p.implements(p.IConfigurer, inherit=True)
60
71
  p.implements(p.IConfigurable)
@@ -39,7 +39,8 @@ solr_syntax_blank - open SOLR syntax article in the new tab
39
39
  <div class="input-group search-input-group">
40
40
  <input type="text" class="form-control input-lg"
41
41
  name="q" value="{{ query }}" autocomplete="off"
42
- placeholder="{{ placeholder }}">
42
+ placeholder="{{ placeholder }}"
43
+ aria-label="Search data">
43
44
  {{ search_button }}
44
45
  </div>
45
46
  {% endset %}
@@ -50,7 +51,8 @@ solr_syntax_blank - open SOLR syntax article in the new tab
50
51
  <input type="text" class="form-control input-lg"
51
52
  value="{{ request.args.ext_solr_q }}" autocomplete="off"
52
53
  name="ext_solr_q" disabled
53
- placeholder="{{ labels.solr_placeholder or _('metadata_created:[NOW/YEAR TO *] -tags:Health') }}" >
54
+ placeholder="{{ labels.solr_placeholder or _('metadata_created:[NOW/YEAR TO *] -tags:Health') }}"
55
+ aria-label="Search using Solr query">
54
56
  {{ search_button }}
55
57
  </div>
56
58
  {% endset %}
@@ -95,7 +97,7 @@ solr_syntax_blank - open SOLR syntax article in the new tab
95
97
  {{ labels.solr_toggle or _('Add query syntax to search') }}
96
98
  </label>
97
99
  {% block solr_syntax_url %}
98
- <a {% if solr_syntax_blank %}target="_blank"{% endif %} href="{{ solr_syntax_url }}">
100
+ <a {% if solr_syntax_blank %}target="_blank"{% endif %} href="{{ solr_syntax_url }}" aria-label="Learn more about SOLR query parameters">
99
101
  <i class="fa fa-info-circle" data-placement="top" data-toggle="tooltip"
100
102
  title="{{ labels.solr_explanation or _('This adds SOLR query language, for more information on how to use click here') }}"></i>
101
103
  </a>
@@ -8,8 +8,6 @@ import ckan.plugins.toolkit as tk
8
8
  from ckanext.search_tweaks.interfaces import ISearchTweaks
9
9
  from ckanext.search_tweaks.shared import feature_disabled
10
10
 
11
- from . import views
12
-
13
11
  CONFIG_BOOST_FN = "ckanext.search_tweaks.field_relevance.boost_function"
14
12
 
15
13
  DEFAULT_BOOST_FN = None
@@ -5,6 +5,7 @@ from typing import Any
5
5
  from flask import Blueprint
6
6
  from flask.views import MethodView
7
7
 
8
+ import ckan.types as types
8
9
  import ckan.model as model
9
10
  import ckan.plugins.toolkit as tk
10
11
 
@@ -46,7 +47,8 @@ class PromoteView(MethodView):
46
47
  tk.get_validator("convert_int"),
47
48
  tk.get_validator("int_validator"),
48
49
  tk.get_validator("limit_to_configured_maximum")(
49
- CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION,
50
+ CONFIG_MAX_PROMOTION,
51
+ DEFAULT_MAX_PROMOTION,
50
52
  ),
51
53
  ],
52
54
  }
@@ -54,14 +56,15 @@ class PromoteView(MethodView):
54
56
  data, errors = tk.navl_validate(
55
57
  dict(tk.request.form),
56
58
  schema,
57
- {"model": model, "session": model.Session},
59
+ types.Context(model=model, session=model.Session), # type: ignore
58
60
  )
59
61
 
60
62
  if errors:
61
63
  return self.get(id, data, errors)
62
64
  try:
63
65
  pkg_dict = tk.get_action("package_patch")(
64
- {}, {"id": id, field: data[field]},
66
+ {},
67
+ {"id": id, field: data[field]},
65
68
  )
66
69
  except tk.ValidationError as e:
67
70
  for k, v in e.error_summary.items():
@@ -86,7 +89,6 @@ class PromoteView(MethodView):
86
89
  "min_promotion": tk.asint(
87
90
  tk.config.get(CONFIG_MIN_PROMOTION, DEFAULT_MIN_PROMOTION),
88
91
  ),
89
-
90
92
  "max_promotion": tk.asint(
91
93
  tk.config.get(CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION),
92
94
  ),
@@ -99,4 +101,7 @@ class PromoteView(MethodView):
99
101
  if tk.asbool(
100
102
  tk.config.get(CONFIG_ENABLE_PROMOTION_ROUTE, DEFAULT_ENABLE_PROMOTION_ROUTE),
101
103
  ):
102
- field_relevance.add_url_rule("/dataset/promote/<id>", view_func=PromoteView.as_view("promote"))
104
+ field_relevance.add_url_rule(
105
+ "/dataset/promote/<id>",
106
+ view_func=PromoteView.as_view("promote"),
107
+ )
@@ -24,9 +24,7 @@ class ISearchTweaks(Interface):
24
24
  return None
25
25
 
26
26
 
27
-
28
27
  class IQueryPopularity(Interface):
29
28
  def skip_query_popularity(self, params: dict[str, Any]) -> bool:
30
- """Do not index search query.
31
- """
29
+ """Do not index search query."""
32
30
  return False
@@ -33,7 +33,6 @@ class SearchTweaksPlugin(plugins.SingletonPlugin):
33
33
 
34
34
  if config.prefer_boost() and search_params["defType"] == "edismax":
35
35
  _set_boost(search_params)
36
-
37
36
  else:
38
37
  _set_bf(search_params)
39
38
 
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from ckan.logic.schema import validator_args
4
4
 
5
+
5
6
  @validator_args
6
7
  def query_popularity_import(not_empty, boolean_validator, convert_to_json_if_string):
7
8
  return {
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Any, Iterable
4
+ from typing import Any
5
+ from collections.abc import Iterable
5
6
 
6
7
  from ckanext.toolbelt.utils.tracking import DateTracker
7
8
 
@@ -13,38 +13,63 @@ from .score import QueryScore, normalize_query
13
13
  __all__ = ["QueryScore", "normalize_query", "update_score_by_url"]
14
14
 
15
15
 
16
- def update_score_by_url(pkg: model.Package, ref: str | None = None) -> bool:
17
- """Make given package more relevant for the current search query."""
18
- if tk.request:
19
- ref = ref or tk.request.referrer
16
+ def update_score_by_url(pkg: model.Package, referrer: str | None = None) -> bool:
17
+ """Boost the relevance of the given package for the current search query
20
18
 
21
- if not ref:
19
+ Args:
20
+ pkg: the package to boost
21
+ referrer: the URL of the current request
22
+
23
+ Returns:
24
+ True if the package was boosted, False otherwise
25
+ """
26
+
27
+ referrer = referrer or (tk.request.referrer if tk.request else None)
28
+
29
+ if not referrer:
22
30
  return False
23
31
 
24
- url = urlparse(ref)
25
- if not _path_has_score_for(url.path, pkg):
32
+ url = urlparse(referrer)
33
+
34
+ if not _is_scoring_enabled_for_path(url.path, pkg):
26
35
  return False
27
36
 
28
37
  query = parse_qs(url.query.lstrip("?"))
38
+
29
39
  if "q" not in query:
30
40
  return False
31
- q = query["q"][0]
32
41
 
33
- score = QueryScore(pkg.id, q)
34
- score.increase(1)
42
+ QueryScore(pkg.id, query["q"][0]).increase(1)
43
+
35
44
  return True
36
45
 
37
46
 
38
- def _path_has_score_for(path: str, pkg: model.Package) -> bool:
47
+ def _is_scoring_enabled_for_path(path: str, package: model.Package) -> bool:
48
+ """
49
+ Determine if a given URL path should have scoring enabled.
50
+
51
+ Checks if the provided path matches any of the following URL patterns that
52
+ support scoring functionality:
53
+
54
+ Args:
55
+ path: The URL path to check
56
+ package: The package object containing type and owner_org info
57
+
58
+ Returns:
59
+ True if the path should have scoring enabled, False otherwise
60
+ """
61
+
39
62
  path = path.rstrip("/")
63
+
40
64
  if path == tk.h.url_for("dataset.search").rstrip("/"):
41
65
  return True
42
66
 
43
67
  with contextlib.suppress(BuildError):
44
- if path == tk.h.url_for(pkg.type + ".search").rstrip("/"):
68
+ if path == tk.h.url_for(package.type + ".search").rstrip("/"):
45
69
  return True
46
70
 
47
- org = model.Group.get(pkg.owner_org)
71
+ org = model.Group.get(package.owner_org)
72
+
48
73
  if not org:
49
74
  return False
50
75
 
@@ -0,0 +1,75 @@
1
+ from __future__ import annotations
2
+
3
+ from ckanext.search_tweaks.config import prefer_boost
4
+
5
+ from . import QueryScore
6
+ from .config import get_min_boost, get_max_boost, get_max_boost_count
7
+
8
+
9
def build_boost_query_function(search_query: str) -> str | None:
    """Build boost query function for given search query.

    Recorded scores for the query are rescaled into the configured
    ``[min_boost, max_boost]`` range and turned into a Solr function
    expression keyed on the document ``id``.

    Args:
        search_query: normalized query

    Returns:
        Boost function, or ``None`` when no package has a recorded score
        for the query (there is nothing to boost).
    """
    boosts, max_score = get_boost_values(search_query)

    if not boosts:
        # Without this guard the additive branch would emit the degenerate
        # expression "sum(1,)" and the other branch a no-op "sum(0, 1)".
        return None

    min_boost = get_min_boost()
    max_boost = get_max_boost()

    if prefer_boost():
        # Nested if-chain: a matching id yields its scaled score, any other
        # document falls through to the neutral value 1.
        boost_expr = "1"

        for pkg_id, raw_score in sorted(boosts.items(), reverse=True):
            scaled = scale_score(raw_score, max_score, min_boost, max_boost)
            boost_expr = f'if(eq(id,"{pkg_id}"),{scaled},{boost_expr})'

        return f"sum(0, {boost_expr})"

    # Additive form: every term is 0 unless the document id matches, so the
    # result is 1 plus the scaled score of the matching package (if any).
    boost_parts = [
        f'if(eq(id,"{pkg_id}"),'
        f"{scale_score(raw_score, max_score, min_boost, max_boost)},0)"
        for pkg_id, raw_score in boosts.items()
    ]

    return f"sum(1,{','.join(boost_parts)})"
39
+
40
+
41
def get_boost_values(search_query: str) -> tuple[dict[str, float], float]:
    """Collect recorded scores for a query.

    Args:
        search_query: query whose scores are looked up via ``QueryScore``

    Returns:
        A pair of (mapping of UTF-8 decoded package id to its score,
        highest score seen). The highest score is 0 when there are no
        entries or none of them is greater than 0.
    """
    entries = list(QueryScore.get_for_query(search_query))

    # Package ids are decoded from bytes before being used as keys.
    boosts = {raw_id.decode("utf-8"): hits for raw_id, hits in entries}
    max_score = max([0, *(hits for _, hits in entries)])

    return boosts, max_score
54
+
55
+
56
def scale_score(
    value: float,
    max_value: float,
    min_boost: float,
    max_boost: float,
) -> float:
    """
    Map a raw score linearly onto the range [min_boost, max_boost].

    The raw value is first clamped to [0, max_value], so a dataset with a
    very high score cannot overpower search relevance. When ``max_value``
    is 0 there is nothing to scale against and ``min_boost`` is returned
    as is; otherwise the result is rounded to 4 decimal places.
    """
    if max_value == 0:
        return min_boost

    boost_span = max_boost - min_boost
    clamped = max(0, min(value, max_value))

    return round(min_boost + (clamped / max_value) * boost_span, 4)
@@ -3,16 +3,15 @@ from __future__ import annotations
3
3
  import csv
4
4
  import datetime
5
5
  import logging
6
+ from typing import TextIO
7
+
6
8
  import click
7
9
  import freezegun
8
10
 
9
11
  import ckan.model as model
10
- from ckan.lib.redis import connect_to_redis
11
- from ckan.lib.search import rebuild
12
12
 
13
13
  from . import QueryScore
14
14
 
15
- _search_csv_headers = ["package_id", "search_query", "count_of_hits"]
16
15
 
17
16
  log = logging.getLogger(__name__)
18
17
 
@@ -25,84 +24,47 @@ def query():
25
24
  @query.command("import")
26
25
  @click.argument("source", type=click.File())
27
26
  @click.option("--date", type=datetime.date.fromisoformat)
28
- def import_source(source, date):
27
+ def import_source(source: TextIO, date) -> None:
29
28
  """Import search stats from source"""
30
29
  if not date:
31
30
  date = datetime.date.today()
31
+
32
32
  with freezegun.freeze_time(date):
33
33
  reader = csv.DictReader(source)
34
34
  for row in reader:
35
35
  pkg = model.Package.get(row["package_id"])
36
+
36
37
  if not pkg:
37
38
  click.secho(f"Package {row['package_id']} does not exists", fg="red")
38
39
  continue
40
+
39
41
  score = QueryScore(pkg.id, row["search_query"])
40
42
  score.reset()
41
43
  score.increase(int(row["count_of_hits"]))
44
+
42
45
  click.secho("Done", fg="green")
43
46
 
44
47
 
45
48
  @query.command()
46
49
  @click.argument("output", type=click.File("w"), required=False)
47
- def export(output):
50
+ def export(output: TextIO | None) -> None:
48
51
  """Export search stats into specified file."""
49
52
  rows = QueryScore.get_all()
53
+
50
54
  if output:
51
55
  writer = csv.writer(output)
52
- writer.writerow(_search_csv_headers)
56
+ writer.writerow(["package_id", "search_query", "count_of_hits"])
53
57
  writer.writerows(rows)
54
58
  else:
55
59
  for row in rows:
56
- click.echo("Id: %s, query: %s, count: %d" % row)
57
- click.secho("Done", fg="green")
58
-
60
+ click.echo("ID: {}, query: {}, count: {}".format(*row))
59
61
 
60
- @query.command()
61
- def align():
62
- """Remove old records."""
63
- rows = QueryScore.get_all()
64
- for id_, query, _ in rows:
65
- score = QueryScore(id_, query)
66
- score.align()
62
+ click.secho("Done", fg="green")
67
63
 
68
64
 
69
65
  @query.command()
70
- @click.option("--days", "-d", type=int, default=1)
71
- @click.argument("file")
72
- @click.pass_context
73
- def safe_export(ctx, days, file):
74
- """Export stats if redis haven't been reloaded recently.
75
-
76
- If redis runs less than N days, it was reloaded recently and contains no
77
- stats. We have to import old snapshot into it.
78
-
79
- If redis is up for N days and more, it contains relevant stats. We can
80
- safely export them and overwrite old snapshot.
81
-
82
- """
83
- conn = connect_to_redis()
84
- uptime = conn.info()["uptime_in_days"]
85
- if uptime >= days:
86
- click.secho(f"Redis runs for {uptime} days. Creating snapshot..", fg="green")
87
- ctx.invoke(export, output=click.File("w")(file))
88
- else:
89
- click.secho(
90
- f"Redis runs for {uptime} days. Restore stats from snapshot..",
91
- fg="red",
92
- )
93
- ctx.invoke(import_source, source=click.File()(file))
66
+ def reset() -> None:
67
+ """Reset query relevance scores"""
68
+ QueryScore.reset_all()
94
69
 
95
-
96
- @query.command()
97
- def index():
98
- """Re-index datasets that have query relevance scores.
99
- """
100
-
101
- storage = QueryScore.default_storage_class()
102
- ids = {id for id, _, _ in storage.scan()}
103
- with click.progressbar(ids) as bar:
104
- for id in bar:
105
- try:
106
- rebuild(id)
107
- except Exception:
108
- log.exception("Cannot index %s", id)
70
+ click.secho("Done", fg="green")
@@ -0,0 +1,29 @@
1
+ import ckan.plugins.toolkit as tk
2
+
3
+ CONF_MIN_BOOST = "ckanext.search_tweaks.query_relevance.min_boost"
4
+ CONF_MAX_BOOST = "ckanext.search_tweaks.query_relevance.max_boost"
5
+ CONF_MAX_BOOST_COUNT = "ckanext.search_tweaks.query_relevance.max_boost_count"
6
+
7
+
8
def get_min_boost() -> float:
    """Return the configured minimum query-relevance boost as a float."""
    configured = tk.config[CONF_MIN_BOOST]
    return as_float(configured)
10
+
11
+
12
def get_max_boost() -> float:
    """Return the configured maximum query-relevance boost as a float."""
    configured = tk.config[CONF_MAX_BOOST]
    return as_float(configured)
14
+
15
+
16
def get_max_boost_count() -> int:
    """Return the configured maximum number of boosts applied to a query.

    The value is returned exactly as stored in the config, without any
    conversion here.
    """
    max_count = tk.config[CONF_MAX_BOOST_COUNT]
    return max_count
18
+
19
+
20
def as_float(number: str) -> float:
    """Convert a string into a float.

    Example:
        assert as_float("1.5") == 1.5

    Raises:
        ValueError: if the value cannot be converted. A ``TypeError`` from
            ``float()`` (e.g. for ``None``) is reported as ``ValueError``
            too, with the original exception chained as its cause.
    """
    try:
        return float(number)
    except (TypeError, ValueError) as err:
        # Chain the original error so the underlying reason stays visible
        # in the traceback.
        raise ValueError(f"Bad float value: {number}") from err
@@ -0,0 +1,16 @@
1
+ version: 1
2
+ groups:
3
+ - annotation: "ckanext-search-tweaks:query_relevance"
4
+ options:
5
+ - key: ckanext.search_tweaks.query_relevance.min_boost
6
+ default: 1
7
+ description: Minimum boost to apply to a query. Use float values
8
+
9
+ - key: ckanext.search_tweaks.query_relevance.max_boost
10
+ default: 1.5
11
+ description: Maximum boost to apply to a query. Use float values
12
+
13
+ - key: ckanext.search_tweaks.query_relevance.max_boost_count
14
+ type: int
15
+ default: 60
16
+ description: Maximum number of boosts to apply to a query
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from string import Template
4
3
  from typing import Any
5
4
 
6
5
  import ckan.plugins as plugins
@@ -10,15 +9,11 @@ from ckanext.search_tweaks.cli import attach_relevance_command
10
9
  from ckanext.search_tweaks.interfaces import ISearchTweaks
11
10
  from ckanext.search_tweaks.shared import feature_disabled
12
11
 
13
- from . import QueryScore, cli, normalize_query, update_score_by_url
14
-
15
- CONFIG_BOOST_STRING = "ckanext.search_tweaks.query_relevance.boost_function"
16
- CONFIG_RELEVANCE_PREFIX = "ckanext.search_tweaks.query_relevance.field_prefix"
17
-
18
- DEFAULT_BOOST_STRING = "scale(def($field,0),1,1.2)"
19
- DEFAULT_RELEVANCE_PREFIX = "query_relevance_"
12
+ from . import cli, normalize_query, update_score_by_url
13
+ from .boost import build_boost_query_function
20
14
 
21
15
 
16
+ @tk.blanket.config_declarations
22
17
  class QueryRelevancePlugin(plugins.SingletonPlugin):
23
18
  plugins.implements(plugins.IConfigurable)
24
19
  plugins.implements(plugins.IPackageController, inherit=True)
@@ -31,15 +26,6 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
31
26
 
32
27
  # IPackageController
33
28
 
34
- def before_dataset_index(self, pkg_dict):
35
- prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
36
-
37
- for _, query, score in QueryScore.get_for(pkg_dict["id"]):
38
- query = query.replace(" ", "_")
39
- pkg_dict[prefix + query] = score
40
-
41
- return pkg_dict
42
-
43
29
  def read(self, entity):
44
30
  # update search relevance only for WEB-requests. Any kind of
45
31
  # CLI/search-index manipulations has no effect on it
@@ -49,17 +35,12 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
49
35
  # ISearchTweaks
50
36
 
51
37
  def get_search_boost_fn(self, search_params: dict[str, Any]) -> str | None:
52
- if feature_disabled("query_boost", search_params) or not search_params.get("q"):
53
- return None
38
+ q = search_params.get("q")
54
39
 
55
- normalized = normalize_query(search_params["q"]).replace(" ", "_")
56
- if not normalized:
40
+ if feature_disabled("query_boost", search_params) or not q:
57
41
  return None
58
42
 
59
- prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
60
- field = prefix + normalized
61
- boost_string = Template(
62
- tk.config.get(CONFIG_BOOST_STRING, DEFAULT_BOOST_STRING),
63
- )
43
+ if normalized := normalize_query(q).replace(" ", "_"):
44
+ return build_boost_query_function(normalized)
64
45
 
65
- return boost_string.safe_substitute({"field": field})
46
+ return None
@@ -1,20 +1,5 @@
1
- from typing import Optional, Type
2
- import ckan.plugins.toolkit as tk
3
- from .storage import (
4
- PermanentRedisScoreStorage,
5
- DailyRedisScoreStorage,
6
- ScoreStorage,
7
- )
8
-
9
- _backends = {
10
- "redis-permanent": PermanentRedisScoreStorage,
11
- "redis-daily": DailyRedisScoreStorage,
12
- }
13
-
14
- CONFIG_BACKEND = "ckanext.search_tweaks.query_relevance.backend"
15
- DEFAULT_BACKEND = "redis-daily"
16
-
17
- DEFAULT_SCORE_STORAGE_CLASS = DailyRedisScoreStorage
1
+ from .storage import QueryHitTracker
2
+ from .config import get_max_boost_count
18
3
 
19
4
 
20
5
  def normalize_query(query: str) -> str:
@@ -26,50 +11,32 @@ def normalize_query(query: str) -> str:
26
11
 
27
12
 
28
13
  class QueryScore:
29
- storage_class: Type[ScoreStorage]
30
-
31
- def __init__(
32
- self,
33
- id_: str,
34
- query: str,
35
- *,
36
- normalize: bool = True,
37
- storage_class: Optional[Type[ScoreStorage]] = None,
38
- ):
14
+ def __init__(self, entity_id: str, query: str, normalize: bool = True):
39
15
  if normalize:
40
16
  query = normalize_query(query)
41
17
 
42
- if storage_class:
43
- self.storage_class = storage_class
44
- else:
45
- self.storage_class = self.default_storage_class()
46
- self.storage = self.storage_class(id_, query)
18
+ self.entity_id = entity_id
19
+ self.query = query
20
+
21
+ self.storage = QueryHitTracker(self.entity_id, self.query)
47
22
 
48
23
  def __int__(self):
49
24
  return self.storage.get()
50
25
 
51
- @staticmethod
52
- def default_storage_class() -> Type[ScoreStorage]:
53
- return _backends[tk.config.get(CONFIG_BACKEND, DEFAULT_BACKEND)]
54
-
55
- @property
56
- def query(self):
57
- return self.storage.query
58
-
59
- def increase(self, n: int) -> None:
60
- self.storage.inc(n)
61
-
62
- def align(self):
63
- self.storage.align()
26
+ def increase(self, amount: int) -> None:
27
+ self.storage.increase(amount)
64
28
 
65
29
  def reset(self):
66
- self.storage.reset()
30
+ self.storage.reset(self.query)
31
+
32
+ @classmethod
33
+ def get_for_query(cls, query: str, limit: int | None = None) -> list[tuple[bytes, float]]:
34
+ return QueryHitTracker.top(query, limit or get_max_boost_count())
67
35
 
68
36
  @classmethod
69
37
  def get_all(cls):
70
- storage = cls.default_storage_class()
71
- return storage.scan()
38
+ return QueryHitTracker.get_all()
72
39
 
73
40
  @classmethod
74
- def get_for(cls, id_: str):
75
- return cls.default_storage_class().scan(id_)
41
+ def reset_all(cls):
42
+ return QueryHitTracker.reset_all()