ckanext-search-tweaks 0.6.3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanext/__init__.py +0 -1
- ckanext/search_tweaks/advanced_search/assets/advanced-search.css +0 -4
- ckanext/search_tweaks/advanced_search/plugin.py +11 -0
- ckanext/search_tweaks/advanced_search/templates/advanced_search/search_form.html +5 -3
- ckanext/search_tweaks/field_relevance/plugin.py +0 -2
- ckanext/search_tweaks/field_relevance/views.py +10 -5
- ckanext/search_tweaks/interfaces.py +1 -3
- ckanext/search_tweaks/plugin.py +0 -1
- ckanext/search_tweaks/query_popularity/logic/schema.py +1 -0
- ckanext/search_tweaks/query_popularity/score.py +2 -1
- ckanext/search_tweaks/query_relevance/__init__.py +38 -13
- ckanext/search_tweaks/query_relevance/boost.py +75 -0
- ckanext/search_tweaks/query_relevance/cli.py +16 -54
- ckanext/search_tweaks/query_relevance/config.py +29 -0
- ckanext/search_tweaks/query_relevance/config_declaration.yaml +16 -0
- ckanext/search_tweaks/query_relevance/plugin.py +8 -27
- ckanext/search_tweaks/query_relevance/score.py +17 -50
- ckanext/search_tweaks/query_relevance/storage.py +79 -137
- ckanext/search_tweaks/spellcheck/helpers.py +6 -2
- ckanext/search_tweaks/tests/conftest.py +13 -0
- ckanext/search_tweaks/tests/query_relevance/test_plugin.py +5 -5
- ckanext/search_tweaks/tests/query_relevance/test_search.py +84 -0
- ckanext/search_tweaks/tests/query_relevance/test_storage.py +23 -99
- ckanext/search_tweaks/tests/spellcheck/test_plugin.py +4 -2
- {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/METADATA +121 -123
- {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/RECORD +30 -26
- {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/WHEEL +1 -1
- {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/entry_points.txt +0 -0
- {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info/licenses}/LICENSE +0 -0
- {ckanext_search_tweaks-0.6.3.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/top_level.txt +0 -0
ckanext/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import json
|
4
4
|
from typing import Any
|
5
5
|
|
6
|
+
from ckan import types
|
6
7
|
import ckan.plugins as p
|
7
8
|
import ckan.plugins.toolkit as tk
|
8
9
|
from ckan.exceptions import CkanConfigurationException
|
@@ -55,6 +56,16 @@ def form_config():
|
|
55
56
|
}
|
56
57
|
|
57
58
|
|
59
|
+
@tk.side_effect_free
def advanced_search_config(
    context: types.Context,
    data_dict: dict[str, Any],
) -> dict[str, Any]:
    """Return the configuration of the advanced search form fields.

    Exposes the value of the ``advanced_search_form_config`` template helper
    as an API action.

    Args:
        context: action context; not used by this action.
        data_dict: action payload; not used by this action.

    Returns:
        Whatever ``tk.h.advanced_search_form_config()`` returns.
    """
    # NOTE(review): no ``tk.check_access`` call is made here, so the action
    # appears to be public - confirm that this is intended.
    return tk.h.advanced_search_form_config()
|
66
|
+
|
67
|
+
|
68
|
+
@tk.blanket.actions({"advanced_search_config": advanced_search_config})
|
58
69
|
class AdvancedSearchPlugin(p.SingletonPlugin):
|
59
70
|
p.implements(p.IConfigurer, inherit=True)
|
60
71
|
p.implements(p.IConfigurable)
|
@@ -39,7 +39,8 @@ solr_syntax_blank - open SOLR syntax article in the new tab
|
|
39
39
|
<div class="input-group search-input-group">
|
40
40
|
<input type="text" class="form-control input-lg"
|
41
41
|
name="q" value="{{ query }}" autocomplete="off"
|
42
|
-
placeholder="{{ placeholder }}"
|
42
|
+
placeholder="{{ placeholder }}"
|
43
|
+
aria-label="Search data">
|
43
44
|
{{ search_button }}
|
44
45
|
</div>
|
45
46
|
{% endset %}
|
@@ -50,7 +51,8 @@ solr_syntax_blank - open SOLR syntax article in the new tab
|
|
50
51
|
<input type="text" class="form-control input-lg"
|
51
52
|
value="{{ request.args.ext_solr_q }}" autocomplete="off"
|
52
53
|
name="ext_solr_q" disabled
|
53
|
-
placeholder="{{ labels.solr_placeholder or _('metadata_created:[NOW/YEAR TO *] -tags:Health') }}"
|
54
|
+
placeholder="{{ labels.solr_placeholder or _('metadata_created:[NOW/YEAR TO *] -tags:Health') }}"
|
55
|
+
aria-label="Search using Solr query">
|
54
56
|
{{ search_button }}
|
55
57
|
</div>
|
56
58
|
{% endset %}
|
@@ -95,7 +97,7 @@ solr_syntax_blank - open SOLR syntax article in the new tab
|
|
95
97
|
{{ labels.solr_toggle or _('Add query syntax to search') }}
|
96
98
|
</label>
|
97
99
|
{% block solr_syntax_url %}
|
98
|
-
<a {% if solr_syntax_blank %}target="_blank"{% endif %} href="{{ solr_syntax_url }}">
|
100
|
+
<a {% if solr_syntax_blank %}target="_blank"{% endif %} href="{{ solr_syntax_url }}" aria-label="Learn more about SOLR query parameters">
|
99
101
|
<i class="fa fa-info-circle" data-placement="top" data-toggle="tooltip"
|
100
102
|
title="{{ labels.solr_explanation or _('This adds SOLR query language, for more information on how to use click here') }}"></i>
|
101
103
|
</a>
|
@@ -8,8 +8,6 @@ import ckan.plugins.toolkit as tk
|
|
8
8
|
from ckanext.search_tweaks.interfaces import ISearchTweaks
|
9
9
|
from ckanext.search_tweaks.shared import feature_disabled
|
10
10
|
|
11
|
-
from . import views
|
12
|
-
|
13
11
|
CONFIG_BOOST_FN = "ckanext.search_tweaks.field_relevance.boost_function"
|
14
12
|
|
15
13
|
DEFAULT_BOOST_FN = None
|
@@ -5,6 +5,7 @@ from typing import Any
|
|
5
5
|
from flask import Blueprint
|
6
6
|
from flask.views import MethodView
|
7
7
|
|
8
|
+
import ckan.types as types
|
8
9
|
import ckan.model as model
|
9
10
|
import ckan.plugins.toolkit as tk
|
10
11
|
|
@@ -46,7 +47,8 @@ class PromoteView(MethodView):
|
|
46
47
|
tk.get_validator("convert_int"),
|
47
48
|
tk.get_validator("int_validator"),
|
48
49
|
tk.get_validator("limit_to_configured_maximum")(
|
49
|
-
CONFIG_MAX_PROMOTION,
|
50
|
+
CONFIG_MAX_PROMOTION,
|
51
|
+
DEFAULT_MAX_PROMOTION,
|
50
52
|
),
|
51
53
|
],
|
52
54
|
}
|
@@ -54,14 +56,15 @@ class PromoteView(MethodView):
|
|
54
56
|
data, errors = tk.navl_validate(
|
55
57
|
dict(tk.request.form),
|
56
58
|
schema,
|
57
|
-
|
59
|
+
types.Context(model=model, session=model.Session), # type: ignore
|
58
60
|
)
|
59
61
|
|
60
62
|
if errors:
|
61
63
|
return self.get(id, data, errors)
|
62
64
|
try:
|
63
65
|
pkg_dict = tk.get_action("package_patch")(
|
64
|
-
{},
|
66
|
+
{},
|
67
|
+
{"id": id, field: data[field]},
|
65
68
|
)
|
66
69
|
except tk.ValidationError as e:
|
67
70
|
for k, v in e.error_summary.items():
|
@@ -86,7 +89,6 @@ class PromoteView(MethodView):
|
|
86
89
|
"min_promotion": tk.asint(
|
87
90
|
tk.config.get(CONFIG_MIN_PROMOTION, DEFAULT_MIN_PROMOTION),
|
88
91
|
),
|
89
|
-
|
90
92
|
"max_promotion": tk.asint(
|
91
93
|
tk.config.get(CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION),
|
92
94
|
),
|
@@ -99,4 +101,7 @@ class PromoteView(MethodView):
|
|
99
101
|
if tk.asbool(
|
100
102
|
tk.config.get(CONFIG_ENABLE_PROMOTION_ROUTE, DEFAULT_ENABLE_PROMOTION_ROUTE),
|
101
103
|
):
|
102
|
-
field_relevance.add_url_rule(
|
104
|
+
field_relevance.add_url_rule(
|
105
|
+
"/dataset/promote/<id>",
|
106
|
+
view_func=PromoteView.as_view("promote"),
|
107
|
+
)
|
ckanext/search_tweaks/plugin.py
CHANGED
@@ -13,38 +13,63 @@ from .score import QueryScore, normalize_query
|
|
13
13
|
__all__ = ["QueryScore", "normalize_query", "update_score_by_url"]
|
14
14
|
|
15
15
|
|
16
|
-
def update_score_by_url(pkg: model.Package,
|
17
|
-
"""
|
18
|
-
if tk.request:
|
19
|
-
ref = ref or tk.request.referrer
|
16
|
+
def update_score_by_url(pkg: model.Package, referrer: str | None = None) -> bool:
    """Register one search hit for ``pkg`` using the referring search URL.

    Args:
        pkg: the package whose score for the query is increased
        referrer: URL the visitor came from; when omitted, the referrer of
            the active request (if there is one) is used

    Returns:
        True if a hit was recorded, False if there is no referrer, the
        referrer path is not eligible for scoring, or it carries no ``q``
        parameter
    """
    # Fall back to the request only when the caller gave nothing usable.
    if not referrer and tk.request:
        referrer = tk.request.referrer

    if not referrer:
        return False

    parsed = urlparse(referrer)

    if not _is_scoring_enabled_for_path(parsed.path, pkg):
        return False

    params = parse_qs(parsed.query.lstrip("?"))
    terms = params.get("q")

    if terms is None:
        return False

    # Only the first ``q`` value counts when the parameter is repeated.
    QueryScore(pkg.id, terms[0]).increase(1)

    return True
|
36
45
|
|
37
46
|
|
38
|
-
def
|
47
|
+
def _is_scoring_enabled_for_path(path: str, package: model.Package) -> bool:
|
48
|
+
"""
|
49
|
+
Determine if a given URL path should have scoring enabled.
|
50
|
+
|
51
|
+
Checks if the provided path matches any of the following URL patterns that
|
52
|
+
support scoring functionality:
|
53
|
+
|
54
|
+
Args:
|
55
|
+
path: The URL path to check
|
56
|
+
package: The package object containing type and owner_org info
|
57
|
+
|
58
|
+
Returns:
|
59
|
+
True if the path should have scoring enabled, False otherwise
|
60
|
+
"""
|
61
|
+
|
39
62
|
path = path.rstrip("/")
|
63
|
+
|
40
64
|
if path == tk.h.url_for("dataset.search").rstrip("/"):
|
41
65
|
return True
|
42
66
|
|
43
67
|
with contextlib.suppress(BuildError):
|
44
|
-
if path == tk.h.url_for(
|
68
|
+
if path == tk.h.url_for(package.type + ".search").rstrip("/"):
|
45
69
|
return True
|
46
70
|
|
47
|
-
org = model.Group.get(
|
71
|
+
org = model.Group.get(package.owner_org)
|
72
|
+
|
48
73
|
if not org:
|
49
74
|
return False
|
50
75
|
|
@@ -0,0 +1,75 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from ckanext.search_tweaks.config import prefer_boost
|
4
|
+
|
5
|
+
from . import QueryScore
|
6
|
+
from .config import get_min_boost, get_max_boost, get_max_boost_count
|
7
|
+
|
8
|
+
|
9
|
+
def build_boost_query_function(search_query: str) -> str | None:
    """Build boost query function for given search query.

    The stored per-package scores for the query are scaled into the
    configured ``[min_boost, max_boost]`` range and embedded into a function
    expression keyed by package id.

    Args:
        search_query: normalized query

    Returns:
        Boost function. When ``prefer_boost()`` is true this is a chain of
        nested ``if(eq(id, ...))`` calls that falls back to ``1``; otherwise
        it is ``sum(1, ...)`` with one ``if`` term per package.
    """
    boosts, max_score = get_boost_values(search_query)
    min_boost = get_min_boost()
    max_boost = get_max_boost()

    if prefer_boost():
        boost_expr = "1"

        # Sorting by package id keeps the generated expression deterministic.
        for pkg_id, raw_score in sorted(boosts.items(), reverse=True):
            scaled = scale_score(raw_score, max_score, min_boost, max_boost)
            boost_expr = f'if(eq(id,"{pkg_id}"),{scaled},{boost_expr})'

        return f"sum(0, {boost_expr})"

    boost_parts = [
        'if(eq(id,"{}"),{},0)'.format(
            pkg_id,
            scale_score(raw_score, max_score, min_boost, max_boost),
        )
        for pkg_id, raw_score in boosts.items()
    ]

    # With no stored scores the join is empty; use a neutral ``0`` term so
    # the result is ``sum(1,0)`` rather than ``sum(1,)`` with a dangling comma.
    terms = ",".join(boost_parts) or "0"

    return f"sum(1,{terms})"
|
39
|
+
|
40
|
+
|
41
|
+
def get_boost_values(search_query: str) -> tuple[dict[str, float], float]:
    """Collect stored scores for a query together with the highest score.

    Args:
        search_query: normalized query to look up

    Returns:
        A pair ``(boosts, max_score)``: ``boosts`` maps the UTF-8 decoded
        package id to its score, ``max_score`` is the largest score seen
        (``0`` when there are no entries or none is positive).
    """
    entries = list(QueryScore.get_for_query(search_query))

    # Ids are decoded from bytes before being used as keys.
    boosts = {package_id.decode("utf-8"): score for package_id, score in entries}

    # Seeding with 0 reproduces a running "largest so far" that starts at 0.
    max_score = max([0, *(score for _, score in entries)])

    return boosts, max_score
|
54
|
+
|
55
|
+
|
56
|
+
def scale_score(
    value: float,
    max_value: float,
    min_boost: float,
    max_boost: float,
) -> float:
    """Map ``value`` linearly from ``[0, max_value]`` onto ``[min_boost, max_boost]``.

    Keeping boosts inside a fixed range stops datasets with very high scores
    from dominating the regular search relevance.

    Args:
        value: raw score; clamped into ``[0, max_value]`` before scaling
        max_value: score that corresponds to ``max_boost``
        min_boost: lower bound of the resulting range
        max_boost: upper bound of the resulting range

    Returns:
        The scaled value rounded to 4 decimal places, or ``min_boost``
        unchanged when ``max_value`` is 0.
    """
    # Nothing to scale against: avoid dividing by zero.
    if max_value == 0:
        return min_boost

    clamped = max(0, min(value, max_value))
    ratio = clamped / max_value

    return round(min_boost + ratio * (max_boost - min_boost), 4)
|
@@ -3,16 +3,15 @@ from __future__ import annotations
|
|
3
3
|
import csv
|
4
4
|
import datetime
|
5
5
|
import logging
|
6
|
+
from typing import TextIO
|
7
|
+
|
6
8
|
import click
|
7
9
|
import freezegun
|
8
10
|
|
9
11
|
import ckan.model as model
|
10
|
-
from ckan.lib.redis import connect_to_redis
|
11
|
-
from ckan.lib.search import rebuild
|
12
12
|
|
13
13
|
from . import QueryScore
|
14
14
|
|
15
|
-
_search_csv_headers = ["package_id", "search_query", "count_of_hits"]
|
16
15
|
|
17
16
|
log = logging.getLogger(__name__)
|
18
17
|
|
@@ -25,84 +24,47 @@ def query():
|
|
25
24
|
@query.command("import")
@click.argument("source", type=click.File())
@click.option("--date", type=datetime.date.fromisoformat)
def import_source(source: TextIO, date) -> None:
    """Import search stats from source"""
    # Each CSV row needs the columns package_id, search_query, count_of_hits.
    # Scores are written while the clock is frozen at --date (today if omitted).
    frozen_at = date or datetime.date.today()

    with freezegun.freeze_time(frozen_at):
        for row in csv.DictReader(source):
            package_id = row["package_id"]
            pkg = model.Package.get(package_id)

            if not pkg:
                # Unknown packages are reported and skipped, not fatal.
                click.secho(f"Package {package_id} does not exists", fg="red")
                continue

            # Replace any existing score with the imported hit count.
            score = QueryScore(pkg.id, row["search_query"])
            score.reset()
            score.increase(int(row["count_of_hits"]))

    click.secho("Done", fg="green")
|
43
46
|
|
44
47
|
|
45
48
|
@query.command()
@click.argument("output", type=click.File("w"), required=False)
def export(output: TextIO | None) -> None:
    """Export search stats into specified file."""
    rows = QueryScore.get_all()

    if not output:
        # No file given: print one human-readable line per record.
        for row in rows:
            click.echo("ID: {}, query: {}, count: {}".format(*row))
    else:
        # File given: write CSV with the header the import command reads.
        writer = csv.writer(output)
        writer.writerow(["package_id", "search_query", "count_of_hits"])
        writer.writerows(rows)

    click.secho("Done", fg="green")
|
67
63
|
|
68
64
|
|
69
65
|
@query.command()
def reset() -> None:
    """Reset query relevance scores"""
    # The actual work is delegated to QueryScore.reset_all(); this command
    # only triggers it and reports completion.
    QueryScore.reset_all()

    click.secho("Done", fg="green")
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import ckan.plugins.toolkit as tk
|
2
|
+
|
3
|
+
CONF_MIN_BOOST = "ckanext.search_tweaks.query_relevance.min_boost"
|
4
|
+
CONF_MAX_BOOST = "ckanext.search_tweaks.query_relevance.max_boost"
|
5
|
+
CONF_MAX_BOOST_COUNT = "ckanext.search_tweaks.query_relevance.max_boost_count"
|
6
|
+
|
7
|
+
|
8
|
+
def get_min_boost() -> float:
    """Return the configured minimum boost as a float.

    Reads ``CONF_MIN_BOOST`` from the CKAN config and converts it with
    ``as_float``, so an unparsable value raises ``ValueError``.
    """
    # Subscript access: presumably relies on the option being declared with
    # a default value - verify against the config declaration.
    return as_float(tk.config[CONF_MIN_BOOST])
|
10
|
+
|
11
|
+
|
12
|
+
def get_max_boost() -> float:
    """Return the configured maximum boost as a float.

    Reads ``CONF_MAX_BOOST`` from the CKAN config and converts it with
    ``as_float``, so an unparsable value raises ``ValueError``.
    """
    # Subscript access: presumably relies on the option being declared with
    # a default value - verify against the config declaration.
    return as_float(tk.config[CONF_MAX_BOOST])
|
14
|
+
|
15
|
+
|
16
|
+
def get_max_boost_count() -> int:
    """Return the configured maximum number of boosted entries per query.

    The value of ``CONF_MAX_BOOST_COUNT`` is returned as stored in the CKAN
    config, without any conversion here.
    """
    # NOTE(review): the ``int`` return type assumes the config layer already
    # coerces this option (e.g. via a declared ``type: int``) - confirm.
    return tk.config[CONF_MAX_BOOST_COUNT]
|
18
|
+
|
19
|
+
|
20
|
+
def as_float(number: str) -> float:
    """Convert a string into a float.

    Args:
        number: value to convert; normally the raw string from the config

    Returns:
        The parsed float.

    Raises:
        ValueError: if the value cannot be converted. The original
            ``TypeError``/``ValueError`` is attached as the cause.

    Example:
        assert as_float("1.5") == 1.5
    """
    try:
        return float(number)
    except (TypeError, ValueError) as err:
        # Chain explicitly so the traceback shows the failure as the cause
        # rather than as an error raised while handling another one.
        raise ValueError(f"Bad float value: {number}") from err
|
@@ -0,0 +1,16 @@
|
|
1
|
+
version: 1
|
2
|
+
groups:
|
3
|
+
- annotation: "ckanext-search-tweaks:query_relevance"
|
4
|
+
options:
|
5
|
+
- key: ckanext.search_tweaks.query_relevance.min_boost
|
6
|
+
default: 1
|
7
|
+
description: Minimum boost to apply to a query. Use float values
|
8
|
+
|
9
|
+
- key: ckanext.search_tweaks.query_relevance.max_boost
|
10
|
+
default: 1.5
|
11
|
+
description: Maximum boost to apply to a query. Use float values
|
12
|
+
|
13
|
+
- key: ckanext.search_tweaks.query_relevance.max_boost_count
|
14
|
+
type: int
|
15
|
+
default: 60
|
16
|
+
description: Maximum number of boosts to apply to a query
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from string import Template
|
4
3
|
from typing import Any
|
5
4
|
|
6
5
|
import ckan.plugins as plugins
|
@@ -10,15 +9,11 @@ from ckanext.search_tweaks.cli import attach_relevance_command
|
|
10
9
|
from ckanext.search_tweaks.interfaces import ISearchTweaks
|
11
10
|
from ckanext.search_tweaks.shared import feature_disabled
|
12
11
|
|
13
|
-
from . import
|
14
|
-
|
15
|
-
CONFIG_BOOST_STRING = "ckanext.search_tweaks.query_relevance.boost_function"
|
16
|
-
CONFIG_RELEVANCE_PREFIX = "ckanext.search_tweaks.query_relevance.field_prefix"
|
17
|
-
|
18
|
-
DEFAULT_BOOST_STRING = "scale(def($field,0),1,1.2)"
|
19
|
-
DEFAULT_RELEVANCE_PREFIX = "query_relevance_"
|
12
|
+
from . import cli, normalize_query, update_score_by_url
|
13
|
+
from .boost import build_boost_query_function
|
20
14
|
|
21
15
|
|
16
|
+
@tk.blanket.config_declarations
|
22
17
|
class QueryRelevancePlugin(plugins.SingletonPlugin):
|
23
18
|
plugins.implements(plugins.IConfigurable)
|
24
19
|
plugins.implements(plugins.IPackageController, inherit=True)
|
@@ -31,15 +26,6 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
|
|
31
26
|
|
32
27
|
# IPackageController
|
33
28
|
|
34
|
-
def before_dataset_index(self, pkg_dict):
|
35
|
-
prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
|
36
|
-
|
37
|
-
for _, query, score in QueryScore.get_for(pkg_dict["id"]):
|
38
|
-
query = query.replace(" ", "_")
|
39
|
-
pkg_dict[prefix + query] = score
|
40
|
-
|
41
|
-
return pkg_dict
|
42
|
-
|
43
29
|
def read(self, entity):
|
44
30
|
# update search relevance only for WEB-requests. Any kind of
|
45
31
|
# CLI/search-index manipulations has no effect on it
|
@@ -49,17 +35,12 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
|
|
49
35
|
# ISearchTweaks
|
50
36
|
|
51
37
|
def get_search_boost_fn(self, search_params: dict[str, Any]) -> str | None:
    """Return the relevance boost function for the current search, if any.

    Args:
        search_params: search parameters; only ``q`` is read here, the whole
            dict is passed to ``feature_disabled``

    Returns:
        The expression built by ``build_boost_query_function`` for the
        normalized query, or None when the ``query_boost`` feature is
        disabled, ``q`` is missing/empty, or the query normalizes to an
        empty string.
    """
    raw_query = search_params.get("q")

    if feature_disabled("query_boost", search_params) or not raw_query:
        return None

    # Stored queries use underscores in place of spaces.
    normalized = normalize_query(raw_query).replace(" ", "_")

    if not normalized:
        return None

    return build_boost_query_function(normalized)
|
@@ -1,20 +1,5 @@
|
|
1
|
-
from
|
2
|
-
|
3
|
-
from .storage import (
|
4
|
-
PermanentRedisScoreStorage,
|
5
|
-
DailyRedisScoreStorage,
|
6
|
-
ScoreStorage,
|
7
|
-
)
|
8
|
-
|
9
|
-
_backends = {
|
10
|
-
"redis-permanent": PermanentRedisScoreStorage,
|
11
|
-
"redis-daily": DailyRedisScoreStorage,
|
12
|
-
}
|
13
|
-
|
14
|
-
CONFIG_BACKEND = "ckanext.search_tweaks.query_relevance.backend"
|
15
|
-
DEFAULT_BACKEND = "redis-daily"
|
16
|
-
|
17
|
-
DEFAULT_SCORE_STORAGE_CLASS = DailyRedisScoreStorage
|
1
|
+
from .storage import QueryHitTracker
|
2
|
+
from .config import get_max_boost_count
|
18
3
|
|
19
4
|
|
20
5
|
def normalize_query(query: str) -> str:
|
@@ -26,50 +11,32 @@ def normalize_query(query: str) -> str:
|
|
26
11
|
|
27
12
|
|
28
13
|
class QueryScore:
    """Score of a single entity for a single search query.

    All persistence is delegated to ``QueryHitTracker``; this class only
    normalizes the query and forwards calls.
    """

    def __init__(self, entity_id: str, query: str, normalize: bool = True) -> None:
        """Bind the score to an entity/query pair.

        Args:
            entity_id: identifier of the scored entity
            query: search query the score belongs to
            normalize: when true, ``query`` is passed through
                ``normalize_query`` before it is stored
        """
        if normalize:
            query = normalize_query(query)

        self.entity_id = entity_id
        self.query = query

        self.storage = QueryHitTracker(self.entity_id, self.query)

    def __int__(self):
        """Return the value reported by the storage for this pair."""
        return self.storage.get()

    def increase(self, amount: int) -> None:
        """Increase the stored score by ``amount``."""
        self.storage.increase(amount)

    def reset(self) -> None:
        """Reset the stored data for this object's query."""
        # NOTE(review): only the query is passed to the storage, not the
        # entity id - confirm whether this clears just this pair or the
        # whole query.
        self.storage.reset(self.query)

    @classmethod
    def get_for_query(cls, query: str, limit: int | None = None) -> list[tuple[bytes, float]]:
        """Return the top entries stored for ``query``.

        Args:
            query: query to look up; it is passed on as given
            limit: maximum number of entries; a falsy value (None or 0)
                falls back to ``get_max_boost_count()``
        """
        # NOTE(review): ``int | None`` is evaluated at definition time unless
        # the module enables postponed annotations - confirm the minimum
        # supported Python version.
        return QueryHitTracker.top(query, limit or get_max_boost_count())

    @classmethod
    def get_all(cls):
        """Return every record reported by ``QueryHitTracker.get_all()``."""
        return QueryHitTracker.get_all()

    @classmethod
    def reset_all(cls):
        """Forward to ``QueryHitTracker.reset_all()`` and return its result."""
        return QueryHitTracker.reset_all()
|