ckanext-search-tweaks 0.4.12__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanext/search_tweaks/__init__.py +1 -17
- ckanext/search_tweaks/advanced_search/plugin.py +16 -10
- ckanext/search_tweaks/cli.py +3 -3
- ckanext/search_tweaks/config.py +37 -0
- ckanext/search_tweaks/field_relevance/plugin.py +7 -10
- ckanext/search_tweaks/field_relevance/views.py +12 -19
- ckanext/search_tweaks/interfaces.py +16 -9
- ckanext/search_tweaks/plugin.py +19 -44
- ckanext/search_tweaks/query_popularity/__init__.py +0 -0
- ckanext/search_tweaks/query_popularity/config.py +30 -0
- ckanext/search_tweaks/query_popularity/logic/__init__.py +0 -0
- ckanext/search_tweaks/query_popularity/logic/action.py +43 -0
- ckanext/search_tweaks/query_popularity/logic/auth.py +23 -0
- ckanext/search_tweaks/query_popularity/plugin.py +47 -0
- ckanext/search_tweaks/query_popularity/score.py +165 -0
- ckanext/search_tweaks/query_relevance/__init__.py +1 -2
- ckanext/search_tweaks/query_relevance/cli.py +3 -7
- ckanext/search_tweaks/query_relevance/plugin.py +18 -24
- ckanext/search_tweaks/query_relevance/score.py +1 -1
- ckanext/search_tweaks/query_relevance/storage.py +7 -14
- ckanext/search_tweaks/shared.py +13 -0
- ckanext/search_tweaks/spellcheck/helpers.py +15 -23
- ckanext/search_tweaks/spellcheck/plugin.py +1 -1
- ckanext/search_tweaks/tests/query_relevance/test_plugin.py +2 -3
- ckanext/search_tweaks/tests/query_relevance/test_storage.py +4 -4
- ckanext/search_tweaks/tests/spellcheck/test_plugin.py +7 -15
- ckanext/search_tweaks/tests/test_plugin.py +21 -32
- {ckanext_search_tweaks-0.4.12.dist-info → ckanext_search_tweaks-0.6.0.dist-info}/METADATA +5 -4
- ckanext_search_tweaks-0.6.0.dist-info/RECORD +52 -0
- {ckanext_search_tweaks-0.4.12.dist-info → ckanext_search_tweaks-0.6.0.dist-info}/WHEEL +1 -1
- {ckanext_search_tweaks-0.4.12.dist-info → ckanext_search_tweaks-0.6.0.dist-info}/entry_points.txt +1 -0
- ckanext_search_tweaks-0.4.12.dist-info/RECORD +0 -43
- /ckanext_search_tweaks-0.4.12-py3.10-nspkg.pth → /ckanext_search_tweaks-0.6.0-py3.8-nspkg.pth +0 -0
- {ckanext_search_tweaks-0.4.12.dist-info → ckanext_search_tweaks-0.6.0.dist-info}/LICENSE +0 -0
- {ckanext_search_tweaks-0.4.12.dist-info → ckanext_search_tweaks-0.6.0.dist-info}/namespace_packages.txt +0 -0
- {ckanext_search_tweaks-0.4.12.dist-info → ckanext_search_tweaks-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from collections import defaultdict
|
3
|
+
from datetime import datetime, timedelta
|
4
|
+
import logging
|
5
|
+
from hashlib import md5
|
6
|
+
from typing import Any, Iterable, cast
|
7
|
+
from operator import itemgetter
|
8
|
+
from ckan.lib.redis import connect_to_redis
|
9
|
+
import ckan.plugins.toolkit as tk
|
10
|
+
from redis import Redis
|
11
|
+
from . import config
|
12
|
+
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
connect_to_redis: Any
|
15
|
+
|
16
|
+
|
17
|
+
class Score:
    """Redis-backed popularity tracker for search queries.

    All keys share the ``<ckan.site_id>:search_tweaks:qp`` prefix:

    * ``:trans`` - hash mapping a query hash to the query text;
    * ``:distribution`` - hash mapping ``<date stem>/<query hash>`` to the
      number of times the query was registered during that minute;
    * ``:score`` - sorted set mapping a query hash to its computed score;
    * ``:ignore`` - set of query hashes that must not be tracked;
    * ``:throttle:<user>:<query hash>`` - expiring per-user markers.
    """

    redis: Redis[bytes]
    # Minute-resolution stamp used as the first part of distribution fields.
    date_format = "%Y-%m-%d %H-%M"

    def __init__(self):
        self.redis = connect_to_redis()

        site = tk.config["ckan.site_id"]
        self.prefix = f"{site}:search_tweaks:qp"

    def export(self) -> list[dict[str, Any]]:
        """Return every known query together with its usage records.

        Each item has the form
        ``{"query": <query text>, "records": [{"date": datetime, "count": int}]}``.
        Distribution records with a malformed field name are skipped.
        """
        data: dict[bytes, dict[str, Any]] = {
            q_hash: {"query": query, "records": []}
            for q_hash, query in self.redis.hgetall(self.trans_key()).items()
        }
        for k, v in self.redis.hscan_iter(self.distribution_key()):
            try:
                date, q_hash = self._parse_distribution_key(k)
            except ValueError:
                continue

            entry = data.get(q_hash)
            if entry is None:
                # The translation snapshot above and this scan are separate
                # Redis calls, so a query saved or dropped in between has a
                # record without a translation. Skip it instead of failing.
                continue

            entry["records"].append({"date": date, "count": int(v)})

        return list(data.values())

    def save(self, q: str) -> None:
        """Register one more usage of the query ``q``.

        Nothing is recorded when the query is ignored or when the current
        user is still throttled for this query.
        """
        q = q.strip()
        q_hash = self.hash(q)

        if self.is_ignored(q_hash):
            return

        if self.is_throttling(q_hash):
            return

        self.redis.hset(self.trans_key(), q_hash, q)

        date_stem = self.format_date_stem(self.now())

        self.redis.hincrby(self.distribution_key(), f"{date_stem}/{q_hash}", 1)

    def drop(self, q: str) -> None:
        """Remove usage records, translation and score of the query ``q``."""
        # save() hashes the stripped query, so strip here as well; otherwise
        # a query passed with surrounding whitespace never matches.
        q_hash = self.hash(q.strip())
        dk = self.distribution_key()

        series = self.redis.hscan_iter(dk, f"*/{q_hash}")
        keys = list(map(itemgetter(0), series))
        if keys:
            self.redis.hdel(dk, *keys)

        self.redis.hdel(self.trans_key(), q_hash)
        self.redis.zrem(self.score_key(), q_hash)

    def is_throttling(self, q_hash: str) -> bool:
        """Tell whether the current user has registered this query recently.

        When no throttle marker exists, one is created with an expiration of
        ``config.throttle()`` and ``False`` is returned.
        """
        user = tk.current_user.name

        throttle_key = f"{self.prefix}:throttle:{user}:{q_hash}"
        if self.redis.exists(throttle_key):
            return True

        self.redis.set(throttle_key, 1, ex=config.throttle())
        return False

    def reset(self) -> None:
        """Delete every Redis key that starts with this tracker's prefix."""
        keys = self.redis.keys(f"{self.prefix}:*")
        if keys:
            self.redis.delete(*keys)

    def refresh(self) -> None:
        """Recompute scores and purge outdated data.

        Distribution records older than ``config.max_age()`` seconds, or with
        a malformed field name, are deleted. The remaining counts are summed
        per query, each divided by the number of obsoletion periods it has
        lived through (see ``_decay_divisor``). Queries left without live
        records are removed from the score set and the translation hash.
        """
        max_age = timedelta(seconds=config.max_age())
        period = config.obsoletion_period()
        # A single timestamp keeps all ages of one refresh comparable.
        now = self.now()
        dk = self.distribution_key()
        sk = self.score_key()

        expired_dist: set[bytes] = set()
        distribution = cast(
            "Iterable[tuple[bytes, bytes]]",
            self.redis.hscan_iter(dk),
        )

        scores: dict[bytes, float] = defaultdict(float)

        for k, v in distribution:
            try:
                date, q_hash = self._parse_distribution_key(k)
            except ValueError:
                log.error("Remove invalid key %s", k)
                expired_dist.add(k)
                continue

            age = now - date

            if age > max_age:
                expired_dist.add(k)
                continue

            scores[q_hash] += int(v) / self._decay_divisor(age, period)

        if expired_dist:
            self.redis.hdel(dk, *expired_dist)

        expired_scores: set[bytes] = {
            k for k, _ in self.redis.zscan_iter(sk) if k not in scores
        }

        if scores:
            self.redis.zadd(sk, cast(Any, scores))

        if expired_scores:
            self.redis.zrem(sk, *expired_scores)
            self.redis.hdel(self.trans_key(), *expired_scores)

    def _parse_distribution_key(self, key: bytes) -> tuple[datetime, bytes]:
        """Split a ``<date stem>/<query hash>`` field into its two parts.

        Raises ``ValueError`` when the separator is missing or the date stem
        does not match ``date_format``.
        """
        date_str, q_hash = key.split(b"/", 1)
        return datetime.strptime(date_str.decode(), self.date_format), q_hash

    @staticmethod
    def _decay_divisor(age: timedelta, period: int) -> int:
        """Return the divisor applied to a record of the given age.

        The divisor is 1 during the first ``period`` seconds and grows by one
        for each further full period. ``total_seconds()`` is required here:
        ``timedelta.seconds`` holds only the sub-day remainder, which would
        make a record older than a day look fresh again. Negative ages are
        clamped to zero so the divisor is never below 1.
        """
        elapsed = max(int(age.total_seconds()), 0)
        return elapsed // period + 1

    def hash(self, q: str) -> str:
        """Return the hexadecimal MD5 digest that identifies the query."""
        return md5(q.encode()).hexdigest()

    def is_ignored(self, q_hash: str):
        """Tell whether the query hash is a member of the ignore set."""
        return self.redis.sismember(self.ignore_key(), q_hash)

    def ignore(self, q: str):
        """Add the query to the ignore set; returns the result of ``SADD``."""
        # Strip for the same reason as in drop(): save() checks the hash of
        # the stripped query.
        return self.redis.sadd(self.ignore_key(), self.hash(q.strip()))

    def now(self) -> datetime:
        """Return the current UTC time as a naive ``datetime``."""
        return datetime.utcnow()

    def score_key(self) -> str:
        """Name of the sorted set with computed scores."""
        return f"{self.prefix}:score"

    def trans_key(self) -> str:
        """Name of the hash that maps query hashes to query text."""
        return f"{self.prefix}:trans"

    def ignore_key(self) -> str:
        """Name of the set with ignored query hashes."""
        return f"{self.prefix}:ignore"

    def distribution_key(self) -> str:
        """Name of the hash with per-minute usage counters."""
        return f"{self.prefix}:distribution"

    def format_date_stem(self, date: datetime) -> str:
        """Format ``date`` as the date part of a distribution field."""
        return date.strftime(self.date_format)

    def stats(self, num: int) -> Iterable[dict[str, Any]]:
        """Yield ``{"query": ..., "score": ...}`` for the ``num`` best scores.

        Items come in descending score order; the query text is looked up in
        the translation hash.
        """
        scores: list[tuple[bytes, float]] = self.redis.zrange(
            self.score_key(), 0, num - 1, desc=True, withscores=True
        )
        trans_key = self.trans_key()

        for k, v in scores:
            yield {"query": self.redis.hget(trans_key, k), "score": v}
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
import contextlib
|
3
|
-
from typing import Optional
|
4
3
|
|
5
4
|
from urllib.parse import urlparse, parse_qs
|
6
5
|
|
@@ -14,7 +13,7 @@ from .score import QueryScore, normalize_query
|
|
14
13
|
__all__ = ["QueryScore", "normalize_query", "update_score_by_url"]
|
15
14
|
|
16
15
|
|
17
|
-
def update_score_by_url(pkg: model.Package, ref:
|
16
|
+
def update_score_by_url(pkg: model.Package, ref: str | None = None) -> bool:
|
18
17
|
"""Make given package more relevant for the current search query."""
|
19
18
|
if tk.request:
|
20
19
|
ref = ref or tk.request.referrer
|
@@ -29,9 +29,7 @@ def import_source(source, date):
|
|
29
29
|
for row in reader:
|
30
30
|
pkg = model.Package.get(row["package_id"])
|
31
31
|
if not pkg:
|
32
|
-
click.secho(
|
33
|
-
f"Package {row['package_id']} does not exists", fg="red"
|
34
|
-
)
|
32
|
+
click.secho(f"Package {row['package_id']} does not exists", fg="red")
|
35
33
|
continue
|
36
34
|
score = QueryScore(pkg.id, row["search_query"])
|
37
35
|
score.reset()
|
@@ -58,7 +56,7 @@ def export(output):
|
|
58
56
|
def align():
|
59
57
|
"""Remove old records."""
|
60
58
|
rows = QueryScore.get_all()
|
61
|
-
for
|
59
|
+
for id_, query, _ in rows:
|
62
60
|
score = QueryScore(id_, query)
|
63
61
|
score.align()
|
64
62
|
|
@@ -80,9 +78,7 @@ def safe_export(ctx, days, file):
|
|
80
78
|
conn = connect_to_redis()
|
81
79
|
uptime = conn.info()["uptime_in_days"]
|
82
80
|
if uptime >= days:
|
83
|
-
click.secho(
|
84
|
-
f"Redis runs for {uptime} days. Creating snapshot..", fg="green"
|
85
|
-
)
|
81
|
+
click.secho(f"Redis runs for {uptime} days. Creating snapshot..", fg="green")
|
86
82
|
ctx.invoke(export, output=click.File("w")(file))
|
87
83
|
else:
|
88
84
|
click.secho(
|
@@ -1,16 +1,15 @@
|
|
1
1
|
from __future__ import annotations
|
2
|
+
|
2
3
|
from string import Template
|
3
|
-
from typing import Any
|
4
|
+
from typing import Any
|
4
5
|
|
5
6
|
import ckan.plugins as plugins
|
6
7
|
import ckan.plugins.toolkit as tk
|
7
8
|
|
8
|
-
from . import
|
9
|
-
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from .. import feature_disabled
|
13
|
-
from . import cli
|
9
|
+
from ckanext.search_tweaks import feature_disabled
|
10
|
+
from ckanext.search_tweaks.cli import attach_relevance_command
|
11
|
+
from ckanext.search_tweaks.interfaces import ISearchTweaks
|
12
|
+
from . import QueryScore, cli, normalize_query, update_score_by_url
|
14
13
|
|
15
14
|
CONFIG_BOOST_STRING = "ckanext.search_tweaks.query_relevance.boost_function"
|
16
15
|
CONFIG_RELEVANCE_PREFIX = "ckanext.search_tweaks.query_relevance.field_prefix"
|
@@ -31,12 +30,10 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
|
|
31
30
|
|
32
31
|
# IPackageController
|
33
32
|
|
34
|
-
def
|
35
|
-
prefix = tk.config.get(
|
36
|
-
CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX
|
37
|
-
)
|
33
|
+
def before_dataset_index(self, pkg_dict):
|
34
|
+
prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
|
38
35
|
|
39
|
-
for
|
36
|
+
for _, query, score in QueryScore.get_for(pkg_dict["id"]):
|
40
37
|
query = query.replace(" ", "_")
|
41
38
|
pkg_dict[prefix + query] = score
|
42
39
|
|
@@ -50,31 +47,28 @@ class QueryRelevancePlugin(plugins.SingletonPlugin):
|
|
50
47
|
|
51
48
|
# ISearchTweaks
|
52
49
|
|
53
|
-
def get_search_boost_fn(
|
54
|
-
self, search_params: dict[str, Any]
|
55
|
-
) -> Optional[str]:
|
50
|
+
def get_search_boost_fn(self, search_params: dict[str, Any]) -> str | None:
|
56
51
|
if feature_disabled("query_boost", search_params):
|
57
|
-
return
|
52
|
+
return None
|
58
53
|
|
59
|
-
prefix = tk.config.get(
|
60
|
-
CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX
|
61
|
-
)
|
54
|
+
prefix = tk.config.get(CONFIG_RELEVANCE_PREFIX, DEFAULT_RELEVANCE_PREFIX)
|
62
55
|
disabled = tk.asbool(
|
63
56
|
search_params.get("extras", {}).get(
|
64
|
-
"ext_search_tweaks_disable_relevance",
|
65
|
-
|
57
|
+
"ext_search_tweaks_disable_relevance",
|
58
|
+
False,
|
59
|
+
),
|
66
60
|
)
|
67
61
|
|
68
62
|
if not search_params.get("q") or disabled:
|
69
|
-
return
|
63
|
+
return None
|
70
64
|
|
71
65
|
normalized = normalize_query(search_params["q"]).replace(" ", "_")
|
72
66
|
if not normalized:
|
73
|
-
return
|
67
|
+
return None
|
74
68
|
|
75
69
|
field = prefix + normalized
|
76
70
|
boost_string = Template(
|
77
|
-
tk.config.get(CONFIG_BOOST_STRING, DEFAULT_BOOST_STRING)
|
71
|
+
tk.config.get(CONFIG_BOOST_STRING, DEFAULT_BOOST_STRING),
|
78
72
|
)
|
79
73
|
|
80
74
|
return boost_string.safe_substitute({"field": field})
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
from abc import ABC, abstractclassmethod, abstractmethod
|
3
3
|
from datetime import date, timedelta
|
4
|
-
from typing import Any, Iterable,
|
4
|
+
from typing import Any, Iterable, cast, Tuple
|
5
5
|
|
6
6
|
import ckan.plugins.toolkit as tk
|
7
7
|
from ckan.lib.redis import connect_to_redis, Redis
|
@@ -37,7 +37,7 @@ class ScoreStorage(ABC):
|
|
37
37
|
|
38
38
|
@classmethod
|
39
39
|
@abstractclassmethod
|
40
|
-
def scan(cls, id_:
|
40
|
+
def scan(cls, id_: str | None = None) -> Iterable[ScanItem]:
|
41
41
|
"""Get all the scores."""
|
42
42
|
...
|
43
43
|
|
@@ -53,11 +53,10 @@ class ScoreStorage(ABC):
|
|
53
53
|
|
54
54
|
def align(self) -> None:
|
55
55
|
"""Make some cleanup in order to maintain fast and correct value."""
|
56
|
-
pass
|
57
56
|
|
58
57
|
|
59
58
|
class RedisScoreStorage(ScoreStorage):
|
60
|
-
_conn:
|
59
|
+
_conn: Redis | None = None
|
61
60
|
|
62
61
|
@property
|
63
62
|
def conn(self):
|
@@ -109,13 +108,10 @@ class PermanentRedisScoreStorage(RedisScoreStorage):
|
|
109
108
|
return f"{self._common_key_part()}:{self.id}"
|
110
109
|
|
111
110
|
@classmethod
|
112
|
-
def scan(cls, id_:
|
111
|
+
def scan(cls, id_: str | None = None) -> Iterable[ScanItem]:
|
113
112
|
conn = cls.connect()
|
114
113
|
common_key = cls._common_key_part()
|
115
|
-
if id_
|
116
|
-
pattern = f"{common_key}:{id_}"
|
117
|
-
else:
|
118
|
-
pattern = f"{common_key}:*"
|
114
|
+
pattern = f"{common_key}:{id_}" if id_ else f"{common_key}:*"
|
119
115
|
for key in conn.keys(pattern):
|
120
116
|
_, row_id = key.rsplit(b":", 1)
|
121
117
|
for query, score in conn.hgetall(key).items():
|
@@ -169,13 +165,10 @@ class DailyRedisScoreStorage(RedisScoreStorage):
|
|
169
165
|
return date.today().isoformat()
|
170
166
|
|
171
167
|
@classmethod
|
172
|
-
def scan(cls, id_:
|
168
|
+
def scan(cls, id_: str | None = None) -> Iterable[ScanItem]:
|
173
169
|
conn = cls.connect()
|
174
170
|
common_key = cls._common_key_part()
|
175
|
-
if id_
|
176
|
-
pattern = f"{common_key}:{id_}:*"
|
177
|
-
else:
|
178
|
-
pattern = f"{common_key}:*"
|
171
|
+
pattern = f"{common_key}:{id_}:*" if id_ else f"{common_key}:*"
|
179
172
|
for key in conn.keys(pattern):
|
180
173
|
_, id_, query = key.decode().rsplit(":", 2)
|
181
174
|
yield id_, query, cls(id_, query).get()
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Any
|
4
|
+
import ckan.plugins.toolkit as tk
|
5
|
+
|
6
|
+
|
7
|
+
def feature_disabled(feature: str, search_params: dict[str, Any]) -> bool:
    """Tell whether the search request switches off the given feature.

    The flag is read from the ``ext_search_tweaks_disable_<feature>`` item of
    ``search_params["extras"]`` and converted with ``tk.asbool``. A missing
    ``extras`` dict or a missing flag counts as ``False``.
    """
    extras = search_params.get("extras", {})
    flag = extras.get(f"ext_search_tweaks_disable_{feature}", False)
    return tk.asbool(flag)
|
@@ -1,16 +1,12 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from functools import total_ordering
|
4
|
-
from typing import Any
|
4
|
+
from typing import Any
|
5
5
|
|
6
6
|
import ckan.plugins.toolkit as tk
|
7
7
|
from ckan.lib.search.common import make_connection
|
8
8
|
|
9
|
-
from . import
|
10
|
-
get_spellcheck_params,
|
11
|
-
CONFIG_SHOW_ONLY_MORE,
|
12
|
-
DEFAULT_SHOW_ONLY_MORE,
|
13
|
-
)
|
9
|
+
from . import CONFIG_SHOW_ONLY_MORE, DEFAULT_SHOW_ONLY_MORE, get_spellcheck_params
|
14
10
|
|
15
11
|
CONFIG_MAX_SUGGESTIONS = "ckanext.search_tweaks.spellcheck.max_suggestions"
|
16
12
|
CONFIG_SUGGESTION_FOR_SINGLE = (
|
@@ -28,7 +24,7 @@ def get_helpers():
|
|
28
24
|
|
29
25
|
|
30
26
|
def spellcheck_did_you_mean(
|
31
|
-
q: str, min_hits: int = 0, max_suggestions: int = None
|
27
|
+
q: str, min_hits: int = 0, max_suggestions: int = None,
|
32
28
|
) -> list[str]:
|
33
29
|
"""Return optimal query that can be used instead of the current one.
|
34
30
|
|
@@ -45,20 +41,18 @@ def spellcheck_did_you_mean(
|
|
45
41
|
spellcheck = _do_spellcheck(q)
|
46
42
|
|
47
43
|
show_only_more = tk.asbool(
|
48
|
-
tk.config.get(CONFIG_SHOW_ONLY_MORE, DEFAULT_SHOW_ONLY_MORE)
|
44
|
+
tk.config.get(CONFIG_SHOW_ONLY_MORE, DEFAULT_SHOW_ONLY_MORE),
|
49
45
|
)
|
50
46
|
if not show_only_more:
|
51
47
|
min_hits = -1
|
52
48
|
|
53
49
|
if not max_suggestions:
|
54
50
|
max_suggestions = tk.asint(
|
55
|
-
tk.config.get(CONFIG_MAX_SUGGESTIONS, DEFAULT_MAX_SUGGESTIONS)
|
51
|
+
tk.config.get(CONFIG_MAX_SUGGESTIONS, DEFAULT_MAX_SUGGESTIONS),
|
56
52
|
)
|
57
53
|
|
58
54
|
use_suggestion_for_single = tk.asbool(
|
59
|
-
tk.config.get(
|
60
|
-
CONFIG_SUGGESTION_FOR_SINGLE, DEFAULT_SUGGESTION_FOR_SINGLE
|
61
|
-
)
|
55
|
+
tk.config.get(CONFIG_SUGGESTION_FOR_SINGLE, DEFAULT_SUGGESTION_FOR_SINGLE),
|
62
56
|
)
|
63
57
|
terms = q.split()
|
64
58
|
if len(terms) == 1 and use_suggestion_for_single:
|
@@ -66,9 +60,7 @@ def spellcheck_did_you_mean(
|
|
66
60
|
return spellcheck.suggestions.get(terms[0], [])[:max_suggestions]
|
67
61
|
|
68
62
|
collations = [
|
69
|
-
str(c)
|
70
|
-
for c in spellcheck.best_collations(max_suggestions)
|
71
|
-
if min_hits < c
|
63
|
+
str(c) for c in spellcheck.best_collations(max_suggestions) if min_hits < c
|
72
64
|
]
|
73
65
|
|
74
66
|
if len(collations) < max_suggestions:
|
@@ -79,11 +71,7 @@ def spellcheck_did_you_mean(
|
|
79
71
|
|
80
72
|
# TODO: check min hits
|
81
73
|
new_q = " ".join(
|
82
|
-
[
|
83
|
-
spellcheck.suggestions[w][0]
|
84
|
-
for w in terms
|
85
|
-
if w in spellcheck.suggestions
|
86
|
-
]
|
74
|
+
[spellcheck.suggestions[w][0] for w in terms if w in spellcheck.suggestions],
|
87
75
|
)
|
88
76
|
if new_q:
|
89
77
|
collations.append(new_q)
|
@@ -142,13 +130,17 @@ class SpellcheckResult:
|
|
142
130
|
suggestions: dict[str, list[str]]
|
143
131
|
|
144
132
|
def __repr__(self):
|
145
|
-
return
|
133
|
+
return (
|
134
|
+
"<Spellcheck("
|
135
|
+
+ f"collations={self.collations}, suggestions={self.suggestions}"
|
136
|
+
+ ")>"
|
137
|
+
)
|
146
138
|
|
147
139
|
def __init__(self, collations: list[Any], suggestions: list[Any]):
|
148
140
|
self.collations = [Collation(item) for item in collations[1::2]]
|
149
141
|
self.suggestions = dict(
|
150
|
-
zip(suggestions[::2], [s["suggestion"] for s in suggestions[1::2]])
|
142
|
+
zip(suggestions[::2], [s["suggestion"] for s in suggestions[1::2]]),
|
151
143
|
)
|
152
144
|
|
153
|
-
def best_collations(self, n:
|
145
|
+
def best_collations(self, n: int | None = None) -> list[Collation]:
|
154
146
|
return sorted(self.collations, reverse=True)[:n]
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import pytest
|
2
2
|
|
3
3
|
import ckan.model as model
|
4
|
+
|
4
5
|
import ckanext.search_tweaks.query_relevance as relevance
|
5
6
|
|
6
7
|
|
@@ -45,9 +46,7 @@ class TestPathHasScore:
|
|
45
46
|
)
|
46
47
|
def test_group_referrer(self, path, has_score, monkeypatch):
|
47
48
|
pkg = model.Package(type="dataset")
|
48
|
-
monkeypatch.setattr(
|
49
|
-
model.Group, "get", lambda _: model.Group(name="valid")
|
50
|
-
)
|
49
|
+
monkeypatch.setattr(model.Group, "get", lambda _: model.Group(name="valid"))
|
51
50
|
assert relevance._path_has_score_for(path, pkg) is has_score
|
52
51
|
|
53
52
|
|
@@ -27,7 +27,7 @@ class TestStorages:
|
|
27
27
|
assert sorted(list(storage.scan())) == sorted(
|
28
28
|
[
|
29
29
|
("key", "query", 10),
|
30
|
-
]
|
30
|
+
],
|
31
31
|
)
|
32
32
|
|
33
33
|
s2 = storage("second key", "second query")
|
@@ -37,7 +37,7 @@ class TestStorages:
|
|
37
37
|
[
|
38
38
|
("key", "query", 100),
|
39
39
|
("second key", "second query", 5),
|
40
|
-
]
|
40
|
+
],
|
41
41
|
)
|
42
42
|
|
43
43
|
s3 = storage("key", "extra query")
|
@@ -47,14 +47,14 @@ class TestStorages:
|
|
47
47
|
("key", "query", 100),
|
48
48
|
("key", "extra query", 1),
|
49
49
|
("second key", "second query", 5),
|
50
|
-
]
|
50
|
+
],
|
51
51
|
)
|
52
52
|
|
53
53
|
assert sorted(list(storage.scan("key"))) == sorted(
|
54
54
|
[
|
55
55
|
("key", "query", 100),
|
56
56
|
("key", "extra query", 1),
|
57
|
-
]
|
57
|
+
],
|
58
58
|
)
|
59
59
|
|
60
60
|
def test_missing_key(self, storage):
|
@@ -6,24 +6,18 @@ from bs4 import BeautifulSoup
|
|
6
6
|
import ckan.plugins as p
|
7
7
|
import ckan.plugins.toolkit as tk
|
8
8
|
from ckan.tests.factories import Dataset
|
9
|
-
from ckanext.search_tweaks.spellcheck import (
|
10
|
-
CONFIG_SHOW_ONLY_MORE,
|
11
|
-
rebuild_dictionary,
|
12
|
-
)
|
13
9
|
|
10
|
+
from ckanext.search_tweaks.spellcheck import CONFIG_SHOW_ONLY_MORE, rebuild_dictionary
|
14
11
|
|
15
|
-
|
16
|
-
|
17
|
-
)
|
12
|
+
|
13
|
+
@pytest.mark.ckan_config("ckan.plugins", "search_tweaks search_tweaks_spellcheck")
|
18
14
|
@pytest.mark.usefixtures("with_plugins")
|
19
15
|
class TestSpellcheck:
|
20
16
|
def test_plugin_loaded(self):
|
21
17
|
assert p.plugin_loaded("search_tweaks_spellcheck")
|
22
18
|
|
23
19
|
|
24
|
-
@pytest.mark.ckan_config(
|
25
|
-
"ckan.plugins", "search_tweaks search_tweaks_spellcheck"
|
26
|
-
)
|
20
|
+
@pytest.mark.ckan_config("ckan.plugins", "search_tweaks search_tweaks_spellcheck")
|
27
21
|
@pytest.mark.usefixtures("with_plugins", "with_request_context")
|
28
22
|
class TestDidYouMeanSnippet:
|
29
23
|
def test_empty_without_data(self):
|
@@ -41,9 +35,7 @@ class TestDidYouMeanSnippet:
|
|
41
35
|
|
42
36
|
|
43
37
|
@pytest.mark.ckanext_search_tweaks_modified_schema
|
44
|
-
@pytest.mark.ckan_config(
|
45
|
-
"ckan.plugins", "search_tweaks search_tweaks_spellcheck"
|
46
|
-
)
|
38
|
+
@pytest.mark.ckan_config("ckan.plugins", "search_tweaks search_tweaks_spellcheck")
|
47
39
|
@pytest.mark.usefixtures("with_plugins", "clean_db", "clean_index")
|
48
40
|
class TestHelper:
|
49
41
|
def test_recommendations(self):
|
@@ -56,7 +48,7 @@ class TestHelper:
|
|
56
48
|
assert helper("do nat touc me") == ["do not touch me"]
|
57
49
|
|
58
50
|
assert helper("pic", 3) == [
|
59
|
-
"pick"
|
51
|
+
"pick",
|
60
52
|
] # min_hits fucked up because of single-term match
|
61
53
|
assert helper("pic", 1) == ["pick"]
|
62
54
|
|
@@ -69,7 +61,7 @@ class TestHelper:
|
|
69
61
|
|
70
62
|
assert helper("pock", 1) == ["pick"]
|
71
63
|
assert helper("pick", 3) == [
|
72
|
-
"pock"
|
64
|
+
"pock",
|
73
65
|
] # min_hits fucked up because of single-term match
|
74
66
|
|
75
67
|
monkeypatch.setitem(ckan_config, CONFIG_SHOW_ONLY_MORE, "off")
|