ckanext-search-tweaks 0.6.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanext/__init__.py +0 -1
- ckanext/search_tweaks/advanced_search/assets/advanced-search.css +0 -4
- ckanext/search_tweaks/advanced_search/plugin.py +11 -0
- ckanext/search_tweaks/advanced_search/templates/advanced_search/search_form.html +5 -3
- ckanext/search_tweaks/config_declaration.yaml +24 -0
- ckanext/search_tweaks/field_relevance/plugin.py +0 -2
- ckanext/search_tweaks/field_relevance/templates/search_tweaks/field_relevance/promote.html +1 -1
- ckanext/search_tweaks/field_relevance/views.py +18 -5
- ckanext/search_tweaks/interfaces.py +1 -3
- ckanext/search_tweaks/plugin.py +0 -1
- ckanext/search_tweaks/query_popularity/config_declaration.yaml +29 -0
- ckanext/search_tweaks/query_popularity/logic/schema.py +1 -0
- ckanext/search_tweaks/query_popularity/score.py +2 -1
- ckanext/search_tweaks/query_relevance/__init__.py +38 -13
- ckanext/search_tweaks/query_relevance/boost.py +75 -0
- ckanext/search_tweaks/query_relevance/cli.py +20 -38
- ckanext/search_tweaks/query_relevance/config.py +29 -0
- ckanext/search_tweaks/query_relevance/config_declaration.yaml +16 -0
- ckanext/search_tweaks/query_relevance/plugin.py +8 -37
- ckanext/search_tweaks/query_relevance/score.py +17 -50
- ckanext/search_tweaks/query_relevance/storage.py +79 -137
- ckanext/search_tweaks/spellcheck/helpers.py +6 -2
- ckanext/search_tweaks/tests/conftest.py +13 -0
- ckanext/search_tweaks/tests/query_relevance/test_plugin.py +5 -5
- ckanext/search_tweaks/tests/query_relevance/test_search.py +84 -0
- ckanext/search_tweaks/tests/query_relevance/test_storage.py +23 -99
- ckanext/search_tweaks/tests/spellcheck/test_plugin.py +4 -2
- {ckanext_search_tweaks-0.6.2.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/METADATA +122 -124
- ckanext_search_tweaks-1.0.0.dist-info/RECORD +58 -0
- {ckanext_search_tweaks-0.6.2.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/WHEEL +1 -1
- ckanext_search_tweaks-0.6.2.dist-info/RECORD +0 -52
- {ckanext_search_tweaks-0.6.2.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/entry_points.txt +0 -0
- {ckanext_search_tweaks-0.6.2.dist-info → ckanext_search_tweaks-1.0.0.dist-info/licenses}/LICENSE +0 -0
- {ckanext_search_tweaks-0.6.2.dist-info → ckanext_search_tweaks-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,5 @@
|
|
1
|
-
from
|
2
|
-
|
3
|
-
from .storage import (
|
4
|
-
PermanentRedisScoreStorage,
|
5
|
-
DailyRedisScoreStorage,
|
6
|
-
ScoreStorage,
|
7
|
-
)
|
8
|
-
|
9
|
-
_backends = {
|
10
|
-
"redis-permanent": PermanentRedisScoreStorage,
|
11
|
-
"redis-daily": DailyRedisScoreStorage,
|
12
|
-
}
|
13
|
-
|
14
|
-
CONFIG_BACKEND = "ckanext.search_tweaks.query_relevance.backend"
|
15
|
-
DEFAULT_BACKEND = "redis-daily"
|
16
|
-
|
17
|
-
DEFAULT_SCORE_STORAGE_CLASS = DailyRedisScoreStorage
|
1
|
+
from .storage import QueryHitTracker
|
2
|
+
from .config import get_max_boost_count
|
18
3
|
|
19
4
|
|
20
5
|
def normalize_query(query: str) -> str:
|
@@ -26,50 +11,32 @@ def normalize_query(query: str) -> str:
|
|
26
11
|
|
27
12
|
|
28
13
|
class QueryScore:
|
29
|
-
|
30
|
-
|
31
|
-
def __init__(
|
32
|
-
self,
|
33
|
-
id_: str,
|
34
|
-
query: str,
|
35
|
-
*,
|
36
|
-
normalize: bool = True,
|
37
|
-
storage_class: Optional[Type[ScoreStorage]] = None,
|
38
|
-
):
|
14
|
+
def __init__(self, entity_id: str, query: str, normalize: bool = True):
|
39
15
|
if normalize:
|
40
16
|
query = normalize_query(query)
|
41
17
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
self.storage = self.storage_class(id_, query)
|
18
|
+
self.entity_id = entity_id
|
19
|
+
self.query = query
|
20
|
+
|
21
|
+
self.storage = QueryHitTracker(self.entity_id, self.query)
|
47
22
|
|
48
23
|
def __int__(self):
|
49
24
|
return self.storage.get()
|
50
25
|
|
51
|
-
|
52
|
-
|
53
|
-
return _backends[tk.config.get(CONFIG_BACKEND, DEFAULT_BACKEND)]
|
54
|
-
|
55
|
-
@property
|
56
|
-
def query(self):
|
57
|
-
return self.storage.query
|
58
|
-
|
59
|
-
def increase(self, n: int) -> None:
|
60
|
-
self.storage.inc(n)
|
61
|
-
|
62
|
-
def align(self):
|
63
|
-
self.storage.align()
|
26
|
+
def increase(self, amount: int) -> None:
|
27
|
+
self.storage.increase(amount)
|
64
28
|
|
65
29
|
def reset(self):
|
66
|
-
self.storage.reset()
|
30
|
+
self.storage.reset(self.query)
|
31
|
+
|
32
|
+
@classmethod
|
33
|
+
def get_for_query(cls, query: str, limit: int | None = None) -> list[tuple[bytes, float]]:
|
34
|
+
return QueryHitTracker.top(query, limit or get_max_boost_count())
|
67
35
|
|
68
36
|
@classmethod
|
69
37
|
def get_all(cls):
|
70
|
-
|
71
|
-
return storage.scan()
|
38
|
+
return QueryHitTracker.get_all()
|
72
39
|
|
73
40
|
@classmethod
|
74
|
-
def
|
75
|
-
return
|
41
|
+
def reset_all(cls):
|
42
|
+
return QueryHitTracker.reset_all()
|
@@ -1,174 +1,116 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from
|
4
|
-
from datetime import date, timedelta
|
5
|
-
from typing import Any, Iterable, Tuple, cast
|
3
|
+
from ckan.lib.redis import connect_to_redis
|
6
4
|
|
7
|
-
import ckan.plugins.toolkit as tk
|
8
|
-
from ckan.lib.redis import Redis, connect_to_redis
|
9
5
|
|
10
|
-
|
11
|
-
DEFAULT_DAILY_AGE = 90
|
6
|
+
ScanItem = tuple[str, int]
|
12
7
|
|
13
|
-
ScanItem = Tuple[str, str, int]
|
14
8
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
query: str
|
19
|
-
|
20
|
-
def __init__(self, id_: str, query: str):
|
21
|
-
self.id = id_
|
9
|
+
class QueryHitTracker:
|
10
|
+
def __init__(self, entity_id: str, query: str, ttl: int | None = None):
|
11
|
+
self.entity_id = entity_id
|
22
12
|
self.query = query
|
23
|
-
|
24
|
-
|
25
|
-
def get(self) -> int:
|
26
|
-
"""Get current value."""
|
27
|
-
...
|
28
|
-
|
29
|
-
@abstractmethod
|
30
|
-
def inc(self, by: int) -> None:
|
31
|
-
"""Increase current value by the given value."""
|
32
|
-
...
|
33
|
-
|
34
|
-
@abstractmethod
|
35
|
-
def set(self, value: int) -> None:
|
36
|
-
"""Replace current value with the given one."""
|
37
|
-
...
|
13
|
+
self.conn = self.make_connection()
|
14
|
+
self.ttl = ttl
|
38
15
|
|
39
16
|
@classmethod
|
40
|
-
|
41
|
-
|
42
|
-
"""Get all the scores."""
|
43
|
-
...
|
17
|
+
def make_connection(cls):
|
18
|
+
return connect_to_redis()
|
44
19
|
|
45
20
|
@classmethod
|
46
|
-
|
47
|
-
|
48
|
-
"""Remove everything from storage."""
|
49
|
-
...
|
21
|
+
def _key(cls, query: str) -> str:
|
22
|
+
return f"search-tweaks:query-relevance:{query}"
|
50
23
|
|
51
|
-
def
|
52
|
-
"""
|
53
|
-
self.set(0)
|
24
|
+
def increase(self, amount: int) -> None:
|
25
|
+
"""Increase the score for the specific entity_id + query.
|
54
26
|
|
55
|
-
|
56
|
-
|
57
|
-
|
27
|
+
Args:
|
28
|
+
amount: amount to increase the score by
|
29
|
+
"""
|
30
|
+
key = self._key(self.query)
|
31
|
+
pipe = self.conn.pipeline()
|
58
32
|
|
33
|
+
pipe.zincrby(key, amount, self.entity_id)
|
59
34
|
|
60
|
-
|
61
|
-
|
35
|
+
if self.ttl is not None:
|
36
|
+
pipe.expire(key, self.ttl)
|
62
37
|
|
63
|
-
|
64
|
-
def conn(self):
|
65
|
-
if not self._conn:
|
66
|
-
self._conn = self.connect()
|
67
|
-
return self._conn
|
38
|
+
pipe.execute()
|
68
39
|
|
69
|
-
|
70
|
-
|
71
|
-
return connect_to_redis()
|
40
|
+
def get(self) -> int | None:
|
41
|
+
"""Get the score for the specific entity_id + query.
|
72
42
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
43
|
+
Returns:
|
44
|
+
score or None if not found
|
45
|
+
"""
|
46
|
+
result = self.conn.zscore(self._key(self.query), self.entity_id)
|
47
|
+
return int(result) if result else 0 # type: ignore
|
77
48
|
|
78
49
|
@classmethod
|
79
|
-
def
|
80
|
-
|
81
|
-
for key in conn.keys(f"{cls._common_key_part()}:*"):
|
82
|
-
conn.delete(key)
|
83
|
-
|
84
|
-
@abstractmethod
|
85
|
-
def _key(self) -> str: ...
|
86
|
-
|
87
|
-
def reset(self):
|
88
|
-
self.conn.delete(self._key())
|
89
|
-
|
50
|
+
def get_all(cls) -> list[tuple[str, str, int]]:
|
51
|
+
"""Get all scores.
|
90
52
|
|
91
|
-
|
92
|
-
|
53
|
+
Returns:
|
54
|
+
list of (entity_id, query, score) tuples
|
55
|
+
"""
|
56
|
+
conn = cls.make_connection()
|
57
|
+
cursor = 0
|
58
|
+
results: list[tuple[str, str, int]] = []
|
93
59
|
|
94
|
-
|
95
|
-
|
60
|
+
while True:
|
61
|
+
cursor, keys = conn.scan(cursor=cursor, match=cls._key("*"), count=1000) # type: ignore
|
96
62
|
|
97
|
-
|
63
|
+
for key in keys:
|
64
|
+
query = key.decode().rsplit(":", 1)[-1]
|
98
65
|
|
99
|
-
|
100
|
-
|
66
|
+
for entity_id, score in conn.zrange(key, 0, -1, withscores=True): # type: ignore
|
67
|
+
results.append((entity_id.decode(), query, int(score)))
|
101
68
|
|
102
|
-
|
103
|
-
|
69
|
+
if cursor == 0:
|
70
|
+
break
|
104
71
|
|
105
|
-
|
106
|
-
self.conn.hincrby(self._key(), self.query, by)
|
107
|
-
|
108
|
-
def _key(self):
|
109
|
-
return f"{self._common_key_part()}:{self.id}"
|
72
|
+
return results
|
110
73
|
|
111
74
|
@classmethod
|
112
|
-
def
|
113
|
-
|
114
|
-
common_key = cls._common_key_part()
|
115
|
-
pattern = f"{common_key}:{id_}" if id_ else f"{common_key}:*"
|
116
|
-
for key in conn.keys(pattern):
|
117
|
-
_, row_id = key.rsplit(b":", 1)
|
118
|
-
for query, score in conn.hgetall(key).items():
|
119
|
-
yield row_id.decode(), query.decode(), int(score)
|
120
|
-
|
121
|
-
|
122
|
-
class DailyRedisScoreStorage(RedisScoreStorage):
|
123
|
-
"""Store data inside different cells depending on current date.
|
124
|
-
|
125
|
-
The longer index exists, the more memory it consumes. But it can be aligned
|
126
|
-
periodically in order to free memory.
|
127
|
-
|
128
|
-
"""
|
75
|
+
def top(cls, query: str, limit: int = 100) -> list[tuple[bytes, float]]:
|
76
|
+
"""Return the top N entities for the given query.
|
129
77
|
|
130
|
-
|
131
|
-
|
132
|
-
|
78
|
+
Args:
|
79
|
+
query: search query
|
80
|
+
limit (optional): maximum number of entities to return
|
133
81
|
|
134
|
-
|
82
|
+
Returns:
|
83
|
+
list of (entity_id, score) tuples
|
84
|
+
"""
|
85
|
+
conn = cls.make_connection()
|
135
86
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
87
|
+
return conn.zrevrange( # type: ignore
|
88
|
+
cls._key(query),
|
89
|
+
0,
|
90
|
+
limit - 1,
|
91
|
+
withscores=True,
|
92
|
+
)
|
140
93
|
|
141
|
-
@
|
142
|
-
def
|
143
|
-
|
144
|
-
|
145
|
-
def inc(self, by: int) -> None:
|
146
|
-
key = self._key()
|
147
|
-
zkey = self._zkey()
|
148
|
-
# type-stubs don't know that signature is (key, amount, value)
|
149
|
-
self.conn.zincrby(key, by, zkey) # type: ignore
|
94
|
+
@classmethod
|
95
|
+
def reset(cls, query: str) -> None:
|
96
|
+
"""Reset scores for the given query.
|
150
97
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
98
|
+
Args:
|
99
|
+
query: search query
|
100
|
+
"""
|
101
|
+
cls.make_connection().delete(cls._key(query))
|
155
102
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
103
|
+
@classmethod
|
104
|
+
def reset_all(cls) -> None:
|
105
|
+
"""Reset all scores."""
|
106
|
+
cursor = 0
|
107
|
+
conn = cls.make_connection()
|
160
108
|
|
161
|
-
|
162
|
-
|
109
|
+
while True:
|
110
|
+
cursor, keys = conn.scan(cursor=cursor, match=cls._key("*"), count=1000) # type: ignore
|
163
111
|
|
164
|
-
|
165
|
-
|
112
|
+
if keys:
|
113
|
+
conn.delete(*keys)
|
166
114
|
|
167
|
-
|
168
|
-
|
169
|
-
conn = cls.connect()
|
170
|
-
common_key = cls._common_key_part()
|
171
|
-
pattern = f"{common_key}:{id_}:*" if id_ else f"{common_key}:*"
|
172
|
-
for key in conn.keys(pattern):
|
173
|
-
_, id_, query = key.decode().rsplit(":", 2)
|
174
|
-
yield id_, query, cls(id_, query).get()
|
115
|
+
if cursor == 0:
|
116
|
+
break
|
@@ -108,7 +108,7 @@ class Collation:
|
|
108
108
|
self.hits = data["hits"]
|
109
109
|
self.query = data["collationQuery"]
|
110
110
|
changes = data["misspellingsAndCorrections"]
|
111
|
-
self.corrections = dict(zip(changes[::2], changes[1::2]))
|
111
|
+
self.corrections = dict(zip(changes[::2], changes[1::2], strict=True))
|
112
112
|
|
113
113
|
def __eq__(self, other):
|
114
114
|
if isinstance(other, int):
|
@@ -145,7 +145,11 @@ class SpellcheckResult:
|
|
145
145
|
def __init__(self, collations: list[Any], suggestions: list[Any]):
|
146
146
|
self.collations = [Collation(item) for item in collations[1::2]]
|
147
147
|
self.suggestions = dict(
|
148
|
-
zip(
|
148
|
+
zip(
|
149
|
+
suggestions[::2],
|
150
|
+
[s["suggestion"] for s in suggestions[1::2]],
|
151
|
+
strict=True,
|
152
|
+
),
|
149
153
|
)
|
150
154
|
|
151
155
|
def best_collations(self, n: int | None = None) -> list[Collation]:
|
@@ -3,10 +3,13 @@ import types
|
|
3
3
|
from unittest import mock
|
4
4
|
from typing import cast
|
5
5
|
|
6
|
+
import factory
|
6
7
|
import pytest
|
8
|
+
from pytest_factoryboy import register
|
7
9
|
|
8
10
|
import ckan.lib.search.query as query
|
9
11
|
from ckan.tests.helpers import call_action
|
12
|
+
from ckan.tests import factories
|
10
13
|
|
11
14
|
|
12
15
|
@pytest.fixture
|
@@ -25,3 +28,13 @@ def search(monkeypatch):
|
|
25
28
|
return patch.call_args.args[1]
|
26
29
|
|
27
30
|
return expose_args
|
31
|
+
|
32
|
+
|
33
|
+
@register(_name="dataset")
|
34
|
+
class DatasetFactory(factories.Dataset):
|
35
|
+
owner_org = factory.LazyFunction(lambda: OrganizationFactory()["id"])
|
36
|
+
|
37
|
+
|
38
|
+
@register(_name="organization")
|
39
|
+
class OrganizationFactory(factories.Organization):
|
40
|
+
pass
|
@@ -5,7 +5,7 @@ import ckan.model as model
|
|
5
5
|
import ckanext.search_tweaks.query_relevance as relevance
|
6
6
|
|
7
7
|
|
8
|
-
@pytest.mark.usefixtures("with_request_context")
|
8
|
+
@pytest.mark.usefixtures("clean_db", "with_request_context")
|
9
9
|
class TestPathHasScore:
|
10
10
|
@pytest.mark.parametrize(
|
11
11
|
"path, has_score",
|
@@ -17,7 +17,7 @@ class TestPathHasScore:
|
|
17
17
|
)
|
18
18
|
def test_search_referrer(self, path, has_score):
|
19
19
|
pkg = model.Package(type="dataset")
|
20
|
-
assert relevance.
|
20
|
+
assert relevance._is_scoring_enabled_for_path(path, pkg) is has_score
|
21
21
|
|
22
22
|
@pytest.mark.parametrize(
|
23
23
|
"path, has_score",
|
@@ -34,7 +34,7 @@ class TestPathHasScore:
|
|
34
34
|
"get",
|
35
35
|
lambda _: model.Group(name="valid", type="organization"),
|
36
36
|
)
|
37
|
-
assert relevance.
|
37
|
+
assert relevance._is_scoring_enabled_for_path(path, pkg) is has_score
|
38
38
|
|
39
39
|
@pytest.mark.parametrize(
|
40
40
|
"path, has_score",
|
@@ -47,10 +47,10 @@ class TestPathHasScore:
|
|
47
47
|
def test_group_referrer(self, path, has_score, monkeypatch):
|
48
48
|
pkg = model.Package(type="dataset")
|
49
49
|
monkeypatch.setattr(model.Group, "get", lambda _: model.Group(name="valid"))
|
50
|
-
assert relevance.
|
50
|
+
assert relevance._is_scoring_enabled_for_path(path, pkg) is has_score
|
51
51
|
|
52
52
|
|
53
|
-
@pytest.mark.usefixtures("with_request_context")
|
53
|
+
@pytest.mark.usefixtures("clean_db", "with_request_context")
|
54
54
|
class TestUpdateScore:
|
55
55
|
@pytest.mark.parametrize(
|
56
56
|
"url, repeat, value",
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import pytest
|
2
|
+
|
3
|
+
from ckan.tests.helpers import call_action
|
4
|
+
|
5
|
+
from ckanext.search_tweaks.query_relevance import QueryScore
|
6
|
+
|
7
|
+
|
8
|
+
@pytest.mark.usefixtures("with_plugins", "clean_db", "clean_redis", "clean_index")
|
9
|
+
class TestSearchScoreBoost:
|
10
|
+
def test_no_score_boost(self, dataset_factory):
|
11
|
+
dataset_factory(title="ocean water")
|
12
|
+
dataset_factory(title="water basin")
|
13
|
+
|
14
|
+
result = call_action("package_search", q="water", fl="id,title,score")[
|
15
|
+
"results"
|
16
|
+
]
|
17
|
+
|
18
|
+
assert abs(result[0]["score"] - result[1]["score"]) < 0.01
|
19
|
+
|
20
|
+
def test_query_relevance_disabled(self, dataset_factory):
|
21
|
+
dataset_1 = dataset_factory(title="ocean water")
|
22
|
+
dataset_2 = dataset_factory(title="water basin")
|
23
|
+
|
24
|
+
QueryScore(dataset_1["id"], "water").increase(10)
|
25
|
+
QueryScore(dataset_2["id"], "water").increase(5)
|
26
|
+
|
27
|
+
result = call_action(
|
28
|
+
"package_search",
|
29
|
+
q="water",
|
30
|
+
fl="id,title,score",
|
31
|
+
extras={"ext_search_tweaks_disable_query_boost": True},
|
32
|
+
)["results"]
|
33
|
+
|
34
|
+
assert abs(result[0]["score"] - result[1]["score"]) < 0.01
|
35
|
+
|
36
|
+
@pytest.mark.ckan_config("ckanext.search_tweaks.common.prefer_boost", "false")
|
37
|
+
def test_query_relevance_boosted_with_bf(self, dataset_factory):
|
38
|
+
dataset_1 = dataset_factory(title="ocean water")
|
39
|
+
dataset_2 = dataset_factory(title="water basin")
|
40
|
+
|
41
|
+
QueryScore(dataset_1["id"], "water").increase(10)
|
42
|
+
QueryScore(dataset_2["id"], "water").increase(5)
|
43
|
+
|
44
|
+
result = call_action("package_search", q="water", fl="id,title,score")[
|
45
|
+
"results"
|
46
|
+
]
|
47
|
+
|
48
|
+
assert abs(result[0]["score"] - result[1]["score"]) > 0.01
|
49
|
+
|
50
|
+
@pytest.mark.ckan_config("ckanext.search_tweaks.common.prefer_boost", "true")
|
51
|
+
def test_query_relevance_boosted_with_boost(self, dataset_factory):
|
52
|
+
dataset_1 = dataset_factory(title="ocean water")
|
53
|
+
dataset_2 = dataset_factory(title="water basin")
|
54
|
+
|
55
|
+
QueryScore(dataset_1["id"], "water").increase(10)
|
56
|
+
QueryScore(dataset_2["id"], "water").increase(5)
|
57
|
+
|
58
|
+
result = call_action("package_search", q="water", fl="id,title,score")[
|
59
|
+
"results"
|
60
|
+
]
|
61
|
+
|
62
|
+
assert abs(result[0]["score"] - result[1]["score"]) > 0.01
|
63
|
+
|
64
|
+
@pytest.mark.skip(reason="use only for profiling")
|
65
|
+
def test_profile_boost_function(self, dataset_factory):
|
66
|
+
first_query = None
|
67
|
+
|
68
|
+
for _ in range(1000):
|
69
|
+
dataset = dataset_factory()
|
70
|
+
ds_query = dataset["title"].split()[0]
|
71
|
+
|
72
|
+
if first_query is None:
|
73
|
+
first_query = ds_query
|
74
|
+
|
75
|
+
QueryScore(dataset["id"], ds_query).increase(1)
|
76
|
+
|
77
|
+
import timeit
|
78
|
+
|
79
|
+
def time_query():
|
80
|
+
call_action("package_search", q=first_query, fl="id,title,score")
|
81
|
+
|
82
|
+
time = timeit.timeit(time_query, number=20)
|
83
|
+
|
84
|
+
print(f"Time: {time}")
|
@@ -1,114 +1,38 @@
|
|
1
1
|
import pytest
|
2
|
-
from ckanext.search_tweaks.query_relevance.storage import (
|
3
|
-
DailyRedisScoreStorage,
|
4
|
-
PermanentRedisScoreStorage,
|
5
|
-
)
|
6
2
|
|
3
|
+
from ckanext.search_tweaks.query_relevance.storage import QueryHitTracker
|
7
4
|
|
8
|
-
@pytest.fixture
|
9
|
-
def storage(storage_class):
|
10
|
-
storage_class.reset_storage()
|
11
|
-
return storage_class
|
12
5
|
|
6
|
+
@pytest.mark.usefixtures("clean_redis")
|
7
|
+
class TestQueryHitTracker:
|
8
|
+
def test_increase(self):
|
9
|
+
QueryHitTracker("id-1", "hello").increase(1)
|
10
|
+
QueryHitTracker("id-2", "hello").increase(5)
|
13
11
|
|
14
|
-
|
15
|
-
"storage_class",
|
16
|
-
[
|
17
|
-
PermanentRedisScoreStorage,
|
18
|
-
DailyRedisScoreStorage,
|
19
|
-
],
|
20
|
-
)
|
21
|
-
class TestStorages:
|
22
|
-
def test_scan(self, storage):
|
23
|
-
assert list(storage.scan()) == []
|
12
|
+
result = QueryHitTracker.top("hello", 2)
|
24
13
|
|
25
|
-
|
26
|
-
s1.inc(10)
|
27
|
-
assert sorted(list(storage.scan())) == sorted(
|
28
|
-
[
|
29
|
-
("key", "query", 10),
|
30
|
-
],
|
31
|
-
)
|
14
|
+
assert result == [(b"id-2", 5.0), (b"id-1", 1.0)]
|
32
15
|
|
33
|
-
|
34
|
-
|
35
|
-
s1.inc(90)
|
36
|
-
assert sorted(list(storage.scan())) == sorted(
|
37
|
-
[
|
38
|
-
("key", "query", 100),
|
39
|
-
("second key", "second query", 5),
|
40
|
-
],
|
41
|
-
)
|
16
|
+
def test_missing_query(self):
|
17
|
+
result = QueryHitTracker.top("hello")
|
42
18
|
|
43
|
-
|
44
|
-
s3.inc(1)
|
45
|
-
assert sorted(list(storage.scan())) == sorted(
|
46
|
-
[
|
47
|
-
("key", "query", 100),
|
48
|
-
("key", "extra query", 1),
|
49
|
-
("second key", "second query", 5),
|
50
|
-
],
|
51
|
-
)
|
19
|
+
assert result == []
|
52
20
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
("key", "extra query", 1),
|
57
|
-
],
|
58
|
-
)
|
21
|
+
def test_expiration(self):
|
22
|
+
tracker = QueryHitTracker("id-1", "hello", ttl=1)
|
23
|
+
tracker.increase(1)
|
59
24
|
|
60
|
-
|
61
|
-
s = storage("not a real key", "not a real query")
|
62
|
-
assert s.get() == 0
|
25
|
+
import time
|
63
26
|
|
64
|
-
|
65
|
-
s = storage("real key", "real value")
|
66
|
-
s.set(10)
|
67
|
-
assert s.get() == 10
|
68
|
-
s.reset()
|
69
|
-
assert s.get() == 0
|
27
|
+
time.sleep(2)
|
70
28
|
|
71
|
-
|
72
|
-
s1 = storage("real key", "hello")
|
73
|
-
s2 = storage("real key", "world")
|
29
|
+
assert tracker.top("hello") == []
|
74
30
|
|
75
|
-
|
76
|
-
|
77
|
-
s1.inc(1)
|
78
|
-
assert s1.get() == 2
|
79
|
-
assert s2.get() == 1
|
31
|
+
def test_reset(self):
|
32
|
+
tracker = QueryHitTracker("id-1", "hello")
|
80
33
|
|
34
|
+
tracker.increase(1)
|
35
|
+
assert tracker.top("hello") == [(b"id-1", 1.0)]
|
81
36
|
|
82
|
-
|
83
|
-
|
84
|
-
def reset_storage(self):
|
85
|
-
DailyRedisScoreStorage.reset_storage()
|
86
|
-
|
87
|
-
def test_score_aggregated(self, freezer):
|
88
|
-
s = DailyRedisScoreStorage("key", "query")
|
89
|
-
freezer.move_to("2012-01-01")
|
90
|
-
s.inc(2)
|
91
|
-
assert s.get() == 2
|
92
|
-
|
93
|
-
freezer.move_to("2012-02-10")
|
94
|
-
s.inc(1)
|
95
|
-
assert s.get() == 3
|
96
|
-
|
97
|
-
freezer.move_to("2012-03-26")
|
98
|
-
s.inc(2)
|
99
|
-
assert s.get() == 5
|
100
|
-
|
101
|
-
def test_score_aligned(self, freezer):
|
102
|
-
s = DailyRedisScoreStorage("key", "query")
|
103
|
-
freezer.move_to("2010-01-01")
|
104
|
-
s.inc(2)
|
105
|
-
freezer.move_to("2011-01-01")
|
106
|
-
s.inc(2)
|
107
|
-
|
108
|
-
freezer.move_to("2012-02-10")
|
109
|
-
s.inc(1)
|
110
|
-
freezer.move_to("2012-03-26")
|
111
|
-
s.inc(2)
|
112
|
-
assert s.get() == 7
|
113
|
-
s.align()
|
114
|
-
assert s.get() == 3
|
37
|
+
tracker.reset("hello")
|
38
|
+
assert tracker.top("hello") == []
|