ckanext-search-tweaks 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ckanext-search-tweaks-0.5.0/ckanext_search_tweaks.egg-info → ckanext-search-tweaks-0.6.0}/PKG-INFO +7 -4
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/__init__.py +0 -1
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/__init__.py +3 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/advanced_search/plugin.py +3 -3
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/cli.py +3 -3
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/config.py +37 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/field_relevance/plugin.py +3 -3
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/field_relevance/views.py +8 -8
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/interfaces.py +32 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/plugin.py +19 -38
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity/config.py +30 -0
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity/logic/action.py +43 -0
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity/logic/auth.py +23 -0
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity/plugin.py +47 -0
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity/score.py +165 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/query_relevance/__init__.py +1 -2
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/query_relevance/plugin.py +13 -12
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/query_relevance/score.py +1 -1
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/query_relevance/storage.py +7 -14
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/shared.py +13 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/spellcheck/helpers.py +8 -8
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/spellcheck/plugin.py +1 -1
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/tests/query_relevance/test_storage.py +4 -4
- ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/tests/spellcheck/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/tests/spellcheck/test_plugin.py +2 -2
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/tests/test_plugin.py +16 -15
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0/ckanext_search_tweaks.egg-info}/PKG-INFO +7 -4
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext_search_tweaks.egg-info/SOURCES.txt +9 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext_search_tweaks.egg-info/entry_points.txt +1 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext_search_tweaks.egg-info/requires.txt +1 -1
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/pyproject.toml +19 -19
- ckanext-search-tweaks-0.6.0/requirements.txt +0 -0
- ckanext-search-tweaks-0.6.0/setup.cfg +66 -0
- ckanext-search-tweaks-0.6.0/setup.py +15 -0
- ckanext-search-tweaks-0.5.0/ckanext/search_tweaks/__init__.py +0 -19
- ckanext-search-tweaks-0.5.0/ckanext/search_tweaks/interfaces.py +0 -25
- ckanext-search-tweaks-0.5.0/setup.cfg +0 -26
- ckanext-search-tweaks-0.5.0/setup.py +0 -97
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/LICENSE +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/MANIFEST.in +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/README.md +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/advanced_search/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/advanced_search/assets/advanced-search.css +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/advanced_search/assets/advanced-search.js +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/advanced_search/assets/webassets.yml +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/advanced_search/templates/advanced_search/search_form.html +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/field_relevance/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/field_relevance/assets/search-tweaks-reflect-range-in-label.js +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/field_relevance/assets/webassets.yml +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/field_relevance/templates/search_tweaks/field_relevance/promote.html +0 -0
- {ckanext-search-tweaks-0.5.0/ckanext/search_tweaks/tests → ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity}/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0/ckanext/search_tweaks/tests/query_relevance → ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/query_popularity/logic}/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/query_relevance/cli.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/spellcheck/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/spellcheck/cli.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/spellcheck/templates/search_tweaks/did_you_mean.html +0 -0
- {ckanext-search-tweaks-0.5.0/ckanext/search_tweaks/tests/spellcheck → ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/tests}/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/tests/conftest.py +0 -0
- /ckanext-search-tweaks-0.5.0/requirements.txt → /ckanext-search-tweaks-0.6.0/ckanext/search_tweaks/tests/query_relevance/__init__.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/tests/query_relevance/test_plugin.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext/search_tweaks/tests/query_relevance/test_score.py +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext_search_tweaks.egg-info/dependency_links.txt +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext_search_tweaks.egg-info/namespace_packages.txt +0 -0
- {ckanext-search-tweaks-0.5.0 → ckanext-search-tweaks-0.6.0}/ckanext_search_tweaks.egg-info/top_level.txt +0 -0
{ckanext-search-tweaks-0.5.0/ckanext_search_tweaks.egg-info → ckanext-search-tweaks-0.6.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ckanext-search-tweaks
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.6.0
|
4
4
|
Home-page: https://github.com/DataShades/ckanext-search-tweaks
|
5
5
|
Author: Sergey Motornyuk
|
6
6
|
Author-email: sergey.motornyuk@linkdigital.com.au
|
@@ -8,11 +8,14 @@ License: AGPL
|
|
8
8
|
Keywords: CKAN
|
9
9
|
Classifier: Development Status :: 4 - Beta
|
10
10
|
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
|
11
|
-
Classifier: Programming Language :: Python ::
|
12
|
-
|
11
|
+
Classifier: Programming Language :: Python :: 3.8
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
13
14
|
Description-Content-Type: text/markdown
|
14
|
-
Provides-Extra: advanced-search
|
15
15
|
License-File: LICENSE
|
16
|
+
Requires-Dist: freezegun
|
17
|
+
Requires-Dist: typing_extensions>=4.0.0
|
18
|
+
Provides-Extra: advanced-search
|
16
19
|
|
17
20
|
[](https://github.com/DataShades/ckanext-search-tweaks/actions)
|
18
21
|
|
@@ -37,14 +37,14 @@ DEFAULT_FORM_DEFINITION = json.dumps(
|
|
37
37
|
{"value": "private", "label": "Private"},
|
38
38
|
],
|
39
39
|
},
|
40
|
-
}
|
40
|
+
},
|
41
41
|
)
|
42
42
|
DEFAULT_FIELD_ORDER = None
|
43
43
|
|
44
44
|
|
45
45
|
def form_config():
|
46
46
|
definition = json.loads(
|
47
|
-
tk.config.get(CONFIG_FORM_DEFINITION, DEFAULT_FORM_DEFINITION)
|
47
|
+
tk.config.get(CONFIG_FORM_DEFINITION, DEFAULT_FORM_DEFINITION),
|
48
48
|
)
|
49
49
|
order = tk.aslist(tk.config.get(CONFIG_FIELD_ORDER, DEFAULT_FIELD_ORDER))
|
50
50
|
if not order:
|
@@ -74,7 +74,7 @@ class AdvancedSearchPlugin(p.SingletonPlugin):
|
|
74
74
|
from ckanext.composite_search.interfaces import ICompositeSearch
|
75
75
|
except ImportError:
|
76
76
|
raise CkanConfigurationException(
|
77
|
-
"ckanext-composite-search is not installed"
|
77
|
+
"ckanext-composite-search is not installed",
|
78
78
|
)
|
79
79
|
if not p.plugin_loaded("composite_search"):
|
80
80
|
msg = "Advanced search requires `composite_search` plugin"
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import ckan.plugins.toolkit as tk
|
4
|
+
from ckan.lib.search.query import QUERY_FIELDS
|
5
|
+
|
6
|
+
CONFIG_QF = "ckanext.search_tweaks.common.qf"
|
7
|
+
DEFAULT_QF = QUERY_FIELDS
|
8
|
+
|
9
|
+
CONFIG_FUZZY = "ckanext.search_tweaks.common.fuzzy_search.enabled"
|
10
|
+
CONFIG_FUZZY_DISTANCE = "ckanext.search_tweaks.common.fuzzy_search.distance"
|
11
|
+
CONFIG_MM = "ckanext.search_tweaks.common.mm"
|
12
|
+
CONFIG_FUZZY_KEEP_ORIGINAL = "ckanext.search_tweaks.common.fuzzy_search.keep_original"
|
13
|
+
CONFIG_PREFER_BOOST = "ckanext.search_tweaks.common.prefer_boost"
|
14
|
+
|
15
|
+
|
16
|
+
def qf() -> str:
    """Return the configured Solr query fields.

    Falls back to ``DEFAULT_QF`` when the ``CONFIG_QF`` option holds a falsy
    value.
    """
    configured = tk.config[CONFIG_QF]
    if configured:
        return configured
    return DEFAULT_QF
|
18
|
+
|
19
|
+
|
20
|
+
def fuzzy() -> bool:
    """Return the value of the ``CONFIG_FUZZY`` (fuzzy search enabled) option."""
    enabled = tk.config[CONFIG_FUZZY]
    return enabled
|
22
|
+
|
23
|
+
|
24
|
+
def fuzzy_distance() -> int:
    """Return the value of the ``CONFIG_FUZZY_DISTANCE`` option."""
    distance = tk.config[CONFIG_FUZZY_DISTANCE]
    return distance
|
26
|
+
|
27
|
+
|
28
|
+
def mm() -> str:
    """Return the value of the ``CONFIG_MM`` option."""
    minimum_match = tk.config[CONFIG_MM]
    return minimum_match
|
30
|
+
|
31
|
+
|
32
|
+
def fuzzy_with_original() -> bool:
    """Return the value of the ``CONFIG_FUZZY_KEEP_ORIGINAL`` option."""
    keep_original = tk.config[CONFIG_FUZZY_KEEP_ORIGINAL]
    return keep_original
|
34
|
+
|
35
|
+
|
36
|
+
def prefer_boost() -> bool:
    """Return the value of the ``CONFIG_PREFER_BOOST`` option."""
    preferred = tk.config[CONFIG_PREFER_BOOST]
    return preferred
|
@@ -5,8 +5,8 @@ from typing import Any
|
|
5
5
|
import ckan.plugins as p
|
6
6
|
import ckan.plugins.toolkit as tk
|
7
7
|
|
8
|
-
from
|
9
|
-
from
|
8
|
+
from ckanext.search_tweaks import feature_disabled
|
9
|
+
from ckanext.search_tweaks.interfaces import ISearchTweaks
|
10
10
|
from . import views
|
11
11
|
|
12
12
|
CONFIG_BOOST_FN = "ckanext.search_tweaks.field_relevance.boost_function"
|
@@ -23,7 +23,7 @@ class FieldRelevancePlugin(p.SingletonPlugin):
|
|
23
23
|
# ISearchTweaks
|
24
24
|
def get_search_boost_fn(self, search_params: dict[str, Any]) -> str | None:
|
25
25
|
if feature_disabled("field_boost", search_params):
|
26
|
-
return
|
26
|
+
return None
|
27
27
|
|
28
28
|
return tk.config.get(CONFIG_BOOST_FN, DEFAULT_BOOST_FN)
|
29
29
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any
|
4
4
|
|
5
5
|
from flask import Blueprint
|
6
6
|
from flask.views import MethodView
|
@@ -29,7 +29,7 @@ field_relevance = Blueprint("search_tweaks_field_relevance", __name__)
|
|
29
29
|
|
30
30
|
def get_blueprints():
|
31
31
|
if tk.asbool(
|
32
|
-
tk.config.get(CONFIG_ENABLE_PROMOTION_ROUTE, DEFAULT_ENABLE_PROMOTION_ROUTE)
|
32
|
+
tk.config.get(CONFIG_ENABLE_PROMOTION_ROUTE, DEFAULT_ENABLE_PROMOTION_ROUTE),
|
33
33
|
):
|
34
34
|
path = tk.config.get(CONFIG_PROMOTION_PATH, DEFAULT_PROMOTION_PATH)
|
35
35
|
field_relevance.add_url_rule(path, view_func=PromoteView.as_view("promote"))
|
@@ -52,9 +52,9 @@ class PromoteView(MethodView):
|
|
52
52
|
tk.get_validator("convert_int"),
|
53
53
|
tk.get_validator("natural_number_validator"),
|
54
54
|
tk.get_validator("limit_to_configured_maximum")(
|
55
|
-
CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION
|
55
|
+
CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION,
|
56
56
|
),
|
57
|
-
]
|
57
|
+
],
|
58
58
|
}
|
59
59
|
|
60
60
|
data, errors = tk.navl_validate(
|
@@ -67,7 +67,7 @@ class PromoteView(MethodView):
|
|
67
67
|
return self.get(id, data, errors)
|
68
68
|
try:
|
69
69
|
pkg_dict = tk.get_action("package_patch")(
|
70
|
-
{}, {"id": id, field: data[field]}
|
70
|
+
{}, {"id": id, field: data[field]},
|
71
71
|
)
|
72
72
|
except tk.ValidationError as e:
|
73
73
|
for k, v in e.error_summary.items():
|
@@ -79,8 +79,8 @@ class PromoteView(MethodView):
|
|
79
79
|
def get(
|
80
80
|
self,
|
81
81
|
id,
|
82
|
-
data:
|
83
|
-
errors:
|
82
|
+
data: dict[str, Any] | None = None,
|
83
|
+
errors: dict[str, Any] | None = None,
|
84
84
|
):
|
85
85
|
self._check_access(id)
|
86
86
|
field = tk.config.get(CONFIG_PROMOTION_FIELD, DEFAULT_PROMOTION_FIELD)
|
@@ -90,7 +90,7 @@ class PromoteView(MethodView):
|
|
90
90
|
"errors": errors or {},
|
91
91
|
"data": data or pkg_dict,
|
92
92
|
"max_promotion": tk.asint(
|
93
|
-
tk.config.get(CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION)
|
93
|
+
tk.config.get(CONFIG_MAX_PROMOTION, DEFAULT_MAX_PROMOTION),
|
94
94
|
),
|
95
95
|
"field_name": field,
|
96
96
|
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
from ckan.plugins.interfaces import Interface
|
6
|
+
|
7
|
+
|
8
|
+
class ISearchTweaks(Interface):
    """Extension points for plugins that adjust the dataset search query."""

    def get_search_boost_fn(self, search_params: dict[str, Any]) -> str | None:
        """Provide a Solr boost function for the search being built.

        The value is sent as `boost` when
        `ckanext.search_tweaks.common.prefer_boost` is enabled, and as `bf`
        in any other case.

        The default implementation contributes nothing.
        """
        return None

    def get_extra_qf(self, search_params: dict[str, Any]) -> str | None:
        """Provide an extra piece of Solr's qf.

        The returned fragment is appended to the qf that is already in use.

        The default implementation contributes nothing.
        """
        return None
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
class IQueryPopularity(Interface):
    """Extension point that lets plugins veto tracking of a search query."""

    def skip_query_popularity(self, params: dict[str, Any]) -> bool:
        """Tell whether the search query must be left out of the index.

        Return a true value to skip the query; the default implementation
        never skips.
        """
        return False
|
@@ -1,54 +1,37 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import logging
|
4
|
-
from typing import Any
|
4
|
+
from typing import Any
|
5
5
|
|
6
6
|
import ckan.plugins as plugins
|
7
7
|
import ckan.plugins.toolkit as tk
|
8
|
-
from
|
9
|
-
|
10
|
-
from . import boost_preffered, cli, feature_disabled
|
8
|
+
from . import feature_disabled, config
|
11
9
|
from .interfaces import ISearchTweaks
|
12
10
|
|
13
11
|
log = logging.getLogger(__name__)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
CONFIG_QF = "ckanext.search_tweaks.common.qf"
|
18
|
-
CONFIG_FUZZY = "ckanext.search_tweaks.common.fuzzy_search.enabled"
|
19
|
-
CONFIG_FUZZY_DISTANCE = "ckanext.search_tweaks.common.fuzzy_search.distance"
|
20
|
-
CONFIG_MM = "ckanext.search_tweaks.common.mm"
|
21
|
-
CONFIG_FUZZY_KEEP_ORIGINAL = "ckanext.search_tweaks.common.fuzzy_search.keep_original"
|
22
|
-
|
23
|
-
DEFAULT_QF = QUERY_FIELDS
|
24
|
-
DEFAULT_FUZZY = False
|
25
|
-
DEFAULT_FUZZY_DISTANCE = 1
|
26
|
-
DEFAULT_MM = "1"
|
27
|
-
DEFAULT_FUZZY_KEEP_ORIGINAL = True
|
12
|
+
CONFIG_PREFER_BOOST = "ckanext.search_tweaks.common.prefer_boost"
|
13
|
+
DEFAULT_PREFER_BOOST = True
|
28
14
|
|
29
15
|
|
16
|
+
@tk.blanket.cli
|
17
|
+
@tk.blanket.config_declarations
|
30
18
|
class SearchTweaksPlugin(plugins.SingletonPlugin):
|
31
|
-
plugins.implements(plugins.IClick)
|
32
19
|
plugins.implements(plugins.IPackageController, inherit=True)
|
33
20
|
|
34
|
-
# IClick
|
35
|
-
|
36
|
-
def get_commands(self):
|
37
|
-
return cli.get_commands()
|
38
|
-
|
39
21
|
# IPackageController
|
40
22
|
|
41
|
-
def before_dataset_search(self, search_params:
|
23
|
+
def before_dataset_search(self, search_params: dict[str, Any]):
|
42
24
|
if feature_disabled("everything", search_params):
|
43
25
|
return search_params
|
44
26
|
|
45
|
-
search_params.setdefault("mm",
|
27
|
+
search_params.setdefault("mm", config.mm())
|
46
28
|
|
47
29
|
if "defType" not in search_params:
|
48
30
|
search_params["defType"] = "edismax"
|
49
31
|
|
50
|
-
if
|
32
|
+
if config.prefer_boost() and search_params["defType"] == "edismax":
|
51
33
|
_set_boost(search_params)
|
34
|
+
|
52
35
|
else:
|
53
36
|
_set_bf(search_params)
|
54
37
|
|
@@ -58,7 +41,7 @@ class SearchTweaksPlugin(plugins.SingletonPlugin):
|
|
58
41
|
return search_params
|
59
42
|
|
60
43
|
|
61
|
-
def _set_boost(search_params:
|
44
|
+
def _set_boost(search_params: dict[str, Any]) -> None:
|
62
45
|
boost: list[str] = search_params.setdefault("boost", [])
|
63
46
|
for plugin in plugins.PluginImplementations(ISearchTweaks):
|
64
47
|
extra = plugin.get_search_boost_fn(search_params)
|
@@ -67,7 +50,7 @@ def _set_boost(search_params: SearchParams) -> None:
|
|
67
50
|
boost.append(extra)
|
68
51
|
|
69
52
|
|
70
|
-
def _set_bf(search_params:
|
53
|
+
def _set_bf(search_params: dict[str, Any]) -> None:
|
71
54
|
default_bf: str = search_params.get("bf") or "0"
|
72
55
|
search_params.setdefault("bf", default_bf)
|
73
56
|
for plugin in plugins.PluginImplementations(ISearchTweaks):
|
@@ -77,11 +60,11 @@ def _set_bf(search_params: SearchParams) -> None:
|
|
77
60
|
search_params["bf"] = f"sum({search_params['bf']},{extra_bf})"
|
78
61
|
|
79
62
|
|
80
|
-
def _set_qf(search_params:
|
63
|
+
def _set_qf(search_params: dict[str, Any]) -> None:
|
81
64
|
if feature_disabled("qf", search_params):
|
82
65
|
return
|
83
66
|
|
84
|
-
default_qf: str = search_params.get("qf") or
|
67
|
+
default_qf: str = search_params.get("qf") or config.qf()
|
85
68
|
search_params.setdefault("qf", default_qf)
|
86
69
|
for plugin in plugins.PluginImplementations(ISearchTweaks):
|
87
70
|
extra_qf = plugin.get_extra_qf(search_params)
|
@@ -90,8 +73,8 @@ def _set_qf(search_params: SearchParams) -> None:
|
|
90
73
|
search_params["qf"] += " " + extra_qf
|
91
74
|
|
92
75
|
|
93
|
-
def _set_fuzzy(search_params:
|
94
|
-
if not
|
76
|
+
def _set_fuzzy(search_params: dict[str, Any]) -> None:
|
77
|
+
if not config.fuzzy():
|
95
78
|
return
|
96
79
|
|
97
80
|
if feature_disabled("fuzzy", search_params):
|
@@ -114,18 +97,16 @@ def _set_fuzzy(search_params: SearchParams) -> None:
|
|
114
97
|
if s.isalpha() and s not in ("AND", "OR", "TO")
|
115
98
|
else s,
|
116
99
|
q.split(),
|
117
|
-
)
|
100
|
+
),
|
118
101
|
)
|
119
|
-
if
|
120
|
-
tk.config.get(CONFIG_FUZZY_KEEP_ORIGINAL, DEFAULT_FUZZY_KEEP_ORIGINAL)
|
121
|
-
):
|
102
|
+
if config.fuzzy_with_original():
|
122
103
|
search_params["q"] = f"({fuzzy_q}) OR ({q})"
|
123
104
|
else:
|
124
105
|
search_params["q"] = fuzzy_q
|
125
106
|
|
126
107
|
|
127
108
|
def _get_fuzzy_distance() -> int:
|
128
|
-
distance =
|
109
|
+
distance = config.fuzzy_distance()
|
129
110
|
if distance < 0:
|
130
111
|
log.warning("Cannot use negative fuzzy distance: %s.", distance)
|
131
112
|
distance = 0
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
import ckan.plugins.toolkit as tk
|
3
|
+
|
4
|
+
|
5
|
+
def skip_irrefutable() -> bool:
    """Return the `query_popularity.skip_irrefutable_search` option."""
    option = "ckanext.search_tweaks.query_popularity.skip_irrefutable_search"
    return tk.config[option]
|
7
|
+
|
8
|
+
|
9
|
+
def ignored_symbols() -> set[str]:
    """Return the `query_popularity.ignored_symbols` option as a set."""
    option = "ckanext.search_tweaks.query_popularity.ignored_symbols"
    raw = tk.config[option]
    return set(raw)
|
11
|
+
|
12
|
+
|
13
|
+
def ignored_terms() -> list[str]:
    """Return the `query_popularity.ignored_terms` option."""
    option = "ckanext.search_tweaks.query_popularity.ignored_terms"
    return tk.config[option]
|
15
|
+
|
16
|
+
|
17
|
+
def throttle() -> int:
    """Return the `query_popularity.query_throttle` option."""
    option = "ckanext.search_tweaks.query_popularity.query_throttle"
    return tk.config[option]
|
19
|
+
|
20
|
+
|
21
|
+
def max_age() -> int:
    """Return the `query_popularity.max_age` option."""
    option = "ckanext.search_tweaks.query_popularity.max_age"
    return tk.config[option]
|
23
|
+
|
24
|
+
|
25
|
+
def obsoletion_period() -> int:
    """Return the `query_popularity.obsoletion_period` option."""
    option = "ckanext.search_tweaks.query_popularity.obsoletion_period"
    return tk.config[option]
|
27
|
+
|
28
|
+
|
29
|
+
def tracked_endpoints() -> list[str]:
    """Return the `query_popularity.tracked_endpoints` option."""
    option = "ckanext.search_tweaks.query_popularity.tracked_endpoints"
    return tk.config[option]
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any
|
3
|
+
from ckan import types
|
4
|
+
import ckan.plugins.toolkit as tk
|
5
|
+
|
6
|
+
from ckanext.search_tweaks.query_popularity.score import Score
|
7
|
+
|
8
|
+
|
9
|
+
@tk.side_effect_free
def search_tweaks_query_popularity_list(
    context: types.Context,
    data_dict: dict[str, Any],
) -> list[dict[str, Any]]:
    """List the highest scored search queries.

    Recognised ``data_dict`` keys:

    * ``refresh`` - when truthy, scores are recomputed before listing;
    * ``limit`` - number of items to return, 10 when omitted.

    Raises ``NotAuthorized`` when the auth function of the same name denies
    access.
    """
    # The auth function is only consulted when the action asks for it.
    tk.check_access("search_tweaks_query_popularity_list", context, data_dict)

    score = Score()

    if tk.asbool(data_dict.get("refresh")):
        score.refresh()

    limit = tk.asint(data_dict.get("limit", 10))

    return list(score.stats(limit))
|
21
|
+
|
22
|
+
|
23
|
+
@tk.side_effect_free
def search_tweaks_query_popularity_export(
    context: types.Context,
    data_dict: dict[str, Any],
) -> dict[str, Any]:
    """Dump every tracked query together with its recorded hits.

    Returns a dict with ``results`` (the exported items) and ``count`` (their
    number).

    Raises ``NotAuthorized`` when the auth function of the same name denies
    access.
    """
    # The auth function is only consulted when the action asks for it.
    tk.check_access("search_tweaks_query_popularity_export", context, data_dict)

    results = Score().export()
    return {"results": results, "count": len(results)}
|
31
|
+
|
32
|
+
|
33
|
+
# NOTE(review): this action modifies stored data but is still exposed as
# `side_effect_free`; the decorator is kept so existing GET callers keep
# working - consider dropping it.
@tk.side_effect_free
def search_tweaks_query_popularity_ignore(
    context: types.Context,
    data_dict: dict[str, Any],
):
    """Add a query to the ignore list.

    Recognised ``data_dict`` keys:

    * ``q`` - required, the query text;
    * ``remove`` - when truthy, data already collected for the query is
      dropped as well.

    Returns whatever ``Score.ignore`` returns for the query.

    Raises ``ValidationError`` when ``q`` is missing and ``NotAuthorized``
    when the auth function of the same name denies access.
    """
    # The auth function is only consulted when the action asks for it.
    tk.check_access("search_tweaks_query_popularity_ignore", context, data_dict)

    q = tk.get_or_bust(data_dict, "q")
    score = Score()
    result = score.ignore(q)

    if tk.asbool(data_dict.get("remove")):
        score.drop(q)

    return result
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any
|
3
|
+
from ckan import types
|
4
|
+
|
5
|
+
from ckan.authz import is_authorized
|
6
|
+
|
7
|
+
|
8
|
+
def search_tweaks_query_popularity_list(
    context: types.Context,
    data_dict: dict[str, Any],
) -> types.AuthResult:
    """Defer the decision to the `sysadmin` auth function."""
    verdict = is_authorized("sysadmin", context, data_dict)
    return verdict
|
12
|
+
|
13
|
+
|
14
|
+
def search_tweaks_query_popularity_export(
    context: types.Context,
    data_dict: dict[str, Any],
) -> types.AuthResult:
    """Defer the decision to the `sysadmin` auth function."""
    verdict = is_authorized("sysadmin", context, data_dict)
    return verdict
|
18
|
+
|
19
|
+
|
20
|
+
def search_tweaks_query_popularity_ignore(
    context: types.Context,
    data_dict: dict[str, Any],
) -> types.AuthResult:
    """Defer the decision to the `sysadmin` auth function."""
    verdict = is_authorized("sysadmin", context, data_dict)
    return verdict
|
@@ -0,0 +1,47 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from typing import Any
|
3
|
+
import ckan.plugins as p
|
4
|
+
import ckan.plugins.toolkit as tk
|
5
|
+
from ckanext.search_tweaks.interfaces import IQueryPopularity
|
6
|
+
from . import config, score
|
7
|
+
|
8
|
+
|
9
|
+
@tk.blanket.actions
@tk.blanket.auth_functions
@tk.blanket.config_declarations
class QueryPopularityPlugin(p.SingletonPlugin):
    """Record queries issued through tracked endpoints for popularity stats."""

    p.implements(p.IConfigurable)
    p.implements(p.IPackageController, inherit=True)
    p.implements(IQueryPopularity, inherit=True)

    # IConfigurable

    def configure(self, config: Any):
        """Create the score storage used by `after_dataset_search`."""
        self.score = score.Score()

    # IPackageController

    def after_dataset_search(self, results: dict[str, Any], params: dict[str, Any]):
        """Save the query unless the endpoint is untracked or a plugin vetoes it.

        `results` is always returned unchanged.
        """
        bp, view = tk.get_endpoint()
        if not (bp and view):
            return results

        if f"{bp}.{view}" not in config.tracked_endpoints():
            return results

        vetoed = any(
            plugin.skip_query_popularity(params)
            for plugin in p.PluginImplementations(IQueryPopularity)
        )
        if not vetoed:
            self.score.save(self._query(params))

        return results

    # IQueryPopularity

    def skip_query_popularity(self, params: dict[str, Any]) -> bool:
        """Apply the configured skip rules to the query from `params`.

        The match-all query is skipped according to `config.skip_irrefutable()`;
        any other query is skipped when it contains one of the ignored symbols
        or one of the ignored terms.
        """
        q = self._query(params)

        if q == "*:*":
            return config.skip_irrefutable()

        symbols = config.ignored_symbols()
        if symbols and set(q) & symbols:
            return True

        return any(term in q for term in config.ignored_terms())

    @staticmethod
    def _query(params: dict[str, Any]) -> str:
        """Return `q` from search params, reading a missing/empty one as `*:*`.

        Prevents a KeyError when the parameters carry no `q` at all.
        """
        return params.get("q") or "*:*"
|
@@ -0,0 +1,165 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
from collections import defaultdict
|
3
|
+
from datetime import datetime, timedelta
|
4
|
+
import logging
|
5
|
+
from hashlib import md5
|
6
|
+
from typing import Any, Iterable, cast
|
7
|
+
from operator import itemgetter
|
8
|
+
from ckan.lib.redis import connect_to_redis
|
9
|
+
import ckan.plugins.toolkit as tk
|
10
|
+
from redis import Redis
|
11
|
+
from . import config
|
12
|
+
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
connect_to_redis: Any
|
15
|
+
|
16
|
+
|
17
|
+
class Score:
    """Redis-backed statistics of search query usage.

    Everything is stored under the ``<ckan.site_id>:search_tweaks:qp`` prefix:

    * ``:trans`` - hash mapping a query hash to the query text;
    * ``:distribution`` - hash mapping ``<date stem>/<query hash>`` to the
      number of recorded hits;
    * ``:score`` - sorted set of query hashes with scores from `refresh`;
    * ``:ignore`` - set of query hashes that must not be recorded;
    * ``:throttle:<user>:<query hash>`` - expiring throttle markers.
    """

    redis: Redis[bytes]
    # Format of the date part of a distribution field (minute resolution).
    date_format = "%Y-%m-%d %H-%M"

    def __init__(self):
        self.redis = connect_to_redis()

        site = tk.config["ckan.site_id"]
        self.prefix = f"{site}:search_tweaks:qp"

    def export(self):
        """Return a list of ``{"query": ..., "records": [...]}`` items.

        Every record holds the ``date`` and ``count`` of one distribution
        field. Fields that cannot be parsed, or that point to a query without
        a translation entry, are left out.
        """
        data: dict[bytes, dict[str, Any]] = {
            q_hash: {"query": query, "records": []}
            for q_hash, query in self.redis.hgetall(self.trans_key()).items()
        }
        for field, count in self.redis.hscan_iter(self.distribution_key()):
            parsed = self._split_field(field)
            if parsed is None:
                continue

            date, q_hash = parsed
            item = data.get(q_hash)
            if item is None:
                # Hits without a known query text cannot be exported.
                continue

            item["records"].append({"date": date, "count": int(count)})

        return list(data.values())

    def save(self, q: str):
        """Record one hit of the query unless it is ignored or throttled."""
        q = q.strip()
        q_hash = self.hash(q)

        # Order matters: the throttle check leaves a marker behind, so it must
        # not run for ignored queries.
        if self.is_ignored(q_hash) or self.is_throttling(q_hash):
            return

        self.redis.hset(self.trans_key(), q_hash, q)

        date_stem = self.format_date_stem(self.now())
        self.redis.hincrby(self.distribution_key(), f"{date_stem}/{q_hash}", 1)

    def drop(self, q: str):
        """Remove hits, translation and score of the query."""
        # Hash the same normalized text that `save` stores.
        q_hash = self.hash(q.strip())
        dk = self.distribution_key()

        series = self.redis.hscan_iter(dk, f"*/{q_hash}")
        keys = list(map(itemgetter(0), series))
        if keys:
            self.redis.hdel(dk, *keys)

        self.redis.hdel(self.trans_key(), q_hash)
        self.redis.zrem(self.score_key(), q_hash)

    def is_throttling(self, q_hash: str):
        """Tell whether the current user repeated the query too soon.

        When no throttle marker exists, one is created with the lifetime of
        `config.throttle()` seconds and False is returned.
        """
        user = tk.current_user.name

        throttle_key = f"{self.prefix}:throttle:{user}:{q_hash}"
        if self.redis.exists(throttle_key):
            return True

        self.redis.set(throttle_key, 1, ex=config.throttle())
        return False

    def reset(self):
        """Delete every key stored under the prefix."""
        keys = self.redis.keys(f"{self.prefix}:*")
        if keys:
            self.redis.delete(*keys)

    def refresh(self):
        """Recompute scores and purge outdated data.

        Distribution fields that are malformed or older than
        `config.max_age()` seconds are deleted. The remaining hits are summed
        per query with the weight from `_decayed`. Queries left without hits
        are removed from the score set and from translations.
        """
        max_age = timedelta(seconds=config.max_age())
        period = config.obsoletion_period()
        # One reference moment for the whole run.
        now = self.now()
        dk = self.distribution_key()
        sk = self.score_key()

        expired_dist: set[bytes] = set()
        distribution = cast(
            "Iterable[tuple[bytes, bytes]]",
            self.redis.hscan_iter(dk),
        )

        scores: dict[bytes, float] = defaultdict(float)

        for k, v in distribution:
            parsed = self._split_field(k)
            if parsed is None:
                log.error("Remove invalid key %s", k)
                expired_dist.add(k)
                continue

            date, q_hash = parsed
            age = now - date

            if age > max_age:
                expired_dist.add(k)
                continue

            scores[q_hash] += self._decayed(int(v), age, period)

        if expired_dist:
            self.redis.hdel(dk, *expired_dist)

        expired_scores: set[bytes] = {
            k for k, _ in self.redis.zscan_iter(sk) if k not in scores
        }

        if scores:
            self.redis.zadd(sk, cast(Any, scores))

        if expired_scores:
            self.redis.zrem(sk, *expired_scores)
            self.redis.hdel(self.trans_key(), *expired_scores)

    def hash(self, q: str):
        """Return the hex MD5 digest of the query text."""
        return md5(q.encode()).hexdigest()

    def is_ignored(self, q_hash: str):
        """Tell whether the query hash is a member of the ignore set."""
        return self.redis.sismember(self.ignore_key(), q_hash)

    def ignore(self, q: str):
        """Add the query to the ignore set and return the result of SADD."""
        # Hash the same normalized text that `save` checks.
        return self.redis.sadd(self.ignore_key(), self.hash(q.strip()))

    def now(self):
        """Return the current naive UTC datetime."""
        return datetime.utcnow()

    def score_key(self):
        return f"{self.prefix}:score"

    def trans_key(self):
        return f"{self.prefix}:trans"

    def ignore_key(self):
        return f"{self.prefix}:ignore"

    def distribution_key(self):
        return f"{self.prefix}:distribution"

    def format_date_stem(self, date: datetime):
        """Format the date as the first part of a distribution field."""
        return date.strftime(self.date_format)

    def stats(self, num: int) -> Iterable[dict[str, Any]]:
        """Yield ``{"query": ..., "score": ...}`` for the `num` best queries.

        Nothing is yielded for a non-positive `num`.
        """
        if num <= 0:
            # `num - 1` would become a negative index that counts from the end
            # of the sorted set: 0 used to return the whole ranking.
            return

        scores: list[tuple[bytes, float]] = self.redis.zrange(
            self.score_key(), 0, num - 1, desc=True, withscores=True
        )
        trans_key = self.trans_key()

        for k, v in scores:
            yield {"query": self.redis.hget(trans_key, k), "score": v}

    def _split_field(self, field: bytes) -> tuple[datetime, bytes] | None:
        """Parse ``<date stem>/<query hash>``; return None when malformed."""
        try:
            date_str, q_hash = field.split(b"/", 1)
            date = datetime.strptime(date_str.decode(), self.date_format)
        except ValueError:
            # Missing separator, undecodable bytes or unexpected date format.
            return None

        return date, q_hash

    @staticmethod
    def _decayed(count: int, age: timedelta, period: int) -> float:
        """Return `count` divided by the number of periods started in `age`.

        The whole age is used: `timedelta.seconds` ignores the days part.
        A negative age is counted as zero.
        """
        elapsed = max(int(age.total_seconds()), 0)
        return count / (elapsed // period + 1)
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
import contextlib
|
3
|
-
from typing import Optional
|
4
3
|
|
5
4
|
from urllib.parse import urlparse, parse_qs
|
6
5
|
|
@@ -14,7 +13,7 @@ from .score import QueryScore, normalize_query
|
|
14
13
|
__all__ = ["QueryScore", "normalize_query", "update_score_by_url"]
|
15
14
|
|
16
15
|
|
17
|
-
def update_score_by_url(pkg: model.Package, ref:
|
16
|
+
def update_score_by_url(pkg: model.Package, ref: str | None = None) -> bool:
|
18
17
|
"""Make given package more relevant for the current search query."""
|
19
18
|
if tk.request:
|
20
19
|
ref = ref or tk.request.referrer
|