ckanext-search-tweaks 0.6.2__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/MANIFEST.in +1 -1
- {ckanext_search_tweaks-0.6.2/ckanext_search_tweaks.egg-info → ckanext_search_tweaks-1.0.0}/PKG-INFO +121 -123
- ckanext_search_tweaks-1.0.0/README.md +295 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/__init__.py +0 -1
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/advanced_search/assets/advanced-search.css +0 -4
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/advanced_search/plugin.py +11 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/advanced_search/templates/advanced_search/search_form.html +5 -3
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/config_declaration.yaml +24 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/field_relevance/plugin.py +0 -2
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/field_relevance/templates/search_tweaks/field_relevance/promote.html +1 -1
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/field_relevance/views.py +18 -5
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/interfaces.py +1 -3
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/plugin.py +0 -1
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_popularity/config_declaration.yaml +29 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/logic/schema.py +1 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/score.py +2 -1
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/__init__.py +83 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/boost.py +75 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/cli.py +70 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/config.py +29 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/config_declaration.yaml +16 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/plugin.py +46 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/score.py +42 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/query_relevance/storage.py +116 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/spellcheck/helpers.py +6 -2
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/conftest.py +13 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/query_relevance/test_plugin.py +5 -5
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/tests/query_relevance/test_search.py +84 -0
- ckanext_search_tweaks-1.0.0/ckanext/search_tweaks/tests/query_relevance/test_storage.py +38 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/spellcheck/test_plugin.py +4 -2
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0/ckanext_search_tweaks.egg-info}/PKG-INFO +121 -123
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext_search_tweaks.egg-info/SOURCES.txt +6 -0
- ckanext_search_tweaks-1.0.0/ckanext_search_tweaks.egg-info/requires.txt +9 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/pyproject.toml +53 -48
- ckanext_search_tweaks-0.6.2/README.md +0 -302
- ckanext_search_tweaks-0.6.2/ckanext/search_tweaks/query_relevance/__init__.py +0 -58
- ckanext_search_tweaks-0.6.2/ckanext/search_tweaks/query_relevance/cli.py +0 -88
- ckanext_search_tweaks-0.6.2/ckanext/search_tweaks/query_relevance/plugin.py +0 -75
- ckanext_search_tweaks-0.6.2/ckanext/search_tweaks/query_relevance/score.py +0 -75
- ckanext_search_tweaks-0.6.2/ckanext/search_tweaks/query_relevance/storage.py +0 -174
- ckanext_search_tweaks-0.6.2/ckanext/search_tweaks/tests/query_relevance/test_storage.py +0 -114
- ckanext_search_tweaks-0.6.2/ckanext_search_tweaks.egg-info/requires.txt +0 -5
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/LICENSE +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/advanced_search/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/advanced_search/assets/advanced-search.js +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/advanced_search/assets/webassets.yml +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/cli.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/config.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/field_relevance/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/field_relevance/assets/search-tweaks-reflect-range-in-label.js +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/field_relevance/assets/webassets.yml +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/config.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/logic/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/logic/action.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/logic/auth.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/query_popularity/plugin.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/shared.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/spellcheck/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/spellcheck/cli.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/spellcheck/plugin.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/spellcheck/templates/search_tweaks/did_you_mean.html +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/query_relevance/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/query_relevance/test_score.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/spellcheck/__init__.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext/search_tweaks/tests/test_plugin.py +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext_search_tweaks.egg-info/dependency_links.txt +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext_search_tweaks.egg-info/entry_points.txt +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/ckanext_search_tweaks.egg-info/top_level.txt +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/requirements.txt +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/setup.cfg +0 -0
- {ckanext_search_tweaks-0.6.2 → ckanext_search_tweaks-1.0.0}/setup.py +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
include README.rst
|
2
2
|
include LICENSE
|
3
3
|
include requirements.txt
|
4
|
-
recursive-include ckanext/search_tweaks *.html *.json *.js *.less *.css *.mo *.yml
|
4
|
+
recursive-include ckanext/search_tweaks *.html *.json *.js *.less *.css *.mo *.yml *.yaml
|
5
5
|
recursive-include ckanext/search_tweaks/migration *.ini *.py *.mako
|
{ckanext_search_tweaks-0.6.2/ckanext_search_tweaks.egg-info → ckanext_search_tweaks-1.0.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: ckanext-search-tweaks
|
3
|
-
Version: 0.
|
3
|
+
Version: 1.0.0
|
4
4
|
Author-email: DataShades <datashades@linkdigital.com.au>, Sergey Motornyuk <sergey.motornyuk@linkdigital.com.au>
|
5
5
|
Maintainer-email: DataShades <datashades@linkdigital.com.au>
|
6
6
|
License: AGPL
|
@@ -8,15 +8,20 @@ Project-URL: Homepage, https://github.com/DataShades/ckanext-search-tweaks
|
|
8
8
|
Keywords: CKAN
|
9
9
|
Classifier: Development Status :: 4 - Beta
|
10
10
|
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
12
|
-
Classifier: Programming Language :: Python :: 3.9
|
13
11
|
Classifier: Programming Language :: Python :: 3.10
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
14
14
|
Description-Content-Type: text/markdown
|
15
15
|
License-File: LICENSE
|
16
16
|
Requires-Dist: freezegun
|
17
17
|
Requires-Dist: typing_extensions>=4.0.0
|
18
|
-
Requires-Dist: ckanext-toolbelt>=0.
|
19
|
-
Provides-Extra:
|
18
|
+
Requires-Dist: ckanext-toolbelt>=0.6.6
|
19
|
+
Provides-Extra: dev
|
20
|
+
Requires-Dist: pytest-ckan; extra == "dev"
|
21
|
+
Requires-Dist: pytest-freezegun; extra == "dev"
|
22
|
+
Requires-Dist: pytest-factoryboy; extra == "dev"
|
23
|
+
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
24
|
+
Dynamic: license-file
|
20
25
|
|
21
26
|
[](https://github.com/DataShades/ckanext-search-tweaks/actions)
|
22
27
|
|
@@ -30,8 +35,8 @@ Compatibility with core CKAN versions:
|
|
30
35
|
|
31
36
|
| CKAN version | Compatible? |
|
32
37
|
|-----------------|-------------|
|
33
|
-
| 2.
|
34
|
-
| 2.
|
38
|
+
| 2.9 and earlier | no |
|
39
|
+
| 2.10+ | yes |
|
35
40
|
|
36
41
|
|
37
42
|
## Installation
|
@@ -65,9 +70,6 @@ available. Bellow are listed all the plugins with their side effects.
|
|
65
70
|
| [search_tweaks_query_relevance](#search_tweaks_query_relevance) | Promote datasets that were visited most frequently for the current search query |
|
66
71
|
| [search_tweaks_field_relevance](#search_tweaks_field_relevance) | Promote dataset depending on value of its field |
|
67
72
|
| [search_tweaks_spellcheck](#search_tweaks_spellcheck) | Provides "Did you mean?" feature |
|
68
|
-
<!--
|
69
|
-
| [search_tweaks_advanced_search](#search_tweaks_advanced_search) | Basic configuration of ckanext-composite-search's search form |
|
70
|
-
-->
|
71
73
|
|
72
74
|
### <a id="search_tweaks"></a> search_tweaks
|
73
75
|
|
@@ -79,16 +81,18 @@ extension is enabled.
|
|
79
81
|
- Enables `ckanext.search_tweaks.interfaces.ISearchTweaks` interface with the
|
80
82
|
following methods:
|
81
83
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
84
|
+
```python
|
85
|
+
def get_search_boost_fn(self, search_params: dict[str, Any]) -> Optional[str]:
|
86
|
+
"""Returns optional boost function that will be applied to the search query.
|
87
|
+
"""
|
88
|
+
return None
|
86
89
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
90
|
+
def get_extra_qf(self, search_params: dict[str, Any]) -> Optional[str]:
|
91
|
+
"""Return an additional fragment of the Solr's qf.
|
92
|
+
This fragment will be appended to the current qf
|
93
|
+
"""
|
94
|
+
return None
|
95
|
+
```
|
92
96
|
|
93
97
|
#### CLI
|
94
98
|
|
@@ -99,30 +103,31 @@ following methods:
|
|
99
103
|
|
100
104
|
#### Config settings
|
101
105
|
|
102
|
-
|
103
|
-
|
104
|
-
|
106
|
+
```ini
|
107
|
+
# Rewrite the default value of the qf parameter sent to Solr
|
108
|
+
# (optional, default: value of ckan.lib.search.query.QUERY_FIELDS).
|
109
|
+
ckanext.search_tweaks.common.qf = title^5 text
|
105
110
|
|
106
|
-
|
107
|
-
|
108
|
-
|
111
|
+
# Search by misspelled queries.
|
112
|
+
# (optional, default: false).
|
113
|
+
ckanext.search_tweaks.common.fuzzy_search.enabled = on
|
109
114
|
|
110
|
-
|
111
|
-
|
112
|
-
|
115
|
+
# Maximum number of misspelled letters. Possible values are 1 and 2.
|
116
|
+
# (optional, default: 1).
|
117
|
+
ckanext.search_tweaks.common.fuzzy_search.distance = 2
|
113
118
|
|
114
|
-
|
115
|
-
|
116
|
-
|
119
|
+
# Use `boost` instead of `bf` when `edismax` query parser is active
|
120
|
+
# (optional, default: true).
|
121
|
+
ckanext.search_tweaks.common.prefer_boost = no
|
117
122
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
# Keep original query when using fuzzy search, e.g. "(hello~2) OR (hello)" if true
|
123
|
-
# (optional, default: true).
|
124
|
-
ckanext.search_tweaks.common.fuzzy_search.keep_original
|
123
|
+
# MinimumShouldMatch used in queries
|
124
|
+
# (optional, default: 1).
|
125
|
+
ckanext.search_tweaks.common.mm = 2<-1 5<80%
|
125
126
|
|
127
|
+
# Keep original query when using fuzzy search, e.g. "(hello~2) OR (hello)" if true
|
128
|
+
# (optional, default: true).
|
129
|
+
ckanext.search_tweaks.common.fuzzy_search.keep_original
|
130
|
+
```
|
126
131
|
|
127
132
|
---
|
128
133
|
|
@@ -132,57 +137,41 @@ Increase relevance of datasets for particular query depending on number of
|
|
132
137
|
direct visits of the dataset after running this search. I.e., if a user searches
|
133
138
|
for `something` and then visits dataset **B** which is initially displayed in a
|
134
139
|
third row of search results, eventually this dataset will be displayed on the
|
135
|
-
second or even on the first row.
|
136
|
-
first stage, statistics collected and stored inside storage(redis, by default)
|
137
|
-
and then this statistics converted into numeric solr field via cronjob.
|
138
|
-
Finally, Solr's boost function that scales number of visits and improves score
|
139
|
-
for the given query is applied during search.
|
140
|
-
|
141
|
-
Following steps are required in order to configure this plugin:
|
142
|
-
|
143
|
-
- Add field that will store statistics to schema.xml(`query_relevance_` prefix
|
144
|
-
can be changed via config option):
|
145
|
-
|
146
|
-
<dynamicField name="query_relevance_*" type="int" indexed="true" stored="true"/>
|
140
|
+
second or even on the first row.
|
147
141
|
|
148
|
-
|
149
|
-
|
150
|
-
|
142
|
+
This is implemented in two stages:
|
143
|
+
- In the first stage, statistics are collected and stored in Redis.
|
144
|
+
- During search, we apply Solr's boost function to scale the dataset score based on the number of visits.
|
151
145
|
|
152
146
|
#### CLI
|
153
147
|
|
154
|
-
|
155
|
-
|
156
|
-
is affected by this command - all records older than `query_relevance.daily.age` days are removed.
|
148
|
+
```
|
149
|
+
relevance query export - export statistics as CSV.
|
157
150
|
|
158
|
-
|
151
|
+
relevance query import - import statistics from CSV. Note, records that are already in storage but
|
152
|
+
are not listed in CSV won't be removed. It must be done manually
|
159
153
|
|
160
|
-
|
161
|
-
|
154
|
+
relevance query reset - reset all the query relevance scores
|
155
|
+
```
|
162
156
|
|
163
157
|
|
164
158
|
#### Config settings
|
165
159
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
ckanext.search_tweaks.query_relevance.backend = redis-permanent
|
171
|
-
|
172
|
-
# How long(in days) information about dataset visits will be stored in order to
|
173
|
-
# update relevance of dataset in search query.
|
174
|
-
# (optional, default: 90).
|
175
|
-
ckanext.search_tweaks.query_relevance.daily.age = 90
|
160
|
+
```ini
|
161
|
+
# Minimum boost to apply to a search query
|
162
|
+
# (optional, default: 1).
|
163
|
+
ckanext.search_tweaks.query_relevance.min_boost = 1
|
176
164
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
ckanext.search_tweaks.query_relevance.boost_function = recip($field,1,1000,1000)
|
165
|
+
# Maximum boost to apply to a search query. Set more to promote datasets higher
|
166
|
+
# (optional, default: 1.5).
|
167
|
+
ckanext.search_tweaks.query_relevance.max_boost = 2
|
181
168
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
169
|
+
# Maximum number of boosts to apply to a search query
|
170
|
+
# Set more to promote more datasets at once. Note, that a higher
|
171
|
+
# number of boosts may increase the query time.
|
172
|
+
# (optional, default: 60).
|
173
|
+
ckanext.search_tweaks.query_relevance.max_boost_count = 60
|
174
|
+
```
|
186
175
|
|
187
176
|
---
|
188
177
|
### <a id="search_tweaks_field_relevance"></a> search_tweaks_field_relevance
|
@@ -198,17 +187,20 @@ line of code.
|
|
198
187
|
|
199
188
|
#### Config settings
|
200
189
|
|
201
|
-
|
202
|
-
|
203
|
-
|
190
|
+
```ini
|
191
|
+
# Solr boost function for static numeric field
|
192
|
+
# (optional, default: None).
|
193
|
+
ckanext.search_tweaks.field_relevance.boost_function = pow(promoted_level,2)
|
204
194
|
|
205
|
-
|
206
|
-
|
207
|
-
|
195
|
+
# Field with dataset promotion level
|
196
|
+
# (optional, default: promotion_level).
|
197
|
+
ckanext.search_tweaks.field_relevance.blueprint.promotion.field_name = promotion
|
198
|
+
|
199
|
+
# Register package promotion route
|
200
|
+
# (optional, default: False).
|
201
|
+
ckanext.search_tweaks.field_relevance.blueprint.promotion.enabled = true
|
202
|
+
```
|
208
203
|
|
209
|
-
# Register pacakge promotion route
|
210
|
-
# (optional, default: False).
|
211
|
-
ckanext.search_tweaks.field_relevance.blueprint.promotion.enabled = true
|
212
204
|
|
213
205
|
#### Auth functions
|
214
206
|
|
@@ -227,42 +219,54 @@ use it:
|
|
227
219
|
name="spellcheck" class="solr.SpellCheckComponent">` section and add the
|
228
220
|
following item under it:
|
229
221
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
222
|
+
```xml
|
223
|
+
<lst name="spellchecker">
|
224
|
+
<str name="name">did_you_mean</str>
|
225
|
+
<str name="field">did_you_mean</str>
|
226
|
+
<str name="buildOnCommit">false</str>
|
227
|
+
</lst>
|
228
|
+
```
|
235
229
|
|
236
230
|
- Add cron job that will update suggestions dictionary periodically:
|
237
231
|
|
238
|
-
|
232
|
+
```sh
|
233
|
+
ckan search-tweaks spellcheck rebuild
|
234
|
+
```
|
239
235
|
|
240
236
|
- `solrconfig.xml`. Add spellcheck component to the search handler (`<requestHandler
|
241
237
|
name="/select" class="solr.SearchHandler">`):
|
242
238
|
|
243
|
-
|
244
|
-
|
245
|
-
|
239
|
+
```xml
|
240
|
+
<arr name="last-components">
|
241
|
+
<str>spellcheck</str>
|
242
|
+
</arr>
|
243
|
+
```
|
246
244
|
|
247
245
|
- Define spellcheck field in the schema. If you want to use an existing
|
248
246
|
field(`text` for example), change `<str name="field">did_you_mean</str>`
|
249
247
|
value inside `solrconfig.xml` to the name of the selected field instead.
|
250
248
|
|
251
|
-
|
249
|
+
```xml
|
250
|
+
<field name="did_you_mean" type="textgen" indexed="true" multiValued="true" />
|
251
|
+
```
|
252
252
|
|
253
253
|
- **Note:** skip if you've decided to use an existing field in the previous step.
|
254
254
|
<br/>
|
255
255
|
Copy meaningful values into this field:
|
256
256
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
257
|
+
```xml
|
258
|
+
<copyField source="title" dest="did_you_mean"/>
|
259
|
+
<copyField source="notes" dest="did_you_mean"/>
|
260
|
+
<copyField source="res_name" dest="did_you_mean"/>
|
261
|
+
<copyField source="res_description" dest="did_you_mean"/>
|
262
|
+
<copyField source="extras_*" dest="did_you_mean"/>
|
263
|
+
```
|
262
264
|
|
263
265
|
After that you have to restart Solr service and rebuild search index:
|
264
266
|
|
265
|
-
|
267
|
+
```sh
|
268
|
+
ckan search-index rebuild
|
269
|
+
```
|
266
270
|
|
267
271
|
Now you can use `spellcheck_did_you_mean` template helper that returns better
|
268
272
|
search query when available instead of the current one. Consider including
|
@@ -270,39 +274,33 @@ search query when available instead of the current one. Consider including
|
|
270
274
|
|
271
275
|
#### Config settings
|
272
276
|
|
273
|
-
|
274
|
-
|
275
|
-
|
277
|
+
```ini
|
278
|
+
# Do not show suggestions that have fewer results than current query
|
279
|
+
# (optional, default: true).
|
280
|
+
ckanext.search_tweaks.spellcheck.more_results_only = off
|
276
281
|
|
277
|
-
|
278
|
-
|
279
|
-
|
282
|
+
# How many different suggestions you expect to see for query
|
283
|
+
# (optional, default: 1).
|
284
|
+
ckanext.search_tweaks.spellcheck.max_suggestions = 3
|
285
|
+
```
|
280
286
|
|
281
287
|
#### CLI
|
282
288
|
|
283
289
|
spellcheck rebuild - rebuild/reload spellcheck dictionary.
|
284
290
|
|
285
291
|
---
|
286
|
-
<!--
|
287
|
-
### <a id="search_tweaks_advanced_search"></a> search_tweaks_advanced_search
|
288
|
-
|
289
|
-
Configure `ckanext-composite-search` for the basic usage. One need
|
290
|
-
`composite_search default_composite_search` plugins enabled in order to use
|
291
|
-
this plugin. It registers `advanced_search/search_form.html` snippet which can
|
292
|
-
be just used instead of `search_input` block of CKAN's
|
293
|
-
`snippets/search_form.html`. It can take a number of parameters, check its
|
294
|
-
content for details.
|
295
|
-
-->
|
296
292
|
|
297
293
|
## Developer installation
|
298
294
|
|
299
295
|
To install ckanext-search-tweaks for development, activate your CKAN virtualenv and
|
300
296
|
do:
|
301
297
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
298
|
+
```sh
|
299
|
+
git clone https://github.com/DataShades/ckanext-search-tweaks.git
|
300
|
+
cd ckanext-search-tweaks
|
301
|
+
python setup.py develop
|
302
|
+
pip install -r dev-requirements.txt
|
303
|
+
```
|
306
304
|
|
307
305
|
|
308
306
|
## Tests
|
@@ -0,0 +1,295 @@
|
|
1
|
+
[](https://github.com/DataShades/ckanext-search-tweaks/actions)
|
2
|
+
|
3
|
+
# ckanext-search-tweaks
|
4
|
+
|
5
|
+
Set of tools providing control over search results, sorting, etc.
|
6
|
+
|
7
|
+
## Requirements
|
8
|
+
|
9
|
+
Compatibility with core CKAN versions:
|
10
|
+
|
11
|
+
| CKAN version | Compatible? |
|
12
|
+
|-----------------|-------------|
|
13
|
+
| 2.9 and earlier | no |
|
14
|
+
| 2.10+ | yes |
|
15
|
+
|
16
|
+
|
17
|
+
## Installation
|
18
|
+
|
19
|
+
To install ckanext-search-tweaks:
|
20
|
+
|
21
|
+
1. Activate your CKAN virtual environment, for example:
|
22
|
+
|
23
|
+
. /usr/lib/ckan/default/bin/activate
|
24
|
+
|
25
|
+
2. Install it on the virtualenv
|
26
|
+
|
27
|
+
pip install ckanext-search-tweaks
|
28
|
+
|
29
|
+
3. Add `search_tweaks` to the `ckan.plugins` setting in your CKAN
|
30
|
+
config file (by default the config file is located at
|
31
|
+
`/etc/ckan/default/ckan.ini`).
|
32
|
+
|
33
|
+
4. Restart CKAN.
|
34
|
+
|
35
|
+
## Usage
|
36
|
+
|
37
|
+
This extension consists of multiple plugins. `search_tweaks` is the main
|
38
|
+
(major) one, that must be enabled all the time. And depending on the set of
|
39
|
+
secondary (minor) plugins, extra features and config options may be
|
40
|
+
available. Below are listed all the plugins with their side effects.
|
41
|
+
|
42
|
+
| Plugin | Functionality |
|
43
|
+
|-----------------------------------------------------------------|---------------------------------------------------------------------------------|
|
44
|
+
| [search_tweaks](#search_tweaks) | Allow all the other plugins to be enabled |
|
45
|
+
| [search_tweaks_query_relevance](#search_tweaks_query_relevance) | Promote datasets that were visited most frequently for the current search query |
|
46
|
+
| [search_tweaks_field_relevance](#search_tweaks_field_relevance) | Promote dataset depending on value of its field |
|
47
|
+
| [search_tweaks_spellcheck](#search_tweaks_spellcheck) | Provides "Did you mean?" feature |
|
48
|
+
|
49
|
+
### <a id="search_tweaks"></a> search_tweaks
|
50
|
+
|
51
|
+
Provides base functionality and essential pieces of logic used by all the other
|
52
|
+
plugins. Must be enabled as long as at least one other plugin from this
|
53
|
+
extension is enabled.
|
54
|
+
|
55
|
+
- Switches search to `edismax` query parser if none was specified
|
56
|
+
- Enables `ckanext.search_tweaks.interfaces.ISearchTweaks` interface with the
|
57
|
+
following methods:
|
58
|
+
|
59
|
+
```python
|
60
|
+
def get_search_boost_fn(self, search_params: dict[str, Any]) -> Optional[str]:
|
61
|
+
"""Returns optional boost function that will be applied to the search query.
|
62
|
+
"""
|
63
|
+
return None
|
64
|
+
|
65
|
+
def get_extra_qf(self, search_params: dict[str, Any]) -> Optional[str]:
|
66
|
+
"""Return an additional fragment of the Solr's qf.
|
67
|
+
This fragment will be appended to the current qf
|
68
|
+
"""
|
69
|
+
return None
|
70
|
+
```
|
71
|
+
|
72
|
+
#### CLI
|
73
|
+
|
74
|
+
ckan search-tweaks -
|
75
|
+
Root of all the extension specific commands.
|
76
|
+
Every command from minor plugins is registered under this section.
|
77
|
+
|
78
|
+
|
79
|
+
#### Config settings
|
80
|
+
|
81
|
+
```ini
|
82
|
+
# Rewrite the default value of the qf parameter sent to Solr
|
83
|
+
# (optional, default: value of ckan.lib.search.query.QUERY_FIELDS).
|
84
|
+
ckanext.search_tweaks.common.qf = title^5 text
|
85
|
+
|
86
|
+
# Search by misspelled queries.
|
87
|
+
# (optional, default: false).
|
88
|
+
ckanext.search_tweaks.common.fuzzy_search.enabled = on
|
89
|
+
|
90
|
+
# Maximum number of misspelled letters. Possible values are 1 and 2.
|
91
|
+
# (optional, default: 1).
|
92
|
+
ckanext.search_tweaks.common.fuzzy_search.distance = 2
|
93
|
+
|
94
|
+
# Use `boost` instead of `bf` when `edismax` query parser is active
|
95
|
+
# (optional, default: true).
|
96
|
+
ckanext.search_tweaks.common.prefer_boost = no
|
97
|
+
|
98
|
+
# MinimumShouldMatch used in queries
|
99
|
+
# (optional, default: 1).
|
100
|
+
ckanext.search_tweaks.common.mm = 2<-1 5<80%
|
101
|
+
|
102
|
+
# Keep original query when using fuzzy search, e.g. "(hello~2) OR (hello)" if true
|
103
|
+
# (optional, default: true).
|
104
|
+
ckanext.search_tweaks.common.fuzzy_search.keep_original
|
105
|
+
```
|
106
|
+
|
107
|
+
---
|
108
|
+
|
109
|
+
### <a id="search_tweaks_query_relevance"></a> search_tweaks_query_relevance
|
110
|
+
|
111
|
+
Increase relevance of datasets for particular query depending on number of
|
112
|
+
direct visits of the dataset after running this search. I.e., if a user searches
|
113
|
+
for `something` and then visits dataset **B** which is initially displayed in a
|
114
|
+
third row of search results, eventually this dataset will be displayed on the
|
115
|
+
second or even on the first row.
|
116
|
+
|
117
|
+
This is implemented in two stages:
|
118
|
+
- In the first stage, statistics are collected and stored in Redis.
|
119
|
+
- During search, we apply Solr's boost function to scale the dataset score based on the number of visits.
|
120
|
+
|
121
|
+
#### CLI
|
122
|
+
|
123
|
+
```
|
124
|
+
relevance query export - export statistics as CSV.
|
125
|
+
|
126
|
+
relevance query import - import statistics from CSV. Note, records that are already in storage but
|
127
|
+
are not listed in CSV won't be removed. It must be done manually
|
128
|
+
|
129
|
+
relevance query reset - reset all the query relevance scores
|
130
|
+
```
|
131
|
+
|
132
|
+
|
133
|
+
#### Config settings
|
134
|
+
|
135
|
+
```ini
|
136
|
+
# Minimum boost to apply to a search query
|
137
|
+
# (optional, default: 1).
|
138
|
+
ckanext.search_tweaks.query_relevance.min_boost = 1
|
139
|
+
|
140
|
+
# Maximum boost to apply to a search query. Set more to promote datasets higher
|
141
|
+
# (optional, default: 1.5).
|
142
|
+
ckanext.search_tweaks.query_relevance.max_boost = 2
|
143
|
+
|
144
|
+
# Maximum number of boosts to apply to a search query
|
145
|
+
# Set more to promote more datasets at once. Note, that a higher
|
146
|
+
# number of boosts may increase the query time.
|
147
|
+
# (optional, default: 60).
|
148
|
+
ckanext.search_tweaks.query_relevance.max_boost_count = 60
|
149
|
+
```
|
150
|
+
|
151
|
+
---
|
152
|
+
### <a id="search_tweaks_field_relevance"></a> search_tweaks_field_relevance
|
153
|
+
|
154
|
+
Increases the relevance of a dataset depending on value of its *numeric*
|
155
|
+
field. For now it's impossible to promote dataset using field with textual type.
|
156
|
+
|
157
|
+
No magic here either, this plugin allows you to specify Solr's boost function
|
158
|
+
that will be used during all the searches. One can achieve exactly the same
|
159
|
+
result using `ISearchTweaks.get_search_boost_fn`. But I expect this option to
|
160
|
+
be used often, so there is a possibility to update relevance without any extra
|
161
|
+
line of code.
|
162
|
+
|
163
|
+
#### Config settings
|
164
|
+
|
165
|
+
```ini
|
166
|
+
# Solr boost function for static numeric field
|
167
|
+
# (optional, default: None).
|
168
|
+
ckanext.search_tweaks.field_relevance.boost_function = pow(promoted_level,2)
|
169
|
+
|
170
|
+
# Field with dataset promotion level
|
171
|
+
# (optional, default: promotion_level).
|
172
|
+
ckanext.search_tweaks.field_relevance.blueprint.promotion.field_name = promotion
|
173
|
+
|
174
|
+
# Register package promotion route
|
175
|
+
# (optional, default: False).
|
176
|
+
ckanext.search_tweaks.field_relevance.blueprint.promotion.enabled = true
|
177
|
+
```
|
178
|
+
|
179
|
+
|
180
|
+
#### Auth functions
|
181
|
+
|
182
|
+
search_tweaks_field_relevance_promote: access package promotion route. Calls `package_update` by default.
|
183
|
+
|
184
|
+
---
|
185
|
+
|
186
|
+
### <a id="search_tweaks_spellcheck"></a> search_tweaks_spellcheck
|
187
|
+
|
188
|
+
Exposes search suggestions from the Solr's spellcheck component to CKAN
|
189
|
+
templates. This plugin doesn't do much and mainly relies on the Solr's built-in
|
190
|
+
functionality. Thus you have to make a lot of changes inside Solr in order to
|
191
|
+
use it:
|
192
|
+
|
193
|
+
- `solrconfig.xml`. Configure spellcheck component. Search for `<searchComponent
|
194
|
+
name="spellcheck" class="solr.SpellCheckComponent">` section and add the
|
195
|
+
following item under it:
|
196
|
+
|
197
|
+
```xml
|
198
|
+
<lst name="spellchecker">
|
199
|
+
<str name="name">did_you_mean</str>
|
200
|
+
<str name="field">did_you_mean</str>
|
201
|
+
<str name="buildOnCommit">false</str>
|
202
|
+
</lst>
|
203
|
+
```
|
204
|
+
|
205
|
+
- Add cron job that will update suggestions dictionary periodically:
|
206
|
+
|
207
|
+
```sh
|
208
|
+
ckan search-tweaks spellcheck rebuild
|
209
|
+
```
|
210
|
+
|
211
|
+
- `solrconfig.xml`. Add spellcheck component to the search handler (`<requestHandler
|
212
|
+
name="/select" class="solr.SearchHandler">`):
|
213
|
+
|
214
|
+
```xml
|
215
|
+
<arr name="last-components">
|
216
|
+
<str>spellcheck</str>
|
217
|
+
</arr>
|
218
|
+
```
|
219
|
+
|
220
|
+
- Define spellcheck field in the schema. If you want to use an existing
|
221
|
+
field(`text` for example), change `<str name="field">did_you_mean</str>`
|
222
|
+
value inside `solrconfig.xml` to the name of the selected field instead.
|
223
|
+
|
224
|
+
```xml
|
225
|
+
<field name="did_you_mean" type="textgen" indexed="true" multiValued="true" />
|
226
|
+
```
|
227
|
+
|
228
|
+
- **Note:** skip if you've decided to use an existing field in the previous step.
|
229
|
+
<br/>
|
230
|
+
Copy meaningful values into this field:
|
231
|
+
|
232
|
+
```xml
|
233
|
+
<copyField source="title" dest="did_you_mean"/>
|
234
|
+
<copyField source="notes" dest="did_you_mean"/>
|
235
|
+
<copyField source="res_name" dest="did_you_mean"/>
|
236
|
+
<copyField source="res_description" dest="did_you_mean"/>
|
237
|
+
<copyField source="extras_*" dest="did_you_mean"/>
|
238
|
+
```
|
239
|
+
|
240
|
+
After that you have to restart Solr service and rebuild search index:
|
241
|
+
|
242
|
+
```sh
|
243
|
+
ckan search-index rebuild
|
244
|
+
```
|
245
|
+
|
246
|
+
Now you can use `spellcheck_did_you_mean` template helper that returns better
|
247
|
+
search query when available instead of the current one. Consider including
|
248
|
+
`search_tweaks/did_you_mean.html` fragment under search form.
|
249
|
+
|
250
|
+
#### Config settings
|
251
|
+
|
252
|
+
```ini
|
253
|
+
# Do not show suggestions that have fewer results than current query
|
254
|
+
# (optional, default: true).
|
255
|
+
ckanext.search_tweaks.spellcheck.more_results_only = off
|
256
|
+
|
257
|
+
# How many different suggestions you expect to see for query
|
258
|
+
# (optional, default: 1).
|
259
|
+
ckanext.search_tweaks.spellcheck.max_suggestions = 3
|
260
|
+
```
|
261
|
+
|
262
|
+
#### CLI
|
263
|
+
|
264
|
+
spellcheck rebuild - rebuild/reload spellcheck dictionary.
|
265
|
+
|
266
|
+
---
|
267
|
+
|
268
|
+
## Developer installation
|
269
|
+
|
270
|
+
To install ckanext-search-tweaks for development, activate your CKAN virtualenv and
|
271
|
+
do:
|
272
|
+
|
273
|
+
```sh
|
274
|
+
git clone https://github.com/DataShades/ckanext-search-tweaks.git
|
275
|
+
cd ckanext-search-tweaks
|
276
|
+
python setup.py develop
|
277
|
+
pip install -r dev-requirements.txt
|
278
|
+
```
|
279
|
+
|
280
|
+
|
281
|
+
## Tests
|
282
|
+
|
283
|
+
Apart from the default configuration for CKAN testing, you have to create
|
284
|
+
`ckan_search_tweaks` Solr's core, replace its schema with
|
285
|
+
`ckanext/search_tweaks/tests/schema.xml` and make changes to `solrconfig.xml`
|
286
|
+
that are required by `search_tweaks_spellcheck`.
|
287
|
+
|
288
|
+
To run the tests, do:
|
289
|
+
|
290
|
+
pytest --ckan-ini=test.ini ckanext/search_tweaks/tests
|
291
|
+
|
292
|
+
|
293
|
+
## License
|
294
|
+
|
295
|
+
[AGPL](https://www.gnu.org/licenses/agpl-3.0.en.html)
|