searchkick 5.3.0 → 5.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +94 -9
- data/lib/searchkick/index.rb +3 -1
- data/lib/searchkick/index_options.rb +75 -0
- data/lib/searchkick/model.rb +1 -1
- data/lib/searchkick/query.rb +136 -9
- data/lib/searchkick/reranking.rb +28 -0
- data/lib/searchkick/script.rb +11 -0
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +19 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5dfa66d383b0e1a91288b4636fef42b22ac73ec63294a7955ac20298851df1c1
|
4
|
+
data.tar.gz: fbe59c3e85352b01674c16831f67b3906367e6a032769582ee552038853194f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25d7df6cf3861522a99851c8c561f0726a6e922d4aafb0fe646883b86c37ba4c1ec78735940a5eb70056433422fa2987989701260ec7b1e2969fb452758d2d43
|
7
|
+
data.tar.gz: d9b637bba6c90e1c08de0b9cd8239e6b8b03aa86eb0a77aebc6b855ff0744220d775c8b7c8a12e1df736f98ee44dac3c6f11ad3a0506aa51e5c22f120f3c13d3
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
## 5.4.0 (2024-09-04)
|
2
|
+
|
3
|
+
- Added `knn` option
|
4
|
+
- Added `rrf` method
|
5
|
+
- Added experimental support for scripting to `where` option
|
6
|
+
- Added warning for `exists` with non-`true` values
|
7
|
+
- Added warning for full reindex and `:queue` mode
|
8
|
+
- Fixed `per_page` method when paginating beyond `max_result_window`
|
9
|
+
- Dropped support for Ruby < 3.1
|
10
|
+
|
11
|
+
## 5.3.1 (2023-11-28)
|
12
|
+
|
13
|
+
- Fixed error with `misspellings` `below` option and failed queries
|
14
|
+
|
1
15
|
## 5.3.0 (2023-07-02)
|
2
16
|
|
3
17
|
- Fixed error with `cutoff_frequency`
|
data/README.md
CHANGED
@@ -26,7 +26,7 @@ Check out [Searchjoy](https://github.com/ankane/searchjoy) for analytics and [Au
|
|
26
26
|
|
27
27
|
:tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
|
28
28
|
|
29
|
-
[![Build Status](https://github.com/ankane/searchkick/workflows/build/badge.svg
|
29
|
+
[![Build Status](https://github.com/ankane/searchkick/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/searchkick/actions)
|
30
30
|
|
31
31
|
## Contents
|
32
32
|
|
@@ -120,9 +120,9 @@ where: {
|
|
120
120
|
category: /frozen .+/, # regexp
|
121
121
|
category: {prefix: "frozen"}, # prefix
|
122
122
|
store_id: {exists: true}, # exists
|
123
|
+
_not: {store_id: 1}, # negate a condition
|
123
124
|
_or: [{in_stock: true}, {backordered: true}],
|
124
|
-
_and: [{in_stock: true}, {backordered: true}]
|
125
|
-
_not: {store_id: 1} # negate a condition
|
125
|
+
_and: [{in_stock: true}, {backordered: true}]
|
126
126
|
}
|
127
127
|
```
|
128
128
|
|
@@ -815,7 +815,7 @@ Product.search("milk", boost_where: {orderer_ids: current_user.id})
|
|
815
815
|
|
816
816
|
Autocomplete predicts what a user will type, making the search experience faster and easier.
|
817
817
|
|
818
|
-
![Autocomplete](https://gist.
|
818
|
+
![Autocomplete](https://gist.githubusercontent.com/ankane/b6988db2802aca68a589b31e41b44195/raw/40febe948427e5bc53ec4e5dc248822855fef76f/autocomplete.png)
|
819
819
|
|
820
820
|
**Note:** To autocomplete on search terms rather than results, check out [Autosuggest](https://github.com/ankane/autosuggest).
|
821
821
|
|
@@ -881,7 +881,7 @@ Then add the search box and JavaScript code to a view.
|
|
881
881
|
|
882
882
|
## Suggestions
|
883
883
|
|
884
|
-
![Suggest](https://gist.
|
884
|
+
![Suggest](https://gist.githubusercontent.com/ankane/b6988db2802aca68a589b31e41b44195/raw/40febe948427e5bc53ec4e5dc248822855fef76f/recursion.png)
|
885
885
|
|
886
886
|
```ruby
|
887
887
|
class Product < ApplicationRecord
|
@@ -900,7 +900,7 @@ products.suggestions # ["peanut butter"]
|
|
900
900
|
|
901
901
|
[Aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html) provide aggregated search data.
|
902
902
|
|
903
|
-
![Aggregations](https://gist.
|
903
|
+
![Aggregations](https://gist.githubusercontent.com/ankane/b6988db2802aca68a589b31e41b44195/raw/40febe948427e5bc53ec4e5dc248822855fef76f/facets.png)
|
904
904
|
|
905
905
|
```ruby
|
906
906
|
products = Product.search("chuck taylor", aggs: [:product_type, :gender, :brand])
|
@@ -1483,7 +1483,15 @@ ENV["ELASTICSEARCH_URL"] = "https://user:password@host1,https://user:password@ho
|
|
1483
1483
|
ENV["OPENSEARCH_URL"] = "https://user:password@host1,https://user:password@host2"
|
1484
1484
|
```
|
1485
1485
|
|
1486
|
-
|
1486
|
+
### Client Options
|
1487
|
+
|
1488
|
+
Create an initializer with:
|
1489
|
+
|
1490
|
+
```ruby
|
1491
|
+
Searchkick.client_options[:reload_connections] = true
|
1492
|
+
```
|
1493
|
+
|
1494
|
+
See the docs for [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/advanced-config.html) or [OpenSearch](https://rubydoc.info/gems/opensearch-transport#configuration) for a complete list of options.
|
1487
1495
|
|
1488
1496
|
### Lograge
|
1489
1497
|
|
@@ -1837,9 +1845,86 @@ To query nested data, use dot notation.
|
|
1837
1845
|
Product.search("san", fields: ["store.city"], where: {"store.zip_code" => 12345})
|
1838
1846
|
```
|
1839
1847
|
|
1840
|
-
## Nearest
|
1848
|
+
## Nearest Neighbor Search
|
1849
|
+
|
1850
|
+
*Available for Elasticsearch 8.6+ and OpenSearch 2.4+*
|
1851
|
+
|
1852
|
+
```ruby
|
1853
|
+
class Product < ApplicationRecord
|
1854
|
+
searchkick knn: {embedding: {dimensions: 3, distance: "cosine"}}
|
1855
|
+
end
|
1856
|
+
```
|
1857
|
+
|
1858
|
+
Also supports `euclidean` and `inner_product`
|
1859
|
+
|
1860
|
+
Reindex and search with:
|
1861
|
+
|
1862
|
+
```ruby
|
1863
|
+
Product.search(knn: {field: :embedding, vector: [1, 2, 3]}, limit: 10)
|
1864
|
+
```
|
1865
|
+
|
1866
|
+
## Semantic Search
|
1867
|
+
|
1868
|
+
First, add [nearest neighbor search](#nearest-neighbor-search) to your model
|
1869
|
+
|
1870
|
+
```ruby
|
1871
|
+
class Product < ApplicationRecord
|
1872
|
+
searchkick knn: {embedding: {dimensions: 768, distance: "cosine"}}
|
1873
|
+
end
|
1874
|
+
```
|
1875
|
+
|
1876
|
+
Generate an embedding for each record (you can use an external service or a library like [Informers](https://github.com/ankane/informers))
|
1877
|
+
|
1878
|
+
```ruby
|
1879
|
+
embed = Informers.pipeline("embedding", "Snowflake/snowflake-arctic-embed-m-v1.5")
|
1880
|
+
embed_options = {model_output: "sentence_embedding", pooling: "none"} # specific to embedding model
|
1881
|
+
|
1882
|
+
Product.find_each do |product|
|
1883
|
+
embedding = embed.(product.name, **embed_options)
|
1884
|
+
product.update!(embedding: embedding)
|
1885
|
+
end
|
1886
|
+
```
|
1887
|
+
|
1888
|
+
For search, generate an embedding for the query (the query prefix is specific to the [embedding model](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5))
|
1889
|
+
|
1890
|
+
```ruby
|
1891
|
+
query_prefix = "Represent this sentence for searching relevant passages: "
|
1892
|
+
query_embedding = embed.(query_prefix + query, **embed_options)
|
1893
|
+
```
|
1894
|
+
|
1895
|
+
And perform nearest neighbor search
|
1896
|
+
|
1897
|
+
```ruby
|
1898
|
+
Product.search(knn: {field: :embedding, vector: query_embedding}, limit: 20)
|
1899
|
+
```
|
1900
|
+
|
1901
|
+
See a [full example](examples/semantic.rb)
|
1902
|
+
|
1903
|
+
## Hybrid Search
|
1904
|
+
|
1905
|
+
Perform keyword search and semantic search in parallel
|
1906
|
+
|
1907
|
+
```ruby
|
1908
|
+
keyword_search = Product.search(query, limit: 20)
|
1909
|
+
semantic_search = Product.search(knn: {field: :embedding, vector: query_embedding}, limit: 20)
|
1910
|
+
Searchkick.multi_search([keyword_search, semantic_search])
|
1911
|
+
```
|
1912
|
+
|
1913
|
+
To combine the results, use Reciprocal Rank Fusion (RRF)
|
1914
|
+
|
1915
|
+
```ruby
|
1916
|
+
Searchkick::Reranking.rrf(keyword_search, semantic_search).first(5)
|
1917
|
+
```
|
1918
|
+
|
1919
|
+
Or a reranking model
|
1920
|
+
|
1921
|
+
```ruby
|
1922
|
+
rerank = Informers.pipeline("reranking", "mixedbread-ai/mxbai-rerank-xsmall-v1")
|
1923
|
+
results = (keyword_search.to_a + semantic_search.to_a).uniq
|
1924
|
+
rerank.(query, results.map(&:name)).first(5).map { |v| results[v[:doc_id]] }
|
1925
|
+
```
|
1841
1926
|
|
1842
|
-
|
1927
|
+
See a [full example](examples/hybrid.rb)
|
1843
1928
|
|
1844
1929
|
## Reference
|
1845
1930
|
|
data/lib/searchkick/index.rb
CHANGED
@@ -99,7 +99,7 @@ module Searchkick
|
|
99
99
|
record_data = RecordData.new(self, record).record_data
|
100
100
|
|
101
101
|
# remove underscore
|
102
|
-
get_options = record_data.to_h { |k, v| [k.to_s.
|
102
|
+
get_options = record_data.to_h { |k, v| [k.to_s.delete_prefix("_").to_sym, v] }
|
103
103
|
|
104
104
|
client.get(get_options)["_source"]
|
105
105
|
end
|
@@ -352,6 +352,8 @@ module Searchkick
|
|
352
352
|
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
353
353
|
def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil)
|
354
354
|
raise ArgumentError, "wait only available in :async mode" if !wait.nil? && mode != :async
|
355
|
+
# TODO raise ArgumentError in Searchkick 6
|
356
|
+
Searchkick.warn("Full reindex does not support :queue mode - use :async mode instead") if mode == :queue
|
355
357
|
|
356
358
|
if resume
|
357
359
|
index_name = all_indices.sort.last
|
@@ -169,6 +169,21 @@ module Searchkick
|
|
169
169
|
max_shingle_diff: 4
|
170
170
|
}
|
171
171
|
|
172
|
+
if options[:knn]
|
173
|
+
unless Searchkick.knn_support?
|
174
|
+
if Searchkick.opensearch?
|
175
|
+
raise Error, "knn requires OpenSearch 2.4+"
|
176
|
+
else
|
177
|
+
raise Error, "knn requires Elasticsearch 8.6+"
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
if Searchkick.opensearch? && options[:knn].any? { |_, v| !v[:distance].nil? }
|
182
|
+
# only enable if doing approximate search
|
183
|
+
settings[:index][:knn] = true
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
172
187
|
if options[:case_sensitive]
|
173
188
|
settings[:analysis][:analyzer].each do |_, analyzer|
|
174
189
|
analyzer[:filter].delete("lowercase")
|
@@ -406,6 +421,66 @@ module Searchkick
|
|
406
421
|
mapping[field] = shape_options.merge(type: "geo_shape")
|
407
422
|
end
|
408
423
|
|
424
|
+
(options[:knn] || []).each do |field, knn_options|
|
425
|
+
distance = knn_options[:distance]
|
426
|
+
|
427
|
+
if Searchkick.opensearch?
|
428
|
+
if distance.nil?
|
429
|
+
# avoid server crash if method not specified
|
430
|
+
raise ArgumentError, "Must specify a distance for OpenSearch"
|
431
|
+
end
|
432
|
+
|
433
|
+
vector_options = {
|
434
|
+
type: "knn_vector",
|
435
|
+
dimension: knn_options[:dimensions]
|
436
|
+
}
|
437
|
+
|
438
|
+
if !distance.nil?
|
439
|
+
space_type =
|
440
|
+
case distance
|
441
|
+
when "cosine"
|
442
|
+
"cosinesimil"
|
443
|
+
when "euclidean"
|
444
|
+
"l2"
|
445
|
+
when "inner_product"
|
446
|
+
"innerproduct"
|
447
|
+
else
|
448
|
+
raise ArgumentError, "Unknown distance: #{distance}"
|
449
|
+
end
|
450
|
+
|
451
|
+
vector_options[:method] = {
|
452
|
+
name: "hnsw",
|
453
|
+
space_type: space_type,
|
454
|
+
engine: "lucene"
|
455
|
+
}
|
456
|
+
end
|
457
|
+
|
458
|
+
mapping[field.to_s] = vector_options
|
459
|
+
else
|
460
|
+
vector_options = {
|
461
|
+
type: "dense_vector",
|
462
|
+
dims: knn_options[:dimensions],
|
463
|
+
index: !distance.nil?
|
464
|
+
}
|
465
|
+
|
466
|
+
if !distance.nil?
|
467
|
+
vector_options[:similarity] =
|
468
|
+
case distance
|
469
|
+
when "cosine"
|
470
|
+
"cosine"
|
471
|
+
when "euclidean"
|
472
|
+
"l2_norm"
|
473
|
+
when "inner_product"
|
474
|
+
"max_inner_product"
|
475
|
+
else
|
476
|
+
raise ArgumentError, "Unknown distance: #{distance}"
|
477
|
+
end
|
478
|
+
end
|
479
|
+
|
480
|
+
mapping[field.to_s] = vector_options
|
481
|
+
end
|
482
|
+
end
|
483
|
+
|
409
484
|
if options[:inheritance]
|
410
485
|
mapping[:type] = keyword_mapping
|
411
486
|
end
|
data/lib/searchkick/model.rb
CHANGED
@@ -4,7 +4,7 @@ module Searchkick
|
|
4
4
|
options = Searchkick.model_options.merge(options)
|
5
5
|
|
6
6
|
unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
|
7
|
-
:filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
|
7
|
+
:filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :knn, :language,
|
8
8
|
:locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
|
9
9
|
:special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
|
10
10
|
:text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
|
data/lib/searchkick/query.rb
CHANGED
@@ -9,7 +9,7 @@ module Searchkick
|
|
9
9
|
attr_accessor :body
|
10
10
|
|
11
11
|
def_delegators :execute, :map, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary,
|
12
|
-
:
|
12
|
+
:results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
|
13
13
|
:took, :error, :model_name, :entry_name, :total_count, :total_entries,
|
14
14
|
:current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
|
15
15
|
:offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
|
@@ -19,7 +19,7 @@ module Searchkick
|
|
19
19
|
def initialize(klass, term = "*", **options)
|
20
20
|
unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
|
21
21
|
:boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
|
22
|
-
:fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
|
22
|
+
:fields, :highlight, :includes, :index_name, :indices_boost, :knn, :limit, :load,
|
23
23
|
:match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
|
24
24
|
:request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
|
25
25
|
raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
|
@@ -191,7 +191,7 @@ module Searchkick
|
|
191
191
|
end
|
192
192
|
|
193
193
|
def retry_misspellings?(response)
|
194
|
-
@misspellings_below && Results.new(searchkick_klass, response).total_count < @misspellings_below
|
194
|
+
@misspellings_below && response["error"].nil? && Results.new(searchkick_klass, response).total_count < @misspellings_below
|
195
195
|
end
|
196
196
|
|
197
197
|
private
|
@@ -219,7 +219,7 @@ module Searchkick
|
|
219
219
|
)
|
220
220
|
|
221
221
|
raise UnsupportedVersionError
|
222
|
-
elsif e.message
|
222
|
+
elsif e.message.match?(/analyzer \[searchkick_.+\] not found/)
|
223
223
|
raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
|
224
224
|
else
|
225
225
|
raise InvalidQueryError, e.message
|
@@ -259,6 +259,7 @@ module Searchkick
|
|
259
259
|
scroll = options[:scroll]
|
260
260
|
|
261
261
|
max_result_window = searchkick_options[:max_result_window]
|
262
|
+
original_per_page = per_page
|
262
263
|
if max_result_window
|
263
264
|
offset = max_result_window if offset > max_result_window
|
264
265
|
per_page = max_result_window - offset if offset + per_page > max_result_window
|
@@ -387,7 +388,7 @@ module Searchkick
|
|
387
388
|
exclude_field = f
|
388
389
|
exclude_analyzer = "keyword"
|
389
390
|
else
|
390
|
-
analyzer = field
|
391
|
+
analyzer = field.match?(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
|
391
392
|
qs << shared_options.merge(analyzer: analyzer)
|
392
393
|
exclude_analyzer = analyzer
|
393
394
|
end
|
@@ -525,6 +526,9 @@ module Searchkick
|
|
525
526
|
end
|
526
527
|
end
|
527
528
|
|
529
|
+
# knn
|
530
|
+
set_knn(payload, options[:knn], per_page, offset) if options[:knn]
|
531
|
+
|
528
532
|
# pagination
|
529
533
|
pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
|
530
534
|
if !options[:body] || pagination_options
|
@@ -558,7 +562,7 @@ module Searchkick
|
|
558
562
|
|
559
563
|
@body = payload
|
560
564
|
@page = page
|
561
|
-
@per_page =
|
565
|
+
@per_page = original_per_page
|
562
566
|
@padding = padding
|
563
567
|
@load = load
|
564
568
|
@scroll = scroll
|
@@ -875,6 +879,119 @@ module Searchkick
|
|
875
879
|
end
|
876
880
|
end
|
877
881
|
|
882
|
+
def set_knn(payload, knn, per_page, offset)
|
883
|
+
if term != "*"
|
884
|
+
raise ArgumentError, "Use Searchkick.multi_search for hybrid search"
|
885
|
+
end
|
886
|
+
|
887
|
+
field = knn[:field]
|
888
|
+
field_options = searchkick_options.dig(:knn, field.to_sym) || searchkick_options.dig(:knn, field.to_s) || {}
|
889
|
+
vector = knn[:vector]
|
890
|
+
distance = knn[:distance] || field_options[:distance]
|
891
|
+
exact = knn[:exact]
|
892
|
+
exact = field_options[:distance].nil? || distance != field_options[:distance] if exact.nil?
|
893
|
+
k = per_page + offset
|
894
|
+
filter = payload.delete(:query)
|
895
|
+
|
896
|
+
if distance.nil?
|
897
|
+
raise ArgumentError, "distance required"
|
898
|
+
elsif !exact && distance != field_options[:distance]
|
899
|
+
raise ArgumentError, "distance must match searchkick options for approximate search"
|
900
|
+
end
|
901
|
+
|
902
|
+
if Searchkick.opensearch?
|
903
|
+
if exact
|
904
|
+
# https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/#spaces
|
905
|
+
space_type =
|
906
|
+
case distance
|
907
|
+
when "cosine"
|
908
|
+
"cosinesimil"
|
909
|
+
when "euclidean"
|
910
|
+
"l2"
|
911
|
+
when "taxicab"
|
912
|
+
"l1"
|
913
|
+
when "inner_product"
|
914
|
+
"innerproduct"
|
915
|
+
when "chebyshev"
|
916
|
+
"linf"
|
917
|
+
else
|
918
|
+
raise ArgumentError, "Unknown distance: #{distance}"
|
919
|
+
end
|
920
|
+
|
921
|
+
payload[:query] = {
|
922
|
+
script_score: {
|
923
|
+
query: {
|
924
|
+
bool: {
|
925
|
+
must: [filter, {exists: {field: field}}]
|
926
|
+
}
|
927
|
+
},
|
928
|
+
script: {
|
929
|
+
source: "knn_score",
|
930
|
+
lang: "knn",
|
931
|
+
params: {
|
932
|
+
field: field,
|
933
|
+
query_value: vector,
|
934
|
+
space_type: space_type
|
935
|
+
}
|
936
|
+
},
|
937
|
+
boost: distance == "cosine" ? 0.5 : 1.0
|
938
|
+
}
|
939
|
+
}
|
940
|
+
else
|
941
|
+
payload[:query] = {
|
942
|
+
knn: {
|
943
|
+
field.to_sym => {
|
944
|
+
vector: vector,
|
945
|
+
k: k,
|
946
|
+
filter: filter
|
947
|
+
}
|
948
|
+
}
|
949
|
+
}
|
950
|
+
end
|
951
|
+
else
|
952
|
+
if exact
|
953
|
+
# https://github.com/elastic/elasticsearch/blob/main/docs/reference/vectors/vector-functions.asciidoc
|
954
|
+
source =
|
955
|
+
case distance
|
956
|
+
when "cosine"
|
957
|
+
"(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5"
|
958
|
+
when "euclidean"
|
959
|
+
"double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)"
|
960
|
+
when "taxicab"
|
961
|
+
"1 / (1 + l1norm(params.query_vector, params.field))"
|
962
|
+
when "inner_product"
|
963
|
+
"double dot = dotProduct(params.query_vector, params.field); dot > 0 ? dot + 1 : 1 / (1 - dot)"
|
964
|
+
else
|
965
|
+
raise ArgumentError, "Unknown distance: #{distance}"
|
966
|
+
end
|
967
|
+
|
968
|
+
payload[:query] = {
|
969
|
+
script_score: {
|
970
|
+
query: {
|
971
|
+
bool: {
|
972
|
+
must: [filter, {exists: {field: field}}]
|
973
|
+
}
|
974
|
+
},
|
975
|
+
script: {
|
976
|
+
source: source,
|
977
|
+
params: {
|
978
|
+
field: field,
|
979
|
+
query_vector: vector
|
980
|
+
}
|
981
|
+
}
|
982
|
+
}
|
983
|
+
}
|
984
|
+
else
|
985
|
+
payload[:knn] = {
|
986
|
+
field: field,
|
987
|
+
query_vector: vector,
|
988
|
+
k: k,
|
989
|
+
filter: filter
|
990
|
+
}
|
991
|
+
end
|
992
|
+
end
|
993
|
+
end
|
994
|
+
|
878
995
|
def set_post_filters(payload, post_filters)
|
879
996
|
payload[:post_filter] = {
|
880
997
|
bool: {
|
@@ -884,7 +1001,8 @@ module Searchkick
|
|
884
1001
|
end
|
885
1002
|
|
886
1003
|
def set_order(payload)
|
887
|
-
|
1004
|
+
value = options[:order]
|
1005
|
+
payload[:sort] = value.is_a?(Enumerable) ? value : {value => :asc}
|
888
1006
|
end
|
889
1007
|
|
890
1008
|
# provides *very* basic protection from unfiltered parameters
|
@@ -908,8 +1026,12 @@ module Searchkick
|
|
908
1026
|
filters << {bool: {must_not: where_filters(value)}}
|
909
1027
|
elsif field == :_and
|
910
1028
|
filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
|
911
|
-
|
912
|
-
|
1029
|
+
elsif field == :_script
|
1030
|
+
unless value.is_a?(Script)
|
1031
|
+
raise TypeError, "expected Searchkick::Script"
|
1032
|
+
end
|
1033
|
+
|
1034
|
+
filters << {script: {script: {source: value.source, lang: value.lang, params: value.params}}}
|
913
1035
|
else
|
914
1036
|
# expand ranges
|
915
1037
|
if value.is_a?(Range)
|
@@ -1002,6 +1124,11 @@ module Searchkick
|
|
1002
1124
|
when :in
|
1003
1125
|
filters << term_filters(field, op_value)
|
1004
1126
|
when :exists
|
1127
|
+
# TODO add support for false in Searchkick 6
|
1128
|
+
if op_value != true
|
1129
|
+
# TODO raise error in Searchkick 6
|
1130
|
+
Searchkick.warn("Passing a value other than true to exists is not supported")
|
1131
|
+
end
|
1005
1132
|
filters << {exists: {field: field}}
|
1006
1133
|
else
|
1007
1134
|
range_query =
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Searchkick
|
2
|
+
module Reranking
|
3
|
+
def self.rrf(first_ranking, *rankings, k: 60)
|
4
|
+
rankings.unshift(first_ranking)
|
5
|
+
rankings.map!(&:to_ary)
|
6
|
+
|
7
|
+
ranks = []
|
8
|
+
results = []
|
9
|
+
rankings.each do |ranking|
|
10
|
+
ranks << ranking.map.with_index.to_h { |v, i| [v, i + 1] }
|
11
|
+
results.concat(ranking)
|
12
|
+
end
|
13
|
+
|
14
|
+
results =
|
15
|
+
results.uniq.map do |result|
|
16
|
+
score =
|
17
|
+
ranks.sum do |rank|
|
18
|
+
r = rank[result]
|
19
|
+
r ? 1.0 / (k + r) : 0.0
|
20
|
+
end
|
21
|
+
|
22
|
+
{result: result, score: score}
|
23
|
+
end
|
24
|
+
|
25
|
+
results.sort_by { |v| -v[:score] }
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/searchkick/version.rb
CHANGED
data/lib/searchkick.rb
CHANGED
@@ -25,7 +25,9 @@ require_relative "searchkick/record_data"
|
|
25
25
|
require_relative "searchkick/record_indexer"
|
26
26
|
require_relative "searchkick/relation"
|
27
27
|
require_relative "searchkick/relation_indexer"
|
28
|
+
require_relative "searchkick/reranking"
|
28
29
|
require_relative "searchkick/results"
|
30
|
+
require_relative "searchkick/script"
|
29
31
|
require_relative "searchkick/version"
|
30
32
|
require_relative "searchkick/where"
|
31
33
|
|
@@ -141,6 +143,15 @@ module Searchkick
|
|
141
143
|
Gem::Version.new(server_version.split("-")[0]) < Gem::Version.new(version.split("-")[0])
|
142
144
|
end
|
143
145
|
|
146
|
+
# private
|
147
|
+
def self.knn_support?
|
148
|
+
if opensearch?
|
149
|
+
!server_below?("2.4.0", true)
|
150
|
+
else
|
151
|
+
!server_below?("8.6.0")
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
144
155
|
def self.search(term = "*", model: nil, **options, &block)
|
145
156
|
options = options.dup
|
146
157
|
klass = model
|
@@ -182,13 +193,20 @@ module Searchkick
|
|
182
193
|
queries = queries.map { |q| q.send(:query) }
|
183
194
|
event = {
|
184
195
|
name: "Multi Search",
|
185
|
-
body: queries.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join
|
196
|
+
body: queries.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join
|
186
197
|
}
|
187
198
|
ActiveSupport::Notifications.instrument("multi_search.searchkick", event) do
|
188
199
|
MultiSearch.new(queries).perform
|
189
200
|
end
|
190
201
|
end
|
191
202
|
|
203
|
+
# script
|
204
|
+
|
205
|
+
# experimental
|
206
|
+
def self.script(source, **options)
|
207
|
+
Script.new(source, **options)
|
208
|
+
end
|
209
|
+
|
192
210
|
# callbacks
|
193
211
|
|
194
212
|
def self.enable_callbacks
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: searchkick
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.
|
4
|
+
version: 5.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activemodel
|
@@ -69,7 +69,9 @@ files:
|
|
69
69
|
- lib/searchkick/reindex_v2_job.rb
|
70
70
|
- lib/searchkick/relation.rb
|
71
71
|
- lib/searchkick/relation_indexer.rb
|
72
|
+
- lib/searchkick/reranking.rb
|
72
73
|
- lib/searchkick/results.rb
|
74
|
+
- lib/searchkick/script.rb
|
73
75
|
- lib/searchkick/version.rb
|
74
76
|
- lib/searchkick/where.rb
|
75
77
|
- lib/tasks/searchkick.rake
|
@@ -85,14 +87,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
87
|
requirements:
|
86
88
|
- - ">="
|
87
89
|
- !ruby/object:Gem::Version
|
88
|
-
version: '3'
|
90
|
+
version: '3.1'
|
89
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
92
|
requirements:
|
91
93
|
- - ">="
|
92
94
|
- !ruby/object:Gem::Version
|
93
95
|
version: '0'
|
94
96
|
requirements: []
|
95
|
-
rubygems_version: 3.
|
97
|
+
rubygems_version: 3.5.11
|
96
98
|
signing_key:
|
97
99
|
specification_version: 4
|
98
100
|
summary: Intelligent search made easy with Rails and Elasticsearch or OpenSearch
|