solr_lite 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/explain_entry.rb +50 -0
- data/lib/explainer.rb +34 -0
- data/lib/facet_field.rb +60 -0
- data/lib/filter_query.rb +68 -0
- data/lib/response.rb +114 -0
- data/lib/search_params.rb +167 -0
- data/lib/solr.rb +174 -0
- data/lib/solr_lite.rb +1 -0
- data/lib/spellcheck.rb +58 -0
- metadata +53 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: d21097855d584cb46f8ee7e0ac4cd25daf0208e8
|
|
4
|
+
data.tar.gz: 3b420f8645190a2ea69d23077bc7634cfa71dc74
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: a510c01a80f26d5ef2d9a1eb71dd5ee55fbe8045598a05952bc4ad8c0e5ec36ef45f3c96b07b28cf47e3f68b3c56f9c19b3836c561e0a17a5542256541bbe4cf
|
|
7
|
+
data.tar.gz: 9a906c6991457e1c2cc91b4d43bd22be1a12392ed5628f639a5f133145e99df4133f93fe5aabc039a5e3718be4519bbc4cf15ce0a106db9a770c5afacd4f69f1
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
module SolrLite
  # Holds the explain text that Solr reports for one entry and extracts
  # from it the lines that carry match information.
  class ExplainEntry
    attr_accessor :key, :score, :matches, :top_matches, :text

    # key:  identifier of the entry.
    # text: raw explain text for the entry (nil is treated as empty text).
    def initialize(key, text)
      @key = key
      # get rid of this one extraneous linebreak that Solr includes
      @text = text.to_s.gsub("\n), product of:", ", product of:")
      @matches = get_matches(@text)
      # The score is the first token of the first match line. When the text
      # has no match lines there is no score to report, so it stays nil
      # (instead of raising NoMethodError on nil).
      first_match = @matches.first
      @score = first_match.nil? ? nil : first_match.split(" ").first
      @top_matches = get_top_matches(@matches)
    end

    private

    # For a given entry, returns an array with the lines that include match
    # information (notice that we also include line with the coord() value)
    def get_matches(text)
      text.split("\n").select do |line|
        line.include?("(MATCH)") || line.include?("coord(")
      end
    end

    # A subset of the matches that includes only the match that was picked
    # i.e. the one with the higher score.
    # (notice that we also include line with the coord() value)
    def get_top_matches(matches)
      token_score = nil
      matches.each_with_object([]) do |line, top|
        if line.include?("max of")
          # Remember the winning score; the "max of" line itself is not kept.
          token_score = line.strip.split.first
          next
        end

        # Before any "max of" line is seen, lines are presumably
        # "product of" or "sum of" markers and are always kept.
        # Afterwards only the line carrying the winning score and the
        # coord() lines are kept; other lines are matches that were not picked.
        keep = token_score.nil? ||
               line.strip.start_with?(token_score) ||
               line.include?("coord(")
        top << line if keep
      end
    end
  end
end
|
data/lib/explainer.rb
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
require "explain_entry.rb"
|
|
2
|
+
module SolrLite
  # Collects the explain information of a Solr response (the data found
  # under "debug" / "explain") as a list of ExplainEntry objects.
  class Explainer
    attr_accessor :entries

    # solr_response_hash a Solr HTTP response parsed via JSON.parse()
    #
    # A response without "debug" or "explain" data produces no entries.
    def initialize(solr_reponse_hash)
      @explain = solr_reponse_hash.fetch("debug", {}).fetch("explain", [])
      # Each element is a pair: the entry key followed by its explain text.
      @entries = @explain.map { |pair| ExplainEntry.new(pair[0], pair[1]) }
    end

    # solr_response (string) is the Solr HTTP response from a query
    #
    # NOTE(review): uses JSON.parse, so the json library must already be
    # loaded by whoever requires this code.
    def self.from_response(solr_response)
      Explainer.new(JSON.parse(solr_response))
    end

    # Raw string with the explain information for each entry
    # (one "-- key { ... }" section per entry, CRLF line endings).
    def text
      @entries.map do |entry|
        "-- #{entry.key} {\r\n#{entry.text}\r\n}\r\n\r\n"
      end.join
    end
  end
end
|
data/lib/facet_field.rb
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
module SolrLite
  # A facet: the Solr field it is computed on, a display title and the
  # list of values (with their counts and URLs) found for it.
  class FacetField

    # One value of a facet together with its count and the URLs used
    # to add it to / remove it from the current search.
    class FacetValue
      attr_accessor :text, :count, :remove_url, :add_url

      def initialize(text = "", count = 0, remove_url = nil)
        @text = text
        @count = count
        @remove_url = remove_url
        @add_url = nil
      end
    end

    attr_accessor :name, :title, :values

    # name:          field name in Solr
    # display_value: title for the facet
    def initialize(name, display_value)
      @name = name # field name in Solr
      @title = display_value
      @values = []
    end

    # Returns "name|text" with the text URL-escaped via CGI.escape.
    # NOTE(review): relies on the cgi library being loaded already.
    def to_qs(text)
      "#{@name}|#{CGI.escape(text)}"
    end

    # Appends a new value (text + count) to the facet.
    def add_value(text, count)
      @values << FacetValue.new(text, count)
    end

    # Count of the first value whose text matches, 0 if there is none.
    def value_count(text)
      match = @values.find { |value| value.text == text }
      match.nil? ? 0 : match.count
    end

    # Sets the remove URL on every value whose text equals `value`.
    def set_remove_url_for(value, url)
      each_value_with_text(value) { |facet_value| facet_value.remove_url = url }
    end

    # Sets the add URL on every value whose text equals `value`.
    def set_add_url_for(value, url)
      each_value_with_text(value) { |facet_value| facet_value.add_url = url }
    end

    # Sets both URLs on every value whose text equals `value`.
    def set_urls_for(value, add_url, remove_url)
      each_value_with_text(value) do |facet_value|
        facet_value.add_url = add_url
        facet_value.remove_url = remove_url
      end
    end

    private

    # Yields each value whose text equals `text`. Returns the complete
    # values array (the result of the underlying each).
    def each_value_with_text(text)
      @values.each do |facet_value|
        yield facet_value if facet_value.text == text
      end
    end
  end
end
|
data/lib/filter_query.rb
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
require "cgi"
|
|
2
|
+
|
|
3
|
+
module SolrLite
  # Represents an "fq" in Solr. Field is the field to filter by
  # and value the value to filter by. In a Solr query they are represented
  # as "fq=field:value"
  class FilterQuery
    attr_accessor :field, :value, :solr_value, :qs_value, :form_value
    attr_accessor :title, :remove_url

    # field:  name of the field to filter by.
    # values: array of strings with the value(s) to filter by.
    def initialize(field, values)
      @field = field
      @value = values.join("|")
      @solr_value = to_solr_fq_value(field, values)
      # URL friendly (no : or quotes)
      @qs_value = ([field.to_s] + values.map { |v| CGI.escape(v) }).join("|")
      # HTML Form friendly (no encoding, the form auto-encodes on POST)
      @form_value = "#{field}|#{@value}"
      @title = field # default to field name
      @remove_url = nil
    end

    # qs is assumed to be the value taken from the query string
    # in the form `field|value` or `field|value1|valueN`.
    #
    # Sometimes(*) the string comes URL encoded, for example:
    #     `field|hello`
    #     `field|hello%20world`
    # CGI.unescape handles these cases nicer than URL.decode
    #
    # (*) Values coming from HTML forms submitted via HTTP POST tend
    #     to be encoded slightly different than values submitted via
    #     HTTP GET requests.
    #
    # Returns nil when qs is nil or does not have at least a field
    # and one value.
    def self.from_query_string(qs)
      return nil if qs.nil?
      # The whole string is unescaped before splitting so that an encoded
      # separator (%7C) is recognized too.
      field, *values = CGI.unescape(qs).split("|")
      return nil if values.empty?
      FilterQuery.new(field, values)
    end

    private

    # Creates a filter query (fq) string as needed by Solr from
    # an array of values. Handles single and multi-value gracefully.
    # For single-value it returns `(field:"value")`. For multi-value
    # it returns `(field:"value1") OR (field:"value2")`. The result
    # is URL-escaped.
    def to_solr_fq_value(field, values)
      clauses = values.map do |v|
        # The value goes inside a quoted phrase: a raw double quote or
        # backslash in it would terminate or corrupt the phrase, so
        # both are backslash-escaped.
        phrase = v.gsub(/["\\]/) { |char| "\\" + char }
        "(#{field}:\"#{phrase}\")"
      end
      # Very important to escape the : otherwise URL.parse throws an error in Linux
      CGI.escape(clauses.join(" OR "))
    end
  end # class
end # module
|
data/lib/response.rb
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
require "facet_field.rb"
|
|
2
|
+
require "explainer.rb"
|
|
3
|
+
require "spellcheck.rb"
|
|
4
|
+
module SolrLite
  # Wraps a parsed Solr response (a Hash) and exposes its status,
  # pagination numbers, documents and facet values.
  class Response
    attr_accessor :items, :solr_response

    # solr_response: Hash with the parsed Solr response.
    # params:        object that owns the facet definitions (it must respond
    #                to facets and facet_for_field), or nil when there are no
    #                search parameters for this response.
    def initialize(solr_response, params)
      @solr_response = solr_response
      @params = params
      @explainer = nil
      @spellcheck = nil
      set_facet_values

      # This value can be set by the client if we want to use a custom
      # representation of solr_docs
      @items = []
    end

    # True when the status is 0 or within 200..299.
    def ok?
      code = status
      return true if code == 0
      code.is_a?(Numeric) && code >= 200 && code <= 299
    end

    # Status reported in the response header, -1 if there is no header.
    def status
      header = @solr_response["responseHeader"]
      return -1 if header.nil?
      header["status"]
    end

    # Error message reported by Solr, "" if there is none.
    def error_msg
      error = @solr_response["error"]
      return "" if error.nil? || error["msg"].nil?
      error["msg"]
    end

    # Total number documents found in solr
    # usually larger than solr_docs.count
    def num_found
      @solr_response["response"]["numFound"]
    rescue
      0
    end

    # Number of pages needed to show num_found documents (0 when the
    # page size is unknown).
    def num_pages
      return 0 if page_size == 0
      pages = (num_found / page_size).to_i
      pages += 1 if (num_found % page_size) != 0
      pages
    end

    # Value of the "rows" parameter echoed in the response header, 0 if
    # it is not available.
    def page_size
      @solr_response["responseHeader"]["params"]["rows"].to_i
    rescue
      0
    end

    # Start position for retrieval (used for pagination)
    def start
      @solr_response["response"]["start"].to_i
    rescue
      0
    end

    # Position right after the last document of the current page, capped
    # at num_found.
    def end
      [start + page_size, num_found].min
    end

    # Current page number (1-based).
    def page
      return 1 if page_size == 0 # fail safe
      (start / page_size) + 1
    end

    # Raw solr_docs. Empty when the response carries no documents
    # (for example an error response).
    def solr_docs
      response = @solr_response["response"]
      return [] if response.nil?
      response["docs"] || []
    end

    # Facets defined in the search parameters (empty without parameters).
    def facets
      @params.nil? ? [] : @params.facets
    end

    # Copies the facet values found in the Solr response into the matching
    # facets of the search parameters.
    def set_facet_values
      facet_counts = @solr_response["facet_counts"]
      # Without search parameters there is no facet to store the values in.
      return if facet_counts.nil? || @params.nil?

      (facet_counts["facet_fields"] || {}).each do |field_name, values|
        # Each element has two parts, e.g.
        #   ["record_type", ["PEOPLE", 32, "ORGANIZATION", 4]]
        #
        # the first has the field for the facet (record_type)
        # the second is a flat array of value/count pairs (PEOPLE/32, ORG/4)
        facet_field = @params.facet_for_field(field_name)
        # Solr may report a field that was not declared as a facet in the
        # parameters; there is nowhere to store its values.
        next if facet_field.nil? || values.nil?

        values.each_slice(2) do |pair|
          next if pair.length < 2 # ignore a dangling value without count
          facet_field.add_value(pair[0], pair[1])
        end
      end
      # TODO: make sure we sort the FacetField.VALUES descending by count
    end

    # Explain information of the response (memoized).
    def explainer
      @explainer ||= SolrLite::Explainer.new(@solr_response)
    end

    # Spellcheck information of the response (memoized).
    def spellcheck
      @spellcheck ||= SolrLite::Spellcheck.new(@solr_response)
    end

  end # class
end # module
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
require "filter_query.rb"
|
|
2
|
+
require "facet_field.rb"
|
|
3
|
+
module SolrLite
  # The parameters of a search: query, filters, facets, pagination,
  # fields to fetch and sort order. Knows how to render itself as a
  # query string for the browser, for Solr and as HTML form values.
  class SearchParams
    attr_accessor :q, :fq, :facets, :page, :page_size, :fl, :sort, :facet_limit

    DEFAULT_PAGE_SIZE = 20

    # q:      query text
    # fq:     array of FilterQuery
    # facets: array of FacetField
    def initialize(q = "", fq = [], facets = [])
      @q = q
      @fq = fq         # array of FilterQuery
      @facets = facets # array of FacetField
      @page = 1
      @page_size = DEFAULT_PAGE_SIZE
      @fl = nil
      @sort = ""
      @facet_limit = nil
    end

    # Returns the facet defined for the given field name (nil if none).
    def facet_for_field(field)
      @facets.find { |f| f.name == field }
    end

    # Sets the remove URL for a value of the facet defined on `field`.
    # Does nothing when there is no facet for the field.
    def set_facet_remove_url(field, value, url)
      facet = facet_for_field(field)
      facet.set_remove_url_for(value, url) unless facet.nil?
    end

    # Zero-based position of the first row of the current page.
    def start_row
      (@page - 1) * @page_size
    end

    # Recalculates the page from a zero-based start row.
    def start_row=(start)
      @page = @page_size == 0 ? 0 : (start / @page_size) + 1
      nil
    end

    # The setter was originally published under this misspelled name;
    # it is kept so existing callers continue to work.
    alias_method :star_row=, :start_row=

    # Returns the string that we need render on the Browser to execute
    # a search with the current parameters.
    #
    # facet_to_ignore: a FilterQuery object with a value to ignore when
    #   creating the query string.
    # q_override: a string with a Solr query to use instead of the current q value
    def to_user_query_string(facet_to_ignore = nil, q_override = nil)
      q_value = q_override.nil? ? @q : q_override
      parts = []
      # Emit the effective query (the override when one was given); an
      # empty query or the match-all "*" is left out.
      parts << "&q=#{q_value}" if q_value != "" && q_value != "*"
      @fq.each do |filter|
        ignored = !facet_to_ignore.nil? && filter.solr_value == facet_to_ignore.solr_value
        parts << "&fq=#{filter.qs_value}" unless ignored
      end
      parts << "&rows=#{@page_size}" if @page_size != DEFAULT_PAGE_SIZE
      parts << "&page=#{@page}" if @page != 1
      # Don't surface this to the UI for now
      # (since we don't let the user change the sorting)
      # parts << "&sort=#{@sort}" if sort != ""
      parts.join
    end

    # Same as to_user_query_string but without the q parameter.
    def to_user_query_string_no_q
      to_user_query_string(nil, '')
    end

    # Returns the string that we need to pass Solr to execute a search
    # with the current parameters.
    #
    # extra_fqs: array of additional filters (objects responding to
    #   solr_value) appended after the ones in fq.
    def to_solr_query_string(extra_fqs = [])
      parts = []
      parts << "&q=#{@q}" if @q != ""
      (@fq + extra_fqs).each { |filter| parts << "&fq=#{filter.solr_value}" }
      parts << "&rows=#{@page_size}"
      parts << "&start=#{start_row}"
      parts << "&sort=#{CGI.escape(@sort)}" unless @sort.nil? || @sort == ""
      if @facets.count > 0
        parts << "&facet=on"
        @facets.each do |f|
          parts << "&facet.field=#{f.name}"
          parts << "&f.#{f.name}.facet.mincount=1"
          parts << "&f.#{f.name}.facet.limit=#{@facet_limit}" unless @facet_limit.nil?
        end
      end
      parts.join
    end

    # Returns an array of values that can be added to an HTML form
    # to represent the current search parameters. Notice that we do
    # NOT include the `q` parameter there is typically an explicit
    # HTML form value for it on the form.
    def to_form_values
      # We create an individual fq_n HTML form value for each
      # fq value because Rails does not like the same value on the form.
      values = @fq.each_with_index.map do |filter, i|
        {name: "fq_#{i}", value: filter.form_value}
      end

      values << {name: "rows", value: @page_size} if @page_size != DEFAULT_PAGE_SIZE
      values << {name: "page", value: @page} if @page != 1
      # Don't surface this to the UI for now
      # (since we don't let the user change the sorting)
      # values << {name: "sort", value: @sort} if sort != ""
      values
    end

    def to_s
      "q=#{@q}\nfq=#{@fq}"
    end

    # Builds a SearchParams from a query string ("q=...&fq=...&rows=...").
    # Parameters without a value and parameters other than q, rows, page,
    # fq and fq_n are ignored. Values are stored as they come (they are
    # not URL-decoded here).
    def self.from_query_string(qs, facet_fields = [])
      params = SearchParams.new
      params.facets = facet_fields
      qs.split("&").each do |token|
        # Split on the first "=" only so that values containing "="
        # are not truncated.
        name, value = token.split("=", 2)
        next if value.nil? || value.empty?
        case name
        when "q"
          params.q = value
        when "rows"
          params.page_size = value.to_i
        when "page"
          params.page = value.to_i
        when "fq", /\Afq_/
          # Query string contains fq when _we_ build the query string, for
          # example as the user clicks on different facets on the UI.
          # A query string can have multiple fq values.
          #
          # Query string contains fq_n when _Rails_ pushes HTML FORM values to
          # the query string. Rails does not like duplicate values in forms
          # and therefore we force them to be different by appending a number
          # to them (fq_1, fq_2, ...)
          fq = FilterQuery.from_query_string(value)
          params.fq << fq unless fq.nil?
        end
      end
      params
    end
  end
end
|
data/lib/solr.rb
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
require "net/http"
|
|
2
|
+
require "time"
|
|
3
|
+
require "json"
|
|
4
|
+
require "search_params.rb"
|
|
5
|
+
require "response.rb"
|
|
6
|
+
module SolrLite
  # Minimal HTTP client for a Solr core: fetch by id, search, update
  # and delete.
  class Solr
    # Creates an instance of the Solr class.
    # Parameters:
    #   solr_url: string with the URL to Solr ("http://localhost:8983/solr/bibdata")
    #   logger: an instance of Rails::logger if using Rails.
    #     Could also be SolrLite::Logger which defaults to the console.
    #     Or nil to omit logging.
    def initialize(solr_url, logger = nil)
      raise "No solr_url was indicated" if solr_url == nil
      @solr_url = solr_url
      @logger = logger
    end

    # Fetches a Solr document by id.
    # Parameters:
    #   id: ID of the document to fetch.
    #   q_field: Query field (defaults to "q")
    #   fl: list of fields to fetch (defaults to "*")
    #
    # Returns a hash with the document information or nil if no document was found.
    # Raises an exception if more than one document was found.
    def get(id, q_field = "q", fl = "*")
      query_string = "#{q_field}=id%3A#{id}&fl=#{fl}&wt=json&indent=on" # %3A => :
      solr_response = Response.new(http_get("#{@solr_url}/select?#{query_string}"), nil)
      raise "More than one record found for id #{id}" if solr_response.num_found > 1
      solr_response.solr_docs.first
    end

    # Issues a search request to Solr.
    # Parameters:
    #   params: an instance of SearchParams.
    #   extra_fqs: array of FilterQuery objects. This is used to
    #     add filters to the search that we don't want to allow the
    #     user to override.
    #   qf: Used to override the server's qf value.
    #   mm: Used to override the server's mm value.
    #   debug: true to include debugQuery info in the response. (defaults to false)
    #
    # Returns an instance of SolrLite::Response
    def search(params, extra_fqs = [], qf = nil, mm = nil, debug = false)
      parts = []
      # Without an explicit field list Solr defaults are used.
      parts << "fl=#{params.fl.join(",")}" unless params.fl.nil?
      parts << "wt=json&indent=on"
      # to_solr_query_string prefixes every pair with "&"; the leading one
      # is dropped because the parts are joined with "&" below.
      parts << params.to_solr_query_string(extra_fqs).sub(/\A&+/, "")
      parts << "q.op=AND"
      parts << "qf=#{CGI.escape(qf)}" unless qf.nil?
      parts << "mm=#{CGI.escape(mm)}" unless mm.nil?
      parts << "debugQuery=true" if debug

      url = "#{@solr_url}/select?#{parts.reject(&:empty?).join("&")}"
      Response.new(http_get(url), params)
    end

    # shortcut for search
    def search_text(terms)
      search(SearchParams.new(terms))
    end

    # Zero-based position of the first row of the given page.
    def start_row(page, page_size)
      (page - 1) * page_size
    end

    # Posts a JSON payload (string) to the update handler and commits.
    # Returns an instance of SolrLite::Response.
    def update(json)
      http_post_json("#{@solr_url}/update?commit=true", json)
    end

    # Deletes the document with the given id and commits.
    # Returns an instance of SolrLite::Response.
    def delete_by_id(id)
      # Use XML format here because that's the only way I could get
      # the delete to recognize ids with a colon (e.g. bdr:123).
      # Using JSON caused the Solr parser to choke.
      #
      # Notice that the payload is XML but the response is JSON (wt=json)
      url = "#{@solr_url}/update?commit=true&wt=json"
      # The id is escaped so that characters such as < or & cannot break
      # (or alter) the XML command.
      payload = "<delete><id>#{CGI.escapeHTML(id.to_s)}</id></delete>"
      http_response = http_post(url, payload, "text/xml")
      Response.new(JSON.parse(http_response), nil)
    end

    # Deletes the documents that match the given Solr query and commits.
    # Returns an instance of SolrLite::Response.
    def delete_by_query(query)
      url = "#{@solr_url}/update?commit=true"
      # Let the JSON library encode the query so that quotes and
      # backslashes in it produce valid JSON.
      payload = JSON.generate("delete" => {"query" => query})
      http_post_json(url, payload)
    end

    # Deletes every document in the core.
    def delete_all!
      delete_by_query("*:*")
    end

    private

    # POSTs a JSON payload and wraps the parsed JSON reply in a Response.
    def http_post_json(url, payload)
      http_response = http_post(url, payload, "application/json")
      Response.new(JSON.parse(http_response), nil)
    end

    # POSTs the payload with the given content type.
    # Returns the raw response body (string).
    def http_post(url, payload, content_type)
      start = Time.now
      log_msg("Solr HTTP POST #{url}")
      uri = URI.parse(url)
      request = Net::HTTP::Post.new(uri.request_uri)
      request["Content-Type"] = content_type
      request.body = payload
      response = http_client(uri).request(request)
      log_elapsed(start, "Solr HTTP POST")
      response.body
    end

    # GETs the URL. Returns the response body parsed as JSON.
    def http_get(url)
      start = Time.now
      log_msg("Solr HTTP GET #{url}")
      uri = URI.parse(url)
      request = Net::HTTP::Get.new(uri.request_uri)
      request["Content-Type"] = "application/json"
      response = http_client(uri).request(request)
      log_elapsed(start, "Solr HTTP GET")
      JSON.parse(response.body)
    end

    # Creates the Net::HTTP object used to talk to the host in the URI,
    # with SSL turned on for https URLs.
    def http_client(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      if uri.scheme == "https"
        http.use_ssl = true
        # NOTE(review): certificate verification is disabled here, which
        # leaves https connections open to man-in-the-middle attacks.
        # Kept as-is to not break servers with self-signed certificates;
        # consider making this configurable.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end
      http
    end

    def elapsed_ms(start)
      ((Time.now - start) * 1000).to_i
    end

    def log_elapsed(start, msg)
      log_msg("#{msg} took #{elapsed_ms(start)} ms")
    end

    # Logs the message unless no logger was provided.
    def log_msg(msg)
      @logger.info(msg) unless @logger.nil?
    end
  end
end
|
data/lib/solr_lite.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require "solr.rb"
|
data/lib/spellcheck.rb
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
module SolrLite
  # Gives access to the "spellcheck" section of a parsed Solr response.
  class Spellcheck
    # solr_reponse_hash: a Solr HTTP response parsed via JSON.parse()
    def initialize(solr_reponse_hash)
      @spellcheck = solr_reponse_hash.fetch("spellcheck", {})
    end

    # The "suggestions" reported by Solr ([] if there are none).
    def suggestions
      @suggestions ||= @spellcheck.fetch("suggestions", [])
    end

    # Collations in the form:
    #
    #   ["collation", {"collationQuery" => "wordA"}, "collation", {"collationQuery" => "wordB"}, ...]
    #
    # Always returns an array.
    def collations
      @collations ||= begin
        raw = @spellcheck.fetch("collations", nil)
        if raw.nil?
          # We must be on Solr4, the collations come inside the suggestions.
          collations_from_suggestions
        elsif raw.kind_of?(Array)
          # We must be in Solr6, use the collation information as-is
          raw
        else
          # uh-oh... not a structure that we know how to handle
          []
        end
      end
    end

    # def spellcheck_correctly_spelled()
    #   @spellcheck.fetch("correctlySpelled", true)
    # end

    # The collationQuery of the first collation, nil if there is none.
    def top_collation_query
      colls = collations
      return nil if colls.length < 2
      top_collation = colls[1]
      return nil unless top_collation.kind_of?(Hash)
      top_collation.fetch("collationQuery", nil)
    end

    private

    # Mimics the structure of the Solr6 results from the Solr4 suggestions.
    #
    # As a reference, the structure in Solr4 is slightly different in that
    # the collationQuery information is in an array within an array:
    #
    #   ["collation", ["collationQuery", "wordA"], "collation", ["collationQuery", "wordB"], ...]
    def collations_from_suggestions
      list = suggestions
      result = []
      list.each_with_index do |item, i|
        next unless item == "collation"
        collation_query = list[i + 1]
        next if collation_query.nil? # dangling marker with no data after it
        # NOTE(review): presumably Solr4 reports the collation as a plain
        # string when extended collation results are off - confirm against
        # the Solr documentation. A string is taken as the word itself.
        word = collation_query.kind_of?(String) ? collation_query : collation_query[1]
        result << "collation"
        result << {"collationQuery" => word}
      end
      result
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: solr_lite
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.3
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Hector Correa
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2018-02-23 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: A lightweight gem to connect to Solr and run queries. Requires no extra
|
|
14
|
+
dependencies.
|
|
15
|
+
email: hector_correa@brown.edu
|
|
16
|
+
executables: []
|
|
17
|
+
extensions: []
|
|
18
|
+
extra_rdoc_files: []
|
|
19
|
+
files:
|
|
20
|
+
- lib/explain_entry.rb
|
|
21
|
+
- lib/explainer.rb
|
|
22
|
+
- lib/facet_field.rb
|
|
23
|
+
- lib/filter_query.rb
|
|
24
|
+
- lib/response.rb
|
|
25
|
+
- lib/search_params.rb
|
|
26
|
+
- lib/solr.rb
|
|
27
|
+
- lib/solr_lite.rb
|
|
28
|
+
- lib/spellcheck.rb
|
|
29
|
+
homepage: https://github.com/Brown-University-Library/solr_lite
|
|
30
|
+
licenses:
|
|
31
|
+
- MIT
|
|
32
|
+
metadata: {}
|
|
33
|
+
post_install_message:
|
|
34
|
+
rdoc_options: []
|
|
35
|
+
require_paths:
|
|
36
|
+
- lib
|
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
38
|
+
requirements:
|
|
39
|
+
- - ">="
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
version: '0'
|
|
42
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '0'
|
|
47
|
+
requirements: []
|
|
48
|
+
rubyforge_project:
|
|
49
|
+
rubygems_version: 2.5.1
|
|
50
|
+
signing_key:
|
|
51
|
+
specification_version: 4
|
|
52
|
+
summary: A lightweight gem to connect to Solr and run queries
|
|
53
|
+
test_files: []
|