rifle 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +18 -2
- data/lib/rifle.rb +26 -17
- data/lib/rifle/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -30,8 +30,24 @@ urn and a payload
|
|
30
30
|
|
31
31
|
# Payloads
|
32
32
|
|
33
|
-
Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed and
|
34
|
-
|
33
|
+
Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed on store by exact words, and collapsed words if punctuation
|
34
|
+
is in the middle of a word.
|
35
|
+
|
36
|
+
E.g., given the following payload:
|
37
|
+
|
38
|
+
O'Connor has telephone +(44)798765432?
|
39
|
+
|
40
|
+
Any of the following search terms will match
|
41
|
+
|
42
|
+
O'Connor
|
43
|
+
Oconnor
|
44
|
+
+44798765432
|
45
|
+
+(44)798765432
|
46
|
+
0798765432 # <= Special case, UK phone prefixes are ignored
|
47
|
+
|
48
|
+
# Fuzzy Matching
|
49
|
+
|
50
|
+
Items can be searched by metaphone if fuzzy_matching is enabled. That is, the search term need not be exact.
|
35
51
|
|
36
52
|
E.g., given the following payload:
|
37
53
|
|
data/lib/rifle.rb
CHANGED
@@ -20,8 +20,9 @@ module Rifle
|
|
20
20
|
Processor.new.index_resource(urn, hash)
|
21
21
|
end
|
22
22
|
|
23
|
-
def self.search(words,
|
24
|
-
|
23
|
+
def self.search(words, options = {})
|
24
|
+
options = {urns_only: options} if !!options == options # Support old boolean only arg
|
25
|
+
Processor.new.search_for(words, options)
|
25
26
|
end
|
26
27
|
|
27
28
|
class Processor
|
@@ -56,29 +57,29 @@ module Rifle
|
|
56
57
|
metaphones
|
57
58
|
end
|
58
59
|
|
59
|
-
def search_for(sentence,
|
60
|
-
p "Rifle searching for #{sentence},
|
60
|
+
def search_for(sentence, options)
|
61
|
+
p "Rifle searching for #{sentence}, options #{options}"
|
61
62
|
words = get_words_array_from_text(sentence)
|
62
63
|
metaphones = get_metaphones_from_word_set(Set.new(words))
|
63
64
|
|
64
65
|
urns = nil
|
65
66
|
metaphones.each do |metaphone|
|
66
67
|
new_urns = get_urns_for_metaphone(metaphone)
|
67
|
-
p metaphone
|
68
68
|
urns = urns.nil? ? Set.new(new_urns) : urns.intersection(new_urns)
|
69
69
|
end
|
70
70
|
urns ||= Set.new
|
71
71
|
|
72
72
|
p "Rifle found #{urns.size} urns"
|
73
|
-
if urns_only
|
73
|
+
if options[:urns_only]
|
74
74
|
urns
|
75
75
|
else
|
76
|
-
urns.map { |u|
|
76
|
+
full_results = urns.map { |u|
|
77
77
|
{
|
78
|
-
|
79
|
-
|
78
|
+
urn: u,
|
79
|
+
payload: get_payload_for_urn(u)
|
80
80
|
}
|
81
81
|
}
|
82
|
+
full_results = full_results.sort! { |a, b| DateTime.parse(b[:payload]['updated_at']) <=> DateTime.parse(a[:payload]['updated_at']) }
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
@@ -104,17 +105,25 @@ module Rifle
|
|
104
105
|
|
105
106
|
def get_words_array_from_text(text)
|
106
107
|
return [] if !text.is_a?(String)
|
107
|
-
text
|
108
|
-
end
|
108
|
+
text = text.downcase
|
109
109
|
|
110
|
-
|
110
|
+
# First get the smallest parts, split by anything that isn't a letter or number
|
111
|
+
results = text.split(/[^a-zA-Z0-9]/)
|
112
|
+
# Now add the text blocks with just the punctuation removed, e.g. o'connor -> oconnor (text is already downcased)
|
113
|
+
by_spaces = text.split(' ')
|
114
|
+
results = results + by_spaces.map { |w| w.gsub(/[^a-zA-Z0-9]/, '') }
|
111
115
|
# Add extra search terms. EG, other phone number layouts
|
112
|
-
|
113
|
-
# Here we have to strip all the front +44 and replace with 0.
|
114
|
-
|
115
|
-
|
116
|
-
|
116
|
+
results = results + by_spaces.select { |w| w.start_with?('+44') }.map { |w|
|
117
|
+
# Here we have to strip all the front +44 and replace with 0. Also, store the one without a prefix.
|
118
|
+
["0#{w[3..-1]}", w[3..-1]]
|
119
|
+
}
|
120
|
+
# Unique
|
121
|
+
results = results.flatten.uniq
|
122
|
+
|
123
|
+
results.select { |w| w.length >= Rifle.settings.min_word_length }
|
124
|
+
end
|
117
125
|
|
126
|
+
def get_metaphones_from_word_set(words)
|
118
127
|
# Remove ignored words
|
119
128
|
words.subtract Rifle.settings.ignored_words
|
120
129
|
# Get the parts
|
data/lib/rifle/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rifle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
type: :runtime
|