rifle 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +18 -2
- data/lib/rifle.rb +26 -17
- data/lib/rifle/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -30,8 +30,24 @@ urn and a payload
|
|
30
30
|
|
31
31
|
# Payloads
|
32
32
|
|
33
|
-
Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed and
|
34
|
-
|
33
|
+
Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed when stored, both by exact words and by collapsed words if punctuation
|
34
|
+
is in the middle of a word.
|
35
|
+
|
36
|
+
E.g., given the following payload:
|
37
|
+
|
38
|
+
O'Connor has telephone +(44)798765432?
|
39
|
+
|
40
|
+
Any of the following search terms will match
|
41
|
+
|
42
|
+
O'Connor
|
43
|
+
Oconnor
|
44
|
+
+44798765432
|
45
|
+
+(44)798765432
|
46
|
+
0798765432 # <= Special case: a UK +44 prefix can also be written as a leading 0 or omitted
|
47
|
+
|
48
|
+
# Fuzzy Matching
|
49
|
+
|
50
|
+
Items can be searched by metaphone if fuzzy_matching is enabled. That is, the search term need not be exact.
|
35
51
|
|
36
52
|
E.g., given the following payload:
|
37
53
|
|
data/lib/rifle.rb
CHANGED
@@ -20,8 +20,9 @@ module Rifle
|
|
20
20
|
Processor.new.index_resource(urn, hash)
|
21
21
|
end
|
22
22
|
|
23
|
-
def self.search(words,
|
24
|
-
|
23
|
+
def self.search(words, options = {})
|
24
|
+
options = {urns_only: options} if !!options == options # Support old boolean only arg
|
25
|
+
Processor.new.search_for(words, options)
|
25
26
|
end
|
26
27
|
|
27
28
|
class Processor
|
@@ -56,29 +57,29 @@ module Rifle
|
|
56
57
|
metaphones
|
57
58
|
end
|
58
59
|
|
59
|
-
def search_for(sentence,
|
60
|
-
p "Rifle searching for #{sentence},
|
60
|
+
def search_for(sentence, options)
|
61
|
+
p "Rifle searching for #{sentence}, options #{options}"
|
61
62
|
words = get_words_array_from_text(sentence)
|
62
63
|
metaphones = get_metaphones_from_word_set(Set.new(words))
|
63
64
|
|
64
65
|
urns = nil
|
65
66
|
metaphones.each do |metaphone|
|
66
67
|
new_urns = get_urns_for_metaphone(metaphone)
|
67
|
-
p metaphone
|
68
68
|
urns = urns.nil? ? Set.new(new_urns) : urns.intersection(new_urns)
|
69
69
|
end
|
70
70
|
urns ||= Set.new
|
71
71
|
|
72
72
|
p "Rifle found #{urns.size} urns"
|
73
|
-
if urns_only
|
73
|
+
if options[:urns_only]
|
74
74
|
urns
|
75
75
|
else
|
76
|
-
urns.map { |u|
|
76
|
+
full_results = urns.map { |u|
|
77
77
|
{
|
78
|
-
|
79
|
-
|
78
|
+
urn: u,
|
79
|
+
payload: get_payload_for_urn(u)
|
80
80
|
}
|
81
81
|
}
|
82
|
+
full_results = full_results.sort! { |a, b| DateTime.parse(b[:payload]['updated_at']) <=> DateTime.parse(a[:payload]['updated_at']) }
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
@@ -104,17 +105,25 @@ module Rifle
|
|
104
105
|
|
105
106
|
def get_words_array_from_text(text)
|
106
107
|
return [] if !text.is_a?(String)
|
107
|
-
text
|
108
|
-
end
|
108
|
+
text = text.downcase
|
109
109
|
|
110
|
-
|
110
|
+
# First get the smallest parts, split by anything that isn't a letter or number
|
111
|
+
results = text.split(/[^a-zA-Z0-9]/)
|
112
|
+
# Now add the space-separated text blocks with punctuation removed, e.g. o'connor -> oconnor (text is already downcased)
|
113
|
+
by_spaces = text.split(' ')
|
114
|
+
results = results + by_spaces.map { |w| w.gsub(/[^a-zA-Z0-9]/, '') }
|
111
115
|
# Add extra search terms, e.g. other phone number layouts
|
112
|
-
|
113
|
-
# Here we have to strip all the front +44 and replace with 0.
|
114
|
-
|
115
|
-
|
116
|
-
|
116
|
+
results = results + by_spaces.select { |w| w.start_with?('+44') }.map { |w|
|
117
|
+
# Here we strip the leading +44 and replace it with 0. Also, store the number without any prefix.
|
118
|
+
["0#{w[3..-1]}", w[3..-1]]
|
119
|
+
}
|
120
|
+
# Unique
|
121
|
+
results = results.flatten.uniq
|
122
|
+
|
123
|
+
results.select { |w| w.length >= Rifle.settings.min_word_length }
|
124
|
+
end
|
117
125
|
|
126
|
+
def get_metaphones_from_word_set(words)
|
118
127
|
# Remove ignored words
|
119
128
|
words.subtract Rifle.settings.ignored_words
|
120
129
|
# Get the parts
|
data/lib/rifle/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rifle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
type: :runtime
|