rifle 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README.md +18 -2
  2. data/lib/rifle.rb +26 -17
  3. data/lib/rifle/version.rb +1 -1
  4. metadata +2 -2
data/README.md CHANGED
@@ -30,8 +30,24 @@ urn and a payload
30
30
 
31
31
  # Payloads
32
32
 
33
- Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed and can
34
- be searched by metaphone if fuzzy_matching is enabled. That is, the search term need not be exact.
33
+ Payloads are expected to be hashes, identified by a company-wide unique id (a urn). When stored, they are indexed by exact words and, where punctuation
34
+ occurs in the middle of a word, also by the collapsed word with that punctuation removed.
35
+
36
+ E.g., given the following payload:
37
+
38
+ O'Connor has telephone +(44)798765432?
39
+
40
+ Any of the following search terms will match:
41
+
42
+ O'Connor
43
+ Oconnor
44
+ +44798765432
45
+ +(44)798765432
46
+ 0798765432 # <= Special case: the UK +44 prefix may be replaced with a leading 0
47
+
48
+ # Fuzzy Matching
49
+
50
+ Items can be searched by metaphone if fuzzy_matching is enabled. That is, the search term need not be exact.
35
51
 
36
52
  E.g., given the following payload:
37
53
 
@@ -20,8 +20,9 @@ module Rifle
20
20
  Processor.new.index_resource(urn, hash)
21
21
  end
22
22
 
23
- def self.search(words, urns_only = false)
24
- Processor.new.search_for(words, urns_only)
23
+ def self.search(words, options = {})
24
+ options = {urns_only: options} if !!options == options # Support old boolean only arg
25
+ Processor.new.search_for(words, options)
25
26
  end
26
27
 
27
28
  class Processor
@@ -56,29 +57,29 @@ module Rifle
56
57
  metaphones
57
58
  end
58
59
 
59
- def search_for(sentence, urns_only)
60
- p "Rifle searching for #{sentence}, urns only #{urns_only}"
60
+ def search_for(sentence, options)
61
+ p "Rifle searching for #{sentence}, options #{options}"
61
62
  words = get_words_array_from_text(sentence)
62
63
  metaphones = get_metaphones_from_word_set(Set.new(words))
63
64
 
64
65
  urns = nil
65
66
  metaphones.each do |metaphone|
66
67
  new_urns = get_urns_for_metaphone(metaphone)
67
- p metaphone
68
68
  urns = urns.nil? ? Set.new(new_urns) : urns.intersection(new_urns)
69
69
  end
70
70
  urns ||= Set.new
71
71
 
72
72
  p "Rifle found #{urns.size} urns"
73
- if urns_only
73
+ if options[:urns_only]
74
74
  urns
75
75
  else
76
- urns.map { |u|
76
+ full_results = urns.map { |u|
77
77
  {
78
- urn: u,
79
- payload: get_payload_for_urn(u)
78
+ urn: u,
79
+ payload: get_payload_for_urn(u)
80
80
  }
81
81
  }
82
+ full_results = full_results.sort! { |a, b| DateTime.parse(b[:payload]['updated_at']) <=> DateTime.parse(a[:payload]['updated_at']) }
82
83
  end
83
84
  end
84
85
 
@@ -104,17 +105,25 @@ module Rifle
104
105
 
105
106
  def get_words_array_from_text(text)
106
107
  return [] if !text.is_a?(String)
107
- text.downcase.split(/[^a-zA-Z0-9]/).select { |w| w.length >= Rifle.settings.min_word_length }
108
- end
108
+ text = text.downcase
109
109
 
110
- def get_metaphones_from_word_set(words)
110
+ # First get the smallest parts, split by anything that isn't a letter or number
111
+ results = text.split(/[^a-zA-Z0-9]/)
112
+ # Now add the text blocks just with punctuation removed. eg O'Connor -> OConnor
113
+ by_spaces = text.split(' ')
114
+ results = results + by_spaces.map { |w| w.gsub(/[^a-zA-Z0-9]/, '') }
111
115
  # Add extra search terms. EG, other phone number layouts
112
- words = Set.new(words.map { |w|
113
- # Here we have to strip all the front +44 and replace with 0.
114
- w = w.start_with?('+44') ? "0#{w[3..-1]}" : w
115
- w = w.start_with?('+') ? w[1..-1] : w # Also strip + signs, as they are ignored by Resque in keys
116
- })
116
+ results = results + by_spaces.select { |w| w.start_with?('+44') }.map { |w|
117
+ # Here we have to strip all the front +44 and replace with 0. Also, store the one without a prefix.
118
+ ["0#{w[3..-1]}", w[3..-1]]
119
+ }
120
+ # Unique
121
+ results = results.flatten.uniq
122
+
123
+ results.select { |w| w.length >= Rifle.settings.min_word_length }
124
+ end
117
125
 
126
+ def get_metaphones_from_word_set(words)
118
127
  # Removed ignored words
119
128
  words.subtract Rifle.settings.ignored_words
120
129
  # Get the parts
@@ -1,3 +1,3 @@
1
1
  module Rifle
2
- VERSION = '0.1.7'
2
+ VERSION = '0.1.8'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rifle
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-20 00:00:00.000000000 Z
12
+ date: 2013-01-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  type: :runtime