rifle 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. data/README.md +18 -2
  2. data/lib/rifle.rb +26 -17
  3. data/lib/rifle/version.rb +1 -1
  4. metadata +2 -2
data/README.md CHANGED
@@ -30,8 +30,24 @@ urn and a payload
30
30
 
31
31
  # Payloads
32
32
 
33
- Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed and can
34
- be searched by metaphone if fuzzy_matching is enabled. That is, the search term need not be exact.
33
+ Payloads are expected to be hashes, identified by a company-wide unique id (a urn). These are indexed on store by exact words, and collapsed words if punctuation
34
+ is in the middle of a word.
35
+
36
+ E.g, given the following payload
37
+
38
+ O'Connor has telephone +(44)798765432?
39
+
40
+ Any of the following search terms will match
41
+
42
+ O'Connor
43
+ Oconnor
44
+ +44798765432
45
+ +(44)798765432
46
+ 0798765432 # <= Special case, UK phone prefixes are ignored
47
+
48
+ # Fuzzy Matching
49
+
50
+ Items can be searched by metaphone if fuzzy_matching is enabled. That is, the search term need not be exact.
35
51
 
36
52
  E.g, given the following payload
37
53
 
data/lib/rifle.rb CHANGED
@@ -20,8 +20,9 @@ module Rifle
20
20
  Processor.new.index_resource(urn, hash)
21
21
  end
22
22
 
23
- def self.search(words, urns_only = false)
24
- Processor.new.search_for(words, urns_only)
23
+ def self.search(words, options = {})
24
+ options = {urns_only: options} if !!options == options # Support old boolean only arg
25
+ Processor.new.search_for(words, options)
25
26
  end
26
27
 
27
28
  class Processor
@@ -56,29 +57,29 @@ module Rifle
56
57
  metaphones
57
58
  end
58
59
 
59
- def search_for(sentence, urns_only)
60
- p "Rifle searching for #{sentence}, urns only #{urns_only}"
60
+ def search_for(sentence, options)
61
+ p "Rifle searching for #{sentence}, options #{options}"
61
62
  words = get_words_array_from_text(sentence)
62
63
  metaphones = get_metaphones_from_word_set(Set.new(words))
63
64
 
64
65
  urns = nil
65
66
  metaphones.each do |metaphone|
66
67
  new_urns = get_urns_for_metaphone(metaphone)
67
- p metaphone
68
68
  urns = urns.nil? ? Set.new(new_urns) : urns.intersection(new_urns)
69
69
  end
70
70
  urns ||= Set.new
71
71
 
72
72
  p "Rifle found #{urns.size} urns"
73
- if urns_only
73
+ if options[:urns_only]
74
74
  urns
75
75
  else
76
- urns.map { |u|
76
+ full_results = urns.map { |u|
77
77
  {
78
- urn: u,
79
- payload: get_payload_for_urn(u)
78
+ urn: u,
79
+ payload: get_payload_for_urn(u)
80
80
  }
81
81
  }
82
+ full_results = full_results.sort! { |a, b| DateTime.parse(b[:payload]['updated_at']) <=> DateTime.parse(a[:payload]['updated_at']) }
82
83
  end
83
84
  end
84
85
 
@@ -104,17 +105,25 @@ module Rifle
104
105
 
105
106
  def get_words_array_from_text(text)
106
107
  return [] if !text.is_a?(String)
107
- text.downcase.split(/[^a-zA-Z0-9]/).select { |w| w.length >= Rifle.settings.min_word_length }
108
- end
108
+ text = text.downcase
109
109
 
110
- def get_metaphones_from_word_set(words)
110
+ # First get the smallest parts, split by anything that isn't a letter or number
111
+ results = text.split(/[^a-zA-Z0-9]/)
112
+ # Now add the text blocks just with punctuation removed. eg O'Connor -> OConnor
113
+ by_spaces = text.split(' ')
114
+ results = results + by_spaces.map { |w| w.gsub(/[^a-zA-Z0-9]/, '') }
111
115
  # Add extra search terms. EG, other phone number layouts
112
- words = Set.new(words.map { |w|
113
- # Here we have to strip all the front +44 and replace with 0.
114
- w = w.start_with?('+44') ? "0#{w[3..-1]}" : w
115
- w = w.start_with?('+') ? w[1..-1] : w # Also strip + signs, as they are ignored by Resque in keys
116
- })
116
+ results = results + by_spaces.select { |w| w.start_with?('+44') }.map { |w|
117
+ # Here we have to strip all the front +44 and replace with 0. Also, store the one without a prefix.
118
+ ["0#{w[3..-1]}", w[3..-1]]
119
+ }
120
+ # Unique
121
+ results = results.flatten.uniq
122
+
123
+ results.select { |w| w.length >= Rifle.settings.min_word_length }
124
+ end
117
125
 
126
+ def get_metaphones_from_word_set(words)
118
127
  # Removed ignored words
119
128
  words.subtract Rifle.settings.ignored_words
120
129
  # Get the parts
data/lib/rifle/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rifle
2
- VERSION = '0.1.7'
2
+ VERSION = '0.1.8'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rifle
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-20 00:00:00.000000000 Z
12
+ date: 2013-01-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  type: :runtime