smart_search 0.0.55 → 0.0.61

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ebbed44ee232de1e1b002de0114c895f22669cbe
4
- data.tar.gz: 6fe910045758f11f1c1fccc40229ca3b14919361
3
+ metadata.gz: d1dba8372dab82dd2cce0ebc37101e2ebcf2fba1
4
+ data.tar.gz: a71c65f0d8adac611ce88e75a43ea30d36a5d63a
5
5
  SHA512:
6
- metadata.gz: 92f42ecd8d1f22b42221f93453ff31df341d546771525183c8fef42cd900aa05db1148c3251832e75e28e4f38170916b9aee4bef9862bc8d72b06959f89a17eb
7
- data.tar.gz: b1d4e94ec2556002bce8a453d6830eae3eb6808f1af30580e988bf5ccb785ff4eee7014d6ad7951a3eae5b9d0a22e5b64eb72ad9763f6185c7c73fa00ed46495
6
+ metadata.gz: 6a932397b5b2eafae190335567419028ef0ad2547c2156c1aab1ca5b1aed4204caaa646c82bd1f0b062eecc88662f06ba06422b0c59336ff1cfc910c670c1e27
7
+ data.tar.gz: 6b544662bb2360c1bba224a580d02fc8d5b46153200d77734f2297471b8c67d7362deadc7c93d6fbbf7cd961617e9cb48fa8bb373e14584b88688cbe21edbbde
@@ -0,0 +1,5 @@
1
+ module SmartSearch
2
+ class SmartSearchEngine < Rails::Engine
3
+ isolate_namespace SmartSearch
4
+ end
5
+ end
data/lib/smart_search.rb CHANGED
@@ -1,7 +1,11 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  require "smart_search"
3
+ require "smart_similarity"
4
+ require "smart_search_history"
3
5
  require "add_search_tags"
4
6
 
7
+ require "smart_search/smart_search_engine"
8
+
5
9
  module SmartSearch
6
10
 
7
11
  def self.included(base)
@@ -9,18 +13,27 @@ module SmartSearch
9
13
  end
10
14
 
11
15
  module ClassMethods
16
+ # Enable SmartSearch for the current ActiveRecord model.
17
+ # accepts options:
18
+ # - :on, define which attributes to add to the search index
19
+ # - :conditions, define default scope for all queries made
20
+ # - :group, group by column
21
+ # - :order, order by column
22
+ # see readme for details
12
23
  def smart_search(options = {:on => [], :conditions => nil, :group => nil, :order => "created_at", :force => false})
13
24
  if table_exists?
14
25
  # Check if search_tags exists
15
26
  if !is_smart_search? || options[:force] == true
16
- puts "\nAdding SmartSearch to #{self.name}: #{options.inspect}"
17
- cattr_accessor :condition_default, :group_default, :tags, :order_default
27
+
28
+ cattr_accessor :condition_default, :group_default, :tags, :order_default, :enable_similarity
18
29
  send :include, InstanceMethods
19
30
  if self.column_names.index("search_tags").nil?
20
31
  ::AddSearchTags.add_to_table(self.table_name)
21
32
  end
22
33
  self.send(:before_save, :create_search_tags)
23
-
34
+
35
+ self.enable_similarity = true
36
+
24
37
  # assign options
25
38
  if options[:conditions].is_a?(String) && !options[:conditions].blank?
26
39
  self.condition_default = options[:conditions]
@@ -41,22 +54,40 @@ module SmartSearch
41
54
  end
42
55
  end
43
56
 
57
+ # Verify if SmartSearch already loaded for this model
44
58
  def is_smart_search?
45
59
  self.included_modules.include?(InstanceMethods)
46
60
  end
47
61
 
62
+ # defines where to look for a partial to load when displaying results for this model
48
63
  def result_template_path
49
64
  "/search/results/#{self.name.split("::").last.underscore}"
50
65
  end
51
66
 
67
+ # Search the database for the given search tags
52
68
  def find_by_tags(tags = "", options = {})
53
69
  if self.is_smart_search?
70
+
71
+ # Save Data for similarity analysis
72
+ if tags.size > 3
73
+ self.connection.execute("INSERT INTO `#{::SmartSearchHistory.table_name}` (`query`) VALUES ('#{tags.gsub(/[^a-zA-ZäöüÖÄÜß\ ]/, '')}');")
74
+ end
75
+
54
76
  tags = tags.split(" ")
55
77
 
56
78
  # Fallback for Empty String
57
79
  tags << "#" if tags.empty?
58
80
 
59
- tags.map! {|t| "search_tags LIKE '%#{t.downcase}%'"}
81
+ # Similarity
82
+ if self.enable_similarity == true
83
+ tags.map! do |t|
84
+ similars = SmartSimilarity.similars(t, :increment_counter => true).join("|")
85
+ "search_tags REGEXP '#{similars}'"
86
+ end
87
+
88
+ else
89
+ tags.map! {|t| "search_tags LIKE '%#{t}%'"}
90
+ end
60
91
 
61
92
 
62
93
  results = self.where("(#{tags.join(' AND ')})")
@@ -85,15 +116,24 @@ module SmartSearch
85
116
  end
86
117
  end
87
118
 
119
+ # reload search_tags for entire table based on the attributes defined in ':on' option passed to the 'smart_search' method
88
120
  def set_search_index
89
121
  s = self.all.size.to_f
90
122
  self.all.each_with_index do |a, i|
91
- a.create_search_tags
123
+ a.create_search_tags rescue nil
92
124
  a.send(:update_without_callbacks)
93
125
  done = ((i+1).to_f/s)*100
94
126
  printf "Set search index for #{self.name}: #{done}%% \r"
95
127
  end
96
128
  end
129
+
130
+ # Add all search tags of this table to the similarity index
131
+ def set_similarity_index
132
+
133
+ search_tags_list = self.connection.select_all("SELECT search_tags from #{self.table_name}").map {|r| r["search_tags"]}
134
+
135
+ SmartSimilarity.create_from_text(search_tags_list.join(" "))
136
+ end
97
137
 
98
138
  end
99
139
 
@@ -103,6 +143,7 @@ module SmartSearch
103
143
  self.class.result_template_path
104
144
  end
105
145
 
146
+ # create search tags for this very record based on the attributes defined in ':on' option passed to the 'Class.smart_search' method
106
147
  def create_search_tags
107
148
  tags = []
108
149
  self.class.tags.each do |tag|
@@ -0,0 +1,7 @@
1
+ # Saves all queries made so the data can be used to build a similarity index based on the queries made
2
+ # by actual users of the website
3
+ class SmartSearchHistory < ActiveRecord::Base
4
+
5
+
6
+
7
+ end
@@ -0,0 +1,125 @@
1
+ # This class is used to build the similarity index
2
+ class SmartSimilarity < ActiveRecord::Base
3
+ #= Configuration
4
+ serialize :similarities, Array
5
+ self.table_name = "smart_search_similarities"
6
+ require "amatch"
7
+
8
+ #== Associations
9
+ # => Stuff in Here
10
+
11
+ #== Plugins and modules
12
+ #=== PlugIns
13
+ # => Stuff in Here
14
+
15
+ #=== include Modules
16
+ # => Stuff in Here
17
+
18
+ #== Constants
19
+ # Defines the minimum result of the word similarity check
20
+ SIMILARITY_FACTOR = 0.8
21
+ # Defines the first similarity check method
22
+ SIMILARITY_METHOD_1 = :jarowinkler
23
+ # Defines the second similarity check method
24
+ SIMILARITY_METHOD_2 = :levenshtein
25
+
26
+ # The average of both results is generated and compared with 'SIMILARITY_FACTOR'
27
+
28
+ # Limit Number of similar words
29
+ SIMILARITY_LIMIT = 8
30
+
31
+ #== Validation and Callbacks
32
+ #=== Validation
33
+
34
+ #=== Callbacks
35
+ # => Stuff in Here
36
+ # => END
37
+
38
+ # Create similarity data based on the given text
39
+ # This method is used to generate data from every source, e.g. file, url, single words etc.
40
+ def self.create_from_text(text)
41
+ # prepare text
42
+ prepared_text = text.downcase.split(/\b/).uniq
43
+ prepared_text = prepared_text.select {|w| w.size >= 3 && !w.match(/[0-9\-_<>\.\/(){}&\?"'@*+$!=,:'#;]/)}
44
+ list = {}
45
+ prepared_text.each do |word|
46
+ # Load index from database
47
+ words_in_db = self.find_by_phrase(word)
48
+ if words_in_db.nil?
49
+ self.connection.execute "INSERT INTO `#{self.table_name}` (`phrase`, `ind`) VALUES ('#{word}', '#{word[0..1]}');"
50
+ current = []
51
+ else
52
+ current = words_in_db.similarities
53
+ end
54
+
55
+ current += prepared_text.select {|w| w != word && self.match_words(w,word) >= SIMILARITY_FACTOR}
56
+
57
+ list[word] = current.uniq
58
+ end
59
+
60
+ # Write to Database
61
+ list.each do |word, sims|
62
+ sims = sims.sort_by {|s| self.match_words(s,word) }.reverse.first(SIMILARITY_LIMIT)
63
+
64
+ self.connection.execute 'UPDATE %s set similarities = "%s" where phrase = "%s"' % [self.table_name, sims.to_yaml, word] rescue nil
65
+ end
66
+ end
67
+
68
+ # Add a single word to the database and check whether similar words already exist
69
+ def self.add_word(word)
70
+ words = [word]
71
+ phrases = self.connection.select_all("SELECT phrase from smart_search_similarities").map {|r| r["phrase"] }
72
+ words += phrases.select {|p| self.match_words(p,word) >= SIMILARITY_FACTOR }
73
+
74
+ self.create_from_text(words.join(" "))
75
+ end
76
+
77
+ # Load an entire file to the index.
78
+ # Best used for loading big dictionary files.
79
+ # Uses 'spawnling' to split the data into 8 stacks and load them simultaneously
80
+ def self.load_file(path)
81
+ count = %x{wc -l #{path}}.split[0].to_i
82
+ puts "loading file: #{path}"
83
+ puts "=> #{count} rows"
84
+ File.open(path, "r").to_a.seperate([8,count].min).each_with_index do |stack, si|
85
+ Spawnling.new(:argv => "sim-file-#{si}") do
86
+ QueryLog.info "sim-file-#{si}"
87
+ stack.each_with_index do |l,i|
88
+ QueryLog.info "#{si}: #{i.fdiv(count).round(4)} %"
89
+ self.add_word(l)
90
+ end
91
+ end
92
+ end
93
+ end
94
+
95
+ # Load words from website and save them to index
96
+ def self.load_url(url)
97
+ self.create_from_text(%x(curl #{url}))
98
+ end
99
+
100
+ # Loads your created query history and saves them to the index
101
+ def self.load_from_query_history
102
+ queries = self.connection.select_all("SELECT query from `#{::SmartSearchHistory.table_name}`").map {|r| r["query"]}.join(" ")
103
+ self.create_from_text(queries)
104
+ self.connection.execute("TRUNCATE `#{::SmartSearchHistory.table_name}`")
105
+ end
106
+
107
+ # Get an array of similar words, including the original word
108
+ def self.similars(word, options = {})
109
+ list = self.where(:phrase => word).first
110
+ if list.nil?
111
+ return [word]
112
+ else
113
+ self.increment_counter(:count, list.id)
114
+ return [word, list.similarities].flatten
115
+ end
116
+ end
117
+
118
+ # Return the match score for two words based on the two defined similarity methods
119
+ def self.match_words(word1, word2)
120
+ x1 = word1.send("#{SIMILARITY_METHOD_1}_similar", word2)
121
+ x2 = word1.send("#{SIMILARITY_METHOD_2}_similar", word2)
122
+ return (x1+x2)/2.0
123
+ end
124
+
125
+ end
@@ -0,0 +1,30 @@
1
+ namespace :smart_search do
2
+ desc "Load similarity data from query history"
3
+ task :similarity_from_query_history => :environment do
4
+ require File.expand_path("../../smart_similarity", __FILE__)
5
+ SmartSimilarity.load_from_query_history
6
+ end
7
+
8
+ desc "Load similarity data from file - Use FILE=path/to/file to specify file"
9
+ task :similarity_from_file => :environment do
10
+ require File.expand_path("../../smart_similarity", __FILE__)
11
+ if ENV['FILE_PATH'].nil?
12
+ raise ArgumentError, "No file specified. "
13
+ elsif !File.exist?(ENV['FILE_PATH'])
14
+ raise ArgumentError, "File not found "
15
+ else
16
+ SmartSimilarity.load_file(ENV['FILE_PATH'])
17
+ end
18
+ end
19
+
20
+ desc "Load similarity data from url - Use URL=http://.../ to specify url - Requires 'curl'"
21
+ task :similarity_from_url => :environment do
22
+ require File.expand_path("../../smart_similarity", __FILE__)
23
+ if ENV['URL'].nil?
24
+ raise ArgumentError, "No URL specified. "
25
+ else
26
+ SmartSimilarity.load_url(ENV['URL'])
27
+ end
28
+ end
29
+
30
+ end
metadata CHANGED
@@ -1,15 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smart_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.55
4
+ version: 0.0.61
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Eck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-11 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2014-02-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 3.2.9
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 3.2.9
27
+ - !ruby/object:Gem::Dependency
28
+ name: amatch
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: spawnling
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: friendly_extensions
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.0.61
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.0.61
13
69
  description: Adds easy to use full-text search to ActiveRecord models, based the attributes
14
70
  you want to search.
15
71
  email: it-support@friends-systems.de
@@ -20,6 +76,10 @@ files:
20
76
  - config/routes.rb
21
77
  - lib/add_search_tags.rb
22
78
  - lib/smart_search.rb
79
+ - lib/smart_search/smart_search_engine.rb
80
+ - lib/smart_search_history.rb
81
+ - lib/smart_similarity.rb
82
+ - lib/tasks/smart_search.rake
23
83
  - test/test_helper.rb
24
84
  - test/unit/smart_search_test.rb
25
85
  homepage: https://rubygems.org/gems/smart_search