smart_search 0.0.55 → 0.0.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ebbed44ee232de1e1b002de0114c895f22669cbe
4
- data.tar.gz: 6fe910045758f11f1c1fccc40229ca3b14919361
3
+ metadata.gz: d1dba8372dab82dd2cce0ebc37101e2ebcf2fba1
4
+ data.tar.gz: a71c65f0d8adac611ce88e75a43ea30d36a5d63a
5
5
  SHA512:
6
- metadata.gz: 92f42ecd8d1f22b42221f93453ff31df341d546771525183c8fef42cd900aa05db1148c3251832e75e28e4f38170916b9aee4bef9862bc8d72b06959f89a17eb
7
- data.tar.gz: b1d4e94ec2556002bce8a453d6830eae3eb6808f1af30580e988bf5ccb785ff4eee7014d6ad7951a3eae5b9d0a22e5b64eb72ad9763f6185c7c73fa00ed46495
6
+ metadata.gz: 6a932397b5b2eafae190335567419028ef0ad2547c2156c1aab1ca5b1aed4204caaa646c82bd1f0b062eecc88662f06ba06422b0c59336ff1cfc910c670c1e27
7
+ data.tar.gz: 6b544662bb2360c1bba224a580d02fc8d5b46153200d77734f2297471b8c67d7362deadc7c93d6fbbf7cd961617e9cb48fa8bb373e14584b88688cbe21edbbde
@@ -0,0 +1,5 @@
1
+ module SmartSearch
2
+ class SmartSearchEngine < Rails::Engine
3
+ isolate_namespace SmartSearch
4
+ end
5
+ end
data/lib/smart_search.rb CHANGED
@@ -1,7 +1,11 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  require "smart_search"
3
+ require "smart_similarity"
4
+ require "smart_search_history"
3
5
  require "add_search_tags"
4
6
 
7
+ require "smart_search/smart_search_engine"
8
+
5
9
  module SmartSearch
6
10
 
7
11
  def self.included(base)
@@ -9,18 +13,27 @@ module SmartSearch
9
13
  end
10
14
 
11
15
  module ClassMethods
16
+ # Enable SmartSearch for the current ActiveRecord model.
17
+ # accepts options:
18
+ # - :on, define which attributes to add to the search index
19
+ # - :conditions, define default scope for all queries made
20
+ # - :group, group by column
21
+ # - :order, order by column
22
+ # see readme for details
12
23
  def smart_search(options = {:on => [], :conditions => nil, :group => nil, :order => "created_at", :force => false})
13
24
  if table_exists?
14
25
  # Check if search_tags exists
15
26
  if !is_smart_search? || options[:force] == true
16
- puts "\nAdding SmartSearch to #{self.name}: #{options.inspect}"
17
- cattr_accessor :condition_default, :group_default, :tags, :order_default
27
+
28
+ cattr_accessor :condition_default, :group_default, :tags, :order_default, :enable_similarity
18
29
  send :include, InstanceMethods
19
30
  if self.column_names.index("search_tags").nil?
20
31
  ::AddSearchTags.add_to_table(self.table_name)
21
32
  end
22
33
  self.send(:before_save, :create_search_tags)
23
-
34
+
35
+ self.enable_similarity = true
36
+
24
37
  # options zuweisen
25
38
  if options[:conditions].is_a?(String) && !options[:conditions].blank?
26
39
  self.condition_default = options[:conditions]
@@ -41,22 +54,40 @@ module SmartSearch
41
54
  end
42
55
  end
43
56
 
57
+ # Verify if SmartSearch is already loaded for this model
44
58
  def is_smart_search?
45
59
  self.included_modules.include?(InstanceMethods)
46
60
  end
47
61
 
62
+ # defines where to look for a partial to load when displaying results for this model
48
63
  def result_template_path
49
64
  "/search/results/#{self.name.split("::").last.underscore}"
50
65
  end
51
66
 
67
+ # Search database for given search tags
52
68
  def find_by_tags(tags = "", options = {})
53
69
  if self.is_smart_search?
70
+
71
+ # Save Data for similarity analysis
72
+ if tags.size > 3
73
+ self.connection.execute("INSERT INTO `#{::SmartSearchHistory.table_name}` (`query`) VALUES ('#{tags.gsub(/[^a-zA-ZäöüÖÄÜß\ ]/, '')}');")
74
+ end
75
+
54
76
  tags = tags.split(" ")
55
77
 
56
78
  # Fallback for Empty String
57
79
  tags << "#" if tags.empty?
58
80
 
59
- tags.map! {|t| "search_tags LIKE '%#{t.downcase}%'"}
81
+ # Similarity
82
+ if self.enable_similarity == true
83
+ tags.map! do |t|
84
+ similars = SmartSimilarity.similars(t, :increment_counter => true).join("|")
85
+ "search_tags REGEXP '#{similars}'"
86
+ end
87
+
88
+ else
89
+ tags.map! {|t| "search_tags LIKE '%#{t}%'"}
90
+ end
60
91
 
61
92
 
62
93
  results = self.where("(#{tags.join(' AND ')})")
@@ -85,15 +116,24 @@ module SmartSearch
85
116
  end
86
117
  end
87
118
 
119
+ # reload search_tags for entire table based on the attributes defined in ':on' option passed to the 'smart_search' method
88
120
  def set_search_index
89
121
  s = self.all.size.to_f
90
122
  self.all.each_with_index do |a, i|
91
- a.create_search_tags
123
+ a.create_search_tags rescue nil
92
124
  a.send(:update_without_callbacks)
93
125
  done = ((i+1).to_f/s)*100
94
126
  printf "Set search index for #{self.name}: #{done}%% \r"
95
127
  end
96
128
  end
129
+
130
+ # Add all search tags of this table to the similarity index
131
+ def set_similarity_index
132
+
133
+ search_tags_list = self.connection.select_all("SELECT search_tags from #{self.table_name}").map {|r| r["search_tags"]}
134
+
135
+ SmartSimilarity.create_from_text(search_tags_list.join(" "))
136
+ end
97
137
 
98
138
  end
99
139
 
@@ -103,6 +143,7 @@ module SmartSearch
103
143
  self.class.result_template_path
104
144
  end
105
145
 
146
+ # create search tags for this very record based on the attributes defined in ':on' option passed to the 'Class.smart_search' method
106
147
  def create_search_tags
107
148
  tags = []
108
149
  self.class.tags.each do |tag|
@@ -0,0 +1,7 @@
1
+ # Saves all queries made so the data can be used to build a similarity index based on the queries made
2
+ # by actual users of the website
3
+ class SmartSearchHistory < ActiveRecord::Base
4
+
5
+
6
+
7
+ end
@@ -0,0 +1,125 @@
1
+ # This class is used to build the similarity index
2
+ class SmartSimilarity < ActiveRecord::Base
3
+ #= Configuration
4
+ serialize :similarities, Array
5
+ self.table_name = "smart_search_similarities"
6
+ require "amatch"
7
+
8
+ #== Associations
9
+ # => Stuff in Here
10
+
11
+ #== Plugins and modules
12
+ #=== PlugIns
13
+ # => Stuff in Here
14
+
15
+ #=== include Modules
16
+ # => Stuff in Here
17
+
18
+ #== Konstanten
19
+ # Defines the min. result of the word similarity check
20
+ SIMILARITY_FACTOR = 0.8
21
+ # Defines first similarity check method
22
+ SIMILARITY_METHOD_1 = :jarowinkler
23
+ # Defines second similarity check method
24
+ SIMILARITY_METHOD_2 = :levenshtein
25
+
26
+ # The average of both results is computed and compared with 'SIMILARITY_FACTOR'
27
+
28
+ # Limit Number of similar words
29
+ SIMILARITY_LIMIT = 8
30
+
31
+ #== Validation and Callbacks
32
+ #=== Validation
33
+
34
+ #=== Callbacks
35
+ # => Stuff in Here
36
+ # => END
37
+
38
+ # Create similarity data based on the given text
39
+ # This method is used to generate data from every source, e.g. file, url, single words etc.
40
+ def self.create_from_text(text)
41
+ # prepare text
42
+ prepared_text = text.downcase.split(/\b/).uniq
43
+ prepared_text = prepared_text.select {|w| w.size >= 3 && !w.match(/[0-9\-_<>\.\/(){}&\?"'@*+$!=,:'#;]/)}
44
+ list = {}
45
+ prepared_text.each do |word|
46
+ # Load index from database
47
+ words_in_db = self.find_by_phrase(word)
48
+ if words_in_db.nil?
49
+ self.connection.execute "INSERT INTO `#{self.table_name}` (`phrase`, `ind`) VALUES ('#{word}', '#{word[0..1]}');"
50
+ current = []
51
+ else
52
+ current = words_in_db.similarities
53
+ end
54
+
55
+ current += prepared_text.select {|w| w != word && self.match_words(w,word) >= SIMILARITY_FACTOR}
56
+
57
+ list[word] = current.uniq
58
+ end
59
+
60
+ # Write to Database
61
+ list.each do |word, sims|
62
+ sims = sims.sort_by {|s| self.match_words(s,word) }.reverse.first(SIMILARITY_LIMIT)
63
+
64
+ self.connection.execute 'UPDATE %s set similarities = "%s" where phrase = "%s"' % [self.table_name, sims.to_yaml, word] rescue nil
65
+ end
66
+ end
67
+
68
+ # Add one single word to the database and check if there are already similar words
69
+ def self.add_word(word)
70
+ words = [word]
71
+ phrases = self.connection.select_all("SELECT phrase from smart_search_similarities").map {|r| r["phrase"] }
72
+ words += phrases.select {|p| self.match_words(p,word) >= SIMILARITY_FACTOR }
73
+
74
+ self.create_from_text(words.join(" "))
75
+ end
76
+
77
+ # Load an entire file to the index.
78
+ # Best used for loading big dictionary files.
79
+ # Uses 'spawnling' to split the data into up to 8 stacks and load them simultaneously
80
+ def self.load_file(path)
81
+ count = %x{wc -l #{path}}.split[0].to_i
82
+ puts "loading file: #{path}"
83
+ puts "=> #{count} rows"
84
+ File.open(path, "r").to_a.seperate([8,count].min).each_with_index do |stack, si|
85
+ Spawnling.new(:argv => "sim-file-#{si}") do
86
+ QueryLog.info "sim-file-#{si}"
87
+ stack.each_with_index do |l,i|
88
+ QueryLog.info "#{si}: #{i.fdiv(count).round(4)} %"
89
+ self.add_word(l)
90
+ end
91
+ end
92
+ end
93
+ end
94
+
95
+ # Load words from website and save them to index
96
+ def self.load_url(url)
97
+ self.create_from_text(%x(curl #{url}))
98
+ end
99
+
100
+ # Loads your created query history and saves them to the index
101
+ def self.load_from_query_history
102
+ queries = self.connection.select_all("SELECT query from `#{::SmartSearchHistory.table_name}`").map {|r| r["query"]}.join(" ")
103
+ self.create_from_text(queries)
104
+ self.connection.execute("TRUNCATE `#{::SmartSearchHistory.table_name}`")
105
+ end
106
+
107
+ # Get array of similar words including orig word
108
+ def self.similars(word, options = {})
109
+ list = self.where(:phrase => word).first
110
+ if list.nil?
111
+ return [word]
112
+ else
113
+ self.increment_counter(:count, list.id)
114
+ return [word, list.similarities].flatten
115
+ end
116
+ end
117
+
118
+ # Return match score for two words based on the two defined similarity methods
119
+ def self.match_words(word1, word2)
120
+ x1 = word1.send("#{SIMILARITY_METHOD_1}_similar", word2)
121
+ x2 = word1.send("#{SIMILARITY_METHOD_2}_similar", word2)
122
+ return (x1+x2)/2.0
123
+ end
124
+
125
+ end
@@ -0,0 +1,30 @@
1
+ namespace :smart_search do
2
+ desc "Load similarity data from query history"
3
+ task :similarity_from_query_history => :environment do
4
+ require File.expand_path("../../smart_similarity", __FILE__)
5
+ SmartSimilarity.load_from_query_history
6
+ end
7
+
8
+ desc "Load similarity data from file - Use FILE=path/to/file to specify file"
9
+ task :similarity_from_file => :environment do
10
+ require File.expand_path("../../smart_similarity", __FILE__)
11
+ if ENV['FILE_PATH'].nil?
12
+ raise ArgumentError, "No file specified. "
13
+ elsif !File.exist?(ENV['FILE_PATH'])
14
+ raise ArgumentError, "File not found "
15
+ else
16
+ SmartSimilarity.load_file(ENV['FILE_PATH'])
17
+ end
18
+ end
19
+
20
+ desc "Load similarity data from url - Use URL=http://.../ to specify url - Requires 'curl'"
21
+ task :similarity_from_url => :environment do
22
+ require File.expand_path("../../smart_similarity", __FILE__)
23
+ if ENV['URL'].nil?
24
+ raise ArgumentError, "No URL specified. "
25
+ else
26
+ SmartSimilarity.load_url(ENV['URL'])
27
+ end
28
+ end
29
+
30
+ end
metadata CHANGED
@@ -1,15 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smart_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.55
4
+ version: 0.0.61
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Eck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-11 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2014-02-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 3.2.9
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 3.2.9
27
+ - !ruby/object:Gem::Dependency
28
+ name: amatch
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: spawnling
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: friendly_extensions
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.0.61
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.0.61
13
69
  description: Adds easy to use full-text search to ActiveRecord models, based the attributes
14
70
  you want to search.
15
71
  email: it-support@friends-systems.de
@@ -20,6 +76,10 @@ files:
20
76
  - config/routes.rb
21
77
  - lib/add_search_tags.rb
22
78
  - lib/smart_search.rb
79
+ - lib/smart_search/smart_search_engine.rb
80
+ - lib/smart_search_history.rb
81
+ - lib/smart_similarity.rb
82
+ - lib/tasks/smart_search.rake
23
83
  - test/test_helper.rb
24
84
  - test/unit/smart_search_test.rb
25
85
  homepage: https://rubygems.org/gems/smart_search