smart_search 0.0.55 → 0.0.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/smart_search/smart_search_engine.rb +5 -0
- data/lib/smart_search.rb +46 -5
- data/lib/smart_search_history.rb +7 -0
- data/lib/smart_similarity.rb +125 -0
- data/lib/tasks/smart_search.rake +30 -0
- metadata +63 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1dba8372dab82dd2cce0ebc37101e2ebcf2fba1
|
4
|
+
data.tar.gz: a71c65f0d8adac611ce88e75a43ea30d36a5d63a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a932397b5b2eafae190335567419028ef0ad2547c2156c1aab1ca5b1aed4204caaa646c82bd1f0b062eecc88662f06ba06422b0c59336ff1cfc910c670c1e27
|
7
|
+
data.tar.gz: 6b544662bb2360c1bba224a580d02fc8d5b46153200d77734f2297471b8c67d7362deadc7c93d6fbbf7cd961617e9cb48fa8bb373e14584b88688cbe21edbbde
|
data/lib/smart_search.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
require "smart_search"
|
3
|
+
require "smart_similarity"
|
4
|
+
require "smart_search_history"
|
3
5
|
require "add_search_tags"
|
4
6
|
|
7
|
+
require "smart_search/smart_search_engine"
|
8
|
+
|
5
9
|
module SmartSearch
|
6
10
|
|
7
11
|
def self.included(base)
|
@@ -9,18 +13,27 @@ module SmartSearch
|
|
9
13
|
end
|
10
14
|
|
11
15
|
module ClassMethods
|
16
|
+
# Enable SmartSearch for the current ActiveRecord model.
|
17
|
+
# accepts options:
|
18
|
+
# - :on, define which attributes to add to the search index
|
19
|
+
# - :conditions, define default scope for all queries made
|
20
|
+
# - :group, group by column
|
21
|
+
# - :order, order by column
|
22
|
+
# see readme for details
|
12
23
|
def smart_search(options = {:on => [], :conditions => nil, :group => nil, :order => "created_at", :force => false})
|
13
24
|
if table_exists?
|
14
25
|
# Check if search_tags exists
|
15
26
|
if !is_smart_search? || options[:force] == true
|
16
|
-
|
17
|
-
cattr_accessor :condition_default, :group_default, :tags, :order_default
|
27
|
+
|
28
|
+
cattr_accessor :condition_default, :group_default, :tags, :order_default, :enable_similarity
|
18
29
|
send :include, InstanceMethods
|
19
30
|
if self.column_names.index("search_tags").nil?
|
20
31
|
::AddSearchTags.add_to_table(self.table_name)
|
21
32
|
end
|
22
33
|
self.send(:before_save, :create_search_tags)
|
23
|
-
|
34
|
+
|
35
|
+
self.enable_similarity = true
|
36
|
+
|
24
37
|
# assign options
|
25
38
|
if options[:conditions].is_a?(String) && !options[:conditions].blank?
|
26
39
|
self.condition_default = options[:conditions]
|
@@ -41,22 +54,40 @@ module SmartSearch
|
|
41
54
|
end
|
42
55
|
end
|
43
56
|
|
57
|
+
# Reports whether SmartSearch has already been loaded for this model, i.e.
# whether the InstanceMethods module is among the modules it includes.
def is_smart_search?
  include?(InstanceMethods)
end
|
47
61
|
|
62
|
+
# Path of the partial that is loaded when results of this model are displayed:
# "/search/results/" followed by the underscored class name without its
# namespace.
def result_template_path
  unqualified_name = name.split("::").last
  "/search/results/" + unqualified_name.underscore
end
|
51
66
|
|
67
|
+
# Search the database for the given search tags
|
52
68
|
def find_by_tags(tags = "", options = {})
|
53
69
|
if self.is_smart_search?
|
70
|
+
|
71
|
+
# Save Data for similarity analysis
|
72
|
+
if tags.size > 3
|
73
|
+
self.connection.execute("INSERT INTO `#{::SmartSearchHistory.table_name}` (`query`) VALUES ('#{tags.gsub(/[^a-zA-ZäöüÖÄÜß\ ]/, '')}');")
|
74
|
+
end
|
75
|
+
|
54
76
|
tags = tags.split(" ")
|
55
77
|
|
56
78
|
# Fallback for Empty String
|
57
79
|
tags << "#" if tags.empty?
|
58
80
|
|
59
|
-
|
81
|
+
# Similarity
|
82
|
+
if self.enable_similarity == true
|
83
|
+
tags.map! do |t|
|
84
|
+
similars = SmartSimilarity.similars(t, :increment_counter => true).join("|")
|
85
|
+
"search_tags REGEXP '#{similars}'"
|
86
|
+
end
|
87
|
+
|
88
|
+
else
|
89
|
+
tags.map! {|t| "search_tags LIKE '%#{t}%'"}
|
90
|
+
end
|
60
91
|
|
61
92
|
|
62
93
|
results = self.where("(#{tags.join(' AND ')})")
|
@@ -85,15 +116,24 @@ module SmartSearch
|
|
85
116
|
end
|
86
117
|
end
|
87
118
|
|
119
|
+
# Reloads the search_tags of the entire table, based on the attributes
# defined in the ':on' option passed to the 'smart_search' method.
#
# Every record gets its tags rebuilt and is then written with
# update_without_callbacks; progress is printed to stdout as a percentage.
# A record whose tags cannot be built is reported on stderr and still
# written, so a single bad record does not abort the whole run.
def set_search_index
  # Load the records once instead of querying the table twice.
  records = self.all.to_a
  total = records.size.to_f

  records.each_with_index do |record, index|
    begin
      record.create_search_tags
    rescue StandardError => e
      # Best effort as before, but no longer silent.
      warn "Could not create search tags for a #{self.name} record: #{e.class}: #{e.message}"
    end
    record.send(:update_without_callbacks)

    done = ((index + 1).to_f / total) * 100
    printf "Set search index for #{self.name}: #{done}%% \r"
  end
end
|
129
|
+
|
130
|
+
# Feeds the search_tags of every row of this model's table into the
# similarity index via SmartSimilarity.create_from_text.
def set_similarity_index
  rows = connection.select_all("SELECT search_tags from #{table_name}")
  combined_tags = rows.map { |row| row["search_tags"] }.join(" ")

  SmartSimilarity.create_from_text(combined_tags)
end
|
97
137
|
|
98
138
|
end
|
99
139
|
|
@@ -103,6 +143,7 @@ module SmartSearch
|
|
103
143
|
self.class.result_template_path
|
104
144
|
end
|
105
145
|
|
146
|
+
# create search tags for this very record based on the attributes defined in ':on' option passed to the 'Class.smart_search' method
|
106
147
|
def create_search_tags
|
107
148
|
tags = []
|
108
149
|
self.class.tags.each do |tag|
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# This class is used to build the similarity index
|
2
|
+
class SmartSimilarity < ActiveRecord::Base
|
3
|
+
#= Configuration
|
4
|
+
serialize :similarities, Array
|
5
|
+
self.table_name = "smart_search_similarities"
|
6
|
+
require "amatch"
|
7
|
+
|
8
|
+
#== Associations
|
9
|
+
# => Stuff in Here
|
10
|
+
|
11
|
+
#== Plugins and modules
|
12
|
+
#=== PlugIns
|
13
|
+
# => Stuff in Here
|
14
|
+
|
15
|
+
#=== include Modules
|
16
|
+
# => Stuff in Here
|
17
|
+
|
18
|
+
#== Constants
|
19
|
+
# Defines the min. result of the word similarity check
|
20
|
+
SIMILARITY_FACTOR = 0.8
|
21
|
+
# Defines first similarity check method
|
22
|
+
SIMILARITY_METHOD_1 = :jarowinkler
|
23
|
+
# Defines second similarity check method
|
24
|
+
SIMILARITY_METHOD_2 = :levenshtein
|
25
|
+
|
26
|
+
# An average of both results will be generated and compared with 'SIMILARITY_FACTOR'
|
27
|
+
|
28
|
+
# Limit Number of similar words
|
29
|
+
SIMILARITY_LIMIT = 8
|
30
|
+
|
31
|
+
#== Validation and Callbacks
|
32
|
+
#=== Validation
|
33
|
+
|
34
|
+
#=== Callbacks
|
35
|
+
# => Stuff in Here
|
36
|
+
# => END
|
37
|
+
|
38
|
+
# Creates similarity data based on the given text.
# This method is used to generate data from every source, e.g. file, url,
# single words etc.
#
# The text is downcased and split into words. Words shorter than three
# characters, or containing anything but letters, are ignored. Every
# remaining word gets a row in the index (inserted when not yet present), and
# all other words of the same text that score at least SIMILARITY_FACTOR in
# match_words are merged into the similarities already stored for it. Only
# the SIMILARITY_LIMIT best matches are written back per word.
#
# text - String to index; nil is treated as an empty text.
#
# Returns the Hash of word => merged similar words built from the text.
def self.create_from_text(text)
  # split(/\b/) also yields the runs of whitespace and punctuation between the
  # words; accepting letter-only tokens keeps those (and any token carrying
  # digits or symbols) out of the index.
  words = text.to_s.downcase.split(/\b/).uniq
  words = words.select { |w| w.size >= 3 && w =~ /\A[[:alpha:]]+\z/ }

  conn = self.connection
  table = quoted_table_name
  phrase_col = conn.quote_column_name("phrase")
  ind_col = conn.quote_column_name("ind")
  similarities_col = conn.quote_column_name("similarities")

  list = {}
  words.each do |word|
    # Load index from database
    entry = self.find_by_phrase(word)
    if entry.nil?
      # Values are quoted by the connection instead of being pasted into the SQL.
      conn.execute(
        "INSERT INTO #{table} (#{phrase_col}, #{ind_col}) " \
        "VALUES (#{conn.quote(word)}, #{conn.quote(word[0..1])})"
      )
      current = []
    else
      current = entry.similarities
    end

    current += words.select { |w| w != word && self.match_words(w, word) >= SIMILARITY_FACTOR }
    list[word] = current.uniq
  end

  # Write to database
  list.each do |word, sims|
    best = sims.sort_by { |s| self.match_words(s, word) }.reverse.first(SIMILARITY_LIMIT)

    begin
      conn.execute(
        "UPDATE #{table} SET #{similarities_col} = #{conn.quote(best.to_yaml)} " \
        "WHERE #{phrase_col} = #{conn.quote(word)}"
      )
    rescue StandardError => e
      # Still best effort: a failed update must not stop the remaining words,
      # but it is reported instead of being swallowed.
      warn "SmartSimilarity: could not store similarities for #{word.inspect}: #{e.message}"
    end
  end
end
|
67
|
+
|
68
|
+
# Adds one single word to the database and links it with similar phrases
# that are already stored.
#
# The word is stripped and downcased first (create_from_text downcases
# everything it stores, so phrases in the index are lower case). It is then
# compared with every stored phrase, and handed to create_from_text together
# with all phrases scoring at least SIMILARITY_FACTOR in match_words.
#
# word - the word to add; blank input is ignored and yields an empty Hash.
def self.add_word(word)
  # Lines read from a file still carry their newline, which would skew the score.
  word = word.to_s.strip.downcase
  return {} if word.empty?

  # Use the model's own table name rather than a hard-coded one.
  phrases = self.connection.select_values("SELECT phrase FROM #{quoted_table_name}")
  similar = phrases.select { |phrase| self.match_words(phrase, word) >= SIMILARITY_FACTOR }

  self.create_from_text(([word] + similar).join(" "))
end
|
76
|
+
|
77
|
+
# Loads an entire file into the index, one word per line.
# Best used for loading big dictionary files.
# The lines are split into at most 8 stacks, and every stack is processed in
# its own Spawnling block.
#
# path - path of the file to load.
#
# NOTE(review): Array#seperate, Spawnling and QueryLog are not defined in this
# file; presumably they come from friendly_extensions, spawnling and the host
# application - confirm.
def self.load_file(path)
  # Read the lines in Ruby instead of shelling out to `wc -l`: the path never
  # reaches a shell, the file handle is closed again, and a last line without
  # trailing newline is counted too.
  lines = File.readlines(path)
  count = lines.size
  puts "loading file: #{path}"
  puts "=> #{count} rows"
  return if count.zero? # nothing to split into stacks

  lines.seperate([8, count].min).each_with_index do |stack, si|
    Spawnling.new(:argv => "sim-file-#{si}") do
      QueryLog.info "sim-file-#{si}"
      stack.each_with_index do |line, i|
        # Progress within this stack, as an actual percentage.
        QueryLog.info "#{si}: #{(i.fdiv(stack.size) * 100).round(2)} %"
        self.add_word(line)
      end
    end
  end
end
|
94
|
+
|
95
|
+
# Loads the words of a website and saves them to the index.
# The document is fetched with the 'curl' executable and its body is handed
# to create_from_text.
#
# url - address of the document to fetch.
def self.load_url(url)
  # Argument-array form: curl is started without a shell, so the URL cannot
  # inject commands. "--" keeps a URL starting with "-" from being read as a
  # curl option.
  body = IO.popen(["curl", "--", url.to_s]) { |io| io.read }
  self.create_from_text(body)
end
|
99
|
+
|
100
|
+
# Loads the recorded query history into the index: every stored query is
# joined into one text for create_from_text, then the history table is
# truncated.
def self.load_from_query_history
  history_table = ::SmartSearchHistory.table_name
  rows = self.connection.select_all("SELECT query from `#{history_table}`")
  recorded_queries = rows.map { |row| row["query"] }

  self.create_from_text(recorded_queries.join(" "))
  self.connection.execute("TRUNCATE `#{history_table}`")
end
|
106
|
+
|
107
|
+
# Gets an array of similar words, including the original word as first entry.
#
# word    - the phrase to look up.
# options - Hash of options:
#           :increment_counter - whether the 'count' counter of a found
#                                phrase is incremented (default: true).
#
# Returns [word] when the phrase is not in the index, otherwise the word
# followed by its stored similarities.
def self.similars(word, options = {})
  entry = self.where(:phrase => word).first
  return [word] if entry.nil?

  # The option was accepted but ignored before. Defaulting to true keeps the
  # previous always-increment behaviour for callers that pass nothing.
  self.increment_counter(:count, entry.id) if options.fetch(:increment_counter, true)

  [word, entry.similarities].flatten
end
|
117
|
+
|
118
|
+
# Returns the match score for two words, based on the two configured
# similarity methods: word1 is sent "<method>_similar" with word2 for
# SIMILARITY_METHOD_1 and SIMILARITY_METHOD_2, and the two results are
# averaged.
def self.match_words(word1, word2)
  scores = [SIMILARITY_METHOD_1, SIMILARITY_METHOD_2].map do |algorithm|
    word1.send("#{algorithm}_similar", word2)
  end

  scores.inject(:+) / 2.0
end
|
124
|
+
|
125
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Rake tasks that fill the SmartSimilarity index from different sources.
# Every task depends on :environment and loads lib/smart_similarity.rb itself.
namespace :smart_search do
  desc "Load similarity data from query history"
  task :similarity_from_query_history => :environment do
    require File.expand_path("../../smart_similarity", __FILE__)
    SmartSimilarity.load_from_query_history
  end

  desc "Load similarity data from file - Use FILE=path/to/file to specify file"
  task :similarity_from_file => :environment do
    require File.expand_path("../../smart_similarity", __FILE__)
    # The description advertises FILE=, while the task used to read only
    # FILE_PATH=. Both are accepted, FILE taking precedence.
    path = ENV['FILE'] || ENV['FILE_PATH']
    if path.nil?
      raise ArgumentError, "No file specified. "
    elsif !File.exist?(path)
      raise ArgumentError, "File not found "
    else
      SmartSimilarity.load_file(path)
    end
  end

  desc "Load similarity data from url - Use URL=http://.../ to specify url - Requires 'curl'"
  task :similarity_from_url => :environment do
    require File.expand_path("../../smart_similarity", __FILE__)
    if ENV['URL'].nil?
      raise ArgumentError, "No URL specified. "
    else
      SmartSimilarity.load_url(ENV['URL'])
    end
  end
end
|
metadata
CHANGED
@@ -1,15 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smart_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.61
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Eck
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
11
|
+
date: 2014-02-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rails
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.2.9
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.2.9
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: amatch
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: spawnling
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: friendly_extensions
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.0.61
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.0.61
|
13
69
|
description: Adds easy to use full-text search to ActiveRecord models, based the attributes
|
14
70
|
you want to search.
|
15
71
|
email: it-support@friends-systems.de
|
@@ -20,6 +76,10 @@ files:
|
|
20
76
|
- config/routes.rb
|
21
77
|
- lib/add_search_tags.rb
|
22
78
|
- lib/smart_search.rb
|
79
|
+
- lib/smart_search/smart_search_engine.rb
|
80
|
+
- lib/smart_search_history.rb
|
81
|
+
- lib/smart_similarity.rb
|
82
|
+
- lib/tasks/smart_search.rake
|
23
83
|
- test/test_helper.rb
|
24
84
|
- test/unit/smart_search_test.rb
|
25
85
|
homepage: https://rubygems.org/gems/smart_search
|