smart_search 0.0.55 → 0.0.61
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/smart_search/smart_search_engine.rb +5 -0
- data/lib/smart_search.rb +46 -5
- data/lib/smart_search_history.rb +7 -0
- data/lib/smart_similarity.rb +125 -0
- data/lib/tasks/smart_search.rake +30 -0
- metadata +63 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1dba8372dab82dd2cce0ebc37101e2ebcf2fba1
|
4
|
+
data.tar.gz: a71c65f0d8adac611ce88e75a43ea30d36a5d63a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a932397b5b2eafae190335567419028ef0ad2547c2156c1aab1ca5b1aed4204caaa646c82bd1f0b062eecc88662f06ba06422b0c59336ff1cfc910c670c1e27
|
7
|
+
data.tar.gz: 6b544662bb2360c1bba224a580d02fc8d5b46153200d77734f2297471b8c67d7362deadc7c93d6fbbf7cd961617e9cb48fa8bb373e14584b88688cbe21edbbde
|
data/lib/smart_search.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
require "smart_search"
|
3
|
+
require "smart_similarity"
|
4
|
+
require "smart_search_history"
|
3
5
|
require "add_search_tags"
|
4
6
|
|
7
|
+
require "smart_search/smart_search_engine"
|
8
|
+
|
5
9
|
module SmartSearch
|
6
10
|
|
7
11
|
def self.included(base)
|
@@ -9,18 +13,27 @@ module SmartSearch
|
|
9
13
|
end
|
10
14
|
|
11
15
|
module ClassMethods
|
16
|
+
# Enable SmartSearch for the current ActiveRecord model.
|
17
|
+
# accepts options:
|
18
|
+
# - :on, define which attributes to add to the search index
|
19
|
+
# - :conditions, define default scope for all queries made
|
20
|
+
# - :group, group by column
|
21
|
+
# - :order, order by column
|
22
|
+
# see readme for details
|
12
23
|
def smart_search(options = {:on => [], :conditions => nil, :group => nil, :order => "created_at", :force => false})
|
13
24
|
if table_exists?
|
14
25
|
# Check if search_tags exists
|
15
26
|
if !is_smart_search? || options[:force] == true
|
16
|
-
|
17
|
-
cattr_accessor :condition_default, :group_default, :tags, :order_default
|
27
|
+
|
28
|
+
cattr_accessor :condition_default, :group_default, :tags, :order_default, :enable_similarity
|
18
29
|
send :include, InstanceMethods
|
19
30
|
if self.column_names.index("search_tags").nil?
|
20
31
|
::AddSearchTags.add_to_table(self.table_name)
|
21
32
|
end
|
22
33
|
self.send(:before_save, :create_search_tags)
|
23
|
-
|
34
|
+
|
35
|
+
self.enable_similarity = true
|
36
|
+
|
24
37
|
# assign options
|
25
38
|
if options[:conditions].is_a?(String) && !options[:conditions].blank?
|
26
39
|
self.condition_default = options[:conditions]
|
@@ -41,22 +54,40 @@ module SmartSearch
|
|
41
54
|
end
|
42
55
|
end
|
43
56
|
|
57
|
+
# Reports whether SmartSearch has already been loaded for this model.
#
# Returns true when InstanceMethods is among the modules included in the
# receiver, false otherwise.
def is_smart_search?
  included_modules.any? { |mod| mod == InstanceMethods }
end
|
47
61
|
|
62
|
+
# Defines where to look for the partial that renders search results for
# this model.
#
# Returns "/search/results/" followed by the underscored form of the last
# "::"-separated segment of the receiver's name.
def result_template_path
  model_segment = name.split("::").last
  "/search/results/#{model_segment.underscore}"
end
|
51
66
|
|
67
|
+
# Search the database for the given search tags
|
52
68
|
def find_by_tags(tags = "", options = {})
|
53
69
|
if self.is_smart_search?
|
70
|
+
|
71
|
+
# Save Data for similarity analysis
|
72
|
+
if tags.size > 3
|
73
|
+
self.connection.execute("INSERT INTO `#{::SmartSearchHistory.table_name}` (`query`) VALUES ('#{tags.gsub(/[^a-zA-ZäöüÖÄÜß\ ]/, '')}');")
|
74
|
+
end
|
75
|
+
|
54
76
|
tags = tags.split(" ")
|
55
77
|
|
56
78
|
# Fallback for Empty String
|
57
79
|
tags << "#" if tags.empty?
|
58
80
|
|
59
|
-
|
81
|
+
# Similarity
|
82
|
+
if self.enable_similarity == true
|
83
|
+
tags.map! do |t|
|
84
|
+
similars = SmartSimilarity.similars(t, :increment_counter => true).join("|")
|
85
|
+
"search_tags REGEXP '#{similars}'"
|
86
|
+
end
|
87
|
+
|
88
|
+
else
|
89
|
+
tags.map! {|t| "search_tags LIKE '%#{t}%'"}
|
90
|
+
end
|
60
91
|
|
61
92
|
|
62
93
|
results = self.where("(#{tags.join(' AND ')})")
|
@@ -85,15 +116,24 @@ module SmartSearch
|
|
85
116
|
end
|
86
117
|
end
|
87
118
|
|
119
|
+
# Reloads search_tags for the entire table, based on the attributes defined
# in the ':on' option passed to the 'smart_search' method.
#
# Every record is loaded once; for each one the search tags are rebuilt and
# the record is written via update_without_callbacks. Progress is printed to
# stdout on a single, carriage-return-terminated line.
#
# Rebuilding tags is best-effort: if create_search_tags raises for a record,
# a warning is written to stderr and the record is still written, so one bad
# record does not abort the whole run.
def set_search_index
  # Load the records a single time instead of querying once for the size
  # and again for the iteration.
  records = all.to_a
  total = records.size.to_f

  records.each_with_index do |record, index|
    begin
      record.create_search_tags
    rescue StandardError => e
      warn "Could not create search tags for #{name} record: #{e.class}: #{e.message}"
    end

    record.send(:update_without_callbacks)
    done = ((index + 1).to_f / total) * 100
    printf("Set search index for %s: %s%% \r", name, done)
  end
end
|
129
|
+
|
130
|
+
# Feeds the search tags of every row of this table into the similarity
# index.
#
# Reads the search_tags column for all rows, joins the values with single
# spaces and hands the resulting text to SmartSimilarity.create_from_text.
def set_similarity_index
  rows = connection.select_all("SELECT search_tags from #{table_name}")
  combined_tags = rows.map { |row| row["search_tags"] }.join(" ")
  SmartSimilarity.create_from_text(combined_tags)
end
|
97
137
|
|
98
138
|
end
|
99
139
|
|
@@ -103,6 +143,7 @@ module SmartSearch
|
|
103
143
|
self.class.result_template_path
|
104
144
|
end
|
105
145
|
|
146
|
+
# create search tags for this very record based on the attributes defined in ':on' option passed to the 'Class.smart_search' method
|
106
147
|
def create_search_tags
|
107
148
|
tags = []
|
108
149
|
self.class.tags.each do |tag|
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# This class is used to build the similarity index.
#
# Each row stores a phrase together with a serialized list of phrases that
# are considered similar to it. Similarity is the average of two string
# similarity scores (see SIMILARITY_METHOD_1 / SIMILARITY_METHOD_2).
class SmartSimilarity < ActiveRecord::Base
  #= Configuration
  serialize :similarities, Array
  self.table_name = "smart_search_similarities"
  require "amatch"

  #== Constants
  # Minimum averaged score two words need in order to count as similar
  SIMILARITY_FACTOR = 0.8
  # First similarity check method
  SIMILARITY_METHOD_1 = :jarowinkler
  # Second similarity check method
  SIMILARITY_METHOD_2 = :levenshtein

  # The average of both results is compared with SIMILARITY_FACTOR.

  # Maximum number of similar words stored per phrase
  SIMILARITY_LIMIT = 8

  # A token containing any of these characters is not indexed. Whitespace is
  # part of the list because splitting on word boundaries also yields the
  # gaps between words, which must never end up as phrases.
  INVALID_WORD_CHARS = /[\s0-9\-_<>\.\/(){}&\?"'@*+$!=,:'#;]/

  # Create similarity data based on the given text.
  # This method is used to generate data from every source, e.g. file, url,
  # single words etc.
  #
  # The text is downcased and split on word boundaries; tokens shorter than
  # three characters or containing a character from INVALID_WORD_CHARS are
  # dropped. Every remaining word that has no row yet gets one inserted, and
  # its list of similar words is extended with the words of this text whose
  # score reaches SIMILARITY_FACTOR. At most SIMILARITY_LIMIT similar words,
  # best score first, are stored per phrase.
  #
  # text - String (nil is treated as an empty text)
  def self.create_from_text(text)
    words = text.to_s.downcase.split(/\b/).uniq.select { |token| indexable_word?(token) }
    list = {}

    words.each do |word|
      # Load index from database
      record = find_by_phrase(word)
      if record.nil?
        # Values are quoted by the connection instead of being pasted into
        # the statement.
        connection.execute(
          "INSERT INTO #{quoted_table_name} " \
          "(#{connection.quote_column_name('phrase')}, #{connection.quote_column_name('ind')}) " \
          "VALUES (#{connection.quote(word)}, #{connection.quote(word[0..1])});"
        )
        current = []
      else
        current = record.similarities
      end

      current += words.select { |other| other != word && match_words(other, word) >= SIMILARITY_FACTOR }
      list[word] = current.uniq
    end

    # Write to database
    list.each do |word, candidates|
      best = candidates.sort_by { |candidate| match_words(candidate, word) }.reverse.first(SIMILARITY_LIMIT)

      begin
        # The column holds YAML (see `serialize` above); the bound value is
        # quoted by ActiveRecord.
        where(:phrase => word).update_all(["similarities = ?", best.to_yaml])
      rescue ActiveRecord::StatementInvalid => e
        # Storing stays best-effort, but a failure is reported instead of
        # being dropped silently.
        warn "SmartSimilarity: could not store similarities for #{word.inspect}: #{e.message}"
      end
    end
  end

  # Add one single word to the database and check if there are already
  # similar phrases stored.
  #
  # The word is stripped of surrounding whitespace, then indexed together
  # with every stored phrase whose score reaches SIMILARITY_FACTOR.
  def self.add_word(word)
    word = word.to_s.strip
    phrases = connection.select_all("SELECT phrase from #{quoted_table_name}").map { |row| row["phrase"] }
    similar = phrases.select { |phrase| match_words(phrase, word) >= SIMILARITY_FACTOR }

    create_from_text(([word] + similar).join(" "))
  end

  # Load an entire file to the index, one word per line.
  # Best used for loading big dictionary files.
  # Uses 'spawnling' to split the data into up to 8 stacks and load them
  # simultaneously.
  #
  # The file is read with File.readlines (no shell command involved and the
  # file handle is closed again); blank lines are skipped.
  def self.load_file(path)
    lines = File.readlines(path).map(&:strip).reject(&:empty?)
    count = lines.size
    puts "loading file: #{path}"
    puts "=> #{count} rows"
    return if count.zero?

    # NOTE(review): QueryLog is not defined in the code visible here; fall
    # back to the ActiveRecord logger when it is missing - confirm where
    # QueryLog is supposed to come from.
    log = defined?(QueryLog) ? QueryLog : logger
    stack_size = (count / 8.0).ceil

    lines.each_slice(stack_size).each_with_index do |stack, si|
      Spawnling.new(:argv => "sim-file-#{si}") do
        log.info "sim-file-#{si}" if log
        stack.each_with_index do |line, i|
          # Progress within this stack, as a real percentage
          log.info "#{si}: #{(i.fdiv(stack.size) * 100).round(2)} %" if log
          add_word(line)
        end
      end
    end
  end

  # Load words from a website and save them to the index.
  #
  # Requires 'curl'. The URL is passed to curl as a separate argument, so it
  # is never interpreted by a shell.
  def self.load_url(url)
    body = IO.popen(["curl", url.to_s]) { |io| io.read }
    create_from_text(body)
  end

  # Loads the recorded query history, saves its words to the index and
  # empties the history table afterwards.
  def self.load_from_query_history
    history_table = connection.quote_table_name(::SmartSearchHistory.table_name)
    queries = connection.select_all("SELECT query from #{history_table}").map { |row| row["query"] }.join(" ")
    create_from_text(queries)
    connection.execute("TRUNCATE #{history_table}")
  end

  # Get an array of similar words, including the original word.
  #
  # word    - the phrase to look up
  # options - :increment_counter, whether the usage counter of the phrase is
  #           incremented (defaults to true, which is what happened for
  #           every call before the option was honored)
  #
  # Returns [word] when the phrase is unknown, otherwise the word followed
  # by its stored similar words.
  def self.similars(word, options = {})
    entry = where(:phrase => word).first
    return [word] if entry.nil?

    increment_counter(:count, entry.id) if options.fetch(:increment_counter, true)
    [word, entry.similarities].flatten
  end

  # Return the match score for two words, based on the two defined
  # similarity methods: the "<method>_similar" call of each method is sent
  # to word1 with word2 as argument and the two results are averaged.
  def self.match_words(word1, word2)
    x1 = word1.send("#{SIMILARITY_METHOD_1}_similar", word2)
    x2 = word1.send("#{SIMILARITY_METHOD_2}_similar", word2)
    (x1 + x2) / 2.0
  end

  # True when the token is at least three characters long and contains none
  # of the characters in INVALID_WORD_CHARS.
  def self.indexable_word?(token)
    token.size >= 3 && !token.match(INVALID_WORD_CHARS)
  end
  private_class_method :indexable_word?
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Rake tasks that fill the SmartSimilarity index from different sources.
# Every task depends on :environment and requires the smart_similarity file
# located two directories above this file.
namespace :smart_search do
  desc "Load similarity data from query history"
  task :similarity_from_query_history => :environment do
    require File.expand_path("../../smart_similarity", __FILE__)
    SmartSimilarity.load_from_query_history
  end

  desc "Load similarity data from file - Use FILE_PATH=path/to/file (or FILE=path/to/file) to specify file"
  task :similarity_from_file => :environment do
    require File.expand_path("../../smart_similarity", __FILE__)
    # The task used to read FILE_PATH only, while its description told users
    # to pass FILE; both names are accepted now, FILE_PATH taking precedence.
    path = ENV['FILE_PATH'] || ENV['FILE']
    raise ArgumentError, "No file specified. " if path.nil?
    raise ArgumentError, "File not found " unless File.exist?(path)

    SmartSimilarity.load_file(path)
  end

  desc "Load similarity data from url - Use URL=http://.../ to specify url - Requires 'curl'"
  task :similarity_from_url => :environment do
    require File.expand_path("../../smart_similarity", __FILE__)
    raise ArgumentError, "No URL specified. " if ENV['URL'].nil?

    SmartSimilarity.load_url(ENV['URL'])
  end
end
|
metadata
CHANGED
@@ -1,15 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smart_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.61
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Eck
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
11
|
+
date: 2014-02-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rails
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.2.9
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.2.9
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: amatch
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: spawnling
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: friendly_extensions
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.0.61
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.0.61
|
13
69
|
description: Adds easy to use full-text search to ActiveRecord models, based the attributes
|
14
70
|
you want to search.
|
15
71
|
email: it-support@friends-systems.de
|
@@ -20,6 +76,10 @@ files:
|
|
20
76
|
- config/routes.rb
|
21
77
|
- lib/add_search_tags.rb
|
22
78
|
- lib/smart_search.rb
|
79
|
+
- lib/smart_search/smart_search_engine.rb
|
80
|
+
- lib/smart_search_history.rb
|
81
|
+
- lib/smart_similarity.rb
|
82
|
+
- lib/tasks/smart_search.rake
|
23
83
|
- test/test_helper.rb
|
24
84
|
- test/unit/smart_search_test.rb
|
25
85
|
homepage: https://rubygems.org/gems/smart_search
|