acts_as_tokenizable 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README +0 -0
- data/README.rdoc +17 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/acts_as_tokenizable.gemspec +58 -0
- data/init.rb +1 -0
- data/lib/acts_as_tokenizable/acts_as_tokenizable.rb +42 -0
- data/lib/acts_as_tokenizable/string_extensions.rb +66 -0
- data/lib/acts_as_tokenizable.rb +9 -0
- data/lib/tasks/tokenizer.rake +57 -0
- data/test/helper.rb +10 -0
- data/test/test_acts_as_tokenizable.rb +7 -0
- metadata +80 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Splendeo
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
File without changes
|
data/README.rdoc
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= acts_as_tokenizable
|
2
|
+
|
3
|
+
Make ActiveRecord models easily searchable via tokens.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
13
|
+
* Send me a pull request. Bonus points for topic branches.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2010 Splendeo. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded a hint is
# printed and the remaining tasks (test, rcov, rdoc) are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "acts_as_tokenizable"
    gem.summary = "Acts as tokenizable"
    gem.description = "Make ActiveRecord models easily searchable via tokens."
    gem.email = "github@splendeo.es"
    gem.homepage = "http://github.com/splendeo/acts_as_tokenizable"
    gem.authors = ["Enrique Garcia Cota", "Francisco de Juan"]
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, a stub :rcov task explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (it is expected to
# come from the Jeweler tasks above), so only make :test depend on it when it
# actually exists. Otherwise `rake test` would abort on an unknown prerequisite
# whenever Jeweler is not installed.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation built from the README files and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline so it does not end up inside the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "acts_as_tokenizable #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{acts_as_tokenizable}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Enrique Garcia Cota", "Francisco de Juan"]
|
12
|
+
s.date = %q{2010-01-12}
|
13
|
+
s.description = %q{Make ActiveRecord models easily searchable via tokens.}
|
14
|
+
s.email = %q{github@splendeo.es}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README",
|
18
|
+
"README.rdoc"
|
19
|
+
]
|
20
|
+
s.files = [
|
21
|
+
"LICENSE",
|
22
|
+
"README",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"acts_as_tokenizable.gemspec",
|
27
|
+
"init.rb",
|
28
|
+
"lib/acts_as_tokenizable.rb",
|
29
|
+
"lib/acts_as_tokenizable/acts_as_tokenizable.rb",
|
30
|
+
"lib/acts_as_tokenizable/string_extensions.rb",
|
31
|
+
"lib/tasks/tokenizer.rake",
|
32
|
+
"test/helper.rb",
|
33
|
+
"test/test_acts_as_tokenizable.rb"
|
34
|
+
]
|
35
|
+
s.homepage = %q{http://github.com/splendeo/acts_as_tokenizable}
|
36
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
37
|
+
s.require_paths = ["lib"]
|
38
|
+
s.rubygems_version = %q{1.3.5}
|
39
|
+
s.summary = %q{Acts as tokenizable}
|
40
|
+
s.test_files = [
|
41
|
+
"test/helper.rb",
|
42
|
+
"test/test_acts_as_tokenizable.rb"
|
43
|
+
]
|
44
|
+
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
47
|
+
s.specification_version = 3
|
48
|
+
|
49
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
50
|
+
s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
53
|
+
end
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'acts_as_tokenizable'
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Mixin that gives a model a searchable "token" attribute.
#
# Including the module adds the instance methods +to_token+ and +tokenize+,
# extends the including class with ClassMethods and declares a
# +tokenized_by+ named scope on it.
module ActsAsTokenizable
  require 'acts_as_tokenizable/string_extensions'

  # Builds the token for this object. This base implementation always raises:
  # every including model has to redefine it (for example returning
  # <tt>self.name.to_token()</tt>).
  #
  # Raises NoMethodError when it has not been redefined.
  def to_token
    raise NoMethodError, "You must redefine to_token in your model. Example: self.name.to_token()"
  end

  # Stores the result of +to_token+ in the attribute named by the class-level
  # +token_field_name+ (i.e. self.<token_field_name> = to_token). The record
  # is not saved here.
  def tokenize
    send("#{self.class.token_field_name}=", to_token)
  end

  module ClassMethods
    # Name of the attribute that holds the token. It is used both as the
    # setter name in #tokenize and as the column name in the +tokenized_by+
    # SQL, so it has to be assigned by the including class.
    attr_accessor :token_field_name

    # Prepares the text handed to +tokenized_by+ before the scope is built.
    # Override it to remove stop words, replace words, etc.
    # By default it tokenizes each word and removes duplicates.
    # +nil+ is treated as an empty search instead of raising.
    def prepare_search_token(search_token)
      search_token.to_s.words_to_token
    end
  end

  # Hook run when the module is included: adds ClassMethods to +base+ and
  # defines the +tokenized_by+ named scope on it.
  def self.included(base)
    base.class_eval do
      extend ClassMethods

      # tokenized_by(search_token): keeps the records whose token column
      # contains every word of the prepared search token (one "LIKE ?" clause
      # per word, joined with AND, values passed as bind parameters).
      named_scope :tokenized_by, lambda { |search_token|
        terms = prepare_search_token(search_token).words

        if terms.empty?
          # nothing to search for: add no conditions rather than an empty SQL fragment
          {}
        else
          clauses = terms.map { "#{token_field_name} LIKE ?" }
          values  = terms.map { |w| "%#{w}%" }
          { :conditions => [clauses.join(' AND '), *values] }
        end
      }
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# Helpers added to String for building search tokens and slugs.
String.class_eval do

  # Converts accented letters into ascii equivalents (i.e. ñ becomes n).
  #
  # Decomposes the string through +mb_chars+ (not defined in this file;
  # presumably ActiveSupport -- confirm) and then removes everything outside
  # the \x00-\x7F range. A ":kd" variant found in the forums did not work for
  # the original authors, hence ":d".
  def normalize
    mb_chars.normalize(:d).gsub(/[^\x00-\x7F]/n, '').to_s
  end

  # Returns true when Float() can parse the whole string, false otherwise.
  def numeric?
    Float(self)
    true
  rescue ArgumentError, TypeError
    # only conversion failures mean "not numeric"; anything else propagates
    false
  end

  # Returns an array of strings containing the words of this string.
  # Every non-word character (\W) is treated as a separator.
  def words
    gsub(/\W/, ' ').split
  end

  # Removes the words listed in +words_array+ from the string.
  # As a side-effect, all word-separators are converted to the separator char.
  def remove_words(words_array, separator = ' ')
    (words - words_array).join(separator)
  end

  # Replaces certain words of the string.
  #
  # +replacements+ maps candidates to their replacement; the candidates may be
  # an array of words or a single word. Entries are applied in iteration
  # order, so a later entry also sees the words produced by an earlier one.
  # As a side-effect, all word-separators are converted to the separator char.
  def replace_words(replacements, separator = ' ')
    replaced_words = words
    replacements.each do |candidates, replacement|
      # accept both ["a", "b"] => "x" and "a" => "x"
      candidate_list = [candidates].flatten
      replaced_words = replaced_words.map { |w| candidate_list.include?(w) ? replacement : w }
    end
    replaced_words.join(separator)
  end

  # Returns an array that contains, in order:
  # * the numeric parts, converted to numbers
  # * the non-numeric parts, as text
  # This is useful for sorting alphanumerically. For example:
  #   ["A1", "A12", "A2"].sort_by{|x| x.alphanumerics} => ["A1", "A2", "A12"]
  #
  # inspired by : http://blog.labnotes.org/2007/12/13/rounded-corners-173-beautiful-code/
  def alphanumerics
    split(/(\d+)/).map { |v| v =~ /\d/ ? v.to_i : v }
  end

  # Converts the string into something that can be used as an indexation key:
  # normalized, stripped, downcased, with every non-word character removed and
  # repeated characters squeezed (pure numbers keep their repeated digits).
  # The result is cut to at most +max_length+ characters; zero or a negative
  # +max_length+ gives an empty string.
  def to_token(max_length = 255)
    string = normalize.strip.downcase.gsub(/\W/, '') # remove all non-alphanumeric
    string = string.squeeze unless string.numeric?   # remove duplicates, except on pure numbers
    # start/length slicing: a 0..(max_length - 1) range would return the whole
    # string for max_length = 0
    string[0, [max_length, 0].max]
  end

  # Converts the string into something that can be used on links: the
  # normalized, downcased words joined with +separator+.
  # Normalizes before downcasing, in the same order as #to_token.
  def to_slug(separator = '-')
    normalize.strip.downcase.words.join(separator)
  end

  # Tokenizes each word individually, removes duplicated tokens and joins them
  # with the separator char, keeping at most +max_length+ characters.
  # The string is normalized before being split, so accented letters stay
  # inside their word instead of acting as separators.
  def words_to_token(max_length = 255, separator = ' ')
    tokens = normalize.words.map { |w| w.to_token }.uniq
    tokens.join(separator)[0, [max_length, 0].max]
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'config/environment'
|
2
|
+
|
3
|
+
# Requires every model file under RAILS_ROOT/app/models (top level only) and
# returns the ActiveRecord::Base subclasses that respond to +tokenized_by+.
# The files are required in sorted order so loading does not depend on the
# order in which the file system lists them.
def array_of_active_record_models
  Dir.glob(File.join(RAILS_ROOT, 'app', 'models', '*.rb')).sort.each { |file| require file }

  # subclasses is invoked through send, as in the original code (it may not be public)
  ActiveRecord::Base.send(:subclasses).select { |m| m.respond_to?(:tokenized_by) }
end
|
8
|
+
|
9
|
+
# Regenerates and stores the token of every record in +records+, printing a
# "done/total" progress counter on a single console line.
#
# records - collection responding to +size+ and +each_with_index+ whose
#           elements respond to +tokenize+ and +save+.
def tokenize_records(records)
  total_count = records.size

  records.each_with_index do |record, index|
    count = index + 1
    record.tokenize   # this generates tokens
    record.save false # this saves without checking validations
    print "\r#{count}/#{total_count}"
    GC.start if (count % 1000).zero? # launch garbage collection each 1000 records
  end
  puts ""
end
|
23
|
+
|
24
|
+
# Generates tokens for every model returned by array_of_active_record_models
# and prints progress plus the total elapsed time.
#
# regenerate - when false (default) only records whose token column is NULL
#              or empty are processed; when true every record is re-tokenized.
def tokenize_models(regenerate = false)
  start = Time.now
  puts "Start token generation"
  puts "++++++++++++++++++++++++++++++++"

  array_of_active_record_models.each do |model|
    puts "Generating new tokens for #{model.name.pluralize}"

    # nil conditions (regenerate mode) select every record of the model
    conditions = regenerate ? nil : "#{model.token_field_name} IS NULL OR #{model.token_field_name} = ''"

    records_without_token = model.all(:conditions => conditions)
    if records_without_token.empty?
      puts "There are no records without token"
      puts "++++++++++++++++++++++++++++++++"
    else
      tokenize_records(records_without_token)
    end
  end
  puts "Elapsed time #{Time.now - start} seconds"
end
|
45
|
+
|
46
|
+
# Rake entry points for token generation; both run after the :environment task.
namespace :tokens do
  desc "Generates the token for objects without tokens."
  task :generate => :environment do
    # only records with a missing/empty token
    tokenize_models(false)
  end

  desc "Re-builds the token for all objects."
  task :regenerate => :environment do
    # every record, whether or not it already has a token
    tokenize_models(true)
  end
end
|
data/test/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: acts_as_tokenizable
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Enrique Garcia Cota
|
8
|
+
- Francisco de Juan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2010-01-12 00:00:00 +01:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: thoughtbot-shoulda
|
18
|
+
type: :development
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
26
|
+
description: Make ActiveRecord models easily searchable via tokens.
|
27
|
+
email: github@splendeo.es
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- LICENSE
|
34
|
+
- README
|
35
|
+
- README.rdoc
|
36
|
+
files:
|
37
|
+
- LICENSE
|
38
|
+
- README
|
39
|
+
- README.rdoc
|
40
|
+
- Rakefile
|
41
|
+
- VERSION
|
42
|
+
- acts_as_tokenizable.gemspec
|
43
|
+
- init.rb
|
44
|
+
- lib/acts_as_tokenizable.rb
|
45
|
+
- lib/acts_as_tokenizable/acts_as_tokenizable.rb
|
46
|
+
- lib/acts_as_tokenizable/string_extensions.rb
|
47
|
+
- lib/tasks/tokenizer.rake
|
48
|
+
- test/helper.rb
|
49
|
+
- test/test_acts_as_tokenizable.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/splendeo/acts_as_tokenizable
|
52
|
+
licenses: []
|
53
|
+
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options:
|
56
|
+
- --charset=UTF-8
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: "0"
|
64
|
+
version:
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: "0"
|
70
|
+
version:
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 1.3.5
|
75
|
+
signing_key:
|
76
|
+
specification_version: 3
|
77
|
+
summary: Acts as tokenizable
|
78
|
+
test_files:
|
79
|
+
- test/helper.rb
|
80
|
+
- test/test_acts_as_tokenizable.rb
|