drtom-textacular 4.0.0.alpha.20160302

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +199 -0
  3. data/Gemfile +3 -0
  4. data/README.md +196 -0
  5. data/Rakefile +61 -0
  6. data/lib/textacular.rb +160 -0
  7. data/lib/textacular/full_text_indexer.rb +66 -0
  8. data/lib/textacular/migration_generator.rb +31 -0
  9. data/lib/textacular/postgres_module_installer.rb +57 -0
  10. data/lib/textacular/rails.rb +14 -0
  11. data/lib/textacular/searchable.rb +20 -0
  12. data/lib/textacular/tasks.rb +23 -0
  13. data/lib/textacular/trigram_installer.rb +18 -0
  14. data/lib/textacular/version.rb +7 -0
  15. data/spec/config.travis.yml +8 -0
  16. data/spec/config.yml.example +5 -0
  17. data/spec/spec_helper.rb +104 -0
  18. data/spec/support/ar_stand_in.rb +4 -0
  19. data/spec/support/character.rb +7 -0
  20. data/spec/support/game.rb +5 -0
  21. data/spec/support/game_extended_with_textacular.rb +5 -0
  22. data/spec/support/game_extended_with_textacular_and_custom_language.rb +7 -0
  23. data/spec/support/game_fail.rb +3 -0
  24. data/spec/support/game_fail_extended_with_textacular.rb +5 -0
  25. data/spec/support/not_there.rb +3 -0
  26. data/spec/support/textacular_web_comic.rb +7 -0
  27. data/spec/support/web_comic.rb +7 -0
  28. data/spec/support/web_comic_with_searchable.rb +6 -0
  29. data/spec/support/web_comic_with_searchable_name.rb +6 -0
  30. data/spec/support/web_comic_with_searchable_name_and_author.rb +6 -0
  31. data/spec/textacular/full_text_indexer_spec.rb +69 -0
  32. data/spec/textacular/migration_generator_spec.rb +67 -0
  33. data/spec/textacular/searchable_spec.rb +194 -0
  34. data/spec/textacular/trigram_installer_spec.rb +24 -0
  35. data/spec/textacular_spec.rb +287 -0
  36. metadata +210 -0
@@ -0,0 +1,160 @@
1
+ require 'active_record'
2
+
3
+ require 'textacular/version'
4
+
5
# PostgreSQL full-text and trigram search helpers.
#
# The module is written to be mixed into a model class (the Railtie extends
# ActiveRecord::Base with it). Its methods rely on the host providing
# +connection+, +quoted_table_name+, +columns+, +select+, +where+ and +order+.
module Textacular
  autoload :FullTextIndexer,         'textacular/full_text_indexer'
  autoload :PostgresModuleInstaller, 'textacular/postgres_module_installer'
  autoload :TrigramInstaller,        'textacular/trigram_installer'
  autoload :MigrationGenerator,      'textacular/migration_generator'

  # Name of the text search configuration used unless the host overrides
  # #searchable_language.
  def self.searchable_language
    'english'
  end

  # Convenience entry point; identical to #basic_search.
  def search(query = "", exclusive = true)
    basic_search(query, exclusive)
  end

  # Full-text search that builds its tsquery with plainto_tsquery.
  #
  # query     - a String (matched against every searchable column) or a Hash
  #             of column => term (nested Hashes are keyed by table name).
  # exclusive - when true the per-column conditions are ANDed, otherwise
  #             ORed. Forced to false whenever +query+ is not a Hash.
  def basic_search(query = "", exclusive = true)
    exclusive, query = munge_exclusive_and_query(exclusive, query)
    similarities, conditions = basic_similarities_and_conditions(parse_query_hash(query))
    assemble_query(similarities, conditions, exclusive)
  end

  # Same as #basic_search but builds its tsquery with to_tsquery.
  def advanced_search(query = "", exclusive = true)
    exclusive, query = munge_exclusive_and_query(exclusive, query)
    similarities, conditions = advanced_similarities_and_conditions(parse_query_hash(query))
    assemble_query(similarities, conditions, exclusive)
  end

  # Trigram search built on the SQL similarity() function and % operator.
  def fuzzy_search(query = '', exclusive = true)
    exclusive, query = munge_exclusive_and_query(exclusive, query)
    similarities, conditions = fuzzy_similarities_and_conditions(parse_query_hash(query))
    assemble_query(similarities, conditions, exclusive)
  end

  private

  # Normalises the two search arguments. A Hash query passes through
  # untouched; anything else is stringified and applied to every searchable
  # column, and the search becomes non-exclusive.
  def munge_exclusive_and_query(exclusive, query)
    return [exclusive, query] if query.is_a?(Hash)

    term_per_column = searchable_columns.each_with_object({}) do |column, terms|
      terms[column] = query.to_s
    end

    [false, term_per_column]
  end

  # Flattens a (possibly nested) query Hash into a list of
  # [quoted table, quoted column, quoted search term] triples.
  def parse_query_hash(query, table_name = quoted_table_name)
    quoted_table = connection.quote_table_name(table_name)

    query.flat_map do |column_or_table, search_term|
      if search_term.is_a?(Hash)
        # A nested Hash is keyed by the table its columns belong to.
        parse_query_hash(search_term, column_or_table)
      else
        [[
          quoted_table,
          connection.quote_column_name(column_or_table),
          connection.quote(Helper.normalize(search_term))
        ]]
      end
    end
  end

  # Returns [rank expressions, WHERE conditions] for a basic search.
  def basic_similarities_and_conditions(parsed_query_hash)
    [
      parsed_query_hash.map { |query_args| basic_similarity_string(*query_args) },
      parsed_query_hash.map { |query_args| basic_condition_string(*query_args) }
    ]
  end

  def basic_similarity_string(table_name, column, search_term)
    document = "to_tsvector(#{quoted_language}, #{table_name}.#{column}::text)"
    tsquery  = "plainto_tsquery(#{quoted_language}, #{search_term}::text)"

    "COALESCE(ts_rank(#{document}, #{tsquery}), 0)"
  end

  def basic_condition_string(table_name, column, search_term)
    document = "to_tsvector(#{quoted_language}, #{table_name}.#{column}::text)"
    tsquery  = "plainto_tsquery(#{quoted_language}, #{search_term}::text)"

    "#{document} @@ #{tsquery}"
  end

  # Returns [rank expressions, WHERE conditions] for an advanced search.
  def advanced_similarities_and_conditions(parsed_query_hash)
    [
      parsed_query_hash.map { |query_args| advanced_similarity_string(*query_args) },
      parsed_query_hash.map { |query_args| advanced_condition_string(*query_args) }
    ]
  end

  def advanced_similarity_string(table_name, column, search_term)
    document = "to_tsvector(#{quoted_language}, #{table_name}.#{column}::text)"
    tsquery  = "to_tsquery(#{quoted_language}, #{search_term}::text)"

    "COALESCE(ts_rank(#{document}, #{tsquery}), 0)"
  end

  def advanced_condition_string(table_name, column, search_term)
    document = "to_tsvector(#{quoted_language}, #{table_name}.#{column}::text)"
    tsquery  = "to_tsquery(#{quoted_language}, #{search_term}::text)"

    "#{document} @@ #{tsquery}"
  end

  # Returns [rank expressions, WHERE conditions] for a fuzzy search.
  def fuzzy_similarities_and_conditions(parsed_query_hash)
    [
      parsed_query_hash.map { |query_args| fuzzy_similarity_string(*query_args) },
      parsed_query_hash.map { |query_args| fuzzy_condition_string(*query_args) }
    ]
  end

  def fuzzy_similarity_string(table_name, column, search_term)
    "COALESCE(similarity(#{table_name}.#{column}, #{search_term}), 0)"
  end

  def fuzzy_condition_string(table_name, column, search_term)
    "(#{table_name}.#{column} % #{search_term})"
  end

  # Chains select/where/order on the host: the summed similarities are
  # selected under a randomly suffixed "rank" alias and used for ordering.
  def assemble_query(similarities, conditions, exclusive)
    rank_alias = connection.quote_column_name("rank#{rand(100000000000000000)}")
    # Only add "table.*" when the current scope has no explicit select list.
    all_columns = select_values.empty? ? "#{quoted_table_name}.*," : nil
    joiner = exclusive ? " AND " : " OR "

    select("#{all_columns} #{similarities.join(' + ')} AS #{rank_alias}")
      .where(conditions.join(joiner))
      .order("#{rank_alias} DESC")
  end

  # Select values of the current scope; the scope is obtained through +all+
  # on ActiveRecord 4+ and through +scoped+ on older versions.
  def select_values
    relation = ActiveRecord::VERSION::MAJOR >= 4 ? all : scoped
    relation.select_values
  end

  # Names of the host's :string and :text columns.
  def searchable_columns
    textual = columns.select { |column| [:string, :text].include?(column.type) }
    textual.map { |column| column.name }
  end

  # The search language quoted as an SQL literal (memoised).
  def quoted_language
    @quoted_language ||= connection.quote(searchable_language)
  end

  def searchable_language
    Textacular.searchable_language
  end

  module Helper
    # Backslash-escapes every whitespace character that is not directly
    # followed by one of the characters & ! | .
    def self.normalize(query)
      query.to_s.gsub(/\s(?![\&|\!|\|])/, '\\\\ ')
    end
  end
end
159
+
160
+ require 'textacular/rails' if defined?(::Rails)
@@ -0,0 +1,66 @@
1
module Textacular
  # Generates a migration that creates (up) and drops (down) one GIN
  # full-text index per indexable column of a model.
  class FullTextIndexer
    # Renders the migration source for +model_name+ and passes it to
    # Textacular::MigrationGenerator.
    #
    # model_name - String naming a model constant. The model must respond to
    #              +indexable_columns+ and +table_name+; the String must
    #              respond to +underscore+.
    #
    # Returns whatever MigrationGenerator#generate_migration returns.
    def generate_migration(model_name)
      content = <<-MIGRATION
class #{model_name}FullTextSearch < ActiveRecord::Migration
  def self.up
    execute(<<-SQL.strip)
      #{up_migration(model_name)}
    SQL
  end

  def self.down
    execute(<<-SQL.strip)
      #{down_migration(model_name)}
    SQL
  end
end
      MIGRATION
      filename = "#{model_name.underscore}_full_text_search"
      # MigrationGenerator#initialize takes (filename, content), in that order.
      generator = Textacular::MigrationGenerator.new(filename, content)
      generator.generate_migration
    end

    private

    def up_migration(model_name)
      migration_with_type(model_name, :up)
    end

    def down_migration(model_name)
      migration_with_type(model_name, :down)
    end

    # Builds the SQL for one direction. Both directions drop every index;
    # only :up re-creates them afterwards.
    def migration_with_type(model_name, type)
      model = Kernel.const_get(model_name)

      sql_lines = model.indexable_columns.map do |column|
        statements = drop_index_sql_for(model, column)
        statements += create_index_sql_for(model, column) if type == :up
        statements
      end.join

      # Indent continuation lines so they line up inside the generated heredoc.
      sql_lines.strip.gsub("\n", "\n      ")
    end

    def drop_index_sql_for(model, column)
      "DROP index IF EXISTS #{index_name_for(model, column)};\n"
    end

    def create_index_sql_for(model, column)
      # The spacing gets sort of wonky in here.

      <<-SQL
CREATE index #{index_name_for(model, column)}
  ON #{model.table_name}
  USING gin(to_tsvector(#{quoted_dictionary}, "#{model.table_name}"."#{column}"::text));
      SQL
    end

    def index_name_for(model, column)
      "#{model.table_name}_#{column}_fts_idx"
    end

    # The dictionary as a single-quoted SQL string literal. Double quotes
    # would make PostgreSQL read the name as an identifier instead.
    def quoted_dictionary
      "'#{dictionary.to_s.gsub("'", "''")}'"
    end

    def dictionary
      Textacular.searchable_language
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'fileutils'
2
+
3
# Emits generated migration source. When Rails is defined (and no output
# stream has been set) the source is written to a timestamped file under
# Rails.root/db/migrate; otherwise it is written to the output stream, which
# defaults to $stdout.
class Textacular::MigrationGenerator
  # filename - base name of the migration file (no timestamp, no extension).
  # content  - migration source to write.
  def initialize(filename, content)
    @filename = filename
    @content = content
  end

  # Writes the stored content to the destination chosen by #stream_output.
  def generate_migration
    stream_output { |io| io.puts(@content) }
  end

  # Yields a writable IO to the block.
  #
  # now - Time used for the migration's timestamp prefix.
  def stream_output(now = Time.now.utc, &block)
    if @output_stream || !defined?(Rails)
      @output_stream ||= $stdout

      yield @output_stream
    else
      target = migration_file_name(now)
      FileUtils.mkdir_p(File.dirname(target))
      File.open(target, 'w', &block)
    end
  end

  private

  # Path of the migration file: <Rails.root>/db/migrate/<timestamp>_<filename>.rb
  def migration_file_name(now = Time.now.utc)
    timestamp = now.strftime('%Y%m%d%H%M%S')
    File.join(Rails.root, 'db', 'migrate', "#{timestamp}_#{@filename}.rb")
  end
end
@@ -0,0 +1,57 @@
1
module Textacular
  # Installs a PostgreSQL module (for example pg_trgm) into the database
  # ActiveRecord::Base is connected to, using the mechanism that matches the
  # PostgreSQL version reported by pg_config.
  class PostgresModuleInstaller
    # Installs +module_name+.
    #
    # PostgreSQL 9.1 and later use CREATE EXTENSION; earlier versions load
    # the module's SQL script from the contrib directory through psql.
    #
    # Raises RuntimeError when pg_config fails, the version cannot be read,
    # the module files are missing, or psql does not succeed.
    def install_module(module_name)
      if extension_support?
        install_postgres_91_module(module_name)
      else
        install_postgres_90_module(module_name)
      end
    end

    # Name of the database of the current ActiveRecord connection (memoised).
    def db_name
      @db_name ||= ActiveRecord::Base.connection.current_database
    end

    private

    # True for version 9.1 or newer. Major and minor are compared as a pair,
    # so versions such as "10.0" or "13" count as newer than 9.1 even though
    # their minor number is below 1.
    def extension_support?
      major, minor = postgres_version.split('.').map(&:to_i)

      ([major, minor.to_i] <=> [9, 1]) >= 0
    end

    # Dotted version number extracted from `pg_config --version` (memoised).
    def postgres_version
      @postgres_version ||= begin
        banner = ask_pg_config('version')
        found = banner.match(/PostgreSQL ([0-9]+(\.[0-9]+)*)/)
        raise RuntimeError, "Cannot determine the Postgres version from #{banner.inspect}." unless found

        found[1]
      end
    end

    def postgres_share_dir
      @share_dir ||= ask_pg_config('sharedir')
    end

    # Runs `pg_config --<argument>` and returns its output without the
    # trailing newline.
    def ask_pg_config(argument)
      result = `pg_config --#{argument}`.chomp

      raise RuntimeError, "Cannot find Postgres's #{argument}." unless $?.success?

      result
    end

    # Pre-9.1 path: feed <sharedir>/contrib/<module>.sql to psql.
    def install_postgres_90_module(module_name)
      module_location = "#{postgres_share_dir}/contrib/#{module_name}.sql"

      unless File.exist?(module_location)
        raise RuntimeError, "Cannot find the #{module_name} module. Was it compiled and installed?"
      end

      # Argument-list form: no shell is involved, so names and paths with
      # spaces or shell metacharacters are passed through verbatim.
      unless system('psql', '-d', db_name, '-f', module_location)
        raise RuntimeError, "`psql -d #{db_name} -f #{module_location}` cannot complete successfully."
      end
    end

    # 9.1+ path: verify the extension's control file exists, then create it.
    def install_postgres_91_module(module_name)
      module_location = "#{postgres_share_dir}/extension/#{module_name}.control"

      unless File.exist?(module_location)
        raise RuntimeError, "Cannot find the #{module_name} module. Was it compiled and installed?"
      end

      ActiveRecord::Base.connection.execute("CREATE EXTENSION #{module_name};")
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # Module used to conform to Rails 3 plugin API
2
+ require File.expand_path(File.dirname(__FILE__) + '/../textacular')
3
+
4
module Textacular
  # Rails hook-up: extends ActiveRecord::Base with Textacular in an
  # initializer and loads the gem's rake tasks.
  class Railtie < Rails::Railtie
    initializer("textacular.configure_rails_initialization") { ActiveRecord::Base.extend(Textacular) }

    rake_tasks { load 'textacular/tasks.rb' }
  end
end
@@ -0,0 +1,20 @@
1
+ require 'textacular'
2
+
3
# Builds an anonymous module that includes Textacular and restricts
# searching to the given columns.
#
# searchable_columns - column names (anything responding to #to_s).
#
# The returned module defines a private #searchable_columns that returns the
# names as Strings, and a public #indexable_columns that returns an
# enumerator over them.
def Searchable(*searchable_columns)
  Module.new do
    include Textacular

    # The block closes over the method argument, so the names are
    # stringified afresh on each call.
    define_method(:searchable_columns) { searchable_columns.map { |column| column.to_s } }
    private :searchable_columns

    def indexable_columns
      searchable_columns.to_enum
    end
  end
end
19
+
20
+ Searchable = Textacular
@@ -0,0 +1,23 @@
1
+ require 'rake'
2
+ require 'textacular'
3
+
4
# Rake tasks shipped with the gem. Every task depends on :environment.
namespace :textacular do
  desc 'Create full text search index migration, give the model for which you want to create the indexes'
  task :create_index_migration, [:model_name] => :environment do |_task, args|
    raise 'A model name is required' unless args[:model_name]
    Textacular::FullTextIndexer.new.generate_migration(args[:model_name])
  end

  desc "Install trigram text search module"
  task :install_trigram => [:environment] do
    installer = Textacular::PostgresModuleInstaller.new
    installer.install_module('pg_trgm')

    puts "Trigram text search module successfully installed into '#{installer.db_name}' database."
  end

  desc "Create trigram installer migration"
  task :create_trigram_migration => [:environment] do
    # The return value is not used, so it is not bound to a local.
    Textacular::TrigramInstaller.new.generate_migration
  end
end
@@ -0,0 +1,18 @@
1
# Generates the "install_trigram" migration, whose up step creates the
# pg_trgm extension and whose down step drops it.
class Textacular::TrigramInstaller
  # Hands the migration source to Textacular::MigrationGenerator and returns
  # the result of its #generate_migration.
  def generate_migration
    Textacular::MigrationGenerator.new("install_trigram", migration_source).generate_migration
  end

  private

  # Source of the InstallTrigram migration class.
  def migration_source
    <<-MIGRATION
class InstallTrigram < ActiveRecord::Migration
  def self.up
    ActiveRecord::Base.connection.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")
  end

  def self.down
    ActiveRecord::Base.connection.execute("DROP EXTENSION pg_trgm;")
  end
end
    MIGRATION
  end
end
@@ -0,0 +1,7 @@
1
module Textacular
  # Version string of this gem release.
  VERSION = '4.0.0.alpha.20160302'

  class << self
    # Returns the gem version string (same value as VERSION).
    def version
      VERSION
    end
  end
end
@@ -0,0 +1,8 @@
1
+ pool: 5
2
+ timeout: 5000
3
+ host: localhost
4
+ adapter: postgresql
5
+ username: postgres
6
+ password:
7
+ database: textacular_test
8
+ min_messages: ERROR
@@ -0,0 +1,5 @@
1
+ database: textacular
2
+ username: <%= ENV[ 'USER' ] %>
3
+ pool: 5
4
+ timeout: 5000
5
+ adapter: postgresql
@@ -0,0 +1,104 @@
1
+ require 'pry'
2
+ require 'textacular'
3
+ require 'database_cleaner'
4
+ require 'yaml'
5
+
6
# Read connection settings from config.yml next to this file and connect
# ActiveRecord with the adapter set to :postgresql.
db_settings = YAML.load_file(File.expand_path('../config.yml', __FILE__))
ActiveRecord::Base.establish_connection(db_settings.merge(:adapter => :postgresql))
8
+
9
+ # This file was generated by the `rspec --init` command. Conventionally, all
10
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
11
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
12
+ # file to always be loaded, without a need to explicitly require it in any files.
13
+ #
14
+ # Given that it is always loaded, you are encouraged to keep this file as
15
+ # light-weight as possible. Requiring heavyweight dependencies from this file
16
+ # will add to the boot time of your test suite on EVERY test run, even for an
17
+ # individual file that may not need all of that loaded. Instead, consider making
18
+ # a separate helper file that requires the additional dependencies and performs
19
+ # the additional setup, and require it from the spec files that actually need it.
20
+ #
21
+ # The `.rspec` file also contains a few flags that are not defaults but that
22
+ # users commonly want.
23
+ #
24
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
25
RSpec.configure do |rspec|
  # rspec-expectations settings. A different assertion/expectation library
  # (wrong, stdlib/minitest assertions, ...) could be plugged in here.
  rspec.expect_with(:rspec) do |expectations|
    # Will default to `true` in RSpec 4. Makes the `description` and
    # `failure_message` of custom matchers include the text of helper
    # methods defined with `chain`, e.g.
    #   be_bigger_than(2).and_smaller_than(4).description
    #   # => "be bigger than 2 and smaller than 4"
    # instead of
    #   # => "be bigger than 2"
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks settings. A different test-double library (bogus, mocha, ...)
  # could be selected by changing the `mock_with` argument.
  rspec.mock_with(:rspec) do |mocks|
    # Refuse to mock or stub methods that do not exist on the real object.
    # Generally recommended; will default to `true` in RSpec 4.
    mocks.verify_partial_doubles = true
  end

  # Together these let a run be narrowed to examples or groups tagged with
  # `:focus`; when nothing carries the tag, everything runs.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Restrict the syntax to the recommended non-monkey-patched form. See:
  #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
  #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #   - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
  rspec.disable_monkey_patching!

  # Turn Ruby warnings on. Recommended, though dependencies can make the
  # output noisy.
  rspec.warnings = true

  # Many RSpec users run either the whole suite or a single file, and more
  # verbose output is handy in the single-file case.
  # if config.files_to_run.one?
  #   Use the documentation formatter for detailed output,
  #   unless a formatter has already been configured
  #   (e.g. via a command-line flag).
  #   config.default_formatter = 'doc'
  # end

  # Print the 10 slowest examples and example groups at the end of the run
  # to help spot particularly slow specs.
  # config.profile_examples = 10

  # Run specs in random order to surface order dependencies. To debug one,
  # fix the order by passing the seed printed after each run:
  #     --seed 1234
  rspec.order = :random

  # Seed global randomisation from the `--seed` CLI option, so a failure
  # tied to randomisation can be reproduced by passing the same seed again.
  Kernel.srand rspec.seed

  # Before the suite: use the transaction strategy for examples and empty
  # the database once with truncation.
  rspec.before(:suite) do
    DatabaseCleaner.strategy = :transaction
    DatabaseCleaner.clean_with(:truncation)
  end

  # Run every example inside DatabaseCleaner.cleaning.
  rspec.around(:each) do |example|
    DatabaseCleaner.cleaning { example.run }
  end
end