hayfork 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/.travis.yml +16 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +200 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/hayfork.gemspec +33 -0
- data/lib/generators/hayfork/haystack_generator.rb +45 -0
- data/lib/generators/hayfork/rebuild_generator.rb +49 -0
- data/lib/generators/hayfork/templates/migrations/create.rb +51 -0
- data/lib/generators/hayfork/templates/migrations/rebuild.rb +8 -0
- data/lib/generators/hayfork/templates/model.rb +14 -0
- data/lib/generators/hayfork/templates/query.rb +10 -0
- data/lib/generators/hayfork/templates/query/exact_phrase.rb +19 -0
- data/lib/generators/hayfork/templates/query/object.rb +4 -0
- data/lib/generators/hayfork/templates/query/parser.rb +33 -0
- data/lib/generators/hayfork/templates/triggers.rb +21 -0
- data/lib/hayfork.rb +35 -0
- data/lib/hayfork/binding.rb +67 -0
- data/lib/hayfork/delete_sql.rb +24 -0
- data/lib/hayfork/errors.rb +4 -0
- data/lib/hayfork/insert_sql.rb +22 -0
- data/lib/hayfork/join.rb +32 -0
- data/lib/hayfork/query_object.rb +38 -0
- data/lib/hayfork/query_parser.rb +35 -0
- data/lib/hayfork/statement.rb +132 -0
- data/lib/hayfork/statement_builder.rb +78 -0
- data/lib/hayfork/trigger_builder.rb +17 -0
- data/lib/hayfork/triggers.rb +76 -0
- data/lib/hayfork/unaccent.rb +12 -0
- data/lib/hayfork/update_sql.rb +112 -0
- data/lib/hayfork/version.rb +3 -0
- data/postgres/unaccent.rules +1049 -0
- metadata +227 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
# Creates the Hayfork text-search configuration and the <%= table_name %> table
# that stores one row per searchable hit.
class Create<%= table_name.camelize %> < ActiveRecord::Migration<%= migration_version %>
  def change
    enable_extension :unaccent

    # Full-Text Search in Postgres can be configured extensively.
    #
    # Custom Parsers are able to match entities in documents and handle them
    # differently. Custom Dictionaries can be used to tune search for different
    # languages and to ignore certain words.
    #
    # Learn more at https://www.postgresql.org/docs/10/textsearch.html
    #
    # This block creates a TEXT SEARCH CONFIGURATION specifically for use
    # by Hayfork that uses an English dictionary and normalizes characters
    # by stripping accent marks and converting special characters (like smart
    # quotes) to their easily-typed ASCII counterparts (like straight quotes).
    #
    # Raw SQL cannot be reversed automatically, so the statements are wrapped
    # in `reversible` and paired with an explicit DROP; otherwise rolling this
    # migration back would raise ActiveRecord::IrreversibleMigration.
    reversible do |dir|
      dir.up do
        execute <<~SQL
          CREATE TEXT SEARCH CONFIGURATION public.hayfork ( COPY = pg_catalog.english );
          ALTER TEXT SEARCH CONFIGURATION public.hayfork ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, hword, hword_part, word WITH unaccent, english_stem;
        SQL
      end

      dir.down do
        execute <<~SQL
          DROP TEXT SEARCH CONFIGURATION public.hayfork;
        SQL
      end
    end

    create_table :<%= table_name %>, id: false do |t|
      t.string :<%= Hayfork::SEARCH_RESULT_TYPE %>, null: false
      t.integer :<%= Hayfork::SEARCH_RESULT_ID %>, null: false

      t.string :<%= Hayfork::SOURCE_TYPE %>, null: false
      t.integer :<%= Hayfork::SOURCE_ID %>, null: false
      t.string :<%= Hayfork::FIELD %>, null: false

      t.tsvector :<%= Hayfork::SEARCH_VECTOR %>
      t.text :<%= Hayfork::TEXT %>, null: false

      # Add additional columns to <%= table_name %> here.
      #
      # For example, to allow users to search only documents of their
      # own creation:
      #
      #     t.belongs_to :user, null: false
      #

      # If you add columns that will always be used in searches (like `user_id`),
      # consider including them in this index. For example:
      #
      #     enable_extension :btree_gist
      #     t.index [:user_id, :<%= Hayfork::SEARCH_VECTOR %>], using: "gist"
      #
      t.index :<%= Hayfork::SEARCH_VECTOR %>, using: "gist"
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# One row per searchable hit; each hit points at the record it should surface.
class <%= class_name %> < ActiveRecord::Base
<% if table_name != class_name.tableize -%>
  self.table_name = "<%= table_name %>"
<% end -%>

  belongs_to :search_result, polymorphic: true

  # Parses +querystring+, runs it against every hit, and returns the distinct
  # records those hits point to.
  def self.search(querystring)
    matching_hits = ::Query.parse(querystring).against(all)
    distinct_hits = matching_hits.select(:search_result_type, :search_result_id).distinct
    distinct_hits.preload(:search_result).map(&:search_result)
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Query
  # A run of words that must appear next to each other, in order.
  #
  # Defined by assigning the Struct directly (rather than inheriting from an
  # anonymous `Struct.new`) so no throwaway superclass is created and the
  # file can be loaded more than once without a "superclass mismatch" error.
  ExactPhrase = Struct.new(:words) do
    # Narrows +set+ to rows whose search vector matches this phrase.
    #
    # The words are joined with tsquery's `<->` operator and matched against
    # the Hayfork::SEARCH_VECTOR column using Hayfork.default_dictionary.
    #
    # @param set [#where, #arel_table] the relation to filter
    # @return whatever `set.where` returns
    def apply(set)
      set.where(Arel::Nodes::InfixOperation.new("@@",
        set.arel_table[Hayfork::SEARCH_VECTOR],
        to_tsquery(Hayfork.default_dictionary, words.join(" <-> "))))
    end

    # Builds the Arel node for `to_tsquery(dictionary, querystring)` with both
    # arguments quoted as SQL literals.
    private def to_tsquery(dictionary, querystring)
      Arel::Nodes::NamedFunction.new("to_tsquery", [
        Arel::Nodes.build_quoted(dictionary),
        Arel::Nodes.build_quoted(querystring) ])
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require_relative "exact_phrase"
|
2
|
+
|
3
|
+
module Query
  # Turns the quoted and unquoted chunks of a query string into
  # Query::ExactPhrase objects.
  class Parser < Hayfork::QueryParser

    # Handles text outside of quotes: every word becomes its own
    # single-word phrase.
    def parse_phrase(querystring, phrases)
      tokenize_words(querystring).each do |word|
        phrases << Query::ExactPhrase.new([ word ])
      end
    end

    # Handles text inside of quotes: all of its words form one phrase.
    #
    # Quoted text that contains no words (for example `"--"`) is skipped.
    # An ExactPhrase without words would produce an empty tsquery, which
    # matches nothing and would empty out a multi-phrase search.
    def parse_exact_phrase(querystring, phrases)
      words = tokenize_words(querystring)
      phrases << Query::ExactPhrase.new(words) unless words.empty?
    end

    # Splits +querystring+ (which may be nil) into an Array of words.
    def tokenize_words(querystring)
      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # the hyphenated word throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      #
      # `[[:word:]]` is used instead of `\w` because Ruby's `\w` only matches
      # ASCII characters, which would silently drop non-Latin words.
      querystring.to_s.scan(/[[:word:]]+/)
    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Hayfork.maintain(<%= class_name %>) do
  # Declare the triggers that keep <%= class_name %> up to date inside this block.
  # Documentation: https://github.com/boblail/hayfork#libhaystack_triggersrb
  #
  # A filled-in example looks like this:
  #
  #     Hayfork.maintain(Haystack) do
  #
  #       foreach(Employee) do
  #         insert(:full_name)
  #       end
  #
  #       foreach(Project) do
  #         insert(:title)
  #       end
  #
  #     end
  #
end
|
data/lib/hayfork.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "hayfork/errors"
|
2
|
+
require "hayfork/join"
|
3
|
+
require "hayfork/query_object"
|
4
|
+
require "hayfork/query_parser"
|
5
|
+
require "hayfork/trigger_builder"
|
6
|
+
require "hayfork/triggers"
|
7
|
+
require "hayfork/unaccent"
|
8
|
+
require "hayfork/version"
|
9
|
+
|
10
|
+
# Top-level namespace: haystack column names, search defaults, and the
# entry point used to declare triggers for a haystack.
module Hayfork
  extend Hayfork::Unaccent, Hayfork::Join

  # Names of the columns every haystack table is expected to have.
  TEXT = "text".freeze
  SEARCH_VECTOR = "search_vector".freeze
  SEARCH_RESULT_TYPE = "search_result_type".freeze
  SEARCH_RESULT_ID = "search_result_id".freeze
  SOURCE_TYPE = "source_type".freeze
  SOURCE_ID = "source_id".freeze
  FIELD = "field".freeze

  # Module-level defaults; both can be reassigned through the accessors below.
  @default_weight = "C".freeze
  @default_dictionary = "hayfork".freeze

  class << self
    attr_accessor :default_weight, :default_dictionary

    # Evaluates +block+ against a TriggerBuilder for +haystack+, then exposes
    # the resulting Triggers object as `haystack.triggers`.
    #
    # Returns the Triggers object.
    def maintain(haystack, &block)
      Triggers.new(haystack).tap do |built|
        TriggerBuilder.new(built).instance_eval(&block)
        haystack.singleton_class.send(:attr_reader, :triggers)
        haystack.instance_variable_set(:@triggers, built)
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Hayfork
  # Pairs a haystack column with the value that should be written to it and
  # renders that pair as a fragment of a SELECT list.
  class Binding < Struct.new(:statement, :column, :raw_value)

    # Column SQL types that are cast using a different type name.
    SPECIAL_CASTS = {
      "character varying" => "varchar",
      "tsvector" => "varchar"
    }.freeze

    # Name of the haystack column this binding fills.
    def column_name
      column.name
    end
    alias key column_name

    # The value expression followed by the double-quoted column name.
    def to_sql
      %(#{quoted_value} "#{column.name}")
    end
    alias to_s to_sql

    # SQL expression for the value:
    #  * an Arel attribute becomes a `table.column` reference (see below),
    #  * any other Arel node is rendered with its own `to_sql`,
    #  * anything else is quoted by the haystack's connection and cast.
    def quoted_value
      case raw_value
      when Arel::Attributes::Attribute then attribute_expression
      when Arel::Nodes::Node then raw_value.to_sql
      else "#{statement.haystack.connection.quote(raw_value)}::#{cast_type}"
      end
    end

    private

    # Type name used when casting a value to this binding's column.
    def cast_type
      SPECIAL_CASTS.fetch(column.sql_type, column.sql_type)
    end

    # Builds the expression for an attribute of the source table. The steps
    # are applied in order: cast, unnest, then tsvector conversion.
    def attribute_expression
      source_column = raw_value.relation.send(:type_caster).send(:types).columns_hash[raw_value.name.to_s]
      unless source_column
        fail Hayfork::ColumnNotFoundError, "'#{raw_value.name}' is not a column on '#{raw_value.relation.name}'"
      end

      expression = "#{raw_value.relation.name}.#{raw_value.name}"

      # Cast only when the source and destination SQL types differ.
      expression = "#{expression}::#{cast_type}" if column.sql_type != source_column.sql_type

      # When the statement unnests, text-bearing columns are split on newlines.
      if statement.unnest? && [Hayfork::SEARCH_VECTOR, Hayfork::TEXT].member?(column.name)
        expression = "unnest(string_to_array(#{expression}, E'\\n'))"
      end

      return expression unless column.type == :tsvector

      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # those words throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      "setweight(to_tsvector('#{statement.dictionary}', replace(#{expression}, '-', ' ')), '#{statement.weight}')"
    end

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Hayfork
  # Renders the DELETE statement that removes from the haystack the rows
  # matching the OLD row of +relation+.
  class DeleteSql
    attr_reader :haystack, :relation, :bindings

    # The search-vector and text bindings are dropped; rows are matched on
    # the remaining bindings only.
    def initialize(haystack, relation, bindings)
      @haystack = haystack
      @relation = relation
      content_keys = [Hayfork::SEARCH_VECTOR, Hayfork::TEXT]
      @bindings = bindings.reject { |binding| content_keys.include?(binding.key) }
    end

    # Returns the DELETE statement as a String ending in a newline.
    def to_sql
      old_row = relation
        .select(bindings.map(&:to_s))
        .from(%((SELECT OLD.*) "#{relation.table_name}"))

      # One equality per binding, comparing the haystack row to subquery "x".
      conditions = bindings
        .map { |binding| "#{haystack.table_name}.#{binding.key}=x.#{binding.key}" }
        .join(" AND ")

      %(DELETE FROM #{haystack.table_name} USING (#{old_row.to_sql}) "x" WHERE #{conditions};\n)
    end
    alias to_s to_sql
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Hayfork
  # Renders the INSERT statement that copies the bound values of +relation+
  # into the haystack table.
  class InsertSql
    attr_reader :haystack, :relation, :bindings

    def initialize(haystack, relation, bindings)
      @haystack, @relation, @bindings = haystack, relation, bindings
    end

    # Returns the INSERT statement as a String ending in a newline.
    #
    # from - when true (the default) the values are selected from the NEW
    #        row, aliased to the relation's table name; when false the
    #        relation's own FROM clause is left untouched.
    #
    # Rows whose text is an empty string are filtered out by the WHERE clause.
    def to_sql(from: true)
      projection = relation.select(bindings.map(&:to_s))
      projection = projection.from(%((SELECT NEW.*) "#{relation.table_name}")) if from
      column_list = bindings.map(&:key).join(", ")

      %(INSERT INTO #{haystack.table_name} (#{column_list}) SELECT * FROM (#{projection.to_sql}) "x" WHERE "x"."#{Hayfork::TEXT}" != '';\n)
    end
    alias to_s to_sql
  end
end
|
data/lib/hayfork/join.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Hayfork
  # Helpers for joining a relation to one of its associations.
  module Join

    # Joins +relation+ according to +join_value+.
    #
    # A String is handed to `joins` unchanged. A Symbol names an association
    # on the relation's model: collection associations are left-outer-joined
    # and then restricted to rows where the joined table's id is present;
    # singular associations use a plain `joins`.
    #
    # Raises NotImplementedError for other association macros and for
    # join values that are neither a String nor a Symbol.
    def join(relation, join_value)
      return relation.joins(join_value) if join_value.is_a?(String)

      unless join_value.is_a?(Symbol)
        fail NotImplementedError, "Statement#joins does not yet accept #{join_value.class} params like #{join_value.inspect}. Will this scenario work with `has_many through:` or `has_one through:`?"
      end

      reflection = reflection_for(relation.model, join_value)

      if %i[has_many has_and_belongs_to_many].include?(reflection.macro)
        joined_id_present = reflection.klass.arel_table[:id].not_eq(nil)
        relation.left_outer_joins(join_value).where(joined_id_present)
      elsif %i[belongs_to has_one].include?(reflection.macro)
        relation.joins(join_value)
      else
        fail NotImplementedError, "Joins haven't been implemented for #{reflection.macro.inspect} associations"
      end
    end

    # Looks up +association+ on +model+.
    #
    # Raises AssociationNotFoundError when the model has no such association.
    def reflection_for(model, association)
      model.reflect_on_association(association) ||
        fail(AssociationNotFoundError, "Association ':#{association}' not found on '#{model}'")
    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Hayfork
|
2
|
+
class QueryObject
|
3
|
+
attr_reader :phrases
|
4
|
+
|
5
|
+
def initialize(phrases)
|
6
|
+
@phrases = phrases
|
7
|
+
end
|
8
|
+
|
9
|
+
def against(set)
|
10
|
+
return set.none if phrases.none?
|
11
|
+
return phrases.first.apply(set) if phrases.one?
|
12
|
+
|
13
|
+
# The haystack may contain more than one hit per result.
|
14
|
+
#
|
15
|
+
# A composite query may match more than one hit for a result.
|
16
|
+
# For example, it may match both a hymn's author and its tune.
|
17
|
+
#
|
18
|
+
# If we search more than one phrase, we want to find all of
|
19
|
+
# the hits that match *_any_* of the search phrases but only
|
20
|
+
# return hits for results that match *_all_* of the phrases.
|
21
|
+
filter_hits_by_any_phrase(filter_results_that_match_all_phrases(set))
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def filter_hits_by_any_phrase(set)
|
27
|
+
phrases[1..-1].inject(phrases.first.apply(set)) { |memo, phrase| memo.or(phrase.apply(set)) }
|
28
|
+
end
|
29
|
+
|
30
|
+
def filter_results_that_match_all_phrases(set)
|
31
|
+
scope = set.select(Hayfork::SEARCH_RESULT_ID)
|
32
|
+
set.where(
|
33
|
+
set.arel_table[Hayfork::SEARCH_RESULT_ID].in(
|
34
|
+
phrases[1..-1].inject(phrases.first.apply(scope).arel) { |memo, phrase| Arel::Nodes::Intersect.new(memo, phrase.apply(scope).arel) }))
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# StringScanner lives in the "strscan" standard library and is not loaded
# by default.
require "strscan"

module Hayfork
  # Base class for query parsers. It splits a query string into quoted and
  # unquoted chunks and hands each chunk to a hook that subclasses implement.
  class QueryParser
    attr_reader :klass, :querystring

    # klass       - the class instantiated with the collected phrases
    # querystring - the raw text to parse (nil is treated as empty)
    def initialize(klass, querystring)
      @klass = klass
      @querystring = querystring
    end

    # Walks the unaccented query string, alternating between text outside of
    # double quotes (passed to #parse_phrase) and text inside of them (passed
    # to #parse_exact_phrase). An unterminated quote runs to the end of the
    # string.
    #
    # Returns `klass.new(phrases)`, where phrases is the Array the hooks
    # appended to.
    def parse!
      phrases = []
      scanner = StringScanner.new(Hayfork.unaccent(querystring.to_s))

      until scanner.eos?
        # `scan` returns nil when the next character is a quote; there is
        # nothing to hand to the hook in that case.
        loose_text = scanner.scan(/[^"]+/)
        parse_phrase(loose_text, phrases) if loose_text
        break if scanner.eos?

        scanner.getch # opening "
        quoted_text = scanner.scan(/[^"]+/)
        parse_exact_phrase(quoted_text, phrases) if quoted_text
        scanner.getch # closing " (nil at end of input)
      end

      klass.new(phrases)
    end

    # Hook: append phrases for a chunk of unquoted text to +phrases+.
    def parse_phrase(querystring, phrases)
      raise NotImplementedError
    end

    # Hook: append phrases for a chunk of quoted text to +phrases+.
    def parse_exact_phrase(querystring, phrases)
      raise NotImplementedError
    end

  end
end
|