hayfork 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/.travis.yml +16 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +200 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/hayfork.gemspec +33 -0
- data/lib/generators/hayfork/haystack_generator.rb +45 -0
- data/lib/generators/hayfork/rebuild_generator.rb +49 -0
- data/lib/generators/hayfork/templates/migrations/create.rb +51 -0
- data/lib/generators/hayfork/templates/migrations/rebuild.rb +8 -0
- data/lib/generators/hayfork/templates/model.rb +14 -0
- data/lib/generators/hayfork/templates/query.rb +10 -0
- data/lib/generators/hayfork/templates/query/exact_phrase.rb +19 -0
- data/lib/generators/hayfork/templates/query/object.rb +4 -0
- data/lib/generators/hayfork/templates/query/parser.rb +33 -0
- data/lib/generators/hayfork/templates/triggers.rb +21 -0
- data/lib/hayfork.rb +35 -0
- data/lib/hayfork/binding.rb +67 -0
- data/lib/hayfork/delete_sql.rb +24 -0
- data/lib/hayfork/errors.rb +4 -0
- data/lib/hayfork/insert_sql.rb +22 -0
- data/lib/hayfork/join.rb +32 -0
- data/lib/hayfork/query_object.rb +38 -0
- data/lib/hayfork/query_parser.rb +35 -0
- data/lib/hayfork/statement.rb +132 -0
- data/lib/hayfork/statement_builder.rb +78 -0
- data/lib/hayfork/trigger_builder.rb +17 -0
- data/lib/hayfork/triggers.rb +76 -0
- data/lib/hayfork/unaccent.rb +12 -0
- data/lib/hayfork/update_sql.rb +112 -0
- data/lib/hayfork/version.rb +3 -0
- data/postgres/unaccent.rules +1049 -0
- metadata +227 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
# Creates everything a Hayfork haystack needs in Postgres:
#
#   1. the `unaccent` extension,
#   2. a `public.hayfork` TEXT SEARCH CONFIGURATION, and
#   3. the <%= table_name %> table with a GiST index on its search vector.
#
class Create<%= table_name.camelize %> < ActiveRecord::Migration<%= migration_version %>
  def change
    enable_extension :unaccent

    # Full-Text Search in Postgres can be configured extensively.
    #
    # Custom Parsers are able to match entities in documents and handle them
    # differently. Custom Dictionaries can be used to tune search for different
    # languages and to ignore certain words.
    #
    # Learn more at https://www.postgresql.org/docs/10/textsearch.html
    #
    # This block creates a TEXT SEARCH CONFIGURATION specifically for use
    # by Hayfork that uses an English dictionary and normalizes characters
    # by stripping accent marks and converting special characters (like smart
    # quotes) to their easily-typed ASCII counterparts (like straight quotes).
    #
    # Raw SQL passed to `execute` cannot be reversed automatically, so the
    # statements are wrapped in `reversible` with an explicit `down` step.
    # Without it, rolling this migration back would raise
    # ActiveRecord::IrreversibleMigration.
    #
    reversible do |dir|
      dir.up do
        execute <<~SQL
          CREATE TEXT SEARCH CONFIGURATION public.hayfork ( COPY = pg_catalog.english );
          ALTER TEXT SEARCH CONFIGURATION public.hayfork ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, hword, hword_part, word WITH unaccent, english_stem;
        SQL
      end

      dir.down do
        execute <<~SQL
          DROP TEXT SEARCH CONFIGURATION public.hayfork;
        SQL
      end
    end

    create_table :<%= table_name %>, id: false do |t|
      t.string :<%= Hayfork::SEARCH_RESULT_TYPE %>, null: false
      t.integer :<%= Hayfork::SEARCH_RESULT_ID %>, null: false

      t.string :<%= Hayfork::SOURCE_TYPE %>, null: false
      t.integer :<%= Hayfork::SOURCE_ID %>, null: false
      t.string :<%= Hayfork::FIELD %>, null: false

      t.tsvector :<%= Hayfork::SEARCH_VECTOR %>
      t.text :<%= Hayfork::TEXT %>, null: false

      # Add additional columns to <%= table_name %> here.
      #
      # For example, to allow users to search only documents of their
      # own creation:
      #
      #     t.belongs_to :user, null: false
      #

      # If you add columns that will always be used in searches (like `user_id`),
      # consider including them in this index. For example:
      #
      #     enable_extension :btree_gist
      #     t.index [:user_id, :<%= Hayfork::SEARCH_VECTOR %>], using: "gist"
      #
      t.index :<%= Hayfork::SEARCH_VECTOR %>, using: "gist"
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# The haystack: every row is one searchable hit that points back (via the
# polymorphic `search_result` association) at the record it was indexed for.
class <%= class_name %> < ActiveRecord::Base
<% if table_name != class_name.tableize -%>
  self.table_name = "<%= table_name %>"
<% end -%>

  belongs_to :search_result, polymorphic: true

  # Parses `querystring`, runs it against the haystack, and returns the
  # search results of the distinct (type, id) pairs among the matching hits.
  def self.search(querystring)
    hits = ::Query.parse(querystring).against(all)
    distinct_hits = hits.select(:search_result_type, :search_result_id).distinct
    distinct_hits.preload(:search_result).map(&:search_result)
  end

end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Query
  # A run of words that must appear next to each other, in order, in a hit's
  # search vector. A single-word phrase simply matches that word.
  class ExactPhrase < Struct.new(:words)

    # Narrows `set` to the rows whose search vector matches (`@@`) this phrase.
    #
    # The words are joined with ` <-> ` and handed to `to_tsquery` together
    # with Hayfork's default dictionary.
    def apply(set)
      search_vector = set.arel_table[Hayfork::SEARCH_VECTOR]
      tsquery = to_tsquery(Hayfork.default_dictionary, words.join(" <-> "))
      matches = Arel::Nodes::InfixOperation.new("@@", search_vector, tsquery)
      set.where(matches)
    end

    private

    # Builds the Arel node for `to_tsquery(<dictionary>, <querystring>)`,
    # passing both arguments through `build_quoted`.
    def to_tsquery(dictionary, querystring)
      arguments = [dictionary, querystring].map { |argument| Arel::Nodes.build_quoted(argument) }
      Arel::Nodes::NamedFunction.new("to_tsquery", arguments)
    end

  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require_relative "exact_phrase"
|
2
|
+
|
3
|
+
module Query
  # Turns the pieces of a querystring into Query::ExactPhrase objects.
  #
  # Hayfork::QueryParser#parse! splits the querystring on double quotes and
  # calls `parse_phrase` for unquoted text and `parse_exact_phrase` for quoted
  # text; both append to the shared `phrases` array.
  class Parser < Hayfork::QueryParser

    # Unquoted text: every word becomes its own one-word phrase.
    def parse_phrase(querystring, phrases)
      tokenize_words(querystring).each do |word|
        phrases << Query::ExactPhrase.new([ word ])
      end
    end

    # Quoted text: all of its words form a single exact phrase.
    #
    # Quotes that contain no words at all (`""`, `"-"`) are ignored. An
    # ExactPhrase without words would produce an empty tsquery that matches
    # nothing, which in turn would empty a search that combines several phrases.
    def parse_exact_phrase(querystring, phrases)
      words = tokenize_words(querystring)
      phrases << Query::ExactPhrase.new(words) unless words.empty?
    end

    # Splits `querystring` (nil is treated as "") into an Array of words.
    def tokenize_words(querystring)
      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # the hyphenated word throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      #
      # `[[:word:]]` is used instead of `\w` because `\w` only matches ASCII
      # word characters in Ruby; with it, words written in other scripts were
      # dropped from the query entirely.
      querystring.to_s.scan(/[[:word:]]+/)
    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Declares the triggers for <%= class_name %>. The block below is handed to
# Hayfork.maintain, which evaluates it to build that haystack's triggers.
Hayfork.maintain(<%= class_name %>) do

  # Define triggers here.
  # Learn more at https://github.com/boblail/hayfork#libhaystack_triggersrb
  #
  # Example:
  #
  #     Hayfork.maintain(Haystack) do
  #
  #       foreach(Employee) do
  #         insert(:full_name)
  #       end
  #
  #       foreach(Project) do
  #         insert(:title)
  #       end
  #
  #     end
  #

end
|
data/lib/hayfork.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "hayfork/errors"
|
2
|
+
require "hayfork/join"
|
3
|
+
require "hayfork/query_object"
|
4
|
+
require "hayfork/query_parser"
|
5
|
+
require "hayfork/trigger_builder"
|
6
|
+
require "hayfork/triggers"
|
7
|
+
require "hayfork/unaccent"
|
8
|
+
require "hayfork/version"
|
9
|
+
|
10
|
+
# Top-level namespace. Holds the names of the columns a haystack table is
# expected to have, the library-wide defaults, and the `maintain` entry point.
module Hayfork
  extend Hayfork::Unaccent, Hayfork::Join

  # Column names of a haystack table.
  TEXT               = "text".freeze
  SEARCH_VECTOR      = "search_vector".freeze
  SEARCH_RESULT_TYPE = "search_result_type".freeze
  SEARCH_RESULT_ID   = "search_result_id".freeze
  SOURCE_TYPE        = "source_type".freeze
  SOURCE_ID          = "source_id".freeze
  FIELD              = "field".freeze

  # Library-wide defaults; both can be reassigned through the accessors below.
  @default_weight = "C".freeze
  @default_dictionary = "hayfork".freeze

  class << self
    attr_accessor :default_weight, :default_dictionary

    # Builds the triggers for `haystack` by evaluating `block` in the context
    # of a TriggerBuilder, then exposes them as `haystack.triggers`.
    #
    # Returns the Triggers object.
    def maintain(haystack, &block)
      Triggers.new(haystack).tap do |triggers|
        TriggerBuilder.new(triggers).instance_eval(&block)
        haystack.singleton_class.class_eval { attr_reader :triggers }
        haystack.instance_variable_set(:@triggers, triggers)
      end
    end
  end

end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Hayfork
  # Pairs a haystack `column` with the `raw_value` a `statement` supplies for
  # it, and renders the pair as one aliased item of a SELECT list.
  class Binding < Struct.new(:statement, :column, :raw_value)

    # Cast targets to use in place of a column's own sql_type.
    SPECIAL_CASTS = {
      "character varying" => "varchar",
      "tsvector" => "varchar"
    }.freeze

    # The value expression followed by the double-quoted column name.
    def to_sql
      "#{quoted_value} \"#{column.name}\""
    end
    alias to_s to_sql

    def column_name
      column.name
    end
    alias key column_name

    # SQL expression for `raw_value`:
    #
    # * an Arel attribute becomes a (possibly cast / unnested / weighted)
    #   reference to the source column,
    # * any other Arel node is rendered with its own `to_sql`,
    # * anything else is quoted by the haystack's connection and cast.
    #
    # Raises Hayfork::ColumnNotFoundError when an Arel attribute names a
    # column that its relation does not have.
    def quoted_value
      case raw_value
      when Arel::Attributes::Attribute
        attribute_expression
      when Arel::Nodes::Node
        raw_value.to_sql
      else
        type = cast_type
        "#{statement.haystack.connection.quote(raw_value)}::#{type}"
      end
    end

    private

    # Type to cast to so the value fits `column`.
    def cast_type
      SPECIAL_CASTS.fetch(column.sql_type, column.sql_type)
    end

    # Expression for a value read from a column of the source relation.
    def attribute_expression
      source = raw_value.relation
      source_column = source.send(:type_caster).send(:types).columns_hash[raw_value.name.to_s]
      unless source_column
        raise Hayfork::ColumnNotFoundError, "'#{raw_value.name}' is not a column on '#{source.name}'"
      end

      expression = "#{source.name}.#{raw_value.name}"

      # Only cast when the source column's type differs from the target's.
      expression = "#{expression}::#{cast_type}" if column.sql_type != source_column.sql_type

      # When the statement unnests, the text is split on newlines into one row
      # per line (applies to the text and search vector columns only).
      if statement.unnest? && [Hayfork::SEARCH_VECTOR, Hayfork::TEXT].member?(column.name)
        expression = "unnest(string_to_array(#{expression}, E'\\n'))"
      end

      return expression unless column.type == :tsvector

      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # those words throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      "setweight(to_tsvector('#{statement.dictionary}', replace(#{expression}, '-', ' ')), '#{statement.weight}')"
    end

  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Hayfork
  # Renders the DELETE statement that removes, from the haystack, the rows
  # matching the bindings evaluated against the `OLD` row.
  class DeleteSql
    attr_reader :haystack, :relation, :bindings

    # The search vector and text bindings are dropped: they are not used to
    # identify the rows to delete.
    def initialize(haystack, relation, bindings)
      @haystack = haystack
      @relation = relation
      @bindings = bindings.reject do |binding|
        [Hayfork::SEARCH_VECTOR, Hayfork::TEXT].include?(binding.key)
      end
    end

    # Returns the DELETE statement as a String ending in a newline.
    def to_sql
      # Select the bound values from OLD, aliased as the relation's own table
      # so the relation's joins and conditions still apply.
      old_values = relation
        .select(bindings.map(&:to_s))
        .from("(SELECT OLD.*) \"#{relation.table_name}\"")

      constraints = bindings
        .map { |binding| "#{haystack.table_name}.#{binding.key}=x.#{binding.key}" }
        .join(" AND ")

      <<~SQL
        DELETE FROM #{haystack.table_name} USING (#{old_values.to_sql}) "x" WHERE #{constraints};
      SQL
    end
    alias_method :to_s, :to_sql

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Hayfork
  # Renders the INSERT statement that copies the bound values into the
  # haystack, skipping rows whose text is empty.
  class InsertSql
    attr_reader :haystack, :relation, :bindings

    def initialize(haystack, relation, bindings)
      @haystack, @relation, @bindings = haystack, relation, bindings
    end

    # Returns the INSERT statement as a String ending in a newline.
    #
    # from - when true (the default) the values are selected from the `NEW`
    #        row, aliased as the relation's table; when false the relation's
    #        own FROM clause is left untouched.
    def to_sql(from: true)
      source = relation.select(bindings.map(&:to_s))
      source = source.from("(SELECT NEW.*) \"#{relation.table_name}\"") if from
      columns = bindings.map(&:key).join(", ")

      <<~SQL
        INSERT INTO #{haystack.table_name} (#{columns}) SELECT * FROM (#{source.to_sql}) "x" WHERE "x"."#{Hayfork::TEXT}" != '';
      SQL
    end
    alias_method :to_s, :to_sql

  end
end
|
data/lib/hayfork/join.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Hayfork
  # Helpers for joining a relation to one of its model's associations.
  module Join

    # Joins `relation` according to `join_value`:
    #
    # * a String is passed straight to `joins`;
    # * a Symbol names an association: `belongs_to` / `has_one` are joined
    #   with `joins`; `has_many` / `has_and_belongs_to_many` use a left outer
    #   join restricted to rows where the associated table's id is not NULL.
    #
    # Raises AssociationNotFoundError for an unknown association and
    # NotImplementedError for any other kind of association or argument.
    def join(relation, join_value)
      return relation.joins(join_value) if join_value.is_a?(String)

      unless join_value.is_a?(Symbol)
        raise NotImplementedError, "Statement#joins does not yet accept #{join_value.class} params like #{join_value.inspect}. Will this scenario work with `has_many through:` or `has_one through:`?"
      end

      reflection = reflection_for(relation.model, join_value)
      macro = reflection.macro

      if %i[has_many has_and_belongs_to_many].include?(macro)
        joined = relation.left_outer_joins(join_value)
        joined.where(reflection.klass.arel_table[:id].not_eq(nil))
      elsif %i[belongs_to has_one].include?(macro)
        relation.joins(join_value)
      else
        raise NotImplementedError, "Joins haven't been implemented for #{macro.inspect} associations"
      end
    end

    # Looks up `association` on `model`, raising AssociationNotFoundError when
    # the model has no such association.
    def reflection_for(model, association)
      model.reflect_on_association(association) ||
        raise(AssociationNotFoundError, "Association ':#{association}' not found on '#{model}'")
    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Hayfork
  # A parsed query: a list of phrases, each of which knows how to `apply`
  # itself to a set of haystack rows.
  class QueryObject
    attr_reader :phrases

    def initialize(phrases)
      @phrases = phrases
    end

    # Narrows `set` to the hits that satisfy this query.
    #
    # No phrases matches nothing (`set.none`); a single phrase is applied
    # directly; several phrases are combined as described below.
    def against(set)
      if phrases.none?
        set.none
      elsif phrases.one?
        phrases.first.apply(set)
      else
        # The haystack may contain more than one hit per result.
        #
        # A composite query may match more than one hit for a result.
        # For example, it may match both a hymn's author and its tune.
        #
        # If we search more than one phrase, we want to find all of
        # the hits that match *_any_* of the search phrases but only
        # return hits for results that match *_all_* of the phrases.
        filter_hits_by_any_phrase(filter_results_that_match_all_phrases(set))
      end
    end

    private

    # ORs together the result of applying every phrase to `set`.
    def filter_hits_by_any_phrase(set)
      phrases.drop(1).reduce(phrases.first.apply(set)) do |hits, phrase|
        hits.or(phrase.apply(set))
      end
    end

    # Restricts `set` to rows whose search result id appears in the
    # INTERSECT of the ids matched by each individual phrase.
    #
    # NOTE(review): only the id is intersected, not the search result type;
    # confirm whether results of different types sharing an id can collide.
    def filter_results_that_match_all_phrases(set)
      ids_only = set.select(Hayfork::SEARCH_RESULT_ID)
      id_column = set.arel_table[Hayfork::SEARCH_RESULT_ID]

      ids_matching_all = phrases.drop(1).reduce(phrases.first.apply(ids_only).arel) do |intersection, phrase|
        Arel::Nodes::Intersect.new(intersection, phrase.apply(ids_only).arel)
      end

      set.where(id_column.in(ids_matching_all))
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "strscan" # StringScanner is not loaded by default

module Hayfork
  # Base class for query parsers.
  #
  # `parse!` splits the querystring on double quotes and hands the pieces to
  # two hooks that subclasses must implement: `parse_phrase` for text outside
  # quotes and `parse_exact_phrase` for text inside quotes. Both hooks receive
  # the piece of text and the Array they should append parsed phrases to.
  class QueryParser
    attr_reader :klass, :querystring

    # klass       - the class instantiated with the parsed phrases
    # querystring - the raw text to parse
    def initialize(klass, querystring)
      @klass = klass
      @querystring = querystring
    end

    # Parses the querystring (after passing it through Hayfork.unaccent) and
    # returns `klass.new(phrases)`.
    #
    # Empty segments are skipped: when the querystring starts with a quote or
    # contains an empty pair of quotes, `scan` returns nil and there is
    # nothing to hand to the hooks. A missing closing quote is tolerated; the
    # rest of the string is treated as the exact phrase.
    def parse!
      phrases = []
      scanner = StringScanner.new(Hayfork.unaccent(querystring))

      until scanner.eos?
        unquoted = scanner.scan(/[^"]+/)
        parse_phrase(unquoted, phrases) if unquoted
        break if scanner.eos?

        scanner.getch # opening "
        quoted = scanner.scan(/[^"]+/)
        parse_exact_phrase(quoted, phrases) if quoted
        scanner.getch # closing " (nil when the quote was never closed)
      end

      klass.new(phrases)
    end

    # Hook: parse text that appeared outside of quotes.
    def parse_phrase(querystring, phrases)
      raise NotImplementedError
    end

    # Hook: parse text that appeared inside a pair of quotes.
    def parse_exact_phrase(querystring, phrases)
      raise NotImplementedError
    end

  end
end
|