hayfork 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ class Create<%= table_name.camelize %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ enable_extension :unaccent
4
+
5
+ # Full-Text Search in Postgres can be configured extensively.
6
+ #
7
+ # Custom Parsers are able to match entities in documents and handle them
8
+ # differently. Custom Dictionaries can be used to tune search for different
9
+ # languages and to ignore certain words.
10
+ #
11
+ # Learn more at https://www.postgresql.org/docs/10/textsearch.html
12
+ #
13
+ # This block creates a TEXT SEARCH CONFIGURATION specifically for use
14
+ # by Hayfork that uses an English dictionary and normalizes characters
15
+ # by stripping accent marks and converting special characters (like smart
16
+ # quotes) to their easily-typed ASCII counterparts (like straight quotes).
17
+ #
18
+ execute <<~SQL
19
+ CREATE TEXT SEARCH CONFIGURATION public.hayfork ( COPY = pg_catalog.english );
20
+ ALTER TEXT SEARCH CONFIGURATION public.hayfork ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, hword, hword_part, word WITH unaccent, english_stem;
21
+ SQL
22
+
23
+ create_table :<%= table_name %>, id: false do |t|
24
+ t.string :<%= Hayfork::SEARCH_RESULT_TYPE %>, null: false
25
+ t.integer :<%= Hayfork::SEARCH_RESULT_ID %>, null: false
26
+
27
+ t.string :<%= Hayfork::SOURCE_TYPE %>, null: false
28
+ t.integer :<%= Hayfork::SOURCE_ID %>, null: false
29
+ t.string :<%= Hayfork::FIELD %>, null: false
30
+
31
+ t.tsvector :<%= Hayfork::SEARCH_VECTOR %>
32
+ t.text :<%= Hayfork::TEXT %>, null: false
33
+
34
+ # Add additional columns to <%= table_name %> here.
35
+ #
36
+ # For example, to allow users to search only documents of their
37
+ # own creation:
38
+ #
39
+ # t.belongs_to :user, null: false
40
+ #
41
+
42
+ # If you add columns that will always be used in searches (like `user_id`),
43
+ # consider including them in this index. For example:
44
+ #
45
+ # enable_extension :btree_gist
46
+ # t.index [:tenant_id, :<%= Hayfork::SEARCH_VECTOR %>], using: "gist"
47
+ #
48
+ t.index :<%= Hayfork::SEARCH_VECTOR %>, using: "gist"
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,8 @@
1
+ require "triggers"
2
+
3
+ class Rebuild<%= table_name.camelize %> < ActiveRecord::Migration<%= migration_version %>
4
+ def up
5
+ execute <%= class_name %>.triggers.replace
6
+ execute <%= class_name %>.triggers.rebuild.lines
7
+ end
8
+ end
@@ -0,0 +1,14 @@
1
+ class <%= class_name %> < ActiveRecord::Base
2
+ <% unless table_name == class_name.tableize -%>
3
+ self.table_name = "<%= table_name %>"
4
+ <% end -%>
5
+
6
+ belongs_to :search_result, polymorphic: true
7
+
8
+ def self.search(querystring)
9
+ ::Query.parse(querystring).against(all)
10
+ .select(:search_result_type, :search_result_id).distinct
11
+ .preload(:search_result).map(&:search_result)
12
+ end
13
+
14
+ end
@@ -0,0 +1,10 @@
1
+ require_relative "query/parser"
2
+ require_relative "query/object"
3
+
# Entry point for turning a search string into a query object.
module Query
  class << self

    # Hands +querystring+ to a Query::Parser configured to build
    # Query::Object instances and returns the result of its `parse!`.
    def parse(querystring)
      parser = Query::Parser.new(Query::Object, querystring)
      parser.parse!
    end

  end
end
@@ -0,0 +1,19 @@
module Query
  # A sequence of words that must be matched in order; a single-word phrase
  # is simply one word.
  class ExactPhrase < Struct.new(:words)

    # Returns +set+ narrowed with `search_vector @@ to_tsquery(dictionary, ...)`,
    # where the tsquery text is the words joined by the `<->` operator.
    def apply(set)
      vector = set.arel_table[Hayfork::SEARCH_VECTOR]
      tsquery = to_tsquery(Hayfork.default_dictionary, words.join(" <-> "))

      set.where(Arel::Nodes::InfixOperation.new("@@", vector, tsquery))
    end

    private

    # Builds the Arel node for `to_tsquery(dictionary, querystring)` with both
    # arguments passed as quoted values.
    def to_tsquery(dictionary, querystring)
      arguments = [ dictionary, querystring ].map { |argument| Arel::Nodes.build_quoted(argument) }
      Arel::Nodes::NamedFunction.new("to_tsquery", arguments)
    end

  end
end
@@ -0,0 +1,4 @@
module Query
  # The query object handed to Query::Parser by Query.parse. It adds nothing
  # of its own; all behavior comes from Hayfork::QueryObject.
  #
  # NOTE: inside `module Query` the bare constant `Object` resolves to this
  # class, not to ::Object.
  class Object < Hayfork::QueryObject; end
end
@@ -0,0 +1,33 @@
1
+ require_relative "exact_phrase"
2
+
module Query
  # Turns the segments found by Hayfork::QueryParser#parse! into
  # Query::ExactPhrase objects.
  class Parser < Hayfork::QueryParser

    # Handles text found outside of double quotes: every word becomes its own
    # one-word phrase appended to +phrases+.
    def parse_phrase(querystring, phrases)
      tokenize_words(querystring).each do |word|
        phrases << Query::ExactPhrase.new([ word ])
      end
    end

    # Handles text found inside double quotes: all of its words become one
    # phrase appended to +phrases+.
    #
    # Quoted text that contains no words (`""`, `" "`, `"--"`) is skipped. An
    # ExactPhrase without words produces an empty tsquery that matches
    # nothing, which would make a multi-phrase search return no results.
    def parse_exact_phrase(querystring, phrases)
      words = tokenize_words(querystring)
      phrases << Query::ExactPhrase.new(words) unless words.empty?
    end

    # Splits +querystring+ (nil is treated as an empty string) into words.
    def tokenize_words(querystring)
      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # the hyphenated word throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      #
      # `[[:word:]]` is used rather than `\w` because `\w` only matches ASCII
      # characters in Ruby; with it, words written in other scripts would be
      # dropped and words with unhandled accented letters split in two.
      querystring.to_s.scan(/[[:word:]]+/)
    end

  end
end
@@ -0,0 +1,21 @@
1
+ Hayfork.maintain(<%= class_name %>) do
2
+
3
+ # Define triggers here.
4
+ # Learn more at https://github.com/boblail/hayfork#libhaystack_triggersrb
5
+ #
6
+ # Example:
7
+ #
8
+ # Hayfork.maintain(Haystack) do
9
+ #
10
+ # foreach(Employee) do
11
+ # insert(:full_name)
12
+ # end
13
+ #
14
+ # foreach(Project) do
15
+ # insert(:title)
16
+ # end
17
+ #
18
+ # end
19
+ #
20
+
21
+ end
data/lib/hayfork.rb ADDED
@@ -0,0 +1,35 @@
1
+ require "hayfork/errors"
2
+ require "hayfork/join"
3
+ require "hayfork/query_object"
4
+ require "hayfork/query_parser"
5
+ require "hayfork/trigger_builder"
6
+ require "hayfork/triggers"
7
+ require "hayfork/unaccent"
8
+ require "hayfork/version"
9
+
# Top-level namespace. Also exposes the helpers from Hayfork::Unaccent and
# Hayfork::Join as module-level methods.
module Hayfork
  extend Hayfork::Unaccent, Hayfork::Join

  # Names of the columns Hayfork expects on a haystack table.
  TEXT = "text".freeze
  SEARCH_VECTOR = "search_vector".freeze
  SEARCH_RESULT_TYPE = "search_result_type".freeze
  SEARCH_RESULT_ID = "search_result_id".freeze
  SOURCE_TYPE = "source_type".freeze
  SOURCE_ID = "source_id".freeze
  FIELD = "field".freeze

  # Module-level settings, readable and writable through the accessors below.
  @default_weight = "C".freeze
  @default_dictionary = "hayfork".freeze

  class << self
    attr_accessor :default_weight, :default_dictionary

    # Builds a Triggers object for +haystack+, evaluates +block+ in the
    # context of a TriggerBuilder wrapping it, and then exposes the result
    # as `haystack.triggers`. Returns the Triggers object.
    def maintain(haystack, &block)
      Triggers.new(haystack).tap do |triggers|
        TriggerBuilder.new(triggers).instance_eval(&block)
        haystack.singleton_class.send(:attr_reader, :triggers)
        haystack.instance_variable_set(:@triggers, triggers)
      end
    end
  end

end
@@ -0,0 +1,67 @@
module Hayfork
  # Pairs a haystack +column+ with the value (+raw_value+) that a generated
  # statement should select into it.
  class Binding < Struct.new(:statement, :column, :raw_value)

    # SQL types that are cast to something other than their own name.
    SPECIAL_CASTS = {
      "character varying" => "varchar",
      "tsvector" => "varchar"
    }.freeze

    # The select-list entry for this binding: the value expression followed
    # by the double-quoted column name as its alias.
    def to_sql
      %(#{quoted_value} "#{column.name}")
    end
    alias to_s to_sql

    # Name of the haystack column this binding fills.
    def column_name
      column.name
    end
    alias key column_name

    # The SQL expression for the value, chosen by the kind of +raw_value+:
    # an Arel attribute is turned into a column reference (see
    # #attribute_expression), any other Arel node is rendered with `to_sql`,
    # and everything else is quoted by the haystack's connection and cast to
    # the column's type.
    def quoted_value
      case raw_value
      when Arel::Attributes::Attribute then attribute_expression
      when Arel::Nodes::Node then raw_value.to_sql
      else "#{statement.haystack.connection.quote(raw_value)}::#{cast_type}"
      end
    end

    private

    # Type name used when casting to this binding's column.
    def cast_type
      SPECIAL_CASTS.fetch(column.sql_type, column.sql_type)
    end

    # Builds `table.column`, wrapping it as needed with a cast, an unnest,
    # and a weighted to_tsvector call.
    #
    # Raises Hayfork::ColumnNotFoundError when the attribute's relation has
    # no column of that name.
    def attribute_expression
      source = raw_value.relation
      source_column = source.send(:type_caster).send(:types).columns_hash[raw_value.name.to_s]
      fail Hayfork::ColumnNotFoundError, "'#{raw_value.name}' is not a column on '#{source.name}'" unless source_column

      sql = "#{source.name}.#{raw_value.name}"

      # Cast only when the source column's type differs from the target's.
      sql = "#{sql}::#{cast_type}" unless column.sql_type == source_column.sql_type

      # Unnesting statements split the text and search vector values on newlines.
      if statement.unnest? && [Hayfork::SEARCH_VECTOR, Hayfork::TEXT].member?(column.name)
        sql = "unnest(string_to_array(#{sql}, E'\\n'))"
      end

      return sql unless column.type == :tsvector

      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # those words throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      "setweight(to_tsvector('#{statement.dictionary}', replace(#{sql}, '-', ' ')), '#{statement.weight}')"
    end

  end
end
@@ -0,0 +1,24 @@
module Hayfork
  # Generates the DELETE statement that removes the haystack rows derived
  # from the OLD version of a record.
  class DeleteSql
    attr_reader :haystack, :relation, :bindings

    # Keeps every binding except those for the search vector and text
    # columns, so rows are matched on the remaining columns only.
    def initialize(haystack, relation, bindings)
      @haystack = haystack
      @relation = relation

      content_columns = [Hayfork::SEARCH_VECTOR, Hayfork::TEXT]
      @bindings = bindings.reject { |candidate| content_columns.include?(candidate.key) }
    end

    # Returns `DELETE FROM <haystack> USING (<select>) "x" WHERE ...;`
    # followed by a newline. The select reads the bindings from
    # `(SELECT OLD.*)` aliased as the relation's table, and the WHERE clause
    # compares every bound column of the haystack with the same column of "x".
    def to_sql
      target = haystack.table_name

      old_row = relation
        .select(bindings.map(&:to_s))
        .from("(SELECT OLD.*) \"#{relation.table_name}\"")

      conditions = bindings.map { |bound| "#{target}.#{bound.key}=x.#{bound.key}" }

      "DELETE FROM #{target} USING (#{old_row.to_sql}) \"x\" WHERE #{conditions.join(" AND ")};\n"
    end
    alias to_s to_sql

  end
end
@@ -0,0 +1,4 @@
module Hayfork
  # Raised when a named association cannot be found on a model.
  AssociationNotFoundError = Class.new(ArgumentError)

  # Raised when a bound attribute names a column its table does not have.
  ColumnNotFoundError = Class.new(ArgumentError)
end
@@ -0,0 +1,22 @@
module Hayfork
  # Generates the INSERT statement that adds haystack rows for a record.
  class InsertSql
    attr_reader :haystack, :relation, :bindings

    def initialize(haystack, relation, bindings)
      @haystack, @relation, @bindings = haystack, relation, bindings
    end

    # Returns `INSERT INTO <haystack> (<columns>) SELECT * FROM (<select>) "x"
    # WHERE "x"."text" != '';` followed by a newline.
    #
    # With `from: true` (the default) the select reads from `(SELECT NEW.*)`
    # aliased as the relation's table; with `from: false` the relation's own
    # FROM clause is left untouched.
    def to_sql(from: true)
      projection = relation.select(bindings.map(&:to_s))
      projection = projection.from("(SELECT NEW.*) \"#{relation.table_name}\"") if from

      columns = bindings.map(&:key).join(", ")

      "INSERT INTO #{haystack.table_name} (#{columns}) SELECT * FROM (#{projection.to_sql}) \"x\" WHERE \"x\".\"#{Hayfork::TEXT}\" != '';\n"
    end
    alias to_s to_sql

  end
end
@@ -0,0 +1,32 @@
module Hayfork
  # Helpers for joining an association (or a raw SQL fragment) onto a relation.
  module Join

    # Returns +relation+ joined according to +join_value+:
    #
    # * String — passed straight to `relation.joins`.
    # * Symbol — treated as an association name on `relation.model`.
    #   `belongs_to` and `has_one` use `joins`; `has_many` and
    #   `has_and_belongs_to_many` use `left_outer_joins` plus a condition
    #   that the associated table's id is not null.
    #
    # Raises NotImplementedError for any other kind of value or association
    # macro, and AssociationNotFoundError for an unknown association.
    def join(relation, join_value)
      return relation.joins(join_value) if join_value.is_a?(String)

      unless join_value.is_a?(Symbol)
        fail NotImplementedError, "Statement#joins does not yet accept #{join_value.class} params like #{join_value.inspect}. Will this scenario work with `has_many through:` or `has_one through:`?"
      end

      reflection = reflection_for(relation.model, join_value)

      case reflection.macro
      when :belongs_to, :has_one
        relation.joins(join_value)
      when :has_many, :has_and_belongs_to_many
        associated_id = reflection.klass.arel_table[:id]
        relation.left_outer_joins(join_value).where(associated_id.not_eq(nil))
      else
        fail NotImplementedError, "Joins haven't been implemented for #{reflection.macro.inspect} associations"
      end
    end

    # Looks up +association+ on +model+ and returns its reflection, raising
    # AssociationNotFoundError when the model has no such association.
    def reflection_for(model, association)
      model.reflect_on_association(association) ||
        fail(AssociationNotFoundError, "Association ':#{association}' not found on '#{model}'")
    end

  end
end
@@ -0,0 +1,38 @@
module Hayfork
  # Applies a list of parsed phrases to a haystack relation.
  class QueryObject
    attr_reader :phrases

    # phrases - objects that respond to `apply(set)` and return the narrowed set.
    def initialize(phrases)
      @phrases = phrases
    end

    # Returns the hits in +set+ that satisfy the query: `set.none` when there
    # are no phrases, the single phrase applied to +set+ when there is one,
    # and the combination described below when there are several.
    def against(set)
      return set.none if phrases.none?
      return phrases.first.apply(set) if phrases.one?

      # The haystack may contain more than one hit per result.
      #
      # A composite query may match more than one hit for a result.
      # For example, it may match both a hymn's author and its tune.
      #
      # If we search more than one phrase, we want to find all of
      # the hits that match *_any_* of the search phrases but only
      # return hits for results that match *_all_* of the phrases.
      filter_hits_by_any_phrase(filter_results_that_match_all_phrases(set))
    end

    private

    # ORs together the result of applying each phrase to +set+.
    def filter_hits_by_any_phrase(set)
      phrases
        .map { |phrase| phrase.apply(set) }
        .reduce { |memo, hits| memo.or(hits) }
    end

    # Restricts +set+ to hits whose result appears in the INTERSECT of the
    # results matched by each phrase.
    #
    # Results are polymorphic, so a result is identified by its type *and*
    # its id. Intersecting on the id alone would treat, say, Employee 5
    # (matching one phrase) and Project 5 (matching another) as one result
    # that matches both phrases.
    def filter_results_that_match_all_phrases(set)
      table = set.arel_table
      identity = [ table[Hayfork::SEARCH_RESULT_TYPE], table[Hayfork::SEARCH_RESULT_ID] ]
      scope = set.select(Hayfork::SEARCH_RESULT_TYPE, Hayfork::SEARCH_RESULT_ID)

      matches_of_every_phrase = phrases
        .map { |phrase| phrase.apply(scope).arel }
        .reduce { |memo, matches| Arel::Nodes::Intersect.new(memo, matches) }

      # Renders as `(type, id) IN (<select> INTERSECT <select> ...)`.
      set.where(Arel::Nodes::In.new(Arel::Nodes::Grouping.new(identity), matches_of_every_phrase))
    end

  end
end
@@ -0,0 +1,35 @@
require "strscan" # StringScanner is not loaded by default

module Hayfork
  # Splits a query string into quoted and unquoted segments and hands each
  # one to a hook that subclasses implement.
  class QueryParser
    attr_reader :klass, :querystring

    # klass       - class instantiated with the collected phrases by #parse!
    # querystring - the text to parse
    def initialize(klass, querystring)
      @klass = klass
      @querystring = querystring
    end

    # Runs the query string through Hayfork.unaccent, then walks it: text
    # outside double quotes goes to #parse_phrase and text between double
    # quotes goes to #parse_exact_phrase. An unterminated quote is read to
    # the end of the string. Returns `klass.new(phrases)`.
    #
    # Empty segments (a query that starts with a quote, or `""`) are skipped
    # rather than being passed to the hooks as nil.
    def parse!
      phrases = []
      scanner = StringScanner.new(Hayfork.unaccent(querystring))

      until scanner.eos?
        unquoted = scanner.scan(/[^"]+/)
        parse_phrase(unquoted, phrases) if unquoted
        break if scanner.eos?

        scanner.getch # opening "
        quoted = scanner.scan(/[^"]+/)
        parse_exact_phrase(quoted, phrases) if quoted
        scanner.getch # closing " (nil at end of string)
      end

      klass.new(phrases)
    end

    # Hook: append the phrases found in unquoted text to +phrases+.
    def parse_phrase(querystring, phrases)
      raise NotImplementedError, "#{self.class} must implement parse_phrase"
    end

    # Hook: append the phrase found in quoted text to +phrases+.
    def parse_exact_phrase(querystring, phrases)
      raise NotImplementedError, "#{self.class} must implement parse_exact_phrase"
    end

  end
end