hayfork 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
# Creates the haystack table used by Hayfork together with the Postgres
# text search configuration that its search vectors are built with.
class Create<%= table_name.camelize %> < ActiveRecord::Migration<%= migration_version %>
  def change
    enable_extension :unaccent

    # Full-Text Search in Postgres can be configured extensively.
    #
    # Custom Parsers are able to match entities in documents and handle them
    # differently. Custom Dictionaries can be used to tune search for different
    # languages and to ignore certain words.
    #
    # Learn more at https://www.postgresql.org/docs/10/textsearch.html
    #
    # This block creates a TEXT SEARCH CONFIGURATION specifically for use
    # by Hayfork that uses an English dictionary and normalizes characters
    # by stripping accent marks and converting special characters (like smart
    # quotes) to their easily-typed ASCII counterparts (like straight quotes).
    #
    # Raw SQL cannot be reversed automatically, so the statements are wrapped
    # in `reversible` and the configuration is dropped again on rollback.
    #
    reversible do |dir|
      dir.up do
        execute <<~SQL
          CREATE TEXT SEARCH CONFIGURATION public.hayfork ( COPY = pg_catalog.english );
          ALTER TEXT SEARCH CONFIGURATION public.hayfork ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, hword, hword_part, word WITH unaccent, english_stem;
        SQL
      end

      dir.down do
        execute <<~SQL
          DROP TEXT SEARCH CONFIGURATION public.hayfork;
        SQL
      end
    end

    create_table :<%= table_name %>, id: false do |t|
      t.string :<%= Hayfork::SEARCH_RESULT_TYPE %>, null: false
      t.integer :<%= Hayfork::SEARCH_RESULT_ID %>, null: false

      t.string :<%= Hayfork::SOURCE_TYPE %>, null: false
      t.integer :<%= Hayfork::SOURCE_ID %>, null: false
      t.string :<%= Hayfork::FIELD %>, null: false

      t.tsvector :<%= Hayfork::SEARCH_VECTOR %>
      t.text :<%= Hayfork::TEXT %>, null: false

      # Add additional columns to <%= table_name %> here.
      #
      # For example, to allow users to search only documents of their
      # own creation:
      #
      #     t.belongs_to :user, null: false
      #

      # If you add columns that will always be used in searches (like `user_id`),
      # consider including them in this index. For example:
      #
      #     enable_extension :btree_gist
      #     t.index [:user_id, :<%= Hayfork::SEARCH_VECTOR %>], using: "gist"
      #
      t.index :<%= Hayfork::SEARCH_VECTOR %>, using: "gist"
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require "triggers"
2
+
3
# Migration that executes the SQL produced by the haystack's trigger
# definitions. Only `up` is defined; there is no `down` counterpart.
class Rebuild<%= table_name.camelize %> < ActiveRecord::Migration<%= migration_version %>
  def up
    # Runs whatever `triggers.replace` returns.
    # NOTE(review): presumably this (re)installs the database triggers — confirm
    # against Hayfork::Triggers, which is not visible here.
    execute <%= class_name %>.triggers.replace
    # Runs whatever `triggers.rebuild.lines` returns.
    # NOTE(review): presumably this repopulates the table from existing rows;
    # `.lines` hands `execute` a collection rather than a single string — confirm
    # that this is intended.
    execute <%= class_name %>.triggers.rebuild.lines
  end
end
@@ -0,0 +1,14 @@
1
# ActiveRecord model for the haystack table. Each row belongs to a
# polymorphic `search_result`.
class <%= class_name %> < ActiveRecord::Base
<% unless table_name == class_name.tableize -%>
  self.table_name = "<%= table_name %>"
<% end -%>

  belongs_to :search_result, polymorphic: true

  # Parses +querystring+ with ::Query, applies the parsed query to every row
  # of this model, and returns the associated search results.
  #
  # Only the distinct (search_result_type, search_result_id) pairs are
  # selected, so a result that is matched by several rows is returned once.
  def self.search(querystring)
    ::Query.parse(querystring).against(all)
      .select(:search_result_type, :search_result_id).distinct
      .preload(:search_result).map(&:search_result)
  end

end
@@ -0,0 +1,10 @@
1
+ require_relative "query/parser"
2
+ require_relative "query/object"
3
+
4
# Namespace for the application's search-query parsing classes.
module Query

  # Builds a Query::Parser for +querystring+ that wraps its phrases in
  # Query::Object, and returns the result of running it.
  def self.parse(querystring)
    parser = Query::Parser.new(Query::Object, querystring)
    parser.parse!
  end

end
@@ -0,0 +1,19 @@
1
module Query
  # A search term made of one or more words that must appear next to each
  # other, in order, in the indexed text.
  class ExactPhrase < Struct.new(:words)

    # Restricts +set+ to the rows whose search vector matches this phrase.
    #
    # The words are joined with the tsquery "followed by" operator (<->) and
    # compared against the search vector column with the @@ match operator.
    def apply(set)
      vector = set.arel_table[Hayfork::SEARCH_VECTOR]
      tsquery = to_tsquery(Hayfork.default_dictionary, words.join(" <-> "))
      set.where(Arel::Nodes::InfixOperation.new("@@", vector, tsquery))
    end

    private

    # Builds the Arel node for `to_tsquery(dictionary, querystring)` with
    # both arguments quoted.
    def to_tsquery(dictionary, querystring)
      arguments = [dictionary, querystring].map { |argument| Arel::Nodes.build_quoted(argument) }
      Arel::Nodes::NamedFunction.new("to_tsquery", arguments)
    end

  end
end
@@ -0,0 +1,4 @@
1
module Query
  # Application-level query object. It adds nothing to Hayfork::QueryObject;
  # it exists so that the application has a class of its own to extend.
  class Object < Hayfork::QueryObject; end
end
@@ -0,0 +1,33 @@
1
+ require_relative "exact_phrase"
2
+
3
module Query
  # Turns the quoted and unquoted segments found by Hayfork::QueryParser
  # into Query::ExactPhrase terms.
  class Parser < Hayfork::QueryParser

    # Handles an unquoted segment: every word becomes its own one-word
    # phrase and is appended to +phrases+.
    def parse_phrase(querystring, phrases)
      tokenize_words(querystring).each do |word|
        phrases << Query::ExactPhrase.new([ word ])
      end
    end

    # Handles a quoted segment: all of its words become a single phrase
    # appended to +phrases+.
    #
    # A segment without any word characters (for example `"-"` or `" "`) is
    # skipped. It would otherwise produce a phrase with no words, i.e. an
    # empty tsquery, which matches nothing and would therefore empty the
    # result of any search it is combined with.
    #
    # Returns +phrases+.
    def parse_exact_phrase(querystring, phrases)
      words = tokenize_words(querystring)
      phrases << Query::ExactPhrase.new(words) unless words.empty?
      phrases
    end

    # Splits +querystring+ (nil is treated as an empty string) into an
    # array of words.
    def tokenize_words(querystring)
      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # the hyphenated word throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      querystring.to_s.scan(/\w+/)
    end

  end
end
@@ -0,0 +1,21 @@
1
# Declares the triggers for <%= class_name %>. The block below is handed to
# Hayfork.maintain; it is empty until trigger definitions are added.
Hayfork.maintain(<%= class_name %>) do

  # Define triggers here.
  # Learn more at https://github.com/boblail/hayfork#libhaystack_triggersrb
  #
  # Example:
  #
  #     Hayfork.maintain(Haystack) do
  #
  #       foreach(Employee) do
  #         insert(:full_name)
  #       end
  #
  #       foreach(Project) do
  #         insert(:title)
  #       end
  #
  #     end
  #

end
data/lib/hayfork.rb ADDED
@@ -0,0 +1,35 @@
1
+ require "hayfork/errors"
2
+ require "hayfork/join"
3
+ require "hayfork/query_object"
4
+ require "hayfork/query_parser"
5
+ require "hayfork/trigger_builder"
6
+ require "hayfork/triggers"
7
+ require "hayfork/unaccent"
8
+ require "hayfork/version"
9
+
10
# Top-level namespace: haystack column names, module-wide defaults and the
# entry point used to declare a haystack's triggers.
module Hayfork
  extend Hayfork::Unaccent, Hayfork::Join

  # Names of the columns every haystack table is expected to have.
  TEXT = "text".freeze
  SEARCH_VECTOR = "search_vector".freeze
  SEARCH_RESULT_TYPE = "search_result_type".freeze
  SEARCH_RESULT_ID = "search_result_id".freeze
  SOURCE_TYPE = "source_type".freeze
  SOURCE_ID = "source_id".freeze
  FIELD = "field".freeze

  # Module-level defaults; both can be reassigned through the accessors below.
  @default_weight = "C".freeze
  @default_dictionary = "hayfork".freeze

  class << self
    attr_accessor :default_weight, :default_dictionary

    # Evaluates +block+ against a TriggerBuilder wrapping a new Triggers
    # collection for +haystack+, then exposes that collection on the
    # haystack class as `haystack.triggers`.
    #
    # Returns the Triggers collection.
    def maintain(haystack, &block)
      Triggers.new(haystack).tap do |collection|
        TriggerBuilder.new(collection).instance_eval(&block)
        haystack.singleton_class.send(:attr_reader, :triggers)
        haystack.instance_variable_set(:@triggers, collection)
      end
    end
  end

end
@@ -0,0 +1,67 @@
1
module Hayfork
  # Pairs a haystack +column+ with the +raw_value+ that a +statement+ writes
  # into it, and renders the pair as a SQL select-list entry.
  class Binding < Struct.new(:statement, :column, :raw_value)

    # SQL types that are cast to a different type name than their own.
    SPECIAL_CASTS = {
      "character varying" => "varchar",
      "tsvector" => "varchar"
    }.freeze

    # Renders `<value> "<column name>"`.
    def to_sql
      %(#{quoted_value} "#{column.name}")
    end
    alias to_s to_sql

    # Name of the haystack column this binding targets.
    def column_name
      column.name
    end
    alias key column_name

    # SQL expression for the value:
    #
    #  * an Arel attribute becomes a (possibly cast / unnested / weighted)
    #    reference to `table.column`;
    #  * any other Arel node is rendered with its own `to_sql`;
    #  * anything else is quoted as a literal and cast to the column's type.
    #
    # Raises Hayfork::ColumnNotFoundError when an Arel attribute names a
    # column that its relation does not have.
    def quoted_value
      case raw_value
      when Arel::Attributes::Attribute
        attribute_expression
      when Arel::Nodes::Node
        raw_value.to_sql
      else
        type = cast_type
        "#{statement.haystack.connection.quote(raw_value)}::#{type}"
      end
    end

    private

    # Type name used when casting a value to the haystack column's type.
    def cast_type
      SPECIAL_CASTS.fetch(column.sql_type, column.sql_type)
    end

    # Builds the SQL expression for a value that is a column of another table.
    def attribute_expression
      source_column = raw_value.relation.send(:type_caster).send(:types).columns_hash[raw_value.name.to_s]
      unless source_column
        fail Hayfork::ColumnNotFoundError, "'#{raw_value.name}' is not a column on '#{raw_value.relation.name}'"
      end

      expression = "#{raw_value.relation.name}.#{raw_value.name}"

      # Cast only when the source column's type differs from the haystack's.
      expression = "#{expression}::#{cast_type}" unless column.sql_type == source_column.sql_type

      # When the statement unnests, each line of the source text is emitted
      # as a separate row.
      if statement.unnest? && [Hayfork::SEARCH_VECTOR, Hayfork::TEXT].member?(column.name)
        expression = "unnest(string_to_array(#{expression}, E'\\n'))"
      end

      return expression unless column.type == :tsvector

      # Postgres does not handle hyphens well.
      #
      # Notice how, in the following example, the way it breaks up
      # those words throws off the index (Jesus is the fifth word
      # not the third or fourth). This prevents you from constructing
      # an exact-phrase query for a hyphenated word:
      #
      #     > select to_tsvector('hayfork', 'thou long-expected jesus');
      #     { 'expect':4 'jesus':5 'long':3 'long-expect':2 'thou':1 }
      #
      #
      # We'll coerce Postgres into treating hyphenated words as two words.
      "setweight(to_tsvector('#{statement.dictionary}', replace(#{expression}, '-', ' ')), '#{statement.weight}')"
    end

  end
end
@@ -0,0 +1,24 @@
1
module Hayfork
  # Renders the DELETE statement that removes the haystack rows derived
  # from the OLD version of a source row.
  class DeleteSql
    attr_reader :haystack, :relation, :bindings

    # The search vector and text bindings are dropped: rows are matched on
    # the remaining columns only.
    def initialize(haystack, relation, bindings)
      @haystack = haystack
      @relation = relation
      @bindings = bindings.reject { |binding| [Hayfork::SEARCH_VECTOR, Hayfork::TEXT].include?(binding.key) }
    end

    # Returns the DELETE statement as a single line terminated by a newline.
    #
    # The relation is evaluated against `(SELECT OLD.*)` aliased to the
    # relation's table name, and every haystack row that agrees with one of
    # the selected rows on all bound columns is deleted.
    def to_sql
      old_rows = relation
        .select(bindings.map(&:to_s))
        .from("(SELECT OLD.*) \"#{relation.table_name}\"")

      constraints = bindings
        .map { |binding| "#{haystack.table_name}.#{binding.key}=x.#{binding.key}" }
        .join(" AND ")

      <<~SQL
        DELETE FROM #{haystack.table_name} USING (#{old_rows.to_sql}) "x" WHERE #{constraints};
      SQL
    end
    alias to_s to_sql

  end
end
@@ -0,0 +1,4 @@
1
module Hayfork
  # Raised when a named association cannot be found on a model.
  AssociationNotFoundError = Class.new(ArgumentError)

  # Raised when a named column cannot be found on a table.
  ColumnNotFoundError = Class.new(ArgumentError)
end
@@ -0,0 +1,22 @@
1
module Hayfork
  # Renders the INSERT statement that copies rows selected from a relation
  # into the haystack.
  class InsertSql
    attr_reader :haystack, :relation, :bindings

    def initialize(haystack, relation, bindings)
      @haystack = haystack
      @relation = relation
      @bindings = bindings
    end

    # Returns the INSERT statement as a single line terminated by a newline.
    #
    # from - when true (the default) the relation is evaluated against
    #        `(SELECT NEW.*)` aliased to the relation's table name; when
    #        false the relation's own FROM clause is left untouched.
    #
    # Rows whose text column is the empty string are not inserted.
    def to_sql(from: true)
      rows = relation.select(bindings.map(&:to_s))
      rows = rows.from("(SELECT NEW.*) \"#{relation.table_name}\"") if from

      column_list = bindings.map(&:key).join(", ")

      <<~SQL
        INSERT INTO #{haystack.table_name} (#{column_list}) SELECT * FROM (#{rows.to_sql}) "x" WHERE "x"."#{Hayfork::TEXT}" != '';
      SQL
    end
    alias to_s to_sql

  end
end
@@ -0,0 +1,32 @@
1
module Hayfork
  # Helpers for joining a relation to another table, given either a raw SQL
  # fragment or the name of an association.
  module Join

    # Returns +relation+ joined according to +join_value+.
    #
    #  * String - passed straight to `relation.joins`.
    #  * Symbol - looked up as an association on the relation's model:
    #    has_many / has_and_belongs_to_many use a left outer join restricted
    #    to rows where the associated table's id is present; belongs_to /
    #    has_one use `relation.joins`.
    #
    # Raises AssociationNotFoundError for an unknown association and
    # NotImplementedError for any other kind of association or argument.
    def join(relation, join_value)
      return relation.joins(join_value) if join_value.is_a?(String)

      unless join_value.is_a?(Symbol)
        fail NotImplementedError, "Statement#joins does not yet accept #{join_value.class} params like #{join_value.inspect}. Will this scenario work with `has_many through:` or `has_one through:`?"
      end

      reflection = reflection_for(relation.model, join_value)
      macro = reflection.macro

      if %i[has_many has_and_belongs_to_many].include?(macro)
        id_present = reflection.klass.arel_table[:id].not_eq(nil)
        relation.left_outer_joins(join_value).where(id_present)
      elsif %i[belongs_to has_one].include?(macro)
        relation.joins(join_value)
      else
        fail NotImplementedError, "Joins haven't been implemented for #{reflection.macro.inspect} associations"
      end
    end

    # Returns the reflection for +association+ on +model+.
    #
    # Raises AssociationNotFoundError when the model has no such association.
    def reflection_for(model, association)
      model.reflect_on_association(association).tap do |reflection|
        fail AssociationNotFoundError, "Association ':#{association}' not found on '#{model}'" unless reflection
      end
    end

  end
end
@@ -0,0 +1,38 @@
1
module Hayfork
  # A parsed search: a list of phrases that can be applied to a set of
  # haystack rows.
  class QueryObject
    attr_reader :phrases

    def initialize(phrases)
      @phrases = phrases
    end

    # Narrows +set+ to the hits that satisfy this query.
    #
    # No phrases yields `set.none`; a single phrase is applied directly.
    def against(set)
      if phrases.none?
        set.none
      elsif phrases.one?
        phrases.first.apply(set)
      else
        # The haystack may contain more than one hit per result.
        #
        # A composite query may match more than one hit for a result.
        # For example, it may match both a hymn's author and its tune.
        #
        # If we search more than one phrase, we want to find all of
        # the hits that match *_any_* of the search phrases but only
        # return hits for results that match *_all_* of the phrases.
        filter_hits_by_any_phrase(filter_results_that_match_all_phrases(set))
      end
    end

    private

    # Combines the per-phrase scopes with OR, left to right.
    def filter_hits_by_any_phrase(set)
      phrases
        .map { |phrase| phrase.apply(set) }
        .reduce { |combined, scope| combined.or(scope) }
    end

    # Keeps only rows whose result id appears in the intersection of the
    # ids matched by each phrase.
    #
    # NOTE(review): only the result id is compared, not the result type —
    # confirm that ids cannot collide across result types.
    def filter_results_that_match_all_phrases(set)
      ids = set.select(Hayfork::SEARCH_RESULT_ID)

      matching_every_phrase = phrases
        .map { |phrase| phrase.apply(ids).arel }
        .reduce { |left, right| Arel::Nodes::Intersect.new(left, right) }

      set.where(set.arel_table[Hayfork::SEARCH_RESULT_ID].in(matching_every_phrase))
    end

  end
end
@@ -0,0 +1,35 @@
1
require "strscan"

module Hayfork
  # Splits a search string into unquoted and double-quoted segments and
  # hands each segment to a hook that subclasses implement.
  class QueryParser
    attr_reader :klass, :querystring

    # klass       - class instantiated with the collected phrases.
    # querystring - the raw search string.
    def initialize(klass, querystring)
      @klass = klass
      @querystring = querystring
    end

    # Scans the unaccented query string, alternating between text outside
    # double quotes (given to #parse_phrase) and text inside them (given to
    # #parse_exact_phrase). Both hooks receive the array of phrases collected
    # so far and are expected to append to it.
    #
    # Empty segments are skipped, so the hooks are never called with nil.
    # That happens when the string starts with a quote, or for an empty pair
    # of quotes (`""`).
    #
    # An unterminated quote is treated as running to the end of the string.
    #
    # Returns `klass.new(phrases)`.
    def parse!
      phrases = []
      scanner = StringScanner.new(Hayfork.unaccent(querystring))

      until scanner.eos?
        unquoted = scanner.scan(/[^"]+/)
        parse_phrase(unquoted, phrases) if unquoted
        break if scanner.eos?

        scanner.getch # opening "
        quoted = scanner.scan(/[^"]+/)
        parse_exact_phrase(quoted, phrases) if quoted
        scanner.getch # closing " (nil when the quote is unterminated)
      end

      klass.new(phrases)
    end

    # Hook for text outside of double quotes. Subclasses must override it.
    def parse_phrase(querystring, phrases)
      raise NotImplementedError
    end

    # Hook for text inside double quotes. Subclasses must override it.
    def parse_exact_phrase(querystring, phrases)
      raise NotImplementedError
    end

  end
end