tantiny 0.2.2

data/lib/tantiny/query.rb ADDED
@@ -0,0 +1,165 @@
+ # frozen_string_literal: true
+
+ require "date"
+
+ module Tantiny
+   class Query
+     TYPES = %i[
+       all empty term fuzzy_term
+       phrase regex range facet
+       smart prefix
+     ].freeze
+
+     DEFAULT_BOOST = 1.0
+     DEFAULT_FUZZY_DISTANCE = 1
+
+     class << self
+       def conjunction(*queries)
+         # @type var queries: Array[untyped]
+         queries.one? ? queries.first : __conjunction(queries)
+       end
+
+       def disjunction(*queries)
+         # @type var queries: Array[untyped]
+         queries.one? ? queries.first : __disjunction(queries)
+       end
+
+       def all_query(_index = nil)
+         __new_all_query
+       end
+
+       def empty_query(_index = nil)
+         __new_empty_query
+       end
+
+       def term_query(index, fields, term, **options)
+         allowed_fields = text_and_strings(index)
+         construct_query(index, :term, allowed_fields, fields, [term.to_s], **options)
+       end
+
+       def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **options)
+         params = [term.to_s, distance.to_i]
+         allowed_fields = text_and_strings(index)
+         construct_query(index, :fuzzy_term, allowed_fields, fields, params, **options)
+       end
+
+       def phrase_query(index, fields, phrase, **options)
+         queries = [*fields].map do |f|
+           terms = index.schema.tokenizer_for(f).terms(phrase)
+           allowed_fields = index.schema.text_fields
+           construct_query(index, :phrase, allowed_fields, f, [terms], **options)
+         end
+
+         queries.empty? ? empty_query : disjunction(*queries)
+       end
+
+       def regex_query(index, fields, regex, **options)
+         allowed_fields = text_and_strings(index)
+         construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **options)
+       end
+
+       def prefix_query(index, fields, prefix, **options)
+         regex_query(index, fields, Regexp.escape(prefix) + ".*", **options)
+       end
+
+       def range_query(index, fields, range, **options)
+         schema = index.schema
+
+         case range.first
+         when Integer
+           allowed_fields = schema.integer_fields
+           from, to = [range.min, range.max]
+         when Float
+           allowed_fields = schema.double_fields
+           from, to = [range.first, range.last]
+         when Date, DateTime
+           # @type var range: Range[Date | DateTime]
+           allowed_fields = schema.date_fields
+           from, to = [Helpers.timestamp(range.first), Helpers.timestamp(range.last)]
+         else
+           raise UnsupportedRange.new(range.first.class)
+         end
+
+         # @type var allowed_fields: Array[Symbol]
+         construct_query(index, :range, allowed_fields, fields, [from, to], **options)
+       end
+
+       def facet_query(index, field, path, **options)
+         allowed_fields = index.schema.facet_fields
+         construct_query(index, :facet, allowed_fields, field, [path], **options)
+       end
+
+       def smart_query(index, fields, query_string, **options)
+         fuzzy_distance = options[:fuzzy_distance]
+         boost_factor = options.fetch(:boost, DEFAULT_BOOST)
+
+         field_queries = [*fields].map do |field|
+           terms = index.schema.tokenizer_for(field).terms(query_string)
+
+           # See: https://github.com/soutaro/steep/issues/272
+           # @type block: nil | Query
+           next if terms.empty?
+
+           term_queries = terms.map do |term|
+             if fuzzy_distance.nil?
+               term_query(index, field, term)
+             else
+               fuzzy_term_query(index, field, term, fuzzy_distance)
+             end
+           end
+
+           # @type var terms: untyped
+           # @type var term_queries: untyped
+           last_term_query = prefix_query(index, field, terms.last) | term_queries.last
+
+           conjunction(last_term_query, *term_queries[0...-1])
+         end.compact
+
+         disjunction(*field_queries).boost(boost_factor)
+       end
+
+       private
+
+       # Can't use variadic argument `params` here due to:
+       # https://github.com/soutaro/steep/issues/480
+       def construct_query(index, query_type, allowed_fields, fields, params, **options)
+         queries = [*fields].map do |field|
+           supported = allowed_fields.include?(field)
+           raise UnsupportedField.new(field) unless supported
+
+           send("__new_#{query_type}_query", index, field.to_s, *params)
+         end
+
+         return empty_query if fields.empty?
+
+         disjunction(*queries).boost(options.fetch(:boost, DEFAULT_BOOST))
+       end
+
+       def text_and_strings(index)
+         index.schema.text_fields | index.schema.string_fields
+       end
+     end
+
+     def |(other)
+       raise ArgumentError.new("Not a #{self.class}.") unless other.is_a?(self.class)
+
+       self.class.disjunction(self, other)
+     end
+
+     def &(other)
+       raise ArgumentError.new("Not a #{self.class}.") unless other.is_a?(self.class)
+
+       self.class.conjunction(self, other)
+     end
+
+     def !
+       __negation
+     end
+
+     def boost(boost_factor)
+       return self if boost_factor == DEFAULT_BOOST
+
+       __boost(boost_factor.to_f)
+     end
+   end
+ end
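
Taken together, the class-level constructors and the `|`, `&`, `!`, and `boost` operators above form a small combinator algebra over queries. A minimal usage sketch, assuming an `index` whose schema declares :title as a text field (the field name and query terms are illustrative, not taken from the gem's documentation):

    ruby_q  = Tantiny::Query.term_query(index, :title, "ruby")
    fuzzy_q = Tantiny::Query.fuzzy_term_query(index, :title, "serch", 2)
    java_q  = Tantiny::Query.term_query(index, :title, "java")

    # (ruby_q OR fuzzy_q) AND NOT java_q, boosted 2x; boost(1.0) would be a no-op.
    query = ((ruby_q | fuzzy_q) & !java_q).boost(2.0)
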
data/lib/tantiny/schema.rb ADDED
@@ -0,0 +1,53 @@
+ # frozen_string_literal: true
+
+ module Tantiny
+   class Schema
+     attr_reader :default_tokenizer,
+       :id_field,
+       :text_fields,
+       :string_fields,
+       :integer_fields,
+       :double_fields,
+       :date_fields,
+       :facet_fields,
+       :field_tokenizers
+
+     def initialize(tokenizer, &block)
+       @default_tokenizer = tokenizer
+       @id_field = :id
+       @text_fields = []
+       @string_fields = []
+       @integer_fields = []
+       @double_fields = []
+       @date_fields = []
+       @facet_fields = []
+       @field_tokenizers = {}
+
+       instance_exec(&block)
+     end
+
+     def tokenizer_for(field)
+       field_tokenizers[field] || default_tokenizer
+     end
+
+     private
+
+     def id(key) = @id_field = key
+
+     def string(key) = @string_fields << key
+
+     def integer(key) = @integer_fields << key
+
+     def double(key) = @double_fields << key
+
+     def date(key) = @date_fields << key
+
+     def facet(key) = @facet_fields << key
+
+     def text(key, tokenizer: nil)
+       @field_tokenizers[key] = tokenizer if tokenizer
+
+       @text_fields << key
+     end
+   end
+ end
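
Because `initialize` runs the given block through `instance_exec`, the private helpers (`id`, `text`, `string`, and the rest) are callable directly inside it. A minimal sketch of the DSL, assuming direct construction (in the gem the schema is presumably assembled for you by `Index.new`):

    schema = Tantiny::Schema.new(Tantiny::Tokenizer.default) do
      text :description                    # tokenized with the default tokenizer
      text :title, tokenizer: Tantiny::Tokenizer.new(:ngram, min: 2, max: 5)
      string :category                     # exact-match, never tokenized
      integer :year
      facet :tags
    end

    schema.text_fields             # => [:description, :title]
    schema.tokenizer_for(:year)    # falls back to the default tokenizer
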
data/lib/tantiny/tokenizer.rb ADDED
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module Tantiny
+   class Tokenizer
+     def self.default
+       new(:simple)
+     end
+
+     def self.new(kind, **options)
+       case kind
+       when :simple
+         __new_simple_tokenizer
+       when :stemmer
+         language = options[:language] || :en
+         __new_stemmer_tokenizer(language.to_s)
+       when :ngram
+         prefix_only = options.fetch(:prefix_only, false)
+         __new_ngram_tokenizer(options[:min], options[:max], prefix_only)
+       else
+         raise UnknownTokenizer.new(kind)
+       end
+     end
+
+     def terms(string)
+       __extract_terms(string)
+     end
+   end
+ end
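
Note that `self.new` is redefined as a factory: it dispatches on `kind` and delegates to the `__`-prefixed native constructors instead of allocating a plain Ruby object. Illustrative calls (the arguments are arbitrary examples):

    Tantiny::Tokenizer.new(:simple)                                    # what Tokenizer.default returns
    Tantiny::Tokenizer.new(:stemmer, language: :en)                    # :en is also the fallback
    Tantiny::Tokenizer.new(:ngram, min: 2, max: 5, prefix_only: true)

    Tantiny::Tokenizer.default.terms("Hello, World!")                  # => e.g. ["hello", "world"]
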
data/lib/tantiny/version.rb ADDED
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Tantiny
+   VERSION = "0.2.2" # {x-release-please-version}
+ end
data/lib/tantiny.rb ADDED
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ require "ruby-next/language/setup"
+ RubyNext::Language.setup_gem_load_path
+
+ require "rutie"
+
+ require "tantiny/version"
+ require "tantiny/errors"
+ require "tantiny/helpers"
+ require "tantiny/schema"
+ require "tantiny/tokenizer"
+ require "tantiny/query"
+ require "tantiny/index"
+
+ require_relative "tantiny.so"
+
+ module Tantiny
+ end
data/lib/tantiny.so ADDED
Binary file
data/sig/tantiny/errors.rbs ADDED
@@ -0,0 +1,20 @@
+ module Tantiny
+   class TantivyError < StandardError
+   end
+
+   class UnknownField < StandardError
+     def initialize: () -> void
+   end
+
+   class UnknownTokenizer < StandardError
+     def initialize: (Symbol tokenizer_type) -> void
+   end
+
+   class UnsupportedRange < StandardError
+     def initialize: (Class range_type) -> void
+   end
+
+   class UnsupportedField < StandardError
+     def initialize: (Symbol field) -> void
+   end
+ end
data/sig/tantiny/helpers.rbs ADDED
@@ -0,0 +1,6 @@
+
+ module Tantiny
+   module Helpers
+     def self.timestamp: ((Date | DateTime) date) -> String
+   end
+ end
data/sig/tantiny/index.rbs ADDED
@@ -0,0 +1,82 @@
+ module Tantiny
+   class Index
+     DEFAULT_INDEX_SIZE: Integer
+     DEFAULT_LIMIT: Integer
+
+     def self.new: (
+       String path,
+       **untyped options
+     ) { (*untyped) -> void } -> Index
+
+     def self.__new: (
+       String path,
+       Integer index_size,
+       Tokenizer default_tokenizer,
+       Hash[String, Tokenizer] field_tokenizers,
+       Array[String] text_fields,
+       Array[String] string_fields,
+       Array[String] integer_fields,
+       Array[String] double_fields,
+       Array[String] date_fields,
+       Array[String] facet_fields
+     ) -> Index
+
+     attr_reader schema: Schema
+
+     def commit: () -> void
+     def reload: () -> void
+     def <<: (untyped document) -> void
+     def delete: (String id) -> void
+
+     def search: (
+       (Query | String) query,
+       ?limit: Integer,
+       **untyped smart_query_options
+     ) -> Array[String]
+
+     def all_query: () -> Query
+     def empty_query: () -> Query
+     def term_query: (fields fields, String term, **untyped options) -> Query
+     def fuzzy_term_query: (fields fields, String term, ?Integer distance, **untyped options) -> Query
+     def phrase_query: (fields fields, String phrase, **untyped options) -> Query
+     def regex_query: (fields fields, String regex, **untyped options) -> Query
+     def prefix_query: (fields fields, String prefix, **untyped options) -> Query
+     def facet_query: (Symbol field, String path, **untyped options) -> Query
+     def range_query: (fields fields, Range[numeric | date] range, **untyped options) -> Query
+     def smart_query: (fields fields, String query_string, **untyped options) -> Query
+
+     def __commit: () -> void
+     def __reload: () -> void
+
+     def __add_document: (
+       String id,
+       Hash[String, String] text_fields,
+       Hash[String, String] string_fields,
+       Hash[String, Integer] integer_fields,
+       Hash[String, Float] double_fields,
+       Hash[String, String] date_fields,
+       Hash[String, String] facet_fields
+     ) -> void
+
+     def __delete_document: (String id) -> void
+
+     def __search: (Query query, Integer limit) -> Array[String]
+
+     private
+
+     attr_writer schema: Schema
+
+     def slice_document: (
+       untyped document,
+       Array[Symbol] fields
+     ) { (untyped v) -> untyped } -> Hash[String, untyped]
+
+     def default_search: (
+       String query_string,
+       Integer limit,
+       ?fuzzy_distance: Integer
+     ) -> Array[String]
+
+     def resolve: (untyped document, Symbol field) -> untyped
+   end
+ end
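
Read together with lib/tantiny.rb above, these signatures spell out the index lifecycle: build an index with a path and a schema block, add documents with `<<`, `commit`, then `search` with either a raw string (apparently routed through a smart query, given the `smart_query_options` kwargs and the private `default_search`) or an explicit `Query`. A hedged sketch; the document shape and field names are illustrative:

    index = Tantiny::Index.new("/tmp/tantiny_index") do
      text :description
      string :category
    end

    index << { id: 1, description: "full-text search for Ruby", category: "gems" }
    index.commit

    index.search("full-text", limit: 10)               # IDs come back as strings, e.g. ["1"]
    index.search(index.term_query(:category, "gems"))  # or pass a Query object directly
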
data/sig/tantiny/query.rbs ADDED
@@ -0,0 +1,135 @@
+ module Tantiny
+   class Query
+     TYPES: Array[Symbol]
+     DEFAULT_BOOST: Float
+     DEFAULT_FUZZY_DISTANCE: Integer
+
+     def self.disjunction: (*Query queries) -> Query
+     def self.conjunction: (*Query queries) -> Query
+
+     def self.all_query: (?Index _index) -> Query
+     def self.empty_query: (?Index _index) -> Query
+
+     def self.term_query: (
+       Index index,
+       fields fields,
+       String term,
+       **untyped options
+     ) -> Query
+
+     def self.fuzzy_term_query: (
+       Index index,
+       fields fields,
+       String term,
+       ?Integer distance,
+       **untyped options
+     ) -> Query
+
+     def self.phrase_query: (
+       Index index,
+       fields fields,
+       String phrase,
+       **untyped options
+     ) -> Query
+
+     def self.regex_query: (
+       Index index,
+       fields fields,
+       String regex,
+       **untyped options
+     ) -> Query
+
+     def self.prefix_query: (
+       Index index,
+       fields fields,
+       String prefix,
+       **untyped options
+     ) -> Query
+
+     def self.facet_query: (
+       Index index,
+       Symbol field,
+       String path,
+       **untyped options
+     ) -> Query
+
+     def self.range_query: (
+       Index index,
+       fields fields,
+       Range[numeric | date] range,
+       **untyped options
+     ) -> Query
+
+     def self.smart_query: (
+       Index index,
+       fields fields,
+       String query_string,
+       **untyped options
+     ) -> Query
+
+     def self.__new_all_query: () -> Query
+     def self.__new_empty_query: () -> Query
+
+     def self.__new_term_query: (
+       Index index,
+       String field,
+       String term
+     ) -> Query
+
+     def self.__new_fuzzy_term_query: (
+       Index index,
+       String field,
+       String term,
+       Integer distance
+     ) -> Query
+
+     def self.__new_regex_query: (
+       Index index,
+       String field,
+       String regex
+     ) -> Query
+
+     def self.__new_range_query: (
+       Index index,
+       String field,
+       untyped from,
+       untyped to
+     ) -> Query
+
+     def self.__new_phrase_query: (
+       Index index,
+       String field,
+       Array[String] terms
+     ) -> Query
+
+     def self.__new_facet_query: (
+       Index index,
+       String field,
+       String path
+     ) -> Query
+
+     def self.__disjunction: (Array[Query] queries) -> Query
+     def self.__conjunction: (Array[Query] queries) -> Query
+
+     def |: (Query query) -> Query
+     def &: (Query query) -> Query
+     def !: () -> Query
+     def boost: (numeric boost_factor) -> Query
+
+     def __negation: () -> Query
+     def __boost: (Float boost_factor) -> Query
+
+     private
+
+     def self.construct_query: (
+       Index index,
+       Symbol query_type,
+       Array[Symbol] allowed_fields,
+       fields fields,
+       Array[untyped] params,
+       **untyped options
+     ) -> Query
+
+     def self.text_and_strings: (Index index) -> Array[Symbol]
+   end
+ end
data/sig/tantiny/schema.rbs ADDED
@@ -0,0 +1,26 @@
+ module Tantiny
+   class Schema
+     attr_reader default_tokenizer: Tokenizer
+     attr_reader id_field: Symbol
+     attr_reader text_fields: Array[Symbol]
+     attr_reader string_fields: Array[Symbol]
+     attr_reader integer_fields: Array[Symbol]
+     attr_reader double_fields: Array[Symbol]
+     attr_reader date_fields: Array[Symbol]
+     attr_reader facet_fields: Array[Symbol]
+     attr_reader field_tokenizers: Hash[Symbol, Tokenizer]
+
+     def initialize: (Tokenizer tokenizer) { (*untyped) -> void } -> void
+     def tokenizer_for: (Symbol field) -> Tokenizer
+
+     private
+
+     def id: (Symbol key) -> void
+     def text: (Symbol key, ?tokenizer: Tokenizer) -> void
+     def string: (Symbol key) -> void
+     def integer: (Symbol key) -> void
+     def double: (Symbol key) -> void
+     def date: (Symbol key) -> void
+     def facet: (Symbol key) -> void
+   end
+ end
data/sig/tantiny/tokenizer.rbs ADDED
@@ -0,0 +1,25 @@
+ module Tantiny
+   class Tokenizer
+     def self.default: () -> Tokenizer
+
+     def self.new: (Symbol kind, **untyped options) -> Tokenizer
+
+     def self.__new_ngram_tokenizer: (
+       Integer min,
+       Integer max,
+       bool prefix_only
+     ) -> Tokenizer
+
+     def self.__new_stemmer_tokenizer: (
+       String locale_code
+     ) -> Tokenizer
+
+     def self.__new_simple_tokenizer: () -> Tokenizer
+
+     public
+
+     def terms: (String string) -> Array[String]
+
+     def __extract_terms: (String string) -> Array[String]
+   end
+ end
data/sig/tantiny/version.rbs ADDED
@@ -0,0 +1,3 @@
+ module Tantiny
+   VERSION: String
+ end
data/sig/tantiny.rbs ADDED
@@ -0,0 +1,5 @@
+ module Tantiny
+   type date = Date | DateTime
+   type numeric = Integer | Float
+   type fields = Array[Symbol] | Symbol
+ end