tantiny 0.2.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
data/lib/tantiny/query.rb ADDED
@@ -0,0 +1,165 @@
+ # frozen_string_literal: true
+
+ require "date"
+
+ module Tantiny
+   class Query
+     TYPES = %i[
+       all empty term fuzzy_term
+       phrase regex range facet
+       smart prefix
+     ].freeze
+
+     DEFAULT_BOOST = 1.0
+     DEFAULT_FUZZY_DISTANCE = 1
+
+     class << self
+       def conjunction(*queries)
+         # @type var queries: Array[untyped]
+         queries.one? ? queries.first : __conjunction(queries)
+       end
+
+       def disjunction(*queries)
+         # @type var queries: Array[untyped]
+         queries.one? ? queries.first : __disjunction(queries)
+       end
+
+       def all_query(_index = nil)
+         __new_all_query
+       end
+
+       def empty_query(_index = nil)
+         __new_empty_query
+       end
+
+       def term_query(index, fields, term, **options)
+         allowed_fields = text_and_strings(index)
+         construct_query(index, :term, allowed_fields, fields, [term.to_s], **options)
+       end
+
+       def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **options)
+         params = [term.to_s, distance.to_i]
+         allowed_fields = text_and_strings(index)
+         construct_query(index, :fuzzy_term, allowed_fields, fields, params, **options)
+       end
+
+       def phrase_query(index, fields, phrase, **options)
+         queries = [*fields].map do |f|
+           terms = index.schema.tokenizer_for(f).terms(phrase)
+           allowed_fields = index.schema.text_fields
+           construct_query(index, :phrase, allowed_fields, f, [terms], **options)
+         end
+
+         queries.empty? ? empty_query : disjunction(*queries)
+       end
+
+       def regex_query(index, fields, regex, **options)
+         allowed_fields = text_and_strings(index)
+         construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **options)
+       end
+
+       def prefix_query(index, fields, prefix, **options)
+         regex_query(index, fields, Regexp.escape(prefix) + ".*", **options)
+       end
+
+       def range_query(index, fields, range, **options)
+         schema = index.schema
+
+         case range.first
+         when Integer
+           allowed_fields = schema.integer_fields
+           from, to = [range.min, range.max]
+         when Float
+           allowed_fields = schema.double_fields
+           from, to = [range.first, range.last]
+         when Date, DateTime
+           # @type var range: Range[Date | DateTime]
+           allowed_fields = schema.date_fields
+           from, to = [Helpers.timestamp(range.first), Helpers.timestamp(range.last)]
+         else
+           raise UnsupportedRange.new(range.first.class)
+         end
+
+         # @type var allowed_fields: Array[Symbol]
+         construct_query(index, :range, allowed_fields, fields, [from, to], **options)
+       end
+
+       def facet_query(index, field, path, **options)
+         allowed_fields = index.schema.facet_fields
+         construct_query(index, :facet, allowed_fields, field, [path], **options)
+       end
+
+       def smart_query(index, fields, query_string, **options)
+         fuzzy_distance = options[:fuzzy_distance]
+         boost_factor = options.fetch(:boost, DEFAULT_BOOST)
+
+         field_queries = [*fields].map do |field|
+           terms = index.schema.tokenizer_for(field).terms(query_string)
+
+           # See: https://github.com/soutaro/steep/issues/272
+           # @type block: nil | Query
+           next if terms.empty?
+
+           term_queries = terms.map do |term|
+             if fuzzy_distance.nil?
+               term_query(index, field, term)
+             else
+               fuzzy_term_query(index, field, term, fuzzy_distance)
+             end
+           end
+
+           # @type var terms: untyped
+           # @type var term_queries: untyped
+           last_term_query = prefix_query(index, field, terms.last) | term_queries.last
+
+           conjunction(last_term_query, *term_queries[0...-1])
+         end.compact
+
+         disjunction(*field_queries).boost(boost_factor)
+       end
+
+       private
+
+       # Can't use variadic argument `params` here due to:
+       # https://github.com/soutaro/steep/issues/480
+       def construct_query(index, query_type, allowed_fields, fields, params, **options)
+         queries = [*fields].map do |field|
+           supported = allowed_fields.include?(field)
+           raise UnsupportedField.new(field) unless supported
+
+           send("__new_#{query_type}_query", index, field.to_s, *params)
+         end
+
+         return empty_query if fields.empty?
+
+         disjunction(*queries).boost(options.fetch(:boost, DEFAULT_BOOST))
+       end
+
+       def text_and_strings(index)
+         index.schema.text_fields | index.schema.string_fields
+       end
+     end
+
+     def |(other)
+       raise ArgumentError.new("Not a #{self.class}.") unless other.is_a?(self.class)
+
+       self.class.disjunction(self, other)
+     end
+
+     def &(other)
+       raise ArgumentError.new("Not a #{self.class}.") unless other.is_a?(self.class)
+
+       self.class.conjunction(self, other)
+     end
+
+     def !
+       __negation
+     end
+
+     def boost(boost_factor)
+       return self if boost_factor == DEFAULT_BOOST
+
+       __boost(boost_factor.to_f)
+     end
+   end
+ end
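Query objects returned by the constructors above compose through the operator methods at the bottom of the class. A minimal sketch of how that reads in practice (the `index` variable and the :title/:description/:published_on field names are hypothetical, not part of this diff):

    popular = index.smart_query(%i[title description], "crime novel", fuzzy_distance: 1)
    recent  = index.range_query(:published_on, Date.new(2020, 1, 1)..Date.today)

    # | builds a disjunction, & a conjunction, ! a negation; #boost scales relevance.
    index.search((popular & recent) | index.term_query(:title, "whodunit").boost(2.0))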
data/lib/tantiny/schema.rb ADDED
@@ -0,0 +1,53 @@
+ # frozen_string_literal: true
+
+ module Tantiny
+   class Schema
+     attr_reader :default_tokenizer,
+       :id_field,
+       :text_fields,
+       :string_fields,
+       :integer_fields,
+       :double_fields,
+       :date_fields,
+       :facet_fields,
+       :field_tokenizers
+
+     def initialize(tokenizer, &block)
+       @default_tokenizer = tokenizer
+       @id_field = :id
+       @text_fields = []
+       @string_fields = []
+       @integer_fields = []
+       @double_fields = []
+       @date_fields = []
+       @facet_fields = []
+       @field_tokenizers = {}
+
+       instance_exec(&block)
+     end
+
+     def tokenizer_for(field)
+       field_tokenizers[field] || default_tokenizer
+     end
+
+     private
+
+     def id(key) = @id_field = key
+
+     def string(key) = @string_fields << key
+
+     def integer(key) = @integer_fields << key
+
+     def double(key) = @double_fields << key
+
+     def date(key) = @date_fields << key
+
+     def facet(key) = @facet_fields << key
+
+     def text(key, tokenizer: nil)
+       @field_tokenizers[key] = tokenizer if tokenizer
+
+       @text_fields << key
+     end
+   end
+ end
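A sketch of how this DSL is typically driven (field names are hypothetical). Because `initialize` runs the block through `instance_exec`, the bare `id`/`text`/`string`/... calls inside it resolve to the private methods defined above:

    schema = Tantiny::Schema.new(Tantiny::Tokenizer.default) do
      id :isbn
      text :description, tokenizer: Tantiny::Tokenizer.new(:stemmer, language: :en)
      string :title
      integer :pages
      double :rating
      date :published_on
      facet :category
    end

    schema.tokenizer_for(:description) # the per-field stemmer
    schema.tokenizer_for(:title)       # falls back to the default tokenizer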
data/lib/tantiny/tokenizer.rb ADDED
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module Tantiny
+   class Tokenizer
+     def self.default
+       new(:simple)
+     end
+
+     def self.new(kind, **options)
+       case kind
+       when :simple
+         __new_simple_tokenizer
+       when :stemmer
+         language = options[:language] || :en
+         __new_stemmer_tokenizer(language.to_s)
+       when :ngram
+         prefix_only = options.fetch(:prefix_only, false)
+         __new_ngram_tokenizer(options[:min], options[:max], prefix_only)
+       else
+         raise UnknownTokenizer.new(kind)
+       end
+     end
+
+     def terms(string)
+       __extract_terms(string)
+     end
+   end
+ end
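A sketch of the three constructors the `case` above dispatches on (sample arguments are illustrative only):

    simple  = Tantiny::Tokenizer.default                      # equivalent to new(:simple)
    stemmer = Tantiny::Tokenizer.new(:stemmer, language: :ru) # locale passed down as "ru"
    ngram   = Tantiny::Tokenizer.new(:ngram, min: 2, max: 10, prefix_only: true)

    stemmer.terms("running") # tokenizes and stems via the native __extract_terms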
data/lib/tantiny/version.rb ADDED
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Tantiny
+   VERSION = "0.2.2" # {x-release-please-version}
+ end
data/lib/tantiny.rb ADDED
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ require "ruby-next/language/setup"
+ RubyNext::Language.setup_gem_load_path
+
+ require "rutie"
+
+ require "tantiny/version"
+ require "tantiny/errors"
+ require "tantiny/helpers"
+ require "tantiny/schema"
+ require "tantiny/tokenizer"
+ require "tantiny/query"
+ require "tantiny/index"
+
+ require_relative "tantiny.so"
+
+ module Tantiny
+ end
data/lib/tantiny.so ADDED
Binary file
data/sig/tantiny/errors.rbs ADDED
@@ -0,0 +1,20 @@
+ module Tantiny
+   class TantivyError < StandardError
+   end
+
+   class UnknownField < StandardError
+     def initialize: () -> void
+   end
+
+   class UnknownTokenizer < StandardError
+     def initialize: (Symbol tokenizer_type) -> void
+   end
+
+   class UnsupportedRange < StandardError
+     def initialize: (Class range_type) -> void
+   end
+
+   class UnsupportedField < StandardError
+     def initialize: (Symbol field) -> void
+   end
+ end
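These signatures match the raise sites in query.rb and tokenizer.rb above; a sketch of when each fires (`index` and its field names are hypothetical):

    index.term_query(:pages, "100")     # UnsupportedField: :pages is not a text/string field
    index.range_query(:pages, "a".."z") # UnsupportedRange: String endpoints are not handled
    Tantiny::Tokenizer.new(:bogus)      # UnknownTokenizer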
data/sig/tantiny/helpers.rbs ADDED
@@ -0,0 +1,6 @@
+
+ module Tantiny
+   module Helpers
+     def self.timestamp: ((Date | DateTime) date) -> String
+   end
+ end
data/sig/tantiny/index.rbs ADDED
@@ -0,0 +1,82 @@
+ module Tantiny
+   class Index
+     DEFAULT_INDEX_SIZE: Integer
+     DEFAULT_LIMIT: Integer
+
+     def self.new: (
+       String path,
+       **untyped options
+     ) { (*untyped) -> void } -> Index
+
+     def self.__new: (
+       String path,
+       Integer index_size,
+       Tokenizer default_tokenizer,
+       Hash[String, Tokenizer] field_tokenizers,
+       Array[String] text_fields,
+       Array[String] string_fields,
+       Array[String] integer_fields,
+       Array[String] double_fields,
+       Array[String] date_fields,
+       Array[String] facet_fields
+     ) -> Index
+
+     attr_reader schema: Schema
+
+     def commit: () -> void
+     def reload: () -> void
+     def <<: (untyped document) -> void
+     def delete: (String id) -> void
+
+     def search: (
+       (Query | String) query,
+       ?limit: Integer,
+       **untyped smart_query_options
+     ) -> Array[String]
+
+     def all_query: () -> Query
+     def empty_query: () -> Query
+     def term_query: (fields fields, String term, **untyped options) -> Query
+     def fuzzy_term_query: (fields fields, String term, ?Integer distance, **untyped options) -> Query
+     def phrase_query: (fields fields, String phrase, **untyped options) -> Query
+     def regex_query: (fields fields, String regex, **untyped options) -> Query
+     def prefix_query: (fields fields, String prefix, **untyped options) -> Query
+     def facet_query: (Symbol field, String path, **untyped options) -> Query
+     def range_query: (fields fields, Range[numeric | date] range, **untyped options) -> Query
+     def smart_query: (fields fields, String query_string, **untyped options) -> Query
+
+     def __commit: () -> void
+     def __reload: () -> void
+
+     def __add_document: (
+       String id,
+       Hash[String, String] text_fields,
+       Hash[String, String] string_fields,
+       Hash[String, Integer] integer_fields,
+       Hash[String, Float] double_fields,
+       Hash[String, String] date_fields,
+       Hash[String, String] facet_fields
+     ) -> void
+
+     def __delete_document: (String id) -> void
+
+     def __search: (Query query, Integer limit) -> Array[String]
+
+     private
+
+     attr_writer schema: Schema
+
+     def slice_document: (
+       untyped document,
+       Array[Symbol] fields
+     ) { (untyped v) -> untyped } -> Hash[String, untyped]
+
+     def default_search: (
+       String query_string,
+       Integer limit,
+       ?fuzzy_distance: Integer
+     ) -> Array[String]
+
+     def resolve: (untyped document, Symbol field) -> untyped
+   end
+ end
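A sketch of the write/read lifecycle these signatures describe (the path, schema block, and document shape are hypothetical):

    index = Tantiny::Index.new("/tmp/books") { text :description }

    index << { id: 1, description: "An honest review" } # documents are keyed by id
    index.commit  # flush pending writes
    index.reload  # pick them up on the reader side

    index.search("review", limit: 10) # returns an Array of matching id strings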
data/sig/tantiny/query.rbs ADDED
@@ -0,0 +1,135 @@
+ module Tantiny
+   class Query
+     TYPES: Array[Symbol]
+     DEFAULT_BOOST: Float
+     DEFAULT_FUZZY_DISTANCE: Integer
+
+     def self.disjunction: (*Query queries) -> Query
+     def self.conjunction: (*Query queries) -> Query
+
+     def self.all_query: (?Index _index) -> Query
+     def self.empty_query: (?Index _index) -> Query
+
+     def self.term_query: (
+       Index index,
+       fields fields,
+       String term,
+       **untyped options
+     ) -> Query
+
+     def self.fuzzy_term_query: (
+       Index index,
+       fields fields,
+       String term,
+       ?Integer distance,
+       **untyped options
+     ) -> Query
+
+     def self.phrase_query: (
+       Index index,
+       fields fields,
+       String phrase,
+       **untyped options
+     ) -> Query
+
+     def self.regex_query: (
+       Index index,
+       fields fields,
+       String regex,
+       **untyped options
+     ) -> Query
+
+     def self.prefix_query: (
+       Index index,
+       fields fields,
+       String prefix,
+       **untyped options
+     ) -> Query
+
+     def self.facet_query: (
+       Index index,
+       Symbol field,
+       String path,
+       **untyped options
+     ) -> Query
+
+     def self.range_query: (
+       Index index,
+       fields fields,
+       Range[numeric | date] range,
+       **untyped options
+     ) -> Query
+
+     def self.smart_query: (
+       Index index,
+       fields fields,
+       String query_string,
+       **untyped options
+     ) -> Query
+
+     def self.__new_all_query: () -> Query
+     def self.__new_empty_query: () -> Query
+
+     def self.__new_term_query: (
+       Index index,
+       String field,
+       String term
+     ) -> Query
+
+     def self.__new_fuzzy_term_query: (
+       Index index,
+       String field,
+       String term,
+       Integer distance
+     ) -> Query
+
+     def self.__new_regex_query: (
+       Index index,
+       String field,
+       String regex
+     ) -> Query
+
+     def self.__new_range_query: (
+       Index index,
+       String field,
+       untyped from,
+       untyped to
+     ) -> Query
+
+     def self.__new_phrase_query: (
+       Index index,
+       String field,
+       Array[String] terms
+     ) -> Query
+
+     def self.__new_facet_query: (
+       Index index,
+       String field,
+       String path
+     ) -> Query
+
+     def self.__disjunction: (Array[Query] queries) -> Query
+     def self.__conjunction: (Array[Query] queries) -> Query
+
+     def |: (Query query) -> Query
+     def &: (Query query) -> Query
+     def !: () -> Query
+     def boost: (numeric boost_factor) -> Query
+
+     def __negation: () -> Query
+     def __boost: (Float boost_factor) -> Query
+
+     private
+
+     def self.construct_query: (
+       Index index,
+       Symbol query_type,
+       Array[Symbol] allowed_fields,
+       fields fields,
+       Array[untyped] params,
+       **untyped options
+     ) -> Query
+
+     def self.text_and_strings: (Index index) -> Array[Symbol]
+   end
+ end
data/sig/tantiny/schema.rbs ADDED
@@ -0,0 +1,26 @@
+ module Tantiny
+   class Schema
+     attr_reader default_tokenizer: Tokenizer
+     attr_reader id_field: Symbol
+     attr_reader text_fields: Array[Symbol]
+     attr_reader string_fields: Array[Symbol]
+     attr_reader integer_fields: Array[Symbol]
+     attr_reader double_fields: Array[Symbol]
+     attr_reader date_fields: Array[Symbol]
+     attr_reader facet_fields: Array[Symbol]
+     attr_reader field_tokenizers: Hash[Symbol, Tokenizer]
+
+     def initialize: (Tokenizer tokenizer) { (*untyped) -> void } -> void
+     def tokenizer_for: (Symbol field) -> Tokenizer
+
+     private
+
+     def id: (Symbol key) -> void
+     def text: (Symbol key, ?tokenizer: Tokenizer) -> void
+     def string: (Symbol key) -> void
+     def integer: (Symbol key) -> void
+     def double: (Symbol key) -> void
+     def date: (Symbol key) -> void
+     def facet: (Symbol key) -> void
+   end
+ end
data/sig/tantiny/tokenizer.rbs ADDED
@@ -0,0 +1,25 @@
+ module Tantiny
+   class Tokenizer
+     def self.default: () -> Tokenizer
+
+     def self.new: (Symbol kind, **untyped options) -> Tokenizer
+
+     def self.__new_ngram_tokenizer: (
+       Integer min,
+       Integer max,
+       bool prefix_only
+     ) -> Tokenizer
+
+     def self.__new_stemmer_tokenizer: (
+       String locale_code
+     ) -> Tokenizer
+
+     def self.__new_simple_tokenizer: () -> Tokenizer
+
+     public
+
+     def terms: (String string) -> Array[String]
+
+     def __extract_terms: (String string) -> Array[String]
+   end
+ end
data/sig/tantiny/version.rbs ADDED
@@ -0,0 +1,3 @@
+ module Tantiny
+   VERSION: String
+ end
data/sig/tantiny.rbs ADDED
@@ -0,0 +1,5 @@
+ module Tantiny
+   type date = Date | DateTime
+   type numeric = Integer | Float
+   type fields = Array[Symbol] | Symbol
+ end
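These aliases are what the signatures above abbreviate: `fields` means a single Symbol or an array of them, and `range_query` accepts either `numeric` or `date` endpoints, mirroring the `case range.first` dispatch in query.rb. A hypothetical illustration:

    index.term_query(%i[title description], "ruby")                     # fields: one Symbol or many
    index.range_query(:pages, 100..300)                                 # numeric endpoints
    index.range_query(:published_on, Date.new(2021, 1, 1)..Date.today)  # date endpoints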