typesensual 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typesensual/schema'
4
+ require 'typesensual/state_helpers'
5
+ require 'typesensual/callbacks' if defined?(ActiveRecord)
6
+
7
class Typesensual
  # Wraps a single Typesense collection hash: exposes its attributes as
  # readers, parses the Typesensual `name:env@version` naming scheme, and
  # forwards document operations to the Typesense client.
  #
  # `client` is not defined in this class; it is expected to be provided by
  # {StateHelpers}.
  class Collection
    include StateHelpers

    # The pattern we use for collection names, `name:env@version`, where `name`
    # is the name of the index, `env` is the environment, and `version` is the
    # timestamp of the collection's creation. If a name doesn't follow this
    # pattern, `name` collects everything.
    #
    # The pattern is anchored to the whole string and lets `.` span newlines,
    # so it matches any input and never parses just one line of a name.
    COLLECTION_NAME_PATTERN = /\A
      (?<name>.*?)           # matched lazily so the optional suffixes below can split off
      (?::(?<env>.*?))?      # the env is optional
      (?:@(?<version>\d+))?  # the version is also optional but must be an integer
    \z/mx.freeze

    # @overload initialize(collection)
    #   Initialize a new collection from a Typesense collection hash
    #
    #   @param collection [Hash] the Typesense collection hash
    #     * `created_at` [Integer] the timestamp of the collection's creation
    #     * `default_sorting_field` [String] the default sorting field
    #     * `enable_nested_fields` [Boolean] whether nested fields are enabled
    #     * `fields` [Array<Hash>] the fields in the collection
    #     * `name` [String] the name of the collection
    #     * `num_documents` [Integer] the number of documents in the collection
    #     * `symbols_to_index` [String] the symbols to index
    #     * `token_separators` [String] the token separators
    # @overload initialize(name)
    #   Initialize a new collection, loading info from Typesense
    #
    #   @param name [String] the name of the collection
    #   @raise [Typesense::Error::ObjectNotFound] if the collection doesn't exist
    def initialize(collection_or_name)
      @collection = if collection_or_name.is_a?(Hash)
                      collection_or_name.deep_stringify_keys
                    else
                      client.collections[collection_or_name].retrieve
                    end
    end

    # Reload the underlying collection data from Typesense
    # @return [self]
    def reload
      @collection = client.collections[name].retrieve
      # Drop the memoized timestamp so it is re-derived from the fresh data.
      @created_at = nil
      self
    end

    # The time the collection was created, as tracked by Typesense
    # @return [Time, nil] the creation time, or nil when the underlying hash
    #   carries no `created_at` (e.g. a collection built from a local hash)
    def created_at
      timestamp = @collection['created_at']
      return nil if timestamp.nil?

      # Time.at is core Ruby; no `require 'time'` is needed.
      @created_at ||= Time.at(Integer(timestamp))
    end

    # The default sorting field for the collection
    # @return [String] the default sorting field
    def default_sorting_field
      @collection['default_sorting_field']
    end

    # Whether the collection has nested fields enabled
    # @return [Boolean, nil] the raw `enable_nested_fields` value
    def enable_nested_fields?
      @collection['enable_nested_fields']
    end

    # The fields in the collection
    # @return [Array<Field>] the field information
    def fields
      @collection['fields'].map { |field| Field.new(field) }
    end

    # The raw, underlying name of the collection
    # @return [String] the name of the collection
    def name
      @collection['name']
    end

    # The number of documents in the collection
    # @return [Integer] the number of documents in the collection
    def num_documents
      @collection['num_documents']
    end

    # Special characters in strings which should be indexed as text
    # @return [Array<String>] the symbols to index
    def symbols_to_index
      @collection['symbols_to_index']
    end

    # Additional characters to be treated as separators when indexing text
    # @return [Array<String>] the token separators
    def token_separators
      @collection['token_separators']
    end

    # The name of the index, parsed from the Typesensual collection naming scheme.
    # @see COLLECTION_NAME_PATTERN
    # @return [String] the name of the index
    def index_name
      parsed_name['name']
    end

    # The environment the collection is in, parsed from the Typesensual collection
    # naming scheme.
    # @see COLLECTION_NAME_PATTERN
    # @return [String, nil] the environment, or nil when the name has none
    def env
      parsed_name['env']
    end

    # The version of the collection, parsed from the Typesensual collection naming
    # scheme.
    # @see COLLECTION_NAME_PATTERN
    # @return [String, nil] the version digits, or nil when the name has none
    def version
      parsed_name['version']
    end

    # Creates the collection in Typesense
    # @return [self]
    def create!
      client.collections.create(@collection)
      self
    end

    # Deletes the collection in Typesense
    # @return [void]
    def delete!
      typesense_collection.delete
    end

    # Create a new collection using the given collection hash
    #
    # @param collection [Hash] the Typesense collection hash
    #   * `default_sorting_field` [String] the default sorting field
    #   * `enable_nested_fields` [Boolean] whether nested fields are enabled
    #   * `fields` [Array<Hash>] the fields in the collection
    #   * `name` [String] the name of the collection
    #   * `symbols_to_index` [String] the symbols to index
    #   * `token_separators` [String] the token separators
    # @return [Collection] the created collection
    def self.create!(collection)
      new(collection).tap(&:create!)
    end

    # Insert a single document into typesense
    #
    # NOTE(review): this calls the client's `create`; Index documents its
    # payloads as "to upsert" — confirm whether an upsert is intended here.
    #
    # @param doc [Hash] the document to insert
    # @return [Object] whatever the Typesense client returns for the create call
    def insert_one!(doc)
      typesense_collection.documents.create(doc)
    end

    # Insert many documents into typesense. Notably, the input can be an enumerable
    # or enumerator, which will be batched into groups of `batch_size` and inserted
    # with the ID of any failed rows being provided in the response.
    #
    # @param docs [Enumerable<Hash>] the documents to insert
    # @param batch_size [Integer] how many documents to send per import call
    # @return [Array<Hash>] any failed insertions
    def insert_many!(docs, batch_size: 100)
      docs.lazy.each_slice(batch_size).with_object([]) do |slice, failures|
        results = typesense_collection.documents.import(slice, return_id: true)
        # Keep only the per-document results that did not report success.
        failures.push(*results.reject { |result| result['success'] })
      end
    end

    # Remove a single document from typesense
    #
    # @param id [String] the ID of the document to remove
    # @return [void]
    def remove_one!(id)
      typesense_collection.documents[id.to_s].delete
    end

    # Build a {Search} against this collection
    #
    # @param query [Object] passed through to {Search} as `query`
    # @param query_by [Object] passed through to {Search} as `query_by`
    # @return [Search] the search object
    def search(query:, query_by:)
      Search.new(
        collection: typesense_collection,
        query: query,
        query_by: query_by
      )
    end

    # The client-side handle for this collection, looked up by {#name}
    # @return [Object] the memoized `client.collections[name]` handle
    def typesense_collection
      @typesense_collection ||= client.collections[name]
    end

    private

    # The memoized match of {#name} against {COLLECTION_NAME_PATTERN}
    def parsed_name
      @parsed_name ||= name.match(COLLECTION_NAME_PATTERN)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  # Holds the connection settings (nodes, API key), the environment name and
  # the Typesense client built from them.
  class Config
    attr_accessor :nodes, :api_key
    attr_writer :env, :client

    # Yields the new config to the given block, if any, so it can be filled in.
    #
    # @yieldparam config [Config] the config being initialized
    def initialize
      yield self if block_given?
    end

    # The configured environment; falls back to `Rails.env` when Rails is
    # defined, and to nil otherwise.
    #
    # @return [Object, nil] the environment
    def env
      return @env if @env

      @env = defined?(Rails) ? Rails.env : nil
    end

    # The client, built once from `nodes` and `api_key` unless one was assigned.
    #
    # @return [Object] the memoized client
    def client
      return @client if @client

      options = { nodes: nodes, api_key: api_key }
      @client = Typesense::Client.new(options)
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  # A single field definition of a collection schema, backed by a hash with
  # string keys.
  class Field
    # @param hash [Hash] the field attributes; keys may be strings or symbols
    def initialize(hash)
      # Core-Ruby equivalent of ActiveSupport's `stringify_keys`.
      @field = hash.transform_keys(&:to_s)
    end

    # @return [Object, nil] the raw `facet` value
    def facet?
      @field['facet']
    end

    # @return [Object, nil] the raw `index` value
    def index?
      @field['index']
    end

    # @return [Object, nil] the raw `infix` value
    def infix?
      @field['infix']
    end

    # The field's locale, with blank values normalized to nil.
    #
    # @return [Object, nil] the locale, or nil when it is missing, empty, or
    #   a whitespace-only string
    def locale
      value = @field['locale']
      return nil unless value
      return nil if value.respond_to?(:empty?) && value.empty?
      return nil if value.is_a?(String) && value.match?(/\A[[:space:]]*\z/)

      value
    end

    # The field's name as a string; a Regexp name is returned as its source.
    #
    # @return [String] the field name
    def name
      raw = @field['name']
      raw.is_a?(Regexp) ? raw.source : raw.to_s
    end

    # @return [Object, nil] the raw `optional` value
    def optional?
      @field['optional']
    end

    # @return [Object, nil] the raw `sort` value
    def sort?
      @field['sort']
    end

    # @return [Object, nil] the raw `type` value
    def type
      @field['type']
    end

    # The field as a hash with string keys, a normalized name and locale, and
    # every nil value dropped.
    #
    # Uses the non-mutating `merge`/`compact` so the field's own state is left
    # untouched and a Hash is always returned (`compact!` returns nil when
    # there is nothing to remove).
    #
    # @return [Hash] the field definition
    def to_h
      @field.merge('name' => name, 'locale' => locale).compact
    end
  end
end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typesensual/schema'
4
+ require 'typesensual/state_helpers'
5
+
6
class Typesensual
  # Represents your index as a set of collections in Typesense. Manages
  # Typesense aliases, but with some extra functionality for managing multiple
  # environments and versions, and a nice DSL for defining your schema.
  #
  # `client` and `env` are not defined in this class; they are expected to be
  # provided by {StateHelpers}.
  #
  # @example Defining an index
  #   class PostsIndex < Typesensual::Index
  #     # Optional, default is inferred from the class name
  #     index_name 'user_posts'
  #
  #     schema do
  #       field 'text', type: 'string[]'
  #       field 'user', type: 'string', facet: true
  #     end
  #   end
  class Index
    include StateHelpers

    # Get or set the name for this index
    #
    # @overload index_name(value)
    #   Explicitly set the index name
    #
    #   @param value [String] the name to identify this index
    #   @return [void]
    #
    # @overload index_name
    #   Get the index name (either explicitly set or inferred from the class name)
    #
    #   @return [String] the name of this index
    def self.index_name(value = nil)
      if value
        @index_name = value
      else
        # \z anchors to the very end of the string rather than the end of a line.
        @index_name ||= name.underscore.sub(/_index\z/, '')
      end
    end

    # The alias name for this index in the current environment
    #
    # @return [String] the alias name
    def self.alias_name
      [index_name, env].compact.join(':')
    end

    # Generate a new collection name for the given version
    #
    # @param version [String] the version to generate the collection name for
    # @return [String] the generated collection name
    def self.collection_name_for(version: Time.now.strftime('%s'))
      "#{alias_name}@#{version}"
    end

    # Create a new collection for this index
    #
    # @param version [String] the version to create the collection for
    # @return [Collection] the newly created collection
    def self.create!(version: Time.now.strftime('%s'))
      generated_name = collection_name_for(version: version)

      Collection.create!(@schema.to_h.merge('name' => generated_name))
    end

    # Get the collections for this index
    #
    # NOTE(review): only the index name is compared, not the environment —
    # confirm whether collections from other environments should be included.
    #
    # @return [Array<Collection>] the collections whose index name matches
    def self.collections
      Typesensual.collections.filter do |collection|
        collection.index_name == index_name
      end
    end

    # Find the collection of this index with the given version
    #
    # Only this index's collections are searched, so another index that
    # happens to share the version is never returned.
    #
    # @param version [String, Integer, nil] the version to look for
    # @return [Collection, nil] the matching collection, if any
    def self.collection_for(version:)
      # Parsed versions are strings, so compare as strings (nil stays nil).
      wanted = version&.to_s

      collections.find { |collection| collection.version == wanted }
    end

    # Get the collection that the alias points to
    #
    # @return [Collection, nil] the collection that the alias points to, or
    #   nil when Typesense reports it as not found
    def self.collection
      @collection ||= Collection.new(alias_name)
    rescue Typesense::Error::ObjectNotFound
      nil
    end

    # Define the schema for the collection
    #
    # See {Schema} for more information
    def self.schema(&block)
      @schema = Typesensual::Schema.new(&block)
    end

    # Updates the alias to point to the given collection name
    #
    # @param name_or_collection [String, Collection] the collection to point
    #   the alias to
    # @return [Object] the result of the alias upsert call
    def self.update_alias!(name_or_collection)
      target_name = if name_or_collection.is_a?(Collection)
                      name_or_collection.name
                    else
                      name_or_collection
                    end

      result = client.aliases.upsert(alias_name, collection_name: target_name)
      # The alias now points elsewhere; forget the memoized collection so
      # `collection` looks it up again.
      @collection = nil
      result
    end

    # Indexes the records with the given ids into a collection, then updates
    # the alias to point to it.
    #
    # @param ids [Enumerable] the ids of the records to index
    # @param collection [Collection] the collection to index into, defaults to a
    #   new collection
    def self.reindex!(ids, collection: create!)
      index_many(ids, collection: collection)

      update_alias!(collection)
    end

    # The method to implement to index *one* record.
    #
    # @return [Hash] the document to upsert in Typesense
    def index_one(_id); end

    # Builds the document for one id via {#index_one} and inserts it
    #
    # @param id [Object] the id handed to {#index_one}
    # @param collection [Collection] the collection to insert into, defaults
    #   to the aliased collection
    def self.index_one(id, collection: self.collection)
      collection.insert_one!(new.index_one(id))
    end

    # The method to implement to index *many* records
    # Unlike {#index_one}, this method should yield successive records to index
    #
    # @yield [Hash] a document to upsert in Typesense
    def index_many(ids)
      ids.each do |id|
        yield index_one(id)
      end
    end

    # Inserts the documents yielded by {#index_many} in batches
    #
    # @param ids [Enumerable] the ids handed to {#index_many}
    # @param collection [Collection] the collection to insert into, defaults
    #   to the aliased collection
    # @param batch_size [Integer] forwarded to {Collection#insert_many!}
    # @return [Array<Hash>] the result of {Collection#insert_many!}
    def self.index_many(ids, collection: self.collection, batch_size: 100)
      collection.insert_many!(
        new.enum_for(:index_many, ids),
        batch_size: batch_size
      )
    end

    # Removes the document with the given id
    #
    # @param id [Object] the id of the document to remove
    # @param collection [Collection] the collection to remove from, defaults
    #   to the aliased collection
    def self.remove_one(id, collection: self.collection)
      collection.remove_one!(id)
    end

    if defined?(ActiveRecord)
      # @return [Typesensual::Callbacks] a callbacks object bound to this index
      def self.ar_callbacks
        Typesensual::Callbacks.new(self)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  # Railtie that loads the Typesensual rake task file.
  class Railtie < Rails::Railtie
    rake_tasks { load 'tasks/typesensual.rake' }
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+
5
class Typesensual
  # Implements the behaviour behind the Typesensual rake tasks; every entry
  # point writes its report to the given `output` stream.
  class RakeHelper
    HEADER_FORMAT = Paint["==> %s\n", :bold]
    LIST_ROW_FORMAT = "%<prefix>4s %<version>-20s %<created_at>-20s %<documents>-20s\n"
    LIST_HEADER = Paint[format(
      LIST_ROW_FORMAT,
      prefix: '', version: 'Version', created_at: 'Created At', documents: 'Documents'
    ), :bold, :underline]
    # How creation times are rendered in every report
    TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    class << self
      # List the collections in all Index descendants
      #
      # @param output [IO] The output stream to write to
      # @example
      #   rake typesensual:list
      def list(output: $stdout)
        # Build up a hash of indices and their (sorted) collections
        indices = Index.descendants.to_h do |index|
          [index, index.collections.sort_by(&:created_at).reverse]
        end

        indices.each do |index, collections|
          # `Index.collection` is nil when the alias does not exist yet; in
          # that case no row is marked as the current one.
          alias_name = index.collection&.name

          output.printf(HEADER_FORMAT, index.name.titleize)
          output.printf(LIST_HEADER)

          collections.each do |collection|
            output.printf(LIST_ROW_FORMAT,
                          prefix: collection.name == alias_name ? '->' : '',
                          version: collection.version,
                          created_at: collection.created_at.strftime(TIME_FORMAT),
                          documents: collection.num_documents)
          end

          output.printf("\n")
        end
      end

      # Index all records from a model into a newly created collection of an index
      #
      # @param index [String] The name of the index to index into
      # @param model [String] The name of the model to index from
      # @param output [IO] The output stream to write to
      # @example
      #   rake typesensual:index[FooIndex,Foo]
      def index(index:, model:, output: $stdout)
        index_class = resolve('index', index, output)
        model_class = resolve('model', model, output)
        return unless index_class && model_class

        collection = index_class.create!
        output.printf(
          Paint["==> Indexing %<model>s into %<index>s (Version %<version>s)\n", :bold],
          model: model_class.name,
          index: index_class.name,
          version: collection.version
        )
        failures = index_class.index_many(
          model_class.ids,
          collection: collection
        )

        failures.each do |failure|
          output.puts(failure.to_json)
        end
      end

      # Update the alias for an index to point to a specific version
      #
      # @param index [String] The name of the index to update
      # @param version [String] The version to update the alias to
      # @param output [IO] The output stream to write to
      # @example
      #   rake typesensual:update_alias[FooIndex,1]
      def update_alias(index:, version:, output: $stdout)
        index_class = resolve('index', index, output)
        return unless index_class

        old_coll = index_class.collection
        new_coll = index_class.collection_for(version: version)

        unless new_coll
          output.puts(Paint["--> No such version #{version} for #{index_class.name}", :bold])
          return
        end

        output.puts(Paint["==> Alias for #{index_class.name}", :bold])
        output.printf(
          "Old: %<version>s (%<created_at>s)\n",
          version: old_coll&.version || 'None',
          created_at: old_coll&.created_at&.strftime(TIME_FORMAT) || 'N/A'
        )
        index_class.update_alias!(new_coll)

        output.printf(
          "New: %<version>s (%<created_at>s)\n",
          version: new_coll.version,
          created_at: new_coll.created_at.strftime(TIME_FORMAT)
        )
      end

      private

      # Turns a class name given on the command line into the constant it
      # names, reporting on `output` (instead of failing later on nil) when
      # the name cannot be resolved.
      #
      # @param kind [String] what the name refers to, used in the message
      # @param class_name [String] the constant name to resolve
      # @param output [IO] The output stream to write to
      # @return [Class, nil] the resolved constant, or nil when unknown
      def resolve(kind, class_name, output)
        constant = class_name.safe_constantize
        output.puts(Paint["--> No such #{kind} #{class_name}", :bold]) unless constant
        constant
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typesensual/field'
4
+
5
class Typesensual
  # DSL object that collects a collection schema: the block given to `new` is
  # evaluated on the instance, and {#to_h} returns what was declared.
  class Schema
    # @yield evaluated with `instance_eval`, so the DSL methods below can be
    #   called without a receiver; a missing block leaves the schema empty
    def initialize(&block)
      instance_eval(&block) if block
    end

    # Declare a field.
    #
    # @param name [String, Symbol, Regexp] the field name
    # @param type [String] the field type, `'auto'` by default
    # @param locale [String, nil] the field's locale
    # @param facet [Boolean, nil] the `facet` option
    # @param index [Boolean, nil] the `index` option
    # @param optional [Boolean, nil] the `optional` option
    # @param infix [Boolean, nil] the `infix` option
    # @param sort [Boolean, nil] the `sort` option
    # @return [Array<Field>] the fields declared so far
    def field(name, type: 'auto', locale: nil, facet: nil, index: nil, optional: nil,
              infix: nil, sort: nil)
      @fields ||= []
      @fields << Field.new(
        name: name,
        type: type,
        locale: locale,
        facet: facet,
        index: index,
        optional: optional,
        infix: infix,
        sort: sort
      )
    end

    # @param separators [Array<String>] the token separators
    def token_separators(*separators)
      @token_separators = separators
    end

    # @param symbols [Array<String>] the symbols to index
    def symbols_to_index(*symbols)
      @symbols_to_index = symbols
    end

    # @param field_name [String, Symbol] the default sorting field; stored as a string
    def default_sorting_field(field_name)
      @default_sorting_field = field_name.to_s
    end

    # @param value [Boolean] whether nested fields are enabled
    def enable_nested_fields(value = true) # rubocop:disable Style/OptionalBooleanParameter
      @enable_nested_fields = value
    end

    # The declared schema with unset options left out.
    #
    # Uses `compact` rather than `compact!`: the latter returns nil when there
    # is nothing to remove, i.e. exactly when every option has been set.
    #
    # @return [Hash] the schema as a hash with string keys
    def to_h
      {
        'fields' => @fields&.map(&:to_h),
        'token_separators' => @token_separators,
        'symbols_to_index' => @symbols_to_index,
        'default_sorting_field' => @default_sorting_field,
        'enable_nested_fields' => @enable_nested_fields
      }.compact
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  class Search
    # Read-only view over one hit hash of a search response.
    class Hit
      # Example hit:
      #
      #   {
      #     "highlights": [
      #       {
      #         "field": "company_name",
      #         "matched_tokens": ["Stark"],
      #         "snippet": "<mark>Stark</mark> Industries"
      #       }
      #     ],
      #     "document": {
      #       "id": "124",
      #       "company_name": "Stark Industries",
      #       "num_employees": 5215,
      #       "country": "USA"
      #     },
      #     "text_match": 130916
      #   }
      #
      # @param hit [Hash] the Typesense hit hash
      #   * `highlights` [Array<Hash>] the highlights for the hit
      #   * `document` [Hash] the matching document
      #   * `text_match` [Integer] the text matching score
      def initialize(hit)
        @hit = hit
      end

      # @return [Object] the value stored under `highlights`
      def highlights
        read('highlights')
      end

      # @return [Object] the value stored under `document`
      def document
        read('document')
      end

      # @return [Object] the value stored under `text_match`
      def score
        read('text_match')
      end

      private

      # Looks up one key of the wrapped hit hash
      def read(key)
        @hit[key]
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  class Search
    # Read-only view over a search response hash, with pagination helpers.
    class Results
      # @param results [Hash] the search response hash (string keys)
      def initialize(results)
        @results = results
      end

      # @return [Array<Hit>] the entries under `hits`, each wrapped in a {Hit}
      def hits
        @results['hits'].map { |hit| Hit.new(hit) }
      end

      # @return [Integer] the value stored under `found`
      def count
        @results['found']
      end

      # @return [Integer] the value stored under `out_of`
      def out_of
        @results['out_of']
      end

      # @return [Integer] the value stored under `page`
      def current_page
        @results['page']
      end

      # @return [Boolean] whether the current page is page 1
      def first_page?
        current_page == 1
      end

      # @return [Boolean] whether there is no page after the current one;
      #   also true for an empty result set, which has zero pages
      def last_page?
        current_page >= total_pages
      end

      # @return [Integer, nil] the previous page number, or nil on the first page
      def prev_page
        current_page - 1 unless first_page?
      end

      # @return [Integer, nil] the next page number, or nil on the last page
      def next_page
        current_page + 1 unless last_page?
      end

      # @return [Integer] the value stored under `search_time_ms`
      def search_time_ms
        @results['search_time_ms']
      end

      # The page size: a top-level `per_page` when the hash carries one,
      # otherwise the value echoed back under `request_params`.
      #
      # @return [Integer, nil] the page size, or nil when neither is present
      def per_page
        @results['per_page'] || @results.dig('request_params', 'per_page')
      end

      # The number of pages needed for `found` results at the current page size.
      #
      # @return [Integer] the page count; 0 when the page size is unknown or
      #   not positive (instead of dividing by zero)
      def total_pages
        page_size = per_page.to_i
        return 0 unless page_size.positive?

        count.to_i.fdiv(page_size).ceil
      end
    end
  end
end