typesensual 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typesensual/schema'
4
+ require 'typesensual/state_helpers'
5
+ require 'typesensual/callbacks' if defined?(ActiveRecord)
6
+
7
class Typesensual
  # Wraps a single Typesense collection: exposes the metadata held in the
  # collection hash, parses the Typesensual naming scheme out of its name, and
  # offers document-level operations (insert, remove, search).
  class Collection
    include StateHelpers

    # The pattern we use for collection names, `name:env@version`, where `name`
    # is the name of the index, `env` is the environment, and `version` is the
    # timestamp of the collection's creation. If a name doesn't follow this
    # pattern, `name` collects everything.
    #
    # The pattern is anchored to the whole string and lets `.` span newlines, so
    # it matches any input and the "`name` collects everything" fallback always
    # holds.
    COLLECTION_NAME_PATTERN = /\A
      (?<name>.*?)           # matched lazily, it only grows when the rest cannot match
      (?::(?<env>.*?))?      # the env is optional and starts after a colon
      (?:@(?<version>\d+))?  # the version is also optional but must be an integer
    \z/xm.freeze

    # @overload initialize(collection)
    #   Initialize a new collection from a Typesense collection hash
    #
    #   @param collection [Hash] the Typesense collection hash
    #     * `created_at` [Integer] the timestamp of the collection's creation
    #     * `default_sorting_field` [String] the default sorting field
    #     * `enable_nested_fields` [Boolean] whether nested fields are enabled
    #     * `fields` [Array<Hash>] the fields in the collection
    #     * `name` [String] the name of the collection
    #     * `num_documents` [Integer] the number of documents in the collection
    #     * `symbols_to_index` [Array<String>] the symbols to index
    #     * `token_separators` [Array<String>] the token separators
    # @overload initialize(name)
    #   Initialize a new collection, loading info from Typesense
    #
    #   @param name [String] the name of the collection
    #   @raise [Typesense::Error::ObjectNotFound] if the collection doesn't exist
    def initialize(collection_or_name)
      @collection = if collection_or_name.is_a?(Hash)
                      collection_or_name.deep_stringify_keys
                    else
                      client.collections[collection_or_name].retrieve
                    end
    end

    # Reload the underlying collection data from Typesense
    # @return [self]
    def reload
      @collection = client.collections[name].retrieve
      self
    end

    # The time the collection was created, as tracked by Typesense
    #
    # Built with `Time.at` so no extra `require 'time'` is needed.
    #
    # @return [Time, nil] the time the collection was created, or `nil` when the
    #   collection hash carries no `created_at`
    def created_at
      timestamp = @collection['created_at']
      return if timestamp.nil?

      @created_at ||= Time.at(timestamp.to_i)
    end

    # The default sorting field for the collection
    # @return [String] the default sorting field
    def default_sorting_field
      @collection['default_sorting_field']
    end

    # Whether the collection has nested fields enabled
    # @return [Boolean, nil] the raw `enable_nested_fields` value
    def enable_nested_fields?
      @collection['enable_nested_fields']
    end

    # The fields in the collection
    # @return [Array<Field>] the field information
    def fields
      @collection['fields'].map do |field|
        Field.new(field)
      end
    end

    # The raw, underlying name of the collection
    # @return [String] the name of the collection
    def name
      @collection['name']
    end

    # The number of documents in the collection
    # @return [Integer] the number of documents in the collection
    def num_documents
      @collection['num_documents']
    end

    # Special characters in strings which should be indexed as text
    # @return [Array<String>] the symbols to index
    def symbols_to_index
      @collection['symbols_to_index']
    end

    # Additional characters to be treated as separators when indexing text
    # @return [Array<String>] the token separators
    def token_separators
      @collection['token_separators']
    end

    # The name of the index, parsed from the Typesensual collection naming scheme.
    # @see COLLECTION_NAME_PATTERN
    # @return [String] the name of the index
    def index_name
      parsed_name['name']
    end

    # The environment the collection is in, parsed from the Typesensual collection
    # naming scheme.
    # @see COLLECTION_NAME_PATTERN
    # @return [String, nil] the environment, or `nil` if the name has none
    def env
      parsed_name['env']
    end

    # The version of the collection, parsed from the Typesensual collection naming
    # scheme.
    # @see COLLECTION_NAME_PATTERN
    # @return [String, nil] the version digits, or `nil` if the name has none
    def version
      parsed_name['version']
    end

    # Creates the collection in Typesense
    # @return [self]
    def create!
      client.collections.create(@collection)
      self
    end

    # Deletes the collection in Typesense
    # @return [void]
    def delete!
      typesense_collection.delete
    end

    # Create a new collection using the given collection hash
    #
    # @param collection [Hash] the Typesense collection hash
    #   * `default_sorting_field` [String] the default sorting field
    #   * `enable_nested_fields` [Boolean] whether nested fields are enabled
    #   * `fields` [Array<Hash>] the fields in the collection
    #   * `name` [String] the name of the collection
    #   * `symbols_to_index` [Array<String>] the symbols to index
    #   * `token_separators` [Array<String>] the token separators
    # @return [Collection] the created collection
    def self.create!(collection)
      new(collection).tap(&:create!)
    end

    # Insert a single document into typesense
    #
    # NOTE(review): this calls `documents.create`, not an upsert; confirm that
    # is intended for callers that re-index an already indexed record.
    #
    # @param doc [Hash] the document to insert
    # @return [Object] whatever the Typesense client returns for the create call
    def insert_one!(doc)
      typesense_collection.documents.create(doc)
    end

    # Insert many documents into typesense. Notably, the input can be an enumerable
    # or enumerator, which will be batched into groups of `batch_size` and inserted
    # with the ID of any failed rows being provided in the response.
    #
    # @param docs [Enumerable<Hash>] the documents to insert
    # @param batch_size [Integer] how many documents to send per import call
    # @return [Array<Hash>] any failed insertions
    def insert_many!(docs, batch_size: 100)
      docs.lazy.each_slice(batch_size).with_object([]) do |slice, failures|
        results = typesense_collection.documents.import(slice, return_id: true)
        # Keep only the per-document results that did not report success.
        failures.push(*results.reject { |result| result['success'] })
      end
    end

    # Remove a single document from typesense
    #
    # @param id [String] the ID of the document to remove
    # @return [void]
    def remove_one!(id)
      typesense_collection.documents[id.to_s].delete
    end

    # Build a search against this collection
    #
    # @param query [String] the query text handed to {Search}
    # @param query_by [Object] the `query_by` setting handed to {Search}
    # @return [Search] the search object
    def search(query:, query_by:)
      Search.new(
        collection: typesense_collection,
        query: query,
        query_by: query_by
      )
    end

    # The Typesense client's handle for this collection, looked up by name
    # @return [Object] the client-side collection handle
    def typesense_collection
      @typesense_collection ||= client.collections[name]
    end

    private

    # Memoized match of the collection name against the naming scheme.
    def parsed_name
      @parsed_name ||= name.match(COLLECTION_NAME_PATTERN)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  # Connection settings for Typesensual: the Typesense nodes, the API key, the
  # environment name, and the (lazily built) Typesense client.
  class Config
    attr_writer :env, :client
    attr_accessor :nodes, :api_key

    # Build a configuration, optionally yielding it for setup.
    #
    # @yieldparam config [Config] the new configuration
    def initialize
      yield self if block_given?
    end

    # The environment name. Falls back to `Rails.env` when Rails is loaded and
    # to `nil` otherwise.
    #
    # @return [String, nil] the environment
    def env
      @env ||= (Rails.env if defined?(Rails))
    end

    # The Typesense client, built from {#nodes} and {#api_key} on first use
    # unless one was assigned explicitly.
    #
    # @return [Typesense::Client] the client
    def client
      return @client if @client

      @client = Typesense::Client.new(connection_options)
    end

    private

    # Options handed to `Typesense::Client.new`.
    def connection_options
      {
        nodes: nodes,
        api_key: api_key
      }
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  # A single field definition of a collection, backed by a string-keyed hash.
  class Field
    # @param hash [Hash] the field attributes (`name`, `type`, `facet`, ...);
    #   keys may be strings or symbols
    def initialize(hash)
      @field = hash.stringify_keys
    end

    # @return [Boolean, nil] the raw `facet` setting
    def facet?
      @field['facet']
    end

    # @return [Boolean, nil] the raw `index` setting
    def index?
      @field['index']
    end

    # @return [Boolean, nil] the raw `infix` setting
    def infix?
      @field['infix']
    end

    # @return [String, nil] the locale, or `nil` when it is blank
    def locale
      @field['locale'].presence
    end

    # The field name as a string. A Regexp name is reported as its source.
    #
    # @return [String] the field name
    def name
      if @field['name'].is_a?(Regexp)
        @field['name'].source
      else
        @field['name'].to_s
      end
    end

    # @return [Boolean, nil] the raw `optional` setting
    def optional?
      @field['optional']
    end

    # @return [Boolean, nil] the raw `sort` setting
    def sort?
      @field['sort']
    end

    # @return [String] the field type
    def type
      @field['type']
    end

    # The field as a plain hash with normalized `name` and `locale` and without
    # any `nil` entries.
    #
    # Uses the non-destructive `merge`/`compact`: `compact!` returns `nil` when
    # there is nothing to remove, and `merge!` would rewrite the stored
    # attributes on every call.
    #
    # @return [Hash] the field attributes
    def to_h
      @field.merge(
        'name' => name,
        'locale' => locale
      ).compact
    end
  end
end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typesensual/schema'
4
+ require 'typesensual/state_helpers'
5
+
6
class Typesensual
  # Represents your index as a set of collections in Typesense. Manages
  # Typesense aliases, but with some extra functionality for managing multiple
  # environments and versions, and a nice DSL for defining your schema.
  #
  # @example Defining an index
  #   class PostsIndex < Typesensual::Index
  #     # Optional, default is inferred from the class name
  #     index_name 'user_posts'
  #
  #     schema do
  #       field 'text', type: 'string[]'
  #       field 'user', type: 'string', facet: true
  #     end
  #   end
  class Index
    include StateHelpers

    # Get or set the name for this index
    #
    # @overload index_name(value)
    #   Explicitly set the index name
    #
    #   @param value [String] the name to identify this index
    #   @return [void]
    #
    # @overload index_name
    #   Get the index name (either explicitly set or inferred from the class name)
    #
    #   @return [String] the name of this index
    def self.index_name(value = nil)
      if value
        @index_name = value
      else
        @index_name ||= name.underscore.sub(/_index$/, '')
      end
    end

    # The alias name for this index in the current environment
    #
    # @return [String] the alias name
    def self.alias_name
      [index_name, env].compact.join(':')
    end

    # Generate a new collection name for the given version
    #
    # @param version [String] the version to generate the collection name for
    # @return [String] the generated collection name
    def self.collection_name_for(version: Time.now.strftime('%s'))
      "#{alias_name}@#{version}"
    end

    # Create a new collection for this index
    #
    # @param version [String] the version to create the collection for
    # @return [Collection] the newly created collection
    def self.create!(version: Time.now.strftime('%s'))
      generated_name = collection_name_for(version: version)

      Collection.create!(@schema.to_h.merge('name' => generated_name))
    end

    # Get the collections for this index
    #
    # NOTE(review): this filters on the index name only, not on the
    # environment; confirm whether collections of other environments should be
    # excluded too.
    #
    # @return [Array<Collection>] the collections whose index name matches
    def self.collections
      Typesensual.collections.filter do |collection|
        collection.index_name == index_name
      end
    end

    # Find the collection of this index with the given version
    #
    # Only this index's own collections are searched, so a collection of another
    # index that happens to share the version is never returned. The version is
    # compared as a string because versions parsed from collection names are
    # strings.
    #
    # @param version [String, Integer, nil] the version to look for
    # @return [Collection, nil] the matching collection, if any
    def self.collection_for(version:)
      wanted = version&.to_s

      collections.find do |collection|
        collection.version == wanted
      end
    end

    # Get the collection that the alias points to
    #
    # @return [Collection, nil] the collection that the alias points to, or
    #   `nil` when Typesense reports it as not found
    def self.collection
      @collection ||= Collection.new(alias_name)
    rescue Typesense::Error::ObjectNotFound
      nil
    end

    # Define the schema for the collection, or read it back when called without
    # a block
    #
    # See {Schema} for more information
    #
    # @return [Schema, nil] the schema of this index
    def self.schema(&block)
      return @schema unless block

      @schema = Typesensual::Schema.new(&block)
    end

    # Updates the alias to point to the given collection name
    #
    # @param name_or_collection [String, Collection] the collection to point
    #   the alias to
    # @return [Object] the result of the alias upsert call
    def self.update_alias!(name_or_collection)
      target_name = if name_or_collection.is_a?(Collection)
                      name_or_collection.name
                    else
                      name_or_collection
                    end

      result = client.aliases.upsert(alias_name, collection_name: target_name)
      # The alias target just changed: drop the memoized collection so that
      # `.collection` resolves the alias again instead of serving the old one.
      @collection = nil
      result
    end

    # Indexes the records with the given IDs into a collection, then updates
    # the alias to point to it.
    #
    # @param ids [Enumerable] the IDs of the records to index
    # @param collection [Collection] the collection to index into, defaults to a
    #   new collection
    # @return [Object] the result of {.update_alias!}
    def self.reindex!(ids, collection: create!)
      index_many(ids, collection: collection)

      update_alias!(collection)
    end

    # The method to implement to index *one* record.
    #
    # @return [Hash] the document to upsert in Typesense
    def index_one(_id); end

    # Index one record into a collection
    #
    # @param id [Object] the ID handed to {#index_one}
    # @param collection [Collection] the collection to insert into, defaults to
    #   the collection the alias points to
    def self.index_one(id, collection: self.collection)
      collection.insert_one!(new.index_one(id))
    end

    # The method to implement to index *many* records
    # Unlike {#index_one}, this method should yield successive records to index
    #
    # @param ids [Enumerable] the IDs of the records to index
    # @yield [Hash] a document to upsert in Typesense
    # @return [Enumerator] when no block is given
    def index_many(ids)
      return enum_for(:index_many, ids) unless block_given?

      ids.each do |id|
        yield index_one(id)
      end
    end

    # Index many records into a collection, in batches
    #
    # @param ids [Enumerable] the IDs handed to {#index_many}
    # @param collection [Collection] the collection to insert into, defaults to
    #   the collection the alias points to
    # @param batch_size [Integer] how many documents to insert per batch
    # @return [Array<Hash>] the failed insertions reported by the collection
    def self.index_many(ids, collection: self.collection, batch_size: 100)
      collection.insert_many!(
        new.enum_for(:index_many, ids),
        batch_size: batch_size
      )
    end

    # Remove one document from a collection
    #
    # @param id [Object] the ID of the document to remove
    # @param collection [Collection] the collection to remove from, defaults to
    #   the collection the alias points to
    def self.remove_one(id, collection: self.collection)
      collection.remove_one!(id)
    end

    if defined?(ActiveRecord)
      # Callbacks object for this index, only defined when ActiveRecord is loaded
      #
      # @return [Typesensual::Callbacks] the callbacks bound to this index
      def self.ar_callbacks
        Typesensual::Callbacks.new(self)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  # Rails integration: loads the Typesensual rake task file when Rails sets up
  # its rake tasks.
  class Railtie < Rails::Railtie
    rake_tasks { load 'tasks/typesensual.rake' }
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+
5
class Typesensual
  # Implementation of the Typesensual rake tasks. Every task writes its report
  # to the given output stream.
  class RakeHelper
    HEADER_FORMAT = Paint["==> %s\n", :bold]
    LIST_ROW_FORMAT = "%<prefix>4s %<version>-20s %<created_at>-20s %<documents>-20s\n"
    LIST_HEADER = Paint[format(
      LIST_ROW_FORMAT,
      prefix: '', version: 'Version', created_at: 'Created At', documents: 'Documents'
    ), :bold, :underline]

    class << self
      # List the collections in all Index descendants
      #
      # @param output [IO] The output stream to write to
      # @example
      #   rake typesensual:list
      def list(output: $stdout)
        # Build up a hash of indices and their (sorted) collections
        indices = Index.descendants.to_h do |index|
          [index, index.collections.sort_by(&:created_at).reverse]
        end

        indices.each do |index, collections|
          # `Index.collection` is nil while the alias does not exist yet; in
          # that case no row gets the `->` marker.
          active_name = index.collection&.name

          output.printf(HEADER_FORMAT, index.name.titleize)
          # The header is already formatted, so print it verbatim.
          output.print(LIST_HEADER)

          collections.each do |collection|
            output.printf(LIST_ROW_FORMAT,
                          prefix: collection.name == active_name ? '->' : '',
                          version: collection.version,
                          created_at: collection.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                          documents: collection.num_documents)
          end

          output.printf("\n")
        end
      end

      # Index all records from a model into an index
      #
      # @param index [String] The name of the index to index into
      # @param model [String] The name of the model to index from
      # @param output [IO] The output stream to write to
      # @raise [ArgumentError] if the index or model class cannot be found
      # @example
      #   rake typesensual:index[FooIndex,Foo]
      def index(index:, model:, output: $stdout)
        index_class = constantize!(index, 'index')
        model_class = constantize!(model, 'model')

        collection = index_class.create!
        output.printf(
          Paint["==> Indexing %<model>s into %<index>s (Version %<version>s)\n", :bold],
          model: model_class.name,
          index: index_class.name,
          version: collection.version
        )
        failures = index_class.index_many(
          model_class.ids,
          collection: collection
        )

        # Report every failed insertion as one JSON line.
        failures.each do |failure|
          output.puts(failure.to_json)
        end
      end

      # Update the alias for an index to point to a specific version
      #
      # @param index [String] The name of the index to update
      # @param version [String] The version to update the alias to
      # @param output [IO] The output stream to write to
      # @raise [ArgumentError] if the index class cannot be found
      # @example
      #   rake typesensual:update_alias[FooIndex,1]
      def update_alias(index:, version:, output: $stdout)
        index_class = constantize!(index, 'index')
        old_coll = index_class.collection
        new_coll = index_class.collection_for(version: version)

        unless new_coll
          output.puts(Paint["--> No such version #{version} for #{index_class.name}", :bold])
          return
        end

        output.puts(Paint["==> Alias for #{index_class.name}", :bold])
        output.printf(
          "Old: %<version>s (%<created_at>s)\n",
          version: old_coll&.version || 'None',
          created_at: old_coll&.created_at&.strftime('%Y-%m-%d %H:%M:%S') || 'N/A'
        )
        index_class.update_alias!(new_coll)

        output.printf(
          "New: %<version>s (%<created_at>s)\n",
          version: new_coll.version,
          created_at: new_coll.created_at.strftime('%Y-%m-%d %H:%M:%S')
        )
      end

      private

      # Resolve a class name given on the command line, failing with a clear
      # message instead of a NoMethodError on nil further down.
      def constantize!(class_name, kind)
        class_name&.safe_constantize ||
          raise(ArgumentError, "Unknown #{kind} class: #{class_name.inspect}")
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typesensual/field'
4
+
5
class Typesensual
  # DSL for describing a collection schema. The block given to {#initialize}
  # is evaluated on the schema instance, so the methods below can be called
  # without a receiver inside it.
  class Schema
    # @yield the schema definition, evaluated in the context of the new schema;
    #   may be omitted for an empty schema
    def initialize(&block)
      instance_eval(&block) if block
    end

    # Add a field to the schema
    #
    # @param name [String, Symbol, Regexp] the field name
    # @param type [String] the field type
    # @param locale [String, nil] the field locale
    # @param facet [Boolean, nil] the `facet` setting
    # @param index [Boolean, nil] the `index` setting
    # @param optional [Boolean, nil] the `optional` setting
    # @return [Array<Field>] all fields defined so far
    def field(name, type: 'auto', locale: nil, facet: nil, index: nil, optional: nil)
      @fields ||= []
      @fields << Field.new(
        name: name,
        type: type,
        locale: locale,
        facet: facet,
        index: index,
        optional: optional
      )
    end

    # @param separators [Array<String>] the token separators
    def token_separators(*separators)
      @token_separators = separators
    end

    # @param symbols [Array<String>] the symbols to index
    def symbols_to_index(*symbols)
      @symbols_to_index = symbols
    end

    # @param field_name [String, Symbol] the default sorting field
    def default_sorting_field(field_name)
      @default_sorting_field = field_name.to_s
    end

    # @param value [Boolean] whether nested fields are enabled
    def enable_nested_fields(value = true) # rubocop:disable Style/OptionalBooleanParameter
      @enable_nested_fields = value
    end

    # The schema as a hash, leaving out every setting that was not given.
    #
    # Uses `compact` rather than `compact!`: the latter returns `nil` when there
    # is nothing to remove, i.e. exactly when every setting is present.
    #
    # @return [Hash] the schema settings
    def to_h
      {
        'fields' => @fields&.map(&:to_h),
        'token_separators' => @token_separators,
        'symbols_to_index' => @symbols_to_index,
        'default_sorting_field' => @default_sorting_field,
        'enable_nested_fields' => @enable_nested_fields
      }.compact
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  class Search
    # Read-only view over one entry of a search response's hit list.
    #
    # @example The kind of hash this class wraps
    #   {
    #     "highlights": [
    #       {
    #         "field": "company_name",
    #         "matched_tokens": ["Stark"],
    #         "snippet": "<mark>Stark</mark> Industries"
    #       }
    #     ],
    #     "document": {
    #       "id": "124",
    #       "company_name": "Stark Industries",
    #       "num_employees": 5215,
    #       "country": "USA"
    #     },
    #     "text_match": 130916
    #   }
    class Hit
      # @param hit [Hash] the Typesense hit hash
      #   * `highlights` [Array<Hash>] the highlights for the hit
      #   * `document` [Hash] the matching document
      #   * `text_match` [Integer] the text matching score
      def initialize(hit)
        @payload = hit
      end

      # @return [Hash] the matching document
      def document
        @payload['document']
      end

      # @return [Array<Hash>] the highlights for the hit
      def highlights
        @payload['highlights']
      end

      # @return [Integer] the text matching score (the hit's `text_match`)
      def score
        @payload['text_match']
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
class Typesensual
  class Search
    # Wraps a search response hash and adds pagination helpers on top of it.
    class Results
      # @param results [Hash] the search response, with string keys
      def initialize(results)
        @results = results
      end

      # @return [Array<Hit>] the hits of the current page
      def hits
        @results['hits'].map { |hit| Hit.new(hit) }
      end

      # @return [Integer] the number of matching documents (`found`)
      def count
        @results['found']
      end

      # @return [Integer] the response's `out_of` value
      def out_of
        @results['out_of']
      end

      # @return [Integer] the current page number
      def current_page
        @results['page']
      end

      # @return [Boolean] whether the current page is page 1
      def first_page?
        current_page == 1
      end

      # Whether there is no page after the current one.
      #
      # Compared with `>=` so that an empty result set (zero pages, current
      # page 1) counts as being on the last page and reports no next page.
      #
      # @return [Boolean] whether the current page is the last one
      def last_page?
        current_page >= total_pages
      end

      # @return [Integer, nil] the previous page number, if there is one
      def prev_page
        current_page - 1 unless first_page?
      end

      # @return [Integer, nil] the next page number, if there is one
      def next_page
        current_page + 1 unless last_page?
      end

      # @return [Integer] the response's `search_time_ms` value
      def search_time_ms
        @results['search_time_ms']
      end

      # @return [Integer] the number of pages needed for all matching documents
      def total_pages
        (@results['found'] / page_size.to_f).ceil
      end

      private

      # The page size of the response: the top-level `per_page` when present,
      # otherwise the one nested under `request_params`.
      #
      # NOTE(review): Typesense appears to report the page size under
      # `request_params` rather than at the top level; confirm against the
      # payload that Search passes in.
      def page_size
        @results['per_page'] || @results.dig('request_params', 'per_page')
      end
    end
  end
end