noiseless 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +28 -0
  3. data/README.md +214 -0
  4. data/lib/application_search.rb +15 -0
  5. data/lib/noiseless/adapter.rb +313 -0
  6. data/lib/noiseless/adapters/elasticsearch.rb +70 -0
  7. data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
  8. data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
  9. data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
  10. data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
  11. data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
  12. data/lib/noiseless/adapters/open_search.rb +208 -0
  13. data/lib/noiseless/adapters/postgresql.rb +171 -0
  14. data/lib/noiseless/adapters/typesense.rb +70 -0
  15. data/lib/noiseless/adapters.rb +14 -0
  16. data/lib/noiseless/ast/aggregation.rb +56 -0
  17. data/lib/noiseless/ast/bool.rb +16 -0
  18. data/lib/noiseless/ast/bulk.rb +18 -0
  19. data/lib/noiseless/ast/collapse.rb +16 -0
  20. data/lib/noiseless/ast/combined_fields.rb +33 -0
  21. data/lib/noiseless/ast/conversation.rb +29 -0
  22. data/lib/noiseless/ast/filter.rb +15 -0
  23. data/lib/noiseless/ast/hybrid.rb +35 -0
  24. data/lib/noiseless/ast/image_query.rb +29 -0
  25. data/lib/noiseless/ast/join.rb +31 -0
  26. data/lib/noiseless/ast/match.rb +15 -0
  27. data/lib/noiseless/ast/multi_match.rb +24 -0
  28. data/lib/noiseless/ast/paginate.rb +15 -0
  29. data/lib/noiseless/ast/prefix.rb +15 -0
  30. data/lib/noiseless/ast/range.rb +18 -0
  31. data/lib/noiseless/ast/root.rb +69 -0
  32. data/lib/noiseless/ast/search_after.rb +14 -0
  33. data/lib/noiseless/ast/sort.rb +15 -0
  34. data/lib/noiseless/ast/vector.rb +27 -0
  35. data/lib/noiseless/ast/wildcard.rb +15 -0
  36. data/lib/noiseless/ast.rb +30 -0
  37. data/lib/noiseless/bulk_importer.rb +195 -0
  38. data/lib/noiseless/callbacks.rb +138 -0
  39. data/lib/noiseless/connection_manager.rb +26 -0
  40. data/lib/noiseless/document_manager.rb +137 -0
  41. data/lib/noiseless/dsl.rb +107 -0
  42. data/lib/noiseless/generators/application_search_generator.rb +24 -0
  43. data/lib/noiseless/instrumentation.rb +174 -0
  44. data/lib/noiseless/introspection/console.rb +228 -0
  45. data/lib/noiseless/introspection/query_visualizer.rb +533 -0
  46. data/lib/noiseless/introspection.rb +221 -0
  47. data/lib/noiseless/mapping.rb +253 -0
  48. data/lib/noiseless/mapping_definition_processor.rb +231 -0
  49. data/lib/noiseless/model.rb +111 -0
  50. data/lib/noiseless/model_registry.rb +77 -0
  51. data/lib/noiseless/multi_search.rb +244 -0
  52. data/lib/noiseless/pagination.rb +375 -0
  53. data/lib/noiseless/query_builder.rb +284 -0
  54. data/lib/noiseless/railtie.rb +35 -0
  55. data/lib/noiseless/response/aggregations.rb +46 -0
  56. data/lib/noiseless/response/empty.rb +20 -0
  57. data/lib/noiseless/response/records.rb +94 -0
  58. data/lib/noiseless/response/results.rb +110 -0
  59. data/lib/noiseless/response/suggestions.rb +55 -0
  60. data/lib/noiseless/response.rb +98 -0
  61. data/lib/noiseless/response_factory.rb +32 -0
  62. data/lib/noiseless/runtime_reset_middleware.rb +15 -0
  63. data/lib/noiseless/search_index_update_job.rb +84 -0
  64. data/lib/noiseless/test_case.rb +230 -0
  65. data/lib/noiseless/test_helper.rb +295 -0
  66. data/lib/noiseless/version.rb +2 -2
  67. data/lib/noiseless.rb +130 -2
  68. data/lib/tasks/benchmark.rake +35 -0
  69. data/lib/tasks/release.rake +22 -0
  70. data/lib/tasks/test.rake +11 -0
  71. metadata +260 -14
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module AST
5
# Top-level node of a query AST.
#
# It is a plain container: the constructor stores every argument in an
# instance variable of the same name and exposes it through a reader. The
# predicate methods only report whether the matching optional part was
# supplied. The meaning of each part is decided by whatever consumes the
# tree (not visible in this file).
class Root < Node
  attr_reader :indexes, :bool, :sort, :paginate, :vector, :collapse, :search_after,
              :aggregations, :hybrid, :pipeline, :image_query, :conversation, :joins,
              :remove_duplicates, :facet_sample_slope, :pinned_hits

  # @param indexes [Object, Array] one index or a list of indexes; always stored as an Array
  # @param bool [Object] stored as given
  # @param sort [Object] stored as given
  # @param paginate [Object] stored as given
  # @param vector [Object, nil] optional; see {#vector_search?}
  # @param collapse [Object, nil] optional; see {#collapsed?}
  # @param search_after [Object, nil] optional; see {#cursor_pagination?}
  # @param aggregations [Array, nil] optional; nil is treated like an empty list
  # @param hybrid [Object, nil] optional; see {#hybrid_search?}
  # @param pipeline [Object, nil] optional; see {#has_pipeline?}
  # @param image_query [Object, nil] optional; see {#image_search?}
  # @param conversation [Object, nil] optional; see {#conversational?}
  # @param joins [Array, nil] optional; nil is treated like an empty list
  # @param remove_duplicates [Object, nil] stored as given
  # @param facet_sample_slope [Object, nil] stored as given
  # @param pinned_hits [Object, nil] stored as given
  def initialize(indexes:, bool:, sort:, paginate:, vector: nil, collapse: nil, search_after: nil,
                 aggregations: [], hybrid: nil, pipeline: nil, image_query: nil, conversation: nil, joins: [],
                 remove_duplicates: nil, facet_sample_slope: nil, pinned_hits: nil)
    super()
    @indexes = Array(indexes)
    @bool = bool
    @sort = sort
    @paginate = paginate
    @vector = vector
    @collapse = collapse
    @search_after = search_after
    # An explicit nil must behave like "none given"; otherwise #aggregated?
    # would call #any? on nil.
    @aggregations = aggregations || []
    @hybrid = hybrid
    @pipeline = pipeline
    @image_query = image_query
    @conversation = conversation
    # Same nil normalization as for aggregations (see #has_joins?).
    @joins = joins || []
    @remove_duplicates = remove_duplicates
    @facet_sample_slope = facet_sample_slope
    @pinned_hits = pinned_hits
  end

  # @return [Boolean] true when a vector part was supplied
  def vector_search?
    !@vector.nil?
  end

  # @return [Boolean] true when a hybrid part was supplied
  def hybrid_search?
    !@hybrid.nil?
  end

  # @return [Boolean] true when a pipeline was supplied
  def has_pipeline?
    !@pipeline.nil?
  end

  # @return [Boolean] true when a collapse part was supplied
  def collapsed?
    !@collapse.nil?
  end

  # @return [Boolean] true when a search_after part was supplied
  def cursor_pagination?
    !@search_after.nil?
  end

  # @return [Boolean] true when at least one truthy aggregation is present
  def aggregated?
    @aggregations.any?
  end

  # @return [Boolean] true when an image query was supplied
  def image_search?
    !@image_query.nil?
  end

  # @return [Boolean] true when a conversation part was supplied
  def conversational?
    !@conversation.nil?
  end

  # @return [Boolean] true when at least one truthy join is present
  def has_joins?
    @joins.any?
  end
end
68
+ end
69
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module AST
5
# AST node that carries the values handed to a "search after" clause.
class SearchAfter < Node
  # @return [Array] the values, always wrapped in an Array
  attr_reader :values

  # @param values [Object, Array, nil] a single value, a list, or nil;
  #   coerced with Kernel#Array, so nil becomes an empty list
  def initialize(values)
    super()
    coerced = Array(values)
    @values = coerced
  end
end
13
+ end
14
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module AST
5
# AST node pairing a field with a sort direction.
# Both values are stored exactly as given; no validation happens here.
class Sort < Node
  # @return [Object] the field passed to the constructor
  attr_reader :field
  # @return [Object] the direction passed to the constructor
  attr_reader :direction

  # @param field [Object] field to sort on
  # @param direction [Object] sort direction (accepted values are not checked in this class)
  def initialize(field, direction)
    super()
    @field, @direction = field, direction
  end
end
14
+ end
15
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module AST
5
# AST node for semantic / embedding-based (vector) search.
# Intended for pgvector in PostgreSQL or knn in OpenSearch.
class Vector < Node
  attr_reader :field
  attr_reader :embedding
  attr_reader :k
  attr_reader :distance_metric

  # @param field [Symbol, String] the embedding column/field
  # @param embedding [Array<Float>] the query embedding vector
  # @param k [Integer] number of nearest neighbors (default: 10)
  # @param distance_metric [Symbol] :cosine, :l2, or :inner_product (default: :cosine)
  def initialize(field, embedding, k: 10, distance_metric: :cosine)
    super()
    @field, @embedding = field, embedding
    @k, @distance_metric = k, distance_metric
  end

  # Size of the stored embedding.
  #
  # @return [Integer] the embedding's size, or 0 when no embedding is set
  def dimension
    return 0 if @embedding.nil?

    @embedding.size || 0
  end
end
26
+ end
27
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module AST
5
# AST node pairing a field with a wildcard value.
# Both arguments are stored untouched; pattern syntax is not interpreted here.
class Wildcard < Node
  # @return [Object] the field passed to the constructor
  attr_reader :field
  # @return [Object] the value passed to the constructor
  attr_reader :value

  # @param field [Object] field the wildcard applies to
  # @param value [Object] wildcard value, kept as given
  def initialize(field, value)
    super()
    @field, @value = field, value
  end
end
14
+ end
15
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module AST
5
# Base class of all AST nodes. Its only job is generic serialization of a
# node (and the nodes it contains) into plain Ruby data.
class Node
  # Converts the node into a Hash.
  #
  # Every instance variable becomes a key (the variable name without "@", as
  # a Symbol). Values that are nodes are converted recursively, including
  # nodes found inside arrays and inside hash values at any depth. A `:_type`
  # key holding the unqualified class name is added last for introspection.
  #
  # @return [Hash{Symbol => Object}] the serialized node; `:_type` is nil for
  #   anonymous classes, which have no name
  def to_h
    hash = instance_variables.each_with_object({}) do |ivar, memo|
      memo[ivar.to_s.delete_prefix("@").to_sym] = serialize_value(instance_variable_get(ivar))
    end

    # Include the class name for better introspection
    hash[:_type] = node_type_name
    hash
  end

  alias to_hash to_h

  private

  # Recursively replaces nodes inside +value+ with their hash form.
  def serialize_value(value)
    case value
    when Node
      value.to_h
    when Array
      value.map { |item| serialize_value(item) }
    when Hash
      value.transform_values { |item| serialize_value(item) }
    else
      value
    end
  end

  # Last segment of the class name; nil-safe because Class#name is nil for
  # anonymous classes (e.g. Class.new(Node)).
  def node_type_name
    self.class.name&.split("::")&.last
  end
end
29
+ end
30
+ end
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
# Imports many records into a search index through the connection's bulk API.
#
# Records are processed in slices. Each slice is optionally preprocessed as a
# whole, each record is turned into a document, and the resulting actions are
# sent with a single `client.bulk` call. Failures are collected in {#errors}
# instead of aborting the import.
class BulkImporter
  attr_reader :model_class, :errors

  # @param model_class [Class] class whose records are imported
  # @param connection [Object, nil] connection name; defaults to `model_class.connection`
  def initialize(model_class, connection: nil)
    @model_class = model_class
    @connection = connection || model_class.connection
    @errors = []
  end

  # Imports the given records in batches.
  #
  # @param relation_or_records [nil, String, Symbol, #each_slice] nil imports
  #   `model_class.all` (when available); a String/Symbol is treated as the
  #   name of a method on the model class that returns the records; anything
  #   else is used as the record collection itself
  # @param batch_size [Integer] number of records per bulk request
  # @param transform [#call, nil] maps one record to its document; returning
  #   nil/false skips the record. Defaults to {#default_transform}
  # @param preprocess [#call, nil] receives a whole batch and returns the batch to import
  # @param force [Boolean] when true the index is deleted (and re-created, once
  #   the adapter supports it) before importing
  # @param refresh [Boolean] forwarded to `client.bulk`
  # @param options [Hash] any further keywords are forwarded to `client.bulk`
  # @return [Hash] `{ imported:, errors:, error_details: }` where `imported`
  #   counts only items the backend did not report as failed
  def import(relation_or_records = nil,
             batch_size: 1000,
             transform: nil,
             preprocess: nil,
             force: false,
             refresh: true,
             **options)
    @errors.clear

    if force
      delete_index
      create_index
    end

    total_imported = 0

    resolve_records(relation_or_records).each_slice(batch_size) do |batch|
      processed_batch = preprocess ? preprocess.call(batch) : batch
      total_imported += import_batch(processed_batch, transform, refresh: refresh, **options)
    end

    {
      imported: total_imported,
      errors: @errors.size,
      # Snapshot: @errors is cleared by the next import and must not empty
      # the details of a result the caller still holds.
      error_details: @errors.dup
    }
  end

  # Same as {#import}; kept as a separately named entry point.
  def import_scoped(scope, **options)
    import(scope, **options)
  end

  # Drops the index and imports `model_class.all`.
  #
  # @raise [ArgumentError] when the model class does not respond to `all`
  def reindex(batch_size: 1000, **options)
    raise ArgumentError, "Model class #{model_class} must respond to :all for reindexing" unless model_class.respond_to?(:all)

    import(model_class.all, batch_size: batch_size, force: true, **options)
  end

  private

  # Sends one batch and returns how many of its items were imported.
  # A raised error marks the whole batch as failed and counts as zero.
  def import_batch(batch, transform, refresh:, **options)
    entries = build_bulk_entries(batch, transform)
    # Every record was skipped or failed to transform: nothing to send.
    return 0 if entries.empty?

    records, actions = entries.transpose

    begin
      client = Noiseless.connections.client(@connection)
      response = client.bulk(actions, refresh: refresh, **options)
      actions.size - collect_errors(response, records)
    rescue StandardError => e
      @errors << {
        error: e.message,
        batch: batch.map { |r| identify_record(r) }
      }
      0
    end
  end

  def resolve_records(relation_or_records)
    case relation_or_records
    when nil
      model_class.respond_to?(:all) ? model_class.all : []
    when String, Symbol
      # Assume it's a scope name
      if model_class.respond_to?(relation_or_records)
        model_class.public_send(relation_or_records)
      else
        []
      end
    else
      relation_or_records
    end
  end

  # Builds `[record, action]` pairs for every record that yields a document.
  # Keeping the record next to its action lets per-item errors in the bulk
  # response be attributed to the right record even when earlier records of
  # the batch were skipped.
  def build_bulk_entries(batch, transform)
    batch.filter_map do |record|
      document = transform ? transform.call(record) : default_transform(record)
      next unless document

      [
        record,
        {
          index: {
            _index: index_name,
            _id: extract_id(record),
            data: document
          }
        }
      ]
    rescue StandardError => e
      @errors << {
        error: e.message,
        record: identify_record(record)
      }
      nil
    end
  end

  # Actions only, in batch order (records without a document are omitted).
  def build_bulk_actions(batch, transform)
    build_bulk_entries(batch, transform).map(&:last)
  end

  def default_transform(record)
    if record.respond_to?(:to_h)
      record.to_h
    elsif record.respond_to?(:attributes)
      record.attributes
    else
      record
    end
  end

  def extract_id(record)
    if record.respond_to?(:id)
      record.id
    elsif record.is_a?(Hash)
      record[:id] || record["id"]
    else
      record.object_id
    end
  end

  def identify_record(record)
    {
      id: extract_id(record),
      class: record.class.name,
      object_id: record.object_id
    }
  end

  # Records every failed item of a bulk response in @errors.
  #
  # @param response [Object] bulk response; only a Hash with an "items" list is inspected
  # @param records [Array] the records that were actually sent, in action order
  # @return [Integer] number of items reported as failed
  def collect_errors(response, records)
    return 0 unless response.is_a?(Hash) && response["items"]

    failed = 0
    response["items"].each_with_index do |item, index|
      result = item.values.first
      next unless result && result["error"]

      failed += 1
      @errors << {
        error: result["error"],
        record: identify_record(records[index]),
        status: result["status"]
      }
    end
    failed
  end

  def index_name
    @index_name ||= if model_class.respond_to?(:search_index)
                      Array(model_class.search_index).first
                    else
                      model_class.name.demodulize.underscore.pluralize
                    end
  end

  def delete_index
    client = Noiseless.connections.client(@connection)
    client.delete_index(index_name)
  rescue StandardError => _e
    # Index might not exist, which is fine
    nil
  end

  # TODO: index creation is not implemented yet. Only the client lookup
  # happens here, so a failing lookup is reported as an error; after a forced
  # import the index is not re-created by this method.
  def create_index
    return unless model_class.respond_to?(:mapping)

    mapping_block = model_class.mapping
    return unless mapping_block

    begin
      _client = Noiseless.connections.client(@connection)
      # This would need to be implemented in the adapter
      # client.create_index(index_name, mapping: mapping_block)
    rescue StandardError => e
      @errors << {
        error: "Failed to create index: #{e.message}",
        index: index_name
      }
    end
  end
end
195
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support/concern"
4
+
5
+ module Noiseless
6
# Model concern that keeps the search index in sync with record lifecycle
# events by registering save/destroy/commit callbacks on the including class.
#
# Indexing can be switched off per class (`auto_index enabled: false`) or
# temporarily for the current thread (`skip_auto_index { ... }`).
module Callbacks
  extend ActiveSupport::Concern

  included do
    # NOTE(review): update and removal are each hooked twice (after_save +
    # after_commit, after_destroy + after_commit), so one write may reach the
    # index twice - confirm this is intended.
    after_save :update_search_index_on_save
    after_destroy :remove_from_search_index
    after_commit :update_search_index_on_commit, on: %i[create update]
    after_commit :remove_from_search_index_on_commit, on: :destroy
  end

  class_methods do
    # Configures automatic indexing for the class.
    #
    # @param enabled [Boolean] false disables the callbacks' work
    # @param options [Hash] read by the callbacks: `:async` queues a
    #   SearchIndexUpdateJob instead of indexing inline, `:raise_on_error`
    #   re-raises indexing errors instead of logging them. The whole hash is
    #   also forwarded to the job / document manager.
    def auto_index(enabled: true, **options)
      @auto_index_enabled = enabled
      @auto_index_options = options
    end

    # @return [Boolean] true unless indexing was explicitly disabled on this class
    def auto_index_enabled?
      @auto_index_enabled != false
    end

    # @return [Hash] options given to {#auto_index} (empty by default)
    def auto_index_options
      @auto_index_options ||= {}
    end

    # Runs the block with automatic indexing suppressed for the current
    # thread; the previous setting is restored afterwards, even on error.
    def skip_auto_index
      previous_value = Thread.current[:noiseless_skip_auto_index]
      Thread.current[:noiseless_skip_auto_index] = true
      yield
    ensure
      Thread.current[:noiseless_skip_auto_index] = previous_value
    end
  end

  private

  # Whether callbacks should touch the index for this record right now.
  def should_update_search_index?
    return false if Thread.current[:noiseless_skip_auto_index]
    return false unless self.class.auto_index_enabled?

    # Only update if we have searchable content
    respond_to?(:searchable?) ? searchable? : true
  end

  def update_search_index_on_save
    noiseless_guard_index_errors(:update) do
      next unless should_update_search_index?

      update_search_index_async if noiseless_new_record? || (respond_to?(:changed?) && changed?)
    end
  end

  def update_search_index_on_commit
    noiseless_guard_index_errors(:update) do
      update_search_index_async if should_update_search_index?
    end
  end

  def remove_from_search_index
    noiseless_guard_index_errors(:delete) do
      remove_from_search_index_async if should_update_search_index?
    end
  end

  def remove_from_search_index_on_commit
    noiseless_guard_index_errors(:delete) do
      remove_from_search_index_async if should_update_search_index?
    end
  end

  # Runs the block and routes transport/parsing/timeout failures to
  # #handle_search_index_error; any other error propagates unchanged.
  def noiseless_guard_index_errors(operation)
    yield
  rescue Net::ProtocolError, JSON::ParserError, Timeout::Error => e
    handle_search_index_error(e, operation)
  end

  # Updates the document, inline or through a background job (`:async`).
  def update_search_index_async
    noiseless_dispatch_index_operation("update") do |options|
      document_manager.update_document(**options)
    end
  end

  # Removes the document, inline or through a background job (`:async`).
  def remove_from_search_index_async
    noiseless_dispatch_index_operation("delete") do |options|
      document_manager.delete_document(**options)
    end
  end

  # Queues a SearchIndexUpdateJob when `:async` is set, otherwise yields the
  # options for immediate processing.
  # NOTE(review): the full options hash (including :async / :raise_on_error)
  # is passed on in both paths - verify the receivers accept those keys.
  def noiseless_dispatch_index_operation(action)
    options = self.class.auto_index_options

    if options[:async]
      # Queue for background processing
      SearchIndexUpdateJob.perform_later(self.class.name, id, action, options)
    else
      # Immediate processing
      yield options
    end
  end

  # Re-raises the error when `:raise_on_error` is set; otherwise logs it to
  # Rails.logger when Rails (and a logger) is available and swallows it.
  def handle_search_index_error(error, operation)
    raise error if self.class.auto_index_options[:raise_on_error]

    # Rails is optional here: without the guard a missing constant would turn
    # a handled indexing error into a NameError.
    logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
    return unless logger

    logger.error "Noiseless: Failed to #{operation} search index for #{self.class.name}##{id}: #{error.message}"
  end

  def noiseless_new_record?
    respond_to?(:persisted?) ? !persisted? : !id
  end
end
138
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
# Registry of named search connections. Configurations are registered up
# front; the adapter client for a name is built on first use and then cached.
class ConnectionManager
  def initialize
    @clients = {}
    @configs = {}
  end

  # Register a named client statically from YAML (boot-time only).
  #
  # Registering a name again replaces its configuration and discards any
  # client already built for it, so the next {#client} call uses the new
  # settings instead of a stale cached client.
  #
  # @param name [String, Symbol] connection name
  # @param adapter [Object] adapter identifier handed to `Adapters.lookup`
  # @param hosts [Object] hosts handed to `Adapters.lookup`
  # @return [Hash] the stored configuration
  def register(name, adapter:, hosts:)
    key = name.to_sym
    @clients.delete(key)
    @configs[key] = { adapter: adapter, hosts: hosts }
  end

  # Retrieve a client; defaults to :primary.
  #
  # @param name [String, Symbol] connection name
  # @return [Object] whatever `Adapters.lookup` returned for the configuration
  # @raise [RuntimeError] when no connection was registered under +name+
  def client(name = :primary)
    key = name.to_sym

    # Lazy-load the adapter only when actually used
    @clients[key] ||= begin
      config = @configs.fetch(key) { raise "Unknown connection: #{key}" }
      Adapters.lookup(config[:adapter], hosts: config[:hosts])
    end
  end
end
26
+ end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
# Performs single-document index operations (index, update, delete, exists)
# for one model instance through the configured connection's client.
class DocumentManager
  # @param model_instance [Object] the record to manage
  # @param connection [Object, nil] connection name; defaults to the model class' `connection`
  def initialize(model_instance, connection: nil)
    @model_instance = model_instance
    @connection = connection || model_instance.class.connection
  end

  # Writes the full document.
  #
  # @param refresh [Boolean] forwarded to the client
  # @param options [Hash] further keywords forwarded to the client
  # @return [false, Object] false when no document could be built, otherwise the client's result
  def index_document(refresh: false, **options)
    document = build_document
    return false unless document

    client.index_document(
      index: index_name,
      id: document_id,
      document: document,
      refresh: refresh,
      **options
    )
  end

  # Updates the document, sending only changed attributes when possible.
  #
  # With `detect_changes` and a model that exposes dirty tracking:
  # * nothing changed                       -> returns false, no request
  # * changed and changes can be listed     -> partial update with the new values
  # * changed but only `changed?` available -> full re-index (the changed
  #   attributes cannot be determined, and skipping would lose the update)
  # Otherwise the full document is indexed.
  #
  # NOTE(review): dirty state is read at call time; when this runs after the
  # model has already reset its change tracking, it will report no changes.
  #
  # @param refresh [Boolean] forwarded to the client
  # @param detect_changes [Boolean] false forces a full document write
  # @param options [Hash] further keywords forwarded to the client
  # @return [false, Object] false when there was nothing to send, otherwise the client's result
  def update_document(refresh: false, detect_changes: true, **options)
    # Fall back to full document update
    return index_document(refresh: refresh, **options) unless detect_changes && supports_dirty_tracking?
    return false unless has_changes?
    return index_document(refresh: refresh, **options) unless supports_change_extraction?

    changes = extract_changes
    return false if changes.empty?

    client.update_document(
      index: index_name,
      id: document_id,
      changes: changes,
      refresh: refresh,
      **options
    )
  end

  # Deletes the document; returns the client's result.
  def delete_document(refresh: false, **options)
    client.delete_document(
      index: index_name,
      id: document_id,
      refresh: refresh,
      **options
    )
  end

  # Asks the client whether the document exists; returns the client's result.
  def document_exists?
    client.document_exists?(
      index: index_name,
      id: document_id
    )
  end

  private

  attr_reader :model_instance

  # Client for this manager's connection, looked up on every call.
  def client
    Noiseless.connections.client(@connection)
  end

  # Document payload: `to_search_document`, else `to_h`, else `attributes`;
  # nil when the model offers none of them.
  def build_document
    if model_instance.respond_to?(:to_search_document)
      model_instance.to_search_document
    elsif model_instance.respond_to?(:to_h)
      model_instance.to_h
    elsif model_instance.respond_to?(:attributes)
      model_instance.attributes
    end
  end

  # Document id: `id`, else the :id / "id" entry, else the Ruby object_id.
  def document_id
    if model_instance.respond_to?(:id)
      model_instance.id
    elsif model_instance.respond_to?(:[])
      model_instance[:id] || model_instance["id"]
    else
      model_instance.object_id
    end
  end

  # First entry of the class' `search_index`, or a name derived from the class name.
  def index_name
    @index_name ||= if model_instance.class.respond_to?(:search_index)
                      Array(model_instance.class.search_index).first
                    else
                      model_instance.class.name.demodulize.underscore.pluralize
                    end
  end

  def supports_dirty_tracking?
    supports_change_extraction? || model_instance.respond_to?(:changed?)
  end

  # True when the model can say WHICH attributes changed, not just whether.
  def supports_change_extraction?
    model_instance.respond_to?(:changed_attributes) || model_instance.respond_to?(:changes)
  end

  def has_changes?
    if model_instance.respond_to?(:changed?)
      model_instance.changed?
    elsif model_instance.respond_to?(:changes)
      !model_instance.changes.empty?
    elsif model_instance.respond_to?(:changed_attributes)
      !model_instance.changed_attributes.empty?
    else
      true
    end
  end

  # Maps each changed attribute to its current (new) value.
  def extract_changes
    if model_instance.respond_to?(:changes)
      # ActiveModel::Dirty style changes hash: attr => [old, new]
      model_instance.changes.each_with_object({}) do |(attr, (_old_value, new_value)), changes|
        changes[attr] = new_value
      end
    elsif model_instance.respond_to?(:changed_attributes)
      # Get current values for changed attributes
      model_instance.changed_attributes.each_key.with_object({}) do |attr, changes|
        if model_instance.respond_to?(attr)
          changes[attr] = model_instance.public_send(attr)
        elsif model_instance.respond_to?(:[])
          changes[attr] = model_instance[attr]
        end
      end
    else
      {}
    end
  end
end
137
+ end