noiseless 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +28 -0
- data/README.md +214 -0
- data/lib/application_search.rb +15 -0
- data/lib/noiseless/adapter.rb +313 -0
- data/lib/noiseless/adapters/elasticsearch.rb +70 -0
- data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
- data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
- data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
- data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
- data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
- data/lib/noiseless/adapters/open_search.rb +208 -0
- data/lib/noiseless/adapters/postgresql.rb +171 -0
- data/lib/noiseless/adapters/typesense.rb +70 -0
- data/lib/noiseless/adapters.rb +14 -0
- data/lib/noiseless/ast/aggregation.rb +56 -0
- data/lib/noiseless/ast/bool.rb +16 -0
- data/lib/noiseless/ast/bulk.rb +18 -0
- data/lib/noiseless/ast/collapse.rb +16 -0
- data/lib/noiseless/ast/combined_fields.rb +33 -0
- data/lib/noiseless/ast/conversation.rb +29 -0
- data/lib/noiseless/ast/filter.rb +15 -0
- data/lib/noiseless/ast/hybrid.rb +35 -0
- data/lib/noiseless/ast/image_query.rb +29 -0
- data/lib/noiseless/ast/join.rb +31 -0
- data/lib/noiseless/ast/match.rb +15 -0
- data/lib/noiseless/ast/multi_match.rb +24 -0
- data/lib/noiseless/ast/paginate.rb +15 -0
- data/lib/noiseless/ast/prefix.rb +15 -0
- data/lib/noiseless/ast/range.rb +18 -0
- data/lib/noiseless/ast/root.rb +69 -0
- data/lib/noiseless/ast/search_after.rb +14 -0
- data/lib/noiseless/ast/sort.rb +15 -0
- data/lib/noiseless/ast/vector.rb +27 -0
- data/lib/noiseless/ast/wildcard.rb +15 -0
- data/lib/noiseless/ast.rb +30 -0
- data/lib/noiseless/bulk_importer.rb +195 -0
- data/lib/noiseless/callbacks.rb +138 -0
- data/lib/noiseless/connection_manager.rb +26 -0
- data/lib/noiseless/document_manager.rb +137 -0
- data/lib/noiseless/dsl.rb +107 -0
- data/lib/noiseless/generators/application_search_generator.rb +24 -0
- data/lib/noiseless/instrumentation.rb +174 -0
- data/lib/noiseless/introspection/console.rb +228 -0
- data/lib/noiseless/introspection/query_visualizer.rb +533 -0
- data/lib/noiseless/introspection.rb +221 -0
- data/lib/noiseless/mapping.rb +253 -0
- data/lib/noiseless/mapping_definition_processor.rb +231 -0
- data/lib/noiseless/model.rb +111 -0
- data/lib/noiseless/model_registry.rb +77 -0
- data/lib/noiseless/multi_search.rb +244 -0
- data/lib/noiseless/pagination.rb +375 -0
- data/lib/noiseless/query_builder.rb +284 -0
- data/lib/noiseless/railtie.rb +35 -0
- data/lib/noiseless/response/aggregations.rb +46 -0
- data/lib/noiseless/response/empty.rb +20 -0
- data/lib/noiseless/response/records.rb +94 -0
- data/lib/noiseless/response/results.rb +110 -0
- data/lib/noiseless/response/suggestions.rb +55 -0
- data/lib/noiseless/response.rb +98 -0
- data/lib/noiseless/response_factory.rb +32 -0
- data/lib/noiseless/runtime_reset_middleware.rb +15 -0
- data/lib/noiseless/search_index_update_job.rb +84 -0
- data/lib/noiseless/test_case.rb +230 -0
- data/lib/noiseless/test_helper.rb +295 -0
- data/lib/noiseless/version.rb +2 -2
- data/lib/noiseless.rb +130 -2
- data/lib/tasks/benchmark.rake +35 -0
- data/lib/tasks/release.rake +22 -0
- data/lib/tasks/test.rake +11 -0
- metadata +260 -14
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
# Mixin that lets an adapter describe itself, explain how a query would be
# translated, and collect timing statistics for the translation step.
#
# The including object is expected to provide +ast_to_hash(ast_node)+; that
# method is not defined in this module.
module Introspection
  # Runtime adapter and module detection.
  #
  # NOTE(review): String#underscore is not core Ruby; presumably it comes from
  # ActiveSupport being loaded by the host application - confirm.
  #
  # @return [Hash] adapter type, execution mode/modules, capabilities and
  #   engine name of the receiver
  def adapter_info
    {
      adapter_type: self.class.name.split("::").last.underscore.to_sym,
      execution_mode: :async,
      execution_module: detect_execution_module,
      capabilities: adapter_capabilities,
      engine_name: engine_name
    }
  end

  # Lists the modules mixed into the receiver whose name contains "Execution"
  # (which also covers names containing "ExecutionModules"). Anonymous modules
  # (nil name) are skipped.
  #
  # @return [Array<Hash>] one +{ name:, async: true }+ entry per module
  def detect_execution_module
    singleton_class.included_modules.filter_map do |mod|
      { name: mod.name, async: true } if mod.name&.include?("Execution")
    end
  end

  # Builds the capability list by probing which methods the receiver responds
  # to, plus engine-specific extras selected by the class name.
  #
  # @return [Array<Symbol>] unique capability identifiers
  def adapter_capabilities
    capabilities = [:async_support]

    # Generic capabilities, keyed by the method whose presence implies them.
    capabilities << :bulk_operations if respond_to?(:bulk)
    capabilities << :search if respond_to?(:search)
    capabilities << :index_management if respond_to?(:create_index)
    capabilities << :document_operations if respond_to?(:index_document)

    # Engine-specific capabilities
    case self.class.name
    when /OpenSearch/
      capabilities += %i[point_in_time_search search_templates] if respond_to?(:point_in_time_search)
    when /Typesense/
      capabilities += %i[typo_tolerance faceted_search] if respond_to?(:faceted_search)
    when /Elasticsearch/
      capabilities += %i[aggregations percolate] if respond_to?(:percolate)
    end

    capabilities.uniq
  end

  # Maps the receiver's class name onto an engine identifier.
  #
  # @return [Symbol] :opensearch, :elasticsearch, :typesense, or :base when the
  #   class name matches none of them
  def engine_name
    case self.class.name
    when /OpenSearch/ then :opensearch
    when /Elasticsearch/ then :elasticsearch
    when /Typesense/ then :typesense
    else :base
    end
  end

  # Query execution introspection.
  #
  # Converts the AST with +ast_to_hash+, records how long that took and
  # attaches a descriptive execution plan. The query itself is not executed.
  #
  # @param ast_node [#to_h] query AST
  # @param opts [Hash] forwarded to #build_execution_plan
  # @return [Hash] keys :adapter, :ast, :engine_query, :execution_plan,
  #   :performance
  def explain_query(ast_node, **opts)
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    explanation = {
      adapter: adapter_info,
      ast: ast_node.to_h,
      engine_query: nil,
      execution_plan: [],
      performance: {}
    }

    # Convert AST to engine query
    conversion_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    engine_query = ast_to_hash(ast_node)
    conversion_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - conversion_start

    explanation[:engine_query] = engine_query
    explanation[:performance][:ast_conversion_ms] = (conversion_time * 1000).round(3)

    # Build execution plan
    explanation[:execution_plan] = build_execution_plan(ast_node, **opts)

    total_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
    explanation[:performance][:total_explanation_ms] = (total_time * 1000).round(3)

    explanation
  end

  # Returns a fixed, descriptive list of the stages a query goes through. The
  # arguments are accepted but not inspected; only #engine_name varies the
  # output.
  #
  # @return [Array<Hash>] steps with :step, :description, :estimated_cost
  def build_execution_plan(_ast_node, **_opts)
    [
      {
        step: "ast_validation",
        description: "Validate AST structure",
        estimated_cost: "O(n) where n = AST node count"
      },
      {
        step: "query_conversion",
        description: "Convert AST to #{engine_name} query format",
        estimated_cost: "O(n) where n = AST complexity"
      },
      {
        step: "async_task_creation",
        description: "Wrap execution in Async::Task",
        estimated_cost: "O(1)"
      },
      {
        step: "engine_execution",
        description: "Execute query on #{engine_name}",
        estimated_cost: "Variable - depends on query complexity and data size"
      },
      {
        step: "response_processing",
        description: "Convert engine response to Noiseless format",
        estimated_cost: "O(m) where m = result count"
      }
    ]
  end

  # Cross-engine compatibility analysis.
  #
  # Instantiates each known adapter class with no arguments and records its
  # capabilities. An adapter that raises a StandardError is reported as
  # unavailable together with the error message.
  #
  # @return [Hash{Symbol=>Hash}] keyed by "<engine>_async"
  def compatibility_matrix
    available_adapters = [
      Noiseless::Adapters::Elasticsearch,
      Noiseless::Adapters::OpenSearch,
      Noiseless::Adapters::Typesense
    ]

    available_adapters.each_with_object({}) do |adapter_class, matrix|
      adapter = adapter_class.new
      adapter_name = adapter.engine_name

      matrix[:"#{adapter_name}_async"] = {
        available: true,
        capabilities: adapter.adapter_capabilities,
        engine_name: adapter.engine_name
      }
    rescue StandardError => e
      fallback_key = adapter_class.name.split("::").last.underscore
      matrix[:"#{fallback_key}_async"] = {
        available: false,
        error: e.message
      }
    end
  end

  # Performance profiling.
  #
  # Runs #measure_single_execution +iterations+ times and summarises the total
  # times. With no measurements (iterations of zero or less) the summary holds
  # nil for min/max/avg/median instead of raising ZeroDivisionError.
  #
  # @param ast_node [Object] query AST handed to +ast_to_hash+
  # @param iterations [Integer] number of measurements to take
  # @param opts [Hash] forwarded to #measure_single_execution
  # @return [Hash] keys :adapter, :iterations, :measurements, :summary
  def profile_query(ast_node, iterations: 100, **opts)
    measurements = Array.new([iterations, 0].max) { measure_single_execution(ast_node, **opts) }
    times = measurements.map { |m| m[:total_time_ms] }

    {
      adapter: adapter_info,
      iterations: iterations,
      measurements: measurements,
      summary: summarize_times(times)
    }
  end

  private

  # Computes min/max/average/median/standard deviation for a list of timings.
  def summarize_times(times)
    if times.empty?
      return { min_ms: nil, max_ms: nil, avg_ms: nil, median_ms: nil, std_dev_ms: 0.0 }
    end

    {
      min_ms: times.min,
      max_ms: times.max,
      avg_ms: (times.sum / times.size.to_f).round(3),
      median_ms: median(times),
      std_dev_ms: calculate_std_dev(times).round(3)
    }
  end

  # Median of a non-empty list; averages the two middle values when the count
  # is even so that neither half is favoured.
  def median(values)
    sorted = values.sort
    mid = sorted.size / 2
    return sorted[mid] if sorted.size.odd?

    ((sorted[mid - 1] + sorted[mid]) / 2.0).round(3)
  end

  # Times one AST conversion. The "execution" part is a 1 ms sleep standing in
  # for a real engine call, so execution_ms does not reflect engine latency.
  #
  # @return [Hash] :ast_conversion_ms, :execution_ms, :total_time_ms
  def measure_single_execution(ast_node, **_opts)
    start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    # Convert AST
    conversion_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    _engine_query = ast_to_hash(ast_node)
    conversion_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - conversion_start

    # Execute (mock execution for testing)
    execution_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    sleep(0.001) # Simulate async execution
    execution_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - execution_start

    total_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time

    {
      ast_conversion_ms: (conversion_time * 1000).round(3),
      execution_ms: (execution_time * 1000).round(3),
      total_time_ms: (total_time * 1000).round(3)
    }
  end

  # Sample standard deviation (n - 1 denominator); 0.0 for fewer than two
  # values.
  def calculate_std_dev(values)
    return 0.0 if values.size <= 1

    mean = values.sum / values.size.to_f
    variance = values.sum { |v| (v - mean)**2 } / (values.size - 1).to_f
    Math.sqrt(variance)
  end
end
|
|
221
|
+
end
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
# Base class for index mappings. Class-level methods form a DSL for declaring
# field mappings, index settings and analyzers; an instance wraps a single
# document and turns it into a hash for indexing.
class Mapping
  class << self
    # Copies mapping, settings and analyzers from the parent class into the new
    # subclass. Settings and analyzers are copied recursively so that a
    # subclass editing a nested hash (e.g. :analysis) cannot change the parent.
    def inherited(subclass)
      super
      subclass.instance_variable_set(:@mapping_definition, @mapping_definition&.dup)
      subclass.instance_variable_set(:@index_settings, deep_copy(@index_settings))
      subclass.instance_variable_set(:@analyzers, deep_copy(@analyzers))
    end

    # With a block: replaces the mapping definition with a new one built by
    # evaluating the block against a MappingDefinition.
    #
    # @return [MappingDefinition, nil] the current definition
    def mapping(&block)
      if block
        @mapping_definition = MappingDefinition.new
        @mapping_definition.instance_eval(&block)
      end
      @mapping_definition
    end

    # Reads or extends the index settings. A hash is merged into the existing
    # settings; a block is evaluated against a SettingsBuilder that writes into
    # them. When both are given the hash is merged first, then the block runs.
    #
    # @param settings_hash [Hash, nil] settings to merge
    # @return [Hash, nil] the current settings (nil if never set)
    def settings(settings_hash = nil, &block)
      @index_settings = (@index_settings || {}).merge(settings_hash) if settings_hash

      if block
        @index_settings ||= {}
        SettingsBuilder.new(@index_settings).instance_eval(&block)
      end

      @index_settings
    end

    # Registers an analyzer definition under the stringified name.
    def analyzer(name, definition)
      @analyzers ||= {}
      @analyzers[name.to_s] = definition
    end

    # @return [Hash] registered analyzers, empty when none are defined
    def analyzers
      @analyzers || {}
    end

    # Sets (when given) and returns the index name as a String.
    def index_name(name = nil)
      @index_name = name.to_s if name
      @index_name
    end

    # @return [Hash] +{ properties: ... }+, or an empty hash without a mapping
    def to_mapping_hash
      return {} unless @mapping_definition

      { properties: @mapping_definition.to_hash }
    end

    # Returns the settings with registered analyzers merged in under
    # analysis.analyzer.
    #
    # NOTE(review): Hash#deep_merge is not core Ruby; presumably ActiveSupport
    # is loaded - confirm. Settings loaded from a file have String keys, so a
    # file-provided "analysis" section is not merged with the Symbol :analysis
    # key used here.
    def to_settings_hash
      settings = @index_settings || {}
      return settings unless @analyzers&.any?

      # Add analyzers to settings if any are defined
      settings.deep_merge(analysis: { analyzer: @analyzers })
    end

    # Merges settings parsed from a .json, .yml or .yaml file (extension matched
    # case-insensitively) into the index settings.
    #
    # YAML is parsed with safe_load so the file cannot instantiate arbitrary
    # objects; symbols and aliases stay permitted.
    #
    # @param file_path [String] path to the settings file
    # @return [Hash, nil] the current settings, or nil when the file is missing
    # @raise [ArgumentError] for any other file extension
    def load_settings_from_file(file_path)
      return unless File.exist?(file_path)

      content = File.read(file_path)
      parsed_settings = case File.extname(file_path).downcase
                        when ".json"
                          JSON.parse(content)
                        when ".yml", ".yaml"
                          YAML.safe_load(content, permitted_classes: [Symbol], aliases: true)
                        else
                          raise ArgumentError, "Unsupported file format: #{File.extname(file_path)}"
                        end

      settings(parsed_settings)
    end

    private

    # Recursively copies hashes and arrays; other values are shared as-is.
    def deep_copy(value)
      case value
      when Hash
        copy = value.dup
        # Only existing keys are reassigned, so iterating the copy is safe.
        copy.each { |key, nested| copy[key] = deep_copy(nested) }
        copy
      when Array
        value.map { |nested| deep_copy(nested) }
      else
        value
      end
    end
  end

  # Instance methods

  # @param document [Object] the object to be serialized for indexing
  def initialize(document)
    @document = document
  end

  # Serializes the wrapped document using the first method it responds to, in
  # order: to_search_document, to_h, attributes. Falls back to the document
  # itself.
  def to_h
    if @document.respond_to?(:to_search_document)
      @document.to_search_document
    elsif @document.respond_to?(:to_h)
      @document.to_h
    elsif @document.respond_to?(:attributes)
      @document.attributes
    else
      @document
    end
  end

  # Extracts the stored document from a search hit.
  #
  # @param hit [Hash] hit with a "_source" key
  def self.deserialize(hit)
    hit["_source"]
  end
end
|
|
106
|
+
|
|
107
|
+
# DSL for building mapping definitions
|
|
108
|
+
# DSL for building mapping definitions. Each call records a field under its
# stringified name; #to_hash returns the accumulated properties.
class MappingDefinition
  # Field types that get a same-named shortcut method, e.g. +text :title+.
  SCALAR_TYPES = %i[keyword text integer long float double boolean date geo_point].freeze

  def initialize
    @properties = {}
  end

  # Records a field. +options+ are merged after the type, so an explicit
  # :type option overrides it.
  #
  # @param name [#to_s] field name
  # @param type [#to_s] field type
  # @return [Hash] the stored field definition
  def field(name, type, **options)
    @properties[name.to_s] = { type: type.to_s }.merge(options)
  end

  # keyword, text, integer, long, float, double, boolean, date, geo_point:
  # each is +field(name, <type>, **options)+.
  SCALAR_TYPES.each do |type|
    define_method(type) do |name, **options|
      field(name, type, **options)
    end
  end

  # Declares an object field; a block defines its sub-fields.
  def object(name, **options, &block)
    container_field(name, :object, options, &block)
  end

  # Declares a nested field; a block defines its sub-fields.
  def nested(name, **options, &block)
    container_field(name, :nested, options, &block)
  end

  # @return [Hash] field name => definition (the live internal hash)
  def to_hash
    @properties
  end

  private

  # Gives a dup/clone its own properties so that adding fields to the copy
  # does not change the original.
  def initialize_copy(source)
    super
    @properties = copy_tree(@properties)
  end

  # Recursively copies hashes and arrays; other values are shared.
  def copy_tree(value)
    case value
    when Hash then value.each_with_object({}) { |(key, nested), copy| copy[key] = copy_tree(nested) }
    when Array then value.map { |nested| copy_tree(nested) }
    else value
    end
  end

  # Shared implementation of #object and #nested: evaluates the block against
  # a fresh MappingDefinition and stores the result under :properties.
  def container_field(name, type, options, &block)
    return field(name, type, **options) unless block

    child = MappingDefinition.new
    child.instance_eval(&block)
    field(name, type, properties: child.to_hash, **options)
  end
end
|
|
177
|
+
|
|
178
|
+
# DSL for building settings
|
|
179
|
+
# DSL for building settings. Every call writes straight into the hash handed
# to the constructor, so the caller sees the changes without a return value.
class SettingsBuilder
  # @param settings_hash [Hash] hash that receives the settings
  def initialize(settings_hash)
    @settings = settings_hash
  end

  # number_of_shards, number_of_replicas, refresh_interval, max_result_window:
  # each stores its single argument under the same-named Symbol key and
  # returns that argument.
  %i[number_of_shards number_of_replicas refresh_interval max_result_window].each do |setting|
    define_method(setting) do |value|
      @settings[setting] = value
    end
  end

  # Evaluates the block against an AnalysisBuilder that writes into the
  # :analysis section (created on first use).
  def analysis(&block)
    @settings[:analysis] ||= {}
    section = AnalysisBuilder.new(@settings[:analysis])
    section.instance_eval(&block)
  end
end
|
|
206
|
+
|
|
207
|
+
# DSL for analysis settings
|
|
208
|
+
# DSL for analysis settings. Entries are written into the hash passed to the
# constructor under :analyzer, :tokenizer and :filter, keyed by stringified
# name.
class AnalysisBuilder
  # @param analysis_hash [Hash] hash that receives the analysis sections
  def initialize(analysis_hash)
    @analysis = analysis_hash
  end

  # Defines an analyzer by evaluating the block against an AnalyzerBuilder.
  #
  # @return [Hash] the stored analyzer definition
  def analyzer(name, &block)
    registry = (@analysis[:analyzer] ||= {})
    definition = AnalyzerBuilder.new
    definition.instance_eval(&block)
    registry[name.to_s] = definition.to_hash
  end

  # Stores a tokenizer definition as given.
  def tokenizer(name, definition)
    (@analysis[:tokenizer] ||= {})[name.to_s] = definition
  end

  # Stores a token filter definition as given.
  def filter(name, definition)
    (@analysis[:filter] ||= {})[name.to_s] = definition
  end
end
|
|
230
|
+
|
|
231
|
+
# DSL for analyzer definition
|
|
232
|
+
# DSL for analyzer definition. Collects the tokenizer, token filters and
# character filters of one analyzer; names are stored as Strings.
class AnalyzerBuilder
  def initialize
    @definition = {}
  end

  # Sets the tokenizer name.
  def tokenizer(name)
    @definition.store(:tokenizer, name.to_s)
  end

  # Sets the token filter list, replacing any earlier list.
  def filter(*names)
    @definition.store(:filter, names.map(&:to_s))
  end

  # Sets the character filter list, replacing any earlier list.
  def char_filter(*names)
    @definition.store(:char_filter, names.map(&:to_s))
  end

  # @return [Hash] the accumulated analyzer definition
  def to_hash
    @definition
  end
end
|
|
253
|
+
end
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
# Evaluates a mapping DSL block and turns it into an index configuration hash
# with optional :settings and :mappings sections.
class MappingDefinitionProcessor
  # @param mapping_block [Proc, nil] DSL block using settings/properties/analysis
  # @return [Hash] index configuration; empty when no block is given
  def self.process(mapping_block)
    return {} unless mapping_block

    processor = new
    processor.instance_eval(&mapping_block)
    processor.to_index_config
  end

  def initialize
    @settings = {}
    @properties = {}
  end

  # Evaluates the block against a SettingsBuilder and merges the result into
  # the settings collected so far. Merging is recursive, so a later call adds
  # to earlier sections instead of discarding them.
  def settings(&block)
    settings_builder = SettingsBuilder.new
    settings_builder.instance_eval(&block)
    @settings = merge_settings(@settings, settings_builder.to_hash)
  end

  # Evaluates the block against a PropertiesBuilder; the result replaces any
  # previously declared properties.
  def properties(&block)
    properties_builder = PropertiesBuilder.new
    properties_builder.instance_eval(&block)
    @properties = properties_builder.to_hash
  end

  # Shortcut for +settings { analysis { ... } }+. The block is captured under a
  # name because forwarding an anonymous block from inside another block is
  # rejected by some Ruby releases.
  def analysis(&block)
    settings { analysis(&block) }
  end

  # @return [Hash] :settings and :mappings, each omitted when empty
  def to_index_config
    config = {}
    config[:settings] = @settings unless @settings.empty?
    config[:mappings] = { properties: @properties } unless @properties.empty?
    config
  end

  private

  # Recursive hash merge: nested hashes are combined, other values from
  # +extra+ win.
  def merge_settings(base, extra)
    base.merge(extra) do |_key, current, incoming|
      current.is_a?(Hash) && incoming.is_a?(Hash) ? merge_settings(current, incoming) : incoming
    end
  end

  public

  # Collects the :index and :analysis sections of the settings.
  class SettingsBuilder
    def initialize
      @settings = {}
    end

    # Sets the :index section from an IndexSettingsBuilder block.
    def index(&block)
      index_builder = IndexSettingsBuilder.new
      index_builder.instance_eval(&block)
      @settings[:index] = index_builder.to_hash
    end

    # Sets the :analysis section from an AnalysisBuilder block.
    def analysis(&block)
      analysis_builder = AnalysisBuilder.new
      analysis_builder.instance_eval(&block)
      @settings[:analysis] = analysis_builder.to_hash
    end

    def to_hash
      @settings
    end
  end

  # Collects settings nested under :index; only :analysis is supported.
  class IndexSettingsBuilder
    def initialize
      @index_settings = {}
    end

    def analysis(&block)
      analysis_builder = AnalysisBuilder.new
      analysis_builder.instance_eval(&block)
      @index_settings[:analysis] = analysis_builder.to_hash
    end

    def to_hash
      @index_settings
    end
  end

  # Collects :normalizer and :analyzer definitions. Both accept either a name
  # plus a block (one definition) or just a block in which each method call
  # names a definition.
  class AnalysisBuilder
    def initialize
      @analysis = {}
    end

    def normalizer(name = nil, &block)
      define_entries(:normalizer, name, NormalizerBuilder, NormalizerDefinitions, &block)
    end

    def analyzer(name = nil, &block)
      define_entries(:analyzer, name, AnalyzerBuilder, AnalyzerDefinitions, &block)
    end

    def to_hash
      @analysis
    end

    private

    # Shared implementation of #normalizer and #analyzer. The section is
    # created even when no block is given; without a block nothing is added.
    def define_entries(section, name, builder_class, definitions_class, &block)
      entries = (@analysis[section] ||= {})
      return unless block

      if name
        # Define a specific entry
        builder = builder_class.new
        builder.instance_eval(&block)
        entries[name] = builder.to_hash
      else
        # Handle nested definitions, one per method call inside the block
        definitions = definitions_class.new
        definitions.instance_eval(&block)
        entries.merge!(definitions.to_hash)
      end
    end
  end

  # Collects the configuration of one normalizer.
  class NormalizerBuilder
    def initialize
      @config = {}
    end

    def type(value)
      @config[:type] = value
    end

    def char_filter(filters)
      @config[:char_filter] = filters
    end

    def filter(filters)
      @config[:filter] = filters
    end

    def to_hash
      @config
    end
  end

  # Treats every block-taking method call as the definition of an analyzer
  # named after the method.
  class AnalyzerDefinitions
    def initialize
      @analyzers = {}
    end

    def method_missing(name, *args, &block)
      return super unless block && args.empty?

      analyzer_builder = AnalyzerBuilder.new
      analyzer_builder.instance_eval(&block)
      @analyzers[name] = analyzer_builder.to_hash
    end

    # Reports every name as handled except implicit-conversion probes (to_ary,
    # to_str, ...), which Ruby would otherwise invoke without a block.
    def respond_to_missing?(name, include_private = false)
      return super if name.to_s.start_with?("to_")

      true
    end

    def to_hash
      @analyzers
    end
  end

  # Treats every block-taking method call as the definition of a normalizer
  # named after the method.
  class NormalizerDefinitions
    def initialize
      @normalizers = {}
    end

    def method_missing(name, *args, &block)
      return super unless block && args.empty?

      normalizer_builder = NormalizerBuilder.new
      normalizer_builder.instance_eval(&block)
      @normalizers[name] = normalizer_builder.to_hash
    end

    # See AnalyzerDefinitions#respond_to_missing? in this file's original
    # design: everything is handled except implicit-conversion probes.
    def respond_to_missing?(name, include_private = false)
      return super if name.to_s.start_with?("to_")

      true
    end

    def to_hash
      @normalizers
    end
  end

  # Collects the configuration of one analyzer.
  class AnalyzerBuilder
    def initialize
      @config = {}
    end

    def type(value)
      @config[:type] = value
    end

    def tokenizer(value)
      @config[:tokenizer] = value
    end

    def char_filter(filters)
      @config[:char_filter] = filters
    end

    def stopwords(value)
      @config[:stopwords] = value
    end

    def filter(filters)
      @config[:filter] = filters
    end

    def to_hash
      @config
    end
  end

  # Treats every method call as a property declaration:
  # +title :text, analyzer: "english"+. A block declares nested properties.
  #
  # NOTE(review): property names that match methods every object already has
  # (format, method, display, test, ...) never reach method_missing.
  class PropertiesBuilder
    def initialize
      @properties = {}
    end

    # Define a property with a symbol name and type
    def method_missing(name, *args, &block)
      return super unless (1..2).cover?(args.size)

      type_or_field, options = args
      definition = { type: type_or_field }.merge(options || {})

      if block
        nested = PropertiesBuilder.new
        nested.instance_eval(&block)
        definition[:properties] = nested.to_hash
      end

      @properties[name] = definition
    end

    # Everything is handled except implicit-conversion probes (to_ary, to_str,
    # ...), which are called without the required type argument.
    def respond_to_missing?(name, include_private = false)
      return super if name.to_s.start_with?("to_")

      true
    end

    def to_hash
      @properties
    end
  end
end
|
|
231
|
+
end
|