stretchy-model 0.6.0 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/containers/Dockerfile.opensearch +4 -3
- data/docker-compose.yml +32 -19
- data/lib/elasticsearch/api/actions/machine_learning/models/delete_model.rb +33 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/deploy.rb +31 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/get_model.rb +43 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/get_status.rb +31 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/params_registry.rb +45 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/register.rb +45 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/undeploy.rb +32 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/update_model.rb +39 -0
- data/lib/elasticsearch/api/namespace/machine_learning/model.rb +27 -0
- data/lib/opensearch/api/actions/machine_learning/models/delete_model.rb +33 -0
- data/lib/opensearch/api/actions/machine_learning/models/deploy.rb +31 -0
- data/lib/opensearch/api/actions/machine_learning/models/get_model.rb +44 -0
- data/lib/opensearch/api/actions/machine_learning/models/get_status.rb +31 -0
- data/lib/opensearch/api/actions/machine_learning/models/params_registry.rb +45 -0
- data/lib/opensearch/api/actions/machine_learning/models/register.rb +45 -0
- data/lib/opensearch/api/actions/machine_learning/models/undeploy.rb +31 -0
- data/lib/opensearch/api/actions/machine_learning/models/update_model.rb +39 -0
- data/lib/opensearch/api/namespace/machine_learning/model.rb +27 -0
- data/lib/stretchy/attributes/type/date_time.rb +50 -0
- data/lib/stretchy/attributes/type/rank_features.rb +11 -1
- data/lib/stretchy/attributes.rb +1 -0
- data/lib/stretchy/common.rb +5 -0
- data/lib/stretchy/delegation/gateway_delegation.rb +8 -2
- data/lib/stretchy/machine_learning/model.rb +192 -0
- data/lib/stretchy/open_search_compatibility.rb +4 -0
- data/lib/stretchy/pipeline.rb +123 -0
- data/lib/stretchy/pipelines/processor.rb +55 -0
- data/lib/stretchy/querying.rb +1 -0
- data/lib/stretchy/rails/instrumentation/publishers.rb +31 -0
- data/lib/{rails → stretchy/rails}/instrumentation/railtie.rb +11 -6
- data/lib/stretchy/relation.rb +1 -0
- data/lib/stretchy/relations/query_builder.rb +73 -3
- data/lib/stretchy/relations/query_methods.rb +32 -0
- data/lib/stretchy/shared_scopes.rb +6 -1
- data/lib/stretchy/version.rb +1 -1
- data/lib/stretchy.rb +7 -2
- metadata +40 -5
- data/lib/rails/instrumentation/publishers.rb +0 -29
@@ -0,0 +1,31 @@
|
|
1
|
+
module OpenSearch
  module API
    module MachineLearning
      module Models
        module Actions
          # Undeploys a model.
          #
          # @option arguments [String] :id The model id (*Required*)
          # @option arguments [Hash] :body Optional request body, sent as-is
          # @option arguments [Hash] :headers Custom HTTP headers
          #
          # @raise [ArgumentError] when :id is missing
          # @return the `body` of the response returned by `perform_request`
          #
          # POST /_plugins/_ml/models/<model_id>/_undeploy
          def undeploy(arguments = {})
            raise ArgumentError, "Required argument 'id' missing" unless arguments[:id]

            # Clone before deleting anything so the caller's hash is left untouched.
            arguments = arguments.clone
            _id = arguments.delete(:id)
            headers = arguments.delete(:headers) || {}

            method = OpenSearch::API::HTTP_POST
            path = "_plugins/_ml/models/#{Utils.__listify(_id)}/_undeploy"
            params = Utils.__validate_and_extract_params arguments, ParamsRegistry.get(__method__)

            body = arguments[:body]
            perform_request(method, path, params, body, headers).body
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module OpenSearch
  module API
    module MachineLearning
      module Models
        module Actions
          # Updates a model.
          #
          # @option arguments [String] :id The model id (*Required*)
          # @option arguments [Hash] :body The request fields (*Required*)
          # @option arguments [Hash] :headers Custom HTTP headers
          #
          # @example
          #   update_model(id: '109sdj0asl092', body: {
          #     rate_limiter: { limit: '4', unit: 'MINUTES' }
          #   })
          #
          # @raise [ArgumentError] when :body or :id is missing
          # @return the `body` of the response returned by `perform_request`
          #
          # PUT /_plugins/_ml/models/<model_id>
          def update_model(arguments = {})
            raise ArgumentError, "Required argument 'body' missing" unless arguments[:body]
            raise ArgumentError, "Required argument 'id' missing" unless arguments[:id]

            # Work on a copy: :id and :headers are removed below and the
            # caller's hash must not lose them.
            arguments = arguments.clone
            _id = arguments.delete(:id)
            headers = arguments.delete(:headers) || {}

            method = OpenSearch::API::HTTP_PUT
            path = "_plugins/_ml/models/#{Utils.__listify(_id)}"
            params = Utils.__validate_and_extract_params arguments, ParamsRegistry.get(__method__)

            body = arguments[:body]
            perform_request(method, path, params, body, headers).body
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
Dir[File.expand_path('../../actions/**/*.rb', __dir__)].sort.each { |f| require f }
|
2
|
+
|
3
|
+
module OpenSearch
  module API
    module MachineLearning
      module Models
        # Container for the machine-learning model action methods.
        module Actions
        end

        # Client for the "machine_learning/models" namespace; mixes in the
        # {MachineLearning::Models::Actions} methods.
        class MachineLearningClient
          include MachineLearning::Models::Actions
          include OpenSearch::API::Common::Client::Base
          include OpenSearch::API::Common::Client
        end

        # Proxy method for {MachineLearningClient}, available in the receiving
        # object. The client is built once and then reused.
        def machine_learning
          @machine_learning = MachineLearningClient.new(self) unless @machine_learning
          @machine_learning
        end

        alias_method :ml, :machine_learning
      end
    end
  end
end
|
@@ -24,6 +24,17 @@ module Stretchy::Attributes::Type
|
|
24
24
|
class DateTime < Stretchy::Attributes::Type::Base
|
25
25
|
OPTIONS = [:doc_values, :format, :locale, :ignore_malformed, :index, :null_value, :on_script_error, :script, :store, :meta]
|
26
26
|
attr_reader *OPTIONS
|
27
|
+
include ActiveModel::Type::Helpers::Timezone
|
28
|
+
include ActiveModel::Type::Helpers::AcceptsMultiparameterTime.new(
|
29
|
+
defaults: { 4 => 0, 5 => 0 }
|
30
|
+
)
|
31
|
+
include ActiveModel::Type::Helpers::TimeValue
|
32
|
+
|
33
|
+
# Stores the optional :model_format option on the instance and removes it
# from the options before they are forwarded to the superclass initializer.
def initialize(**args)
  custom_format = args.delete(:model_format)
  @model_format = custom_format
  super
end
|
37
|
+
|
27
38
|
def type
|
28
39
|
:datetime
|
29
40
|
end
|
@@ -31,5 +42,44 @@ module Stretchy::Attributes::Type
|
|
31
42
|
def type_for_database
|
32
43
|
:date
|
33
44
|
end
|
45
|
+
|
46
|
+
private
|
47
|
+
# Casts a raw value.
#
# Non-string values are handed to apply_seconds_precision. An empty string
# yields nil. Any other string is tried against the fast parser, then the
# fallback parser, then the custom-format parser; the first truthy result wins.
def cast_value(value)
  if value.is_a?(::String)
    return if value.empty?

    fast_string_to_time(value) || fallback_string_to_time(value) || custom_string_to_time(value)
  else
    apply_seconds_precision(value)
  end
end
|
53
|
+
|
54
|
+
# Converts the :sec_fraction entry of a parsed time hash into whole
# microseconds, or 0 when the entry is absent.
#
# '0.123456' -> 123456
# '1.123456' -> 123456
def microseconds(time)
  fraction = time[:sec_fraction]
  return 0 unless fraction

  (fraction * 1_000_000).to_i
end
|
59
|
+
|
60
|
+
# Parses +string+ with the custom strptime format given as :model_format.
#
# Returns a ::Date, or nil when no :model_format was configured. Without
# this guard Date.strptime would be called with a nil format and raise
# TypeError. A string that does not match the format still raises, exactly
# as Date.strptime does.
def custom_string_to_time(string)
  return if @model_format.nil?

  ::Date.strptime(string, @model_format)
end
|
63
|
+
|
64
|
+
# Second-chance parser built on Date._parse.
#
# Returns nil when Date._parse raises ArgumentError. Otherwise the
# :sec_fraction entry is replaced by whole microseconds and the individual
# components are passed to new_time.
def fallback_string_to_time(string)
  parts =
    begin
      ::Date._parse(string)
    rescue ArgumentError
      nil
    end
  return unless parts

  parts[:sec_fraction] = microseconds(parts)

  year, mon, mday, hour, min, sec, usec, offset =
    parts.values_at(:year, :mon, :mday, :hour, :min, :sec, :sec_fraction, :offset)
  new_time(year, mon, mday, hour, min, sec, usec, offset)
end
|
75
|
+
|
76
|
+
# Checks that positions 1, 2 and 3 are all present in +values_hash+ before
# deferring to the inherited implementation.
#
# Raises ArgumentError naming the missing positions otherwise.
def value_from_multiparameter_assignment(values_hash)
  missing_parameters = [1, 2, 3].reject { |position| values_hash.key?(position) }
  if missing_parameters.any?
    raise ArgumentError, "Provided hash #{values_hash} doesn't contain necessary keys: #{missing_parameters}"
  end

  super
end
|
83
|
+
|
34
84
|
end
|
35
85
|
end
|
@@ -14,11 +14,21 @@ module Stretchy::Attributes::Type
|
|
14
14
|
# end
|
15
15
|
#
|
16
16
|
# Returns nothing.
|
17
|
-
class RankFeatures < Stretchy::Attributes::Type::
|
17
|
+
class RankFeatures < Stretchy::Attributes::Type::Hash
|
18
18
|
OPTIONS = [:positive_score_impact]
|
19
19
|
|
20
|
+
# Builds the mapping entry for the field +name+.
#
# The entry always carries :type; each option listed in OPTIONS is added
# only when its reader returns a non-nil value. The result is passed
# through as_json.
def mappings(name)
  definition = { type: type }
  OPTIONS.each do |option|
    setting = send(option)
    definition[option] = setting unless setting.nil?
  end

  { name => definition }.as_json
end
|
25
|
+
|
20
26
|
def type
|
21
27
|
:rank_features
|
22
28
|
end
|
29
|
+
|
30
|
+
def type_for_database
|
31
|
+
:rank_features
|
32
|
+
end
|
23
33
|
end
|
24
34
|
end
|
data/lib/stretchy/attributes.rb
CHANGED
@@ -43,6 +43,7 @@ module Stretchy
|
|
43
43
|
ActiveModel::Type.register(:percolator, Stretchy::Attributes::Type::Percolator)
|
44
44
|
ActiveModel::Type.register(:point, Stretchy::Attributes::Type::Point)
|
45
45
|
ActiveModel::Type.register(:rank_feature, Stretchy::Attributes::Type::RankFeature)
|
46
|
+
ActiveModel::Type.register(:rank_features, Stretchy::Attributes::Type::RankFeatures)
|
46
47
|
|
47
48
|
ActiveModel::Type.register(:text, Stretchy::Attributes::Type::Text)
|
48
49
|
ActiveModel::Type.register(:token_count, Stretchy::Attributes::Type::TokenCount)
|
data/lib/stretchy/common.rb
CHANGED
@@ -30,14 +30,20 @@ module Stretchy
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
+
# Returns the index settings with indifferent access.
#
# The first settings hash received is memoized; later arguments are ignored
# once a value is stored. When default_pipeline is set, its string form is
# merged in under :default_pipeline.
#
# The supplied hash is copied before it is stored, so the merge below never
# writes into the caller's own hash.
def index_settings(settings={})
  @index_settings ||= settings.dup
  @index_settings.merge!(default_pipeline: default_pipeline.to_s) if default_pipeline
  @index_settings.with_indifferent_access
end
|
38
|
+
|
33
39
|
def reload_gateway_configuration!
|
34
40
|
@gateway = nil
|
35
41
|
end
|
36
42
|
|
37
43
|
def gateway(&block)
|
38
44
|
reload_gateway_configuration! if @gateway && @gateway.client != Stretchy.configuration.client
|
39
|
-
|
40
|
-
@gateway ||= Stretchy::Repository.create(client: Stretchy.configuration.client, index_name: index_name, klass: base_class, mapping: base_class.attribute_mappings.merge(dynamic: true))
|
45
|
+
|
46
|
+
@gateway ||= Stretchy::Repository.create(client: Stretchy.configuration.client, index_name: index_name, klass: base_class, mapping: base_class.attribute_mappings.merge(dynamic: true), settings: index_settings)
|
41
47
|
# block.arity < 1 ? @gateway.instance_eval(&block) : block.call(@gateway) if block_given?
|
42
48
|
@gateway
|
43
49
|
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
module Stretchy
  module MachineLearning
    # Wraps the machine-learning model actions of the configured client
    # (register, deploy, undeploy, delete, status lookups).
    class Model
      # Short names for pretrained models, grouped by family. model_lookup
      # flattens these to "<family>_<name>" keys, e.g. :neural_sparse_encoding.
      PRETRAINED_MODELS = {
        :neural_sparse => {
          :encoding => 'amazon/neural-sparse/opensearch-neural-sparse-encoding-v1',
          :encoding_doc => 'amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1',
          :tokenizer => 'amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1'
        },
        :cross_encoder => {
          :minilm_6 => 'huggingface/cross-encoders/ms-marco-MiniLM-L-6-v2',
          :minilm_12 => 'huggingface/cross-encoders/ms-marco-MiniLM-L-12-v2'
        },
        :sentence_transformers => {
          :roberta_all => 'huggingface/sentence-transformers/all-distilroberta-v1',
          :msmarco => 'huggingface/sentence-transformers/msmarco-distilroberta-base-v2',
          :minilm_6 => 'huggingface/sentence-transformers/all-MiniLM-L6-v2',
          :minilm_12 => 'huggingface/sentence-transformers/all-MiniLM-L12-v2',
          # Was 'all-mpnet-base-v' (truncated); the published name ends in "v2".
          :mpnet => 'huggingface/sentence-transformers/all-mpnet-base-v2',
          :multi_qa_minilm_6 => 'huggingface/sentence-transformers/multi-qa-MiniLM-L6-cos-v1',
          :multi_qa_mpnet => 'huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1',
          :paraphrase_minilm_3 => 'huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2',
          :paraphrase_multilingual_minilm_12 => 'huggingface/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
          :paraphrase_mpnet => 'huggingface/sentence-transformers/paraphrase-mpnet-base-v2',
          :multilingual_distiluse_cased => 'huggingface/sentence-transformers/distiluse-base-multilingual-cased-v1'
        }
      }

      cattr_reader :client do
        Stretchy.configuration.client.ml
      end

      class << self
        attr_accessor :model, :group_id

        # Fetches models through the client without specifying an id.
        def all
          client.get_model
        end

        # Persists cluster settings with ml_commons.only_run_on_ml_node "false".
        def ml_on_all_nodes!
          put_ml_commons_settings(only_run_on_ml_node: "false")
        end

        # Persists cluster settings with ml_commons.only_run_on_ml_node "true".
        def ml_on_ml_nodes!
          put_ml_commons_settings(only_run_on_ml_node: "true")
        end

        # Resolves a short name such as :neural_sparse_encoding to the full
        # pretrained model name. Unknown names are returned as strings.
        # Returns nil for a nil name instead of raising NoMethodError.
        def model_lookup(model)
          return if model.nil?

          @flattened_models ||= PRETRAINED_MODELS.flat_map do |key, value|
            value.map do |sub_key, sub_value|
              ["#{key}_#{sub_key}".to_sym, sub_value]
            end
          end.to_h

          @flattened_models[model.to_sym] || model.to_s
        end

        private

        # Sends the shared ml_commons cluster settings; only the
        # only_run_on_ml_node value differs between the two public callers.
        def put_ml_commons_settings(only_run_on_ml_node:)
          settings = {
            "persistent": {
              "plugins": {
                "ml_commons": {
                  "only_run_on_ml_node": only_run_on_ml_node,
                  "model_access_control_enabled": "true",
                  "native_memory_threshold": "99"
                }
              }
            }
          }
          Stretchy.configuration.client.cluster.put_settings body: settings
        end
      end

      attr_accessor :model,
        :group_id,
        :version,
        :description,
        :model_format,
        :enabled,
        :connector_id,
        :connector,
        :function_name,
        :model_config,
        :model_content_hash_value,
        :url

      attr_reader :task_id, :model_id, :deploy_id

      # Assigns each key of +args+ through its writer. :model is resolved
      # through model_lookup first. +args+ is copied so the caller's hash
      # keeps its :model entry.
      def initialize(args = {})
        attributes = args.dup
        model_name = attributes.delete(:model)
        attributes.each do |k, v|
          self.send("#{k}=", v)
        end
        @model = self.class.model_lookup model_name
      end

      # Registers the model (with deploy: true) and stores the returned task id.
      # Yields self afterwards when a block is given.
      #
      # Returns true on success. Any StandardError is logged and false is
      # returned; previously the rescue value was discarded and the method
      # reported true even after a failure.
      def register
        response = client.register(body: self.to_hash, deploy: true)

        @task_id = response['task_id']

        yield self if block_given?

        true
      rescue => e
        Stretchy.logger.error "Error registering model: #{e.message}"
        false
      end

      # Polls the task status, remembers the model id once reported, and
      # returns true when the state is 'COMPLETED' and a model id is known.
      def registered?
        response = status
        @model_id = response['model_id'] if response['model_id']
        response['state'] == 'COMPLETED' && @model_id.present?
      end

      # Status of the stored task id, as returned by the client.
      def status
        client.get_status(task_id: self.task_id)
      end

      # Requests deployment, clears the cached deployed flag and returns the
      # deploy task id. Yields self before returning when a block is given.
      def deploy
        @deployed = nil
        @deploy_id = client.deploy(id: self.model_id)['task_id']
        yield self if block_given?
        @deploy_id
      end

      # Requests undeployment, clears the cached deployed flag and returns
      # the client response. Yields self before returning when a block is given.
      def undeploy
        @deployed = nil
        response = client.undeploy(id: self.model_id)
        yield self if block_given?
        response
      end

      # True when the model state is 'DEPLOYED'. Only a positive answer is
      # cached; a false answer is looked up again on the next call.
      def deployed?
        return @deployed if @deployed
        response = client.get_model(id: self.model_id)
        # raise "Model not deployed" if response['model_state'] == 'FAILED'
        @deployed = response['model_state'] == 'DEPLOYED'
      end

      # Deletes the model identified by model_id.
      def delete
        client.delete_model(id: self.model_id)
      end

      def client
        @@client
      end

      # Fetches the model identified by model_id.
      def find
        client.get_model(id: self.model_id)
      end

      # Request body used by register; nil values are dropped.
      def to_hash
        {
          name: self.model,
          model_group_id: self.group_id,
          version: self.version,
          description: self.description,
          model_format: self.model_format,
          is_enabled: self.enabled?
        }.compact
      end

      def enabled?
        self.enabled
      end

      # Calls the block repeatedly until it returns a truthy value or
      # +max_attempts+ calls were made, sleeping +sleep_time+ seconds
      # between attempts.
      def wait_until_complete(max_attempts: 20, sleep_time: 4)
        attempts = 0
        loop do
          result = yield
          break if result
          attempts += 1
          break if attempts >= max_attempts
          sleep(sleep_time)
        end
      end
    end
  end
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'opensearch/api/namespace/machine_learning/model'
|
2
|
+
|
1
3
|
module Stretchy
|
2
4
|
module OpenSearchCompatibility
|
3
5
|
extend ActiveSupport::Concern
|
@@ -79,6 +81,8 @@ module Stretchy
|
|
79
81
|
|
80
82
|
::Elasticsearch::Persistence::Repository.send(:include, patch)
|
81
83
|
::Elasticsearch::Persistence::Repository.send(:include, store)
|
84
|
+
OpenSearch::API.send(:include, OpenSearch::API::MachineLearning::Models)
|
85
|
+
|
82
86
|
end
|
83
87
|
|
84
88
|
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Stretchy
  # Describes an ingest pipeline: a name, a description and a list of
  # processors declared at class level, plus helpers that call the ingest
  # client.
  class Pipeline
    cattr_reader :client do
      Stretchy.configuration.client.ingest
    end

    class << self
      attr_accessor :description, :pipeline_name, :processors

      # Reads or sets the pipeline name. Without an argument and without a
      # stored name, the underscored class name (namespace removed) is
      # stored and returned.
      def pipeline_name(name = nil)
        return @pipeline_name if name.nil? && @pipeline_name
        @pipeline_name = name || self.name.split('::').last.underscore
      end

      # Reads or sets the description.
      def description(desc = nil)
        @description = desc if desc
        @description
      end

      # Appends a processor of the given type to this pipeline.
      def processor(type, opts = {})
        @processors ||= []
        @processors << Stretchy::Pipelines::Processor.new(type, opts)
      end

      # Fetches pipelines through the client without an id; returns {} when
      # the client raises its NotFound error.
      def all
        client.get_pipeline
      rescue not_found
        {}
      end

      # GET _ingest/pipeline/<pipeline-name>
      def find(id)
        client.get_pipeline(id: id)
      end

      # Simulates this pipeline against +docs+.
      def simulate(docs, verbose: true)
        client.simulate(id: self.pipeline_name, body: {docs: docs}, verbose: verbose)
      end

      # PUT _ingest/pipeline/<pipeline-name>
      def create!
        client.put_pipeline(id: self.pipeline_name, body: self.to_hash)
      end

      # DELETE _ingest/pipeline/<pipeline-name>
      def delete!
        client.delete_pipeline(id: self.pipeline_name)
      end

      # True when looking the pipeline up returns something present; false
      # when the client raises its NotFound error.
      def exists?
        self.find(self.pipeline_name).present?
      rescue not_found
        false
      end

      # Request body for create!. A pipeline without declared processors
      # serializes an empty list instead of failing on nil.
      def to_hash
        {
          description: self.description,
          processors: Array(self.processors).map(&:to_hash)
        }.as_json
      end

      protected

      # NotFound error class of the client's transport, resolved from the
      # first segment of the client's class name.
      def not_found
        @not_found ||= Object.const_get("#{client.class.name.split('::').first}::Transport::Transport::Errors::NotFound")
      end
    end

    attr_accessor :description, :pipeline_name, :processors

    # Copies the class-level description, name and processors onto the instance.
    def initialize
      @description = self.class.description
      @pipeline_name = self.class.pipeline_name
      @processors = self.class.processors
    end

    # GET _ingest/pipeline/<pipeline-name>
    def find
      self.class.find(self.pipeline_name)
    end

    # Simulates the pipeline.
    #
    # Request body fields
    #
    # The following table lists the request body fields used to run a pipeline.
    #
    # Field     Required  Type    Description
    # docs      Required  Array   The documents to be used to test the pipeline.
    # pipeline  Optional  Object  The pipeline to be simulated. If the pipeline identifier is not included, then the response simulates the latest pipeline created.
    #
    # The docs field can include subfields listed in the following table.
    #
    # Field   Required  Type    Description
    # source  Required  Object  The document's JSON body.
    # id      Optional  String  A unique document identifier. The identifier cannot be used elsewhere in the index.
    # index   Optional  String  The index where the document's transformed data appears.
    def simulate(docs, verbose: true)
      self.class.simulate(docs, verbose: verbose)
    end

    def exists?
      self.class.exists?
    end

    # Same shape as the class-level to_hash, built from the instance's own
    # values; a nil processor list serializes as an empty list.
    def to_hash
      {
        description: self.description,
        processors: Array(self.processors).map(&:to_hash)
      }.as_json
    end

    def client
      @client ||= Stretchy.configuration.client.ingest
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Stretchy::Pipelines
  # A single processor step of a pipeline.
  #
  # Processor type        Description
  # append                Adds one or more values to a field in a document.
  # bytes                 Converts a human-readable byte value to its value in bytes.
  # convert               Changes the data type of a field in a document.
  # copy                  Copies an entire object in an existing field to another field.
  # csv                   Extracts CSVs and stores them as individual fields in a document.
  # date                  Parses dates from fields and then uses the date or timestamp as the timestamp for a document.
  # date_index_name       Indexes documents into time-based indexes based on a date or timestamp field in a document.
  # dissect               Extracts structured fields from a text field using a defined pattern.
  # dot_expander          Expands a field with dots into an object field.
  # drop                  Drops a document without indexing it or raising any errors.
  # fail                  Raises an exception and stops the execution of a pipeline.
  # foreach               Allows for another processor to be applied to each element of an array or an object field in a document.
  # geoip                 Adds information about the geographical location of an IP address.
  # geojson-feature       Indexes GeoJSON data into a geospatial field.
  # grok                  Parses and structures unstructured data using pattern matching.
  # gsub                  Replaces or deletes substrings within a string field of a document.
  # html_strip            Removes HTML tags from a text field and returns the plain text content.
  # ip2geo                Adds information about the geographical location of an IPv4 or IPv6 address.
  # join                  Concatenates each element of an array into a single string using a separator character between each element.
  # json                  Converts a JSON string into a structured JSON object.
  # kv                    Automatically parses key-value pairs in a field.
  # lowercase             Converts text in a specific field to lowercase letters.
  # pipeline              Runs an inner pipeline.
  # remove                Removes fields from a document.
  # script                Runs an inline or stored script on incoming documents.
  # set                   Sets the value of a field to a specified value.
  # sort                  Sorts the elements of an array in ascending or descending order.
  # sparse_encoding       Generates a sparse vector/token and weights from text fields for neural sparse search using sparse retrieval.
  # split                 Splits a field into an array using a separator character.
  # text_embedding        Generates vector embeddings from text fields for semantic search.
  # text_image_embedding  Generates combined vector embeddings from text and image fields for multimodal neural search.
  # trim                  Removes leading and trailing white space from a string field.
  # uppercase             Converts text in a specific field to uppercase letters.
  # urldecode             Decodes a string from URL-encoded format.
  # user_agent            Extracts details from the user agent sent by a browser to its web requests.
  #
  class Processor
    attr_reader :type, :opts, :description

    # type - the processor type (see the table above).
    # opts - the options hash for the processor; its :description entry is
    #        also exposed through #description.
    def initialize(type, opts = {})
      @type = type
      @opts = opts
      @description = opts[:description]
    end

    # Serializes the processor as a one-entry hash: type => options.
    def to_hash
      { type => opts }
    end
  end
end
|
data/lib/stretchy/querying.rb
CHANGED
@@ -3,6 +3,7 @@ module Stretchy
|
|
3
3
|
delegate :first, :first!, :last, :last!, :exists?, :has_field, :any?, :many?, to: :all
|
4
4
|
delegate :order, :limit, :size, :sort, :rewhere, :eager_load, :includes, :create_with, :none, :unscope, to: :all
|
5
5
|
delegate :or_filter, :fields, :source, :highlight, to: :all
|
6
|
+
delegate :neural_sparse, :neural, :hybrid, to: :all
|
6
7
|
delegate *Stretchy::Relations::AggregationMethods::AGGREGATION_METHODS, to: :all
|
7
8
|
|
8
9
|
delegate :skip_callbacks, :routing, :search_options, to: :all
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Stretchy
  module Rails
    module Instrumentation
      module Publishers
        # Publishes a "search.stretchy" notification around every search.
        module Record
          extend ActiveSupport::Concern

          included do
            # Alias only once: skip when the un-instrumented alias already exists.
            if !method_defined?(:search_without_instrumentation!)
              alias_method :search_without_instrumentation!, :search
              alias_method :search, :search_with_instrumentation!
            end
          end

          # Runs the original search inside an ActiveSupport::Notifications
          # block. The payload carries the event name, the base class name
          # and the search definition (index, body, merged with +options+).
          def search_with_instrumentation!(query_or_definition, options={})
            payload = {
              name: "Search",
              klass: self.base_class.to_s,
              search: {index: self.index_name, body: query_or_definition }.merge(options)
            }

            ActiveSupport::Notifications.instrument("search.stretchy", payload) do
              search_without_instrumentation!(query_or_definition, options)
            end
          end
        end
      end
    end
  end
end
|