stretchy-model 0.6.0 → 0.6.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/containers/Dockerfile.opensearch +4 -3
- data/docker-compose.yml +32 -19
- data/lib/elasticsearch/api/actions/machine_learning/models/delete_model.rb +33 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/deploy.rb +31 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/get_model.rb +43 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/get_status.rb +31 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/params_registry.rb +45 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/register.rb +45 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/undeploy.rb +32 -0
- data/lib/elasticsearch/api/actions/machine_learning/models/update_model.rb +39 -0
- data/lib/elasticsearch/api/namespace/machine_learning/model.rb +27 -0
- data/lib/opensearch/api/actions/machine_learning/models/delete_model.rb +33 -0
- data/lib/opensearch/api/actions/machine_learning/models/deploy.rb +31 -0
- data/lib/opensearch/api/actions/machine_learning/models/get_model.rb +44 -0
- data/lib/opensearch/api/actions/machine_learning/models/get_status.rb +31 -0
- data/lib/opensearch/api/actions/machine_learning/models/params_registry.rb +45 -0
- data/lib/opensearch/api/actions/machine_learning/models/register.rb +45 -0
- data/lib/opensearch/api/actions/machine_learning/models/undeploy.rb +31 -0
- data/lib/opensearch/api/actions/machine_learning/models/update_model.rb +39 -0
- data/lib/opensearch/api/namespace/machine_learning/model.rb +27 -0
- data/lib/stretchy/attributes/type/date_time.rb +50 -0
- data/lib/stretchy/attributes/type/rank_features.rb +11 -1
- data/lib/stretchy/attributes.rb +1 -0
- data/lib/stretchy/common.rb +5 -0
- data/lib/stretchy/delegation/gateway_delegation.rb +8 -2
- data/lib/stretchy/machine_learning/model.rb +192 -0
- data/lib/stretchy/open_search_compatibility.rb +4 -0
- data/lib/stretchy/pipeline.rb +123 -0
- data/lib/stretchy/pipelines/processor.rb +55 -0
- data/lib/stretchy/querying.rb +1 -0
- data/lib/stretchy/rails/instrumentation/publishers.rb +31 -0
- data/lib/{rails → stretchy/rails}/instrumentation/railtie.rb +11 -6
- data/lib/stretchy/relation.rb +1 -0
- data/lib/stretchy/relations/query_builder.rb +73 -3
- data/lib/stretchy/relations/query_methods.rb +32 -0
- data/lib/stretchy/shared_scopes.rb +6 -1
- data/lib/stretchy/version.rb +1 -1
- data/lib/stretchy.rb +7 -2
- metadata +40 -5
- data/lib/rails/instrumentation/publishers.rb +0 -29
@@ -0,0 +1,31 @@
|
|
1
|
+
module OpenSearch
  module API
    module MachineLearning
      module Models
        module Actions
          # Undeploy a model.
          #
          # @param arguments [Hash] request options; the hash passed in is left untouched
          # @option arguments [String] :id The model id (*Required*)
          # @option arguments [Hash] :body The undeploy options
          # @option arguments [Hash] :headers Custom HTTP headers
          #
          # @return the +body+ of the object returned by +perform_request+
          # @raise [ArgumentError] when :id is missing
          #
          # POST /_plugins/_ml/models/<model_id>/_undeploy
          def undeploy(arguments = {})
            raise ArgumentError, "Required argument 'id' missing" unless arguments[:id]

            # Work on a copy: :id and :headers are removed below and that must
            # not be visible in the hash the caller handed in.
            arguments = arguments.dup
            _id = arguments.delete(:id)
            headers = arguments.delete(:headers) || {}

            method = OpenSearch::API::HTTP_POST
            path   = "_plugins/_ml/models/#{Utils.__listify(_id)}/_undeploy"
            params = Utils.__validate_and_extract_params arguments, ParamsRegistry.get(__method__)

            body = arguments[:body]
            perform_request(method, path, params, body, headers).body
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module OpenSearch
  module API
    module MachineLearning
      module Models
        module Actions
          # Updates a model.
          #
          # @param arguments [Hash] request options; the hash passed in is left untouched
          # @option arguments [String] :id The model id (*Required*)
          # @option arguments [Hash] :body The fields to update (*Required*)
          # @option arguments [Hash] :headers Custom HTTP headers
          #
          # @return the +body+ of the object returned by +perform_request+
          # @raise [ArgumentError] when :body or :id is missing
          #
          # Example
          #   update_model(
          #     id: '109sdj0asl092',
          #     body: {
          #       rate_limiter: {
          #         limit: '4',
          #         unit: 'MINUTES'
          #       }
          #     }
          #   )
          #
          # PUT /_plugins/_ml/models/<model_id>
          def update_model(arguments = {})
            raise ArgumentError, "Required argument 'body' missing" unless arguments[:body]
            raise ArgumentError, "Required argument 'id' missing" unless arguments[:id]

            # Work on a copy: :id and :headers are removed below and that must
            # not be visible in the hash the caller handed in.
            arguments = arguments.dup
            _id = arguments.delete(:id)
            headers = arguments.delete(:headers) || {}

            method = OpenSearch::API::HTTP_PUT
            path   = "_plugins/_ml/models/#{Utils.__listify(_id)}"
            params = Utils.__validate_and_extract_params arguments, ParamsRegistry.get(__method__)

            body = arguments[:body]
            perform_request(method, path, params, body, headers).body
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
Dir[File.expand_path('../../actions/**/*.rb', __dir__)].sort.each { |f| require f }
|
2
|
+
|
3
|
+
module OpenSearch
  module API
    module MachineLearning
      module Models
        # Holder for the request methods of this namespace.
        module Actions
        end

        # Client for the "machine_learning/models" namespace; it mixes in the
        # methods of {MachineLearning::Models::Actions}.
        class MachineLearningClient
          include MachineLearning::Models::Actions
          include OpenSearch::API::Common::Client::Base
          include OpenSearch::API::Common::Client
        end

        # Returns the {MachineLearningClient} wrapped around the receiving
        # object, building it on first use and reusing it afterwards.
        def machine_learning
          @machine_learning ||= MachineLearningClient.new(self)
        end

        # Short name for {#machine_learning}.
        alias_method :ml, :machine_learning
      end
    end
  end
end
|
@@ -24,6 +24,17 @@ module Stretchy::Attributes::Type
|
|
24
24
|
class DateTime < Stretchy::Attributes::Type::Base
|
25
25
|
OPTIONS = [:doc_values, :format, :locale, :ignore_malformed, :index, :null_value, :on_script_error, :script, :store, :meta]
|
26
26
|
attr_reader *OPTIONS
|
27
|
+
include ActiveModel::Type::Helpers::Timezone
|
28
|
+
include ActiveModel::Type::Helpers::AcceptsMultiparameterTime.new(
|
29
|
+
defaults: { 4 => 0, 5 => 0 }
|
30
|
+
)
|
31
|
+
include ActiveModel::Type::Helpers::TimeValue
|
32
|
+
|
33
|
+
def initialize(**args)
|
34
|
+
@model_format = args.delete(:model_format)
|
35
|
+
super
|
36
|
+
end
|
37
|
+
|
27
38
|
def type
|
28
39
|
:datetime
|
29
40
|
end
|
@@ -31,5 +42,44 @@ module Stretchy::Attributes::Type
|
|
31
42
|
def type_for_database
|
32
43
|
:date
|
33
44
|
end
|
45
|
+
|
46
|
+
private
|
47
|
+
# Casts raw input to a time value.
#
# value - The raw value to cast.
#
# Anything that is not a String is handed to apply_seconds_precision. An
# empty String gives nil. Any other String is offered to fast_string_to_time,
# fallback_string_to_time and custom_string_to_time in that order; the first
# truthy result is returned.
def cast_value(value)
  if value.is_a?(::String)
    return if value.empty?

    parsed = fast_string_to_time(value)
    parsed ||= fallback_string_to_time(value)
    parsed || custom_string_to_time(value)
  else
    apply_seconds_precision(value)
  end
end
|
53
|
+
|
54
|
+
# Converts the :sec_fraction entry of a parsed time hash into whole
# microseconds. A hash without :sec_fraction gives 0.
#
#   '0.123456' -> 123456
#   '1.123456' -> 123456
def microseconds(time)
  fraction = time[:sec_fraction]
  return 0 unless fraction

  (fraction * 1_000_000).to_i
end
|
59
|
+
|
60
|
+
# Last-resort parser: reads +string+ with the strptime pattern stored in
# @model_format (the :model_format option given to the type).
#
# string - The String to parse.
#
# Returns the ::Date produced by ::Date.strptime, or nil when no model format
# is configured or +string+ does not match it. Returning nil keeps this
# parser in line with fallback_string_to_time, which also yields nil for
# input it cannot read.
def custom_string_to_time(string)
  return if @model_format.nil?

  ::Date.strptime(string, @model_format)
rescue ArgumentError
  # strptime reports a non-matching string with ArgumentError (Date::Error on
  # newer Rubies is a subclass of it).
  nil
end
|
63
|
+
|
64
|
+
# Lenient parser built on ::Date._parse.
#
# string - The String to parse.
#
# Returns nil when ::Date._parse raises ArgumentError. Otherwise the parsed
# components, with :sec_fraction replaced by the result of microseconds, are
# passed to new_time and its result is returned.
def fallback_string_to_time(string)
  parts =
    begin
      ::Date._parse(string)
    rescue ArgumentError
      nil
    end
  return unless parts

  parts[:sec_fraction] = microseconds(parts)

  year, mon, mday, hour, min, sec, usec, offset =
    parts.values_at(:year, :mon, :mday, :hour, :min, :sec, :sec_fraction, :offset)
  new_time(year, mon, mday, hour, min, sec, usec, offset)
end
|
75
|
+
|
76
|
+
# Guards multiparameter assignment: positions 1, 2 and 3 must all be present
# in +values_hash+ before the inherited implementation is invoked.
#
# values_hash - The Hash of position => value pairs.
#
# Raises ArgumentError listing the absent positions when any is missing.
def value_from_multiparameter_assignment(values_hash)
  missing_parameters = [1, 2, 3].reject { |position| values_hash.key?(position) }
  if missing_parameters.any?
    raise ArgumentError, "Provided hash #{values_hash} doesn't contain necessary keys: #{missing_parameters}"
  end

  super
end
|
83
|
+
|
34
84
|
end
|
35
85
|
end
|
@@ -14,11 +14,21 @@ module Stretchy::Attributes::Type
|
|
14
14
|
# end
|
15
15
|
#
|
16
16
|
# Returns nothing.
|
17
|
-
class RankFeatures < Stretchy::Attributes::Type::
|
17
|
+
class RankFeatures < Stretchy::Attributes::Type::Hash
  OPTIONS = [:positive_score_impact]

  # Builds the mapping entry for the field called +name+.
  #
  # The entry always carries :type; each option listed in OPTIONS is added
  # only when its reader returns a non-nil value.
  #
  # Returns the result of calling as_json on { name => entry }.
  def mappings(name)
    entry = OPTIONS.each_with_object({type: type}) do |option, collected|
      next if send(option).nil?

      collected[option] = send(option)
    end

    { name => entry }.as_json
  end

  # Returns the Symbol naming this attribute type.
  def type
    :rank_features
  end

  # Returns the Symbol used for this type on the index side.
  def type_for_database
    :rank_features
  end
end
|
24
34
|
end
|
data/lib/stretchy/attributes.rb
CHANGED
@@ -43,6 +43,7 @@ module Stretchy
|
|
43
43
|
ActiveModel::Type.register(:percolator, Stretchy::Attributes::Type::Percolator)
|
44
44
|
ActiveModel::Type.register(:point, Stretchy::Attributes::Type::Point)
|
45
45
|
ActiveModel::Type.register(:rank_feature, Stretchy::Attributes::Type::RankFeature)
|
46
|
+
ActiveModel::Type.register(:rank_features, Stretchy::Attributes::Type::RankFeatures)
|
46
47
|
|
47
48
|
ActiveModel::Type.register(:text, Stretchy::Attributes::Type::Text)
|
48
49
|
ActiveModel::Type.register(:token_count, Stretchy::Attributes::Type::TokenCount)
|
data/lib/stretchy/common.rb
CHANGED
@@ -30,14 +30,20 @@ module Stretchy
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
33
|
+
# Declares and reads the settings passed to the repository for the index.
#
# settings - Hash of settings to add to those already declared (default: {}).
#            The hash itself is never modified.
#
# Every call merges +settings+ into what was declared before, so a call that
# happens after the settings were first read is no longer ignored. When
# default_pipeline is truthy its String form is stored under
# :default_pipeline.
#
# Returns the accumulated settings converted with with_indifferent_access.
def index_settings(settings={})
  # Hash#merge builds a new Hash, which keeps the caller's argument untouched.
  @index_settings = (@index_settings || {}).merge(settings)
  @index_settings = @index_settings.merge(default_pipeline: default_pipeline.to_s) if default_pipeline
  @index_settings.with_indifferent_access
end
|
38
|
+
|
33
39
|
def reload_gateway_configuration!
|
34
40
|
@gateway = nil
|
35
41
|
end
|
36
42
|
|
37
43
|
# Returns the repository used to talk to the index, creating it on first use.
#
# A cached repository is dropped (via reload_gateway_configuration!) when its
# client is no longer the one in Stretchy.configuration, so the next line
# builds a new one. The block parameter is accepted but not used.
def gateway(&block)
  stale = @gateway && @gateway.client != Stretchy.configuration.client
  reload_gateway_configuration! if stale

  @gateway ||= Stretchy::Repository.create(
    client: Stretchy.configuration.client,
    index_name: index_name,
    klass: base_class,
    mapping: base_class.attribute_mappings.merge(dynamic: true),
    settings: index_settings
  )
  # block.arity < 1 ? @gateway.instance_eval(&block) : block.call(@gateway) if block_given?
  @gateway
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
module Stretchy
  module MachineLearning
    # Wrapper around the machine-learning model API reached through the +ml+
    # namespace of the configured client: register, deploy, undeploy, delete
    # and status polling for a single model.
    class Model
      # Short names for pretrained models, grouped by family. model_lookup
      # exposes each entry as "<family>_<name>", e.g. :neural_sparse_encoding.
      PRETRAINED_MODELS = {
        :neural_sparse => {
          :encoding => 'amazon/neural-sparse/opensearch-neural-sparse-encoding-v1',
          :encoding_doc => 'amazon/neural-sparse/opensearch-neural-sparse-encoding-doc-v1',
          :tokenizer => 'amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1'
        },
        :cross_encoder => {
          :minilm_6 => 'huggingface/cross-encoders/ms-marco-MiniLM-L-6-v2',
          :minilm_12 => 'huggingface/cross-encoders/ms-marco-MiniLM-L-12-v2'
        },
        :sentence_transformers => {
          :roberta_all => 'huggingface/sentence-transformers/all-distilroberta-v1',
          :msmarco => 'huggingface/sentence-transformers/msmarco-distilroberta-base-v2',
          :minilm_6 => 'huggingface/sentence-transformers/all-MiniLM-L6-v2',
          :minilm_12 => 'huggingface/sentence-transformers/all-MiniLM-L12-v2',
          # Was 'all-mpnet-base-v' (version digit missing).
          :mpnet => 'huggingface/sentence-transformers/all-mpnet-base-v2',
          :multi_qa_minilm_6 => 'huggingface/sentence-transformers/multi-qa-MiniLM-L6-cos-v1',
          :multi_qa_mpnet => 'huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1',
          :paraphrase_minilm_3 => 'huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2',
          :paraphrase_multilingual_minilm_12 => 'huggingface/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
          :paraphrase_mpnet => 'huggingface/sentence-transformers/paraphrase-mpnet-base-v2',
          :multilingual_distiluse_cased => 'huggingface/sentence-transformers/distiluse-base-multilingual-cased-v1'
        }
      }

      class << self
        attr_accessor :model, :group_id

        # Returns the +ml+ namespace of the currently configured client.
        #
        # Looked up on every call instead of once while the class is loaded,
        # so a client configured after load time is the one that gets used.
        def client
          Stretchy.configuration.client.ml
        end

        # Returns the response of get_model called without an id.
        def all
          client.get_model
        end

        # Stores persistent cluster settings with
        # plugins.ml_commons.only_run_on_ml_node set to "false".
        #
        # Returns the response of cluster.put_settings.
        def ml_on_all_nodes!
          put_ml_commons_settings(only_run_on_ml_node: false)
        end

        # Stores persistent cluster settings with
        # plugins.ml_commons.only_run_on_ml_node set to "true".
        #
        # Returns the response of cluster.put_settings.
        def ml_on_ml_nodes!
          put_ml_commons_settings(only_run_on_ml_node: true)
        end

        # Resolves a short model name to the full pretrained model name.
        #
        # model - Symbol or String such as :sentence_transformers_mpnet, or
        #         any other model name.
        #
        # Returns the matching PRETRAINED_MODELS entry, the String form of
        # +model+ when there is no such entry, or nil when +model+ is nil.
        def model_lookup(model)
          return if model.nil?

          @flattened_models ||= PRETRAINED_MODELS.flat_map do |key, value|
            value.map do |sub_key, sub_value|
              ["#{key}_#{sub_key}".to_sym, sub_value]
            end
          end.to_h

          @flattened_models[model.to_sym] || model.to_s
        end

        private

        # Sends the persistent ml_commons settings shared by ml_on_all_nodes!
        # and ml_on_ml_nodes!; only the only_run_on_ml_node flag differs.
        def put_ml_commons_settings(only_run_on_ml_node:)
          settings = {
            "persistent": {
              "plugins": {
                "ml_commons": {
                  "only_run_on_ml_node": only_run_on_ml_node.to_s,
                  "model_access_control_enabled": "true",
                  "native_memory_threshold": "99"
                }
              }
            }
          }
          Stretchy.configuration.client.cluster.put_settings body: settings
        end
      end

      attr_accessor :model,
        :group_id,
        :version,
        :description,
        :model_format,
        :enabled,
        :connector_id,
        :connector,
        :function_name,
        :model_config,
        :model_content_hash_value,
        :url

      attr_reader :task_id, :model_id, :deploy_id

      # Builds a model description.
      #
      # args - Hash of attributes (default: {}). :model is resolved through
      #        model_lookup; every other key is assigned through the writer
      #        of the same name. The hash passed in is not modified.
      def initialize(args = {})
        attributes = args.dup
        model_name = attributes.delete(:model)
        attributes.each do |k, v|
          self.send("#{k}=", v)
        end
        @model = self.class.model_lookup model_name
      end

      # Registers the model (with deploy: true) and remembers the task id
      # from the response. Yields self afterwards when a block is given.
      #
      # Returns true on success. Any StandardError raised along the way is
      # logged through Stretchy.logger and false is returned.
      def register
        response = client.register(body: self.to_hash, deploy: true)

        @task_id = response['task_id']

        yield self if block_given?

        true
      rescue => e
        Stretchy.logger.error "Error registering model: #{e.message}"
        false
      end

      # Checks the registration task and stores the model id once the status
      # response carries one.
      #
      # Returns true when the task state is 'COMPLETED' and a model id is known.
      def registered?
        response = status
        @model_id = response['model_id'] if response['model_id']
        response['state'] == 'COMPLETED' && @model_id.present?
      end

      # Returns the response of get_status for the stored task id.
      def status
        client.get_status(task_id: self.task_id)
      end

      # Requests deployment of the model, clears the cached deployed? answer
      # and yields self when a block is given.
      #
      # Returns the 'task_id' of the deploy response.
      def deploy
        @deployed = nil
        @deploy_id = client.deploy(id: self.model_id)['task_id']
        yield self if block_given?
        @deploy_id
      end

      # Requests undeployment of the model, clears the cached deployed?
      # answer and yields self when a block is given.
      #
      # Returns the undeploy response.
      def undeploy
        @deployed = nil
        response = client.undeploy(id: self.model_id)
        yield self if block_given?
        response
      end

      # Returns true when the model state is 'DEPLOYED'. A positive answer is
      # cached until deploy or undeploy is called; a negative one is re-checked.
      def deployed?
        return @deployed if @deployed
        response = client.get_model(id: self.model_id)
        # raise "Model not deployed" if response['model_state'] == 'FAILED'
        @deployed = response['model_state'] == 'DEPLOYED'
      end

      # Returns the response of delete_model for the stored model id.
      def delete
        client.delete_model(id: self.model_id)
      end

      # Returns the client used by the class (see Model.client).
      def client
        self.class.client
      end

      # Returns the response of get_model for the stored model id.
      def find
        client.get_model(id: self.model_id)
      end

      # Returns the registration body; attributes that are nil are left out.
      def to_hash
        {
          name: self.model,
          model_group_id: self.group_id,
          version: self.version,
          description: self.description,
          model_format: self.model_format,
          is_enabled: self.enabled?
        }.compact
      end

      # Returns the value of the enabled attribute.
      def enabled?
        self.enabled
      end

      # Calls the block until it returns a truthy value or max_attempts calls
      # have been made, sleeping sleep_time seconds between calls.
      #
      # Returns nil.
      def wait_until_complete(max_attempts: 20, sleep_time: 4)
        attempts = 0
        loop do
          result = yield
          break if result
          attempts += 1
          break if attempts >= max_attempts
          sleep(sleep_time)
        end
      end

    end
  end
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'opensearch/api/namespace/machine_learning/model'
|
2
|
+
|
1
3
|
module Stretchy
|
2
4
|
module OpenSearchCompatibility
|
3
5
|
extend ActiveSupport::Concern
|
@@ -79,6 +81,8 @@ module Stretchy
|
|
79
81
|
|
80
82
|
::Elasticsearch::Persistence::Repository.send(:include, patch)
|
81
83
|
::Elasticsearch::Persistence::Repository.send(:include, store)
|
84
|
+
OpenSearch::API.send(:include, OpenSearch::API::MachineLearning::Models)
|
85
|
+
|
82
86
|
end
|
83
87
|
|
84
88
|
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Stretchy
  # Declarative description of an ingest pipeline. Subclasses declare a name,
  # a description and processors at class level; the class methods then
  # create, delete, look up and simulate the pipeline through the +ingest+
  # namespace of the configured client.
  class Pipeline
    class << self
      attr_writer :description, :pipeline_name
      attr_accessor :processors

      # Returns the +ingest+ namespace of the currently configured client.
      #
      # Looked up on every call instead of once while the class is loaded,
      # so a client configured after load time is the one that gets used.
      def client
        Stretchy.configuration.client.ingest
      end

      # Reads or declares the pipeline name.
      #
      # name - The name to store (default: nil).
      #
      # Without an argument the stored name is returned; when none is stored
      # yet, the underscored last segment of the class name is stored and
      # returned.
      def pipeline_name(name = nil)
        return @pipeline_name if name.nil? && @pipeline_name
        @pipeline_name = name || self.name.split('::').last.underscore
      end

      # Reads or declares the pipeline description.
      #
      # desc - The description to store (default: nil).
      #
      # Returns the stored description.
      def description(desc = nil)
        @description = desc if desc
        @description
      end

      # Appends a processor to the pipeline.
      #
      # type - The processor type.
      # opts - Hash of processor options (default: {}).
      #
      # Returns the Array of declared processors.
      def processor(type, opts = {})
        @processors ||= []
        @processors << Stretchy::Pipelines::Processor.new(type, opts)
      end

      # Returns the response of get_pipeline without an id, or an empty Hash
      # when the client raises its NotFound error.
      def all
        client.get_pipeline
      rescue not_found
        {}
      end

      # Returns the response of get_pipeline for +id+.
      def find(id)
        client.get_pipeline(id: id)
      end

      # Simulates this pipeline against +docs+.
      #
      # docs    - The documents to send under the "docs" key of the body.
      # verbose - Passed through to the client (default: true).
      def simulate(docs, verbose: true)
        client.simulate(id: self.pipeline_name, body: {docs: docs}, verbose: verbose)
      end

      # PUT _ingest/pipeline/<pipeline-name>
      def create!
        client.put_pipeline(id: self.pipeline_name, body: self.to_hash)
      end

      # DELETE _ingest/pipeline/<pipeline-name>
      def delete!
        client.delete_pipeline(id: self.pipeline_name)
      end

      # Returns true when looking the pipeline up gives a present response,
      # false when the client raises its NotFound error.
      def exists?
        self.find(self.pipeline_name).present?
      rescue not_found
        false
      end

      # Returns the request body for the pipeline. A pipeline without any
      # declared processor serializes with an empty processor list.
      def to_hash
        {
          description: self.description,
          processors: Array(self.processors).map(&:to_hash)
        }.as_json
      end

      protected

      # Returns the NotFound error class that belongs to the client library
      # in use, derived from the top-level namespace of the client's class.
      def not_found
        Object.const_get("#{client.class.name.split('::').first}::Transport::Transport::Errors::NotFound")
      end

    end

    attr_accessor :description, :pipeline_name, :processors

    # Copies the class-level description, name and processors onto the instance.
    def initialize
      @description = self.class.description
      @pipeline_name = self.class.pipeline_name
      @processors = self.class.processors
    end

    # GET _ingest/pipeline/<pipeline-name>
    def find
      self.class.find(self.pipeline_name)
    end

    # Simulates the pipeline.
    #
    # Request body fields
    #
    # The following table lists the request body fields used to run a pipeline.
    #
    # Field     Required  Type    Description
    # docs      Required  Array   The documents to be used to test the pipeline.
    # pipeline  Optional  Object  The pipeline to be simulated. If the pipeline identifier is not included, then the response simulates the latest pipeline created.
    #
    # The docs field can include subfields listed in the following table.
    #
    # Field   Required  Type    Description
    # source  Required  Object  The document's JSON body.
    # id      Optional  String  A unique document identifier. The identifier cannot be used elsewhere in the index.
    # index   Optional  String  The index where the document's transformed data appears.
    def simulate(docs, verbose: true)
      self.class.simulate(docs, verbose: verbose)
    end

    # Returns the result of the class-level exists? check.
    def exists?
      self.class.exists?
    end

    # Returns the request body built from the instance's description and
    # processors. A missing processor list serializes as an empty list.
    def to_hash
      {
        description: self.description,
        processors: Array(self.processors).map(&:to_hash)
      }.as_json
    end

    # Returns the +ingest+ namespace of the configured client, remembered per
    # instance after the first call.
    def client
      @client ||= Stretchy.configuration.client.ingest
    end

  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Stretchy::Pipelines
|
2
|
+
# Creates a new processor for a pipeline
|
3
|
+
#
|
4
|
+
# Processor type Description
|
5
|
+
# append Adds one or more values to a field in a document.
|
6
|
+
# bytes Converts a human-readable byte value to its value in bytes.
|
7
|
+
# convert Changes the data type of a field in a document.
|
8
|
+
# copy Copies an entire object in an existing field to another field.
|
9
|
+
# csv Extracts CSVs and stores them as individual fields in a document.
|
10
|
+
# date Parses dates from fields and then uses the date or timestamp as the timestamp for a document.
|
11
|
+
# date_index_name Indexes documents into time-based indexes based on a date or timestamp field in a document.
|
12
|
+
# dissect Extracts structured fields from a text field using a defined pattern.
|
13
|
+
# dot_expander Expands a field with dots into an object field.
|
14
|
+
# drop Drops a document without indexing it or raising any errors.
|
15
|
+
# fail Raises an exception and stops the execution of a pipeline.
|
16
|
+
# foreach Allows for another processor to be applied to each element of an array or an object field in a document.
|
17
|
+
# geoip Adds information about the geographical location of an IP address.
|
18
|
+
# geojson-feature Indexes GeoJSON data into a geospatial field.
|
19
|
+
# grok Parses and structures unstructured data using pattern matching.
|
20
|
+
# gsub Replaces or deletes substrings within a string field of a document.
|
21
|
+
# html_strip Removes HTML tags from a text field and returns the plain text content.
|
22
|
+
# ip2geo Adds information about the geographical location of an IPv4 or IPv6 address.
|
23
|
+
# join Concatenates each element of an array into a single string using a separator character between each element.
|
24
|
+
# json Converts a JSON string into a structured JSON object.
|
25
|
+
# kv Automatically parses key-value pairs in a field.
|
26
|
+
# lowercase Converts text in a specific field to lowercase letters.
|
27
|
+
# pipeline Runs an inner pipeline.
|
28
|
+
# remove Removes fields from a document.
|
29
|
+
# script Runs an inline or stored script on incoming documents.
|
30
|
+
# set Sets the value of a field to a specified value.
|
31
|
+
# sort Sorts the elements of an array in ascending or descending order.
|
32
|
+
# sparse_encoding Generates a sparse vector/token and weights from text fields for neural sparse search using sparse retrieval.
|
33
|
+
# split Splits a field into an array using a separator character.
|
34
|
+
# text_embedding Generates vector embeddings from text fields for semantic search.
|
35
|
+
# text_image_embedding Generates combined vector embeddings from text and image fields for multimodal neural search.
|
36
|
+
# trim Removes leading and trailing white space from a string field.
|
37
|
+
# uppercase Converts text in a specific field to uppercase letters.
|
38
|
+
# urldecode Decodes a string from URL-encoded format.
|
39
|
+
# user_agent Extracts details from the user agent sent by a browser to its web requests.
|
40
|
+
#
|
41
|
+
class Processor

  attr_reader :type, :opts, :description

  # type - The processor type; it becomes the key of the serialized Hash.
  # opts - Hash of processor options (default: {}). Its :description entry
  #        is also exposed through the description reader.
  def initialize(type, opts = {})
    @type, @opts = type, opts
    @description = opts[:description]
  end

  # Returns a one-entry Hash that maps the type to the options.
  def to_hash
    { type => opts }
  end
end
|
55
|
+
end
|
data/lib/stretchy/querying.rb
CHANGED
@@ -3,6 +3,7 @@ module Stretchy
|
|
3
3
|
delegate :first, :first!, :last, :last!, :exists?, :has_field, :any?, :many?, to: :all
|
4
4
|
delegate :order, :limit, :size, :sort, :rewhere, :eager_load, :includes, :create_with, :none, :unscope, to: :all
|
5
5
|
delegate :or_filter, :fields, :source, :highlight, to: :all
|
6
|
+
delegate :neural_sparse, :neural, :hybrid, to: :all
|
6
7
|
delegate *Stretchy::Relations::AggregationMethods::AGGREGATION_METHODS, to: :all
|
7
8
|
|
8
9
|
delegate :skip_callbacks, :routing, :search_options, to: :all
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Stretchy
  module Rails
    module Instrumentation
      module Publishers

        # Mixin that routes +search+ through ActiveSupport::Notifications.
        module Record

          extend ActiveSupport::Concern

          # On inclusion the host's +search+ is kept under
          # search_without_instrumentation! and +search+ is pointed at the
          # instrumented variant. Nothing happens when the host already
          # defines search_without_instrumentation!.
          included do
            already_wrapped = method_defined?(:search_without_instrumentation!)
            unless already_wrapped
              alias_method :search_without_instrumentation!, :search
              alias_method :search, :search_with_instrumentation!
            end
          end

          # Runs the original search inside a "search.stretchy" notification.
          #
          # query_or_definition - The query handed to the original search.
          # options             - Hash of search options (default: {}).
          #
          # The payload carries :name, :klass (the base class name) and
          # :search (index name and body merged with +options+).
          #
          # Returns whatever the instrumented block returns.
          def search_with_instrumentation!(query_or_definition, options={})
            payload = {
              name: "Search",
              klass: self.base_class.to_s,
              search: {index: self.index_name, body: query_or_definition }.merge(options)
            }

            ActiveSupport::Notifications.instrument("search.stretchy", payload) do
              search_without_instrumentation!(query_or_definition, options)
            end
          end

        end

      end
    end
  end
end
|