connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +0 -4
  4. data/lib/app/dispatcher.rb +17 -42
  5. data/lib/app/preflight_check.rb +0 -11
  6. data/lib/connectors/base/connector.rb +14 -43
  7. data/lib/connectors/example/connector.rb +0 -6
  8. data/lib/connectors/gitlab/connector.rb +1 -6
  9. data/lib/connectors/mongodb/connector.rb +43 -47
  10. data/lib/connectors/sync_status.rb +1 -6
  11. data/lib/core/configuration.rb +1 -3
  12. data/lib/core/connector_settings.rb +16 -52
  13. data/lib/core/elastic_connector_actions.rb +59 -320
  14. data/lib/core/output_sink/base_sink.rb +33 -0
  15. data/lib/core/output_sink/combined_sink.rb +38 -0
  16. data/lib/core/output_sink/console_sink.rb +51 -0
  17. data/lib/core/output_sink/es_sink.rb +74 -0
  18. data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
  19. data/lib/core/scheduler.rb +10 -40
  20. data/lib/core/single_scheduler.rb +1 -1
  21. data/lib/core/sync_job_runner.rb +16 -72
  22. data/lib/core.rb +0 -4
  23. data/lib/utility/constants.rb +0 -2
  24. data/lib/utility/errors.rb +12 -0
  25. data/lib/utility/logger.rb +1 -1
  26. data/lib/utility.rb +4 -11
  27. metadata +9 -27
  28. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
  29. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  30. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  31. data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
  32. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
  33. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
  34. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
  35. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
  36. data/lib/connectors/tolerable_error_helper.rb +0 -43
  37. data/lib/core/connector_job.rb +0 -210
  38. data/lib/core/filtering/post_process_engine.rb +0 -39
  39. data/lib/core/filtering/post_process_result.rb +0 -27
  40. data/lib/core/filtering/simple_rule.rb +0 -141
  41. data/lib/core/filtering/validation_job_runner.rb +0 -53
  42. data/lib/core/filtering/validation_status.rb +0 -17
  43. data/lib/core/filtering.rb +0 -17
  44. data/lib/core/ingestion/es_sink.rb +0 -118
  45. data/lib/core/jobs/consumer.rb +0 -114
  46. data/lib/core/jobs/producer.rb +0 -26
  47. data/lib/utility/bulk_queue.rb +0 -85
  48. data/lib/utility/error_monitor.rb +0 -108
  49. data/lib/utility/filtering.rb +0 -22
@@ -1,42 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
- # frozen_string_literal: true
7
-
8
- require 'active_support/core_ext/hash/indifferent_access'
9
- require 'active_support/core_ext/object/blank'
10
- require 'core/filtering/simple_rule'
11
-
12
- module Connectors
13
- module Base
14
- class SimpleRulesParser
15
- def initialize(rules)
16
- @rules = (rules || []).map(&:with_indifferent_access).filter { |r| r[:id] != 'DEFAULT' }.sort_by { |r| r[:order] }
17
- end
18
-
19
- def parse
20
- merge_rules(@rules.map do |rule_hash|
21
- rule = Core::Filtering::SimpleRule.new(rule_hash)
22
- unless rule.is_include? || rule.is_exclude?
23
- raise "Unknown policy: #{rule.policy}"
24
- end
25
- parse_rule(rule)
26
- end)
27
- end
28
-
29
- private
30
-
31
- # merge all rules into a filter object or array
32
- # in a base case, does no transformations
33
- def merge_rules(rules)
34
- rules || []
35
- end
36
-
37
- def parse_rule(_rule)
38
- raise 'Not implemented'
39
- end
40
- end
41
- end
42
- end
@@ -1,35 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'connectors/base/advanced_snippet_validator'
10
-
11
- module Connectors
12
- module Example
13
- class ExampleAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
14
-
15
- def is_snippet_valid?
16
- # TODO: real filtering validation will follow later
17
- errors = [
18
- {
19
- :ids => ['missing-implementation'],
20
- :messages => ['Filtering is not implemented yet for the example connector']
21
- }
22
- ]
23
-
24
- validation_result = if @advanced_snippet.present? && !@advanced_snippet.empty?
25
- { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
- else
27
- { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
- end
29
- log_validation_result(validation_result)
30
- validation_result
31
- end
32
-
33
- end
34
- end
35
- end
@@ -1,35 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'connectors/base/advanced_snippet_validator'
10
-
11
- module Connectors
12
- module GitLab
13
- class GitLabAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
14
-
15
- def is_snippet_valid?
16
- # TODO: real filtering validation will follow later
17
- errors = [
18
- {
19
- :ids => ['missing-implementation'],
20
- :messages => ['Filtering is not implemented yet for the GitLab connector']
21
- }
22
- ]
23
-
24
- validation_result = if @advanced_snippet.present? && !@advanced_snippet.empty?
25
- { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
- else
27
- { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
- end
29
- log_validation_result(validation_result)
30
- validation_result
31
- end
32
-
33
- end
34
- end
35
- end
@@ -1,22 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'connectors/base/advanced_snippet_against_schema_validator'
10
- require 'connectors/mongodb/mongo_advanced_snippet_schema'
11
-
12
- module Connectors
13
- module MongoDB
14
- class MongoAdvancedSnippetAgainstSchemaValidator < Connectors::Base::AdvancedSnippetAgainstSchemaValidator
15
-
16
- def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
17
- super
18
- end
19
-
20
- end
21
- end
22
- end
@@ -1,292 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- module Connectors
10
- module MongoDB
11
- module AdvancedSnippet
12
- # Pipeline stages: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
13
- ALLOWED_PIPELINE_STAGES = %w[
14
- $addFields $bucket $bucketAuto $changeStream $collStats $count $densify
15
- $documents $facet $fill $geoNear $graphLookup $group $indexStats $limit
16
- $listSessions $lookup $match $merge $out $planCacheStats $project $redact
17
- $replaceRoot $replaceWith $sample $search $searchMeta $set $setWindowFields
18
- $skip $sort $sortByCount $unionWith $unset $unwind
19
- ]
20
-
21
- # All except the $out, $merge, $geoNear, and $changeStream stages can appear multiple times in a pipeline.
22
- # Source: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
23
- PIPELINE_STAGES_ALLOWED_ONCE = %w[$out $merge $geoNear $changeStream]
24
-
25
- NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
26
- READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
27
- STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
28
- MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }
29
-
30
- AGGREGATION_PIPELINE = lambda { |pipeline|
31
- return false unless pipeline.is_a?(Array)
32
-
33
- allowed_once_appearances = Set.new
34
-
35
- pipeline.flat_map(&:keys).each do |key|
36
- return false unless ALLOWED_PIPELINE_STAGES.include?(key)
37
-
38
- if PIPELINE_STAGES_ALLOWED_ONCE.include?(key)
39
- return false if allowed_once_appearances.include?(key)
40
-
41
- allowed_once_appearances.add(key)
42
- end
43
- end
44
-
45
- true
46
- }
47
-
48
- # Ruby has no 'Boolean' class
49
- BOOLEAN = ->(value) { value.is_a?(TrueClass) || value.is_a?(FalseClass) }
50
-
51
- COLLATION = {
52
- :name => 'collation',
53
- :type => Hash,
54
- :optional => true,
55
- :fields => [
56
- {
57
- :name => 'locale',
58
- :type => String,
59
- :optional => true
60
- },
61
- {
62
- :name => 'caseLevel',
63
- :type => BOOLEAN,
64
- :optional => true
65
- },
66
- {
67
- :name => 'caseFirst',
68
- :type => String,
69
- :optional => true
70
- },
71
- {
72
- :name => 'strength',
73
- :type => Integer,
74
- :optional => true
75
- },
76
- {
77
- :name => 'numericOrdering',
78
- :type => BOOLEAN,
79
- :optional => true
80
- },
81
- {
82
- :name => 'alternate',
83
- :type => String,
84
- :optional => true
85
- },
86
- {
87
- :name => 'maxVariable',
88
- :type => String,
89
- :optional => true
90
- },
91
- {
92
- :name => 'backwards',
93
- :type => BOOLEAN,
94
- :optional => true
95
- },
96
- ]
97
- }
98
-
99
- CURSOR_TYPE = ->(cursor) { [:tailable, :tailable_await].include?(cursor) }
100
-
101
- # Aggregate options: https://www.mongodb.com/docs/manual/reference/method/db.collection.aggregate/
102
- AGGREGATE_OPTIONS = {
103
- :name => 'options',
104
- :type => Hash,
105
- :optional => true,
106
- :fields => [
107
- {
108
- :name => 'explain',
109
- :type => BOOLEAN,
110
- :optional => true
111
- },
112
- {
113
- :name => 'allowDiskUse',
114
- :type => BOOLEAN,
115
- :optional => true
116
- },
117
- {
118
- :name => 'cursor',
119
- :type => Hash,
120
- :optional => true,
121
- :fields => [
122
- {
123
- :name => 'batchSize',
124
- :type => NON_NEGATIVE_INTEGER
125
- }
126
- ]
127
- },
128
- {
129
- :name => 'maxTimeMS',
130
- :type => NON_NEGATIVE_INTEGER,
131
- :optional => true
132
- },
133
- {
134
- :name => 'bypassDocumentValidation',
135
- :type => BOOLEAN,
136
- :optional => true
137
- },
138
- {
139
- :name => 'readConcern',
140
- :type => Hash,
141
- :optional => true,
142
- :fields => [
143
- {
144
- :name => 'level',
145
- :type => READ_CONCERN_LEVEL
146
- }
147
- ]
148
- },
149
- COLLATION,
150
- {
151
- :name => 'hint',
152
- :type => STRING_OR_DOCUMENT,
153
- :optional => true
154
- },
155
- {
156
- :name => 'comment',
157
- :type => String,
158
- :optional => true
159
- },
160
- {
161
- :name => 'writeConcern',
162
- :type => Hash,
163
- :optional => true
164
- },
165
- {
166
- :name => 'let',
167
- :type => Hash,
168
- :optional => true
169
- }
170
- ]
171
- }
172
-
173
- AGGREGATE_PIPELINE = {
174
- :name => 'pipeline',
175
- :type => AGGREGATION_PIPELINE,
176
- :optional => true,
177
- }
178
-
179
- AGGREGATE = {
180
- :name => 'aggregate',
181
- :type => Hash,
182
- :optional => true,
183
- :fields => [
184
- AGGREGATE_PIPELINE,
185
- AGGREGATE_OPTIONS
186
- ]
187
- }
188
-
189
- FIND_OPTIONS = {
190
- :name => 'options',
191
- :type => Hash,
192
- :optional => true,
193
- :fields => [
194
- {
195
- :name => 'allowDiskUse',
196
- :type => BOOLEAN,
197
- :optional => true
198
- },
199
- {
200
- :name => 'allowPartialResults',
201
- :type => BOOLEAN,
202
- :optional => true
203
- },
204
- {
205
- :name => 'batchSize',
206
- :type => NON_NEGATIVE_INTEGER,
207
- :optional => true
208
- },
209
- COLLATION,
210
- {
211
- :name => 'cursorType',
212
- :type => CURSOR_TYPE,
213
- :optional => true
214
- },
215
- {
216
- :name => 'limit',
217
- :type => NON_NEGATIVE_INTEGER,
218
- :optional => true
219
- },
220
- {
221
- :name => 'maxTimeMS',
222
- :type => NON_NEGATIVE_INTEGER,
223
- :optional => true
224
- },
225
- {
226
- :name => 'modifiers',
227
- :type => Hash,
228
- :optional => true
229
- },
230
- {
231
- :name => 'noCursorTimeout',
232
- :type => BOOLEAN,
233
- :optional => true
234
- },
235
- {
236
- :name => 'oplogReplay',
237
- :type => BOOLEAN,
238
- :optional => true
239
- },
240
- {
241
- :name => 'projection',
242
- :type => Hash,
243
- :optional => true
244
- },
245
- {
246
- :name => 'skip',
247
- :type => NON_NEGATIVE_INTEGER,
248
- :optional => true
249
- },
250
- {
251
- :name => 'sort',
252
- :type => Hash,
253
- :optional => true
254
- },
255
- {
256
- :name => 'let',
257
- :type => Hash,
258
- :optional => true
259
- }
260
- ]
261
- }
262
-
263
- # TODO: return true for now. Will be more involved (basically needs full query parsing or "dummy" execution against a running instance)
264
- FILTER = ->(_filter) { true }
265
-
266
- FIND_FILTER = {
267
- :name => 'filter',
268
- :type => FILTER
269
- }
270
-
271
- FIND = {
272
- :name => 'find',
273
- :type => Hash,
274
- :optional => true,
275
- :fields => [
276
- FIND_OPTIONS,
277
- FIND_FILTER
278
- ]
279
- }
280
-
281
- SCHEMA = {
282
- :fields => {
283
- :constraints => MUTUAL_EXCLUSIVE_FILTER,
284
- :values => [
285
- AGGREGATE,
286
- FIND
287
- ]
288
- }
289
- }
290
- end
291
- end
292
- end
@@ -1,81 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'active_support/core_ext/object'
10
- require 'connectors/base/simple_rules_parser'
11
- require 'core/filtering/simple_rule'
12
-
13
- module Connectors
14
- module MongoDB
15
- class MongoRulesParser < Connectors::Base::SimpleRulesParser
16
- def parse_rule(rule)
17
- field = rule.field
18
- value = rule.value
19
- unless value.present?
20
- raise "value is required for field: #{field}"
21
- end
22
- unless field.present?
23
- raise "field is required for rule: #{rule}"
24
- end
25
- op = rule.rule
26
- case op
27
- when Core::Filtering::SimpleRule::Rule::EQUALS
28
- parse_equals(rule)
29
- when Core::Filtering::SimpleRule::Rule::GREATER_THAN
30
- parse_greater_than(rule)
31
- when Core::Filtering::SimpleRule::Rule::LESS_THAN
32
- parse_less_than(rule)
33
- when Core::Filtering::SimpleRule::Rule::REGEX
34
- parse_regex(rule)
35
- else
36
- raise "Unknown operator: #{op}"
37
- end
38
- end
39
-
40
- def merge_rules(rules)
41
- return {} if rules.empty?
42
- return rules[0] if rules.size == 1
43
- { '$and' => rules }
44
- end
45
-
46
- private
47
-
48
- def parse_equals(rule)
49
- if rule.is_include?
50
- { rule.field => rule.value }
51
- else
52
- { rule.field => { '$ne' => rule.value } }
53
- end
54
- end
55
-
56
- def parse_greater_than(rule)
57
- if rule.is_include?
58
- { rule.field => { '$gt' => rule.value } }
59
- else
60
- { rule.field => { '$lte' => rule.value } }
61
- end
62
- end
63
-
64
- def parse_less_than(rule)
65
- if rule.is_include?
66
- { rule.field => { '$lt' => rule.value } }
67
- else
68
- { rule.field => { '$gte' => rule.value } }
69
- end
70
- end
71
-
72
- def parse_regex(rule)
73
- if rule.is_include?
74
- { rule.field => /#{rule.value}/ }
75
- else
76
- { rule.field => { '$not' => /#{rule.value}/ } }
77
- end
78
- end
79
- end
80
- end
81
- end
@@ -1,43 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- require 'utility/logger'
8
- require 'utility/exception_tracking'
9
- require 'utility/error_monitor'
10
-
11
- module Connectors
12
- class TolerableErrorHelper
13
- def initialize(error_monitor)
14
- @error_monitor = error_monitor
15
- end
16
-
17
- def yield_single_document(identifier: nil)
18
- Utility::Logger.debug("Extracting single document for #{identifier}") if identifier
19
- yield
20
- @error_monitor.note_success
21
- rescue *fatal_exception_classes => e
22
- Utility::ExceptionTracking.augment_exception(e)
23
- Utility::Logger.error("Encountered a fall-through error during extraction#{identifying_error_message(identifier)}: #{e.class}: #{e.message} {:message_id => #{e.id}}")
24
- raise
25
- rescue StandardError => e
26
- Utility::ExceptionTracking.augment_exception(e)
27
- Utility::Logger.warn("Encountered error during extraction#{identifying_error_message(identifier)}: #{e.class}: #{e.message} {:message_id => #{e.id}}")
28
- @error_monitor.note_error(e, :id => e.id)
29
- end
30
-
31
- private
32
-
33
- def identifying_error_message(identifier)
34
- identifier.present? ? " of '#{identifier}'" : ''
35
- end
36
-
37
- def fatal_exception_classes
38
- [
39
- Utility::ErrorMonitor::MonitoringError
40
- ]
41
- end
42
- end
43
- end