connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -10
- data/lib/app/config.rb +2 -0
- data/lib/app/dispatcher.rb +17 -1
- data/lib/app/preflight_check.rb +15 -0
- data/lib/connectors/base/connector.rb +37 -4
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/connector_status.rb +4 -4
- data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
- data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
- data/lib/connectors/example/connector.rb +43 -4
- data/lib/connectors/gitlab/connector.rb +16 -2
- data/lib/connectors/mongodb/connector.rb +173 -50
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/registry.rb +2 -2
- data/lib/connectors/sync_status.rb +23 -4
- data/lib/core/configuration.rb +4 -2
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +29 -18
- data/lib/core/elastic_connector_actions.rb +331 -32
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/native_scheduler.rb +3 -0
- data/lib/core/scheduler.rb +43 -10
- data/lib/core/single_scheduler.rb +3 -0
- data/lib/core/sync_job_runner.rb +78 -18
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/common.rb +20 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/errors.rb +5 -0
- data/lib/utility/es_client.rb +6 -2
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -3
- metadata +27 -18
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/utility/errors.rb
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#
|
6
6
|
|
7
7
|
require 'active_support/core_ext/string'
|
8
|
+
require 'elasticsearch'
|
8
9
|
|
9
10
|
module Utility
|
10
11
|
class DocumentError
|
@@ -31,6 +32,8 @@ module Utility
|
|
31
32
|
end
|
32
33
|
|
33
34
|
class ClientError < StandardError; end
|
35
|
+
|
36
|
+
class InvalidFilterConfigError < StandardError; end
|
34
37
|
class EvictionWithNoProgressError < StandardError; end
|
35
38
|
class EvictionError < StandardError
|
36
39
|
attr_accessor :cursors
|
@@ -89,6 +92,7 @@ module Utility
|
|
89
92
|
class InvalidTokenError < StandardError; end
|
90
93
|
class TokenRefreshFailedError < StandardError; end
|
91
94
|
class ConnectorNotAvailableError < StandardError; end
|
95
|
+
class AuthorizationError < StandardError; end
|
92
96
|
|
93
97
|
# For when we want to explicitly set a #cause but can't
|
94
98
|
class ExplicitlyCausedError < StandardError
|
@@ -124,6 +128,7 @@ module Utility
|
|
124
128
|
end
|
125
129
|
end
|
126
130
|
|
131
|
+
AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
|
127
132
|
INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
|
128
133
|
INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
|
129
134
|
UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
|
data/lib/utility/es_client.rb
CHANGED
@@ -20,8 +20,8 @@ module Utility
|
|
20
20
|
attr_reader :cause
|
21
21
|
end
|
22
22
|
|
23
|
-
def initialize(es_config)
|
24
|
-
super(connection_configs(es_config))
|
23
|
+
def initialize(es_config, &block)
|
24
|
+
super(connection_configs(es_config), &block)
|
25
25
|
end
|
26
26
|
|
27
27
|
def connection_configs(es_config)
|
@@ -39,6 +39,10 @@ module Utility
|
|
39
39
|
configs[:log] = es_config[:log] || false
|
40
40
|
configs[:trace] = es_config[:trace] || false
|
41
41
|
|
42
|
+
# transport options
|
43
|
+
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
|
+
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
|
+
|
42
46
|
# if log or trace is activated, we use the application logger
|
43
47
|
configs[:logger] = if configs[:log] || configs[:trace]
|
44
48
|
Utility::Logger.logger
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
class Filtering
|
11
|
+
class << self
|
12
|
+
def extract_filter(filtering)
|
13
|
+
return {} unless filtering.present?
|
14
|
+
|
15
|
+
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
16
|
+
filter = filtering.is_a?(Array) ? filtering.first : filtering
|
17
|
+
|
18
|
+
filter.present? ? filter : {}
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/utility/logger.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'config'
|
7
8
|
require 'logger'
|
8
9
|
require 'active_support/core_ext/module'
|
9
10
|
require 'active_support/core_ext/string/filters'
|
@@ -23,7 +24,7 @@ module Utility
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def logger
|
26
|
-
@logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
|
+
@logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
|
27
28
|
end
|
28
29
|
|
29
30
|
SUPPORTED_LOG_LEVELS.each do |level|
|
data/lib/utility.rb
CHANGED
@@ -4,13 +4,15 @@
|
|
4
4
|
# you may not use this file except in compliance with the Elastic License.
|
5
5
|
#
|
6
6
|
|
7
|
+
require 'utility/bulk_queue'
|
8
|
+
require 'utility/common'
|
7
9
|
require 'utility/constants'
|
8
10
|
require 'utility/cron'
|
11
|
+
require 'utility/elasticsearch/index/mappings'
|
12
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|
13
|
+
require 'utility/environment'
|
9
14
|
require 'utility/errors'
|
10
15
|
require 'utility/es_client'
|
11
|
-
require 'utility/environment'
|
12
16
|
require 'utility/exception_tracking'
|
13
17
|
require 'utility/extension_mapping_util'
|
14
18
|
require 'utility/logger'
|
15
|
-
require 'utility/elasticsearch/index/mappings'
|
16
|
-
require 'utility/elasticsearch/index/text_analysis_settings'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.3
|
4
|
+
version: 8.6.0.4.pre.20221114T233727Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -198,14 +198,14 @@ dependencies:
|
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 8.
|
201
|
+
version: 8.5.0
|
202
202
|
type: :runtime
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version: 8.
|
208
|
+
version: 8.5.0
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
210
|
name: faraday
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -398,33 +398,39 @@ files:
|
|
398
398
|
- lib/connectors/base/adapter.rb
|
399
399
|
- lib/connectors/base/connector.rb
|
400
400
|
- lib/connectors/base/custom_client.rb
|
401
|
+
- lib/connectors/base/simple_rules_parser.rb
|
401
402
|
- lib/connectors/connector_status.rb
|
402
403
|
- lib/connectors/crawler/scheduler.rb
|
404
|
+
- lib/connectors/example/attachments/first_attachment.txt
|
405
|
+
- lib/connectors/example/attachments/second_attachment.txt
|
406
|
+
- lib/connectors/example/attachments/third_attachment.txt
|
403
407
|
- lib/connectors/example/connector.rb
|
404
|
-
- lib/connectors/example/example_attachments/first_attachment.txt
|
405
|
-
- lib/connectors/example/example_attachments/second_attachment.txt
|
406
|
-
- lib/connectors/example/example_attachments/third_attachment.txt
|
407
408
|
- lib/connectors/gitlab/adapter.rb
|
408
409
|
- lib/connectors/gitlab/connector.rb
|
409
410
|
- lib/connectors/gitlab/custom_client.rb
|
410
411
|
- lib/connectors/gitlab/extractor.rb
|
411
412
|
- lib/connectors/mongodb/connector.rb
|
413
|
+
- lib/connectors/mongodb/mongo_rules_parser.rb
|
412
414
|
- lib/connectors/registry.rb
|
413
415
|
- lib/connectors/sync_status.rb
|
414
|
-
- lib/connectors_app/\
|
415
416
|
- lib/connectors_service.rb
|
416
417
|
- lib/connectors_utility.rb
|
417
418
|
- lib/core.rb
|
418
419
|
- lib/core/configuration.rb
|
420
|
+
- lib/core/connector_job.rb
|
419
421
|
- lib/core/connector_settings.rb
|
420
422
|
- lib/core/elastic_connector_actions.rb
|
423
|
+
- lib/core/filtering.rb
|
424
|
+
- lib/core/filtering/post_process_engine.rb
|
425
|
+
- lib/core/filtering/post_process_result.rb
|
426
|
+
- lib/core/filtering/simple_rule.rb
|
427
|
+
- lib/core/filtering/validation_job_runner.rb
|
428
|
+
- lib/core/filtering/validation_status.rb
|
421
429
|
- lib/core/heartbeat.rb
|
430
|
+
- lib/core/ingestion.rb
|
431
|
+
- lib/core/ingestion/es_sink.rb
|
432
|
+
- lib/core/ingestion/ingester.rb
|
422
433
|
- lib/core/native_scheduler.rb
|
423
|
-
- lib/core/output_sink.rb
|
424
|
-
- lib/core/output_sink/base_sink.rb
|
425
|
-
- lib/core/output_sink/combined_sink.rb
|
426
|
-
- lib/core/output_sink/console_sink.rb
|
427
|
-
- lib/core/output_sink/es_sink.rb
|
428
434
|
- lib/core/scheduler.rb
|
429
435
|
- lib/core/single_scheduler.rb
|
430
436
|
- lib/core/sync_job_runner.rb
|
@@ -433,6 +439,8 @@ files:
|
|
433
439
|
- lib/stubs/connectors/stats.rb
|
434
440
|
- lib/stubs/service_type.rb
|
435
441
|
- lib/utility.rb
|
442
|
+
- lib/utility/bulk_queue.rb
|
443
|
+
- lib/utility/common.rb
|
436
444
|
- lib/utility/constants.rb
|
437
445
|
- lib/utility/cron.rb
|
438
446
|
- lib/utility/elasticsearch/index/language_data.yml
|
@@ -443,6 +451,7 @@ files:
|
|
443
451
|
- lib/utility/es_client.rb
|
444
452
|
- lib/utility/exception_tracking.rb
|
445
453
|
- lib/utility/extension_mapping_util.rb
|
454
|
+
- lib/utility/filtering.rb
|
446
455
|
- lib/utility/logger.rb
|
447
456
|
- lib/utility/middleware/basic_auth.rb
|
448
457
|
- lib/utility/middleware/bearer_auth.rb
|
@@ -451,7 +460,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
451
460
|
licenses:
|
452
461
|
- Elastic-2.0
|
453
462
|
metadata: {}
|
454
|
-
post_install_message:
|
463
|
+
post_install_message:
|
455
464
|
rdoc_options: []
|
456
465
|
require_paths:
|
457
466
|
- lib
|
@@ -462,12 +471,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
462
471
|
version: '0'
|
463
472
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
464
473
|
requirements:
|
465
|
-
- - ">="
|
474
|
+
- - ">"
|
466
475
|
- !ruby/object:Gem::Version
|
467
|
-
version: '0'
|
476
|
+
version: 1.3.1
|
468
477
|
requirements: []
|
469
478
|
rubygems_version: 3.0.3.1
|
470
|
-
signing_key:
|
479
|
+
signing_key:
|
471
480
|
specification_version: 4
|
472
481
|
summary: Gem containing Elastic connectors service
|
473
482
|
test_files: []
|
@@ -1,33 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module OutputSink
|
11
|
-
class BaseSink
|
12
|
-
def ingest(_document)
|
13
|
-
raise 'not implemented'
|
14
|
-
end
|
15
|
-
|
16
|
-
def ingest_multiple(_documents)
|
17
|
-
raise 'not implemented'
|
18
|
-
end
|
19
|
-
|
20
|
-
def delete(_id)
|
21
|
-
raise 'not implemented'
|
22
|
-
end
|
23
|
-
|
24
|
-
def delete_multiple(_ids)
|
25
|
-
raise 'not implemented'
|
26
|
-
end
|
27
|
-
|
28
|
-
def flush(_size: nil)
|
29
|
-
raise 'not implemented'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink/base_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class CombinedSink < Core::OutputSink::BaseSink
|
14
|
-
def initialize(sinks = [])
|
15
|
-
@sinks = sinks
|
16
|
-
end
|
17
|
-
|
18
|
-
def ingest(document)
|
19
|
-
@sinks.each { |sink| sink.ingest(document) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def flush(size: nil)
|
23
|
-
@sinks.each { |sink| sink.flush(size: size) }
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest_multiple(documents)
|
27
|
-
@sinks.each { |sink| sink.ingest_multiple(documents) }
|
28
|
-
end
|
29
|
-
|
30
|
-
def delete(id)
|
31
|
-
@sinks.each { |sink| sink.delete(id) }
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
@sinks.each { |sink| sink.delete_multiple(ids) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class ConsoleSink < Core::OutputSink::BaseSink
|
14
|
-
def ingest(document)
|
15
|
-
print_header 'Got a single document:'
|
16
|
-
puts document
|
17
|
-
end
|
18
|
-
|
19
|
-
def flush(size: nil)
|
20
|
-
print_header 'Flushing'
|
21
|
-
puts "Flush size: #{size}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def ingest_multiple(documents)
|
25
|
-
print_header 'Got multiple documents:'
|
26
|
-
puts documents
|
27
|
-
end
|
28
|
-
|
29
|
-
def delete(id)
|
30
|
-
print_header "Deleting single id: #{id}"
|
31
|
-
puts id
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
print_header "Deleting several ids: #{ids}"
|
36
|
-
puts ids
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def print_delim
|
42
|
-
puts '----------------------------------------------------'
|
43
|
-
end
|
44
|
-
|
45
|
-
def print_header(header)
|
46
|
-
print_delim
|
47
|
-
puts header
|
48
|
-
print_delim
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'active_support/core_ext/numeric/time'
|
10
|
-
require 'app/config'
|
11
|
-
require 'core/output_sink/base_sink'
|
12
|
-
require 'utility/es_client'
|
13
|
-
require 'utility/logger'
|
14
|
-
|
15
|
-
module Core::OutputSink
|
16
|
-
class EsSink < Core::OutputSink::BaseSink
|
17
|
-
def initialize(index_name, request_pipeline, flush_threshold = 50)
|
18
|
-
super()
|
19
|
-
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
20
|
-
@index_name = index_name
|
21
|
-
@request_pipeline = request_pipeline
|
22
|
-
@operation_queue = []
|
23
|
-
@flush_threshold = flush_threshold
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest(document)
|
27
|
-
return if document.blank?
|
28
|
-
|
29
|
-
@operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
|
30
|
-
flush if ready_to_flush?
|
31
|
-
end
|
32
|
-
|
33
|
-
def delete(doc_id)
|
34
|
-
return if doc_id.nil?
|
35
|
-
|
36
|
-
@operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
|
37
|
-
flush if ready_to_flush?
|
38
|
-
end
|
39
|
-
|
40
|
-
def flush(size: nil)
|
41
|
-
flush_size = size || @flush_threshold
|
42
|
-
|
43
|
-
while @operation_queue.any?
|
44
|
-
data_to_flush = @operation_queue.pop(flush_size)
|
45
|
-
send_data(data_to_flush)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def ingest_multiple(documents)
|
50
|
-
Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
|
51
|
-
documents.each { |doc| ingest(doc) }
|
52
|
-
end
|
53
|
-
|
54
|
-
def delete_multiple(ids)
|
55
|
-
Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
|
56
|
-
ids.each { |id| delete(id) }
|
57
|
-
end
|
58
|
-
|
59
|
-
private
|
60
|
-
|
61
|
-
attr_accessor :index_name
|
62
|
-
|
63
|
-
def send_data(ops)
|
64
|
-
return if ops.empty?
|
65
|
-
|
66
|
-
@client.bulk(:body => ops, :pipeline => @request_pipeline)
|
67
|
-
Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
|
68
|
-
end
|
69
|
-
|
70
|
-
def ready_to_flush?
|
71
|
-
@operation_queue.size >= @flush_threshold
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|