connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -5,6 +5,7 @@
5
5
  #
6
6
 
7
7
  require 'active_support/core_ext/string'
8
+ require 'elasticsearch'
8
9
 
9
10
  module Utility
10
11
  class DocumentError
@@ -31,6 +32,8 @@ module Utility
31
32
  end
32
33
 
33
34
  class ClientError < StandardError; end
35
+
36
+ class InvalidFilterConfigError < StandardError; end
34
37
  class EvictionWithNoProgressError < StandardError; end
35
38
  class EvictionError < StandardError
36
39
  attr_accessor :cursors
@@ -89,6 +92,7 @@ module Utility
89
92
  class InvalidTokenError < StandardError; end
90
93
  class TokenRefreshFailedError < StandardError; end
91
94
  class ConnectorNotAvailableError < StandardError; end
95
+ class AuthorizationError < StandardError; end
92
96
 
93
97
  # For when we want to explicitly set a #cause but can't
94
98
  class ExplicitlyCausedError < StandardError
@@ -124,6 +128,7 @@ module Utility
124
128
  end
125
129
  end
126
130
 
131
+ AUTHORIZATION_ERRORS = [Elastic::Transport::Transport::Errors::Unauthorized]
127
132
  INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
128
133
  INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
129
134
  UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
@@ -20,8 +20,8 @@ module Utility
20
20
  attr_reader :cause
21
21
  end
22
22
 
23
- def initialize(es_config)
24
- super(connection_configs(es_config))
23
+ def initialize(es_config, &block)
24
+ super(connection_configs(es_config), &block)
25
25
  end
26
26
 
27
27
  def connection_configs(es_config)
@@ -39,6 +39,10 @@ module Utility
39
39
  configs[:log] = es_config[:log] || false
40
40
  configs[:trace] = es_config[:trace] || false
41
41
 
42
+ # transport options
43
+ configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
44
+ configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
45
+
42
46
  # if log or trace is activated, we use the application logger
43
47
  configs[:logger] = if configs[:log] || configs[:trace]
44
48
  Utility::Logger.logger
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Utility
10
+ class Filtering
11
+ class << self
12
+ def extract_filter(filtering)
13
+ return {} unless filtering.present?
14
+
15
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
16
+ filter = filtering.is_a?(Array) ? filtering.first : filtering
17
+
18
+ filter.present? ? filter : {}
19
+ end
20
+ end
21
+ end
22
+ end
@@ -4,6 +4,7 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'config'
7
8
  require 'logger'
8
9
  require 'active_support/core_ext/module'
9
10
  require 'active_support/core_ext/string/filters'
@@ -23,7 +24,7 @@ module Utility
23
24
  end
24
25
 
25
26
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
28
  end
28
29
 
29
30
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,13 +4,15 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'utility/bulk_queue'
8
+ require 'utility/common'
7
9
  require 'utility/constants'
8
10
  require 'utility/cron'
11
+ require 'utility/elasticsearch/index/mappings'
12
+ require 'utility/elasticsearch/index/text_analysis_settings'
13
+ require 'utility/environment'
9
14
  require 'utility/errors'
10
15
  require 'utility/es_client'
11
- require 'utility/environment'
12
16
  require 'utility/exception_tracking'
13
17
  require 'utility/extension_mapping_util'
14
18
  require 'utility/logger'
15
- require 'utility/elasticsearch/index/mappings'
16
- require 'utility/elasticsearch/index/text_analysis_settings'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_service
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.3
4
+ version: 8.6.0.4.pre.20221114T233727Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-03 00:00:00.000000000 Z
11
+ date: 2022-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -198,14 +198,14 @@ dependencies:
198
198
  requirements:
199
199
  - - "~>"
200
200
  - !ruby/object:Gem::Version
201
- version: 8.4.0
201
+ version: 8.5.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - "~>"
207
207
  - !ruby/object:Gem::Version
208
- version: 8.4.0
208
+ version: 8.5.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: faraday
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -398,33 +398,39 @@ files:
398
398
  - lib/connectors/base/adapter.rb
399
399
  - lib/connectors/base/connector.rb
400
400
  - lib/connectors/base/custom_client.rb
401
+ - lib/connectors/base/simple_rules_parser.rb
401
402
  - lib/connectors/connector_status.rb
402
403
  - lib/connectors/crawler/scheduler.rb
404
+ - lib/connectors/example/attachments/first_attachment.txt
405
+ - lib/connectors/example/attachments/second_attachment.txt
406
+ - lib/connectors/example/attachments/third_attachment.txt
403
407
  - lib/connectors/example/connector.rb
404
- - lib/connectors/example/example_attachments/first_attachment.txt
405
- - lib/connectors/example/example_attachments/second_attachment.txt
406
- - lib/connectors/example/example_attachments/third_attachment.txt
407
408
  - lib/connectors/gitlab/adapter.rb
408
409
  - lib/connectors/gitlab/connector.rb
409
410
  - lib/connectors/gitlab/custom_client.rb
410
411
  - lib/connectors/gitlab/extractor.rb
411
412
  - lib/connectors/mongodb/connector.rb
413
+ - lib/connectors/mongodb/mongo_rules_parser.rb
412
414
  - lib/connectors/registry.rb
413
415
  - lib/connectors/sync_status.rb
414
- - lib/connectors_app/\
415
416
  - lib/connectors_service.rb
416
417
  - lib/connectors_utility.rb
417
418
  - lib/core.rb
418
419
  - lib/core/configuration.rb
420
+ - lib/core/connector_job.rb
419
421
  - lib/core/connector_settings.rb
420
422
  - lib/core/elastic_connector_actions.rb
423
+ - lib/core/filtering.rb
424
+ - lib/core/filtering/post_process_engine.rb
425
+ - lib/core/filtering/post_process_result.rb
426
+ - lib/core/filtering/simple_rule.rb
427
+ - lib/core/filtering/validation_job_runner.rb
428
+ - lib/core/filtering/validation_status.rb
421
429
  - lib/core/heartbeat.rb
430
+ - lib/core/ingestion.rb
431
+ - lib/core/ingestion/es_sink.rb
432
+ - lib/core/ingestion/ingester.rb
422
433
  - lib/core/native_scheduler.rb
423
- - lib/core/output_sink.rb
424
- - lib/core/output_sink/base_sink.rb
425
- - lib/core/output_sink/combined_sink.rb
426
- - lib/core/output_sink/console_sink.rb
427
- - lib/core/output_sink/es_sink.rb
428
434
  - lib/core/scheduler.rb
429
435
  - lib/core/single_scheduler.rb
430
436
  - lib/core/sync_job_runner.rb
@@ -433,6 +439,8 @@ files:
433
439
  - lib/stubs/connectors/stats.rb
434
440
  - lib/stubs/service_type.rb
435
441
  - lib/utility.rb
442
+ - lib/utility/bulk_queue.rb
443
+ - lib/utility/common.rb
436
444
  - lib/utility/constants.rb
437
445
  - lib/utility/cron.rb
438
446
  - lib/utility/elasticsearch/index/language_data.yml
@@ -443,6 +451,7 @@ files:
443
451
  - lib/utility/es_client.rb
444
452
  - lib/utility/exception_tracking.rb
445
453
  - lib/utility/extension_mapping_util.rb
454
+ - lib/utility/filtering.rb
446
455
  - lib/utility/logger.rb
447
456
  - lib/utility/middleware/basic_auth.rb
448
457
  - lib/utility/middleware/bearer_auth.rb
@@ -451,7 +460,7 @@ homepage: https://github.com/elastic/connectors-ruby
451
460
  licenses:
452
461
  - Elastic-2.0
453
462
  metadata: {}
454
- post_install_message:
463
+ post_install_message:
455
464
  rdoc_options: []
456
465
  require_paths:
457
466
  - lib
@@ -462,12 +471,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
462
471
  version: '0'
463
472
  required_rubygems_version: !ruby/object:Gem::Requirement
464
473
  requirements:
465
- - - ">="
474
+ - - ">"
466
475
  - !ruby/object:Gem::Version
467
- version: '0'
476
+ version: 1.3.1
468
477
  requirements: []
469
478
  rubygems_version: 3.0.3.1
470
- signing_key:
479
+ signing_key:
471
480
  specification_version: 4
472
481
  summary: Gem containing Elastic connectors service
473
482
  test_files: []
@@ -1,33 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- module Core
10
- module OutputSink
11
- class BaseSink
12
- def ingest(_document)
13
- raise 'not implemented'
14
- end
15
-
16
- def ingest_multiple(_documents)
17
- raise 'not implemented'
18
- end
19
-
20
- def delete(_id)
21
- raise 'not implemented'
22
- end
23
-
24
- def delete_multiple(_ids)
25
- raise 'not implemented'
26
- end
27
-
28
- def flush(_size: nil)
29
- raise 'not implemented'
30
- end
31
- end
32
- end
33
- end
@@ -1,38 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink/base_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class CombinedSink < Core::OutputSink::BaseSink
14
- def initialize(sinks = [])
15
- @sinks = sinks
16
- end
17
-
18
- def ingest(document)
19
- @sinks.each { |sink| sink.ingest(document) }
20
- end
21
-
22
- def flush(size: nil)
23
- @sinks.each { |sink| sink.flush(size: size) }
24
- end
25
-
26
- def ingest_multiple(documents)
27
- @sinks.each { |sink| sink.ingest_multiple(documents) }
28
- end
29
-
30
- def delete(id)
31
- @sinks.each { |sink| sink.delete(id) }
32
- end
33
-
34
- def delete_multiple(ids)
35
- @sinks.each { |sink| sink.delete_multiple(ids) }
36
- end
37
- end
38
- end
@@ -1,51 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'core/output_sink'
10
- require 'utility/logger'
11
-
12
- module Core::OutputSink
13
- class ConsoleSink < Core::OutputSink::BaseSink
14
- def ingest(document)
15
- print_header 'Got a single document:'
16
- puts document
17
- end
18
-
19
- def flush(size: nil)
20
- print_header 'Flushing'
21
- puts "Flush size: #{size}"
22
- end
23
-
24
- def ingest_multiple(documents)
25
- print_header 'Got multiple documents:'
26
- puts documents
27
- end
28
-
29
- def delete(id)
30
- print_header "Deleting single id: #{id}"
31
- puts id
32
- end
33
-
34
- def delete_multiple(ids)
35
- print_header "Deleting several ids: #{ids}"
36
- puts ids
37
- end
38
-
39
- private
40
-
41
- def print_delim
42
- puts '----------------------------------------------------'
43
- end
44
-
45
- def print_header(header)
46
- print_delim
47
- puts header
48
- print_delim
49
- end
50
- end
51
- end
@@ -1,74 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'active_support/core_ext/numeric/time'
10
- require 'app/config'
11
- require 'core/output_sink/base_sink'
12
- require 'utility/es_client'
13
- require 'utility/logger'
14
-
15
- module Core::OutputSink
16
- class EsSink < Core::OutputSink::BaseSink
17
- def initialize(index_name, request_pipeline, flush_threshold = 50)
18
- super()
19
- @client = Utility::EsClient.new(App::Config[:elasticsearch])
20
- @index_name = index_name
21
- @request_pipeline = request_pipeline
22
- @operation_queue = []
23
- @flush_threshold = flush_threshold
24
- end
25
-
26
- def ingest(document)
27
- return if document.blank?
28
-
29
- @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
30
- flush if ready_to_flush?
31
- end
32
-
33
- def delete(doc_id)
34
- return if doc_id.nil?
35
-
36
- @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
37
- flush if ready_to_flush?
38
- end
39
-
40
- def flush(size: nil)
41
- flush_size = size || @flush_threshold
42
-
43
- while @operation_queue.any?
44
- data_to_flush = @operation_queue.pop(flush_size)
45
- send_data(data_to_flush)
46
- end
47
- end
48
-
49
- def ingest_multiple(documents)
50
- Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
51
- documents.each { |doc| ingest(doc) }
52
- end
53
-
54
- def delete_multiple(ids)
55
- Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
56
- ids.each { |id| delete(id) }
57
- end
58
-
59
- private
60
-
61
- attr_accessor :index_name
62
-
63
- def send_data(ops)
64
- return if ops.empty?
65
-
66
- @client.bulk(:body => ops, :pipeline => @request_pipeline)
67
- Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
68
- end
69
-
70
- def ready_to_flush?
71
- @operation_queue.size >= @flush_threshold
72
- end
73
- end
74
- end