connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/dispatcher.rb +12 -0
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/connector.rb +19 -12
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +15 -0
- data/lib/connectors/gitlab/connector.rb +15 -1
- data/lib/connectors/mongodb/connector.rb +55 -36
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/sync_job_runner.rb +65 -17
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +16 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221104T200814Z
|
4
|
+
version: 8.6.0.4.pre.20221114T233727Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-04 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -398,6 +398,7 @@ files:
|
|
398
398
|
- lib/connectors/base/adapter.rb
|
399
399
|
- lib/connectors/base/connector.rb
|
400
400
|
- lib/connectors/base/custom_client.rb
|
401
|
+
- lib/connectors/base/simple_rules_parser.rb
|
401
402
|
- lib/connectors/connector_status.rb
|
402
403
|
- lib/connectors/crawler/scheduler.rb
|
403
404
|
- lib/connectors/example/attachments/first_attachment.txt
|
@@ -409,21 +410,27 @@ files:
|
|
409
410
|
- lib/connectors/gitlab/custom_client.rb
|
410
411
|
- lib/connectors/gitlab/extractor.rb
|
411
412
|
- lib/connectors/mongodb/connector.rb
|
413
|
+
- lib/connectors/mongodb/mongo_rules_parser.rb
|
412
414
|
- lib/connectors/registry.rb
|
413
415
|
- lib/connectors/sync_status.rb
|
414
416
|
- lib/connectors_service.rb
|
415
417
|
- lib/connectors_utility.rb
|
416
418
|
- lib/core.rb
|
417
419
|
- lib/core/configuration.rb
|
420
|
+
- lib/core/connector_job.rb
|
418
421
|
- lib/core/connector_settings.rb
|
419
422
|
- lib/core/elastic_connector_actions.rb
|
423
|
+
- lib/core/filtering.rb
|
424
|
+
- lib/core/filtering/post_process_engine.rb
|
425
|
+
- lib/core/filtering/post_process_result.rb
|
426
|
+
- lib/core/filtering/simple_rule.rb
|
427
|
+
- lib/core/filtering/validation_job_runner.rb
|
428
|
+
- lib/core/filtering/validation_status.rb
|
420
429
|
- lib/core/heartbeat.rb
|
430
|
+
- lib/core/ingestion.rb
|
431
|
+
- lib/core/ingestion/es_sink.rb
|
432
|
+
- lib/core/ingestion/ingester.rb
|
421
433
|
- lib/core/native_scheduler.rb
|
422
|
-
- lib/core/output_sink.rb
|
423
|
-
- lib/core/output_sink/base_sink.rb
|
424
|
-
- lib/core/output_sink/combined_sink.rb
|
425
|
-
- lib/core/output_sink/console_sink.rb
|
426
|
-
- lib/core/output_sink/es_sink.rb
|
427
434
|
- lib/core/scheduler.rb
|
428
435
|
- lib/core/single_scheduler.rb
|
429
436
|
- lib/core/sync_job_runner.rb
|
@@ -432,6 +439,7 @@ files:
|
|
432
439
|
- lib/stubs/connectors/stats.rb
|
433
440
|
- lib/stubs/service_type.rb
|
434
441
|
- lib/utility.rb
|
442
|
+
- lib/utility/bulk_queue.rb
|
435
443
|
- lib/utility/common.rb
|
436
444
|
- lib/utility/constants.rb
|
437
445
|
- lib/utility/cron.rb
|
@@ -443,6 +451,7 @@ files:
|
|
443
451
|
- lib/utility/es_client.rb
|
444
452
|
- lib/utility/exception_tracking.rb
|
445
453
|
- lib/utility/extension_mapping_util.rb
|
454
|
+
- lib/utility/filtering.rb
|
446
455
|
- lib/utility/logger.rb
|
447
456
|
- lib/utility/middleware/basic_auth.rb
|
448
457
|
- lib/utility/middleware/bearer_auth.rb
|
@@ -1,33 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module OutputSink
|
11
|
-
class BaseSink
|
12
|
-
def ingest(_document)
|
13
|
-
raise 'not implemented'
|
14
|
-
end
|
15
|
-
|
16
|
-
def ingest_multiple(_documents)
|
17
|
-
raise 'not implemented'
|
18
|
-
end
|
19
|
-
|
20
|
-
def delete(_id)
|
21
|
-
raise 'not implemented'
|
22
|
-
end
|
23
|
-
|
24
|
-
def delete_multiple(_ids)
|
25
|
-
raise 'not implemented'
|
26
|
-
end
|
27
|
-
|
28
|
-
def flush(_size: nil)
|
29
|
-
raise 'not implemented'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink/base_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class CombinedSink < Core::OutputSink::BaseSink
|
14
|
-
def initialize(sinks = [])
|
15
|
-
@sinks = sinks
|
16
|
-
end
|
17
|
-
|
18
|
-
def ingest(document)
|
19
|
-
@sinks.each { |sink| sink.ingest(document) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def flush(size: nil)
|
23
|
-
@sinks.each { |sink| sink.flush(size: size) }
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest_multiple(documents)
|
27
|
-
@sinks.each { |sink| sink.ingest_multiple(documents) }
|
28
|
-
end
|
29
|
-
|
30
|
-
def delete(id)
|
31
|
-
@sinks.each { |sink| sink.delete(id) }
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
@sinks.each { |sink| sink.delete_multiple(ids) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class ConsoleSink < Core::OutputSink::BaseSink
|
14
|
-
def ingest(document)
|
15
|
-
print_header 'Got a single document:'
|
16
|
-
puts document
|
17
|
-
end
|
18
|
-
|
19
|
-
def flush(size: nil)
|
20
|
-
print_header 'Flushing'
|
21
|
-
puts "Flush size: #{size}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def ingest_multiple(documents)
|
25
|
-
print_header 'Got multiple documents:'
|
26
|
-
puts documents
|
27
|
-
end
|
28
|
-
|
29
|
-
def delete(id)
|
30
|
-
print_header "Deleting single id: #{id}"
|
31
|
-
puts id
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
print_header "Deleting several ids: #{ids}"
|
36
|
-
puts ids
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def print_delim
|
42
|
-
puts '----------------------------------------------------'
|
43
|
-
end
|
44
|
-
|
45
|
-
def print_header(header)
|
46
|
-
print_delim
|
47
|
-
puts header
|
48
|
-
print_delim
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'active_support/core_ext/numeric/time'
|
10
|
-
require 'app/config'
|
11
|
-
require 'core/output_sink/base_sink'
|
12
|
-
require 'utility/es_client'
|
13
|
-
require 'utility/logger'
|
14
|
-
|
15
|
-
module Core::OutputSink
|
16
|
-
class EsSink < Core::OutputSink::BaseSink
|
17
|
-
def initialize(index_name, request_pipeline, flush_threshold = 50)
|
18
|
-
super()
|
19
|
-
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
20
|
-
@index_name = index_name
|
21
|
-
@request_pipeline = request_pipeline
|
22
|
-
@operation_queue = []
|
23
|
-
@flush_threshold = flush_threshold
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest(document)
|
27
|
-
return if document.blank?
|
28
|
-
|
29
|
-
@operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
|
30
|
-
flush if ready_to_flush?
|
31
|
-
end
|
32
|
-
|
33
|
-
def delete(doc_id)
|
34
|
-
return if doc_id.nil?
|
35
|
-
|
36
|
-
@operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
|
37
|
-
flush if ready_to_flush?
|
38
|
-
end
|
39
|
-
|
40
|
-
def flush(size: nil)
|
41
|
-
flush_size = size || @flush_threshold
|
42
|
-
|
43
|
-
while @operation_queue.any?
|
44
|
-
data_to_flush = @operation_queue.pop(flush_size)
|
45
|
-
send_data(data_to_flush)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def ingest_multiple(documents)
|
50
|
-
Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
|
51
|
-
documents.each { |doc| ingest(doc) }
|
52
|
-
end
|
53
|
-
|
54
|
-
def delete_multiple(ids)
|
55
|
-
Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
|
56
|
-
ids.each { |id| delete(id) }
|
57
|
-
end
|
58
|
-
|
59
|
-
private
|
60
|
-
|
61
|
-
attr_accessor :index_name
|
62
|
-
|
63
|
-
def send_data(ops)
|
64
|
-
return if ops.empty?
|
65
|
-
|
66
|
-
@client.bulk(:body => ops, :pipeline => @request_pipeline)
|
67
|
-
Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
|
68
|
-
end
|
69
|
-
|
70
|
-
def ready_to_flush?
|
71
|
-
@operation_queue.size >= @flush_threshold
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|