connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/dispatcher.rb +12 -0
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/connector.rb +19 -12
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +15 -0
- data/lib/connectors/gitlab/connector.rb +15 -1
- data/lib/connectors/mongodb/connector.rb +55 -36
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/sync_job_runner.rb +65 -17
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +16 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221104T200814Z
|
4
|
+
version: 8.6.0.4.pre.20221114T233727Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-04 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -398,6 +398,7 @@ files:
|
|
398
398
|
- lib/connectors/base/adapter.rb
|
399
399
|
- lib/connectors/base/connector.rb
|
400
400
|
- lib/connectors/base/custom_client.rb
|
401
|
+
- lib/connectors/base/simple_rules_parser.rb
|
401
402
|
- lib/connectors/connector_status.rb
|
402
403
|
- lib/connectors/crawler/scheduler.rb
|
403
404
|
- lib/connectors/example/attachments/first_attachment.txt
|
@@ -409,21 +410,27 @@ files:
|
|
409
410
|
- lib/connectors/gitlab/custom_client.rb
|
410
411
|
- lib/connectors/gitlab/extractor.rb
|
411
412
|
- lib/connectors/mongodb/connector.rb
|
413
|
+
- lib/connectors/mongodb/mongo_rules_parser.rb
|
412
414
|
- lib/connectors/registry.rb
|
413
415
|
- lib/connectors/sync_status.rb
|
414
416
|
- lib/connectors_service.rb
|
415
417
|
- lib/connectors_utility.rb
|
416
418
|
- lib/core.rb
|
417
419
|
- lib/core/configuration.rb
|
420
|
+
- lib/core/connector_job.rb
|
418
421
|
- lib/core/connector_settings.rb
|
419
422
|
- lib/core/elastic_connector_actions.rb
|
423
|
+
- lib/core/filtering.rb
|
424
|
+
- lib/core/filtering/post_process_engine.rb
|
425
|
+
- lib/core/filtering/post_process_result.rb
|
426
|
+
- lib/core/filtering/simple_rule.rb
|
427
|
+
- lib/core/filtering/validation_job_runner.rb
|
428
|
+
- lib/core/filtering/validation_status.rb
|
420
429
|
- lib/core/heartbeat.rb
|
430
|
+
- lib/core/ingestion.rb
|
431
|
+
- lib/core/ingestion/es_sink.rb
|
432
|
+
- lib/core/ingestion/ingester.rb
|
421
433
|
- lib/core/native_scheduler.rb
|
422
|
-
- lib/core/output_sink.rb
|
423
|
-
- lib/core/output_sink/base_sink.rb
|
424
|
-
- lib/core/output_sink/combined_sink.rb
|
425
|
-
- lib/core/output_sink/console_sink.rb
|
426
|
-
- lib/core/output_sink/es_sink.rb
|
427
434
|
- lib/core/scheduler.rb
|
428
435
|
- lib/core/single_scheduler.rb
|
429
436
|
- lib/core/sync_job_runner.rb
|
@@ -432,6 +439,7 @@ files:
|
|
432
439
|
- lib/stubs/connectors/stats.rb
|
433
440
|
- lib/stubs/service_type.rb
|
434
441
|
- lib/utility.rb
|
442
|
+
- lib/utility/bulk_queue.rb
|
435
443
|
- lib/utility/common.rb
|
436
444
|
- lib/utility/constants.rb
|
437
445
|
- lib/utility/cron.rb
|
@@ -443,6 +451,7 @@ files:
|
|
443
451
|
- lib/utility/es_client.rb
|
444
452
|
- lib/utility/exception_tracking.rb
|
445
453
|
- lib/utility/extension_mapping_util.rb
|
454
|
+
- lib/utility/filtering.rb
|
446
455
|
- lib/utility/logger.rb
|
447
456
|
- lib/utility/middleware/basic_auth.rb
|
448
457
|
- lib/utility/middleware/bearer_auth.rb
|
@@ -1,33 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
module Core
|
10
|
-
module OutputSink
|
11
|
-
class BaseSink
|
12
|
-
def ingest(_document)
|
13
|
-
raise 'not implemented'
|
14
|
-
end
|
15
|
-
|
16
|
-
def ingest_multiple(_documents)
|
17
|
-
raise 'not implemented'
|
18
|
-
end
|
19
|
-
|
20
|
-
def delete(_id)
|
21
|
-
raise 'not implemented'
|
22
|
-
end
|
23
|
-
|
24
|
-
def delete_multiple(_ids)
|
25
|
-
raise 'not implemented'
|
26
|
-
end
|
27
|
-
|
28
|
-
def flush(_size: nil)
|
29
|
-
raise 'not implemented'
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink/base_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class CombinedSink < Core::OutputSink::BaseSink
|
14
|
-
def initialize(sinks = [])
|
15
|
-
@sinks = sinks
|
16
|
-
end
|
17
|
-
|
18
|
-
def ingest(document)
|
19
|
-
@sinks.each { |sink| sink.ingest(document) }
|
20
|
-
end
|
21
|
-
|
22
|
-
def flush(size: nil)
|
23
|
-
@sinks.each { |sink| sink.flush(size: size) }
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest_multiple(documents)
|
27
|
-
@sinks.each { |sink| sink.ingest_multiple(documents) }
|
28
|
-
end
|
29
|
-
|
30
|
-
def delete(id)
|
31
|
-
@sinks.each { |sink| sink.delete(id) }
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
@sinks.each { |sink| sink.delete_multiple(ids) }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class ConsoleSink < Core::OutputSink::BaseSink
|
14
|
-
def ingest(document)
|
15
|
-
print_header 'Got a single document:'
|
16
|
-
puts document
|
17
|
-
end
|
18
|
-
|
19
|
-
def flush(size: nil)
|
20
|
-
print_header 'Flushing'
|
21
|
-
puts "Flush size: #{size}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def ingest_multiple(documents)
|
25
|
-
print_header 'Got multiple documents:'
|
26
|
-
puts documents
|
27
|
-
end
|
28
|
-
|
29
|
-
def delete(id)
|
30
|
-
print_header "Deleting single id: #{id}"
|
31
|
-
puts id
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
print_header "Deleting several ids: #{ids}"
|
36
|
-
puts ids
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def print_delim
|
42
|
-
puts '----------------------------------------------------'
|
43
|
-
end
|
44
|
-
|
45
|
-
def print_header(header)
|
46
|
-
print_delim
|
47
|
-
puts header
|
48
|
-
print_delim
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'active_support/core_ext/numeric/time'
|
10
|
-
require 'app/config'
|
11
|
-
require 'core/output_sink/base_sink'
|
12
|
-
require 'utility/es_client'
|
13
|
-
require 'utility/logger'
|
14
|
-
|
15
|
-
module Core::OutputSink
|
16
|
-
class EsSink < Core::OutputSink::BaseSink
|
17
|
-
def initialize(index_name, request_pipeline, flush_threshold = 50)
|
18
|
-
super()
|
19
|
-
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
20
|
-
@index_name = index_name
|
21
|
-
@request_pipeline = request_pipeline
|
22
|
-
@operation_queue = []
|
23
|
-
@flush_threshold = flush_threshold
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest(document)
|
27
|
-
return if document.blank?
|
28
|
-
|
29
|
-
@operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
|
30
|
-
flush if ready_to_flush?
|
31
|
-
end
|
32
|
-
|
33
|
-
def delete(doc_id)
|
34
|
-
return if doc_id.nil?
|
35
|
-
|
36
|
-
@operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
|
37
|
-
flush if ready_to_flush?
|
38
|
-
end
|
39
|
-
|
40
|
-
def flush(size: nil)
|
41
|
-
flush_size = size || @flush_threshold
|
42
|
-
|
43
|
-
while @operation_queue.any?
|
44
|
-
data_to_flush = @operation_queue.pop(flush_size)
|
45
|
-
send_data(data_to_flush)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def ingest_multiple(documents)
|
50
|
-
Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
|
51
|
-
documents.each { |doc| ingest(doc) }
|
52
|
-
end
|
53
|
-
|
54
|
-
def delete_multiple(ids)
|
55
|
-
Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
|
56
|
-
ids.each { |id| delete(id) }
|
57
|
-
end
|
58
|
-
|
59
|
-
private
|
60
|
-
|
61
|
-
attr_accessor :index_name
|
62
|
-
|
63
|
-
def send_data(ops)
|
64
|
-
return if ops.empty?
|
65
|
-
|
66
|
-
@client.bulk(:body => ops, :pipeline => @request_pipeline)
|
67
|
-
Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
|
68
|
-
end
|
69
|
-
|
70
|
-
def ready_to_flush?
|
71
|
-
@operation_queue.size >= @flush_threshold
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|