connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T010623Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +9 -8
- data/lib/app/app.rb +4 -0
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +44 -17
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +43 -14
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +6 -0
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +6 -1
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +47 -43
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +210 -0
- data/lib/core/connector_settings.rb +52 -16
- data/lib/core/elastic_connector_actions.rb +320 -59
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +118 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
- data/lib/core/jobs/consumer.rb +132 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +80 -16
- data/lib/core.rb +4 -0
- data/lib/utility/bulk_queue.rb +87 -0
- data/lib/utility/constants.rb +7 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +1 -1
- data/lib/utility.rb +11 -4
- metadata +31 -12
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -1,51 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'core/output_sink'
|
10
|
-
require 'utility/logger'
|
11
|
-
|
12
|
-
module Core::OutputSink
|
13
|
-
class ConsoleSink < Core::OutputSink::BaseSink
|
14
|
-
def ingest(document)
|
15
|
-
print_header 'Got a single document:'
|
16
|
-
puts document
|
17
|
-
end
|
18
|
-
|
19
|
-
def flush(size: nil)
|
20
|
-
print_header 'Flushing'
|
21
|
-
puts "Flush size: #{size}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def ingest_multiple(documents)
|
25
|
-
print_header 'Got multiple documents:'
|
26
|
-
puts documents
|
27
|
-
end
|
28
|
-
|
29
|
-
def delete(id)
|
30
|
-
print_header "Deleting single id: #{id}"
|
31
|
-
puts id
|
32
|
-
end
|
33
|
-
|
34
|
-
def delete_multiple(ids)
|
35
|
-
print_header "Deleting several ids: #{ids}"
|
36
|
-
puts ids
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def print_delim
|
42
|
-
puts '----------------------------------------------------'
|
43
|
-
end
|
44
|
-
|
45
|
-
def print_header(header)
|
46
|
-
print_delim
|
47
|
-
puts header
|
48
|
-
print_delim
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
-
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
-
# you may not use this file except in compliance with the Elastic License.
|
5
|
-
#
|
6
|
-
|
7
|
-
# frozen_string_literal: true
|
8
|
-
|
9
|
-
require 'active_support/core_ext/numeric/time'
|
10
|
-
require 'app/config'
|
11
|
-
require 'core/output_sink/base_sink'
|
12
|
-
require 'utility/es_client'
|
13
|
-
require 'utility/logger'
|
14
|
-
|
15
|
-
module Core::OutputSink
|
16
|
-
class EsSink < Core::OutputSink::BaseSink
|
17
|
-
def initialize(index_name, request_pipeline, flush_threshold = 50)
|
18
|
-
super()
|
19
|
-
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
20
|
-
@index_name = index_name
|
21
|
-
@request_pipeline = request_pipeline
|
22
|
-
@operation_queue = []
|
23
|
-
@flush_threshold = flush_threshold
|
24
|
-
end
|
25
|
-
|
26
|
-
def ingest(document)
|
27
|
-
return if document.blank?
|
28
|
-
|
29
|
-
@operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
|
30
|
-
flush if ready_to_flush?
|
31
|
-
end
|
32
|
-
|
33
|
-
def delete(doc_id)
|
34
|
-
return if doc_id.nil?
|
35
|
-
|
36
|
-
@operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
|
37
|
-
flush if ready_to_flush?
|
38
|
-
end
|
39
|
-
|
40
|
-
def flush(size: nil)
|
41
|
-
flush_size = size || @flush_threshold
|
42
|
-
|
43
|
-
while @operation_queue.any?
|
44
|
-
data_to_flush = @operation_queue.pop(flush_size)
|
45
|
-
send_data(data_to_flush)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def ingest_multiple(documents)
|
50
|
-
Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
|
51
|
-
documents.each { |doc| ingest(doc) }
|
52
|
-
end
|
53
|
-
|
54
|
-
def delete_multiple(ids)
|
55
|
-
Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
|
56
|
-
ids.each { |id| delete(id) }
|
57
|
-
end
|
58
|
-
|
59
|
-
private
|
60
|
-
|
61
|
-
attr_accessor :index_name
|
62
|
-
|
63
|
-
def send_data(ops)
|
64
|
-
return if ops.empty?
|
65
|
-
|
66
|
-
@client.bulk(:body => ops, :pipeline => @request_pipeline)
|
67
|
-
Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
|
68
|
-
end
|
69
|
-
|
70
|
-
def ready_to_flush?
|
71
|
-
@operation_queue.size >= @flush_threshold
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|