connectors_service 8.6.0.4.pre.20221116T024501Z → 8.7.0.0.pre.20221117T004928Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/connectors.yml +10 -9
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +2 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/ingestion/es_sink.rb +1 -1
- data/lib/core/jobs/consumer.rb +20 -2
- data/lib/core/sync_job_runner.rb +10 -2
- data/lib/utility/bulk_queue.rb +3 -1
- data/lib/utility/constants.rb +5 -0
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39e0bb6ae283bcb1afebd57e715342b7e30a7178c33653dfd05fee6f322061e6
|
4
|
+
data.tar.gz: e1fd33b8d28d7d906b4b8eea672a34c0be12e0a124977bbeb10c62c147690a8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb6b96ac0c07e9ed4cbe87f16a17844771c12d7a7ccbb2e8c7aeb571cb146f67e5a0489b1e73ff907613dcda442912ba09c193e0e8e4e11fec553ebe9afdec12
|
7
|
+
data.tar.gz: c3c2fc12ac9de6ed37a85dbfb46725e2de37c08beba453c3e9d8a3f0c6c56a078a1c5987931251ca84f18e5a9388b6698ff43abe3ce547d291537b3040b15b0a
|
data/config/connectors.yml
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
# general metadata
|
2
|
-
version: 8.6.0.4-20221116T024501Z
|
3
|
-
repository:
|
4
|
-
revision:
|
2
|
+
version: 8.7.0.0-20221117T004928Z
|
3
|
+
repository: https://github.com/elastic/connectors-ruby.git
|
4
|
+
revision: 294214a26b0fe9a4347763b01de681c336e8daae
|
5
5
|
elasticsearch:
|
6
|
+
cloud_id: CHANGEME
|
6
7
|
hosts: http://localhost:9200
|
7
|
-
api_key:
|
8
|
+
api_key: CHANGEME
|
8
9
|
retry_on_failure: 3
|
9
10
|
request_timeout: 120
|
10
11
|
disable_warnings: true
|
@@ -14,11 +15,11 @@ thread_pool:
|
|
14
15
|
min_threads: 0
|
15
16
|
max_threads: 5
|
16
17
|
max_queue: 100
|
17
|
-
log_level:
|
18
|
-
ecs_logging:
|
18
|
+
log_level: info
|
19
|
+
ecs_logging: true
|
19
20
|
poll_interval: 3
|
20
21
|
termination_timeout: 60
|
21
22
|
heartbeat_interval: 1800
|
22
|
-
native_mode:
|
23
|
-
connector_id:
|
24
|
-
service_type:
|
23
|
+
native_mode: true
|
24
|
+
connector_id: CHANGEME
|
25
|
+
service_type: CHANGEME
|
data/lib/app/config.rb
CHANGED
@@ -54,6 +54,9 @@ puts "Parsing #{CONFIG_FILE} configuration file."
|
|
54
54
|
optional(:poll_interval).value(:integer)
|
55
55
|
optional(:termination_timeout).value(:integer)
|
56
56
|
optional(:heartbeat_interval).value(:integer)
|
57
|
+
|
58
|
+
optional(:max_ingestion_queue_size).value(:integer) # items
|
59
|
+
optional(:max_ingestion_queue_bytes).value(:integer) # bytes
|
57
60
|
end
|
58
61
|
end
|
59
62
|
|
data/lib/app/dispatcher.rb
CHANGED
@@ -133,6 +133,8 @@ module App
|
|
133
133
|
min_threads: MIN_THREADS,
|
134
134
|
max_threads: MAX_THREADS,
|
135
135
|
max_queue: MAX_QUEUE,
|
136
|
+
max_ingestion_queue_size: (App::Config.max_ingestion_queue_size || Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE).to_i,
|
137
|
+
max_ingestion_queue_bytes: (App::Config.max_ingestion_queue_bytes || Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES).to_i,
|
136
138
|
scheduler: scheduler
|
137
139
|
)
|
138
140
|
|
data/lib/connectors_app///
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsApp
|
8
|
+
module Errors
|
9
|
+
INVALID_API_KEY = 'INVALID_API_KEY'
|
10
|
+
UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'
|
11
|
+
INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'
|
12
|
+
end
|
13
|
+
end
|
data/lib/core/ingestion/es_sink.rb
CHANGED
@@ -21,7 +21,7 @@ require 'elasticsearch/api'
|
|
21
21
|
module Core
|
22
22
|
module Ingestion
|
23
23
|
class EsSink
|
24
|
-
def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size =
|
24
|
+
def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
25
25
|
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
26
26
|
@index_name = index_name
|
27
27
|
@request_pipeline = request_pipeline
|
data/lib/core/jobs/consumer.rb
CHANGED
@@ -6,10 +6,20 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
+
require 'utility/constants'
|
10
|
+
|
9
11
|
module Core
|
10
12
|
module Jobs
|
11
13
|
class Consumer
|
12
|
-
def initialize(scheduler:,
|
14
|
+
def initialize(scheduler:,
|
15
|
+
max_ingestion_queue_size:,
|
16
|
+
max_ingestion_queue_bytes:,
|
17
|
+
poll_interval: 3,
|
18
|
+
termination_timeout: 60,
|
19
|
+
min_threads: 1,
|
20
|
+
max_threads: 5,
|
21
|
+
max_queue: 100,
|
22
|
+
idle_time: 5)
|
13
23
|
@scheduler = scheduler
|
14
24
|
@poll_interval = poll_interval
|
15
25
|
@termination_timeout = termination_timeout
|
@@ -18,6 +28,9 @@ module Core
|
|
18
28
|
@max_queue = max_queue
|
19
29
|
@idle_time = idle_time
|
20
30
|
|
31
|
+
@max_ingestion_queue_size = max_ingestion_queue_size
|
32
|
+
@max_ingestion_queue_bytes = max_ingestion_queue_bytes
|
33
|
+
|
21
34
|
@running = Concurrent::AtomicBoolean.new(false)
|
22
35
|
end
|
23
36
|
|
@@ -76,7 +89,12 @@ module Core
|
|
76
89
|
pool.post do
|
77
90
|
Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
|
78
91
|
Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
|
79
|
-
job_runner = Core::SyncJobRunner.new(
|
92
|
+
job_runner = Core::SyncJobRunner.new(
|
93
|
+
connector_settings,
|
94
|
+
job,
|
95
|
+
@max_ingestion_queue_size,
|
96
|
+
@max_ingestion_queue_bytes
|
97
|
+
)
|
80
98
|
job_runner.execute
|
81
99
|
rescue Core::JobAlreadyRunningError
|
82
100
|
Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
|
data/lib/core/sync_job_runner.rb
CHANGED
@@ -23,9 +23,17 @@ module Core
|
|
23
23
|
class SyncJobRunner
|
24
24
|
JOB_REPORTING_INTERVAL = 10
|
25
25
|
|
26
|
-
def initialize(connector_settings, job)
|
26
|
+
def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
|
27
27
|
@connector_settings = connector_settings
|
28
|
-
@sink = Core::Ingestion::EsSink.new(
|
28
|
+
@sink = Core::Ingestion::EsSink.new(
|
29
|
+
connector_settings.index_name,
|
30
|
+
@connector_settings.request_pipeline,
|
31
|
+
Utility::BulkQueue.new(
|
32
|
+
max_ingestion_queue_size,
|
33
|
+
max_ingestion_queue_bytes
|
34
|
+
),
|
35
|
+
max_ingestion_queue_bytes
|
36
|
+
)
|
29
37
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
30
38
|
@sync_finished = false
|
31
39
|
@sync_error = nil
|
data/lib/utility/bulk_queue.rb
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
|
7
7
|
require 'json'
|
8
8
|
|
9
|
+
require 'utility/constants'
|
10
|
+
|
9
11
|
module Utility
|
10
12
|
class BulkQueue
|
11
13
|
class QueueOverflowError < StandardError; end
|
12
14
|
|
13
15
|
# 500 items or 5MB
|
14
|
-
def initialize(operation_count_threshold =
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
15
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
16
18
|
@size_threshold = size_threshold.freeze
|
17
19
|
|
data/lib/utility/constants.rb
CHANGED
@@ -18,5 +18,10 @@ module Utility
|
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
19
|
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
20
|
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
|
+
|
22
|
+
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
|
+
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
24
|
+
# Maximum size of either whole BULK Elasticsearch operation or one document in it
|
25
|
+
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
|
21
26
|
end
|
22
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221116T024501Z
|
4
|
+
version: 8.7.0.0.pre.20221117T004928Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-16 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -420,6 +420,7 @@ files:
|
|
420
420
|
- lib/connectors/registry.rb
|
421
421
|
- lib/connectors/sync_status.rb
|
422
422
|
- lib/connectors/tolerable_error_helper.rb
|
423
|
+
- lib/connectors_app/\
|
423
424
|
- lib/connectors_service.rb
|
424
425
|
- lib/connectors_utility.rb
|
425
426
|
- lib/core.rb
|
@@ -469,7 +470,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
469
470
|
licenses:
|
470
471
|
- Elastic-2.0
|
471
472
|
metadata: {}
|
472
|
-
post_install_message:
|
473
|
+
post_install_message:
|
473
474
|
rdoc_options: []
|
474
475
|
require_paths:
|
475
476
|
- lib
|
@@ -485,7 +486,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
485
486
|
version: 1.3.1
|
486
487
|
requirements: []
|
487
488
|
rubygems_version: 3.0.3.1
|
488
|
-
signing_key:
|
489
|
+
signing_key:
|
489
490
|
specification_version: 4
|
490
491
|
summary: Gem containing Elastic connectors service
|
491
492
|
test_files: []
|