connectors_service 8.6.0.4.pre.20221116T024501Z → 8.7.0.0.pre.20221117T004928Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +10 -9
- data/lib/app/config.rb +3 -0
- data/lib/app/dispatcher.rb +2 -0
- data/lib/connectors_app/// +13 -0
- data/lib/core/ingestion/es_sink.rb +1 -1
- data/lib/core/jobs/consumer.rb +20 -2
- data/lib/core/sync_job_runner.rb +10 -2
- data/lib/utility/bulk_queue.rb +3 -1
- data/lib/utility/constants.rb +5 -0
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39e0bb6ae283bcb1afebd57e715342b7e30a7178c33653dfd05fee6f322061e6
|
4
|
+
data.tar.gz: e1fd33b8d28d7d906b4b8eea672a34c0be12e0a124977bbeb10c62c147690a8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb6b96ac0c07e9ed4cbe87f16a17844771c12d7a7ccbb2e8c7aeb571cb146f67e5a0489b1e73ff907613dcda442912ba09c193e0e8e4e11fec553ebe9afdec12
|
7
|
+
data.tar.gz: c3c2fc12ac9de6ed37a85dbfb46725e2de37c08beba453c3e9d8a3f0c6c56a078a1c5987931251ca84f18e5a9388b6698ff43abe3ce547d291537b3040b15b0a
|
data/config/connectors.yml
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
# general metadata
|
2
|
-
version: 8.6.0.4-20221116T024501Z
|
3
|
-
repository:
|
4
|
-
revision:
|
2
|
+
version: 8.7.0.0-20221117T004928Z
|
3
|
+
repository: https://github.com/elastic/connectors-ruby.git
|
4
|
+
revision: 294214a26b0fe9a4347763b01de681c336e8daae
|
5
5
|
elasticsearch:
|
6
|
+
cloud_id: CHANGEME
|
6
7
|
hosts: http://localhost:9200
|
7
|
-
api_key:
|
8
|
+
api_key: CHANGEME
|
8
9
|
retry_on_failure: 3
|
9
10
|
request_timeout: 120
|
10
11
|
disable_warnings: true
|
@@ -14,11 +15,11 @@ thread_pool:
|
|
14
15
|
min_threads: 0
|
15
16
|
max_threads: 5
|
16
17
|
max_queue: 100
|
17
|
-
log_level:
|
18
|
-
ecs_logging:
|
18
|
+
log_level: info
|
19
|
+
ecs_logging: true
|
19
20
|
poll_interval: 3
|
20
21
|
termination_timeout: 60
|
21
22
|
heartbeat_interval: 1800
|
22
|
-
native_mode:
|
23
|
-
connector_id:
|
24
|
-
service_type:
|
23
|
+
native_mode: true
|
24
|
+
connector_id: CHANGEME
|
25
|
+
service_type: CHANGEME
|
data/lib/app/config.rb
CHANGED
@@ -54,6 +54,9 @@ puts "Parsing #{CONFIG_FILE} configuration file."
|
|
54
54
|
optional(:poll_interval).value(:integer)
|
55
55
|
optional(:termination_timeout).value(:integer)
|
56
56
|
optional(:heartbeat_interval).value(:integer)
|
57
|
+
|
58
|
+
optional(:max_ingestion_queue_size).value(:integer) # items
|
59
|
+
optional(:max_ingestion_queue_bytes).value(:integer) # bytes
|
57
60
|
end
|
58
61
|
end
|
59
62
|
|
data/lib/app/dispatcher.rb
CHANGED
@@ -133,6 +133,8 @@ module App
|
|
133
133
|
min_threads: MIN_THREADS,
|
134
134
|
max_threads: MAX_THREADS,
|
135
135
|
max_queue: MAX_QUEUE,
|
136
|
+
max_ingestion_queue_size: (App::Config.max_ingestion_queue_size || Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_SIZE).to_i,
|
137
|
+
max_ingestion_queue_bytes: (App::Config.max_ingestion_queue_bytes || Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES).to_i,
|
136
138
|
scheduler: scheduler
|
137
139
|
)
|
138
140
|
|
data/lib/connectors_app///
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsApp
|
8
|
+
module Errors
|
9
|
+
INVALID_API_KEY = 'INVALID_API_KEY'
|
10
|
+
UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'
|
11
|
+
INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'
|
12
|
+
end
|
13
|
+
end
|
data/lib/core/ingestion/es_sink.rb
CHANGED
@@ -21,7 +21,7 @@ require 'elasticsearch/api'
|
|
21
21
|
module Core
|
22
22
|
module Ingestion
|
23
23
|
class EsSink
|
24
|
-
def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size =
|
24
|
+
def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
25
25
|
@client = Utility::EsClient.new(App::Config[:elasticsearch])
|
26
26
|
@index_name = index_name
|
27
27
|
@request_pipeline = request_pipeline
|
data/lib/core/jobs/consumer.rb
CHANGED
@@ -6,10 +6,20 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
+
require 'utility/constants'
|
10
|
+
|
9
11
|
module Core
|
10
12
|
module Jobs
|
11
13
|
class Consumer
|
12
|
-
def initialize(scheduler:,
|
14
|
+
def initialize(scheduler:,
|
15
|
+
max_ingestion_queue_size:,
|
16
|
+
max_ingestion_queue_bytes:,
|
17
|
+
poll_interval: 3,
|
18
|
+
termination_timeout: 60,
|
19
|
+
min_threads: 1,
|
20
|
+
max_threads: 5,
|
21
|
+
max_queue: 100,
|
22
|
+
idle_time: 5)
|
13
23
|
@scheduler = scheduler
|
14
24
|
@poll_interval = poll_interval
|
15
25
|
@termination_timeout = termination_timeout
|
@@ -18,6 +28,9 @@ module Core
|
|
18
28
|
@max_queue = max_queue
|
19
29
|
@idle_time = idle_time
|
20
30
|
|
31
|
+
@max_ingestion_queue_size = max_ingestion_queue_size
|
32
|
+
@max_ingestion_queue_bytes = max_ingestion_queue_bytes
|
33
|
+
|
21
34
|
@running = Concurrent::AtomicBoolean.new(false)
|
22
35
|
end
|
23
36
|
|
@@ -76,7 +89,12 @@ module Core
|
|
76
89
|
pool.post do
|
77
90
|
Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
|
78
91
|
Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
|
79
|
-
job_runner = Core::SyncJobRunner.new(
|
92
|
+
job_runner = Core::SyncJobRunner.new(
|
93
|
+
connector_settings,
|
94
|
+
job,
|
95
|
+
@max_ingestion_queue_size,
|
96
|
+
@max_ingestion_queue_bytes
|
97
|
+
)
|
80
98
|
job_runner.execute
|
81
99
|
rescue Core::JobAlreadyRunningError
|
82
100
|
Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
|
data/lib/core/sync_job_runner.rb
CHANGED
@@ -23,9 +23,17 @@ module Core
|
|
23
23
|
class SyncJobRunner
|
24
24
|
JOB_REPORTING_INTERVAL = 10
|
25
25
|
|
26
|
-
def initialize(connector_settings, job)
|
26
|
+
def initialize(connector_settings, job, max_ingestion_queue_size, max_ingestion_queue_bytes)
|
27
27
|
@connector_settings = connector_settings
|
28
|
-
@sink = Core::Ingestion::EsSink.new(
|
28
|
+
@sink = Core::Ingestion::EsSink.new(
|
29
|
+
connector_settings.index_name,
|
30
|
+
@connector_settings.request_pipeline,
|
31
|
+
Utility::BulkQueue.new(
|
32
|
+
max_ingestion_queue_size,
|
33
|
+
max_ingestion_queue_bytes
|
34
|
+
),
|
35
|
+
max_ingestion_queue_bytes
|
36
|
+
)
|
29
37
|
@connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
|
30
38
|
@sync_finished = false
|
31
39
|
@sync_error = nil
|
data/lib/utility/bulk_queue.rb
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
|
7
7
|
require 'json'
|
8
8
|
|
9
|
+
require 'utility/constants'
|
10
|
+
|
9
11
|
module Utility
|
10
12
|
class BulkQueue
|
11
13
|
class QueueOverflowError < StandardError; end
|
12
14
|
|
13
15
|
# 500 items or 5MB
|
14
|
-
def initialize(operation_count_threshold =
|
16
|
+
def initialize(operation_count_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_LENGTH, size_threshold = Utility::Constants::DEFAULT_MAX_INGESTION_QUEUE_BYTES)
|
15
17
|
@operation_count_threshold = operation_count_threshold.freeze
|
16
18
|
@size_threshold = size_threshold.freeze
|
17
19
|
|
data/lib/utility/constants.rb
CHANGED
@@ -18,5 +18,10 @@ module Utility
|
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
19
|
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
20
|
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
|
+
|
22
|
+
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
|
+
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
24
|
+
# Maximum size of either whole BULK Elasticsearch operation or one document in it
|
25
|
+
DEFAULT_MAX_INGESTION_QUEUE_BYTES = 5 * 1024 * 1024
|
21
26
|
end
|
22
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_service
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.4.pre.20221116T024501Z
|
4
|
+
version: 8.7.0.0.pre.20221117T004928Z
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-16 00:00:00.000000000 Z
|
11
|
+
date: 2022-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -420,6 +420,7 @@ files:
|
|
420
420
|
- lib/connectors/registry.rb
|
421
421
|
- lib/connectors/sync_status.rb
|
422
422
|
- lib/connectors/tolerable_error_helper.rb
|
423
|
+
- lib/connectors_app/\
|
423
424
|
- lib/connectors_service.rb
|
424
425
|
- lib/connectors_utility.rb
|
425
426
|
- lib/core.rb
|
@@ -469,7 +470,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
469
470
|
licenses:
|
470
471
|
- Elastic-2.0
|
471
472
|
metadata: {}
|
472
|
-
post_install_message:
|
473
|
+
post_install_message:
|
473
474
|
rdoc_options: []
|
474
475
|
require_paths:
|
475
476
|
- lib
|
@@ -485,7 +486,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
485
486
|
version: 1.3.1
|
486
487
|
requirements: []
|
487
488
|
rubygems_version: 3.0.3.1
|
488
|
-
signing_key:
|
489
|
+
signing_key:
|
489
490
|
specification_version: 4
|
490
491
|
summary: Gem containing Elastic connectors service
|
491
492
|
test_files: []
|