connectors_service 8.6.0.4.pre.20221114T233727Z → 8.6.0.4.pre.20221116T024501Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +4 -4
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/dispatcher.rb +30 -17
  5. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  6. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  7. data/lib/connectors/base/connector.rb +27 -5
  8. data/lib/connectors/example/connector.rb +3 -12
  9. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  10. data/lib/connectors/gitlab/connector.rb +3 -12
  11. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  12. data/lib/connectors/mongodb/connector.rb +9 -24
  13. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  14. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  15. data/lib/connectors/sync_status.rb +6 -1
  16. data/lib/connectors/tolerable_error_helper.rb +43 -0
  17. data/lib/core/connector_job.rb +96 -23
  18. data/lib/core/connector_settings.rb +29 -6
  19. data/lib/core/elastic_connector_actions.rb +77 -55
  20. data/lib/core/filtering/validation_job_runner.rb +1 -1
  21. data/lib/core/ingestion/es_sink.rb +68 -9
  22. data/lib/core/ingestion.rb +0 -1
  23. data/lib/core/jobs/consumer.rb +114 -0
  24. data/lib/core/jobs/producer.rb +26 -0
  25. data/lib/core/single_scheduler.rb +1 -1
  26. data/lib/core/sync_job_runner.rb +20 -12
  27. data/lib/core.rb +2 -0
  28. data/lib/utility/error_monitor.rb +108 -0
  29. data/lib/utility/errors.rb +0 -12
  30. data/lib/utility/logger.rb +0 -1
  31. data/lib/utility.rb +6 -0
  32. metadata +12 -3
  33. data/lib/core/ingestion/ingester.rb +0 -90
data/lib/utility/logger.rb CHANGED
@@ -4,7 +4,6 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
- require 'config'
8
7
  require 'logger'
9
8
  require 'active_support/core_ext/module'
10
9
  require 'active_support/core_ext/string/filters'
data/lib/utility.rb CHANGED
@@ -4,6 +4,8 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ # !!!!!!!!
8
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
7
9
  require 'utility/bulk_queue'
8
10
  require 'utility/common'
9
11
  require 'utility/constants'
@@ -11,8 +13,12 @@ require 'utility/cron'
11
13
  require 'utility/elasticsearch/index/mappings'
12
14
  require 'utility/elasticsearch/index/text_analysis_settings'
13
15
  require 'utility/environment'
16
+ require 'utility/error_monitor'
14
17
  require 'utility/errors'
18
+ require 'utility/filtering'
15
19
  require 'utility/es_client'
16
20
  require 'utility/exception_tracking'
17
21
  require 'utility/extension_mapping_util'
18
22
  require 'utility/logger'
23
+ # IF YOU EDIT THIS FILE, YOU MUST EDIT THE `connectors_utility.gemspec`
24
+ # !!!!!!!!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: connectors_service
3
3
  version: !ruby/object:Gem::Version
4
- version: 8.6.0.4.pre.20221114T233727Z
4
+ version: 8.6.0.4.pre.20221116T024501Z
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-14 00:00:00.000000000 Z
11
+ date: 2022-11-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -396,6 +396,8 @@ files:
396
396
  - lib/app/version.rb
397
397
  - lib/connectors.rb
398
398
  - lib/connectors/base/adapter.rb
399
+ - lib/connectors/base/advanced_snippet_against_schema_validator.rb
400
+ - lib/connectors/base/advanced_snippet_validator.rb
399
401
  - lib/connectors/base/connector.rb
400
402
  - lib/connectors/base/custom_client.rb
401
403
  - lib/connectors/base/simple_rules_parser.rb
@@ -405,14 +407,19 @@ files:
405
407
  - lib/connectors/example/attachments/second_attachment.txt
406
408
  - lib/connectors/example/attachments/third_attachment.txt
407
409
  - lib/connectors/example/connector.rb
410
+ - lib/connectors/example/example_advanced_snippet_validator.rb
408
411
  - lib/connectors/gitlab/adapter.rb
409
412
  - lib/connectors/gitlab/connector.rb
410
413
  - lib/connectors/gitlab/custom_client.rb
411
414
  - lib/connectors/gitlab/extractor.rb
415
+ - lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb
412
416
  - lib/connectors/mongodb/connector.rb
417
+ - lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb
418
+ - lib/connectors/mongodb/mongo_advanced_snippet_schema.rb
413
419
  - lib/connectors/mongodb/mongo_rules_parser.rb
414
420
  - lib/connectors/registry.rb
415
421
  - lib/connectors/sync_status.rb
422
+ - lib/connectors/tolerable_error_helper.rb
416
423
  - lib/connectors_service.rb
417
424
  - lib/connectors_utility.rb
418
425
  - lib/core.rb
@@ -429,7 +436,8 @@ files:
429
436
  - lib/core/heartbeat.rb
430
437
  - lib/core/ingestion.rb
431
438
  - lib/core/ingestion/es_sink.rb
432
- - lib/core/ingestion/ingester.rb
439
+ - lib/core/jobs/consumer.rb
440
+ - lib/core/jobs/producer.rb
433
441
  - lib/core/native_scheduler.rb
434
442
  - lib/core/scheduler.rb
435
443
  - lib/core/single_scheduler.rb
@@ -447,6 +455,7 @@ files:
447
455
  - lib/utility/elasticsearch/index/mappings.rb
448
456
  - lib/utility/elasticsearch/index/text_analysis_settings.rb
449
457
  - lib/utility/environment.rb
458
+ - lib/utility/error_monitor.rb
450
459
  - lib/utility/errors.rb
451
460
  - lib/utility/es_client.rb
452
461
  - lib/utility/exception_tracking.rb
data/lib/core/ingestion/ingester.rb DELETED
@@ -1,90 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'utility/logger'
10
-
11
- module Core
12
- module Ingestion
13
- class Ingester
14
- def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
15
- @sink_strategy = sink_strategy
16
- @max_allowed_document_size = max_allowed_document_size
17
-
18
- @ingested_count = 0
19
- @ingested_volume = 0
20
- @deleted_count = 0
21
- end
22
-
23
- def ingest(document)
24
- unless document&.any?
25
- Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
26
- return
27
- end
28
-
29
- serialized_document = @sink_strategy.serialize(document)
30
- document_size = serialized_document.bytesize
31
-
32
- if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
33
- Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
34
- return
35
- end
36
-
37
- @sink_strategy.ingest(document['id'], serialized_document)
38
-
39
- @ingested_count += 1
40
- @ingested_volume += document_size
41
- end
42
-
43
- def ingest_multiple(documents)
44
- documents.each { |doc| ingest(doc) }
45
- end
46
-
47
- def delete(id)
48
- return if id.nil?
49
-
50
- @sink_strategy.delete(id)
51
-
52
- @deleted_count += 1
53
- end
54
-
55
- def delete_multiple(ids)
56
- ids.each { |id| delete(id) }
57
- end
58
-
59
- def flush
60
- @sink_strategy.flush
61
- end
62
-
63
- def ingestion_stats
64
- {
65
- :indexed_document_count => @ingested_count,
66
- :indexed_document_volume => @ingested_volume,
67
- :deleted_document_count => @deleted_count
68
- }
69
- end
70
-
71
- private
72
-
73
- def do_ingest(_id, _serialized_document)
74
- raise NotImplementedError
75
- end
76
-
77
- def do_delete(_id)
78
- raise NotImplementedError
79
- end
80
-
81
- def do_flush
82
- raise NotImplementedError
83
- end
84
-
85
- def do_serialize(_document)
86
- raise NotImplementedError
87
- end
88
- end
89
- end
90
- end