gitlab-labkit 1.3.4 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gitlab-labkit.gemspec +5 -1
- data/lib/gitlab-labkit.rb +3 -0
- data/lib/labkit/application_sli/README.md +69 -0
- data/lib/labkit/application_sli/apdex.rb +23 -0
- data/lib/labkit/application_sli/error_rate.rb +23 -0
- data/lib/labkit/application_sli.rb +76 -0
- data/lib/labkit/logging/field_validator.rb +10 -11
- data/lib/labkit/middleware/sidekiq/tracing/client.rb +1 -1
- data/lib/labkit/middleware/sidekiq/tracing/server.rb +1 -1
- data/lib/labkit/tracing/README.md +716 -0
- data/lib/labkit/tracing/abstract_instrumenter.rb +1 -1
- data/lib/labkit/tracing/adapters/base_span.rb +35 -0
- data/lib/labkit/tracing/adapters/base_tracer.rb +39 -0
- data/lib/labkit/tracing/adapters/opentelemetry_span.rb +73 -0
- data/lib/labkit/tracing/adapters/opentelemetry_tracer.rb +102 -0
- data/lib/labkit/tracing/adapters/opentracing_span.rb +70 -0
- data/lib/labkit/tracing/adapters/opentracing_tracer.rb +50 -0
- data/lib/labkit/tracing/auto_initialize.rb +46 -0
- data/lib/labkit/tracing/factory.rb +26 -38
- data/lib/labkit/tracing/grpc/client_interceptor.rb +1 -1
- data/lib/labkit/tracing/grpc/server_interceptor.rb +2 -2
- data/lib/labkit/tracing/jaeger_factory.rb +12 -9
- data/lib/labkit/tracing/open_telemetry_factory.rb +218 -0
- data/lib/labkit/tracing/open_tracing_factory.rb +48 -0
- data/lib/labkit/tracing/rack_middleware.rb +1 -1
- data/lib/labkit/tracing/railtie.rb +15 -0
- data/lib/labkit/tracing/tracing_utils.rb +37 -34
- data/lib/labkit/tracing.rb +108 -5
- data/lib/labkit/user_experience_sli/null.rb +2 -0
- metadata +78 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bf6262472d851bcdfeb96b0ed3b7acf25f462b0b968d2ac2f431ea946b68b33a
|
|
4
|
+
data.tar.gz: b04bfc82c7ea63848849af40c7940788796b486a16c41416de824b7440f01992
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7c5a7b9002239cf23e7609fd8c07174bf588c0f2f1f2177616d6f98f4490c8e32f74cb7c8e62c1bf8ae839da24e30023e9bd733f1a6e40bbf75484d614a3d9ad
|
|
7
|
+
data.tar.gz: e699b4a653c268c7333bdad41006a5c52d0fbdaf0aa4c21ca90bbb396695d6519695243e0548e0f5439f9bd11366c6a3359c013766deadaa6996e31bbbfb7c7a
|
data/gitlab-labkit.gemspec
CHANGED
|
@@ -27,8 +27,11 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_runtime_dependency "grpc", ">= 1.75" # Be sure to update the "grpc-tools" dev_dependency too
|
|
28
28
|
spec.add_runtime_dependency "google-protobuf", ">= 3.25", "< 5.0"
|
|
29
29
|
spec.add_runtime_dependency "jaeger-client", "~> 1.1.0"
|
|
30
|
-
spec.add_runtime_dependency
|
|
30
|
+
spec.add_runtime_dependency "json_schemer", ">= 2.3.0", "< 3.0"
|
|
31
31
|
spec.add_runtime_dependency "openssl", "~> 3.3.2"
|
|
32
|
+
spec.add_runtime_dependency "opentelemetry-sdk", "~> 1.10"
|
|
33
|
+
spec.add_runtime_dependency "opentelemetry-instrumentation-all", "~> 0.89.1"
|
|
34
|
+
spec.add_runtime_dependency "opentelemetry-exporter-otlp", "~> 0.31.1"
|
|
32
35
|
spec.add_runtime_dependency "opentracing", "~> 0.4"
|
|
33
36
|
spec.add_runtime_dependency "pg_query", ">= 6.1.0", "< 7.0"
|
|
34
37
|
spec.add_runtime_dependency "prometheus-client-mmap", ">= 1.2", "< 2.0"
|
|
@@ -46,6 +49,7 @@ Gem::Specification.new do |spec|
|
|
|
46
49
|
spec.add_development_dependency "pry", "~> 0.12"
|
|
47
50
|
spec.add_development_dependency "pry-byebug", "~> 3.11"
|
|
48
51
|
spec.add_development_dependency "rack", "~> 2.0"
|
|
52
|
+
spec.add_development_dependency "railties", ">= 5.0.0", "< 8.1.0"
|
|
49
53
|
spec.add_development_dependency "rake", "~> 13.2"
|
|
50
54
|
spec.add_development_dependency "rest-client", "~> 2.1.0"
|
|
51
55
|
spec.add_development_dependency "rspec", "~> 3.12.0"
|
data/lib/gitlab-labkit.rb
CHANGED
|
@@ -10,6 +10,7 @@ module Labkit
|
|
|
10
10
|
autoload :Context, "labkit/context"
|
|
11
11
|
autoload :Correlation, "labkit/correlation"
|
|
12
12
|
autoload :CoveredExperience, "labkit/user_experience_sli" # Backward compatibility alias
|
|
13
|
+
autoload :ApplicationSli, "labkit/application_sli"
|
|
13
14
|
autoload :UserExperienceSli, "labkit/user_experience_sli"
|
|
14
15
|
autoload :FIPS, "labkit/fips"
|
|
15
16
|
autoload :Tracing, "labkit/tracing"
|
|
@@ -46,4 +47,6 @@ module Labkit
|
|
|
46
47
|
autoload :HTTPClientPublisher, "labkit/httpclient_publisher"
|
|
47
48
|
end
|
|
48
49
|
|
|
50
|
+
Labkit::Tracing::AutoInitialize.initialize! if defined?(Labkit::Tracing)
|
|
51
|
+
|
|
49
52
|
# rubocop:enable Naming/FileName
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Application SLIs
|
|
2
|
+
|
|
3
|
+
This module provides [Application Service Level Indicators (SLIs)](https://docs.gitlab.com/development/application_slis/)
|
|
4
|
+
for monitoring and observability. It allows defining SLIs directly in Ruby code, keeping the definition of operations and their success close to the implementation.
|
|
5
|
+
|
|
6
|
+
Two SLI types are available:
|
|
7
|
+
|
|
8
|
+
- **`Labkit::ApplicationSli::Apdex`** - Measures the performance of successful operations using a success rate.
|
|
9
|
+
- **`Labkit::ApplicationSli::ErrorRate`** - Measures the rate of unsuccessful operations using an error rate.
|
|
10
|
+
|
|
11
|
+
## Defining a new SLI
|
|
12
|
+
|
|
13
|
+
When you define an SLI, two [Prometheus counters](https://prometheus.io/docs/concepts/metric_types/#counter) are emitted: a total operation counter, and a numerator counter for the success or error rate.
|
|
14
|
+
|
|
15
|
+
`Labkit::ApplicationSli::Apdex` defines:
|
|
16
|
+
|
|
17
|
+
- `gitlab_sli_<name>_apdex_total` - total number of measurements
|
|
18
|
+
- `gitlab_sli_<name>_apdex_success_total` - number of successful measurements
|
|
19
|
+
|
|
20
|
+
`Labkit::ApplicationSli::ErrorRate` defines:
|
|
21
|
+
|
|
22
|
+
- `gitlab_sli_<name>_total` - total number of measurements
|
|
23
|
+
- `gitlab_sli_<name>_error_total` - number of error measurements
|
|
24
|
+
|
|
25
|
+
## Initializing an SLI
|
|
26
|
+
|
|
27
|
+
Before the first Prometheus scrape, initialize the SLI with all possible label combinations to [avoid missing metrics](https://prometheus.io/docs/practices/instrumentation/#avoid-missing-metrics):
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
Labkit::ApplicationSli::Apdex.initialize_sli(:received_email, [
|
|
31
|
+
{
|
|
32
|
+
feature_category: :team_planning,
|
|
33
|
+
email_type: :create_issue
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
feature_category: :service_desk,
|
|
37
|
+
email_type: :service_desk
|
|
38
|
+
}
|
|
39
|
+
])
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Tracking operations
|
|
43
|
+
|
|
44
|
+
Increment the SLI counters using the `#increment` method with the appropriate labels.
|
|
45
|
+
|
|
46
|
+
For `Apdex`, pass `success:` to indicate whether the operation met the performance target:
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
Labkit::ApplicationSli::Apdex[:received_email].increment(
|
|
50
|
+
labels: {
|
|
51
|
+
feature_category: :service_desk,
|
|
52
|
+
email_type: :service_desk
|
|
53
|
+
},
|
|
54
|
+
success: issue_created?
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
For `ErrorRate`, pass `error:` to indicate whether the operation failed:
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
Labkit::ApplicationSli::ErrorRate[:merge].increment(
|
|
62
|
+
labels: {
|
|
63
|
+
merge_type: :fast_forward
|
|
64
|
+
},
|
|
65
|
+
error: !merge_success?
|
|
66
|
+
)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
When `success:` (or `error:`) is truthy, both the total and numerator counters are incremented. When falsy, only the total counter is incremented.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Labkit
  module ApplicationSli
    # Apdex-flavoured SLI: the numerator counter tracks measurements that were
    # reported as successful, next to the total counter supplied by
    # Labkit::ApplicationSli.
    class Apdex
      include Labkit::ApplicationSli

      # Records one measurement.
      #
      # @param labels [Hash] label set handed to the underlying counters
      # @param success [Boolean] when truthy the success counter is bumped in
      #   addition to the total counter
      def increment(labels:, success:)
        super(labels: labels, increment_numerator: success)
      end

      private

      # Builds the metric name for this SLI; Apdex metrics carry an extra
      # "apdex" segment between the SLI name and the suffix.
      #
      # @param suffix [String] trailing part of the metric name
      # @return [Symbol]
      def counter_name(suffix)
        format('%s_%s_apdex_%s', COUNTER_PREFIX, name, suffix).to_sym
      end

      # Counter that holds the number of successful measurements.
      def numerator_counter
        description = "Number of successful measurements for #{name}"
        prometheus.counter(counter_name('success_total'), description)
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Labkit
  module ApplicationSli
    # Error-rate SLI: the numerator counter tracks measurements that were
    # reported as errors, next to the total counter supplied by
    # Labkit::ApplicationSli.
    class ErrorRate
      include Labkit::ApplicationSli

      # Records one measurement.
      #
      # @param labels [Hash] label set handed to the underlying counters
      # @param error [Boolean] when truthy the error counter is bumped in
      #   addition to the total counter
      def increment(labels:, error:)
        super(labels: labels, increment_numerator: error)
      end

      private

      # Builds the metric name for this SLI from the shared prefix, the SLI
      # name and the given suffix.
      #
      # @param suffix [String] trailing part of the metric name
      # @return [Symbol]
      def counter_name(suffix)
        format('%s_%s_%s', COUNTER_PREFIX, name, suffix).to_sym
      end

      # Counter that holds the number of error measurements.
      def numerator_counter
        description = "Number of error measurements for #{name}"
        prometheus.counter(counter_name('error_total'), description)
      end
    end
  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Labkit
  # Mixin that gives a class the shape of an application SLI: a named pair of
  # counters (a total and a numerator) plus a per-class registry of SLIs.
  #
  # Including classes must define two methods that this module calls but does
  # not implement:
  #
  # * +counter_name(suffix)+ - returns the metric name for the given suffix
  # * +numerator_counter+    - returns the counter incremented alongside the total
  module ApplicationSli
    COUNTER_PREFIX = 'gitlab_sli'

    # Registry API added to every including class (see +included+ below).
    module ClassMethods
      # Guards SLI creation. The constant is defined once on this module, so
      # the same lock is used by every class that includes ApplicationSli.
      INITIALIZATION_MUTEX = Mutex.new

      # Looks up the SLI registered under +name+, creating a placeholder
      # (no label combinations) when none exists yet.
      #
      # @param name [Symbol, String] registry key, also used in metric names
      # @return [Object] the SLI instance
      def [](name)
        known_slis[name] || initialize_sli(name, [])
      end

      # Registers an SLI and touches its counters for each label combination.
      #
      # An SLI that has already been initialized with label combinations is
      # returned unchanged. A placeholder (registered without combinations) is
      # replaced once combinations are supplied, and reused when the call
      # supplies none, so repeated or racing placeholder requests keep
      # returning the instance that is actually registered.
      #
      # @param name [Symbol, String] registry key, also used in metric names
      # @param possible_label_combinations [Array<Hash>] label sets to
      #   pre-register on both counters
      # @return [Object] the registered SLI instance
      def initialize_sli(name, possible_label_combinations)
        INITIALIZATION_MUTEX.synchronize do
          existing = known_slis[name]
          # `none?` mirrors the `any?` test in #initialize_counters: a call
          # that cannot upgrade the placeholder must not replace it.
          reusable = existing && (existing.initialized? || possible_label_combinations.none?)
          next existing if reusable

          sli = new(name)
          sli.initialize_counters(possible_label_combinations)
          known_slis[name] = sli
        end
      end

      # @param name [Symbol, String]
      # @return [Boolean] true when an SLI is registered under +name+ and it
      #   was set up with at least one label combination
      def initialized?(name)
        known_slis.key?(name) && known_slis[name].initialized?
      end

      private

      # Registry hash, stored in an instance variable of the extending class,
      # so each SLI class keeps its own set of names.
      def known_slis
        @known_slis ||= {}
      end
    end

    # Hook run on `include`: exposes the registry API as class methods.
    def self.included(mod)
      mod.extend(ClassMethods)
    end

    attr_reader :name

    # @param name [Symbol, String] SLI name, interpolated into metric names
    #   and descriptions
    def initialize(name)
      @name = name
      @initialized_with_combinations = false
    end

    # Calls +get+ on the total and numerator counters for every label
    # combination. Per the module README this is meant to make the series
    # exist before the first scrape; what +get+ does is up to the metrics client.
    #
    # @param possible_label_combinations [Array<Hash>]
    def initialize_counters(possible_label_combinations)
      @initialized_with_combinations = possible_label_combinations.any?
      possible_label_combinations.each do |label_combination|
        total_counter.get(label_combination)
        numerator_counter.get(label_combination)
      end
    end

    # Records one measurement: the total counter is always incremented, the
    # numerator counter only when +increment_numerator+ is truthy.
    #
    # @param labels [Hash] label set passed to the counters
    # @param increment_numerator [Boolean]
    def increment(labels:, increment_numerator:)
      total_counter.increment(labels)
      numerator_counter.increment(labels) if increment_numerator
    end

    # @return [Boolean] whether #initialize_counters received at least one
    #   label combination
    def initialized?
      @initialized_with_combinations
    end

    private

    # Counter holding the total number of measurements for this SLI.
    def total_counter
      prometheus.counter(counter_name('total'), "Total number of measurements for #{name}")
    end

    # Metrics client used to obtain counters.
    def prometheus
      Labkit::Metrics::Client
    end

    autoload :Apdex, "labkit/application_sli/apdex"
    autoload :ErrorRate, "labkit/application_sli/error_rate"
  end
end
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
4
|
-
require
|
|
5
|
-
require 'yaml'
|
|
3
|
+
require "json"
|
|
4
|
+
require "yaml"
|
|
6
5
|
|
|
7
|
-
require_relative
|
|
8
|
-
require_relative
|
|
9
|
-
require_relative
|
|
6
|
+
require_relative "field_validator/config"
|
|
7
|
+
require_relative "field_validator/log_interceptor"
|
|
8
|
+
require_relative "field_validator/registry"
|
|
10
9
|
|
|
11
10
|
module Labkit
|
|
12
11
|
module Logging
|
|
@@ -46,14 +45,14 @@ module Labkit
|
|
|
46
45
|
|
|
47
46
|
return if detected_offenses.empty? && new_offenses.empty? && removed_offenses.empty?
|
|
48
47
|
|
|
49
|
-
in_ci = ENV[
|
|
48
|
+
in_ci = ENV["CI"] == "true"
|
|
50
49
|
|
|
51
50
|
output_ndjson(detected_offenses) if in_ci
|
|
52
51
|
|
|
53
52
|
# Auto-remove fixed offenses (not in CI to avoid race conditions)
|
|
54
53
|
handle_removed_offenses(removed_offenses) if removed_offenses.any? && !in_ci
|
|
55
54
|
|
|
56
|
-
if ENV[
|
|
55
|
+
if ENV["LABKIT_LOGGING_TODO_UPDATE"] == "true"
|
|
57
56
|
handle_update(new_offenses)
|
|
58
57
|
elsif new_offenses.any?
|
|
59
58
|
handle_new_offenses(new_offenses)
|
|
@@ -98,7 +97,7 @@ module Labkit
|
|
|
98
97
|
end
|
|
99
98
|
|
|
100
99
|
def handle_new_offenses(new_offenses)
|
|
101
|
-
if ENV[
|
|
100
|
+
if ENV["CI"] == "true" && Config.skip_ci_failure?
|
|
102
101
|
warn baseline_generation_message(new_offenses)
|
|
103
102
|
else
|
|
104
103
|
warn report_new_offenses(new_offenses)
|
|
@@ -120,7 +119,7 @@ module Labkit
|
|
|
120
119
|
"Documentation: https://gitlab.com/gitlab-org/ruby/gems/labkit-ruby/-/blob/master/doc/FIELD_STANDARDIZATION.md",
|
|
121
120
|
"",
|
|
122
121
|
"--- Offenses Summary ---",
|
|
123
|
-
"Total offenses: #{offenses.size} across #{offenses.map { |o| o['callsite'] }.uniq.size} file(s)",
|
|
122
|
+
"Total offenses: #{offenses.size} across #{offenses.map { |o| o['callsite'] }.uniq.size} file(s)", # rubocop:disable Rails/Pluck
|
|
124
123
|
""
|
|
125
124
|
]
|
|
126
125
|
lines.join("\n")
|
|
@@ -157,7 +156,7 @@ module Labkit
|
|
|
157
156
|
|
|
158
157
|
lines << ""
|
|
159
158
|
lines << ("=" * 80)
|
|
160
|
-
lines << "Total: #{new_offenses.size} new offense(s) in #{new_offenses.map { |o| o['callsite'] }.uniq.size} file(s)"
|
|
159
|
+
lines << "Total: #{new_offenses.size} new offense(s) in #{new_offenses.map { |o| o['callsite'] }.uniq.size} file(s)" # rubocop:disable Rails/Pluck
|
|
161
160
|
lines << ""
|
|
162
161
|
lines << "See https://gitlab.com/gitlab-org/ruby/gems/labkit-ruby/-/blob/master/doc/FIELD_STANDARDIZATION.md"
|
|
163
162
|
lines << ("=" * 80)
|
|
@@ -17,7 +17,7 @@ module Labkit
|
|
|
17
17
|
def call(_worker_class, job, _queue, _redis_pool)
|
|
18
18
|
Labkit::Tracing::TracingUtils.with_tracing(operation_name: "sidekiq:#{job_class(job)}", tags: tags_from_job(job, SPAN_KIND)) do |span|
|
|
19
19
|
# Inject the details directly into the job
|
|
20
|
-
Labkit::Tracing::TracingUtils.tracer.
|
|
20
|
+
Labkit::Tracing::TracingUtils.tracer.inject_context(span, job)
|
|
21
21
|
|
|
22
22
|
yield
|
|
23
23
|
end
|
|
@@ -15,7 +15,7 @@ module Labkit
|
|
|
15
15
|
SPAN_KIND = "server"
|
|
16
16
|
|
|
17
17
|
def call(_worker, job, _queue)
|
|
18
|
-
context = Labkit::Tracing::TracingUtils.tracer.
|
|
18
|
+
context = Labkit::Tracing::TracingUtils.tracer.extract_context(job)
|
|
19
19
|
|
|
20
20
|
Labkit::Tracing::TracingUtils.with_tracing(operation_name: "sidekiq:#{job_class(job)}", child_of: context, tags: tags_from_job(job, SPAN_KIND)) { |_span| yield }
|
|
21
21
|
end
|