conductor_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +142 -0
- data/LICENSE +190 -0
- data/README.md +517 -0
- data/examples/agentic_workflows/llm_chat.rb +106 -0
- data/examples/dynamic_workflow.rb +177 -0
- data/examples/event_handler.rb +94 -0
- data/examples/event_listener_examples.rb +430 -0
- data/examples/helloworld/greetings_worker.rb +24 -0
- data/examples/helloworld/helloworld.rb +99 -0
- data/examples/kitchensink.rb +213 -0
- data/examples/metadata_journey.rb +189 -0
- data/examples/metrics_example.rb +284 -0
- data/examples/new_dsl_demo.rb +141 -0
- data/examples/orkes/http_poll.rb +83 -0
- data/examples/orkes/secrets_example.rb +69 -0
- data/examples/orkes/wait_for_webhook.rb +90 -0
- data/examples/prompt_journey.rb +245 -0
- data/examples/rag_workflow.rb +167 -0
- data/examples/schedule_journey.rb +244 -0
- data/examples/simple_worker.rb +125 -0
- data/examples/simple_workflow.rb +89 -0
- data/examples/task_context_example.rb +257 -0
- data/examples/task_listener_example.rb +192 -0
- data/examples/worker_configuration_example.rb +282 -0
- data/examples/workflow_dsl.rb +316 -0
- data/examples/workflow_ops.rb +305 -0
- data/lib/conductor/client/authorization_client.rb +238 -0
- data/lib/conductor/client/integration_client.rb +108 -0
- data/lib/conductor/client/metadata_client.rb +139 -0
- data/lib/conductor/client/prompt_client.rb +58 -0
- data/lib/conductor/client/scheduler_client.rb +132 -0
- data/lib/conductor/client/schema_client.rb +32 -0
- data/lib/conductor/client/secret_client.rb +48 -0
- data/lib/conductor/client/task_client.rb +168 -0
- data/lib/conductor/client/workflow_client.rb +242 -0
- data/lib/conductor/configuration/authentication_settings.rb +17 -0
- data/lib/conductor/configuration.rb +103 -0
- data/lib/conductor/exceptions.rb +86 -0
- data/lib/conductor/http/api/application_resource_api.rb +107 -0
- data/lib/conductor/http/api/authorization_resource_api.rb +56 -0
- data/lib/conductor/http/api/event_resource_api.rb +133 -0
- data/lib/conductor/http/api/gateway_auth_resource_api.rb +48 -0
- data/lib/conductor/http/api/group_resource_api.rb +76 -0
- data/lib/conductor/http/api/integration_resource_api.rb +145 -0
- data/lib/conductor/http/api/metadata_resource_api.rb +231 -0
- data/lib/conductor/http/api/prompt_resource_api.rb +81 -0
- data/lib/conductor/http/api/role_resource_api.rb +60 -0
- data/lib/conductor/http/api/scheduler_resource_api.rb +211 -0
- data/lib/conductor/http/api/schema_resource_api.rb +82 -0
- data/lib/conductor/http/api/secret_resource_api.rb +134 -0
- data/lib/conductor/http/api/task_resource_api.rb +321 -0
- data/lib/conductor/http/api/token_resource_api.rb +42 -0
- data/lib/conductor/http/api/user_resource_api.rb +59 -0
- data/lib/conductor/http/api/workflow_bulk_resource_api.rb +91 -0
- data/lib/conductor/http/api/workflow_resource_api.rb +451 -0
- data/lib/conductor/http/api_client.rb +437 -0
- data/lib/conductor/http/models/authentication_config.rb +67 -0
- data/lib/conductor/http/models/authorization_request.rb +39 -0
- data/lib/conductor/http/models/base_model.rb +162 -0
- data/lib/conductor/http/models/bulk_response.rb +39 -0
- data/lib/conductor/http/models/conductor_application.rb +39 -0
- data/lib/conductor/http/models/conductor_user.rb +53 -0
- data/lib/conductor/http/models/create_or_update_application_request.rb +24 -0
- data/lib/conductor/http/models/create_or_update_role_request.rb +27 -0
- data/lib/conductor/http/models/event_handler.rb +130 -0
- data/lib/conductor/http/models/generate_token_request.rb +27 -0
- data/lib/conductor/http/models/group.rb +36 -0
- data/lib/conductor/http/models/integration.rb +70 -0
- data/lib/conductor/http/models/integration_api.rb +53 -0
- data/lib/conductor/http/models/integration_api_update.rb +43 -0
- data/lib/conductor/http/models/integration_update.rb +36 -0
- data/lib/conductor/http/models/permission.rb +24 -0
- data/lib/conductor/http/models/poll_data.rb +33 -0
- data/lib/conductor/http/models/prompt_template.rb +59 -0
- data/lib/conductor/http/models/prompt_template_test_request.rb +43 -0
- data/lib/conductor/http/models/rerun_workflow_request.rb +37 -0
- data/lib/conductor/http/models/role.rb +27 -0
- data/lib/conductor/http/models/schema_def.rb +59 -0
- data/lib/conductor/http/models/search_result.rb +187 -0
- data/lib/conductor/http/models/skip_task_request.rb +27 -0
- data/lib/conductor/http/models/start_workflow_request.rb +68 -0
- data/lib/conductor/http/models/subject_ref.rb +35 -0
- data/lib/conductor/http/models/tag_object.rb +36 -0
- data/lib/conductor/http/models/target_ref.rb +39 -0
- data/lib/conductor/http/models/task.rb +156 -0
- data/lib/conductor/http/models/task_def.rb +95 -0
- data/lib/conductor/http/models/task_exec_log.rb +30 -0
- data/lib/conductor/http/models/task_result.rb +115 -0
- data/lib/conductor/http/models/task_result_status.rb +24 -0
- data/lib/conductor/http/models/token.rb +33 -0
- data/lib/conductor/http/models/upsert_group_request.rb +30 -0
- data/lib/conductor/http/models/upsert_user_request.rb +39 -0
- data/lib/conductor/http/models/workflow.rb +202 -0
- data/lib/conductor/http/models/workflow_def.rb +73 -0
- data/lib/conductor/http/models/workflow_schedule.rb +100 -0
- data/lib/conductor/http/models/workflow_state_update.rb +30 -0
- data/lib/conductor/http/models/workflow_status_constants.rb +57 -0
- data/lib/conductor/http/models/workflow_task.rb +169 -0
- data/lib/conductor/http/models/workflow_test_request.rb +67 -0
- data/lib/conductor/http/rest_client.rb +211 -0
- data/lib/conductor/orkes/models/access_key.rb +56 -0
- data/lib/conductor/orkes/models/granted_permission.rb +27 -0
- data/lib/conductor/orkes/models/metadata_tag.rb +15 -0
- data/lib/conductor/orkes/models/rate_limit_tag.rb +15 -0
- data/lib/conductor/orkes/orkes_clients.rb +69 -0
- data/lib/conductor/version.rb +5 -0
- data/lib/conductor/worker/events/conductor_event.rb +40 -0
- data/lib/conductor/worker/events/global_dispatcher.rb +37 -0
- data/lib/conductor/worker/events/http_events.rb +25 -0
- data/lib/conductor/worker/events/listener_registry.rb +40 -0
- data/lib/conductor/worker/events/listeners.rb +34 -0
- data/lib/conductor/worker/events/sync_event_dispatcher.rb +78 -0
- data/lib/conductor/worker/events/task_runner_events.rb +271 -0
- data/lib/conductor/worker/events/workflow_events.rb +49 -0
- data/lib/conductor/worker/fiber_executor.rb +532 -0
- data/lib/conductor/worker/ractor_task_runner.rb +501 -0
- data/lib/conductor/worker/task_context.rb +114 -0
- data/lib/conductor/worker/task_definition_registrar.rb +322 -0
- data/lib/conductor/worker/task_handler.rb +360 -0
- data/lib/conductor/worker/task_in_progress.rb +60 -0
- data/lib/conductor/worker/task_runner.rb +538 -0
- data/lib/conductor/worker/telemetry/metrics_collector.rb +196 -0
- data/lib/conductor/worker/telemetry/prometheus_backend.rb +224 -0
- data/lib/conductor/worker/worker.rb +355 -0
- data/lib/conductor/worker/worker_config.rb +154 -0
- data/lib/conductor/worker/worker_registry.rb +71 -0
- data/lib/conductor/workflow/dsl/input_ref.rb +37 -0
- data/lib/conductor/workflow/dsl/output_ref.rb +44 -0
- data/lib/conductor/workflow/dsl/parallel_builder.rb +49 -0
- data/lib/conductor/workflow/dsl/switch_builder.rb +74 -0
- data/lib/conductor/workflow/dsl/task_ref.rb +178 -0
- data/lib/conductor/workflow/dsl/workflow_builder.rb +1016 -0
- data/lib/conductor/workflow/dsl/workflow_definition.rb +150 -0
- data/lib/conductor/workflow/llm/chat_message.rb +47 -0
- data/lib/conductor/workflow/llm/embedding_model.rb +19 -0
- data/lib/conductor/workflow/llm/tool_call.rb +43 -0
- data/lib/conductor/workflow/llm/tool_spec.rb +46 -0
- data/lib/conductor/workflow/task_type.rb +68 -0
- data/lib/conductor/workflow/timeout_policy.rb +31 -0
- data/lib/conductor/workflow/workflow_executor.rb +373 -0
- data/lib/conductor.rb +192 -0
- metadata +359 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
require_relative '../events/listeners'
|
|
5
|
+
require_relative '../events/global_dispatcher'
|
|
6
|
+
|
|
7
|
+
module Conductor
|
|
8
|
+
module Worker
|
|
9
|
+
module Telemetry
|
|
10
|
+
# MetricsCollector - Canonical SDK worker metrics from the
|
|
11
|
+
# harmonization spec (sdk-metrics-harmonization.md).
|
|
12
|
+
#
|
|
13
|
+
# Uses camelCase domain labels (taskType, workflowType) and includes
|
|
14
|
+
# status labels on time histograms.
|
|
15
|
+
class MetricsCollector
  include Events::TaskRunnerEventsListener
  include Events::WorkflowEventsListener
  include Events::HttpEventsListener

  # Values used for the `status` label on the task time histograms.
  STATUS_SUCCESS = 'SUCCESS'
  STATUS_FAILURE = 'FAILURE'

  attr_reader :backend, :measure_payload_size

  # Factory that forwards every option unchanged to {#initialize}.
  #
  # @param backend [Symbol, Object] :null, :prometheus, or a custom backend
  # @param subscribe_global_http [Boolean] Auto-subscribe to GlobalDispatcher
  #   for HttpApiRequest events from the HTTP layer (default true).
  # @param measure_payload_size [Boolean] Record workflow_input_size_bytes
  #   (requires JSON serialization; default true). Set false to skip
  #   serialization overhead for large payloads.
  # @param logger [Logger, nil] Optional logger for diagnostic output in rescue blocks
  # @return [MetricsCollector]
  def self.create(backend: :null, subscribe_global_http: true, measure_payload_size: true, logger: nil)
    new(backend: backend, subscribe_global_http: subscribe_global_http,
        measure_payload_size: measure_payload_size, logger: logger)
  end

  # Resolves the backend, installs a logger (a null-device logger when none
  # is given) and, if requested, registers an HTTP listener on the
  # GlobalDispatcher.
  #
  # @param backend [Symbol, Object, nil] :null / nil, :prometheus, or any
  #   object that is used directly as the backend
  # @param subscribe_global_http [Boolean] register for HttpApiRequest events
  # @param measure_payload_size [Boolean] when false, on_workflow_input_size
  #   records nothing
  # @param logger [Logger, nil] receives debug messages from rescue blocks
  # @raise [ConfigurationError] when :prometheus is requested but its
  #   backend file cannot be loaded
  def initialize(backend: :null, subscribe_global_http: true, measure_payload_size: true, logger: nil)
    @backend = load_backend(backend)
    @logger = logger || Logger.new(File::NULL)
    @measure_payload_size = measure_payload_size
    @http_listener = nil
    subscribe_to_global_http_events if subscribe_global_http
  end

  # Unregisters the HTTP listener installed by the constructor, if any.
  # Errors raised while unregistering are logged at debug level and swallowed.
  def stop
    return unless @http_listener

    Events::GlobalDispatcher.instance.unregister(Events::HttpApiRequest, @http_listener)
    @http_listener = nil
  rescue StandardError => e
    @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
  end

  # --- Task Runner Event Handlers ---

  # Counts one poll attempt for the event's task type.
  def on_poll_started(event)
    @backend.increment('task_poll_total', labels: { taskType: event.task_type })
  end

  # Records the poll duration with status SUCCESS.
  def on_poll_completed(event)
    observe_time('task_poll_time_seconds', event.duration_ms,
                 { taskType: event.task_type, status: STATUS_SUCCESS })
  end

  # Counts the poll error (labelled with the cause's class name) and records
  # the poll duration with status FAILURE.
  def on_poll_failure(event)
    @backend.increment('task_poll_error_total',
                       labels: { taskType: event.task_type, exception: event.cause.class.name })
    observe_time('task_poll_time_seconds', event.duration_ms,
                 { taskType: event.task_type, status: STATUS_FAILURE })
  end

  # Counts one started task execution for the event's task type.
  def on_task_execution_started(event)
    @backend.increment('task_execution_started_total', labels: { taskType: event.task_type })
  end

  # Records the execution duration with status SUCCESS and, when the event
  # carries one, the result size in bytes.
  def on_task_execution_completed(event)
    observe_time('task_execute_time_seconds', event.duration_ms,
                 { taskType: event.task_type, status: STATUS_SUCCESS })

    return unless event.output_size_bytes

    @backend.observe('task_result_size_bytes', event.output_size_bytes,
                     labels: { taskType: event.task_type })
  end

  # Counts the execution error and records the duration with status FAILURE.
  def on_task_execution_failure(event)
    @backend.increment('task_execute_error_total',
                       labels: { taskType: event.task_type, exception: event.cause.class.name })
    observe_time('task_execute_time_seconds', event.duration_ms,
                 { taskType: event.task_type, status: STATUS_FAILURE })
  end

  # Records the task-update duration with status SUCCESS.
  def on_task_update_completed(event)
    observe_time('task_update_time_seconds', event.duration_ms,
                 { taskType: event.task_type, status: STATUS_SUCCESS })
  end

  # Counts the update error; the duration is recorded only when the event
  # exposes a non-nil duration_ms.
  def on_task_update_failure(event)
    @backend.increment('task_update_error_total',
                       labels: { taskType: event.task_type, exception: event.cause.class.name })

    return unless event.respond_to?(:duration_ms) && event.duration_ms

    observe_time('task_update_time_seconds', event.duration_ms,
                 { taskType: event.task_type, status: STATUS_FAILURE })
  end

  # Counts one paused-task event for the event's task type.
  def on_task_paused(event)
    @backend.increment('task_paused_total', labels: { taskType: event.task_type })
  end

  # Counts an uncaught exception, labelled with the cause's class name.
  def on_thread_uncaught_exception(event)
    @backend.increment('thread_uncaught_exceptions_total',
                       labels: { exception: event.cause.class.name })
  end

  # Sets the active_workers gauge for the event's task type to event.count.
  def on_active_workers_changed(event)
    @backend.set('active_workers', event.count, labels: { taskType: event.task_type })
  end

  # --- Workflow Event Handlers ---

  # Counts a workflow start error, labelled with workflow type and the
  # cause's class name.
  def on_workflow_start_error(event)
    @backend.increment('workflow_start_error_total',
                       labels: { workflowType: event.workflow_type,
                                 exception: event.cause.class.name })
  end

  # Records the workflow input size unless payload measurement is disabled.
  # A nil version is reported as an empty string.
  def on_workflow_input_size(event)
    return unless @measure_payload_size

    @backend.observe('workflow_input_size_bytes', event.size_bytes,
                     labels: { workflowType: event.workflow_type,
                               version: (event.version || '').to_s })
  end

  # --- HTTP Event Handlers ---

  # Records the HTTP request duration labelled with method, uri and status.
  def on_http_api_request(event)
    observe_time('http_api_client_request_seconds', event.duration_ms,
                 { method: event.method, uri: event.uri, status: event.status })
  end

  private

  # Converts a millisecond duration to seconds and records it on the backend.
  # An event without a duration is skipped instead of raising NoMethodError
  # on nil, so a missing measurement cannot break the caller's event dispatch.
  #
  # @param name [String] histogram name
  # @param duration_ms [Numeric, nil] duration in milliseconds
  # @param labels [Hash] label set passed through to the backend
  def observe_time(name, duration_ms, labels)
    return if duration_ms.nil?

    @backend.observe(name, duration_ms / 1000.0, labels: labels)
  end

  # Registers a lambda that forwards HttpApiRequest events to
  # on_http_api_request. The lambda is kept in @http_listener so that #stop
  # can unregister the same object. Registration errors are logged at debug
  # level and swallowed.
  def subscribe_to_global_http_events
    @http_listener = ->(event) { on_http_api_request(event) }
    Events::GlobalDispatcher.instance.register(Events::HttpApiRequest, @http_listener)
  rescue StandardError => e
    @logger.debug { "Telemetry error (non-fatal): #{e.class}: #{e.message}" }
  end

  # Maps the backend option to a backend object: :null / nil gives a
  # NullBackend, :prometheus gives a PrometheusBackend, and anything else is
  # returned as-is.
  def load_backend(backend)
    case backend
    when :null, nil
      NullBackend.new
    when :prometheus
      load_prometheus_backend
    else
      backend
    end
  end

  # Lazily loads the Prometheus backend file and instantiates it.
  #
  # @raise [ConfigurationError] when a LoadError occurs while loading
  def load_prometheus_backend
    require_relative 'prometheus_backend'
    PrometheusBackend.new
  rescue LoadError
    raise ConfigurationError,
          "The 'prometheus-client' gem is required for Prometheus metrics. " \
          "Add `gem 'prometheus-client'` to your Gemfile."
  end
end
|
|
178
|
+
|
|
179
|
+
# NullBackend - No-op backend for metrics
|
|
180
|
+
# Used when metrics are disabled or not configured
|
|
181
|
+
class NullBackend
  # Discards a counter increment.
  #
  # Accepts the same keywords as PrometheusBackend#increment (including
  # `value:`) so that callers can switch backends without changing the call.
  #
  # @param name [String] metric name (ignored)
  # @param labels [Hash] label set (ignored)
  # @param value [Numeric] increment amount (ignored)
  # @return [nil]
  def increment(name, labels: {}, value: 1)
    # No-op
  end

  # Discards a histogram observation.
  #
  # @param name [String] metric name (ignored)
  # @param value [Numeric] observed value (ignored)
  # @param labels [Hash] label set (ignored)
  # @return [nil]
  def observe(name, value, labels: {})
    # No-op
  end

  # Discards a gauge update.
  #
  # @param name [String] metric name (ignored)
  # @param value [Numeric] gauge value (ignored)
  # @param labels [Hash] label set (ignored)
  # @return [nil]
  def set(name, value, labels: {})
    # No-op
  end
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
end
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Conductor
|
|
4
|
+
module Worker
|
|
5
|
+
module Telemetry
|
|
6
|
+
# PrometheusBackend - Prometheus backend for the canonical SDK metric catalog.
|
|
7
|
+
#
|
|
8
|
+
# Pre-registers every metric from the harmonization spec with its canonical
|
|
9
|
+
# label set and bucket configuration. Uses camelCase domain labels (taskType,
|
|
10
|
+
# workflowType) per the canonical convention.
|
|
11
|
+
class PrometheusBackend
  # Buckets applied to every histogram that has no entry in HISTOGRAM_BUCKETS.
  TIME_BUCKETS = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10].freeze
  # Buckets applied to the histograms listed in HISTOGRAM_BUCKETS.
  SIZE_BUCKETS = [100, 1000, 10_000, 100_000, 1_000_000, 10_000_000].freeze

  # Declared label names per counter.
  COUNTER_LABELS = {
    'task_poll_total'                  => %i[taskType],
    'task_execution_started_total'     => %i[taskType],
    'task_poll_error_total'            => %i[taskType exception],
    'task_execute_error_total'         => %i[taskType exception],
    'task_update_error_total'          => %i[taskType exception],
    'task_paused_total'                => %i[taskType],
    'thread_uncaught_exceptions_total' => %i[exception],
    'workflow_start_error_total'       => %i[workflowType exception]
  }.freeze

  # Declared label names per histogram.
  HISTOGRAM_LABELS = {
    'task_poll_time_seconds'          => %i[taskType status],
    'task_execute_time_seconds'       => %i[taskType status],
    'task_update_time_seconds'        => %i[taskType status],
    'http_api_client_request_seconds' => %i[method uri status],
    'task_result_size_bytes'          => %i[taskType],
    'workflow_input_size_bytes'       => %i[workflowType version]
  }.freeze

  # Declared label names per gauge.
  GAUGE_LABELS = {
    'active_workers' => %i[taskType]
  }.freeze

  # Per-histogram bucket overrides; anything absent uses TIME_BUCKETS.
  HISTOGRAM_BUCKETS = {
    'task_result_size_bytes'    => SIZE_BUCKETS,
    'workflow_input_size_bytes' => SIZE_BUCKETS
  }.freeze

  attr_reader :registry

  # Loads prometheus-client, picks the registry and pre-registers every
  # metric named in the *_LABELS tables.
  #
  # @param registry [Object, nil] registry to use; defaults to
  #   Prometheus::Client.registry
  # @raise [ConfigurationError] when prometheus-client cannot be required
  def initialize(registry: nil)
    load_prometheus_client
    @registry = registry || Prometheus::Client.registry
    @counters = {}
    @histograms = {}
    @gauges = {}
    setup_metrics
  end

  # Increments the named counter by `value`.
  #
  # @param name [String] counter name
  # @param labels [Hash] label values, aligned via normalize_labels
  # @param value [Numeric] increment amount
  def increment(name, labels: {}, value: 1)
    counter = get_or_create_counter(name)
    counter.increment(labels: normalize_labels(name, labels, COUNTER_LABELS), by: value)
  end

  # Records `value` on the named histogram.
  #
  # @param name [String] histogram name
  # @param value [Numeric] observation
  # @param labels [Hash] label values, aligned via normalize_labels
  def observe(name, value, labels: {})
    histogram = get_or_create_histogram(name)
    histogram.observe(value, labels: normalize_labels(name, labels, HISTOGRAM_LABELS))
  end

  # Sets the named gauge to `value`.
  #
  # @param name [String] gauge name
  # @param value [Numeric] new gauge value
  # @param labels [Hash] label values, aligned via normalize_labels
  def set(name, value, labels: {})
    gauge = get_or_create_gauge(name)
    gauge.set(value, labels: normalize_labels(name, labels, GAUGE_LABELS))
  end

  private

  # Requires prometheus-client, translating a LoadError into a
  # ConfigurationError with installation instructions.
  def load_prometheus_client
    require 'prometheus/client'
  rescue LoadError
    raise ConfigurationError,
          "The 'prometheus-client' gem is required for Prometheus metrics. " \
          "Add `gem 'prometheus-client'` to your Gemfile."
  end

  # Registers every counter, histogram and gauge listed in the label tables.
  def setup_metrics
    COUNTER_LABELS.each_key { |name| register_counter(name, "Counter for #{name}") }
    HISTOGRAM_LABELS.each_key { |name| register_histogram(name, "Histogram for #{name}") }
    GAUGE_LABELS.each_key { |name| register_gauge(name, "Gauge for #{name}") }
  end

  # Registers (or reuses) a counter and caches it under its string name.
  # Names missing from COUNTER_LABELS are declared with a single taskType label.
  def register_counter(name, docstring)
    key = name.to_sym
    label_names = COUNTER_LABELS.fetch(name, %i[taskType])
    @counters[name] = register_or_reuse(key) do
      Prometheus::Client::Counter.new(key, docstring: docstring, labels: label_names)
    end
  end

  # Registers (or reuses) a histogram and caches it under its string name.
  # Names missing from HISTOGRAM_LABELS are declared with a single taskType
  # label; names missing from HISTOGRAM_BUCKETS use TIME_BUCKETS.
  def register_histogram(name, docstring)
    key = name.to_sym
    label_names = HISTOGRAM_LABELS.fetch(name, %i[taskType])
    bucket_bounds = HISTOGRAM_BUCKETS[name] || TIME_BUCKETS
    @histograms[name] = register_or_reuse(key) do
      Prometheus::Client::Histogram.new(key, docstring: docstring,
                                             labels: label_names, buckets: bucket_bounds)
    end
  end

  # Registers (or reuses) a gauge and caches it under its string name.
  # Names missing from GAUGE_LABELS are declared with a single taskType label.
  def register_gauge(name, docstring)
    key = name.to_sym
    label_names = GAUGE_LABELS.fetch(name, %i[taskType])
    @gauges[name] = register_or_reuse(key) do
      Prometheus::Client::Gauge.new(key, docstring: docstring, labels: label_names)
    end
  end

  # Returns the metric already present in the registry under metric_name;
  # otherwise builds one from the block, registers it and returns it.
  def register_or_reuse(metric_name)
    return @registry.get(metric_name) if @registry.exist?(metric_name)

    yield.tap { |metric| @registry.register(metric) }
  end

  # Cached counter for `name`, registering it on first use.
  def get_or_create_counter(name)
    @counters[name] || register_counter(name, "Counter for #{name}")
  end

  # Cached histogram for `name`, registering it on first use.
  def get_or_create_histogram(name)
    @histograms[name] || register_histogram(name, "Histogram for #{name}")
  end

  # Cached gauge for `name`, registering it on first use.
  def get_or_create_gauge(name)
    @gauges[name] || register_gauge(name, "Gauge for #{name}")
  end

  # Align provided labels to the declared label set for the metric.
  # Keys are symbolized and values stringified; nil values are treated as
  # absent. Missing keys get empty-string defaults; unknown keys are dropped.
  # When the schema has no entry for the metric, the cleaned-up labels are
  # returned without alignment.
  #
  # NOTE(review): metrics absent from the schema are registered with a single
  # taskType label but their labels are passed through here unaligned --
  # confirm callers of such metrics always pass exactly that label.
  def normalize_labels(name, labels, schema)
    provided = labels.each_with_object({}) do |(key, value), cleaned|
      cleaned[key.to_sym] = value.to_s unless value.nil?
    end

    declared = schema[name]
    return provided unless declared

    declared.map { |key| [key, provided.fetch(key, '')] }.to_h
  end
end
|
|
171
|
+
|
|
172
|
+
# MetricsServer - HTTP server for exposing Prometheus metrics
|
|
173
|
+
# Serves metrics at /metrics endpoint
|
|
174
|
+
class MetricsServer
  DEFAULT_PORT = 9090

  # @return [Integer] Server port
  attr_reader :port

  # Loads the Prometheus client, its text formatter and WEBrick, then stores
  # the configuration. No socket is opened until #start is called.
  #
  # @param port [Integer] Port to listen on (default: 9090)
  # @param registry [Prometheus::Client::Registry] Prometheus registry;
  #   defaults to Prometheus::Client.registry
  def initialize(port: DEFAULT_PORT, registry: nil)
    require 'prometheus/client'
    require 'prometheus/client/formats/text'
    require 'webrick'

    @port = port
    @registry = registry || Prometheus::Client.registry
    @server = nil
    # Initialized here so that #stop can be called before #start.
    @thread = nil
  end

  # Start the metrics server in a background thread.
  #
  # Mounts two handlers: /metrics serves the registry in the Prometheus text
  # format, and /health serves a fixed JSON body.
  #
  # @return [Thread] Server thread
  def start
    @server = WEBrick::HTTPServer.new(
      Port: @port,
      # File::NULL is the platform's null device, so logging is discarded
      # on systems that have no /dev/null as well.
      Logger: WEBrick::Log.new(File::NULL),
      AccessLog: []
    )

    @server.mount_proc '/metrics' do |_req, res|
      res.content_type = 'text/plain; version=0.0.4'
      res.body = Prometheus::Client::Formats::Text.marshal(@registry)
    end

    @server.mount_proc '/health' do |_req, res|
      res.content_type = 'application/json'
      res.body = '{"status":"healthy"}'
    end

    @thread = Thread.new { @server.start }
    @thread.name = 'prometheus-metrics-server'
    @thread
  end

  # Stop the metrics server: shuts the HTTP server down and waits up to five
  # seconds for the server thread to finish. Does nothing when the server
  # was never started.
  def stop
    @server&.shutdown
    @thread&.join(5)
  end
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
end
|