tasker-engine 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (605)
  1. checksums.yaml +7 -0
  2. data/LICENSE +22 -0
  3. data/README.md +443 -0
  4. data/Rakefile +10 -0
  5. data/app/controllers/tasker/analytics_controller.rb +179 -0
  6. data/app/controllers/tasker/application_controller.rb +45 -0
  7. data/app/controllers/tasker/graphql_controller.rb +193 -0
  8. data/app/controllers/tasker/handlers_controller.rb +217 -0
  9. data/app/controllers/tasker/health_controller.rb +229 -0
  10. data/app/controllers/tasker/metrics_controller.rb +111 -0
  11. data/app/controllers/tasker/page_sort.rb +97 -0
  12. data/app/controllers/tasker/task_diagrams_controller.rb +30 -0
  13. data/app/controllers/tasker/tasks_controller.rb +123 -0
  14. data/app/controllers/tasker/workflow_steps_controller.rb +69 -0
  15. data/app/graphql/examples/all_tasks.graphql +22 -0
  16. data/app/graphql/examples/pending_tasks.graphql +23 -0
  17. data/app/graphql/tasker/graph_ql_types/annotation_type.rb +14 -0
  18. data/app/graphql/tasker/graph_ql_types/base_argument.rb +9 -0
  19. data/app/graphql/tasker/graph_ql_types/base_connection.rb +11 -0
  20. data/app/graphql/tasker/graph_ql_types/base_edge.rb +10 -0
  21. data/app/graphql/tasker/graph_ql_types/base_enum.rb +9 -0
  22. data/app/graphql/tasker/graph_ql_types/base_field.rb +10 -0
  23. data/app/graphql/tasker/graph_ql_types/base_input_object.rb +10 -0
  24. data/app/graphql/tasker/graph_ql_types/base_interface.rb +14 -0
  25. data/app/graphql/tasker/graph_ql_types/base_object.rb +10 -0
  26. data/app/graphql/tasker/graph_ql_types/base_scalar.rb +9 -0
  27. data/app/graphql/tasker/graph_ql_types/base_union.rb +11 -0
  28. data/app/graphql/tasker/graph_ql_types/dependent_system_object_map_type.rb +18 -0
  29. data/app/graphql/tasker/graph_ql_types/dependent_system_type.rb +13 -0
  30. data/app/graphql/tasker/graph_ql_types/mutation_type.rb +16 -0
  31. data/app/graphql/tasker/graph_ql_types/named_step_type.rb +16 -0
  32. data/app/graphql/tasker/graph_ql_types/named_task_type.rb +14 -0
  33. data/app/graphql/tasker/graph_ql_types/named_tasks_named_step_type.rb +19 -0
  34. data/app/graphql/tasker/graph_ql_types/node_type.rb +12 -0
  35. data/app/graphql/tasker/graph_ql_types/query_type.rb +20 -0
  36. data/app/graphql/tasker/graph_ql_types/task_annotation_type.rb +17 -0
  37. data/app/graphql/tasker/graph_ql_types/task_interface.rb +17 -0
  38. data/app/graphql/tasker/graph_ql_types/task_type.rb +26 -0
  39. data/app/graphql/tasker/graph_ql_types/workflow_step_type.rb +154 -0
  40. data/app/graphql/tasker/graph_ql_types.rb +42 -0
  41. data/app/graphql/tasker/mutations/base_mutation.rb +13 -0
  42. data/app/graphql/tasker/mutations/cancel_step.rb +29 -0
  43. data/app/graphql/tasker/mutations/cancel_task.rb +29 -0
  44. data/app/graphql/tasker/mutations/create_task.rb +52 -0
  45. data/app/graphql/tasker/mutations/update_step.rb +36 -0
  46. data/app/graphql/tasker/mutations/update_task.rb +41 -0
  47. data/app/graphql/tasker/queries/all_annotation_types.rb +17 -0
  48. data/app/graphql/tasker/queries/all_tasks.rb +23 -0
  49. data/app/graphql/tasker/queries/base_query.rb +9 -0
  50. data/app/graphql/tasker/queries/helpers.rb +16 -0
  51. data/app/graphql/tasker/queries/one_step.rb +24 -0
  52. data/app/graphql/tasker/queries/one_task.rb +18 -0
  53. data/app/graphql/tasker/queries/tasks_by_annotation.rb +31 -0
  54. data/app/graphql/tasker/queries/tasks_by_status.rb +30 -0
  55. data/app/graphql/tasker/tasker_rails_schema.rb +52 -0
  56. data/app/jobs/tasker/application_job.rb +8 -0
  57. data/app/jobs/tasker/metrics_export_job.rb +252 -0
  58. data/app/jobs/tasker/task_runner_job.rb +224 -0
  59. data/app/models/tasker/annotation_type.rb +26 -0
  60. data/app/models/tasker/application_record.rb +70 -0
  61. data/app/models/tasker/dependent_system.rb +26 -0
  62. data/app/models/tasker/dependent_system_object_map.rb +64 -0
  63. data/app/models/tasker/diagram/edge.rb +106 -0
  64. data/app/models/tasker/diagram/flowchart.rb +137 -0
  65. data/app/models/tasker/diagram/node.rb +99 -0
  66. data/app/models/tasker/named_step.rb +41 -0
  67. data/app/models/tasker/named_task.rb +121 -0
  68. data/app/models/tasker/named_tasks_named_step.rb +82 -0
  69. data/app/models/tasker/step_dag_relationship.rb +65 -0
  70. data/app/models/tasker/step_readiness_status.rb +59 -0
  71. data/app/models/tasker/task.rb +424 -0
  72. data/app/models/tasker/task_annotation.rb +36 -0
  73. data/app/models/tasker/task_diagram.rb +332 -0
  74. data/app/models/tasker/task_execution_context.rb +29 -0
  75. data/app/models/tasker/task_namespace.rb +41 -0
  76. data/app/models/tasker/task_transition.rb +235 -0
  77. data/app/models/tasker/workflow_step.rb +461 -0
  78. data/app/models/tasker/workflow_step_edge.rb +94 -0
  79. data/app/models/tasker/workflow_step_transition.rb +434 -0
  80. data/app/serializers/tasker/annotation_type_serializer.rb +8 -0
  81. data/app/serializers/tasker/handler_serializer.rb +109 -0
  82. data/app/serializers/tasker/task_annotation_serializer.rb +32 -0
  83. data/app/serializers/tasker/task_serializer.rb +168 -0
  84. data/app/serializers/tasker/workflow_step_serializer.rb +27 -0
  85. data/app/services/tasker/analytics_service.rb +409 -0
  86. data/app/views/tasker/task/_diagram.html.erb +32 -0
  87. data/config/initializers/dry_struct.rb +11 -0
  88. data/config/initializers/statesman.rb +6 -0
  89. data/config/initializers/tasker_orchestration.rb +17 -0
  90. data/config/initializers/time_formats.rb +4 -0
  91. data/config/routes.rb +34 -0
  92. data/config/tasker/subscriptions/example_integrations.yml +67 -0
  93. data/config/tasker/system_events.yml +305 -0
  94. data/db/functions/calculate_dependency_levels_v01.sql +45 -0
  95. data/db/functions/get_analytics_metrics_v01.sql +137 -0
  96. data/db/functions/get_slowest_steps_v01.sql +82 -0
  97. data/db/functions/get_slowest_tasks_v01.sql +96 -0
  98. data/db/functions/get_step_readiness_status_batch_v01.sql +140 -0
  99. data/db/functions/get_step_readiness_status_v01.sql +139 -0
  100. data/db/functions/get_system_health_counts_v01.sql +108 -0
  101. data/db/functions/get_task_execution_context_v01.sql +108 -0
  102. data/db/functions/get_task_execution_contexts_batch_v01.sql +104 -0
  103. data/db/init/schema.sql +2277 -0
  104. data/db/migrate/20250701165431_initial_tasker_schema.rb +116 -0
  105. data/db/views/tasker_step_dag_relationships_v01.sql +69 -0
  106. data/docs/APPLICATION_GENERATOR.md +384 -0
  107. data/docs/AUTH.md +1780 -0
  108. data/docs/CIRCUIT_BREAKER.md +224 -0
  109. data/docs/DEVELOPER_GUIDE.md +2665 -0
  110. data/docs/EVENT_SYSTEM.md +637 -0
  111. data/docs/EXECUTION_CONFIGURATION.md +341 -0
  112. data/docs/FLOW_CHART.md +149 -0
  113. data/docs/HEALTH.md +542 -0
  114. data/docs/METRICS.md +731 -0
  115. data/docs/OPTIMIZATION_PLAN.md +1479 -0
  116. data/docs/OVERVIEW.md +552 -0
  117. data/docs/QUICK_START.md +270 -0
  118. data/docs/REGISTRY_SYSTEMS.md +373 -0
  119. data/docs/REST_API.md +632 -0
  120. data/docs/ROADMAP.md +221 -0
  121. data/docs/SQL_FUNCTIONS.md +1408 -0
  122. data/docs/TASK_DIAGRAM.md +252 -0
  123. data/docs/TASK_EXECUTION_CONTROL_FLOW.md +237 -0
  124. data/docs/TELEMETRY.md +795 -0
  125. data/docs/TROUBLESHOOTING.md +756 -0
  126. data/docs/TaskHandlerGenerator.html +255 -0
  127. data/docs/Tasker/Analysis/RuntimeGraphAnalyzer.html +907 -0
  128. data/docs/Tasker/Analysis/TemplateGraphAnalyzer.html +1236 -0
  129. data/docs/Tasker/Analysis.html +117 -0
  130. data/docs/Tasker/AnalyticsController.html +450 -0
  131. data/docs/Tasker/AnalyticsService/BottleneckAnalytics.html +816 -0
  132. data/docs/Tasker/AnalyticsService/PerformanceAnalytics.html +586 -0
  133. data/docs/Tasker/AnalyticsService.html +2221 -0
  134. data/docs/Tasker/AnnotationType.html +137 -0
  135. data/docs/Tasker/AnnotationTypeSerializer.html +124 -0
  136. data/docs/Tasker/ApplicationController.html +147 -0
  137. data/docs/Tasker/ApplicationJob.html +128 -0
  138. data/docs/Tasker/ApplicationRecord.html +378 -0
  139. data/docs/Tasker/Authentication/AuthenticationError.html +124 -0
  140. data/docs/Tasker/Authentication/ConfigurationError.html +124 -0
  141. data/docs/Tasker/Authentication/Coordinator.html +242 -0
  142. data/docs/Tasker/Authentication/Interface.html +560 -0
  143. data/docs/Tasker/Authentication/InterfaceError.html +124 -0
  144. data/docs/Tasker/Authentication/NoneAuthenticator.html +338 -0
  145. data/docs/Tasker/Authentication.html +119 -0
  146. data/docs/Tasker/Authorization/AuthorizationError.html +139 -0
  147. data/docs/Tasker/Authorization/BaseCoordinator.html +927 -0
  148. data/docs/Tasker/Authorization/ConfigurationError.html +153 -0
  149. data/docs/Tasker/Authorization/ResourceConstants/ACTIONS.html +428 -0
  150. data/docs/Tasker/Authorization/ResourceConstants/RESOURCES.html +365 -0
  151. data/docs/Tasker/Authorization/ResourceConstants.html +146 -0
  152. data/docs/Tasker/Authorization/ResourceRegistry.html +882 -0
  153. data/docs/Tasker/Authorization/UnauthorizedError.html +153 -0
  154. data/docs/Tasker/Authorization.html +582 -0
  155. data/docs/Tasker/CacheCapabilities.html +167 -0
  156. data/docs/Tasker/CacheStrategy.html +1297 -0
  157. data/docs/Tasker/Concerns/Authenticatable.html +116 -0
  158. data/docs/Tasker/Concerns/Authorizable/AdminStatusChecker.html +256 -0
  159. data/docs/Tasker/Concerns/Authorizable.html +816 -0
  160. data/docs/Tasker/Concerns/ControllerAuthorizable.html +157 -0
  161. data/docs/Tasker/Concerns/EventPublisher.html +4023 -0
  162. data/docs/Tasker/Concerns/IdempotentStateTransitions.html +806 -0
  163. data/docs/Tasker/Concerns/LifecycleEventHelpers.html +129 -0
  164. data/docs/Tasker/Concerns/OrchestrationPublisher.html +129 -0
  165. data/docs/Tasker/Concerns/StateMachineBase/ClassMethods.html +1075 -0
  166. data/docs/Tasker/Concerns/StateMachineBase/StateMachineBase/ClassMethods.html +191 -0
  167. data/docs/Tasker/Concerns/StateMachineBase/StateMachineBase.html +126 -0
  168. data/docs/Tasker/Concerns/StateMachineBase.html +153 -0
  169. data/docs/Tasker/Concerns/StructuredLogging.html +1413 -0
  170. data/docs/Tasker/Concerns.html +117 -0
  171. data/docs/Tasker/Configuration/AuthConfiguration.html +1023 -0
  172. data/docs/Tasker/Configuration/ConfigurationProxy.html +581 -0
  173. data/docs/Tasker/Configuration/DatabaseConfiguration.html +475 -0
  174. data/docs/Tasker/Configuration/EngineConfiguration.html +1265 -0
  175. data/docs/Tasker/Configuration/HealthConfiguration.html +791 -0
  176. data/docs/Tasker/Configuration/TelemetryConfiguration.html +1308 -0
  177. data/docs/Tasker/Configuration/TelemetryConfigurationProxy.html +388 -0
  178. data/docs/Tasker/Configuration.html +1669 -0
  179. data/docs/Tasker/ConfigurationError.html +143 -0
  180. data/docs/Tasker/ConfiguredTask.html +514 -0
  181. data/docs/Tasker/Constants/EventDefinitions.html +590 -0
  182. data/docs/Tasker/Constants/LifecycleEvents.html +137 -0
  183. data/docs/Tasker/Constants/ObservabilityEvents/Step.html +152 -0
  184. data/docs/Tasker/Constants/ObservabilityEvents/Task.html +142 -0
  185. data/docs/Tasker/Constants/ObservabilityEvents.html +126 -0
  186. data/docs/Tasker/Constants/RegistryEvents.html +285 -0
  187. data/docs/Tasker/Constants/StepEvents.html +177 -0
  188. data/docs/Tasker/Constants/TaskEvents.html +167 -0
  189. data/docs/Tasker/Constants/TaskExecution/ExecutionStatus.html +207 -0
  190. data/docs/Tasker/Constants/TaskExecution/HealthStatus.html +191 -0
  191. data/docs/Tasker/Constants/TaskExecution/RecommendedAction.html +207 -0
  192. data/docs/Tasker/Constants/TaskExecution.html +126 -0
  193. data/docs/Tasker/Constants/TaskFinalization/ErrorMessages.html +132 -0
  194. data/docs/Tasker/Constants/TaskFinalization/PendingReasons.html +207 -0
  195. data/docs/Tasker/Constants/TaskFinalization/ReenqueueReasons.html +239 -0
  196. data/docs/Tasker/Constants/TaskFinalization.html +126 -0
  197. data/docs/Tasker/Constants/TaskStatuses.html +223 -0
  198. data/docs/Tasker/Constants/TestEvents.html +163 -0
  199. data/docs/Tasker/Constants/WorkflowEvents.html +222 -0
  200. data/docs/Tasker/Constants/WorkflowStepStatuses.html +223 -0
  201. data/docs/Tasker/Constants.html +561 -0
  202. data/docs/Tasker/DependentSystem.html +137 -0
  203. data/docs/Tasker/DependentSystemObjectMap.html +250 -0
  204. data/docs/Tasker/DetectorRegistry.html +598 -0
  205. data/docs/Tasker/Diagram/Edge.html +1191 -0
  206. data/docs/Tasker/Diagram/Flowchart.html +1539 -0
  207. data/docs/Tasker/Diagram/Node.html +1165 -0
  208. data/docs/Tasker/Diagram.html +117 -0
  209. data/docs/Tasker/Engine.html +215 -0
  210. data/docs/Tasker/Error.html +139 -0
  211. data/docs/Tasker/Events/Bus.html +1226 -0
  212. data/docs/Tasker/Events/Catalog/CatalogPrinter.html +258 -0
  213. data/docs/Tasker/Events/Catalog/CustomEventRegistrar.html +276 -0
  214. data/docs/Tasker/Events/Catalog/ExamplePayloadGenerator.html +294 -0
  215. data/docs/Tasker/Events/Catalog.html +1291 -0
  216. data/docs/Tasker/Events/CustomRegistry.html +943 -0
  217. data/docs/Tasker/Events/DefinitionLoader.html +575 -0
  218. data/docs/Tasker/Events/EventPayloadBuilder/ErrorInfoExtractor.html +286 -0
  219. data/docs/Tasker/Events/EventPayloadBuilder/StepPayloadBuilder.html +312 -0
  220. data/docs/Tasker/Events/EventPayloadBuilder.html +664 -0
  221. data/docs/Tasker/Events/Publisher.html +365 -0
  222. data/docs/Tasker/Events/Subscribers/BaseSubscriber/ErrorCategorizer/ErrorTypeClassifier.html +1128 -0
  223. data/docs/Tasker/Events/Subscribers/BaseSubscriber/ErrorCategorizer.html +270 -0
  224. data/docs/Tasker/Events/Subscribers/BaseSubscriber/MetricTagsExtractor.html +266 -0
  225. data/docs/Tasker/Events/Subscribers/BaseSubscriber.html +2556 -0
  226. data/docs/Tasker/Events/Subscribers/MetricsSubscriber.html +723 -0
  227. data/docs/Tasker/Events/Subscribers/TelemetrySubscriber.html +2251 -0
  228. data/docs/Tasker/Events/Subscribers.html +117 -0
  229. data/docs/Tasker/Events/SubscriptionLoader.html +493 -0
  230. data/docs/Tasker/Events.html +294 -0
  231. data/docs/Tasker/EventsGenerator.html +459 -0
  232. data/docs/Tasker/Functions/FunctionBasedAnalyticsMetrics/AnalyticsMetrics.html +135 -0
  233. data/docs/Tasker/Functions/FunctionBasedAnalyticsMetrics.html +412 -0
  234. data/docs/Tasker/Functions/FunctionBasedDependencyLevels.html +598 -0
  235. data/docs/Tasker/Functions/FunctionBasedSlowestSteps/SlowestStep.html +135 -0
  236. data/docs/Tasker/Functions/FunctionBasedSlowestSteps.html +453 -0
  237. data/docs/Tasker/Functions/FunctionBasedSlowestTasks/SlowestTask.html +135 -0
  238. data/docs/Tasker/Functions/FunctionBasedSlowestTasks.html +453 -0
  239. data/docs/Tasker/Functions/FunctionBasedStepReadinessStatus.html +1457 -0
  240. data/docs/Tasker/Functions/FunctionBasedSystemHealthCounts/HealthMetrics.html +135 -0
  241. data/docs/Tasker/Functions/FunctionBasedSystemHealthCounts.html +370 -0
  242. data/docs/Tasker/Functions/FunctionBasedTaskExecutionContext.html +1250 -0
  243. data/docs/Tasker/Functions/FunctionWrapper.html +479 -0
  244. data/docs/Tasker/Functions.html +117 -0
  245. data/docs/Tasker/Generators/AuthenticatorGenerator/UsageInstructionsFormatter.html +244 -0
  246. data/docs/Tasker/Generators/AuthenticatorGenerator.html +373 -0
  247. data/docs/Tasker/Generators/AuthorizationCoordinatorGenerator.html +430 -0
  248. data/docs/Tasker/Generators/SubscriberGenerator.html +377 -0
  249. data/docs/Tasker/Generators/TaskHandlerGenerator.html +263 -0
  250. data/docs/Tasker/Generators.html +117 -0
  251. data/docs/Tasker/GraphQLTypes/AnnotationType.html +132 -0
  252. data/docs/Tasker/GraphQLTypes/BaseArgument.html +124 -0
  253. data/docs/Tasker/GraphQLTypes/BaseConnection.html +124 -0
  254. data/docs/Tasker/GraphQLTypes/BaseEdge.html +130 -0
  255. data/docs/Tasker/GraphQLTypes/BaseEnum.html +124 -0
  256. data/docs/Tasker/GraphQLTypes/BaseField.html +124 -0
  257. data/docs/Tasker/GraphQLTypes/BaseInputObject.html +124 -0
  258. data/docs/Tasker/GraphQLTypes/BaseInterface.html +116 -0
  259. data/docs/Tasker/GraphQLTypes/BaseObject.html +128 -0
  260. data/docs/Tasker/GraphQLTypes/BaseScalar.html +124 -0
  261. data/docs/Tasker/GraphQLTypes/BaseUnion.html +124 -0
  262. data/docs/Tasker/GraphQLTypes/DependentSystemObjectMapType.html +132 -0
  263. data/docs/Tasker/GraphQLTypes/DependentSystemType.html +132 -0
  264. data/docs/Tasker/GraphQLTypes/MutationType.html +132 -0
  265. data/docs/Tasker/GraphQLTypes/NamedStepType.html +132 -0
  266. data/docs/Tasker/GraphQLTypes/NamedTaskType.html +132 -0
  267. data/docs/Tasker/GraphQLTypes/NamedTasksNamedStepType.html +132 -0
  268. data/docs/Tasker/GraphQLTypes/NodeType.html +118 -0
  269. data/docs/Tasker/GraphQLTypes/QueryType.html +139 -0
  270. data/docs/Tasker/GraphQLTypes/TaskAnnotationType.html +132 -0
  271. data/docs/Tasker/GraphQLTypes/TaskInterface.html +111 -0
  272. data/docs/Tasker/GraphQLTypes/TaskType.html +201 -0
  273. data/docs/Tasker/GraphQLTypes/WorkflowStepType.html +694 -0
  274. data/docs/Tasker/GraphQLTypes.html +130 -0
  275. data/docs/Tasker/GraphqlController.html +251 -0
  276. data/docs/Tasker/HandlerFactory.html +1518 -0
  277. data/docs/Tasker/HandlerSerializer.html +682 -0
  278. data/docs/Tasker/HandlersController.html +574 -0
  279. data/docs/Tasker/HashIdentityStrategy.html +278 -0
  280. data/docs/Tasker/Health/ReadinessChecker.html +712 -0
  281. data/docs/Tasker/Health/StatusChecker.html +653 -0
  282. data/docs/Tasker/Health.html +117 -0
  283. data/docs/Tasker/HealthController.html +523 -0
  284. data/docs/Tasker/IdentityStrategy.html +276 -0
  285. data/docs/Tasker/InvalidTaskHandlerConfig.html +135 -0
  286. data/docs/Tasker/LifecycleEvents/Events/Step.html +162 -0
  287. data/docs/Tasker/LifecycleEvents/Events/Task.html +162 -0
  288. data/docs/Tasker/LifecycleEvents/Events.html +204 -0
  289. data/docs/Tasker/LifecycleEvents/Publisher.html +132 -0
  290. data/docs/Tasker/LifecycleEvents.html +799 -0
  291. data/docs/Tasker/Logging/CorrelationIdGenerator.html +688 -0
  292. data/docs/Tasker/Logging.html +115 -0
  293. data/docs/Tasker/MetricsController.html +293 -0
  294. data/docs/Tasker/MetricsExportJob.html +414 -0
  295. data/docs/Tasker/Mutations/BaseMutation.html +128 -0
  296. data/docs/Tasker/Mutations/CancelStep.html +219 -0
  297. data/docs/Tasker/Mutations/CancelTask.html +221 -0
  298. data/docs/Tasker/Mutations/CreateTask.html +243 -0
  299. data/docs/Tasker/Mutations/UpdateStep.html +243 -0
  300. data/docs/Tasker/Mutations/UpdateTask.html +243 -0
  301. data/docs/Tasker/Mutations.html +117 -0
  302. data/docs/Tasker/NamedStep.html +216 -0
  303. data/docs/Tasker/NamedTask.html +910 -0
  304. data/docs/Tasker/NamedTasksNamedStep.html +435 -0
  305. data/docs/Tasker/Orchestration/BackoffCalculator.html +404 -0
  306. data/docs/Tasker/Orchestration/ConnectionBuilder/ConfigValidator.html +258 -0
  307. data/docs/Tasker/Orchestration/ConnectionBuilder.html +435 -0
  308. data/docs/Tasker/Orchestration/ConnectionPoolIntelligence.html +513 -0
  309. data/docs/Tasker/Orchestration/Coordinator.html +641 -0
  310. data/docs/Tasker/Orchestration/FutureStateAnalyzer.html +1045 -0
  311. data/docs/Tasker/Orchestration/Orchestrator.html +679 -0
  312. data/docs/Tasker/Orchestration/PluginIntegration.html +1127 -0
  313. data/docs/Tasker/Orchestration/ResponseProcessor.html +504 -0
  314. data/docs/Tasker/Orchestration/RetryHeaderParser.html +304 -0
  315. data/docs/Tasker/Orchestration/StepExecutor.html +995 -0
  316. data/docs/Tasker/Orchestration/StepSequenceFactory.html +644 -0
  317. data/docs/Tasker/Orchestration/TaskFinalizer/BlockageChecker.html +264 -0
  318. data/docs/Tasker/Orchestration/TaskFinalizer/ContextManager.html +254 -0
  319. data/docs/Tasker/Orchestration/TaskFinalizer/DelayCalculator.html +556 -0
  320. data/docs/Tasker/Orchestration/TaskFinalizer/FinalizationDecisionMaker.html +348 -0
  321. data/docs/Tasker/Orchestration/TaskFinalizer/FinalizationProcessor.html +286 -0
  322. data/docs/Tasker/Orchestration/TaskFinalizer/ReasonDeterminer.html +432 -0
  323. data/docs/Tasker/Orchestration/TaskFinalizer/ReenqueueManager.html +296 -0
  324. data/docs/Tasker/Orchestration/TaskFinalizer/UnclearStateHandler.html +314 -0
  325. data/docs/Tasker/Orchestration/TaskFinalizer.html +1212 -0
  326. data/docs/Tasker/Orchestration/TaskInitializer.html +766 -0
  327. data/docs/Tasker/Orchestration/TaskReenqueuer.html +506 -0
  328. data/docs/Tasker/Orchestration/ViableStepDiscovery.html +442 -0
  329. data/docs/Tasker/Orchestration/WorkflowCoordinator.html +510 -0
  330. data/docs/Tasker/Orchestration.html +130 -0
  331. data/docs/Tasker/PageSort/PageSortParamsBuilder.html +296 -0
  332. data/docs/Tasker/PageSort.html +247 -0
  333. data/docs/Tasker/PermanentError.html +518 -0
  334. data/docs/Tasker/ProceduralError.html +147 -0
  335. data/docs/Tasker/Queries/AllAnnotationTypes.html +217 -0
  336. data/docs/Tasker/Queries/AllTasks.html +221 -0
  337. data/docs/Tasker/Queries/BaseQuery.html +128 -0
  338. data/docs/Tasker/Queries/Helpers.html +187 -0
  339. data/docs/Tasker/Queries/OneStep.html +225 -0
  340. data/docs/Tasker/Queries/OneTask.html +217 -0
  341. data/docs/Tasker/Queries/TasksByAnnotation.html +231 -0
  342. data/docs/Tasker/Queries/TasksByStatus.html +233 -0
  343. data/docs/Tasker/Queries.html +119 -0
  344. data/docs/Tasker/Railtie.html +124 -0
  345. data/docs/Tasker/Registry/BaseRegistry.html +1690 -0
  346. data/docs/Tasker/Registry/EventPublisher.html +667 -0
  347. data/docs/Tasker/Registry/InterfaceValidator.html +569 -0
  348. data/docs/Tasker/Registry/RegistrationError.html +132 -0
  349. data/docs/Tasker/Registry/RegistryError.html +139 -0
  350. data/docs/Tasker/Registry/StatisticsCollector.html +841 -0
  351. data/docs/Tasker/Registry/SubscriberRegistry.html +1504 -0
  352. data/docs/Tasker/Registry/ValidationError.html +132 -0
  353. data/docs/Tasker/Registry.html +119 -0
  354. data/docs/Tasker/RetryableError.html +515 -0
  355. data/docs/Tasker/StateMachine/Compatibility.html +282 -0
  356. data/docs/Tasker/StateMachine/InvalidStateTransition.html +135 -0
  357. data/docs/Tasker/StateMachine/StepStateMachine/StandardizedPayloadBuilder.html +260 -0
  358. data/docs/Tasker/StateMachine/StepStateMachine.html +2215 -0
  359. data/docs/Tasker/StateMachine/TaskStateMachine.html +734 -0
  360. data/docs/Tasker/StateMachine.html +602 -0
  361. data/docs/Tasker/StepDagRelationship.html +657 -0
  362. data/docs/Tasker/StepHandler/Api/Config.html +1091 -0
  363. data/docs/Tasker/StepHandler/Api.html +884 -0
  364. data/docs/Tasker/StepHandler/AutomaticEventPublishing.html +321 -0
  365. data/docs/Tasker/StepHandler/Base.html +970 -0
  366. data/docs/Tasker/StepHandler.html +119 -0
  367. data/docs/Tasker/StepReadinessStatus.html +836 -0
  368. data/docs/Tasker/Task.html +2575 -0
  369. data/docs/Tasker/TaskAnnotation.html +137 -0
  370. data/docs/Tasker/TaskAnnotationSerializer.html +124 -0
  371. data/docs/Tasker/TaskBuilder/StepNameValidator.html +264 -0
  372. data/docs/Tasker/TaskBuilder/StepTemplateDefiner.html +264 -0
  373. data/docs/Tasker/TaskBuilder.html +764 -0
  374. data/docs/Tasker/TaskDiagram/StepToStepEdgeBuilder.html +260 -0
  375. data/docs/Tasker/TaskDiagram/TaskToRootStepEdgeBuilder.html +290 -0
  376. data/docs/Tasker/TaskDiagram.html +548 -0
  377. data/docs/Tasker/TaskDiagramsController.html +240 -0
  378. data/docs/Tasker/TaskExecutionContext.html +469 -0
  379. data/docs/Tasker/TaskHandler/ClassMethods/StepTemplateDefiner/ClassBasedEventRegistrar.html +238 -0
  380. data/docs/Tasker/TaskHandler/ClassMethods/StepTemplateDefiner/YamlEventRegistrar.html +254 -0
  381. data/docs/Tasker/TaskHandler/ClassMethods/StepTemplateDefiner.html +988 -0
  382. data/docs/Tasker/TaskHandler/ClassMethods.html +357 -0
  383. data/docs/Tasker/TaskHandler/InstanceMethods.html +1396 -0
  384. data/docs/Tasker/TaskHandler/StepGroup.html +1748 -0
  385. data/docs/Tasker/TaskHandler.html +271 -0
  386. data/docs/Tasker/TaskNamespace.html +312 -0
  387. data/docs/Tasker/TaskRunnerJob.html +406 -0
  388. data/docs/Tasker/TaskSerializer.html +474 -0
  389. data/docs/Tasker/TaskTransition.html +1517 -0
  390. data/docs/Tasker/TaskWorkflowSummary.html +988 -0
  391. data/docs/Tasker/TaskerRailsSchema/InvalidObjectTypeError.html +132 -0
  392. data/docs/Tasker/TaskerRailsSchema/TypeResolutionError.html +139 -0
  393. data/docs/Tasker/TaskerRailsSchema/UnknownInterfaceError.html +132 -0
  394. data/docs/Tasker/TaskerRailsSchema.html +384 -0
  395. data/docs/Tasker/TasksController.html +595 -0
  396. data/docs/Tasker/Telemetry/EventMapping.html +1307 -0
  397. data/docs/Tasker/Telemetry/EventRouter.html +2178 -0
  398. data/docs/Tasker/Telemetry/Events/ExportEvents.html +246 -0
  399. data/docs/Tasker/Telemetry/Events.html +115 -0
  400. data/docs/Tasker/Telemetry/ExportCoordinator/DistributedLockTimeoutError.html +135 -0
  401. data/docs/Tasker/Telemetry/ExportCoordinator.html +2137 -0
  402. data/docs/Tasker/Telemetry/IntelligentCacheManager.html +1083 -0
  403. data/docs/Tasker/Telemetry/LogBackend.html +1088 -0
  404. data/docs/Tasker/Telemetry/MetricTypes/Counter.html +1054 -0
  405. data/docs/Tasker/Telemetry/MetricTypes/Gauge.html +1270 -0
  406. data/docs/Tasker/Telemetry/MetricTypes/Histogram.html +1492 -0
  407. data/docs/Tasker/Telemetry/MetricTypes.html +153 -0
  408. data/docs/Tasker/Telemetry/MetricsBackend.html +2510 -0
  409. data/docs/Tasker/Telemetry/MetricsExportService.html +578 -0
  410. data/docs/Tasker/Telemetry/PluginRegistry.html +1774 -0
  411. data/docs/Tasker/Telemetry/Plugins/BaseExporter.html +1835 -0
  412. data/docs/Tasker/Telemetry/Plugins/CsvExporter.html +768 -0
  413. data/docs/Tasker/Telemetry/Plugins/JsonExporter.html +747 -0
  414. data/docs/Tasker/Telemetry/Plugins.html +117 -0
  415. data/docs/Tasker/Telemetry/PrometheusExporter.html +481 -0
  416. data/docs/Tasker/Telemetry/TraceBackend.html +891 -0
  417. data/docs/Tasker/Telemetry.html +130 -0
  418. data/docs/Tasker/Types/AuthConfig.html +886 -0
  419. data/docs/Tasker/Types/BackoffConfig.html +1063 -0
  420. data/docs/Tasker/Types/BaseConfig.html +227 -0
  421. data/docs/Tasker/Types/CacheConfig.html +1731 -0
  422. data/docs/Tasker/Types/DatabaseConfig.html +388 -0
  423. data/docs/Tasker/Types/DependencyGraph.html +526 -0
  424. data/docs/Tasker/Types/DependencyGraphConfig.html +753 -0
  425. data/docs/Tasker/Types/EngineConfig.html +1181 -0
  426. data/docs/Tasker/Types/ExecutionConfig.html +1963 -0
  427. data/docs/Tasker/Types/GraphEdge.html +517 -0
  428. data/docs/Tasker/Types/GraphMetadata.html +781 -0
  429. data/docs/Tasker/Types/GraphNode.html +694 -0
  430. data/docs/Tasker/Types/HealthConfig.html +784 -0
  431. data/docs/Tasker/Types/StepSequence.html +353 -0
  432. data/docs/Tasker/Types/StepTemplate.html +1193 -0
  433. data/docs/Tasker/Types/TaskRequest.html +1179 -0
  434. data/docs/Tasker/Types/TelemetryConfig.html +2746 -0
  435. data/docs/Tasker/Types.html +154 -0
  436. data/docs/Tasker/WorkflowStep/StepFinder.html +282 -0
  437. data/docs/Tasker/WorkflowStep.html +2724 -0
  438. data/docs/Tasker/WorkflowStepEdge.html +304 -0
  439. data/docs/Tasker/WorkflowStepSerializer.html +305 -0
  440. data/docs/Tasker/WorkflowStepTransition/TransitionDescriptionFormatter.html +282 -0
  441. data/docs/Tasker/WorkflowStepTransition.html +2201 -0
  442. data/docs/Tasker/WorkflowStepsController.html +462 -0
  443. data/docs/Tasker.html +452 -0
  444. data/docs/VISION.md +584 -0
  445. data/docs/WHY.md +21 -0
  446. data/docs/_index.html +2375 -0
  447. data/docs/class_list.html +54 -0
  448. data/docs/css/common.css +1 -0
  449. data/docs/css/full_list.css +58 -0
  450. data/docs/css/style.css +503 -0
  451. data/docs/events/migration_plan_outcomes.md +80 -0
  452. data/docs/file.README.html +541 -0
  453. data/docs/file_list.html +59 -0
  454. data/docs/frames.html +22 -0
  455. data/docs/index.html +541 -0
  456. data/docs/js/app.js +344 -0
  457. data/docs/js/full_list.js +242 -0
  458. data/docs/js/jquery.js +4 -0
  459. data/docs/method_list.html +9182 -0
  460. data/docs/top-level-namespace.html +110 -0
  461. data/lib/generators/tasker/authenticator_generator.rb +301 -0
  462. data/lib/generators/tasker/authorization_coordinator_generator.rb +139 -0
  463. data/lib/generators/tasker/events_generator.rb +91 -0
  464. data/lib/generators/tasker/subscriber_generator.rb +107 -0
  465. data/lib/generators/tasker/task_handler_generator.rb +138 -0
  466. data/lib/generators/tasker/templates/api_token_authenticator.rb.erb +113 -0
  467. data/lib/generators/tasker/templates/api_token_authenticator_spec.rb.erb +144 -0
  468. data/lib/generators/tasker/templates/authorization_coordinator.rb.erb +95 -0
  469. data/lib/generators/tasker/templates/authorization_coordinator_spec.rb.erb +142 -0
  470. data/lib/generators/tasker/templates/custom_authenticator.rb.erb +108 -0
  471. data/lib/generators/tasker/templates/custom_authenticator_spec.rb.erb +162 -0
  472. data/lib/generators/tasker/templates/custom_events.yml.erb +62 -0
  473. data/lib/generators/tasker/templates/custom_subscriber.rb.erb +72 -0
  474. data/lib/generators/tasker/templates/devise_authenticator.rb.erb +101 -0
  475. data/lib/generators/tasker/templates/devise_authenticator_spec.rb.erb +126 -0
  476. data/lib/generators/tasker/templates/initialize.rb.erb +202 -0
  477. data/lib/generators/tasker/templates/jwt_authenticator.rb.erb +144 -0
  478. data/lib/generators/tasker/templates/jwt_authenticator_spec.rb.erb +298 -0
  479. data/lib/generators/tasker/templates/metrics_subscriber.rb.erb +258 -0
  480. data/lib/generators/tasker/templates/metrics_subscriber_spec.rb.erb +308 -0
  481. data/lib/generators/tasker/templates/omniauth_authenticator.rb.erb +135 -0
  482. data/lib/generators/tasker/templates/omniauth_authenticator_spec.rb.erb +196 -0
  483. data/lib/generators/tasker/templates/opentelemetry_initializer.rb +52 -0
  484. data/lib/generators/tasker/templates/subscriber.rb.erb +64 -0
  485. data/lib/generators/tasker/templates/subscriber_spec.rb.erb +80 -0
  486. data/lib/generators/tasker/templates/task_config.yaml.erb +117 -0
  487. data/lib/generators/tasker/templates/task_handler.rb.erb +59 -0
  488. data/lib/generators/tasker/templates/task_handler_spec.rb.erb +159 -0
  489. data/lib/tasker/analysis/runtime_graph_analyzer.rb +1168 -0
  490. data/lib/tasker/analysis/template_graph_analyzer.rb +328 -0
  491. data/lib/tasker/authentication/coordinator.rb +78 -0
  492. data/lib/tasker/authentication/errors.rb +9 -0
  493. data/lib/tasker/authentication/interface.rb +36 -0
  494. data/lib/tasker/authentication/none_authenticator.rb +26 -0
  495. data/lib/tasker/authorization/base_coordinator.rb +112 -0
  496. data/lib/tasker/authorization/errors.rb +26 -0
  497. data/lib/tasker/authorization/resource_constants.rb +74 -0
  498. data/lib/tasker/authorization/resource_registry.rb +143 -0
  499. data/lib/tasker/authorization.rb +75 -0
  500. data/lib/tasker/cache_capabilities.rb +131 -0
  501. data/lib/tasker/cache_strategy.rb +469 -0
  502. data/lib/tasker/concerns/authenticatable.rb +41 -0
  503. data/lib/tasker/concerns/authorizable.rb +204 -0
  504. data/lib/tasker/concerns/controller_authorizable.rb +124 -0
  505. data/lib/tasker/concerns/event_publisher.rb +716 -0
  506. data/lib/tasker/concerns/idempotent_state_transitions.rb +128 -0
  507. data/lib/tasker/concerns/state_machine_base.rb +218 -0
  508. data/lib/tasker/concerns/structured_logging.rb +387 -0
  509. data/lib/tasker/configuration.rb +325 -0
  510. data/lib/tasker/constants/event_definitions.rb +147 -0
  511. data/lib/tasker/constants/registry_events.rb +54 -0
  512. data/lib/tasker/constants.rb +417 -0
  513. data/lib/tasker/engine.rb +90 -0
  514. data/lib/tasker/errors.rb +90 -0
  515. data/lib/tasker/events/catalog.rb +432 -0
  516. data/lib/tasker/events/custom_registry.rb +175 -0
  517. data/lib/tasker/events/definition_loader.rb +199 -0
  518. data/lib/tasker/events/event_payload_builder.rb +461 -0
  519. data/lib/tasker/events/publisher.rb +149 -0
  520. data/lib/tasker/events/subscribers/base_subscriber.rb +601 -0
  521. data/lib/tasker/events/subscribers/metrics_subscriber.rb +120 -0
  522. data/lib/tasker/events/subscribers/telemetry_subscriber.rb +462 -0
  523. data/lib/tasker/events/subscription_loader.rb +161 -0
  524. data/lib/tasker/events.rb +37 -0
  525. data/lib/tasker/functions/function_based_analytics_metrics.rb +103 -0
  526. data/lib/tasker/functions/function_based_dependency_levels.rb +54 -0
  527. data/lib/tasker/functions/function_based_slowest_steps.rb +84 -0
  528. data/lib/tasker/functions/function_based_slowest_tasks.rb +84 -0
  529. data/lib/tasker/functions/function_based_step_readiness_status.rb +183 -0
  530. data/lib/tasker/functions/function_based_system_health_counts.rb +94 -0
  531. data/lib/tasker/functions/function_based_task_execution_context.rb +148 -0
  532. data/lib/tasker/functions/function_wrapper.rb +42 -0
  533. data/lib/tasker/functions.rb +12 -0
  534. data/lib/tasker/handler_factory.rb +322 -0
  535. data/lib/tasker/health/readiness_checker.rb +186 -0
  536. data/lib/tasker/health/status_checker.rb +203 -0
  537. data/lib/tasker/identity_strategy.rb +38 -0
  538. data/lib/tasker/logging/correlation_id_generator.rb +120 -0
  539. data/lib/tasker/orchestration/backoff_calculator.rb +184 -0
  540. data/lib/tasker/orchestration/connection_builder.rb +122 -0
  541. data/lib/tasker/orchestration/connection_pool_intelligence.rb +177 -0
  542. data/lib/tasker/orchestration/coordinator.rb +119 -0
  543. data/lib/tasker/orchestration/future_state_analyzer.rb +137 -0
  544. data/lib/tasker/orchestration/plugin_integration.rb +124 -0
  545. data/lib/tasker/orchestration/response_processor.rb +168 -0
  546. data/lib/tasker/orchestration/retry_header_parser.rb +78 -0
  547. data/lib/tasker/orchestration/step_executor.rb +941 -0
  548. data/lib/tasker/orchestration/step_sequence_factory.rb +67 -0
  549. data/lib/tasker/orchestration/task_finalizer.rb +564 -0
  550. data/lib/tasker/orchestration/task_initializer.rb +140 -0
  551. data/lib/tasker/orchestration/task_reenqueuer.rb +71 -0
  552. data/lib/tasker/orchestration/viable_step_discovery.rb +65 -0
  553. data/lib/tasker/orchestration/workflow_coordinator.rb +294 -0
  554. data/lib/tasker/orchestration.rb +45 -0
  555. data/lib/tasker/railtie.rb +9 -0
  556. data/lib/tasker/registry/base_registry.rb +177 -0
  557. data/lib/tasker/registry/event_publisher.rb +91 -0
  558. data/lib/tasker/registry/interface_validator.rb +140 -0
  559. data/lib/tasker/registry/statistics_collector.rb +381 -0
  560. data/lib/tasker/registry/subscriber_registry.rb +285 -0
  561. data/lib/tasker/registry.rb +22 -0
  562. data/lib/tasker/state_machine/step_state_machine.rb +508 -0
  563. data/lib/tasker/state_machine/task_state_machine.rb +192 -0
  564. data/lib/tasker/state_machine.rb +83 -0
  565. data/lib/tasker/step_handler/api.rb +410 -0
  566. data/lib/tasker/step_handler/base.rb +206 -0
  567. data/lib/tasker/task_builder.rb +432 -0
  568. data/lib/tasker/task_handler/class_methods.rb +324 -0
  569. data/lib/tasker/task_handler/instance_methods.rb +293 -0
  570. data/lib/tasker/task_handler/step_group.rb +182 -0
  571. data/lib/tasker/task_handler.rb +43 -0
  572. data/lib/tasker/telemetry/event_mapping.rb +126 -0
  573. data/lib/tasker/telemetry/event_router.rb +318 -0
  574. data/lib/tasker/telemetry/events/export_events.rb +38 -0
  575. data/lib/tasker/telemetry/export_coordinator.rb +497 -0
  576. data/lib/tasker/telemetry/intelligent_cache_manager.rb +508 -0
  577. data/lib/tasker/telemetry/log_backend.rb +224 -0
  578. data/lib/tasker/telemetry/metric_types.rb +368 -0
  579. data/lib/tasker/telemetry/metrics_backend.rb +1227 -0
  580. data/lib/tasker/telemetry/metrics_export_service.rb +392 -0
  581. data/lib/tasker/telemetry/plugin_registry.rb +333 -0
  582. data/lib/tasker/telemetry/plugins/base_exporter.rb +246 -0
  583. data/lib/tasker/telemetry/plugins/csv_exporter.rb +198 -0
  584. data/lib/tasker/telemetry/plugins/json_exporter.rb +141 -0
  585. data/lib/tasker/telemetry/prometheus_exporter.rb +249 -0
  586. data/lib/tasker/telemetry/trace_backend.rb +186 -0
  587. data/lib/tasker/telemetry.rb +59 -0
  588. data/lib/tasker/types/auth_config.rb +81 -0
  589. data/lib/tasker/types/backoff_config.rb +142 -0
  590. data/lib/tasker/types/cache_config.rb +257 -0
  591. data/lib/tasker/types/database_config.rb +39 -0
  592. data/lib/tasker/types/dependency_graph.rb +225 -0
  593. data/lib/tasker/types/dependency_graph_config.rb +149 -0
  594. data/lib/tasker/types/engine_config.rb +131 -0
  595. data/lib/tasker/types/execution_config.rb +289 -0
  596. data/lib/tasker/types/health_config.rb +84 -0
  597. data/lib/tasker/types/step_sequence.rb +24 -0
  598. data/lib/tasker/types/step_template.rb +63 -0
  599. data/lib/tasker/types/task_request.rb +60 -0
  600. data/lib/tasker/types/telemetry_config.rb +273 -0
  601. data/lib/tasker/types.rb +64 -0
  602. data/lib/tasker/version.rb +7 -0
  603. data/lib/tasker.rb +82 -0
  604. data/lib/tasks/tasker_tasks.rake +302 -0
  605. metadata +958 -0
data/docs/TELEMETRY.md ADDED
@@ -0,0 +1,795 @@
1
+ # Tasker Telemetry and Observability
2
+
3
+ ## Overview
4
+
5
+ Tasker includes comprehensive telemetry capabilities to provide insights into task execution, workflow steps, and overall system performance. The telemetry system leverages OpenTelemetry standards and a unified event architecture to ensure compatibility with a wide range of observability tools and platforms.
6
+
7
+ ## OpenTelemetry Architecture: Metrics vs Spans
8
+
9
+ Tasker follows OpenTelemetry best practices by using **both** metrics and spans for different purposes:
10
+
11
+ ### **Spans** 🔍 (Primary Focus)
12
+ - **Purpose**: Individual trace records with detailed context for debugging and analysis
13
+ - **Use Cases**:
14
+ - "Why did task #12345 take 30 seconds?"
15
+ - "What was the exact execution path for this failed workflow?"
16
+ - "Which step in the order process is the bottleneck?"
17
+ - **Benefits**:
18
+ - Complete request context and timing
19
+ - Parent-child relationships show workflow hierarchy
20
+ - Error propagation with full stack traces
21
+ - Rich attributes for detailed analysis
22
+ - **Implementation**: `TelemetrySubscriber` creates hierarchical spans for tasks and steps
23
+
24
+ ### **Metrics** 📊 (Derived or Separate)
25
+ - **Purpose**: Aggregated numerical data for dashboards, alerts, and SLIs/SLOs
26
+ - **Use Cases**:
27
+ - "How many tasks completed in the last hour?"
28
+ - "What's the 95th percentile task duration?"
29
+ - "Alert if error rate exceeds 5%"
30
+ - **Benefits**:
31
+ - Very efficient storage and querying
32
+ - Perfect for real-time dashboards
33
+ - Lightweight for high-volume scenarios
34
+ - **Implementation**: Can be derived from span data or collected separately
35
+
36
+ ### **Recommended Strategy**
37
+
38
+ ```mermaid
39
+ flowchart LR
40
+ subgraph Production["Production Architecture"]
41
+ Events["Tasker Events"] --> TelemetrySubscriber["TelemetrySubscriber<br/>(Spans Only)"]
42
+ TelemetrySubscriber --> Jaeger["Jaeger/Zipkin<br/>(Detailed Traces)"]
43
+ TelemetrySubscriber --> |"Derive Metrics"| MetricsBackend["Metrics Backend<br/>(DataDog/Prometheus)"]
44
+
45
+ Events --> CustomSubscribers["Custom Subscribers"]
46
+ CustomSubscribers --> |"Direct Metrics"| MetricsBackend
47
+ CustomSubscribers --> Alerts["Alerting Systems"]
48
+ end
49
+
50
+ style TelemetrySubscriber fill:#e1f5fe,stroke:#01579b
51
+ style CustomSubscribers fill:#f3e5f5,stroke:#4a148c
52
+ ```
53
+
54
+ ## Key Features
55
+
56
+ - **Unified Event System** - Single `Events::Publisher` with consistent event publishing patterns
57
+ - **Standardized Event Payloads** - `EventPayloadBuilder` ensures consistent telemetry data structure
58
+ - **Production-Ready OpenTelemetry Integration** - Full instrumentation stack with safety mechanisms
59
+ - **Hierarchical Span Creation** - Proper parent-child relationships for complex workflows
60
+ - **Automatic Step Error Persistence** - Complete error data capture with atomic transactions
61
+ - **Memory-Safe Operation** - Database connection pooling and leak prevention
62
+ - **Comprehensive Event Lifecycle Tracking** - Task, step, workflow, and orchestration events
63
+ - **Sensitive Data Filtering** - Automatic security and privacy protection
64
+ - **Developer-Friendly API** - Clean `EventPublisher` concern for easy event publishing
65
+ - **Custom Event Subscribers** - Generator and BaseSubscriber for creating integrations
66
+ - **Event Discovery System** - Complete event catalog with documentation and examples
67
+
68
+ ## Architecture
69
+
70
+ Tasker's telemetry is built on a unified event system with these main components:
71
+
72
+ 1. **Events::Publisher** - Centralized event publishing using dry-events with OpenTelemetry integration
73
+ 2. **EventPublisher Concern** - Clean interface providing `publish_event()`, `publish_step_event()`, etc.
74
+ 3. **EventPayloadBuilder** - Standardized payload creation for consistent telemetry data
75
+ 4. **TelemetrySubscriber** - Converts events to OpenTelemetry spans (spans only, no metrics)
76
+ 5. **Event Catalog** - Complete event discovery and documentation system
77
+ 6. **BaseSubscriber** - Foundation for creating custom event subscribers
78
+ 7. **Subscriber Generator** - Tool for creating custom integrations with external services
79
+ 8. **Configuration** - OpenTelemetry setup with production-ready safety mechanisms
80
+
81
+ ### Event Flow
82
+
83
+ ```mermaid
84
+ flowchart LR
85
+ subgraph Business["Business Logic"]
86
+ TaskHandler["Task Handler"]
87
+ StepLogic["Step Logic"]
88
+ Orchestration["Orchestration"]
89
+ StateMachine["State Machines"]
90
+ end
91
+
92
+ subgraph EventSystem["Unified Event System"]
93
+ Publisher["Events::Publisher"]
94
+ PayloadBuilder["EventPayloadBuilder"]
95
+ TelemetrySubscriber["TelemetrySubscriber<br/>(Spans)"]
96
+ end
97
+
98
+ subgraph Observability["Observability Stack"]
99
+ OpenTelemetry["OpenTelemetry"]
100
+ Jaeger["Jaeger"]
101
+ Zipkin["Zipkin"]
102
+ Honeycomb["Honeycomb"]
103
+ end
104
+
105
+ subgraph CustomMetrics["Custom Metrics"]
106
+ MetricsSubscriber["MetricsSubscriber"]
107
+ DataDog["DataDog"]
108
+ Prometheus["Prometheus"]
109
+ end
110
+
111
+ TaskHandler -->|publish_step_completed| Publisher
112
+ StepLogic -->|publish_step_failed| Publisher
113
+ Orchestration -->|publish_workflow_event| Publisher
114
+ StateMachine -->|publish_task_event| Publisher
115
+
116
+ Publisher --> PayloadBuilder
117
+ PayloadBuilder --> TelemetrySubscriber
118
+ PayloadBuilder --> MetricsSubscriber
119
+
120
+ TelemetrySubscriber --> OpenTelemetry
121
+ OpenTelemetry --> Jaeger
122
+ OpenTelemetry --> Zipkin
123
+ OpenTelemetry --> Honeycomb
124
+
125
+ MetricsSubscriber --> DataDog
126
+ MetricsSubscriber --> Prometheus
127
+
128
+ classDef business fill:#d4f1f9,stroke:#0b79a8
129
+ classDef events fill:#fff2cc,stroke:#d6b656
130
+ classDef observability fill:#e1d5e7,stroke:#9673a6
131
+ classDef metrics fill:#dae8fc,stroke:#6c8ebf
132
+
133
+ class Business business
134
+ class EventSystem events
135
+ class Observability observability
136
+ class CustomMetrics metrics
137
+ ```
138
+
139
+ ## Two Complementary Observability Systems
140
+
141
+ Tasker provides **two distinct but complementary observability systems** designed for different use cases:
142
+
143
+ ### 🔍 **TelemetrySubscriber (Event-Driven Spans)**
144
+ - **Purpose**: Detailed tracing and debugging with OpenTelemetry spans
145
+ - **Trigger**: Automatic via event subscription (no manual instrumentation needed)
146
+ - **Use Cases**:
147
+ - "Why did task #12345 fail?"
148
+ - "What's the execution path through this workflow?"
149
+ - "Which step is causing the bottleneck?"
150
+ - **Data**: Rich contextual information, hierarchical relationships, error details
151
+ - **Storage**: OpenTelemetry backends (Jaeger, Zipkin, Honeycomb)
152
+ - **Performance**: Optimized for detailed context, not high-volume aggregation
153
+
154
+ ### 📊 **MetricsBackend (Native Metrics Collection)**
155
+ - **Purpose**: High-performance aggregated metrics for dashboards and alerting
156
+ - **Trigger**: Direct collection during workflow execution (no events)
157
+ - **Use Cases**:
158
+ - "How many tasks completed in the last hour?"
159
+ - "What's the 95th percentile execution time?"
160
+ - "Alert if error rate exceeds 5%"
161
+ - **Data**: Numerical counters, gauges, histograms with labels
162
+ - **Storage**: Prometheus, JSON, CSV exports
163
+ - **Performance**: Optimized for high-volume, low-latency operations
164
+
165
+ ### Why Two Systems?
166
+
167
+ - **Performance**: Native metrics avoid event publishing overhead for high-frequency operations
168
+ - **Reliability**: Metrics collection continues even if the event system has issues
169
+ - **Flexibility**: Choose the appropriate storage backend for each use case
170
+ - **Scalability**: Each system is optimized for its specific workload
171
+
172
+ ### ✅ **TelemetrySubscriber** - Spans Only (Event-Driven)
173
+ ```ruby
174
+ class TelemetrySubscriber < BaseSubscriber
175
+ # ONLY creates OpenTelemetry spans with hierarchical context
176
+ # Triggered automatically by Tasker's event system
177
+ # Does NOT record metrics - focuses on detailed tracing
178
+
179
+ def handle_task_completed(event)
180
+ # Create comprehensive span with all context
181
+ finish_task_span(event, :ok, extract_attributes(event))
182
+ end
183
+ end
184
+ ```
185
+
186
+ ### ✅ **MetricsBackend** - Native Collection (Direct)
187
+ ```ruby
188
+ class MetricsBackend
189
+ # ONLY records aggregated metrics for dashboards/alerts
190
+ # Called directly during workflow execution
191
+ # Does NOT create spans - focuses on operational data
192
+
193
+ def record_task_completion(task_name:, duration:, status:)
194
+ # Record metrics for dashboards and alerting
195
+ counter("tasker.task.executions", status: status, task_name: task_name)
196
+ histogram("tasker.task.duration", value: duration, task_name: task_name)
197
+ end
198
+ end
199
+ ```
200
+
201
+ ### ✅ **Clean Architecture** - Complementary Systems
202
+ ```ruby
203
+ # config/initializers/tasker.rb
204
+ Tasker.configuration do |config|
205
+ config.telemetry do |tel|
206
+ # Enable OpenTelemetry spans (event-driven)
207
+ tel.enabled = true
208
+
209
+ # Enable native metrics collection (direct)
210
+ tel.metrics_enabled = true
211
+ tel.metrics_format = 'prometheus'
212
+ end
213
+ end
214
+ ```
215
+
216
+ See [METRICS.md](METRICS.md) for comprehensive details on the native metrics system including cache strategies, Kubernetes integration, and production deployment patterns.
217
+
218
+ ## Configuration
219
+
220
+ ### Tasker Configuration
221
+
222
+ Configure Tasker's telemetry in `config/initializers/tasker.rb`:
223
+
224
+ ```ruby
225
+ Tasker.configuration do |config|
226
+ config.telemetry do |tel|
227
+ # Enable telemetry (TelemetrySubscriber for OpenTelemetry spans)
228
+ tel.enabled = true
229
+
230
+ # Service name used for OpenTelemetry traces (default: 'tasker')
231
+ tel.service_name = 'my_app_tasker'
232
+
233
+ # Service version used for OpenTelemetry traces (default: Tasker::VERSION)
234
+ tel.service_version = '1.2.3'
235
+
236
+ # Parameters to filter from telemetry data for privacy and security
237
+ tel.filter_parameters = [:password, :api_key, 'credit_card.number', /token/i]
238
+
239
+ # The mask to use when filtering sensitive data (default: '[FILTERED]')
240
+ tel.filter_mask = '***REDACTED***'
241
+ end
242
+ end
243
+ ```
244
+
245
+ ### OpenTelemetry Configuration
246
+
247
+ Configure OpenTelemetry with production-ready settings in `config/initializers/opentelemetry.rb`:
248
+
249
+ ```ruby
250
+ require 'opentelemetry/sdk'
251
+ require 'opentelemetry-exporter-otlp'
252
+ require 'opentelemetry/instrumentation/all'
253
+
254
+ # Configure OpenTelemetry
255
+ OpenTelemetry::SDK.configure do |c|
256
+ # Use the configured service name
257
+ c.service_name = Tasker.configuration.telemetry.service_name
258
+
259
+ # Configure OTLP exporter to send to local Jaeger
260
+ otlp_exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
261
+ endpoint: 'http://localhost:4318/v1/traces'
262
+ )
263
+
264
+ # Add the OTLP exporter
265
+ c.add_span_processor(
266
+ OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(otlp_exporter)
267
+ )
268
+
269
+ # Configure resource with additional attributes
270
+ c.resource = OpenTelemetry::SDK::Resources::Resource.create({
271
+ 'service.name' => Tasker.configuration.telemetry.service_name,
272
+ 'service.version' => Tasker.configuration.telemetry.service_version,
273
+ 'service.framework' => 'tasker'
274
+ })
275
+
276
+ # Production-ready instrumentation configuration
277
+ # Excludes Faraday instrumentation due to known bug (see generator template for details)
278
+ c.use_all({ 'OpenTelemetry::Instrumentation::Faraday' => { enabled: false } })
279
+ end
280
+ ```
281
+
282
+ ## Custom Telemetry Integrations
283
+
284
+ Beyond OpenTelemetry, Tasker's event system enables easy integration with any observability or monitoring service:
285
+
286
+ ### Creating Custom Metrics Subscribers
287
+
288
+ Tasker now provides specialized tooling for metrics collection:
289
+
290
+ ```bash
291
+ # Generate a specialized metrics subscriber with helper methods
292
+ rails generate tasker:subscriber metrics --metrics --events task.completed task.failed step.completed step.failed
293
+
294
+ # Generate a regular subscriber for other integrations
295
+ rails generate tasker:subscriber pager_duty --events task.failed step.failed
296
+ ```
297
+
298
+ The `--metrics` flag creates a specialized subscriber with:
299
+ - Built-in helper methods for extracting timing, error, and performance metrics
300
+ - Automatic tag generation for categorization
301
+ - Examples for StatsD, DataDog, Prometheus, and other metrics systems
302
+ - Safe numeric value extraction with defaults
303
+ - Production-ready patterns for operational monitoring
304
+
305
+ ### Creating Custom Subscribers
306
+
307
+ For non-metrics integrations, use the regular subscriber generator (omit the `--metrics` flag), as in the `pager_duty` command shown above.
308
+
309
+ ### Example Custom Integrations
310
+
311
+ **Metrics Collection (Using Helper Methods)**:
312
+ ```ruby
313
+ class MetricsSubscriber < Tasker::Events::Subscribers::BaseSubscriber
314
+ subscribe_to 'task.completed', 'step.completed'
315
+
316
+ def handle_task_completed(event)
317
+ # Use built-in helper methods for standardized data extraction
318
+ timing = extract_timing_metrics(event)
319
+ tags = extract_metric_tags(event)
320
+
321
+ # Record task completion metrics using helpers
322
+ record_histogram('tasker.task.duration', timing[:execution_duration], tags)
323
+ record_counter('tasker.task.completed', 1, tags)
324
+
325
+ if timing[:step_count] > 0
326
+ record_gauge('tasker.workflow.step_count', timing[:step_count], tags)
327
+ end
328
+ end
329
+
330
+ def handle_step_completed(event)
331
+ timing = extract_timing_metrics(event)
332
+ tags = extract_metric_tags(event)
333
+
334
+ # Record step-level metrics
335
+ record_histogram('tasker.step.duration', timing[:execution_duration], tags)
336
+ end
337
+
338
+ private
339
+
340
+ # Customize for your metrics backend (StatsD example)
341
+ def record_histogram(name, value, tags = [])
342
+ StatsD.histogram(name, value, tags: tags)
343
+ end
344
+
345
+ def record_counter(name, value, tags = [])
346
+ StatsD.increment(name, value, tags: tags)
347
+ end
348
+
349
+ def record_gauge(name, value, tags = [])
350
+ StatsD.gauge(name, value, tags: tags)
351
+ end
352
+ end
353
+ ```
354
+
355
+ **Error Tracking (Sentry)**:
356
+ ```ruby
357
+ class SentrySubscriber < Tasker::Events::Subscribers::BaseSubscriber
358
+ subscribe_to 'task.failed', 'step.failed'
359
+
360
+ def handle_task_failed(event)
361
+ task_id = safe_get(event, :task_id)
362
+ error_message = safe_get(event, :error_message, 'Unknown error')
363
+
364
+ Sentry.capture_message(error_message, level: 'error', fingerprint: ['tasker', 'task_failed', task_id])
365
+ end
366
+ end
367
+ ```
368
+
369
+ ### Metrics Helper Methods
370
+
371
+ BaseSubscriber now includes specialized helper methods for extracting common metrics data:
372
+
373
+ ```ruby
374
+ # Extract timing metrics (duration, step counts, etc.)
375
+ timing = extract_timing_metrics(event)
376
+ # => { execution_duration: 45.2, step_count: 5, completed_steps: 5, failed_steps: 0 }
377
+
378
+ # Extract error metrics with categorization
379
+ error = extract_error_metrics(event)
380
+ # => { error_type: 'timeout', attempt_number: 2, is_retryable: true, final_failure: false }
381
+
382
+ # Extract performance metrics
383
+ perf = extract_performance_metrics(event)
384
+ # => { memory_usage: 1024, cpu_time: 2.5, queue_time: 0.1, processing_time: 1.8 }
385
+
386
+ # Generate standardized tags for categorization
387
+ tags = extract_metric_tags(event)
388
+ # => ['task:order_process', 'environment:production', 'retryable:true']
389
+
390
+ # Build consistent metric names
391
+ metric_name = build_metric_name('tasker.task', 'completed')
392
+ # => 'tasker.task.completed'
393
+
394
+ # Extract numeric values safely
395
+ duration = extract_numeric_metric(event, :execution_duration, 0.0)
396
+ # => 45.2 (with proper type conversion and defaults)
397
+ ```
398
+
399
+ These helpers standardize metrics extraction and ensure consistency across different subscriber implementations.
400
+
401
+ For complete documentation on creating custom subscribers and integration examples, see [EVENT_SYSTEM.md](EVENT_SYSTEM.md).
402
+
403
+ ## Integration with OpenTelemetry
404
+
405
+ Tasker's unified event system automatically integrates with OpenTelemetry through the enhanced `TelemetrySubscriber`. For each task:
406
+
407
+ 1. **Root Task Span**: A root span (`tasker.task.execution`) is created when the task starts and stored for the entire task lifecycle
408
+ 2. **Child Step Spans**: Child spans (`tasker.step.execution`) are created for each step with proper parent-child relationships to the root task span
409
+ 3. **Hierarchical Context**: All spans maintain proper parent-child relationships, ensuring full traceability in Jaeger/Zipkin
410
+ 4. **Event Annotations**: Each span includes relevant events (task.started, step.completed, etc.) with comprehensive attributes
411
+ 5. **Error Propagation**: Error status and messages are properly propagated through the span hierarchy
412
+ 6. **Performance Metrics**: Execution duration and attempt tracking are captured at both task and step levels
413
+
414
+ ### Span Hierarchy Example
415
+
416
+ ```
417
+ tasker.task.execution (task_id: 123, task_name: order_process)
418
+ ├── events: [task.started, task.completed]
419
+ ├── attributes: { tasker.task_id: "123", tasker.task_name: "order_process", tasker.total_steps: 5 }
420
+ ├── status: OK
421
+ └── child spans:
422
+ ├── tasker.step.execution (step: fetch_cart)
423
+ │ ├── events: [step.completed]
424
+ │ ├── attributes: { tasker.task_id: "123", tasker.step_id: "456", tasker.step_name: "fetch_cart", tasker.execution_duration: "1.23" }
425
+ │ └── status: OK
426
+ ├── tasker.step.execution (step: validate_products)
427
+ │ ├── events: [step.completed]
428
+ │ ├── attributes: { tasker.task_id: "123", tasker.step_id: "457", tasker.step_name: "validate_products", tasker.execution_duration: "2.34" }
429
+ │ └── status: OK
430
+ └── tasker.step.execution (step: process_payment)
431
+ ├── events: [step.failed]
432
+ ├── attributes: { tasker.task_id: "123", tasker.step_id: "458", tasker.step_name: "process_payment", tasker.error: "Payment gateway timeout" }
433
+ └── status: ERROR
434
+ ```
435
+
436
+ ### Key Improvements
437
+
438
+ - **Proper Hierarchical Context**: All step spans are now properly parented to their task span
439
+ - **Consistent Span Names**: Standardized span names (`tasker.task.execution`, `tasker.step.execution`) make filtering and querying easier
440
+ - **Rich Event Annotations**: Spans include relevant lifecycle events as annotations for detailed timeline visibility
441
+ - **Error Context Preservation**: Failed steps maintain full error context while still being linked to their parent task
442
+ - **Task ID Propagation**: All spans include the task_id for easy correlation across the entire workflow
443
+
444
+ ## Best Practices
445
+
446
+ ### 1. Single Responsibility for Telemetry Components
447
+
448
+ ```ruby
449
+ # ✅ GOOD: TelemetrySubscriber focuses only on spans
450
+ class TelemetrySubscriber < BaseSubscriber
451
+ def handle_task_completed(event)
452
+ # Only create detailed spans for debugging
453
+ finish_task_span(event, :ok, extract_attributes(event))
454
+ end
455
+ end
456
+
457
+ # ✅ GOOD: MetricsSubscriber focuses only on metrics
458
+ class MetricsSubscriber < BaseSubscriber
459
+ def handle_task_completed(event)
460
+ # Only record metrics for dashboards/alerts
461
+ StatsD.histogram('task.duration', safe_get(event, :execution_duration, 0))
462
+ end
463
+ end
464
+
465
+ # ❌ BAD: Don't mix both in one subscriber
466
+ class MixedSubscriber < BaseSubscriber
467
+ def handle_task_completed(event)
468
+ finish_task_span(event, :ok, extract_attributes(event)) # Spans
469
+ StatsD.histogram('task.duration', event[:duration]) # Metrics - causes confusion
470
+ end
471
+ end
472
+ ```
473
+
474
+ ### 2. Avoid Duplication Between Systems
475
+
476
+ ```ruby
477
+ # ✅ GOOD: Simple, clean telemetry configuration
478
+ config.telemetry.enabled = true # TelemetrySubscriber creates OpenTelemetry spans
479
+
480
+ # ✅ GOOD: Create separate subscribers for different purposes
481
+ # - TelemetrySubscriber: OpenTelemetry spans for debugging
482
+ # - MetricsSubscriber: Operational metrics for dashboards
483
+ # - AlertingSubscriber: Critical alerts for incidents
484
+ ```
485
+
486
+ ### 3. Use Spans for Debugging, Metrics for Operations
487
+
488
+ ```ruby
489
+ # ✅ Spans: "Why did task #12345 fail?"
490
+ # Use when you need detailed context for specific instances
491
+
492
+ # ✅ Metrics: "How many tasks are failing per hour?"
493
+ # Use when you need aggregated data for dashboards/alerts
494
+ ```
495
+
496
+ ### 4. Production Sampling Strategy
497
+
498
+ ```ruby
499
+ # Consider span sampling for high-volume production environments
500
+ OpenTelemetry::SDK.configure do |c|
501
+ # Batch span exports to keep export overhead low in high-volume environments
502
+ c.add_span_processor(
503
+ OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
504
+ otlp_exporter,
505
+ schedule_delay: 5_000, # 5 seconds
506
+ max_queue_size: 2_048, # Queue size
507
+ max_export_batch_size: 512 # Batch size
508
+ )
509
+ )
510
+
511
+ # Use probabilistic sampler for production
512
+ c.use('OpenTelemetry::SDK::Trace::Samplers::ProbabilitySampler', 0.1) # 10% sampling
513
+ end
514
+
515
+ # Keep 100% of metrics - they're much cheaper to store
516
+ ```
517
+
518
+ ## Event Payload Standardization
519
+
520
+ The `EventPayloadBuilder` ensures all events have consistent, comprehensive payloads:
521
+
522
+ ### Step Event Payloads
523
+
524
+ ```ruby
525
+ {
526
+ # Core identifiers (always present)
527
+ task_id: "task_123",
528
+ step_id: "step_456",
529
+ step_name: "fetch_cart",
530
+
531
+ # Timing information
532
+ started_at: "2025-06-01T12:00:00Z",
533
+ completed_at: "2025-06-01T12:00:02Z",
534
+ execution_duration: 2.34,
535
+
536
+ # Retry and attempt tracking
537
+ attempt_number: 1,
538
+ retry_limit: 3,
539
+
540
+ # Event metadata
541
+ event_type: "completed",
542
+ timestamp: "2025-06-01T12:00:02Z"
543
+ }
544
+ ```
545
+
546
+ ### Task Event Payloads
547
+
548
+ ```ruby
549
+ {
550
+ # Core identifiers
551
+ task_id: "task_123",
552
+ task_name: "order_processing",
553
+
554
+ # Timing information
555
+ started_at: "2025-06-01T12:00:00Z",
556
+ completed_at: "2025-06-01T12:05:30Z",
557
+
558
+ # Task statistics (from optimized queries)
559
+ total_steps: 5,
560
+ completed_steps: 5,
561
+ failed_steps: 0,
562
+
563
+ # Event metadata
564
+ event_type: "completed",
565
+ timestamp: "2025-06-01T12:05:30Z"
566
+ }
567
+ ```
568
+
569
+ ## Developing with Telemetry
570
+
571
+ ### Using EventPublisher Concern
572
+
573
+ When implementing custom step handlers, events are **automatically published** around your business logic:
574
+
575
+ ```ruby
576
+ class MyCustomStepHandler < Tasker::StepHandler::Base
577
+ def process(task, sequence, step)
578
+ # Events are published automatically around this method:
579
+ # 1. publish_step_started(step) - fired before this method
580
+ # 2. publish_step_completed(step) - fired after successful completion
581
+ # 3. publish_step_failed(step, error: exception) - fired if exception occurs
582
+
583
+ # Just implement your business logic and return the results:
584
+ result = perform_complex_operation(task.context)
585
+ { success: true, data: result }
586
+
587
+ # No need to manually publish events or set step.results - they happen automatically!
588
+ end
589
+ end
590
+ ```
591
+
592
+ #### For API Step Handlers
593
+
594
+ API step handlers follow the same automatic event publishing pattern:
595
+
596
+ ```ruby
597
+ class MyApiStepHandler < Tasker::StepHandler::Api
598
+ def process(task, sequence, step)
599
+ # Events published automatically around the entire process() flow
600
+ # Just focus on making your API call:
601
+
602
+ user_id = task.context['user_id']
603
+ connection.get("/users/#{user_id}/profile")
604
+
605
+ # Automatic events:
606
+ # - step_started before process
607
+ # - step_completed after successful process
608
+ # - step_failed if exception occurs
609
+ end
610
+
611
+ # Optional: custom response processing
612
+ def process(task, sequence, step)
613
+ # Let parent handle API call and basic response processing
614
+ super
615
+
616
+ # Add custom processing
617
+ user_data = step.results.body['user']
618
+ step.results = { user: user_data, processed_at: Time.current }
619
+ end
620
+ end
621
+ ```
622
+
623
+ **Key Architecture Points:**
624
+ - ✅ **Implement `process()`** for regular step handlers (your business logic)
625
+ - ✅ **Implement `process()`** for API step handlers (your HTTP request)
626
+ - ✅ **Optionally override `process()`** in API handlers for custom response processing
627
+ - ✅ **Optionally override `process_results()`** to customize how return values are stored in `step.results`
628
+ - ⚠️ **Never override `handle()`** - it's framework-only code that publishes events and coordinates execution
629
+
630
+ #### Alternative: Manual Event Publishing (Advanced Use Cases)
631
+
632
+ ### Manual Event Publishing (Advanced Use Cases)
633
+
634
+ For special cases where you need additional custom events, you can still manually publish them:
635
+
636
+ ```ruby
637
+ class MyStepHandlerWithCustomEvents < Tasker::StepHandler::Base
638
+ include Tasker::Concerns::EventPublisher
639
+
640
+ def process(task, sequence, step)
641
+ # Custom domain-specific event (before your business logic)
642
+ publish_event('order.validation_started', {
643
+ order_id: task.context['order_id'],
644
+ validation_rules: get_validation_rules
645
+ })
646
+
647
+ # Your business logic
648
+ validation_result = validate_order(task.context)
649
+
650
+ # Another custom event (after your business logic)
651
+ publish_event('order.validation_completed', {
652
+ order_id: task.context['order_id'],
653
+ validation_passed: validation_result[:passed]
654
+ })
655
+
656
+ # Return results - they will be stored in step.results automatically
657
+ { validation_passed: validation_result[:passed], details: validation_result[:details] }
658
+ end
659
+ end
660
+ ```
661
+
662
+ ## Error Handling and Observability
663
+
664
+ Tasker automatically captures comprehensive error information:
665
+
666
+ ```ruby
667
+ # Error events automatically include:
668
+ {
669
+ task_id: "task_123",
670
+ step_id: "step_456",
671
+ step_name: "payment_processing",
672
+ error_message: "Payment gateway timeout",
673
+ error_class: "PaymentGateway::TimeoutError",
674
+ backtrace: ["app/services/payment.rb:45", "..."],
675
+ attempt_number: 2,
676
+ retry_limit: 3,
677
+ event_type: "failed",
678
+ timestamp: "2025-06-01T12:00:15Z"
679
+ }
680
+ ```
681
+
682
+ ## Production Considerations
683
+
684
+ ### Memory Management
685
+
686
+ Tasker includes production-ready memory management:
687
+ - Database connection pooling prevents connection exhaustion
688
+ - Explicit cleanup in concurrent processing (`futures.clear()`)
689
+ - Batched processing limits (`MAX_CONCURRENT_STEPS = 3`)
690
+
691
+ ### OpenTelemetry Safety
692
+
693
+ The system includes safety mechanisms for production use:
694
+ - Selective instrumentation excludes problematic components (Faraday)
695
+ - PostgreSQL instrumentation safely re-enabled after connection improvements
696
+ - Error isolation prevents telemetry failures from affecting core workflow
697
+
698
+ ### Performance Optimization
699
+
700
+ - Optimized payload building with single database queries (`WorkflowStep.task_completion_stats`)
701
+ - Immediate event emission (no custom batching overhead)
702
+ - Lightweight event publishing with standardized payloads
703
+
704
+ ## Troubleshooting
705
+
706
+ ### Common Issues
707
+
708
+ - **Missing Events**: If custom events are not being published, check that the `Tasker::Concerns::EventPublisher` concern is included in the step handler
709
+ - **Payload Issues**: Use domain-specific methods like `publish_step_completed(step)` for standardized payloads
710
+ - **Parameter Confusion**: Use clean API methods instead of legacy `publish_step_event()` with redundant `event_type:` parameters
711
+ - **Error Information Missing**: Use `publish_step_failed(step, error: exception)` for automatic error capture
712
+ - **OpenTelemetry Errors**: Ensure Faraday instrumentation is disabled (known bug)
713
+ - **Memory Issues**: Verify database connection pooling is configured
714
+ - **Performance Impact**: Monitor for excessive event publishing in high-throughput scenarios
715
+
716
+ ### Span Duplication Issues
717
+
718
+ - **Multiple Telemetry Subscribers**: If you see duplicated spans or other unexpected telemetry behavior, ensure you're not creating multiple subscribers that handle the same events. Use single-responsibility subscribers:
719
+ ```ruby
720
+ # ✅ GOOD: Single responsibility per subscriber
721
+ config.telemetry.enabled = true # TelemetrySubscriber for spans only
722
+ # Create separate MetricsSubscriber for operational data
723
+ ```
724
+
725
+ - **Metrics and Spans Mixed**: If you're seeing both metrics and spans for the same events, separate them into different subscribers with single responsibilities
726
+
727
+ ### Hierarchical Context Issues
728
+
729
+ - **Standalone Spans in Jaeger**: If you see individual spans without parent-child relationships, ensure the `TelemetrySubscriber` is properly registered and OpenTelemetry is configured
730
+ - **Missing Task Context**: Step spans should appear as children of task spans. If steps appear as standalone spans, check that `task.start_requested` events are being published before step events
731
+ - **Broken Span Hierarchy**: Verify that the task ID is consistently included in all event payloads - this is critical for maintaining span relationships
732
+
733
+ ### Debug Commands
734
+
735
+ ```bash
736
+ # Verify OpenTelemetry configuration
737
+ bundle exec rails runner "puts OpenTelemetry.tracer_provider.inspect"
738
+
739
+ # Check event publisher availability
740
+ bundle exec rails runner "puts Tasker::Events::Publisher.instance.inspect"
741
+
742
+ # Validate telemetry subscriber
743
+ bundle exec rails runner "puts Tasker::Events::Subscribers::TelemetrySubscriber.new.inspect"
744
+
745
+ # Test TelemetrySubscriber span management
746
+ bundle exec rails runner "
747
+ subscriber = Tasker::Events::Subscribers::TelemetrySubscriber.new
748
+ puts 'OpenTelemetry available: ' + subscriber.send(:opentelemetry_available?).to_s
749
+ puts 'Telemetry enabled: ' + subscriber.send(:telemetry_enabled?).to_s
750
+ "
751
+
752
+ # Test span creation with sample events
753
+ bundle exec rails runner "
754
+ subscriber = Tasker::Events::Subscribers::TelemetrySubscriber.new
755
+
756
+ # Simulate task start event
757
+ task_event = { task_id: 'test-123', task_name: 'test_task' }
758
+ subscriber.handle_task_start_requested(task_event)
759
+
760
+ # Check if span was stored
761
+ span = subscriber.send(:get_task_span, 'test-123')
762
+ puts 'Task span created: ' + (!span.nil?).to_s
763
+
764
+ # Simulate step event
765
+ step_event = { task_id: 'test-123', step_id: 'step-456', step_name: 'test_step' }
766
+ subscriber.handle_step_completed(step_event)
767
+
768
+ # Clean up
769
+ subscriber.send(:remove_task_span, 'test-123')
770
+ "
771
+ ```
772
+
773
+ ### Log Monitoring
774
+
775
+ Look for these log patterns:
776
+ - `Instrumentation: OpenTelemetry::Instrumentation::* was successfully installed`
777
+ - `Instrumentation: OpenTelemetry::Instrumentation::Faraday failed to install` (expected, because Faraday instrumentation is intentionally excluded)
778
+ - Event publishing errors: `Error publishing event * :`
779
+
780
+ ## Summary
781
+
782
+ The key insight is that **spans can provide much of the same information as metrics**, but the two serve different purposes:
783
+
784
+ - **Spans**: Rich individual trace records for debugging specific issues
785
+ - **Metrics**: Aggregated operational data for dashboards and alerting
786
+
787
+ **Recommended approach**:
788
+
789
+ 1. **Use `TelemetrySubscriber` for comprehensive spans** (captures everything for debugging)
790
+ 2. **Create separate `MetricsSubscriber` for operational metrics** (lightweight data for dashboards)
791
+ 3. **Use single-responsibility subscribers** for clean separation of concerns
792
+ 4. **Consider deriving metrics from spans** in high-maturity setups instead of separate collection
793
+
794
+ This gives you the best of both worlds: detailed debugging capability through spans and efficient operational monitoring through metrics.
795
+