job-workflow 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +91 -0
- data/CHANGELOG.md +23 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +55 -0
- data/Steepfile +10 -0
- data/guides/API_REFERENCE.md +112 -0
- data/guides/BEST_PRACTICES.md +113 -0
- data/guides/CACHE_STORE_INTEGRATION.md +145 -0
- data/guides/CONDITIONAL_EXECUTION.md +66 -0
- data/guides/DEPENDENCY_WAIT.md +386 -0
- data/guides/DRY_RUN.md +390 -0
- data/guides/DSL_BASICS.md +216 -0
- data/guides/ERROR_HANDLING.md +187 -0
- data/guides/GETTING_STARTED.md +524 -0
- data/guides/INSTRUMENTATION.md +131 -0
- data/guides/LIFECYCLE_HOOKS.md +415 -0
- data/guides/NAMESPACES.md +75 -0
- data/guides/OPENTELEMETRY_INTEGRATION.md +86 -0
- data/guides/PARALLEL_PROCESSING.md +302 -0
- data/guides/PRODUCTION_DEPLOYMENT.md +110 -0
- data/guides/QUEUE_MANAGEMENT.md +141 -0
- data/guides/README.md +174 -0
- data/guides/SCHEDULED_JOBS.md +165 -0
- data/guides/STRUCTURED_LOGGING.md +268 -0
- data/guides/TASK_OUTPUTS.md +240 -0
- data/guides/TESTING_STRATEGY.md +56 -0
- data/guides/THROTTLING.md +198 -0
- data/guides/TROUBLESHOOTING.md +53 -0
- data/guides/WORKFLOW_COMPOSITION.md +675 -0
- data/guides/WORKFLOW_STATUS_QUERY.md +288 -0
- data/lib/job-workflow.rb +3 -0
- data/lib/job_workflow/argument_def.rb +16 -0
- data/lib/job_workflow/arguments.rb +40 -0
- data/lib/job_workflow/auto_scaling/adapter/aws_adapter.rb +66 -0
- data/lib/job_workflow/auto_scaling/adapter.rb +31 -0
- data/lib/job_workflow/auto_scaling/configuration.rb +85 -0
- data/lib/job_workflow/auto_scaling/executor.rb +43 -0
- data/lib/job_workflow/auto_scaling.rb +69 -0
- data/lib/job_workflow/cache_store_adapters.rb +46 -0
- data/lib/job_workflow/context.rb +352 -0
- data/lib/job_workflow/dry_run_config.rb +31 -0
- data/lib/job_workflow/dsl.rb +236 -0
- data/lib/job_workflow/error_hook.rb +24 -0
- data/lib/job_workflow/hook.rb +24 -0
- data/lib/job_workflow/hook_registry.rb +66 -0
- data/lib/job_workflow/instrumentation/log_subscriber.rb +194 -0
- data/lib/job_workflow/instrumentation/opentelemetry_subscriber.rb +221 -0
- data/lib/job_workflow/instrumentation.rb +257 -0
- data/lib/job_workflow/job_status.rb +92 -0
- data/lib/job_workflow/logger.rb +86 -0
- data/lib/job_workflow/namespace.rb +36 -0
- data/lib/job_workflow/output.rb +81 -0
- data/lib/job_workflow/output_def.rb +14 -0
- data/lib/job_workflow/queue.rb +74 -0
- data/lib/job_workflow/queue_adapter.rb +38 -0
- data/lib/job_workflow/queue_adapters/abstract.rb +87 -0
- data/lib/job_workflow/queue_adapters/null_adapter.rb +127 -0
- data/lib/job_workflow/queue_adapters/solid_queue_adapter.rb +224 -0
- data/lib/job_workflow/runner.rb +173 -0
- data/lib/job_workflow/schedule.rb +46 -0
- data/lib/job_workflow/semaphore.rb +71 -0
- data/lib/job_workflow/task.rb +83 -0
- data/lib/job_workflow/task_callable.rb +43 -0
- data/lib/job_workflow/task_context.rb +70 -0
- data/lib/job_workflow/task_dependency_wait.rb +66 -0
- data/lib/job_workflow/task_enqueue.rb +50 -0
- data/lib/job_workflow/task_graph.rb +43 -0
- data/lib/job_workflow/task_job_status.rb +70 -0
- data/lib/job_workflow/task_output.rb +51 -0
- data/lib/job_workflow/task_retry.rb +64 -0
- data/lib/job_workflow/task_throttle.rb +46 -0
- data/lib/job_workflow/version.rb +5 -0
- data/lib/job_workflow/workflow.rb +87 -0
- data/lib/job_workflow/workflow_status.rb +112 -0
- data/lib/job_workflow.rb +59 -0
- data/rbs_collection.lock.yaml +172 -0
- data/rbs_collection.yaml +14 -0
- data/sig/generated/job-workflow.rbs +2 -0
- data/sig/generated/job_workflow/argument_def.rbs +14 -0
- data/sig/generated/job_workflow/arguments.rbs +26 -0
- data/sig/generated/job_workflow/auto_scaling/adapter/aws_adapter.rbs +32 -0
- data/sig/generated/job_workflow/auto_scaling/adapter.rbs +22 -0
- data/sig/generated/job_workflow/auto_scaling/configuration.rbs +50 -0
- data/sig/generated/job_workflow/auto_scaling/executor.rbs +29 -0
- data/sig/generated/job_workflow/auto_scaling.rbs +47 -0
- data/sig/generated/job_workflow/cache_store_adapters.rbs +28 -0
- data/sig/generated/job_workflow/context.rbs +155 -0
- data/sig/generated/job_workflow/dry_run_config.rbs +16 -0
- data/sig/generated/job_workflow/dsl.rbs +117 -0
- data/sig/generated/job_workflow/error_hook.rbs +18 -0
- data/sig/generated/job_workflow/hook.rbs +18 -0
- data/sig/generated/job_workflow/hook_registry.rbs +47 -0
- data/sig/generated/job_workflow/instrumentation/log_subscriber.rbs +102 -0
- data/sig/generated/job_workflow/instrumentation/opentelemetry_subscriber.rbs +113 -0
- data/sig/generated/job_workflow/instrumentation.rbs +138 -0
- data/sig/generated/job_workflow/job_status.rbs +46 -0
- data/sig/generated/job_workflow/logger.rbs +56 -0
- data/sig/generated/job_workflow/namespace.rbs +24 -0
- data/sig/generated/job_workflow/output.rbs +39 -0
- data/sig/generated/job_workflow/output_def.rbs +12 -0
- data/sig/generated/job_workflow/queue.rbs +49 -0
- data/sig/generated/job_workflow/queue_adapter.rbs +18 -0
- data/sig/generated/job_workflow/queue_adapters/abstract.rbs +56 -0
- data/sig/generated/job_workflow/queue_adapters/null_adapter.rbs +73 -0
- data/sig/generated/job_workflow/queue_adapters/solid_queue_adapter.rbs +111 -0
- data/sig/generated/job_workflow/runner.rbs +66 -0
- data/sig/generated/job_workflow/schedule.rbs +34 -0
- data/sig/generated/job_workflow/semaphore.rbs +37 -0
- data/sig/generated/job_workflow/task.rbs +60 -0
- data/sig/generated/job_workflow/task_callable.rbs +30 -0
- data/sig/generated/job_workflow/task_context.rbs +52 -0
- data/sig/generated/job_workflow/task_dependency_wait.rbs +42 -0
- data/sig/generated/job_workflow/task_enqueue.rbs +27 -0
- data/sig/generated/job_workflow/task_graph.rbs +27 -0
- data/sig/generated/job_workflow/task_job_status.rbs +42 -0
- data/sig/generated/job_workflow/task_output.rbs +29 -0
- data/sig/generated/job_workflow/task_retry.rbs +30 -0
- data/sig/generated/job_workflow/task_throttle.rbs +20 -0
- data/sig/generated/job_workflow/version.rbs +5 -0
- data/sig/generated/job_workflow/workflow.rbs +48 -0
- data/sig/generated/job_workflow/workflow_status.rbs +55 -0
- data/sig/generated/job_workflow.rbs +8 -0
- data/sig-private/activejob.rbs +35 -0
- data/sig-private/activesupport.rbs +23 -0
- data/sig-private/aws.rbs +32 -0
- data/sig-private/opentelemetry.rbs +40 -0
- data/sig-private/solid_queue.rbs +108 -0
- data/tmp/.keep +0 -0
- metadata +190 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# Task Outputs
|
|
2
|
+
|
|
3
|
+
JobWorkflow allows tasks to define and collect outputs, making it easy to access task execution results. This is particularly useful when you need to use results from previous tasks in subsequent tasks or when collecting results from parallel map tasks.
|
|
4
|
+
|
|
5
|
+
## Defining Task Outputs
|
|
6
|
+
|
|
7
|
+
Use the `output:` option to define the structure of task outputs. Specify output field names and their types as a hash.
|
|
8
|
+
|
|
9
|
+
### Basic Output Definition
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
class DataProcessingJob < ApplicationJob
|
|
13
|
+
include JobWorkflow::DSL
|
|
14
|
+
|
|
15
|
+
argument :input_value, "Integer", default: 0
|
|
16
|
+
|
|
17
|
+
# Define task with outputs
|
|
18
|
+
task :calculate, output: { result: "Integer", message: "String" } do |ctx|
|
|
19
|
+
input_value = ctx.arguments.input_value
|
|
20
|
+
# Return a hash with the defined keys
|
|
21
|
+
{
|
|
22
|
+
result: input_value * 2,
|
|
23
|
+
message: "Calculation complete"
|
|
24
|
+
}
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Access the output from another task
|
|
28
|
+
task :report, depends_on: [:calculate] do |ctx|
|
|
29
|
+
puts "Result: #{ctx.output[:calculate].first.result}"
|
|
30
|
+
puts "Message: #{ctx.output[:calculate].first.message}"
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Output with Map Tasks
|
|
36
|
+
|
|
37
|
+
Outputs from map tasks are collected as an array, with one output per iteration.
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
class BatchCalculationJob < ApplicationJob
|
|
41
|
+
include JobWorkflow::DSL
|
|
42
|
+
|
|
43
|
+
argument :numbers, "Array[Integer]", default: []
|
|
44
|
+
|
|
45
|
+
# Map task with output definition
|
|
46
|
+
task :double_numbers,
|
|
47
|
+
each: ->(ctx) { ctx.arguments.numbers },
|
|
48
|
+
output: { doubled: "Integer", original: "Integer" } do |ctx|
|
|
49
|
+
value = ctx.each_value
|
|
50
|
+
{
|
|
51
|
+
doubled: value * 2,
|
|
52
|
+
original: value
|
|
53
|
+
}
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Access all outputs from the map task
|
|
57
|
+
task :summarize, depends_on: [:double_numbers] do |ctx|
|
|
58
|
+
ctx.output[:double_numbers].each do |output|
|
|
59
|
+
puts "Original: #{output.original}, Doubled: #{output.doubled}"
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Calculate total
|
|
63
|
+
total = ctx.output[:double_numbers].sum(&:doubled)
|
|
64
|
+
puts "Total: #{total}"
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Execution
|
|
69
|
+
BatchCalculationJob.perform_now(numbers: [1, 2, 3, 4, 5])
|
|
70
|
+
# Output:
|
|
71
|
+
# Original: 1, Doubled: 2
|
|
72
|
+
# Original: 2, Doubled: 4
|
|
73
|
+
# Original: 3, Doubled: 6
|
|
74
|
+
# Original: 4, Doubled: 8
|
|
75
|
+
# Original: 5, Doubled: 10
|
|
76
|
+
# Total: 30
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Accessing Task Outputs
|
|
80
|
+
|
|
81
|
+
Task outputs are accessible through `ctx.output` using `[]` with the task name. It always returns an Array of TaskOutput-like objects.
|
|
82
|
+
|
|
83
|
+
### Regular Task Output
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
task :fetch_data, output: { count: "Integer", items: "Array" } do |ctx|
|
|
87
|
+
data = ExternalAPI.fetch
|
|
88
|
+
{
|
|
89
|
+
count: data.size,
|
|
90
|
+
items: data
|
|
91
|
+
}
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
task :process, depends_on: [:fetch_data] do |ctx|
|
|
95
|
+
# Access output fields directly
|
|
96
|
+
puts "Received #{ctx.output[:fetch_data].first.count} items"
|
|
97
|
+
ctx.output[:fetch_data].first.items.each do |item|
|
|
98
|
+
process_item(item)
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Map Task Output Array
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
task :process_items,
|
|
107
|
+
each: ->(ctx) { ctx.arguments.items },
|
|
108
|
+
output: { result: "String", status: "String" } do |ctx|
|
|
109
|
+
item = ctx.each_value
|
|
110
|
+
{
|
|
111
|
+
result: transform(item),
|
|
112
|
+
status: "success"
|
|
113
|
+
}
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
task :verify, depends_on: [:process_items] do |ctx|
|
|
117
|
+
# outputs is an array of TaskOutput objects
|
|
118
|
+
outputs = ctx.output[:process_items]
|
|
119
|
+
|
|
120
|
+
successful = outputs.count { |o| o.status == "success" }
|
|
121
|
+
puts "Processed #{outputs.size} items, #{successful} successful"
|
|
122
|
+
|
|
123
|
+
# Access individual outputs by index
|
|
124
|
+
first_result = outputs[0].result
|
|
125
|
+
last_result = outputs[-1].result
|
|
126
|
+
end
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Output Field Normalization
|
|
130
|
+
|
|
131
|
+
Task outputs are automatically normalized based on the output definition:
|
|
132
|
+
|
|
133
|
+
1. **Only defined fields are collected**: Fields not in the output definition are ignored
|
|
134
|
+
2. **Missing fields default to nil**: If a defined field is not returned, it defaults to `nil`
|
|
135
|
+
3. **Type documentation**: Output definitions record the expected types for better code clarity (types are documentary and are not enforced at runtime)
|
|
136
|
+
|
|
137
|
+
```ruby
|
|
138
|
+
task :example, output: { required: "String", optional: "Integer" } do |ctx|
|
|
139
|
+
# Only return one field
|
|
140
|
+
{ required: "value" }
|
|
141
|
+
# optional will be nil
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
task :check_output, depends_on: [:example] do |ctx|
|
|
145
|
+
puts ctx.output[:example].first.required # => "value"
|
|
146
|
+
puts ctx.output[:example].first.optional # => nil
|
|
147
|
+
end
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Output Persistence
|
|
151
|
+
|
|
152
|
+
Task outputs are automatically serialized and persisted with the Context, allowing them to:
|
|
153
|
+
|
|
154
|
+
- **Survive job restarts**: Outputs are preserved across job retries
|
|
155
|
+
- **Resume correctly**: When using continuations, outputs from completed tasks are available
|
|
156
|
+
- **Pass between jobs**: In map tasks with concurrency, outputs from subjobs are collected
|
|
157
|
+
|
|
158
|
+
## Output Design Guidelines
|
|
159
|
+
|
|
160
|
+
### When to Use Outputs
|
|
161
|
+
|
|
162
|
+
Use task outputs when you need to:
|
|
163
|
+
|
|
164
|
+
- **Extract structured data** from a task for use in later tasks
|
|
165
|
+
- **Collect results** from parallel map task executions
|
|
166
|
+
- **Document return values** with types for better code clarity
|
|
167
|
+
- **Separate concerns** between task execution and result usage
|
|
168
|
+
|
|
169
|
+
### When to Use Context Instead
|
|
170
|
+
|
|
171
|
+
Use Context fields when you need to:
|
|
172
|
+
|
|
173
|
+
- **Share mutable state** that tasks modify incrementally
|
|
174
|
+
- **Pass configuration** or settings through the workflow
|
|
175
|
+
- **Store final results** that are the primary goal of the workflow
|
|
176
|
+
|
|
177
|
+
### Best Practices
|
|
178
|
+
|
|
179
|
+
```ruby
|
|
180
|
+
class WellDesignedJob < ApplicationJob
|
|
181
|
+
include JobWorkflow::DSL
|
|
182
|
+
|
|
183
|
+
# Arguments for configuration
|
|
184
|
+
argument :user_id, "Integer"
|
|
185
|
+
|
|
186
|
+
# Use outputs for intermediate structured data
|
|
187
|
+
task :fetch_user,
|
|
188
|
+
output: { name: "String", email: "String", role: "String" } do |ctx|
|
|
189
|
+
user = User.find(ctx.arguments.user_id)
|
|
190
|
+
{
|
|
191
|
+
name: user.name,
|
|
192
|
+
email: user.email,
|
|
193
|
+
role: user.role
|
|
194
|
+
}
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
task :fetch_permissions,
|
|
198
|
+
depends_on: [:fetch_user],
|
|
199
|
+
output: { permissions: "Array[String]" } do |ctx|
|
|
200
|
+
role = ctx.output[:fetch_user].first.role
|
|
201
|
+
{
|
|
202
|
+
permissions: PermissionService.get_permissions(role)
|
|
203
|
+
}
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# Build final report as output
|
|
207
|
+
task :generate_report,
|
|
208
|
+
depends_on: [:fetch_user, :fetch_permissions],
|
|
209
|
+
output: { final_report: "Hash" } do |ctx|
|
|
210
|
+
user = ctx.output[:fetch_user].first
|
|
211
|
+
perms = ctx.output[:fetch_permissions].first
|
|
212
|
+
|
|
213
|
+
{
|
|
214
|
+
final_report: {
|
|
215
|
+
user: { name: user.name, email: user.email },
|
|
216
|
+
permissions: perms.permissions,
|
|
217
|
+
generated_at: Time.current
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## Limitations
|
|
225
|
+
|
|
226
|
+
### Arguments are Immutable
|
|
227
|
+
|
|
228
|
+
Arguments cannot be modified during workflow execution. To pass data between tasks, use task outputs:
|
|
229
|
+
|
|
230
|
+
```ruby
|
|
231
|
+
# ✅ Correct: Use outputs
|
|
232
|
+
task :process, output: { result: "String" } do |ctx|
|
|
233
|
+
{ result: "processed" }
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
# ❌ Wrong: Cannot modify arguments
|
|
237
|
+
task :wrong do |ctx|
|
|
238
|
+
ctx.arguments.result = "value" # Error!
|
|
239
|
+
end
|
|
240
|
+
```
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Testing Strategy
|
|
2
|
+
|
|
3
|
+
This section covers effective testing methods for workflows built with JobWorkflow.
|
|
4
|
+
|
|
5
|
+
## Unit Testing
|
|
6
|
+
|
|
7
|
+
### Testing Individual Tasks
|
|
8
|
+
|
|
9
|
+
Test each task as a unit.
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
# spec/jobs/user_registration_job_spec.rb
|
|
13
|
+
RSpec.describe UserRegistrationJob do
|
|
14
|
+
describe 'task: validate_email' do
|
|
15
|
+
it 'validates correct email format' do
|
|
16
|
+
job = described_class.new
|
|
17
|
+
arguments = JobWorkflow::Arguments.new(email: 'user@example.com')
|
|
18
|
+
ctx = JobWorkflow::Context.new(arguments: arguments)
|
|
19
|
+
|
|
20
|
+
task = described_class._workflow_tasks[:validate_email]
|
|
21
|
+
expect { job.instance_exec(ctx, &task[:block]) }.not_to raise_error
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
it 'raises error for invalid email' do
|
|
25
|
+
job = described_class.new
|
|
26
|
+
arguments = JobWorkflow::Arguments.new(email: 'invalid')
|
|
27
|
+
ctx = JobWorkflow::Context.new(arguments: arguments)
|
|
28
|
+
|
|
29
|
+
task = described_class._workflow_tasks[:validate_email]
|
|
30
|
+
expect { job.instance_exec(ctx, &task[:block]) }.to raise_error(/Invalid email/)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
describe 'task: create_user' do
|
|
35
|
+
it 'creates a new user' do
|
|
36
|
+
job = described_class.new
|
|
37
|
+
arguments = JobWorkflow::Arguments.new(
|
|
38
|
+
email: 'user@example.com',
|
|
39
|
+
password: 'password123'
|
|
40
|
+
)
|
|
41
|
+
ctx = JobWorkflow::Context.new(arguments: arguments)
|
|
42
|
+
|
|
43
|
+
task = described_class._workflow_tasks[:create_user]
|
|
44
|
+
|
|
45
|
+
expect {
|
|
46
|
+
job.instance_exec(ctx, &task[:block])
|
|
47
|
+
}.to change(User, :count).by(1)
|
|
48
|
+
|
|
49
|
+
# Verify output
|
|
50
|
+
output = ctx.output[:create_user].first
|
|
51
|
+
expect(output.user).to be_a(User)
|
|
52
|
+
expect(output.user.email).to eq('user@example.com')
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
```
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Throttling
|
|
2
|
+
|
|
3
|
+
JobWorkflow provides semaphore-based throttling to handle external API rate limits and protect shared resources. Throttling works across multiple jobs and workers, ensuring system-wide rate limiting.
|
|
4
|
+
|
|
5
|
+
## Task-Level Throttling
|
|
6
|
+
|
|
7
|
+
### Simple Integer Syntax (Recommended)
|
|
8
|
+
|
|
9
|
+
For most use cases, specify the concurrency limit as an integer:
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
class ExternalAPIJob < ApplicationJob
|
|
13
|
+
include JobWorkflow::DSL
|
|
14
|
+
|
|
15
|
+
argument :user_ids, "Array[Integer]"
|
|
16
|
+
|
|
17
|
+
# Allow up to 10 concurrent executions of this task
|
|
18
|
+
# Default key: "ExternalAPIJob:fetch_user_data"
|
|
19
|
+
# Default TTL: 180 seconds
|
|
20
|
+
task :fetch_user_data, throttle: 10, each: ->(ctx) { ctx.arguments.user_ids }, output: { user_data: "Hash" } do |ctx|
|
|
21
|
+
{ user_data: ExternalAPI.fetch_user(ctx.each_value) }
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Hash Syntax (Advanced Configuration)
|
|
27
|
+
|
|
28
|
+
For detailed control, use the hash syntax:
|
|
29
|
+
|
|
30
|
+
```ruby
|
|
31
|
+
task :fetch_user_data,
|
|
32
|
+
throttle: {
|
|
33
|
+
key: "external_user_api", # Custom semaphore key
|
|
34
|
+
limit: 10, # Concurrency limit
|
|
35
|
+
ttl: 120 # Lease TTL in seconds (default: 180)
|
|
36
|
+
},
|
|
37
|
+
output: { api_results: "Hash" } do |ctx|
|
|
38
|
+
{ api_results: ExternalAPI.fetch_user(ctx.arguments.user_id) }
|
|
39
|
+
end
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Sharing Throttle Keys Across Jobs
|
|
43
|
+
|
|
44
|
+
Use the same `key` to share rate limits across different jobs and tasks:
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
# Both jobs share the same "payment_api" throttle limit
|
|
48
|
+
class CreateUserJob < ApplicationJob
|
|
49
|
+
include JobWorkflow::DSL
|
|
50
|
+
|
|
51
|
+
argument :user_data, "Hash"
|
|
52
|
+
|
|
53
|
+
task :create_customer, throttle: { key: "payment_api", limit: 5 } do |ctx|
|
|
54
|
+
PaymentService.create_customer(ctx.arguments.user_data)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
class UpdateBillingJob < ApplicationJob
|
|
59
|
+
include JobWorkflow::DSL
|
|
60
|
+
|
|
61
|
+
argument :billing_id, "String"
|
|
62
|
+
|
|
63
|
+
task :update_billing, throttle: { key: "payment_api", limit: 5 } do |ctx|
|
|
64
|
+
PaymentService.update_billing(ctx.arguments.billing_id)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Total concurrent calls to payment API: max 5 across both jobs
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Throttling Behavior
|
|
72
|
+
|
|
73
|
+
1. Acquire semaphore lease before task execution
|
|
74
|
+
2. If lease cannot be acquired, wait (automatic polling with 3-second intervals)
|
|
75
|
+
3. Execute task
|
|
76
|
+
4. Release lease after completion (guaranteed by ensure block)
|
|
77
|
+
5. If a worker crashes before releasing, the lease is recovered after `ttl` expires and the SolidQueue dispatcher concurrency maintenance runs (worst case: `ttl + concurrency_maintenance_interval`)
|
|
78
|
+
|
|
79
|
+
```ruby
|
|
80
|
+
argument :data, "Hash"
|
|
81
|
+
|
|
82
|
+
# Example: Task with max 3 concurrent executions
|
|
83
|
+
task :limited_task, throttle: 3, output: { result: "String" } do |ctx|
|
|
84
|
+
data = ctx.arguments.data
|
|
85
|
+
{ result: SharedResource.use(data) }
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Execution state:
|
|
89
|
+
# Job 1: Acquire lease → Executing
|
|
90
|
+
# Job 2: Acquire lease → Executing
|
|
91
|
+
# Job 3: Acquire lease → Executing
|
|
92
|
+
# Job 4: Waiting (no lease available)
|
|
93
|
+
# Job 1: Complete → Release lease
|
|
94
|
+
# Job 4: Acquire lease → Executing
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Throttling with Map Tasks
|
|
98
|
+
|
|
99
|
+
Throttling is especially useful with map tasks to limit API calls:
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
class BatchFetchJob < ApplicationJob
|
|
103
|
+
include JobWorkflow::DSL
|
|
104
|
+
|
|
105
|
+
argument :ids, "Array[Integer]"
|
|
106
|
+
|
|
107
|
+
# Each iteration waits for a throttle slot
|
|
108
|
+
task :fetch_all, throttle: 5, each: ->(ctx) { ctx.arguments.ids }, output: { data: "Hash" } do |ctx|
|
|
109
|
+
{ data: RateLimitedAPI.fetch(ctx.each_value) }
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# With 100 IDs and throttle: 5
|
|
114
|
+
# → Max 5 concurrent API calls at any time
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Runtime Throttling
|
|
118
|
+
|
|
119
|
+
For fine-grained control within a task, use the `ctx.throttle` method to wrap specific code blocks. This method can only be called inside a task block; calling it outside will raise an error.
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
class ComplexProcessingJob < ApplicationJob
|
|
123
|
+
include JobWorkflow::DSL
|
|
124
|
+
|
|
125
|
+
argument :data, "Hash"
|
|
126
|
+
|
|
127
|
+
task :process_and_save do |ctx|
|
|
128
|
+
# Read operations - no throttle needed
|
|
129
|
+
data = ExternalAPI.fetch(ctx.arguments.data[:id])
|
|
130
|
+
|
|
131
|
+
# Write operations - throttled
|
|
132
|
+
ctx.throttle(limit: 3, key: "db_write") do
|
|
133
|
+
Model.create!(data)
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Multiple Throttle Blocks
|
|
140
|
+
|
|
141
|
+
Apply different rate limits to different operations within the same task:
|
|
142
|
+
|
|
143
|
+
```ruby
|
|
144
|
+
task :multi_api_task do |ctx|
|
|
145
|
+
# Payment API: max 5 concurrent
|
|
146
|
+
ctx.throttle(limit: 5, key: "payment_api") do
|
|
147
|
+
PaymentService.process(ctx.arguments.payment_data)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Notification API: max 10 concurrent
|
|
151
|
+
ctx.throttle(limit: 10, key: "notification_api") do
|
|
152
|
+
NotificationService.send(ctx.arguments.message_params)
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Auto-Generated Keys
|
|
158
|
+
|
|
159
|
+
When `key` is omitted, a unique key is generated automatically based on the job name, task name, and call index. The index resets to 0 for each task execution:
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
task :sequential_operations do |ctx|
|
|
163
|
+
# Key: "MyJob:sequential_operations:0"
|
|
164
|
+
ctx.throttle(limit: 5) do
|
|
165
|
+
first_operation
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Key: "MyJob:sequential_operations:1"
|
|
169
|
+
ctx.throttle(limit: 5) do
|
|
170
|
+
second_operation
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Combining Task-Level and Runtime Throttling
|
|
176
|
+
|
|
177
|
+
Use both approaches for comprehensive rate limiting:
|
|
178
|
+
|
|
179
|
+
```ruby
|
|
180
|
+
class APIIntegrationJob < ApplicationJob
|
|
181
|
+
include JobWorkflow::DSL
|
|
182
|
+
|
|
183
|
+
argument :ids, "Array[Integer]"
|
|
184
|
+
|
|
185
|
+
# Task-level throttle: limits overall task concurrency
|
|
186
|
+
task :process_items, throttle: 10, each: ->(ctx) { ctx.arguments.ids } do |ctx|
|
|
187
|
+
|
|
188
|
+
data = ExternalAPI.fetch(ctx.each_value)
|
|
189
|
+
|
|
190
|
+
# Runtime throttle: limits specific write operations
|
|
191
|
+
ctx.throttle(limit: 3, key: "cache_write") do
|
|
192
|
+
CacheStorage.update(ctx.each_value, data)
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
data
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
```
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Troubleshooting
|
|
2
|
+
|
|
3
|
+
This section covers common issues encountered during JobWorkflow operation and their solutions.
|
|
4
|
+
|
|
5
|
+
## Common Issues
|
|
6
|
+
|
|
7
|
+
### CircularDependencyError
|
|
8
|
+
|
|
9
|
+
**Symptom**: Workflow crashes with `JobWorkflow::CircularDependencyError`
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
# ❌ Circular dependency
|
|
13
|
+
task :a, depends_on: [:b] do |ctx|
|
|
14
|
+
# ...
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
task :b, depends_on: [:a] do |ctx|
|
|
18
|
+
# ...
|
|
19
|
+
end
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Solution**: Review the task graph and remove the circular dependency
|
|
23
|
+
|
|
24
|
+
```ruby
|
|
25
|
+
# ✅ Correct dependency
|
|
26
|
+
task :a do |ctx|
|
|
27
|
+
# ...
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
task :b, depends_on: [:a] do |ctx|
|
|
31
|
+
# ...
|
|
32
|
+
end
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### UnknownTaskError
|
|
36
|
+
|
|
37
|
+
**Symptom**: `JobWorkflow::UnknownTaskError: Unknown task: :typo_task`
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
# ❌ Depending on non-existent task
|
|
41
|
+
task :process, depends_on: [:typo_task] do |ctx|
|
|
42
|
+
# ...
|
|
43
|
+
end
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Solution**: Fix the typo so the dependency references an existing task name
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
# ✅ Correct task name
|
|
50
|
+
task :process, depends_on: [:correct_task] do |ctx|
|
|
51
|
+
# ...
|
|
52
|
+
end
|
|
53
|
+
```
|