RubyGems - language-operator - Versions diffs - 0.1.62 → 0.1.65 - Mend

language-operator 0.1.62 → 0.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

checksums.yaml +4 -4
data/.plan.md +127 -0
data/.rspec +3 -0
data/Gemfile +2 -0
data/Gemfile.lock +4 -1
data/Makefile +34 -80
data/components/agent/Gemfile +1 -1
data/docs/cheat-sheet.md +173 -0
data/lib/language_operator/agent/base.rb +10 -1
data/lib/language_operator/agent/event_config.rb +172 -0
data/lib/language_operator/agent/safety/ast_validator.rb +1 -1
data/lib/language_operator/agent/safety/safe_executor.rb +5 -1
data/lib/language_operator/agent/task_executor.rb +90 -12
data/lib/language_operator/agent/telemetry.rb +25 -3
data/lib/language_operator/agent/web_server.rb +6 -9
data/lib/language_operator/cli/commands/agent/base.rb +15 -17
data/lib/language_operator/cli/commands/agent/learning.rb +156 -37
data/lib/language_operator/cli/commands/cluster.rb +2 -2
data/lib/language_operator/cli/commands/status.rb +2 -2
data/lib/language_operator/cli/commands/system/synthesize.rb +1 -1
data/lib/language_operator/cli/formatters/value_formatter.rb +1 -1
data/lib/language_operator/cli/helpers/ux_helper.rb +3 -4
data/lib/language_operator/config.rb +3 -3
data/lib/language_operator/constants/kubernetes_labels.rb +2 -2
data/lib/language_operator/dsl/task_definition.rb +18 -7
data/lib/language_operator/instrumentation/task_tracer.rb +44 -3
data/lib/language_operator/kubernetes/client.rb +111 -0
data/lib/language_operator/templates/schema/CHANGELOG.md +28 -0
data/lib/language_operator/templates/schema/agent_dsl_openapi.yaml +1 -1
data/lib/language_operator/templates/schema/agent_dsl_schema.json +1 -1
data/lib/language_operator/type_coercion.rb +22 -8
data/lib/language_operator/version.rb +1 -1
data/synth/002/agent.rb +23 -12
data/synth/002/output.log +88 -15
data/synth/003/Makefile +5 -2
data/synth/004/Makefile +54 -0
data/synth/004/README.md +281 -0
data/synth/004/instructions.txt +1 -0
metadata +8 -1

data/lib/language_operator/agent/event_config.rb ADDED Viewed

@@ -0,0 +1,172 @@
+# frozen_string_literal: true
+require_relative '../config'
+module LanguageOperator
+  module Agent
+    # Event emission configuration for agent runtime
+    #
+    # Manages configuration for Kubernetes event emission including:
+    # - Event filtering and batching options
+    # - Error handling preferences
+    # - Performance tuning settings
+    #
+    # @example Load event configuration
+    #   config = EventConfig.load
+    #   puts "Events enabled: #{config[:enabled]}"
+    #   puts "Max events per minute: #{config[:rate_limit_per_minute]}"
+    module EventConfig
+      # Load event emission configuration from environment variables
+      #
+      # @return [Hash] Event configuration hash
+      def self.load
+        # Setting name => environment variable that supplies its value
+        env_vars = {
+          # Core event emission settings
+          enabled: 'ENABLE_K8S_EVENTS',
+          disabled: 'DISABLE_K8S_EVENTS',
+          # Event filtering
+          emit_success_events: 'EMIT_SUCCESS_EVENTS',
+          emit_failure_events: 'EMIT_FAILURE_EVENTS',
+          emit_validation_events: 'EMIT_VALIDATION_EVENTS',
+          # Performance and rate limiting
+          rate_limit_per_minute: 'EVENT_RATE_LIMIT_PER_MINUTE',
+          batch_size: 'EVENT_BATCH_SIZE',
+          batch_timeout_ms: 'EVENT_BATCH_TIMEOUT_MS',
+          # Error handling
+          retry_failed_events: 'RETRY_FAILED_EVENTS',
+          max_event_retries: 'MAX_EVENT_RETRIES',
+          retry_delay_ms: 'EVENT_RETRY_DELAY_MS',
+          # Event content control
+          include_task_metadata: 'INCLUDE_TASK_METADATA',
+          include_error_details: 'INCLUDE_ERROR_DETAILS',
+          truncate_long_messages: 'TRUNCATE_LONG_MESSAGES',
+          max_message_length: 'MAX_EVENT_MESSAGE_LENGTH'
+        }
+        # Fallback values, written as strings in the same form an environment
+        # variable would carry
+        fallbacks = {
+          enabled: 'true',
+          disabled: 'false',
+          emit_success_events: 'true',
+          emit_failure_events: 'true',
+          emit_validation_events: 'true',
+          rate_limit_per_minute: '60',
+          batch_size: '1',
+          batch_timeout_ms: '1000',
+          retry_failed_events: 'true',
+          max_event_retries: '3',
+          retry_delay_ms: '1000',
+          include_task_metadata: 'true',
+          include_error_details: 'true',
+          truncate_long_messages: 'true',
+          max_message_length: '1000'
+        }
+        # Declared type per setting (presumably used by Config.from_env to
+        # coerce the raw strings - confirm against Config)
+        coercions = {
+          enabled: :boolean,
+          disabled: :boolean,
+          emit_success_events: :boolean,
+          emit_failure_events: :boolean,
+          emit_validation_events: :boolean,
+          rate_limit_per_minute: :integer,
+          batch_size: :integer,
+          batch_timeout_ms: :integer,
+          retry_failed_events: :boolean,
+          max_event_retries: :integer,
+          retry_delay_ms: :integer,
+          include_task_metadata: :boolean,
+          include_error_details: :boolean,
+          truncate_long_messages: :boolean,
+          max_message_length: :integer
+        }
+        Config.from_env(env_vars, defaults: fallbacks, types: coercions)
+      end
+      # Check if event emission is enabled overall
+      #
+      # Events are enabled only when all of the following hold:
+      # - Running in Kubernetes (KUBERNETES_SERVICE_HOST set)
+      # - Not explicitly disabled (DISABLE_K8S_EVENTS != 'true')
+      # - Enable flag not switched off (ENABLE_K8S_EVENTS defaults to 'true')
+      #
+      # @param config [Hash] Configuration hash from load
+      # @return [Boolean] True if events should be emitted
+      def self.enabled?(config = nil)
+        config ||= load
+        # Must be in Kubernetes environment
+        return false unless ENV.fetch('KUBERNETES_SERVICE_HOST', nil)
+        # Respect explicit disable flag (legacy)
+        return false if config[:disabled]
+        # Check enable flag
+        config[:enabled]
+      end
+      # Check if specific event type should be emitted
+      #
+      # @param event_type [Symbol] Event type (:success, :failure, :validation)
+      # @param config [Hash] Configuration hash from load
+      # @return [Boolean] True if this event type should be emitted
+      def self.should_emit?(event_type, config = nil)
+        return false unless enabled?(config)
+        config ||= load
+        case event_type
+        when :success
+          config[:emit_success_events]
+        when :failure
+          config[:emit_failure_events]
+        when :validation
+          config[:emit_validation_events]
+        else
+          false
+        end
+      end
+      # Get rate limiting configuration
+      #
+      # @param config [Hash] Configuration hash from load
+      # @return [Hash] Rate limiting settings
+      def self.rate_limit_config(config = nil)
+        config ||= load
+        {
+          per_minute: config[:rate_limit_per_minute],
+          batch_size: config[:batch_size],
+          batch_timeout_ms: config[:batch_timeout_ms]
+        }
+      end
+      # Get retry configuration for failed events
+      #
+      # @param config [Hash] Configuration hash from load
+      # @return [Hash] Retry settings
+      def self.retry_config(config = nil)
+        config ||= load
+        {
+          enabled: config[:retry_failed_events],
+          max_retries: config[:max_event_retries],
+          delay_ms: config[:retry_delay_ms]
+        }
+      end
+      # Get content configuration for event messages
+      #
+      # @param config [Hash] Configuration hash from load
+      # @return [Hash] Content settings
+      def self.content_config(config = nil)
+        config ||= load
+        {
+          include_task_metadata: config[:include_task_metadata],
+          include_error_details: config[:include_error_details],
+          truncate_long_messages: config[:truncate_long_messages],
+          max_message_length: config[:max_message_length]
+        }
+      end
+    end
+  end
+end

data/lib/language_operator/agent/safety/ast_validator.rb CHANGED Viewed

@@ -25,7 +25,7 @@ module LanguageOperator
           const_set const_get remove_const
           define_method define_singleton_method
           undef_method remove_method alias_method
-          exit exit! abort raise fail throw
+          exit exit! abort throw
           trap at_exit
           open
         ].freeze

data/lib/language_operator/agent/safety/safe_executor.rb CHANGED Viewed

@@ -36,7 +36,8 @@ module LanguageOperator
           # Step 3: Execute using instance_eval with smart constant injection
           # Only inject constants that won't conflict with user-defined ones
-          safe_constants = %w[Numeric Integer Float String Array Hash TrueClass FalseClass Time Date]
+          safe_constants = %w[Numeric Integer Float String Array Hash TrueClass FalseClass Time Date
+                              ArgumentError TypeError RuntimeError StandardError]
           # Find which constants user code defines to avoid redefinition warnings
           user_defined_constants = safe_constants.select { |const| code.include?("#{const} =") }
@@ -129,6 +130,9 @@ module LanguageOperator
             when :TrueClass, :FalseClass, :NilClass
               # Allow boolean and nil types
               ::Object.const_get(name)
+            when :ArgumentError, :TypeError, :RuntimeError, :StandardError
+              # Allow standard Ruby exception classes for error handling
+              ::Object.const_get(name)
             else
               # Security-by-default: explicitly deny access to any other constants
               # This prevents sandbox bypass through const_missing fallback

data/lib/language_operator/agent/task_executor.rb CHANGED Viewed

@@ -107,11 +107,10 @@ module LanguageOperator
         execution_start = Time.now
         max_retries ||= @config[:max_retries]
-        with_span('task_executor.execute_task', attributes: {
-                    'task.name' => task_name.to_s,
-                    'task.inputs' => inputs.keys.map(&:to_s).join(','),
-                    'task.max_retries' => max_retries
-                  }) do
+        # Reset JSON parsing retry flag for this task
+        @parsing_retry_attempted = false
+        with_span('task_executor.execute_task', attributes: build_task_execution_attributes(task_name, inputs, max_retries)) do
           # Fast task lookup using pre-built cache
           task_name_sym = task_name.to_sym
           task_info = @task_cache[task_name_sym]
@@ -137,15 +136,27 @@ module LanguageOperator
           OpenTelemetry::Trace.current_span&.set_attribute('task.timeout', timeout)
           # Execute with retry logic
-          execute_with_retry(task, task_name, inputs, timeout, max_retries, execution_start)
+          result = execute_with_retry(task, task_name, inputs, timeout, max_retries, execution_start)
+          # Emit Kubernetes event for successful task completion
+          emit_task_execution_event(task_name, success: true, execution_start: execution_start)
+          result
         end
       rescue ArgumentError => e
         # Validation errors should not be retried - re-raise immediately
         log_task_error(task_name, e, :validation, execution_start)
+        emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e, event_type: :validation)
         raise TaskValidationError.new(task_name, e.message, e)
+      rescue TaskValidationError => e
+        # TaskValidationError from validate_inputs should be logged as :validation
+        log_task_error(task_name, e, :validation, execution_start)
+        emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e, event_type: :validation)
+        raise e
       rescue StandardError => e
         # Catch any unexpected errors that escaped retry logic
         log_task_error(task_name, e, :system, execution_start)
+        emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e)
         raise create_appropriate_error(task_name, e)
       end
@@ -211,7 +222,7 @@ module LanguageOperator
               parse_neural_response(response_text, task)
             rescue RuntimeError => e
               # If parsing fails and this is a JSON parsing error, try one more time with clarified prompt
-              raise e unless e.message.include?('returned invalid JSON') && !defined?(@parsing_retry_attempted)
+              raise e unless e.message.include?('returned invalid JSON') && !@parsing_retry_attempted
               @parsing_retry_attempted = true
@@ -237,11 +248,6 @@ module LanguageOperator
               # Try parsing the retry response
               parse_neural_response(retry_response_text, task)
-            # Re-raise original error if not a JSON parsing error or already retried
-            ensure
-              # Reset retry flag for next task
-              @parsing_retry_attempted = nil
             end
           end
@@ -373,6 +379,39 @@ module LanguageOperator
         'Agent::TaskExecutor'
       end
+      # Emit Kubernetes event for task execution
+      #
+      # @param task_name [Symbol, String] Task name
+      # @param success [Boolean] Whether task succeeded
+      # @param execution_start [Time] Task execution start time
+      # @param error [Exception, nil] Error if task failed
+      # @param event_type [Symbol, nil] Event type override (:success, :failure, :validation)
+      def emit_task_execution_event(task_name, success:, execution_start:, error: nil, event_type: nil)
+        return unless @agent.respond_to?(:kubernetes_client)
+        duration_ms = ((Time.now - execution_start) * 1000).round(2)
+        metadata = {
+          'task_type' => determine_task_type(@tasks[task_name.to_sym])
+        }
+        if error
+          metadata['error_type'] = error.class.name
+          metadata['error_category'] = categorize_error(error).to_s
+        end
+        @agent.kubernetes_client.emit_execution_event(
+          task_name.to_s,
+          success: success,
+          duration_ms: duration_ms,
+          metadata: metadata
+        )
+      rescue StandardError => e
+        logger.warn('Failed to emit task execution event',
+                    task: task_name,
+                    error: e.message)
+      end
       # Summarize hash values for logging (truncate long strings)
       # Optimized for performance with lazy computation
       #
@@ -622,6 +661,8 @@ module LanguageOperator
       # @param task [TaskDefinition] The task definition
       # @return [String] Task type
       def determine_task_type(task)
+        return nil unless task
         if task.neural? && task.symbolic?
           'hybrid'
         elsif task.neural?
@@ -966,6 +1007,43 @@ module LanguageOperator
         end
         cache
       end
+      # Build semantic attributes for task execution span
+      #
+      # Includes attributes required for learning status tracking:
+      # - task.name: Task identifier for learning controller
+      # - agent.name: Agent identifier (explicit for learning system)
+      # - gen_ai.operation.name: Semantic operation name
+      #
+      # @param task_name [Symbol] Name of the task being executed
+      # @param inputs [Hash] Task input parameters
+      # @param max_retries [Integer] Maximum retry attempts
+      # @return [Hash] Span attributes
+      def build_task_execution_attributes(task_name, inputs, max_retries)
+        attributes = {
+          # Core task identification (CRITICAL for learning system)
+          'task.name' => task_name.to_s,
+          'task.inputs' => inputs.keys.map(&:to_s).join(','),
+          'task.max_retries' => max_retries,
+          # Semantic operation name for better trace organization
+          'gen_ai.operation.name' => 'execute_task'
+        }
+        # Explicitly add agent name if available (redundant with resource attribute but ensures visibility)
+        if (agent_name = ENV.fetch('AGENT_NAME', nil))
+          attributes['agent.name'] = agent_name
+        end
+        # Add task type information if available
+        if (task_info = @task_cache[task_name.to_sym])
+          attributes['task.type'] = task_info[:type]
+          attributes['task.has_neural'] = task_info[:neural].to_s
+          attributes['task.has_symbolic'] = task_info[:symbolic].to_s
+        end
+        attributes
+      end
     end
   end
 end

data/lib/language_operator/agent/telemetry.rb CHANGED Viewed

@@ -70,6 +70,11 @@ module LanguageOperator
         # Build resource attributes from environment variables
         #
+        # Includes semantic attributes required for learning status tracking:
+        # - agent.name: Required for learning controller to identify agent executions
+        # - agent.mode: Agent operating mode (autonomous, scheduled, reactive)
+        # - service.version: Agent runtime version for observability
+        #
         # @return [Hash] Resource attributes
         def build_resource_attributes
           attributes = {}
@@ -83,9 +88,26 @@ module LanguageOperator
           # Kubernetes pod name
           attributes['k8s.pod.name'] = ENV['HOSTNAME'] if ENV['HOSTNAME']
-          # Agent-specific attributes
-          attributes['agent.name'] = ENV['AGENT_NAME'] if ENV['AGENT_NAME']
-          attributes['agent.mode'] = ENV['AGENT_MODE'] if ENV['AGENT_MODE']
+          # Agent-specific attributes (CRITICAL for learning system)
+          if (agent_name = ENV.fetch('AGENT_NAME', nil))
+            attributes['agent.name'] = agent_name
+            # Also set as service.name for better trace organization
+            attributes['service.name'] = "language-operator-agent-#{agent_name}"
+          else
+            warn 'AGENT_NAME environment variable not set - learning status tracking may not work correctly'
+          end
+          if (agent_mode = ENV.fetch('AGENT_MODE', nil))
+            attributes['agent.mode'] = agent_mode
+          end
+          # Agent runtime version for observability
+          attributes['service.version'] = LanguageOperator::VERSION if defined?(LanguageOperator::VERSION)
+          # Agent cluster context
+          if (cluster_name = ENV.fetch('AGENT_CLUSTER', nil))
+            attributes['agent.cluster'] = cluster_name
+          end
           attributes
         end

data/lib/language_operator/agent/web_server.rb CHANGED Viewed

@@ -179,16 +179,13 @@ module LanguageOperator
         # Drain and cleanup all executors in the pool
         executors_cleaned = 0
-        begin
-          loop do
-            executor = @executor_pool.pop(timeout: 0.1)
-            if executor
-              executor.cleanup_connections
-              executors_cleaned += 1
-            end
+        until @executor_pool.empty?
+          executor = @executor_pool.pop unless @executor_pool.empty?
+          if executor
+            executor.cleanup_connections
+            executors_cleaned += 1
           end
-        rescue ThreadError
-          # Pool is empty, we're done
         end
         puts "Cleaned up #{executors_cleaned} executors from pool"

data/lib/language_operator/cli/commands/agent/base.rb CHANGED Viewed

@@ -189,7 +189,7 @@ module LanguageOperator
               mode = agent.dig('spec', 'executionMode') || 'autonomous'
               if mode == 'scheduled'
                 exec_data = get_execution_data(name, ctx)
                 exec_rows = {
                   'Total Runs' => exec_data[:total_runs],
                   'Last Run' => exec_data[:last_run] || 'Never'
@@ -740,7 +740,7 @@ module LanguageOperator
             begin
               # Get CronJob to find last execution time and next run
               cronjob = ctx.client.get_resource('CronJob', agent_name, ctx.namespace)
               # Get last successful execution time
               last_successful = cronjob.dig('status', 'lastSuccessfulTime')
               if last_successful
@@ -750,9 +750,7 @@ module LanguageOperator
               # Calculate next run time from schedule
               schedule = cronjob.dig('spec', 'schedule')
-              if schedule
-                execution_data[:next_run] = calculate_next_run(schedule)
-              end
+              execution_data[:next_run] = calculate_next_run(schedule) if schedule
             rescue K8s::Error::NotFound, StandardError
               # CronJob not found or parsing error, continue with job counting
             end
@@ -761,7 +759,7 @@ module LanguageOperator
             begin
               # Count total completed jobs for this agent
               jobs = ctx.client.list_resources('Job', namespace: ctx.namespace)
               agent_jobs = jobs.select do |job|
                 labels = job.dig('metadata', 'labels') || {}
                 labels['app.kubernetes.io/name'] == agent_name
@@ -772,7 +770,7 @@ module LanguageOperator
                 conditions = job.dig('status', 'conditions') || []
                 conditions.any? { |c| c['type'] == 'Complete' && c['status'] == 'True' }
               end
               execution_data[:total_runs] = successful_jobs.length
             rescue StandardError
               # If job listing fails, keep default count of 0
@@ -784,32 +782,32 @@ module LanguageOperator
           def calculate_next_run(schedule)
             # Simple next run calculation for common cron patterns
             # Handle the most common case: */N * * * * (every N minutes)
             parts = schedule.split
             return schedule unless parts.length == 5 # Not a valid cron expression
             minute, hour, day, month, weekday = parts
             current_time = Time.now
             # Handle every-N-minutes pattern: */10 * * * *
             if minute.start_with?('*/') && hour == '*' && day == '*' && month == '*' && weekday == '*'
               interval = minute[2..].to_i
               if interval > 0 && interval < 60
                 current_minute = current_time.min
-                current_second = current_time.sec
+                current_time.sec
                 # Find the next occurrence
                 next_minute_mark = ((current_minute / interval) + 1) * interval
                 if next_minute_mark < 60
                   # Same hour
-                  next_time = Time.new(current_time.year, current_time.month, current_time.day,
+                  next_time = Time.new(current_time.year, current_time.month, current_time.day,
                                        current_time.hour, next_minute_mark, 0)
                 else
                   # Next hour
                   next_hour = current_time.hour + 1
                   next_minute = next_minute_mark - 60
                   if next_hour < 24
                     next_time = Time.new(current_time.year, current_time.month, current_time.day,
                                          next_hour, next_minute, 0)
@@ -820,11 +818,11 @@ module LanguageOperator
                                          0, next_minute, 0)
                   end
                 end
                 return Formatters::ValueFormatter.time_until(next_time)
               end
             end
             # For other patterns, show the schedule (could add more patterns later)
             schedule
           rescue StandardError