language-operator 0.1.63 → 0.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/.plan.md +127 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +2 -0
  5. data/Gemfile.lock +4 -1
  6. data/Makefile +34 -80
  7. data/components/agent/Gemfile +1 -1
  8. data/docs/cheat-sheet.md +173 -0
  9. data/lib/language_operator/agent/base.rb +10 -1
  10. data/lib/language_operator/agent/event_config.rb +172 -0
  11. data/lib/language_operator/agent/safety/ast_validator.rb +1 -1
  12. data/lib/language_operator/agent/safety/safe_executor.rb +5 -1
  13. data/lib/language_operator/agent/task_executor.rb +87 -7
  14. data/lib/language_operator/agent/telemetry.rb +25 -3
  15. data/lib/language_operator/agent/web_server.rb +6 -9
  16. data/lib/language_operator/cli/commands/agent/base.rb +15 -17
  17. data/lib/language_operator/cli/commands/agent/learning.rb +156 -37
  18. data/lib/language_operator/cli/commands/cluster.rb +2 -2
  19. data/lib/language_operator/cli/commands/status.rb +2 -2
  20. data/lib/language_operator/cli/commands/system/synthesize.rb +1 -1
  21. data/lib/language_operator/cli/formatters/value_formatter.rb +1 -1
  22. data/lib/language_operator/cli/helpers/ux_helper.rb +3 -4
  23. data/lib/language_operator/config.rb +3 -3
  24. data/lib/language_operator/constants/kubernetes_labels.rb +2 -2
  25. data/lib/language_operator/dsl/task_definition.rb +18 -7
  26. data/lib/language_operator/instrumentation/task_tracer.rb +44 -3
  27. data/lib/language_operator/kubernetes/client.rb +111 -0
  28. data/lib/language_operator/templates/schema/CHANGELOG.md +28 -0
  29. data/lib/language_operator/templates/schema/agent_dsl_openapi.yaml +1 -1
  30. data/lib/language_operator/templates/schema/agent_dsl_schema.json +1 -1
  31. data/lib/language_operator/type_coercion.rb +22 -8
  32. data/lib/language_operator/version.rb +1 -1
  33. data/synth/002/agent.rb +23 -12
  34. data/synth/002/output.log +88 -15
  35. data/synth/003/Makefile +5 -2
  36. data/synth/004/Makefile +54 -0
  37. data/synth/004/README.md +281 -0
  38. data/synth/004/instructions.txt +1 -0
  39. metadata +8 -1
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../config'
4
+
5
module LanguageOperator
  module Agent
    # Event emission configuration for agent runtime
    #
    # Manages configuration for Kubernetes event emission including:
    # - Event filtering and batching options
    # - Error handling preferences
    # - Performance tuning settings
    #
    # Every helper below accepts an optional, already-loaded configuration
    # hash; when it is omitted the configuration is loaded via {load}.
    #
    # @example Load event configuration
    #   config = EventConfig.load
    #   puts "Events enabled: #{config[:enabled]}"
    #   puts "Max events per minute: #{config[:rate_limit_per_minute]}"
    module EventConfig
      # Load event emission configuration from environment variables
      #
      # Delegates to Config.from_env with three parallel tables keyed by the
      # same symbols: the environment variable name, the default (as a
      # string) and the target type for each setting.
      #
      # @return [Hash] Event configuration hash
      def self.load
        Config.from_env(
          {
            # Core event emission settings
            enabled: 'ENABLE_K8S_EVENTS',
            disabled: 'DISABLE_K8S_EVENTS',

            # Event filtering
            emit_success_events: 'EMIT_SUCCESS_EVENTS',
            emit_failure_events: 'EMIT_FAILURE_EVENTS',
            emit_validation_events: 'EMIT_VALIDATION_EVENTS',

            # Performance and rate limiting
            rate_limit_per_minute: 'EVENT_RATE_LIMIT_PER_MINUTE',
            batch_size: 'EVENT_BATCH_SIZE',
            batch_timeout_ms: 'EVENT_BATCH_TIMEOUT_MS',

            # Error handling
            retry_failed_events: 'RETRY_FAILED_EVENTS',
            max_event_retries: 'MAX_EVENT_RETRIES',
            retry_delay_ms: 'EVENT_RETRY_DELAY_MS',

            # Event content control
            include_task_metadata: 'INCLUDE_TASK_METADATA',
            include_error_details: 'INCLUDE_ERROR_DETAILS',
            truncate_long_messages: 'TRUNCATE_LONG_MESSAGES',
            max_message_length: 'MAX_EVENT_MESSAGE_LENGTH'
          },
          defaults: {
            enabled: 'true',
            disabled: 'false',
            emit_success_events: 'true',
            emit_failure_events: 'true',
            emit_validation_events: 'true',
            rate_limit_per_minute: '60',
            batch_size: '1',
            batch_timeout_ms: '1000',
            retry_failed_events: 'true',
            max_event_retries: '3',
            retry_delay_ms: '1000',
            include_task_metadata: 'true',
            include_error_details: 'true',
            truncate_long_messages: 'true',
            max_message_length: '1000'
          },
          types: {
            enabled: :boolean,
            disabled: :boolean,
            emit_success_events: :boolean,
            emit_failure_events: :boolean,
            emit_validation_events: :boolean,
            rate_limit_per_minute: :integer,
            batch_size: :integer,
            batch_timeout_ms: :integer,
            retry_failed_events: :boolean,
            max_event_retries: :integer,
            retry_delay_ms: :integer,
            include_task_metadata: :boolean,
            include_error_details: :boolean,
            truncate_long_messages: :boolean,
            max_message_length: :integer
          }
        )
      end

      # Check if event emission is enabled overall
      #
      # Events are enabled only when all of the following hold:
      # - Running in Kubernetes (KUBERNETES_SERVICE_HOST is set)
      # - The legacy disable flag is not set (config[:disabled], from
      #   DISABLE_K8S_EVENTS, default 'false')
      # - The enable flag has not been turned off (config[:enabled], from
      #   ENABLE_K8S_EVENTS, default 'true')
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Boolean] True if events should be emitted
      def self.enabled?(config = nil)
        config ||= load

        # Must be in Kubernetes environment
        return false unless ENV.fetch('KUBERNETES_SERVICE_HOST', nil)

        # Respect explicit disable flag (legacy)
        return false if config[:disabled]

        # Check enable flag
        config[:enabled]
      end

      # Check if specific event type should be emitted
      #
      # @param event_type [Symbol] Event type (:success, :failure, :validation)
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Boolean] True if this event type should be emitted;
      #   always false for an unrecognised event type
      def self.should_emit?(event_type, config = nil)
        # Resolve the configuration once up front so the overall switch and
        # the per-type switch are read from the same snapshot (and the
        # environment is not parsed twice).
        config ||= load

        return false unless enabled?(config)

        case event_type
        when :success
          config[:emit_success_events]
        when :failure
          config[:emit_failure_events]
        when :validation
          config[:emit_validation_events]
        else
          false
        end
      end

      # Get rate limiting configuration
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Hash] Rate limiting settings (:per_minute, :batch_size, :batch_timeout_ms)
      def self.rate_limit_config(config = nil)
        config ||= load
        {
          per_minute: config[:rate_limit_per_minute],
          batch_size: config[:batch_size],
          batch_timeout_ms: config[:batch_timeout_ms]
        }
      end

      # Get retry configuration for failed events
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Hash] Retry settings (:enabled, :max_retries, :delay_ms)
      def self.retry_config(config = nil)
        config ||= load
        {
          enabled: config[:retry_failed_events],
          max_retries: config[:max_event_retries],
          delay_ms: config[:retry_delay_ms]
        }
      end

      # Get content configuration for event messages
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Hash] Content settings (same keys as the loaded configuration)
      def self.content_config(config = nil)
        config ||= load
        {
          include_task_metadata: config[:include_task_metadata],
          include_error_details: config[:include_error_details],
          truncate_long_messages: config[:truncate_long_messages],
          max_message_length: config[:max_message_length]
        }
      end
    end
  end
end
@@ -25,7 +25,7 @@ module LanguageOperator
25
25
  const_set const_get remove_const
26
26
  define_method define_singleton_method
27
27
  undef_method remove_method alias_method
28
- exit exit! abort raise fail throw
28
+ exit exit! abort throw
29
29
  trap at_exit
30
30
  open
31
31
  ].freeze
@@ -36,7 +36,8 @@ module LanguageOperator
36
36
 
37
37
  # Step 3: Execute using instance_eval with smart constant injection
38
38
  # Only inject constants that won't conflict with user-defined ones
39
- safe_constants = %w[Numeric Integer Float String Array Hash TrueClass FalseClass Time Date]
39
+ safe_constants = %w[Numeric Integer Float String Array Hash TrueClass FalseClass Time Date
40
+ ArgumentError TypeError RuntimeError StandardError]
40
41
 
41
42
  # Find which constants user code defines to avoid redefinition warnings
42
43
  user_defined_constants = safe_constants.select { |const| code.include?("#{const} =") }
@@ -129,6 +130,9 @@ module LanguageOperator
129
130
  when :TrueClass, :FalseClass, :NilClass
130
131
  # Allow boolean and nil types
131
132
  ::Object.const_get(name)
133
+ when :ArgumentError, :TypeError, :RuntimeError, :StandardError
134
+ # Allow standard Ruby exception classes for error handling
135
+ ::Object.const_get(name)
132
136
  else
133
137
  # Security-by-default: explicitly deny access to any other constants
134
138
  # This prevents sandbox bypass through const_missing fallback
@@ -106,15 +106,11 @@ module LanguageOperator
106
106
  def execute_task(task_name, inputs: {}, timeout: nil, max_retries: nil)
107
107
  execution_start = Time.now
108
108
  max_retries ||= @config[:max_retries]
109
-
109
+
110
110
  # Reset JSON parsing retry flag for this task
111
111
  @parsing_retry_attempted = false
112
112
 
113
- with_span('task_executor.execute_task', attributes: {
114
- 'task.name' => task_name.to_s,
115
- 'task.inputs' => inputs.keys.map(&:to_s).join(','),
116
- 'task.max_retries' => max_retries
117
- }) do
113
+ with_span('task_executor.execute_task', attributes: build_task_execution_attributes(task_name, inputs, max_retries)) do
118
114
  # Fast task lookup using pre-built cache
119
115
  task_name_sym = task_name.to_sym
120
116
  task_info = @task_cache[task_name_sym]
@@ -140,15 +136,27 @@ module LanguageOperator
140
136
  OpenTelemetry::Trace.current_span&.set_attribute('task.timeout', timeout)
141
137
 
142
138
  # Execute with retry logic
143
- execute_with_retry(task, task_name, inputs, timeout, max_retries, execution_start)
139
+ result = execute_with_retry(task, task_name, inputs, timeout, max_retries, execution_start)
140
+
141
+ # Emit Kubernetes event for successful task completion
142
+ emit_task_execution_event(task_name, success: true, execution_start: execution_start)
143
+
144
+ result
144
145
  end
145
146
  rescue ArgumentError => e
146
147
  # Validation errors should not be retried - re-raise immediately
147
148
  log_task_error(task_name, e, :validation, execution_start)
149
+ emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e, event_type: :validation)
148
150
  raise TaskValidationError.new(task_name, e.message, e)
151
+ rescue TaskValidationError => e
152
+ # TaskValidationError from validate_inputs should be logged as :validation
153
+ log_task_error(task_name, e, :validation, execution_start)
154
+ emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e, event_type: :validation)
155
+ raise e
149
156
  rescue StandardError => e
150
157
  # Catch any unexpected errors that escaped retry logic
151
158
  log_task_error(task_name, e, :system, execution_start)
159
+ emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e)
152
160
  raise create_appropriate_error(task_name, e)
153
161
  end
154
162
 
@@ -371,6 +379,39 @@ module LanguageOperator
371
379
  'Agent::TaskExecutor'
372
380
  end
373
381
 
382
# Emit Kubernetes event for task execution
#
# Best-effort: any StandardError raised while building or sending the
# event is logged as a warning and swallowed, so a failure to emit an
# event never propagates to the caller.
#
# @param task_name [Symbol, String] Task name
# @param success [Boolean] Whether task succeeded
# @param execution_start [Time] Task execution start time
# @param error [Exception, nil] Error if task failed
# @param event_type [Symbol, nil] Event type override (:success, :failure, :validation);
#   when given it is recorded in the event metadata under 'event_type'
def emit_task_execution_event(task_name, success:, execution_start:, error: nil, event_type: nil)
  # Agents that do not expose a Kubernetes client simply emit nothing
  return unless @agent.respond_to?(:kubernetes_client)

  duration_ms = ((Time.now - execution_start) * 1000).round(2)

  metadata = {
    'task_type' => determine_task_type(@tasks[task_name.to_sym])
  }

  # The override used to be accepted and then dropped; carry it along so
  # that e.g. validation failures can be told apart from other failures.
  # NOTE(review): assumes the client tolerates an extra metadata key - confirm.
  metadata['event_type'] = event_type.to_s if event_type

  if error
    metadata['error_type'] = error.class.name
    metadata['error_category'] = categorize_error(error).to_s
  end

  @agent.kubernetes_client.emit_execution_event(
    task_name.to_s,
    success: success,
    duration_ms: duration_ms,
    metadata: metadata
  )
rescue StandardError => e
  logger.warn('Failed to emit task execution event',
              task: task_name,
              error: e.message)
end
414
+
374
415
  # Summarize hash values for logging (truncate long strings)
375
416
  # Optimized for performance with lazy computation
376
417
  #
@@ -620,6 +661,8 @@ module LanguageOperator
620
661
  # @param task [TaskDefinition] The task definition
621
662
  # @return [String] Task type
622
663
  def determine_task_type(task)
664
+ return nil unless task
665
+
623
666
  if task.neural? && task.symbolic?
624
667
  'hybrid'
625
668
  elsif task.neural?
@@ -964,6 +1007,43 @@ module LanguageOperator
964
1007
  end
965
1008
  cache
966
1009
  end
1010
+
1011
# Assemble the attribute hash attached to the task execution span.
#
# Always present:
# - task.name, task.inputs (comma-joined input keys), task.max_retries
# - gen_ai.operation.name (fixed to 'execute_task')
#
# Added only when the information is available:
# - agent.name, taken from the AGENT_NAME environment variable
# - task.type / task.has_neural / task.has_symbolic, taken from the
#   task cache entry for this task
#
# @param task_name [Symbol] Name of the task being executed
# @param inputs [Hash] Task input parameters
# @param max_retries [Integer] Maximum retry attempts
# @return [Hash] Span attributes
def build_task_execution_attributes(task_name, inputs, max_retries)
  input_names = inputs.keys.map { |key| key.to_s }

  span_attrs = {
    'task.name' => task_name.to_s,
    'task.inputs' => input_names.join(','),
    'task.max_retries' => max_retries,
    'gen_ai.operation.name' => 'execute_task'
  }

  # Duplicates the resource-level attribute on purpose so the agent is
  # visible directly on the span
  agent_name = ENV.fetch('AGENT_NAME', nil)
  span_attrs['agent.name'] = agent_name if agent_name

  cached = @task_cache[task_name.to_sym]
  return span_attrs unless cached

  span_attrs.merge(
    'task.type' => cached[:type],
    'task.has_neural' => cached[:neural].to_s,
    'task.has_symbolic' => cached[:symbolic].to_s
  )
end
967
1047
  end
968
1048
  end
969
1049
  end
@@ -70,6 +70,11 @@ module LanguageOperator
70
70
 
71
71
  # Build resource attributes from environment variables
72
72
  #
73
+ # Includes semantic attributes required for learning status tracking:
74
+ # - agent.name: Required for learning controller to identify agent executions
75
+ # - agent.mode: Agent operating mode (autonomous, scheduled, reactive)
76
+ # - service.version: Agent runtime version for observability
77
+ #
73
78
  # @return [Hash] Resource attributes
74
79
  def build_resource_attributes
75
80
  attributes = {}
@@ -83,9 +88,26 @@ module LanguageOperator
83
88
  # Kubernetes pod name
84
89
  attributes['k8s.pod.name'] = ENV['HOSTNAME'] if ENV['HOSTNAME']
85
90
 
86
- # Agent-specific attributes
87
- attributes['agent.name'] = ENV['AGENT_NAME'] if ENV['AGENT_NAME']
88
- attributes['agent.mode'] = ENV['AGENT_MODE'] if ENV['AGENT_MODE']
91
+ # Agent-specific attributes (CRITICAL for learning system)
92
+ if (agent_name = ENV.fetch('AGENT_NAME', nil))
93
+ attributes['agent.name'] = agent_name
94
+ # Also set as service.name for better trace organization
95
+ attributes['service.name'] = "language-operator-agent-#{agent_name}"
96
+ else
97
+ warn 'AGENT_NAME environment variable not set - learning status tracking may not work correctly'
98
+ end
99
+
100
+ if (agent_mode = ENV.fetch('AGENT_MODE', nil))
101
+ attributes['agent.mode'] = agent_mode
102
+ end
103
+
104
+ # Agent runtime version for observability
105
+ attributes['service.version'] = LanguageOperator::VERSION if defined?(LanguageOperator::VERSION)
106
+
107
+ # Agent cluster context
108
+ if (cluster_name = ENV.fetch('AGENT_CLUSTER', nil))
109
+ attributes['agent.cluster'] = cluster_name
110
+ end
89
111
 
90
112
  attributes
91
113
  end
@@ -179,16 +179,13 @@ module LanguageOperator
179
179
 
180
180
  # Drain and cleanup all executors in the pool
181
181
  executors_cleaned = 0
182
- begin
183
- loop do
184
- executor = @executor_pool.pop(timeout: 0.1)
185
- if executor
186
- executor.cleanup_connections
187
- executors_cleaned += 1
188
- end
182
+
183
+ until @executor_pool.empty?
184
+ executor = @executor_pool.pop unless @executor_pool.empty?
185
+ if executor
186
+ executor.cleanup_connections
187
+ executors_cleaned += 1
189
188
  end
190
- rescue ThreadError
191
- # Pool is empty, we're done
192
189
  end
193
190
 
194
191
  puts "Cleaned up #{executors_cleaned} executors from pool"
@@ -189,7 +189,7 @@ module LanguageOperator
189
189
  mode = agent.dig('spec', 'executionMode') || 'autonomous'
190
190
  if mode == 'scheduled'
191
191
  exec_data = get_execution_data(name, ctx)
192
-
192
+
193
193
  exec_rows = {
194
194
  'Total Runs' => exec_data[:total_runs],
195
195
  'Last Run' => exec_data[:last_run] || 'Never'
@@ -740,7 +740,7 @@ module LanguageOperator
740
740
  begin
741
741
  # Get CronJob to find last execution time and next run
742
742
  cronjob = ctx.client.get_resource('CronJob', agent_name, ctx.namespace)
743
-
743
+
744
744
  # Get last successful execution time
745
745
  last_successful = cronjob.dig('status', 'lastSuccessfulTime')
746
746
  if last_successful
@@ -750,9 +750,7 @@ module LanguageOperator
750
750
 
751
751
  # Calculate next run time from schedule
752
752
  schedule = cronjob.dig('spec', 'schedule')
753
- if schedule
754
- execution_data[:next_run] = calculate_next_run(schedule)
755
- end
753
+ execution_data[:next_run] = calculate_next_run(schedule) if schedule
756
754
  rescue K8s::Error::NotFound, StandardError
757
755
  # CronJob not found or parsing error, continue with job counting
758
756
  end
@@ -761,7 +759,7 @@ module LanguageOperator
761
759
  begin
762
760
  # Count total completed jobs for this agent
763
761
  jobs = ctx.client.list_resources('Job', namespace: ctx.namespace)
764
-
762
+
765
763
  agent_jobs = jobs.select do |job|
766
764
  labels = job.dig('metadata', 'labels') || {}
767
765
  labels['app.kubernetes.io/name'] == agent_name
@@ -772,7 +770,7 @@ module LanguageOperator
772
770
  conditions = job.dig('status', 'conditions') || []
773
771
  conditions.any? { |c| c['type'] == 'Complete' && c['status'] == 'True' }
774
772
  end
775
-
773
+
776
774
  execution_data[:total_runs] = successful_jobs.length
777
775
  rescue StandardError
778
776
  # If job listing fails, keep default count of 0
@@ -784,32 +782,32 @@ module LanguageOperator
784
782
  def calculate_next_run(schedule)
785
783
  # Simple next run calculation for common cron patterns
786
784
  # Handle the most common case: */N * * * * (every N minutes)
787
-
785
+
788
786
  parts = schedule.split
789
787
  return schedule unless parts.length == 5 # Not a valid cron expression
790
-
788
+
791
789
  minute, hour, day, month, weekday = parts
792
790
  current_time = Time.now
793
-
791
+
794
792
  # Handle every-N-minutes pattern: */10 * * * *
795
793
  if minute.start_with?('*/') && hour == '*' && day == '*' && month == '*' && weekday == '*'
796
794
  interval = minute[2..].to_i
797
795
  if interval > 0 && interval < 60
798
796
  current_minute = current_time.min
799
- current_second = current_time.sec
800
-
797
+ current_time.sec
798
+
801
799
  # Find the next occurrence
802
800
  next_minute_mark = ((current_minute / interval) + 1) * interval
803
-
801
+
804
802
  if next_minute_mark < 60
805
803
  # Same hour
806
- next_time = Time.new(current_time.year, current_time.month, current_time.day,
804
+ next_time = Time.new(current_time.year, current_time.month, current_time.day,
807
805
  current_time.hour, next_minute_mark, 0)
808
806
  else
809
807
  # Next hour
810
808
  next_hour = current_time.hour + 1
811
809
  next_minute = next_minute_mark - 60
812
-
810
+
813
811
  if next_hour < 24
814
812
  next_time = Time.new(current_time.year, current_time.month, current_time.day,
815
813
  next_hour, next_minute, 0)
@@ -820,11 +818,11 @@ module LanguageOperator
820
818
  0, next_minute, 0)
821
819
  end
822
820
  end
823
-
821
+
824
822
  return Formatters::ValueFormatter.time_until(next_time)
825
823
  end
826
824
  end
827
-
825
+
828
826
  # For other patterns, show the schedule (could add more patterns later)
829
827
  schedule
830
828
  rescue StandardError