language-operator 0.1.62 → 0.1.65

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.plan.md +127 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +2 -0
  5. data/Gemfile.lock +4 -1
  6. data/Makefile +34 -80
  7. data/components/agent/Gemfile +1 -1
  8. data/docs/cheat-sheet.md +173 -0
  9. data/lib/language_operator/agent/base.rb +10 -1
  10. data/lib/language_operator/agent/event_config.rb +172 -0
  11. data/lib/language_operator/agent/safety/ast_validator.rb +1 -1
  12. data/lib/language_operator/agent/safety/safe_executor.rb +5 -1
  13. data/lib/language_operator/agent/task_executor.rb +90 -12
  14. data/lib/language_operator/agent/telemetry.rb +25 -3
  15. data/lib/language_operator/agent/web_server.rb +6 -9
  16. data/lib/language_operator/cli/commands/agent/base.rb +15 -17
  17. data/lib/language_operator/cli/commands/agent/learning.rb +156 -37
  18. data/lib/language_operator/cli/commands/cluster.rb +2 -2
  19. data/lib/language_operator/cli/commands/status.rb +2 -2
  20. data/lib/language_operator/cli/commands/system/synthesize.rb +1 -1
  21. data/lib/language_operator/cli/formatters/value_formatter.rb +1 -1
  22. data/lib/language_operator/cli/helpers/ux_helper.rb +3 -4
  23. data/lib/language_operator/config.rb +3 -3
  24. data/lib/language_operator/constants/kubernetes_labels.rb +2 -2
  25. data/lib/language_operator/dsl/task_definition.rb +18 -7
  26. data/lib/language_operator/instrumentation/task_tracer.rb +44 -3
  27. data/lib/language_operator/kubernetes/client.rb +111 -0
  28. data/lib/language_operator/templates/schema/CHANGELOG.md +28 -0
  29. data/lib/language_operator/templates/schema/agent_dsl_openapi.yaml +1 -1
  30. data/lib/language_operator/templates/schema/agent_dsl_schema.json +1 -1
  31. data/lib/language_operator/type_coercion.rb +22 -8
  32. data/lib/language_operator/version.rb +1 -1
  33. data/synth/002/agent.rb +23 -12
  34. data/synth/002/output.log +88 -15
  35. data/synth/003/Makefile +5 -2
  36. data/synth/004/Makefile +54 -0
  37. data/synth/004/README.md +281 -0
  38. data/synth/004/instructions.txt +1 -0
  39. metadata +8 -1
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../config'
4
+
5
module LanguageOperator
  module Agent
    # Event emission configuration for agent runtime
    #
    # Manages configuration for Kubernetes event emission including:
    # - Event filtering and batching options
    # - Error handling preferences
    # - Performance tuning settings
    #
    # Every reader below accepts an already-loaded configuration hash; when
    # none is given it calls {load} to read one from the environment.
    #
    # @example Load event configuration
    #   config = EventConfig.load
    #   puts "Events enabled: #{config[:enabled]}"
    #   puts "Max events per minute: #{config[:rate_limit_per_minute]}"
    module EventConfig
      # Load event emission configuration from environment variables
      #
      # Delegates to Config.from_env with three tables keyed by setting name:
      # the environment variable to read, the string default, and the type
      # each value should be coerced to.
      #
      # @return [Hash] Event configuration hash
      def self.load
        Config.from_env(
          {
            # Core event emission settings
            enabled: 'ENABLE_K8S_EVENTS',
            disabled: 'DISABLE_K8S_EVENTS',

            # Event filtering
            emit_success_events: 'EMIT_SUCCESS_EVENTS',
            emit_failure_events: 'EMIT_FAILURE_EVENTS',
            emit_validation_events: 'EMIT_VALIDATION_EVENTS',

            # Performance and rate limiting
            rate_limit_per_minute: 'EVENT_RATE_LIMIT_PER_MINUTE',
            batch_size: 'EVENT_BATCH_SIZE',
            batch_timeout_ms: 'EVENT_BATCH_TIMEOUT_MS',

            # Error handling
            retry_failed_events: 'RETRY_FAILED_EVENTS',
            max_event_retries: 'MAX_EVENT_RETRIES',
            retry_delay_ms: 'EVENT_RETRY_DELAY_MS',

            # Event content control
            include_task_metadata: 'INCLUDE_TASK_METADATA',
            include_error_details: 'INCLUDE_ERROR_DETAILS',
            truncate_long_messages: 'TRUNCATE_LONG_MESSAGES',
            max_message_length: 'MAX_EVENT_MESSAGE_LENGTH'
          },
          defaults: {
            enabled: 'true',
            disabled: 'false',
            emit_success_events: 'true',
            emit_failure_events: 'true',
            emit_validation_events: 'true',
            rate_limit_per_minute: '60',
            batch_size: '1',
            batch_timeout_ms: '1000',
            retry_failed_events: 'true',
            max_event_retries: '3',
            retry_delay_ms: '1000',
            include_task_metadata: 'true',
            include_error_details: 'true',
            truncate_long_messages: 'true',
            max_message_length: '1000'
          },
          types: {
            enabled: :boolean,
            disabled: :boolean,
            emit_success_events: :boolean,
            emit_failure_events: :boolean,
            emit_validation_events: :boolean,
            rate_limit_per_minute: :integer,
            batch_size: :integer,
            batch_timeout_ms: :integer,
            retry_failed_events: :boolean,
            max_event_retries: :integer,
            retry_delay_ms: :integer,
            include_task_metadata: :boolean,
            include_error_details: :boolean,
            truncate_long_messages: :boolean,
            max_message_length: :integer
          }
        )
      end

      # Check if event emission is enabled overall
      #
      # Events are enabled only when all of the following hold:
      # - Running in Kubernetes (KUBERNETES_SERVICE_HOST set)
      # - Not explicitly disabled (config[:disabled], from DISABLE_K8S_EVENTS,
      #   which defaults to 'false')
      # - Not switched off via the enable flag (config[:enabled], from
      #   ENABLE_K8S_EVENTS, which defaults to 'true')
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Boolean] True if events should be emitted
      def self.enabled?(config = nil)
        config ||= load

        # Must be in Kubernetes environment
        return false unless ENV.fetch('KUBERNETES_SERVICE_HOST', nil)

        # Respect explicit disable flag (legacy)
        return false if config[:disabled]

        # Check enable flag
        config[:enabled]
      end

      # Check if specific event type should be emitted
      #
      # @param event_type [Symbol] Event type (:success, :failure, :validation)
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Boolean] True if this event type should be emitted; always
      #   false for an unrecognised event type or when events are disabled
      def self.should_emit?(event_type, config = nil)
        # Resolve the configuration once, up front, so the global check and
        # the per-type check read the same snapshot instead of loading twice.
        config ||= load
        return false unless enabled?(config)

        case event_type
        when :success
          config[:emit_success_events]
        when :failure
          config[:emit_failure_events]
        when :validation
          config[:emit_validation_events]
        else
          false
        end
      end

      # Get rate limiting configuration
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Hash] Rate limiting settings (:per_minute, :batch_size,
      #   :batch_timeout_ms)
      def self.rate_limit_config(config = nil)
        config ||= load
        {
          per_minute: config[:rate_limit_per_minute],
          batch_size: config[:batch_size],
          batch_timeout_ms: config[:batch_timeout_ms]
        }
      end

      # Get retry configuration for failed events
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Hash] Retry settings (:enabled, :max_retries, :delay_ms)
      def self.retry_config(config = nil)
        config ||= load
        {
          enabled: config[:retry_failed_events],
          max_retries: config[:max_event_retries],
          delay_ms: config[:retry_delay_ms]
        }
      end

      # Get content configuration for event messages
      #
      # @param config [Hash, nil] Configuration hash from load (loaded when nil)
      # @return [Hash] Content settings (:include_task_metadata,
      #   :include_error_details, :truncate_long_messages, :max_message_length)
      def self.content_config(config = nil)
        config ||= load
        {
          include_task_metadata: config[:include_task_metadata],
          include_error_details: config[:include_error_details],
          truncate_long_messages: config[:truncate_long_messages],
          max_message_length: config[:max_message_length]
        }
      end
    end
  end
end
@@ -25,7 +25,7 @@ module LanguageOperator
25
25
  const_set const_get remove_const
26
26
  define_method define_singleton_method
27
27
  undef_method remove_method alias_method
28
- exit exit! abort raise fail throw
28
+ exit exit! abort throw
29
29
  trap at_exit
30
30
  open
31
31
  ].freeze
@@ -36,7 +36,8 @@ module LanguageOperator
36
36
 
37
37
  # Step 3: Execute using instance_eval with smart constant injection
38
38
  # Only inject constants that won't conflict with user-defined ones
39
- safe_constants = %w[Numeric Integer Float String Array Hash TrueClass FalseClass Time Date]
39
+ safe_constants = %w[Numeric Integer Float String Array Hash TrueClass FalseClass Time Date
40
+ ArgumentError TypeError RuntimeError StandardError]
40
41
 
41
42
  # Find which constants user code defines to avoid redefinition warnings
42
43
  user_defined_constants = safe_constants.select { |const| code.include?("#{const} =") }
@@ -129,6 +130,9 @@ module LanguageOperator
129
130
  when :TrueClass, :FalseClass, :NilClass
130
131
  # Allow boolean and nil types
131
132
  ::Object.const_get(name)
133
+ when :ArgumentError, :TypeError, :RuntimeError, :StandardError
134
+ # Allow standard Ruby exception classes for error handling
135
+ ::Object.const_get(name)
132
136
  else
133
137
  # Security-by-default: explicitly deny access to any other constants
134
138
  # This prevents sandbox bypass through const_missing fallback
@@ -107,11 +107,10 @@ module LanguageOperator
107
107
  execution_start = Time.now
108
108
  max_retries ||= @config[:max_retries]
109
109
 
110
- with_span('task_executor.execute_task', attributes: {
111
- 'task.name' => task_name.to_s,
112
- 'task.inputs' => inputs.keys.map(&:to_s).join(','),
113
- 'task.max_retries' => max_retries
114
- }) do
110
+ # Reset JSON parsing retry flag for this task
111
+ @parsing_retry_attempted = false
112
+
113
+ with_span('task_executor.execute_task', attributes: build_task_execution_attributes(task_name, inputs, max_retries)) do
115
114
  # Fast task lookup using pre-built cache
116
115
  task_name_sym = task_name.to_sym
117
116
  task_info = @task_cache[task_name_sym]
@@ -137,15 +136,27 @@ module LanguageOperator
137
136
  OpenTelemetry::Trace.current_span&.set_attribute('task.timeout', timeout)
138
137
 
139
138
  # Execute with retry logic
140
- execute_with_retry(task, task_name, inputs, timeout, max_retries, execution_start)
139
+ result = execute_with_retry(task, task_name, inputs, timeout, max_retries, execution_start)
140
+
141
+ # Emit Kubernetes event for successful task completion
142
+ emit_task_execution_event(task_name, success: true, execution_start: execution_start)
143
+
144
+ result
141
145
  end
142
146
  rescue ArgumentError => e
143
147
  # Validation errors should not be retried - re-raise immediately
144
148
  log_task_error(task_name, e, :validation, execution_start)
149
+ emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e, event_type: :validation)
145
150
  raise TaskValidationError.new(task_name, e.message, e)
151
+ rescue TaskValidationError => e
152
+ # TaskValidationError from validate_inputs should be logged as :validation
153
+ log_task_error(task_name, e, :validation, execution_start)
154
+ emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e, event_type: :validation)
155
+ raise e
146
156
  rescue StandardError => e
147
157
  # Catch any unexpected errors that escaped retry logic
148
158
  log_task_error(task_name, e, :system, execution_start)
159
+ emit_task_execution_event(task_name, success: false, execution_start: execution_start, error: e)
149
160
  raise create_appropriate_error(task_name, e)
150
161
  end
151
162
 
@@ -211,7 +222,7 @@ module LanguageOperator
211
222
  parse_neural_response(response_text, task)
212
223
  rescue RuntimeError => e
213
224
  # If parsing fails and this is a JSON parsing error, try one more time with clarified prompt
214
- raise e unless e.message.include?('returned invalid JSON') && !defined?(@parsing_retry_attempted)
225
+ raise e unless e.message.include?('returned invalid JSON') && !@parsing_retry_attempted
215
226
 
216
227
  @parsing_retry_attempted = true
217
228
 
@@ -237,11 +248,6 @@ module LanguageOperator
237
248
 
238
249
  # Try parsing the retry response
239
250
  parse_neural_response(retry_response_text, task)
240
-
241
- # Re-raise original error if not a JSON parsing error or already retried
242
- ensure
243
- # Reset retry flag for next task
244
- @parsing_retry_attempted = nil
245
251
  end
246
252
  end
247
253
 
@@ -373,6 +379,39 @@ module LanguageOperator
373
379
  'Agent::TaskExecutor'
374
380
  end
375
381
 
382
# Emit Kubernetes event for task execution
#
# Best-effort: does nothing when @agent does not expose a
# kubernetes_client, and any StandardError raised while building or
# sending the event is logged as a warning instead of propagating.
#
# @param task_name [Symbol, String] Task name
# @param success [Boolean] Whether task succeeded
# @param execution_start [Time] Task execution start time
# @param error [Exception, nil] Error if task failed
# @param event_type [Symbol, nil] Event type override (:success, :failure, :validation);
#   when given it is forwarded to the client as metadata['event_type']
# @return [void]
def emit_task_execution_event(task_name, success:, execution_start:, error: nil, event_type: nil)
  return unless @agent.respond_to?(:kubernetes_client)

  # Elapsed wall-clock time since execution_start, in milliseconds
  duration_ms = ((Time.now - execution_start) * 1000).round(2)

  metadata = {
    'task_type' => determine_task_type(@tasks[task_name.to_sym])
  }

  # Previously this keyword was accepted but dropped; pass it along so the
  # caller's classification (e.g. :validation) reaches the event.
  metadata['event_type'] = event_type.to_s if event_type

  if error
    metadata['error_type'] = error.class.name
    metadata['error_category'] = categorize_error(error).to_s
  end

  @agent.kubernetes_client.emit_execution_event(
    task_name.to_s,
    success: success,
    duration_ms: duration_ms,
    metadata: metadata
  )
rescue StandardError => e
  # Event emission must never break task execution
  logger.warn('Failed to emit task execution event',
              task: task_name,
              error: e.message)
end
414
+
376
415
  # Summarize hash values for logging (truncate long strings)
377
416
  # Optimized for performance with lazy computation
378
417
  #
@@ -622,6 +661,8 @@ module LanguageOperator
622
661
  # @param task [TaskDefinition] The task definition
623
662
  # @return [String] Task type
624
663
  def determine_task_type(task)
664
+ return nil unless task
665
+
625
666
  if task.neural? && task.symbolic?
626
667
  'hybrid'
627
668
  elsif task.neural?
@@ -966,6 +1007,43 @@ module LanguageOperator
966
1007
  end
967
1008
  cache
968
1009
  end
1010
+
1011
# Assemble the attribute hash attached to the task execution span.
#
# Always present:
# - task.name: task identifier
# - task.inputs: comma-separated input key names
# - task.max_retries: retry budget passed in by the caller
# - gen_ai.operation.name: fixed to 'execute_task'
#
# Added conditionally:
# - agent.name: only when the AGENT_NAME environment variable is set
# - task.type / task.has_neural / task.has_symbolic: only when the task
#   has an entry in @task_cache
#
# @param task_name [Symbol] Name of the task being executed
# @param inputs [Hash] Task input parameters
# @param max_retries [Integer] Maximum retry attempts
# @return [Hash] Span attributes
def build_task_execution_attributes(task_name, inputs, max_retries)
  name_text = task_name.to_s
  input_names = inputs.keys.map(&:to_s)

  span_attributes = {
    'task.name' => name_text,
    'task.inputs' => input_names.join(','),
    'task.max_retries' => max_retries,
    'gen_ai.operation.name' => 'execute_task'
  }

  # Duplicates the resource-level attribute so it is visible on the span itself
  agent_name = ENV.fetch('AGENT_NAME', nil)
  span_attributes['agent.name'] = agent_name if agent_name

  cached_task = @task_cache[task_name.to_sym]
  return span_attributes unless cached_task

  span_attributes.merge!(
    'task.type' => cached_task[:type],
    'task.has_neural' => cached_task[:neural].to_s,
    'task.has_symbolic' => cached_task[:symbolic].to_s
  )
end
969
1047
  end
970
1048
  end
971
1049
  end
@@ -70,6 +70,11 @@ module LanguageOperator
70
70
 
71
71
  # Build resource attributes from environment variables
72
72
  #
73
+ # Includes semantic attributes required for learning status tracking:
74
+ # - agent.name: Required for learning controller to identify agent executions
75
+ # - agent.mode: Agent operating mode (autonomous, scheduled, reactive)
76
+ # - service.version: Agent runtime version for observability
77
+ #
73
78
  # @return [Hash] Resource attributes
74
79
  def build_resource_attributes
75
80
  attributes = {}
@@ -83,9 +88,26 @@ module LanguageOperator
83
88
  # Kubernetes pod name
84
89
  attributes['k8s.pod.name'] = ENV['HOSTNAME'] if ENV['HOSTNAME']
85
90
 
86
- # Agent-specific attributes
87
- attributes['agent.name'] = ENV['AGENT_NAME'] if ENV['AGENT_NAME']
88
- attributes['agent.mode'] = ENV['AGENT_MODE'] if ENV['AGENT_MODE']
91
+ # Agent-specific attributes (CRITICAL for learning system)
92
+ if (agent_name = ENV.fetch('AGENT_NAME', nil))
93
+ attributes['agent.name'] = agent_name
94
+ # Also set as service.name for better trace organization
95
+ attributes['service.name'] = "language-operator-agent-#{agent_name}"
96
+ else
97
+ warn 'AGENT_NAME environment variable not set - learning status tracking may not work correctly'
98
+ end
99
+
100
+ if (agent_mode = ENV.fetch('AGENT_MODE', nil))
101
+ attributes['agent.mode'] = agent_mode
102
+ end
103
+
104
+ # Agent runtime version for observability
105
+ attributes['service.version'] = LanguageOperator::VERSION if defined?(LanguageOperator::VERSION)
106
+
107
+ # Agent cluster context
108
+ if (cluster_name = ENV.fetch('AGENT_CLUSTER', nil))
109
+ attributes['agent.cluster'] = cluster_name
110
+ end
89
111
 
90
112
  attributes
91
113
  end
@@ -179,16 +179,13 @@ module LanguageOperator
179
179
 
180
180
  # Drain and cleanup all executors in the pool
181
181
  executors_cleaned = 0
182
- begin
183
- loop do
184
- executor = @executor_pool.pop(timeout: 0.1)
185
- if executor
186
- executor.cleanup_connections
187
- executors_cleaned += 1
188
- end
182
+
183
+ until @executor_pool.empty?
184
+ executor = @executor_pool.pop unless @executor_pool.empty?
185
+ if executor
186
+ executor.cleanup_connections
187
+ executors_cleaned += 1
189
188
  end
190
- rescue ThreadError
191
- # Pool is empty, we're done
192
189
  end
193
190
 
194
191
  puts "Cleaned up #{executors_cleaned} executors from pool"
@@ -189,7 +189,7 @@ module LanguageOperator
189
189
  mode = agent.dig('spec', 'executionMode') || 'autonomous'
190
190
  if mode == 'scheduled'
191
191
  exec_data = get_execution_data(name, ctx)
192
-
192
+
193
193
  exec_rows = {
194
194
  'Total Runs' => exec_data[:total_runs],
195
195
  'Last Run' => exec_data[:last_run] || 'Never'
@@ -740,7 +740,7 @@ module LanguageOperator
740
740
  begin
741
741
  # Get CronJob to find last execution time and next run
742
742
  cronjob = ctx.client.get_resource('CronJob', agent_name, ctx.namespace)
743
-
743
+
744
744
  # Get last successful execution time
745
745
  last_successful = cronjob.dig('status', 'lastSuccessfulTime')
746
746
  if last_successful
@@ -750,9 +750,7 @@ module LanguageOperator
750
750
 
751
751
  # Calculate next run time from schedule
752
752
  schedule = cronjob.dig('spec', 'schedule')
753
- if schedule
754
- execution_data[:next_run] = calculate_next_run(schedule)
755
- end
753
+ execution_data[:next_run] = calculate_next_run(schedule) if schedule
756
754
  rescue K8s::Error::NotFound, StandardError
757
755
  # CronJob not found or parsing error, continue with job counting
758
756
  end
@@ -761,7 +759,7 @@ module LanguageOperator
761
759
  begin
762
760
  # Count total completed jobs for this agent
763
761
  jobs = ctx.client.list_resources('Job', namespace: ctx.namespace)
764
-
762
+
765
763
  agent_jobs = jobs.select do |job|
766
764
  labels = job.dig('metadata', 'labels') || {}
767
765
  labels['app.kubernetes.io/name'] == agent_name
@@ -772,7 +770,7 @@ module LanguageOperator
772
770
  conditions = job.dig('status', 'conditions') || []
773
771
  conditions.any? { |c| c['type'] == 'Complete' && c['status'] == 'True' }
774
772
  end
775
-
773
+
776
774
  execution_data[:total_runs] = successful_jobs.length
777
775
  rescue StandardError
778
776
  # If job listing fails, keep default count of 0
@@ -784,32 +782,32 @@ module LanguageOperator
784
782
  def calculate_next_run(schedule)
785
783
  # Simple next run calculation for common cron patterns
786
784
  # Handle the most common case: */N * * * * (every N minutes)
787
-
785
+
788
786
  parts = schedule.split
789
787
  return schedule unless parts.length == 5 # Not a valid cron expression
790
-
788
+
791
789
  minute, hour, day, month, weekday = parts
792
790
  current_time = Time.now
793
-
791
+
794
792
  # Handle every-N-minutes pattern: */10 * * * *
795
793
  if minute.start_with?('*/') && hour == '*' && day == '*' && month == '*' && weekday == '*'
796
794
  interval = minute[2..].to_i
797
795
  if interval > 0 && interval < 60
798
796
  current_minute = current_time.min
799
- current_second = current_time.sec
800
-
797
+ current_time.sec
798
+
801
799
  # Find the next occurrence
802
800
  next_minute_mark = ((current_minute / interval) + 1) * interval
803
-
801
+
804
802
  if next_minute_mark < 60
805
803
  # Same hour
806
- next_time = Time.new(current_time.year, current_time.month, current_time.day,
804
+ next_time = Time.new(current_time.year, current_time.month, current_time.day,
807
805
  current_time.hour, next_minute_mark, 0)
808
806
  else
809
807
  # Next hour
810
808
  next_hour = current_time.hour + 1
811
809
  next_minute = next_minute_mark - 60
812
-
810
+
813
811
  if next_hour < 24
814
812
  next_time = Time.new(current_time.year, current_time.month, current_time.day,
815
813
  next_hour, next_minute, 0)
@@ -820,11 +818,11 @@ module LanguageOperator
820
818
  0, next_minute, 0)
821
819
  end
822
820
  end
823
-
821
+
824
822
  return Formatters::ValueFormatter.time_until(next_time)
825
823
  end
826
824
  end
827
-
825
+
828
826
  # For other patterns, show the schedule (could add more patterns later)
829
827
  schedule
830
828
  rescue StandardError