rubyn-code 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/README.md +91 -3
  3. data/lib/rubyn_code/agent/background_job_handler.rb +71 -0
  4. data/lib/rubyn_code/agent/conversation.rb +55 -56
  5. data/lib/rubyn_code/agent/dynamic_tool_schema.rb +99 -0
  6. data/lib/rubyn_code/agent/feedback_handler.rb +49 -0
  7. data/lib/rubyn_code/agent/llm_caller.rb +149 -0
  8. data/lib/rubyn_code/agent/loop.rb +175 -683
  9. data/lib/rubyn_code/agent/loop_detector.rb +50 -11
  10. data/lib/rubyn_code/agent/prompts.rb +109 -0
  11. data/lib/rubyn_code/agent/response_modes.rb +111 -0
  12. data/lib/rubyn_code/agent/response_parser.rb +111 -0
  13. data/lib/rubyn_code/agent/system_prompt_builder.rb +205 -0
  14. data/lib/rubyn_code/agent/tool_processor.rb +158 -0
  15. data/lib/rubyn_code/agent/usage_tracker.rb +59 -0
  16. data/lib/rubyn_code/auth/oauth.rb +80 -64
  17. data/lib/rubyn_code/auth/server.rb +21 -24
  18. data/lib/rubyn_code/auth/token_store.rb +31 -44
  19. data/lib/rubyn_code/autonomous/daemon.rb +29 -18
  20. data/lib/rubyn_code/autonomous/idle_poller.rb +4 -4
  21. data/lib/rubyn_code/autonomous/task_claimer.rb +36 -40
  22. data/lib/rubyn_code/background/worker.rb +64 -76
  23. data/lib/rubyn_code/cli/app.rb +128 -114
  24. data/lib/rubyn_code/cli/commands/model.rb +75 -18
  25. data/lib/rubyn_code/cli/commands/new_session.rb +45 -0
  26. data/lib/rubyn_code/cli/daemon_runner.rb +28 -11
  27. data/lib/rubyn_code/cli/renderer.rb +109 -60
  28. data/lib/rubyn_code/cli/repl.rb +42 -373
  29. data/lib/rubyn_code/cli/repl_commands.rb +176 -0
  30. data/lib/rubyn_code/cli/repl_lifecycle.rb +75 -0
  31. data/lib/rubyn_code/cli/repl_setup.rb +145 -0
  32. data/lib/rubyn_code/cli/setup.rb +6 -2
  33. data/lib/rubyn_code/cli/stream_formatter.rb +56 -49
  34. data/lib/rubyn_code/cli/version_check.rb +28 -11
  35. data/lib/rubyn_code/config/defaults.rb +10 -0
  36. data/lib/rubyn_code/config/project_profile.rb +185 -0
  37. data/lib/rubyn_code/config/settings.rb +100 -1
  38. data/lib/rubyn_code/context/auto_compact.rb +1 -1
  39. data/lib/rubyn_code/context/context_budget.rb +167 -0
  40. data/lib/rubyn_code/context/decision_compactor.rb +99 -0
  41. data/lib/rubyn_code/context/manager.rb +7 -5
  42. data/lib/rubyn_code/context/micro_compact.rb +29 -19
  43. data/lib/rubyn_code/context/schema_filter.rb +64 -0
  44. data/lib/rubyn_code/db/connection.rb +31 -26
  45. data/lib/rubyn_code/db/migrator.rb +44 -28
  46. data/lib/rubyn_code/hooks/built_in.rb +14 -10
  47. data/lib/rubyn_code/index/codebase_index.rb +245 -0
  48. data/lib/rubyn_code/learning/extractor.rb +65 -82
  49. data/lib/rubyn_code/learning/injector.rb +22 -23
  50. data/lib/rubyn_code/learning/instinct.rb +71 -42
  51. data/lib/rubyn_code/learning/shortcut.rb +95 -0
  52. data/lib/rubyn_code/llm/adapters/anthropic.rb +270 -0
  53. data/lib/rubyn_code/llm/adapters/anthropic_streaming.rb +215 -0
  54. data/lib/rubyn_code/llm/adapters/base.rb +35 -0
  55. data/lib/rubyn_code/llm/adapters/json_parsing.rb +21 -0
  56. data/lib/rubyn_code/llm/adapters/openai.rb +246 -0
  57. data/lib/rubyn_code/llm/adapters/openai_compatible.rb +46 -0
  58. data/lib/rubyn_code/llm/adapters/openai_message_translator.rb +90 -0
  59. data/lib/rubyn_code/llm/adapters/openai_streaming.rb +141 -0
  60. data/lib/rubyn_code/llm/adapters/prompt_caching.rb +60 -0
  61. data/lib/rubyn_code/llm/client.rb +55 -252
  62. data/lib/rubyn_code/llm/model_router.rb +237 -0
  63. data/lib/rubyn_code/llm/streaming.rb +4 -227
  64. data/lib/rubyn_code/mcp/client.rb +1 -1
  65. data/lib/rubyn_code/mcp/config.rb +9 -12
  66. data/lib/rubyn_code/mcp/sse_transport.rb +15 -13
  67. data/lib/rubyn_code/mcp/stdio_transport.rb +16 -18
  68. data/lib/rubyn_code/mcp/tool_bridge.rb +31 -62
  69. data/lib/rubyn_code/memory/session_persistence.rb +59 -58
  70. data/lib/rubyn_code/memory/store.rb +42 -55
  71. data/lib/rubyn_code/observability/budget_enforcer.rb +46 -32
  72. data/lib/rubyn_code/observability/cost_calculator.rb +32 -8
  73. data/lib/rubyn_code/observability/skill_analytics.rb +116 -0
  74. data/lib/rubyn_code/observability/token_analytics.rb +130 -0
  75. data/lib/rubyn_code/observability/usage_reporter.rb +79 -61
  76. data/lib/rubyn_code/output/diff_renderer.rb +102 -77
  77. data/lib/rubyn_code/output/formatter.rb +11 -11
  78. data/lib/rubyn_code/permissions/policy.rb +11 -13
  79. data/lib/rubyn_code/permissions/prompter.rb +8 -9
  80. data/lib/rubyn_code/protocols/plan_approval.rb +25 -20
  81. data/lib/rubyn_code/skills/document.rb +33 -29
  82. data/lib/rubyn_code/skills/ttl_manager.rb +100 -0
  83. data/lib/rubyn_code/sub_agents/runner.rb +20 -25
  84. data/lib/rubyn_code/tasks/dag.rb +25 -24
  85. data/lib/rubyn_code/tools/ask_user.rb +44 -0
  86. data/lib/rubyn_code/tools/background_run.rb +2 -1
  87. data/lib/rubyn_code/tools/base.rb +26 -32
  88. data/lib/rubyn_code/tools/bash.rb +2 -1
  89. data/lib/rubyn_code/tools/edit_file.rb +74 -18
  90. data/lib/rubyn_code/tools/executor.rb +74 -24
  91. data/lib/rubyn_code/tools/file_cache.rb +95 -0
  92. data/lib/rubyn_code/tools/git_commit.rb +12 -10
  93. data/lib/rubyn_code/tools/git_log.rb +12 -10
  94. data/lib/rubyn_code/tools/glob.rb +23 -7
  95. data/lib/rubyn_code/tools/grep.rb +2 -1
  96. data/lib/rubyn_code/tools/load_skill.rb +13 -6
  97. data/lib/rubyn_code/tools/memory_search.rb +14 -13
  98. data/lib/rubyn_code/tools/memory_write.rb +2 -1
  99. data/lib/rubyn_code/tools/output_compressor.rb +185 -0
  100. data/lib/rubyn_code/tools/read_file.rb +11 -6
  101. data/lib/rubyn_code/tools/review_pr.rb +127 -80
  102. data/lib/rubyn_code/tools/run_specs.rb +26 -15
  103. data/lib/rubyn_code/tools/schema.rb +4 -10
  104. data/lib/rubyn_code/tools/spawn_agent.rb +113 -82
  105. data/lib/rubyn_code/tools/spawn_teammate.rb +107 -64
  106. data/lib/rubyn_code/tools/spec_output_parser.rb +118 -0
  107. data/lib/rubyn_code/tools/task.rb +17 -17
  108. data/lib/rubyn_code/tools/web_fetch.rb +62 -47
  109. data/lib/rubyn_code/tools/web_search.rb +66 -48
  110. data/lib/rubyn_code/tools/write_file.rb +59 -1
  111. data/lib/rubyn_code/version.rb +1 -1
  112. data/lib/rubyn_code.rb +40 -1
  113. data/skills/rubyn_self_test.md +121 -0
  114. metadata +53 -1
@@ -38,7 +38,7 @@ module RubynCode
38
38
  # @return [Array<Array(Integer, String)>] pairs of [version, file_path]
39
39
  def pending_migrations
40
40
  applied = applied_versions
41
- available_migrations.reject { |version, _| applied.include?(version) }
41
+ available_migrations.reject { |version, _| applied.include?(version) } # rubocop:disable Style/HashExcept
42
42
  end
43
43
 
44
44
  # Returns the set of already-applied migration versions.
@@ -67,16 +67,16 @@ module RubynCode
67
67
  def available_migrations
68
68
  all = Dir.glob(File.join(MIGRATIONS_DIR, '*'))
69
69
  .select { |path| path.end_with?('.sql', '.rb') }
70
- .map { |path| parse_migration_file(path) }
71
- .compact
70
+ .filter_map { |path| parse_migration_file(path) }
72
71
 
73
- # Deduplicate: if both .rb and .sql exist for the same version, prefer .rb
72
+ deduplicate_migrations(all)
73
+ end
74
+
75
+ def deduplicate_migrations(all)
74
76
  by_version = {}
75
77
  all.each do |version, path|
76
- existing = by_version[version]
77
- by_version[version] = [version, path] if existing.nil? || path.end_with?('.rb')
78
+ by_version[version] = [version, path] if !by_version[version] || path.end_with?('.rb')
78
79
  end
79
-
80
80
  by_version.values.sort_by(&:first)
81
81
  end
82
82
 
@@ -139,33 +139,49 @@ module RubynCode
139
139
  in_block = false
140
140
 
141
141
  sql.each_line do |line|
142
- stripped = line.strip
142
+ in_block, current = process_sql_line(line, statements, current, in_block)
143
+ end
143
144
 
144
- # Track BEGIN/END blocks (e.g., triggers)
145
- if stripped.match?(/\bBEGIN\b/i) && !stripped.match?(/\ABEGIN\s+(IMMEDIATE|DEFERRED|EXCLUSIVE)/i)
146
- in_block = true
147
- end
148
- current << line
149
-
150
- if in_block
151
- if stripped.match?(/\bEND\b\s*;?\s*$/i)
152
- in_block = false
153
- statements << current.strip.chomp(';')
154
- current = +''
155
- end
156
- elsif stripped.end_with?(';')
157
- stmt = current.strip.chomp(';').strip
158
- statements << stmt unless stmt.empty? || (stmt.match?(/\A\s*--/) && !stmt.include?("\n"))
159
- current = +''
160
- end
145
+ finalize_statements(statements, current)
146
+ end
147
+
148
# Feeds one line of a SQL script into the statement splitter.
#
# The line is always appended to the +current+ buffer. While inside a
# BEGIN...END block the buffer is only flushed once a line ends with END
# (optionally followed by a semicolon); outside a block it is flushed on
# any line ending with a semicolon.
#
# @param line [String] raw line, including its newline
# @param statements [Array<String>] accumulator of completed statements (mutated)
# @param current [String] buffer for the statement being assembled (mutated)
# @param in_block [Boolean] whether a BEGIN...END block was open before this line
# @return [Array(Boolean, String)] the new in_block flag and the buffer to keep using
def process_sql_line(line, statements, current, in_block)
  text = line.strip
  inside = in_block || begin_block?(text)
  current << line

  if inside
    # Still collecting the block body until its closing END.
    return [true, current] unless text.match?(/\bEND\b\s*;?\s*$/i)

    statements << current.strip.chomp(';')
  else
    # Ordinary statement: keep buffering until the terminating semicolon.
    return [false, current] unless text.end_with?(';')

    append_statement(statements, current)
  end

  # A statement was flushed: start over with a fresh mutable buffer.
  [false, +'']
end
163
+
164
# Reports whether a (stripped) SQL line opens a BEGIN...END body, such as
# a trigger definition, as opposed to starting a transaction.
#
# Transaction openers are never block openers. That covers the original
# IMMEDIATE/DEFERRED/EXCLUSIVE forms and additionally a plain `BEGIN;` and
# `BEGIN TRANSACTION`, which would otherwise swallow every following
# statement into one never-terminated block.
#
# @param stripped [String] a single line with surrounding whitespace removed
# @return [Boolean]
def begin_block?(stripped)
  return false unless stripped.match?(/\bBEGIN\b/i)
  # BEGIN [DEFERRED|IMMEDIATE|EXCLUSIVE] [TRANSACTION] starts a transaction.
  return false if stripped.match?(/\ABEGIN\s+(IMMEDIATE|DEFERRED|EXCLUSIVE|TRANSACTION)/i)

  # A bare BEGIN directly terminated by a semicolon is a transaction too;
  # a trigger body's BEGIN is never followed by a semicolon.
  !stripped.match?(/\ABEGIN\s*;/i)
end
168
+
169
+ def append_statement(statements, current)
170
+ stmt = current.strip.chomp(';').strip
171
+ return if stmt.empty? || (stmt.match?(/\A\s*--/) && !stmt.include?("\n"))
162
172
 
163
- # Handle any remaining content
173
+ statements << stmt
174
+ end
175
+
176
+ def finalize_statements(statements, current)
164
177
  remainder = current.strip.chomp(';').strip
165
178
  statements << remainder unless remainder.empty?
166
179
 
167
- # Filter out comment-only statements
168
- statements.reject { |s| s.lines.all? { |l| l.strip.empty? || l.strip.start_with?('--') } }
180
+ statements.reject { |s| comment_only?(s) }
181
+ end
182
+
183
# True when a statement has no executable SQL: every line is either blank
# or a `--` line comment. An empty string counts as comment-only.
#
# @param stmt [String]
# @return [Boolean]
def comment_only?(stmt)
  stmt.each_line.none? do |raw|
    text = raw.strip
    !(text.empty? || text.start_with?('--'))
  end
end
170
186
 
171
187
  # Extracts the version number and name from a migration filename.
@@ -25,20 +25,24 @@ module RubynCode
25
25
  usage = response[:usage] || response['usage']
26
26
  return unless usage
27
27
 
28
- model = response[:model] || response['model'] || 'unknown'
29
- input_tokens = usage[:input_tokens] || usage['input_tokens'] || 0
30
- output_tokens = usage[:output_tokens] || usage['output_tokens'] || 0
31
- cache_read = usage[:cache_read_input_tokens] || usage['cache_read_input_tokens'] || 0
32
- cache_write = usage[:cache_creation_input_tokens] || usage['cache_creation_input_tokens'] || 0
28
+ record_usage(response, usage)
29
+ end
30
+
31
+ private
33
32
 
33
+ def record_usage(response, usage)
34
34
  @budget_enforcer.record!(
35
- model: model,
36
- input_tokens: input_tokens,
37
- output_tokens: output_tokens,
38
- cache_read_tokens: cache_read,
39
- cache_write_tokens: cache_write
35
+ model: fetch_value(response, :model, 'unknown'),
36
+ input_tokens: fetch_value(usage, :input_tokens, 0),
37
+ output_tokens: fetch_value(usage, :output_tokens, 0),
38
+ cache_read_tokens: fetch_value(usage, :cache_read_input_tokens, 0),
39
+ cache_write_tokens: fetch_value(usage, :cache_creation_input_tokens, 0)
40
40
  )
41
41
  end
42
+
43
# Looks a key up under its Symbol form first and its String form second,
# returning +default+ when neither yields a truthy value.
#
# @param hash [Hash] hash keyed by symbols and/or strings
# @param sym_key [Symbol] key to look up
# @param default [Object] value returned when both lookups are nil/false
# @return [Object]
def fetch_value(hash, sym_key, default)
  symbol_hit = hash[sym_key]
  return symbol_hit if symbol_hit

  string_hit = hash[sym_key.to_s]
  string_hit || default
end
42
46
  end
43
47
 
44
48
  # Logs tool calls and their results via the formatter.
@@ -0,0 +1,245 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'fileutils'
5
+
6
module RubynCode
  module Index
    # Rails-aware codebase index.
    #
    # Source files are scanned line by line with regular expressions (no AST
    # is built), so the index is a fast approximation rather than a full
    # parse. Classes, modules, methods, Rails callbacks/scopes/validations
    # ("nodes") and associations / spec-to-source links ("edges") are stored
    # in a JSON file under the project root. The first build scans every
    # .rb file; incremental updates re-index only files whose mtime changed.
    #
    # Nodes and edges are plain Hashes with String keys so that an index
    # built in memory and one loaded from JSON have the same shape.
    class CodebaseIndex
      INDEX_DIR = '.rubyn-code'
      INDEX_FILE = 'codebase_index.json'

      # A `spec/` or `test/` path segment (not merely a substring such as
      # "latest/" or "inspec/").
      TEST_DIR_PATTERN = %r{(\A|/)(spec|test)/}
      # `class Foo` / `module Foo::Bar`; requires a constant name so that
      # `class << self` is not recorded as a class called "<<".
      CLASS_PATTERN = /^\s*(class|module)\s+((?:::)?[A-Z]\w*(?:::[A-Z]\w*)*)/
      # A `def` at the start of a line, optionally behind an inline
      # visibility modifier. Anchoring keeps "undef x" and prose in comments
      # from being indexed as methods.
      METHOD_PATTERN = /^\s*(?:(private|protected|public)\s+)?def\s+(self\.)?(\w+[?!=]?)(\(.*?\))?/
      ASSOCIATION_PATTERN = /\b(has_many|has_one|belongs_to|has_and_belongs_to_many)\s+:(\w+)/
      # Node type => pattern whose single capture is the node name.
      RAILS_PATTERNS = {
        'callback' => /\bbefore_action\s+:(\w+)/,
        'scope' => /\bscope\s+:(\w+)/,
        'validation' => /\bvalidates?\s+:(\w+)/
      }.freeze

      attr_reader :nodes, :edges, :index_path

      # @param project_root [String] directory to index; expanded to an absolute path
      def initialize(project_root:)
        @project_root = File.expand_path(project_root)
        @index_path = File.join(@project_root, INDEX_DIR, INDEX_FILE)
        @nodes = [] # { type:, name:, file:, line:, params:, visibility: }
        @edges = [] # { from:, to:, relationship: }
        @file_mtimes = {}
      end

      # Builds the index from scratch, discarding any in-memory state, and
      # writes it to disk.
      #
      # @return [CodebaseIndex] self
      def build!
        @nodes = []
        @edges = []
        @file_mtimes = {}

        ruby_files.each { |file| index_file(file) }
        extract_rails_edges
        save!
        self
      end

      # Loads a previously saved index from disk.
      #
      # Loading is best-effort: a missing, unreadable or malformed index
      # file yields nil (and leaves the in-memory state untouched) so the
      # caller can fall back to a rebuild.
      #
      # @return [CodebaseIndex, nil] self, or nil when nothing usable was loaded
      def load
        return nil unless File.exist?(@index_path)

        data = JSON.parse(File.read(@index_path))
        return nil unless data.is_a?(Hash)

        @nodes = data['nodes'] || []
        @edges = data['edges'] || []
        @file_mtimes = data['file_mtimes'] || {}
        self
      rescue StandardError
        nil
      end

      # Loads the index if one exists on disk, otherwise builds it.
      #
      # @return [CodebaseIndex] self
      def load_or_build!
        load || build!
      end

      # Incremental update: re-indexes only files added, modified or deleted
      # since the index was last written, then persists the result.
      #
      # @return [CodebaseIndex] self
      def update!
        changed = detect_changed_files
        return self if changed.empty?

        changed.each do |file|
          remove_nodes_for(file)
          index_file(file) if File.exist?(file)
        end

        extract_rails_edges
        save!
        self
      end

      # Case-insensitive substring search over node names and file paths.
      #
      # @param term [#to_s]
      # @return [Array<Hash>] matching nodes
      def query(term)
        pattern = term.to_s.downcase
        @nodes.select do |node|
          node['name'].to_s.downcase.include?(pattern) ||
            node['file'].to_s.downcase.include?(pattern)
        end
      end

      # Collects what is known about a file: the nodes it defines, every edge
      # that touches it, and the .rb files those edges mention.
      #
      # Edges are matched both by the names defined in the file and by the
      # file's own relative path. Edge endpoints are frequently file paths
      # ('tests' edges, association 'from'), so matching by name alone would
      # never find a file's specs.
      #
      # @param file_path [String] absolute or project-relative path
      # @return [Hash] with :definitions, :relationships and :affected_files
      def impact_analysis(file_path)
        relative = relative_path(file_path)
        direct = @nodes.select { |n| n['file'] == relative }
        identifiers = direct.filter_map { |n| n['name'] } << relative
        related_edges = edges_involving(identifiers)

        {
          definitions: direct,
          relationships: related_edges,
          affected_files: related_edges.flat_map { |e| find_files_for(e) }.uniq
        }
      end

      # Compact summary for system prompt injection (~200-500 tokens).
      #
      # @return [String]
      def to_prompt_summary
        counts = node_type_counts
        assoc_count = @edges.count { |e| e['relationship'] == 'association' }

        lines = ['Codebase Index:']
        lines << "  Classes: #{counts['class']}, Methods: #{counts['method']}"
        lines << "  Models: #{counts['model']}, Controllers: #{counts['controller']}, Services: #{counts['service']}"
        lines << "  Associations: #{assoc_count}"
        lines.join("\n")
      end

      # @return [Hash] counts of indexed files, nodes and edges
      def stats
        {
          files_indexed: @file_mtimes.size,
          nodes: @nodes.size,
          edges: @edges.size
        }
      end

      private

      # Edges whose 'from' or 'to' is one of the given identifiers.
      def edges_involving(names)
        wanted = names.to_h { |name| [name, true] } # O(1) membership checks
        @edges.select { |e| wanted.key?(e['from']) || wanted.key?(e['to']) }
      end

      # Node count per type; missing types read as 0.
      def node_type_counts
        counts = Hash.new(0)
        @nodes.each { |n| counts[n['type']] += 1 }
        counts
      end

      # Absolute paths of every .rb file under the project root, excluding
      # vendored and node_modules trees.
      def ruby_files
        Dir.glob(File.join(@project_root, '**', '*.rb'))
           .reject { |f| f.include?('/vendor/') || f.include?('/node_modules/') }
      end

      # Indexes one file. Failures (unreadable file, invalid encoding, ...)
      # are logged and skipped so a single bad file cannot abort a build.
      def index_file(file)
        relative = relative_path(file)
        content = File.read(file)
        @file_mtimes[relative] = File.mtime(file).to_i

        extract_classes(content, relative)
        extract_methods(content, relative)
        extract_associations(content, relative)
        extract_rails_patterns(content, relative)
      rescue StandardError => e
        RubynCode::Debug.warn("Index: failed to parse #{file}: #{e.message}")
      end

      # Yields the capture groups and 1-based line number of every match of
      # +pattern+, scanning +content+ one line at a time.
      def each_capture(content, pattern)
        content.each_line.with_index(1) do |line, number|
          line.scan(pattern) { |captures| yield(captures, number) }
        end
      end

      def extract_classes(content, file)
        each_capture(content, CLASS_PATTERN) do |(type, name), line|
          @nodes << { 'type' => classify_node(file, type), 'name' => name, 'file' => file, 'line' => line }
        end
      end

      # Records method definitions. Visibility reflects only an inline
      # modifier (`private def foo`); methods that follow a bare `private`
      # keyword are still reported as 'public' because sections are not tracked.
      def extract_methods(content, file)
        each_capture(content, METHOD_PATTERN) do |(visibility, _receiver, name, params), line|
          @nodes << {
            'type' => 'method', 'name' => name,
            'file' => file, 'line' => line,
            'params' => params&.strip,
            'visibility' => visibility || 'public'
          }
        end
      end

      def extract_associations(content, file)
        content.scan(ASSOCIATION_PATTERN) do |assoc_type, name|
          @edges << { 'from' => file, 'to' => name, 'relationship' => 'association', 'type' => assoc_type }
        end
      end

      # Records before_action callbacks, scopes and validations, grouped by
      # kind in that order.
      def extract_rails_patterns(content, file)
        RAILS_PATTERNS.each do |node_type, pattern|
          each_capture(content, pattern) do |(name), line|
            @nodes << { 'type' => node_type, 'name' => name, 'file' => file, 'line' => line }
          end
        end
      end

      # Rebuilds the 'tests' edges linking each spec/test file to the app
      # file it covers (spec/models/user_spec.rb -> app/models/user.rb).
      #
      # Existing 'tests' edges are dropped first so repeated updates cannot
      # accumulate duplicates, and a file is never linked to itself.
      def extract_rails_edges
        @edges.reject! { |e| e['relationship'] == 'tests' }

        @file_mtimes.each_key do |test_file|
          next unless test_file.match?(TEST_DIR_PATTERN)

          source = test_file
                   .sub(TEST_DIR_PATTERN) { "#{Regexp.last_match(1)}app/" }
                   .sub(/_(spec|test)\.rb\z/, '.rb')
          next if source == test_file || !@file_mtimes.key?(source)

          @edges << { 'from' => test_file, 'to' => source, 'relationship' => 'tests' }
        end
      end

      # Maps a file location to a node type. Concerns are checked first
      # because they live inside app/models/ and app/controllers/ and would
      # otherwise always be classified as models or controllers.
      def classify_node(file, type) # rubocop:disable Metrics/CyclomaticComplexity -- Rails directory mapping
        return 'concern' if file.include?('concerns/')
        return 'model' if file.include?('app/models/')
        return 'controller' if file.include?('app/controllers/')
        return 'service' if file.include?('app/services/')
        return 'spec' if file.match?(TEST_DIR_PATTERN)

        type == 'class' ? 'class' : 'module'
      end

      # Absolute paths of files that are new, modified (mtime differs from
      # the recorded one) or deleted since the index was written.
      def detect_changed_files
        current_files = ruby_files.to_h { |f| [relative_path(f), File.mtime(f).to_i] }
        changed = []

        current_files.each do |rel, mtime|
          changed << File.join(@project_root, rel) if @file_mtimes[rel] != mtime
        end

        # Files that were deleted
        @file_mtimes.each_key do |rel|
          changed << File.join(@project_root, rel) unless current_files.key?(rel)
        end

        changed
      end

      # Forgets everything previously recorded for a file.
      def remove_nodes_for(file)
        relative = relative_path(file)
        @nodes.reject! { |n| n['file'] == relative }
        @edges.reject! { |e| e['from'] == relative }
        @file_mtimes.delete(relative)
      end

      # The endpoints of an edge that look like Ruby file paths.
      def find_files_for(edge)
        [edge['from'], edge['to']].compact.select { |f| f.to_s.end_with?('.rb') }
      end

      # Strips the project root prefix; paths that are already relative are
      # returned unchanged.
      def relative_path(absolute)
        absolute.delete_prefix("#{@project_root}/")
      end

      def save!
        FileUtils.mkdir_p(File.dirname(@index_path))
        data = { 'nodes' => @nodes, 'edges' => @edges, 'file_mtimes' => @file_mtimes }
        File.write(@index_path, JSON.generate(data))
      end
    end
  end
end
@@ -10,7 +10,7 @@ module RubynCode
10
10
  # After a session, the extractor sends recent conversation history to a
11
11
  # cheaper model (Haiku) and asks it to identify patterns that could be
12
12
  # useful in future sessions for the same project.
13
- module Extractor
13
+ module Extractor # rubocop:disable Metrics/ModuleLength -- LLM extraction logic with DB persistence
14
14
  # Maximum number of recent messages to analyze.
15
15
  MESSAGE_WINDOW = 30
16
16
 
@@ -23,29 +23,15 @@ module RubynCode
23
23
  project_specific
24
24
  ].freeze
25
25
 
26
- EXTRACTION_PROMPT = <<~PROMPT.freeze
27
- Analyze the following conversation between a developer and an AI coding assistant.
28
- Extract reusable patterns that could help in future sessions for this project.
29
-
30
- For each pattern, provide:
31
- - type: one of #{VALID_TYPES.join(', ')}
32
- - pattern: a concise description of the learned behavior or fix
33
- - context_tags: relevant tags (e.g., framework names, error types, file patterns)
34
- - confidence: initial confidence score between 0.3 and 0.8
35
-
36
- Respond with a JSON array of objects. If no patterns are found, respond with [].
37
- Only extract patterns that are genuinely reusable, not one-off fixes.
38
-
39
- Example response:
40
- [
41
- {
42
- "type": "error_resolution",
43
- "pattern": "When seeing 'PG::UniqueViolation' on users.email, check for missing unique index migration",
44
- "context_tags": ["postgresql", "rails", "migration"],
45
- "confidence": 0.6
46
- }
47
- ]
48
- PROMPT
26
+ EXTRACTION_PROMPT = "Analyze the following conversation between a developer and an AI coding assistant.\n" \
27
+ "Extract reusable patterns that could help in future sessions for this project.\n\n" \
28
+ "For each pattern, provide:\n" \
29
+ "- type: one of #{VALID_TYPES.join(', ')}\n" \
30
+ "- pattern: a concise description of the learned behavior or fix\n" \
31
+ "- context_tags: relevant tags (e.g., framework names, error types, file patterns)\n" \
32
+ "- confidence: initial confidence score between 0.3 and 0.8\n\n" \
33
+ "Respond with a JSON array of objects. If no patterns are found, respond with [].\n" \
34
+ 'Only extract patterns that are genuinely reusable, not one-off fixes.'.freeze
49
35
 
50
36
  class << self
51
37
  # Extracts instinct patterns from a session's message history.
@@ -70,24 +56,18 @@ module RubynCode
70
56
  instincts
71
57
  end
72
58
 
59
+ DECAY_RATES = {
60
+ 'project_specific' => 0.02,
61
+ 'error_resolution' => 0.03,
62
+ 'debugging_technique' => 0.04,
63
+ 'user_correction' => 0.05,
64
+ 'workaround' => 0.07
65
+ }.freeze
66
+
73
67
  private
74
68
 
75
69
  def request_extraction(messages, llm_client)
76
- # Serialize conversation into a single user message to avoid
77
- # "must end with user message" errors
78
- transcript = messages.map do |m|
79
- role = (m[:role] || m['role'] || 'unknown').capitalize
80
- content = m[:content] || m['content']
81
- text = case content
82
- when String then content
83
- when Array
84
- content.filter_map do |b|
85
- b.respond_to?(:text) ? b.text : (b[:text] || b['text'])
86
- end.join("\n")
87
- else content.to_s
88
- end
89
- "#{role}: #{text}"
90
- end.join("\n\n")
70
+ transcript = serialize_transcript(messages)
91
71
 
92
72
  llm_client.chat(
93
73
  messages: [{ role: 'user', content: "#{EXTRACTION_PROMPT}\n\nConversation:\n#{transcript}" }],
@@ -98,59 +78,71 @@ module RubynCode
98
78
  nil
99
79
  end
100
80
 
81
# Flattens the message history into one transcript string, one formatted
# turn per message, with a blank line between turns.
#
# @param messages [Enumerable] conversation messages
# @return [String]
def serialize_transcript(messages)
  turns = messages.each_with_object([]) do |message, acc|
    acc << format_turn(message)
  end
  turns.join("\n\n")
end
84
+
85
# Renders one message as "Role: text".
#
# The role and content are read under either Symbol or String keys. Array
# content is reduced to the text of its blocks, joined by newlines; any
# other content is stringified with #to_s.
#
# @param msg [Hash] message with :role/'role' and :content/'content'
# @return [String]
def format_turn(msg)
  role = (msg[:role] || msg['role'] || 'unknown').capitalize
  content = msg[:content] || msg['content']
  text = if content.is_a?(Array)
           content.filter_map { |block| turn_block_text(block) }.join("\n")
         else
           content.to_s
         end
  "#{role}: #{text}"
end

# Text carried by a single content block, or nil when it has none.
#
# Blocks may be objects exposing #text, Hashes keyed by :text/'text', or
# plain Strings. Anything else is skipped rather than indexed with
# [:text], which raises for objects that do not implement #[].
def turn_block_text(block)
  return block.text if block.respond_to?(:text)
  return block if block.is_a?(String)
  return nil unless block.is_a?(Hash)

  block[:text] || block['text']
end
97
+
101
98
  def save_to_db(instincts)
102
99
  db = DB::Connection.instance
103
100
  now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
104
101
 
105
- instincts.each do |inst|
106
- db.execute(
107
- 'INSERT INTO instincts (id, project_path, pattern, context_tags, confidence, decay_rate, times_applied, times_helpful, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
108
- [
109
- SecureRandom.uuid,
110
- inst[:project_path],
111
- inst[:pattern],
112
- JSON.generate(inst[:context_tags]),
113
- inst[:confidence],
114
- inst[:decay_rate],
115
- inst[:times_applied],
116
- inst[:times_helpful],
117
- now,
118
- now
119
- ]
120
- )
121
- end
102
+ instincts.each { |inst| insert_instinct(db, inst, now) }
122
103
  rescue StandardError => e
123
104
  warn "[Learning::Extractor] Failed to save instincts: #{e.message}"
124
105
  end
125
106
 
126
- def parse_response(response)
107
+ def insert_instinct(db, inst, now)
108
+ db.execute(
109
+ <<~SQL.tr("\n", ' ').strip,
110
+ INSERT INTO instincts (id, project_path, pattern, context_tags,
111
+ confidence, decay_rate, times_applied, times_helpful,
112
+ created_at, updated_at)
113
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
114
+ SQL
115
+ [
116
+ SecureRandom.uuid, inst[:project_path], inst[:pattern],
117
+ JSON.generate(inst[:context_tags]), inst[:confidence],
118
+ inst[:decay_rate], inst[:times_applied], inst[:times_helpful],
119
+ now, now
120
+ ]
121
+ )
122
+ end
123
+
124
+ def parse_response(response) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity -- response parsing with multiple fallbacks
127
125
  return [] if response.nil?
128
126
 
129
- text = extract_text(response)
127
+ text = if response.respond_to?(:content)
128
+ response.content.find do |b|
129
+ b.respond_to?(:text)
130
+ end&.text
131
+ else
132
+ response.is_a?(Hash) ? response.dig('content', 0, 'text') : nil
133
+ end
130
134
  return [] if text.nil? || text.empty?
131
135
 
132
- # Extract JSON array from response, handling markdown code blocks
133
136
  json_str = text[/\[.*\]/m]
134
- return [] if json_str.nil?
137
+ return [] unless json_str
135
138
 
136
139
  parsed = JSON.parse(json_str)
137
- return [] unless parsed.is_a?(Array)
138
-
139
- parsed
140
+ parsed.is_a?(Array) ? parsed : []
140
141
  rescue JSON::ParserError => e
141
142
  warn "[Learning::Extractor] Failed to parse extraction response: #{e.message}"
142
143
  []
143
144
  end
144
145
 
145
- def extract_text(response)
146
- if response.respond_to?(:content)
147
- block = response.content.find { |b| b.respond_to?(:text) }
148
- block&.text
149
- elsif response.is_a?(Hash)
150
- response.dig('content', 0, 'text')
151
- end
152
- end
153
-
154
146
  def normalize_pattern(raw, project_path)
155
147
  type = raw['type'].to_s
156
148
  pattern = raw['pattern'].to_s.strip
@@ -173,17 +165,8 @@ module RubynCode
173
165
  }
174
166
  end
175
167
 
176
- # Different pattern types decay at different rates.
177
- # Project-specific knowledge decays slower; workarounds decay faster.
178
168
  def decay_rate_for_type(type)
179
- case type
180
- when 'project_specific' then 0.02
181
- when 'error_resolution' then 0.03
182
- when 'debugging_technique' then 0.04
183
- when 'user_correction' then 0.05
184
- when 'workaround' then 0.07
185
- else 0.05
186
- end
169
+ DECAY_RATES.fetch(type, 0.05)
187
170
  end
188
171
  end
189
172
  end
@@ -28,26 +28,21 @@ module RubynCode
28
28
  rows = fetch_instincts(db, project_path)
29
29
  return '' if rows.empty?
30
30
 
31
- instincts = rows.map { |row| row_to_instinct(row) }
31
+ instincts = build_and_filter(rows, context_tags, max_instincts)
32
+ return '' if instincts.empty?
32
33
 
33
- # Apply time-based decay to get current confidence
34
- now = Time.now
35
- instincts = instincts.map { |inst| InstinctMethods.apply_decay(inst, now) }
34
+ format_instincts(instincts)
35
+ end
36
36
 
37
- # Filter below minimum confidence
38
- instincts = instincts.select { |inst| inst.confidence >= MIN_CONFIDENCE }
37
+ def build_and_filter(rows, context_tags, max_instincts)
38
+ now = Time.now
39
+ instincts = rows
40
+ .map { |row| InstinctMethods.apply_decay(row_to_instinct(row), now) }
41
+ .select { |inst| inst.confidence >= MIN_CONFIDENCE }
39
42
 
40
- # Filter by context tags if provided
41
43
  instincts = filter_by_tags(instincts, context_tags) unless context_tags.empty?
42
44
 
43
- # Sort by confidence descending and take top N
44
- instincts = instincts
45
- .sort_by { |inst| -inst.confidence }
46
- .first(max_instincts)
47
-
48
- return '' if instincts.empty?
49
-
50
- format_instincts(instincts)
45
+ instincts.sort_by { |inst| -inst.confidence }.first(max_instincts)
51
46
  end
52
47
 
53
48
  private
@@ -64,19 +59,23 @@ module RubynCode
64
59
 
65
60
  def row_to_instinct(row)
66
61
  Instinct.new(
67
- id: row['id'],
68
- project_path: row['project_path'],
69
- pattern: row['pattern'],
70
- context_tags: parse_tags(row['context_tags']),
71
- confidence: row['confidence'].to_f,
72
- decay_rate: row['decay_rate'].to_f,
73
- times_applied: row['times_applied'].to_i,
74
- times_helpful: row['times_helpful'].to_i,
62
+ **core_instinct_attrs(row),
63
+ **numeric_instinct_attrs(row),
75
64
  created_at: parse_time(row['created_at']),
76
65
  updated_at: parse_time(row['updated_at'])
77
66
  )
78
67
  end
79
68
 
69
# Identity attributes of an instinct row: the id, project_path and pattern
# columns copied as-is, plus the context_tags column run through parse_tags.
#
# @param row [Hash] database row keyed by column name
# @return [Hash] keyword arguments keyed by Symbol
def core_instinct_attrs(row)
  attrs = %w[id project_path pattern].to_h { |column| [column.to_sym, row[column]] }
  attrs[:context_tags] = parse_tags(row['context_tags'])
  attrs
end
+ end
73
+
74
# Numeric attributes of an instinct row, coerced from whatever the row
# holds: confidence and decay_rate via #to_f, the usage counters via #to_i.
#
# @param row [Hash] database row keyed by column name
# @return [Hash] keyword arguments keyed by Symbol
def numeric_instinct_attrs(row)
  floats = %w[confidence decay_rate].to_h { |column| [column.to_sym, row[column].to_f] }
  counters = %w[times_applied times_helpful].to_h { |column| [column.to_sym, row[column].to_i] }
  floats.merge(counters)
end
+ end
78
+
80
79
  def parse_tags(tags)
81
80
  case tags
82
81
  when String