rubyn-code 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +151 -5
  3. data/db/migrations/013_add_failed_status_to_tasks.rb +51 -0
  4. data/lib/rubyn_code/agent/background_job_handler.rb +71 -0
  5. data/lib/rubyn_code/agent/conversation.rb +84 -56
  6. data/lib/rubyn_code/agent/dynamic_tool_schema.rb +152 -0
  7. data/lib/rubyn_code/agent/feedback_handler.rb +49 -0
  8. data/lib/rubyn_code/agent/llm_caller.rb +157 -0
  9. data/lib/rubyn_code/agent/loop.rb +182 -683
  10. data/lib/rubyn_code/agent/loop_detector.rb +50 -11
  11. data/lib/rubyn_code/agent/prompts.rb +109 -0
  12. data/lib/rubyn_code/agent/response_modes.rb +111 -0
  13. data/lib/rubyn_code/agent/response_parser.rb +111 -0
  14. data/lib/rubyn_code/agent/system_prompt_builder.rb +211 -0
  15. data/lib/rubyn_code/agent/tool_processor.rb +178 -0
  16. data/lib/rubyn_code/agent/usage_tracker.rb +59 -0
  17. data/lib/rubyn_code/auth/key_encryption.rb +118 -0
  18. data/lib/rubyn_code/auth/oauth.rb +80 -64
  19. data/lib/rubyn_code/auth/server.rb +21 -24
  20. data/lib/rubyn_code/auth/token_store.rb +80 -52
  21. data/lib/rubyn_code/autonomous/daemon.rb +146 -32
  22. data/lib/rubyn_code/autonomous/idle_poller.rb +4 -24
  23. data/lib/rubyn_code/autonomous/task_claimer.rb +46 -44
  24. data/lib/rubyn_code/background/worker.rb +64 -76
  25. data/lib/rubyn_code/cli/app.rb +159 -114
  26. data/lib/rubyn_code/cli/commands/doctor.rb +73 -0
  27. data/lib/rubyn_code/cli/commands/mcp.rb +77 -0
  28. data/lib/rubyn_code/cli/commands/model.rb +105 -18
  29. data/lib/rubyn_code/cli/commands/new_session.rb +45 -0
  30. data/lib/rubyn_code/cli/commands/provider.rb +123 -0
  31. data/lib/rubyn_code/cli/commands/skill.rb +52 -3
  32. data/lib/rubyn_code/cli/daemon_runner.rb +64 -11
  33. data/lib/rubyn_code/cli/first_run.rb +159 -0
  34. data/lib/rubyn_code/cli/renderer.rb +109 -60
  35. data/lib/rubyn_code/cli/repl.rb +48 -374
  36. data/lib/rubyn_code/cli/repl_commands.rb +177 -0
  37. data/lib/rubyn_code/cli/repl_lifecycle.rb +76 -0
  38. data/lib/rubyn_code/cli/repl_setup.rb +181 -0
  39. data/lib/rubyn_code/cli/setup.rb +6 -2
  40. data/lib/rubyn_code/cli/stream_formatter.rb +56 -49
  41. data/lib/rubyn_code/cli/version_check.rb +28 -11
  42. data/lib/rubyn_code/config/defaults.rb +11 -0
  43. data/lib/rubyn_code/config/project_profile.rb +185 -0
  44. data/lib/rubyn_code/config/schema.json +49 -0
  45. data/lib/rubyn_code/config/settings.rb +103 -1
  46. data/lib/rubyn_code/config/validator.rb +63 -0
  47. data/lib/rubyn_code/context/auto_compact.rb +1 -1
  48. data/lib/rubyn_code/context/context_budget.rb +182 -0
  49. data/lib/rubyn_code/context/context_collapse.rb +34 -4
  50. data/lib/rubyn_code/context/decision_compactor.rb +99 -0
  51. data/lib/rubyn_code/context/manager.rb +44 -8
  52. data/lib/rubyn_code/context/manual_compact.rb +1 -1
  53. data/lib/rubyn_code/context/micro_compact.rb +29 -19
  54. data/lib/rubyn_code/context/schema_filter.rb +64 -0
  55. data/lib/rubyn_code/db/connection.rb +31 -26
  56. data/lib/rubyn_code/db/migrator.rb +44 -28
  57. data/lib/rubyn_code/hooks/built_in.rb +14 -10
  58. data/lib/rubyn_code/hooks/registry.rb +4 -0
  59. data/lib/rubyn_code/ide/adapters/tool_output.rb +330 -0
  60. data/lib/rubyn_code/ide/client.rb +110 -0
  61. data/lib/rubyn_code/ide/handlers/accept_edit_handler.rb +35 -0
  62. data/lib/rubyn_code/ide/handlers/approve_tool_use_handler.rb +34 -0
  63. data/lib/rubyn_code/ide/handlers/cancel_handler.rb +41 -0
  64. data/lib/rubyn_code/ide/handlers/config_get_handler.rb +63 -0
  65. data/lib/rubyn_code/ide/handlers/config_set_handler.rb +86 -0
  66. data/lib/rubyn_code/ide/handlers/initialize_handler.rb +79 -0
  67. data/lib/rubyn_code/ide/handlers/models_list_handler.rb +39 -0
  68. data/lib/rubyn_code/ide/handlers/prompt_handler.rb +215 -0
  69. data/lib/rubyn_code/ide/handlers/review_handler.rb +110 -0
  70. data/lib/rubyn_code/ide/handlers/session_fork_handler.rb +49 -0
  71. data/lib/rubyn_code/ide/handlers/session_list_handler.rb +41 -0
  72. data/lib/rubyn_code/ide/handlers/session_reset_handler.rb +31 -0
  73. data/lib/rubyn_code/ide/handlers/session_resume_handler.rb +42 -0
  74. data/lib/rubyn_code/ide/handlers/shutdown_handler.rb +37 -0
  75. data/lib/rubyn_code/ide/handlers.rb +76 -0
  76. data/lib/rubyn_code/ide/protocol.rb +111 -0
  77. data/lib/rubyn_code/ide/server.rb +186 -0
  78. data/lib/rubyn_code/index/codebase_index.rb +311 -0
  79. data/lib/rubyn_code/learning/extractor.rb +65 -82
  80. data/lib/rubyn_code/learning/injector.rb +22 -23
  81. data/lib/rubyn_code/learning/instinct.rb +71 -42
  82. data/lib/rubyn_code/learning/shortcut.rb +95 -0
  83. data/lib/rubyn_code/llm/adapters/anthropic.rb +274 -0
  84. data/lib/rubyn_code/llm/adapters/anthropic_compatible.rb +60 -0
  85. data/lib/rubyn_code/llm/adapters/anthropic_streaming.rb +215 -0
  86. data/lib/rubyn_code/llm/adapters/base.rb +35 -0
  87. data/lib/rubyn_code/llm/adapters/json_parsing.rb +21 -0
  88. data/lib/rubyn_code/llm/adapters/openai.rb +246 -0
  89. data/lib/rubyn_code/llm/adapters/openai_compatible.rb +50 -0
  90. data/lib/rubyn_code/llm/adapters/openai_message_translator.rb +90 -0
  91. data/lib/rubyn_code/llm/adapters/openai_streaming.rb +141 -0
  92. data/lib/rubyn_code/llm/adapters/prompt_caching.rb +60 -0
  93. data/lib/rubyn_code/llm/client.rb +75 -247
  94. data/lib/rubyn_code/llm/model_router.rb +237 -0
  95. data/lib/rubyn_code/llm/streaming.rb +4 -227
  96. data/lib/rubyn_code/mcp/client.rb +1 -1
  97. data/lib/rubyn_code/mcp/config.rb +10 -12
  98. data/lib/rubyn_code/mcp/sse_transport.rb +15 -13
  99. data/lib/rubyn_code/mcp/stdio_transport.rb +16 -18
  100. data/lib/rubyn_code/mcp/tool_bridge.rb +31 -62
  101. data/lib/rubyn_code/memory/search.rb +1 -0
  102. data/lib/rubyn_code/memory/session_persistence.rb +59 -58
  103. data/lib/rubyn_code/memory/store.rb +42 -55
  104. data/lib/rubyn_code/observability/budget_enforcer.rb +46 -32
  105. data/lib/rubyn_code/observability/cost_calculator.rb +32 -8
  106. data/lib/rubyn_code/observability/skill_analytics.rb +116 -0
  107. data/lib/rubyn_code/observability/token_analytics.rb +130 -0
  108. data/lib/rubyn_code/observability/usage_reporter.rb +79 -61
  109. data/lib/rubyn_code/output/diff_renderer.rb +102 -77
  110. data/lib/rubyn_code/output/formatter.rb +11 -11
  111. data/lib/rubyn_code/permissions/policy.rb +11 -13
  112. data/lib/rubyn_code/permissions/prompter.rb +8 -9
  113. data/lib/rubyn_code/protocols/plan_approval.rb +25 -20
  114. data/lib/rubyn_code/self_test.rb +315 -0
  115. data/lib/rubyn_code/skills/catalog.rb +66 -0
  116. data/lib/rubyn_code/skills/document.rb +33 -29
  117. data/lib/rubyn_code/skills/loader.rb +43 -0
  118. data/lib/rubyn_code/skills/ttl_manager.rb +100 -0
  119. data/lib/rubyn_code/sub_agents/runner.rb +20 -25
  120. data/lib/rubyn_code/tasks/dag.rb +25 -24
  121. data/lib/rubyn_code/tasks/models.rb +1 -0
  122. data/lib/rubyn_code/tools/ask_user.rb +44 -0
  123. data/lib/rubyn_code/tools/background_run.rb +2 -1
  124. data/lib/rubyn_code/tools/base.rb +39 -32
  125. data/lib/rubyn_code/tools/bash.rb +7 -1
  126. data/lib/rubyn_code/tools/edit_file.rb +130 -17
  127. data/lib/rubyn_code/tools/executor.rb +130 -25
  128. data/lib/rubyn_code/tools/file_cache.rb +95 -0
  129. data/lib/rubyn_code/tools/git_commit.rb +12 -10
  130. data/lib/rubyn_code/tools/git_log.rb +12 -10
  131. data/lib/rubyn_code/tools/glob.rb +29 -7
  132. data/lib/rubyn_code/tools/grep.rb +8 -1
  133. data/lib/rubyn_code/tools/ide_diagnostics.rb +51 -0
  134. data/lib/rubyn_code/tools/ide_symbols.rb +53 -0
  135. data/lib/rubyn_code/tools/load_skill.rb +13 -6
  136. data/lib/rubyn_code/tools/memory_search.rb +14 -13
  137. data/lib/rubyn_code/tools/memory_write.rb +2 -1
  138. data/lib/rubyn_code/tools/output_compressor.rb +190 -0
  139. data/lib/rubyn_code/tools/read_file.rb +17 -6
  140. data/lib/rubyn_code/tools/registry.rb +11 -0
  141. data/lib/rubyn_code/tools/review_pr.rb +127 -80
  142. data/lib/rubyn_code/tools/run_specs.rb +26 -15
  143. data/lib/rubyn_code/tools/schema.rb +4 -10
  144. data/lib/rubyn_code/tools/spawn_agent.rb +113 -82
  145. data/lib/rubyn_code/tools/spawn_teammate.rb +107 -64
  146. data/lib/rubyn_code/tools/spec_output_parser.rb +118 -0
  147. data/lib/rubyn_code/tools/task.rb +17 -17
  148. data/lib/rubyn_code/tools/web_fetch.rb +62 -47
  149. data/lib/rubyn_code/tools/web_search.rb +66 -48
  150. data/lib/rubyn_code/tools/write_file.rb +76 -1
  151. data/lib/rubyn_code/version.rb +1 -1
  152. data/lib/rubyn_code.rb +62 -1
  153. data/skills/rubyn_self_test.md +133 -0
  154. metadata +83 -1
@@ -0,0 +1,311 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'fileutils'
5
+
6
module RubynCode
  module Index
    # Rails-aware codebase index.
    #
    # Source files are scanned line by line with regular expressions (no
    # parser is involved), so the result is heuristic: text inside heredocs
    # or strings that happens to look like a definition may be indexed.
    # Classes, modules, methods, associations and Rails edges are stored in a
    # JSON file for fast session startup. The first build scans all .rb
    # files; incremental updates re-index only changed files.
    #
    # Nodes and edges are plain Hashes with String keys so that an index that
    # was just built and one loaded back from JSON look the same.
    class CodebaseIndex # rubocop:disable Metrics/ClassLength -- structural summary methods
      INDEX_DIR = '.rubyn-code'
      INDEX_FILE = 'codebase_index.json'
      CHARS_PER_TOKEN = 4

      # Directory names (relative to the project root) that are never indexed.
      EXCLUDED_DIRS = %w[vendor node_modules].freeze

      # `class Foo::Bar` / `module Foo`. Requires a constant name so that
      # `class << self` is not recorded as a class called "<<".
      DEFINITION_PATTERN = /\A\s*(class|module)\s+(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)/.freeze

      # `def name`, `def self.name(args)`, optionally preceded by one bare
      # modifier (`private def name`). Anchored at the start of the line so
      # comments and `undef name` do not match.
      METHOD_PATTERN = /\A\s*(?:[a-z_]\w*\s+)?def\s+(self\.)?(\w+[?!=]?)(\(.*?\))?/.freeze

      ASSOCIATION_PATTERN = /\b(has_many|has_one|belongs_to|has_and_belongs_to_many)\s+:(\w+)/.freeze

      # Node type => pattern whose first capture is the node name.
      RAILS_PATTERNS = {
        'callback' => /\bbefore_action\s+:(\w+)/,
        'scope' => /\bscope\s+:(\w+)/,
        'validation' => /\bvalidates?\s+:(\w+)/
      }.freeze

      private_constant :EXCLUDED_DIRS, :DEFINITION_PATTERN, :METHOD_PATTERN,
                       :ASSOCIATION_PATTERN, :RAILS_PATTERNS

      attr_reader :nodes, :edges, :index_path

      # @param project_root [String] directory to index; expanded to an absolute path
      def initialize(project_root:)
        @project_root = File.expand_path(project_root)
        @index_path = File.join(@project_root, INDEX_DIR, INDEX_FILE)
        @nodes = [] # { type:, name:, file:, line:, params:, visibility: }
        @edges = [] # { from:, to:, relationship: }
        @file_mtimes = {}
      end

      # Build the index from scratch (first session) and write it to disk.
      #
      # @return [CodebaseIndex] self
      def build!
        @nodes = []
        @edges = []
        @file_mtimes = {}

        ruby_files.each { |file| index_file(file) }
        extract_rails_edges
        save!
        self
      end

      # Load an existing index from disk.
      #
      # @return [CodebaseIndex, nil] self, or nil when the index file is
      #   missing, unreadable, or does not contain a JSON object
      def load
        return nil unless File.exist?(@index_path)

        data = JSON.parse(File.read(@index_path))
        return nil unless data.is_a?(Hash)

        @nodes = data['nodes'] || []
        @edges = data['edges'] || []
        @file_mtimes = data['file_mtimes'] || {}
        self
      rescue StandardError
        # Best effort: a corrupt index is treated like a missing one.
        nil
      end

      # Load if an index exists, otherwise build from scratch.
      #
      # @return [CodebaseIndex] self
      def load_or_build!
        load || build!
      end

      # Incremental update: re-index only files whose mtime changed (or that
      # were added or deleted) since the last build.
      #
      # @return [CodebaseIndex] self
      def update!
        changed = detect_changed_files
        return self if changed.empty?

        changed.each do |file|
          remove_nodes_for(file)
          index_file(file) if File.exist?(file)
        end

        extract_rails_edges
        save!
        self
      end

      # Query the index for symbols matching a search term.
      #
      # @param term [#to_s] case-insensitive substring matched against node
      #   names and file paths
      # @return [Array<Hash>] matching nodes
      def query(term)
        pattern = term.to_s.downcase
        @nodes.select do |node|
          node['name'].to_s.downcase.include?(pattern) ||
            node['file'].to_s.downcase.include?(pattern)
        end
      end

      # Find everything related to a given file: the symbols it defines, the
      # edges that touch the file or one of those symbols, and the files those
      # edges point at (for example the spec that tests it).
      #
      # @param file_path [String] absolute or project-relative path
      # @return [Hash] with :definitions, :relationships and :affected_files
      def impact_analysis(file_path)
        relative = relative_path(file_path)
        direct = @nodes.select { |n| n['file'] == relative }
        # Edges are keyed by file path ("tests", "association"), so the path
        # itself must be part of the lookup or they would never be found.
        keys = direct.map { |n| n['name'] }.compact << relative
        related_edges = edges_involving(keys)

        {
          definitions: direct,
          relationships: related_edges,
          affected_files: related_edges.flat_map { |e| find_files_for(e) }.uniq
        }
      end

      # Compact summary for system prompt injection.
      #
      # @return [String]
      def to_prompt_summary
        counts = node_type_counts
        assoc_count = @edges.count { |e| e['relationship'] == 'association' }

        lines = ['Codebase Index:']
        lines << "  Classes: #{counts['class']}, Methods: #{counts['method']}"
        lines << "  Models: #{counts['model']}, Controllers: #{counts['controller']}, Services: #{counts['service']}"
        lines << "  Associations: #{assoc_count}"
        lines.join("\n")
      end

      # Structural map for the system prompt: model names with associations,
      # controllers, and service objects. Whole lines are dropped from the end
      # once the byte budget (max_tokens * CHARS_PER_TOKEN) is exhausted.
      #
      # @param max_tokens [Integer]
      # @return [String]
      def to_structural_summary(max_tokens: 500)
        budget = max_tokens * CHARS_PER_TOKEN
        lines = ['Codebase Structure:']

        append_model_section(lines)
        append_controller_section(lines)
        append_service_section(lines)
        append_stats_section(lines)

        truncate_to_budget(lines, budget)
      end

      # @return [Hash] counts of indexed files, nodes and edges
      def stats
        {
          files_indexed: @file_mtimes.size,
          nodes: @nodes.size,
          edges: @edges.size
        }
      end

      private

      def append_model_section(lines)
        models = @nodes.select { |n| n['type'] == 'model' && (n['name'] || '').match?(/\A[A-Z]/) }
        return if models.empty?

        lines << 'Models:'
        models.each do |model|
          assocs = associations_for_file(model['file'])
          desc = assocs.empty? ? model['name'] : "#{model['name']} #{assocs.join(', ')}"
          lines << "  #{desc}"
        end
      end

      def append_controller_section(lines)
        controllers = @nodes.select { |n| n['type'] == 'controller' && (n['name'] || '').match?(/\A[A-Z]/) }
        return if controllers.empty?

        lines << 'Controllers:'
        controllers.each { |c| lines << "  #{c['name']} (#{c['file']})" }
      end

      def append_service_section(lines)
        services = @nodes.select { |n| n['type'] == 'service' && (n['name'] || '').match?(/\A[A-Z]/) }
        return if services.empty?

        lines << 'Services:'
        services.each { |s| lines << "  #{s['name']} (#{s['file']})" }
      end

      def append_stats_section(lines)
        counts = node_type_counts
        lines << "Stats: #{counts['class'] || 0} classes, #{counts['method'] || 0} methods, #{@edges.size} edges"
      end

      # Association edges recorded for +file+, rendered as "has_many :posts".
      def associations_for_file(file)
        @edges.select { |e| e['from'] == file && e['relationship'] == 'association' }
              .map { |e| "#{e['type']} :#{e['to']}" }
      end

      # Keep leading lines while their combined byte size fits the budget.
      def truncate_to_budget(lines, budget)
        result = []
        total = 0
        lines.each do |line|
          line_size = line.bytesize + 1 # +1 for newline
          break if total + line_size > budget

          result << line
          total += line_size
        end
        result.join("\n")
      end

      # Edges whose 'from' or 'to' is one of +names+.
      def edges_involving(names)
        lookup = names.to_h { |name| [name, true] }
        @edges.select { |e| lookup.key?(e['from']) || lookup.key?(e['to']) }
      end

      def node_type_counts
        counts = Hash.new(0)
        @nodes.each { |n| counts[n['type']] += 1 }
        counts
      end

      # Absolute paths of every .rb file under the project root, skipping
      # excluded directories. Globbing is done relative to the root so that
      # neither glob metacharacters in the root path nor a parent directory
      # called "vendor" affect the result.
      def ruby_files
        Dir.glob('**/*.rb', base: @project_root)
           .reject { |relative| excluded?(relative) }
           .map { |relative| File.join(@project_root, relative) }
      end

      def excluded?(relative)
        anchored = "/#{relative}"
        EXCLUDED_DIRS.any? { |dir| anchored.include?("/#{dir}/") }
      end

      # Record the file's mtime and extract all nodes and edges from it.
      # Failures are logged and swallowed so one bad file cannot abort a build.
      def index_file(file)
        relative = relative_path(file)
        # scrub: invalid byte sequences would otherwise make the regex scans raise.
        content = File.read(file).scrub
        @file_mtimes[relative] = File.mtime(file).to_i

        extract_classes(content, relative)
        extract_methods(content, relative)
        extract_associations(content, relative)
        extract_rails_patterns(content, relative)
      rescue StandardError => e
        RubynCode::Debug.warn("Index: failed to parse #{file}: #{e.message}")
      end

      def extract_classes(content, file)
        content.each_line.with_index do |line, idx|
          match = line.match(DEFINITION_PATTERN)
          next unless match

          @nodes << {
            'type' => classify_node(file, match[1]), 'name' => match[2],
            'file' => file, 'line' => idx + 1
          }
        end
      end

      # NOTE: visibility is not tracked; every method is recorded as 'public'.
      def extract_methods(content, file)
        content.each_line.with_index do |line, idx|
          match = line.match(METHOD_PATTERN)
          next unless match

          @nodes << {
            'type' => 'method', 'name' => match[2],
            'file' => file, 'line' => idx + 1,
            'params' => match[3]&.strip,
            'visibility' => 'public'
          }
        end
      end

      def extract_associations(content, file)
        content.scan(ASSOCIATION_PATTERN) do |assoc_type, name|
          @edges << { 'from' => file, 'to' => name, 'relationship' => 'association', 'type' => assoc_type }
        end
      end

      # Callbacks, scopes and validations; line numbers are not recorded (0).
      def extract_rails_patterns(content, file)
        RAILS_PATTERNS.each do |type, pattern|
          content.scan(pattern).flatten.each do |name|
            @nodes << { 'type' => type, 'name' => name, 'file' => file, 'line' => 0 }
          end
        end
      end

      # Rebuild the spec/test -> source "tests" edges for the whole index.
      # Existing "tests" edges are dropped first so repeated calls (every
      # update!) do not accumulate duplicates or keep edges to deleted files.
      def extract_rails_edges
        @edges.reject! { |e| e['relationship'] == 'tests' }

        @file_mtimes.each_key do |path|
          source = source_for_test_file(path)
          next if source.nil? || source == path || !@file_mtimes.key?(source)

          @edges << { 'from' => path, 'to' => source, 'relationship' => 'tests' }
        end
      end

      # Map spec/foo_spec.rb or test/foo_test.rb to its app/foo.rb counterpart.
      # Returns nil for paths that are neither under spec/ nor test/.
      def source_for_test_file(path)
        if path.include?('spec/')
          path.sub(%r{spec/}, 'app/').sub(/_spec\.rb$/, '.rb')
        elsif path.include?('test/')
          path.sub(%r{test/}, 'app/').sub(/_test\.rb$/, '.rb')
        end
      end

      def classify_node(file, type) # rubocop:disable Metrics/CyclomaticComplexity -- Rails directory mapping
        # Checked first: concerns live under app/models/ and app/controllers/,
        # so the directory checks below would otherwise always win.
        return 'concern' if file.include?('concerns/')
        return 'model' if file.include?('app/models/')
        return 'controller' if file.include?('app/controllers/')
        return 'service' if file.include?('app/services/')
        return 'spec' if file.include?('spec/') || file.include?('test/')

        type == 'class' ? 'class' : 'module'
      end

      # Absolute paths of files that are new, modified (mtime differs) or deleted.
      def detect_changed_files
        current_files = ruby_files.to_h { |f| [relative_path(f), File.mtime(f).to_i] }
        changed = []

        current_files.each do |rel, mtime|
          changed << File.join(@project_root, rel) if @file_mtimes[rel] != mtime
        end

        # Files that were deleted
        @file_mtimes.each_key do |rel|
          changed << File.join(@project_root, rel) unless current_files.key?(rel)
        end

        changed
      end

      def remove_nodes_for(file)
        relative = relative_path(file)
        @nodes.reject! { |n| n['file'] == relative }
        @edges.reject! { |e| e['from'] == relative }
        @file_mtimes.delete(relative)
      end

      def find_files_for(edge)
        [edge['from'], edge['to']].compact.select { |f| f.end_with?('.rb') }
      end

      # Strip the project root prefix; paths outside the root are returned as is.
      def relative_path(absolute)
        absolute.to_s.delete_prefix("#{@project_root}/")
      end

      def save!
        FileUtils.mkdir_p(File.dirname(@index_path))
        data = { 'nodes' => @nodes, 'edges' => @edges, 'file_mtimes' => @file_mtimes }
        File.write(@index_path, JSON.generate(data))
      end
    end
  end
end
@@ -10,7 +10,7 @@ module RubynCode
10
10
  # After a session, the extractor sends recent conversation history to a
11
11
  # cheaper model (Haiku) and asks it to identify patterns that could be
12
12
  # useful in future sessions for the same project.
13
- module Extractor
13
+ module Extractor # rubocop:disable Metrics/ModuleLength -- LLM extraction logic with DB persistence
14
14
  # Maximum number of recent messages to analyze.
15
15
  MESSAGE_WINDOW = 30
16
16
 
@@ -23,29 +23,15 @@ module RubynCode
23
23
  project_specific
24
24
  ].freeze
25
25
 
26
- EXTRACTION_PROMPT = <<~PROMPT.freeze
27
- Analyze the following conversation between a developer and an AI coding assistant.
28
- Extract reusable patterns that could help in future sessions for this project.
29
-
30
- For each pattern, provide:
31
- - type: one of #{VALID_TYPES.join(', ')}
32
- - pattern: a concise description of the learned behavior or fix
33
- - context_tags: relevant tags (e.g., framework names, error types, file patterns)
34
- - confidence: initial confidence score between 0.3 and 0.8
35
-
36
- Respond with a JSON array of objects. If no patterns are found, respond with [].
37
- Only extract patterns that are genuinely reusable, not one-off fixes.
38
-
39
- Example response:
40
- [
41
- {
42
- "type": "error_resolution",
43
- "pattern": "When seeing 'PG::UniqueViolation' on users.email, check for missing unique index migration",
44
- "context_tags": ["postgresql", "rails", "migration"],
45
- "confidence": 0.6
46
- }
47
- ]
48
- PROMPT
26
+ EXTRACTION_PROMPT = "Analyze the following conversation between a developer and an AI coding assistant.\n" \
27
+ "Extract reusable patterns that could help in future sessions for this project.\n\n" \
28
+ "For each pattern, provide:\n" \
29
+ "- type: one of #{VALID_TYPES.join(', ')}\n" \
30
+ "- pattern: a concise description of the learned behavior or fix\n" \
31
+ "- context_tags: relevant tags (e.g., framework names, error types, file patterns)\n" \
32
+ "- confidence: initial confidence score between 0.3 and 0.8\n\n" \
33
+ "Respond with a JSON array of objects. If no patterns are found, respond with [].\n" \
34
+ 'Only extract patterns that are genuinely reusable, not one-off fixes.'.freeze
49
35
 
50
36
  class << self
51
37
  # Extracts instinct patterns from a session's message history.
@@ -70,24 +56,18 @@ module RubynCode
70
56
  instincts
71
57
  end
72
58
 
59
+ DECAY_RATES = {
60
+ 'project_specific' => 0.02,
61
+ 'error_resolution' => 0.03,
62
+ 'debugging_technique' => 0.04,
63
+ 'user_correction' => 0.05,
64
+ 'workaround' => 0.07
65
+ }.freeze
66
+
73
67
  private
74
68
 
75
69
  def request_extraction(messages, llm_client)
76
- # Serialize conversation into a single user message to avoid
77
- # "must end with user message" errors
78
- transcript = messages.map do |m|
79
- role = (m[:role] || m['role'] || 'unknown').capitalize
80
- content = m[:content] || m['content']
81
- text = case content
82
- when String then content
83
- when Array
84
- content.filter_map do |b|
85
- b.respond_to?(:text) ? b.text : (b[:text] || b['text'])
86
- end.join("\n")
87
- else content.to_s
88
- end
89
- "#{role}: #{text}"
90
- end.join("\n\n")
70
+ transcript = serialize_transcript(messages)
91
71
 
92
72
  llm_client.chat(
93
73
  messages: [{ role: 'user', content: "#{EXTRACTION_PROMPT}\n\nConversation:\n#{transcript}" }],
@@ -98,59 +78,71 @@ module RubynCode
98
78
  nil
99
79
  end
100
80
 
81
# Render every message as a "Role: text" turn and separate the turns with a
# blank line, producing one transcript string.
def serialize_transcript(messages)
  turns = messages.map { |message| format_turn(message) }
  turns.join("\n\n")
end
84
+
85
# Format one message as "Role: text".
#
# The role is read from the :role or 'role' key (falling back to 'unknown')
# and capitalized. Array content is reduced to the text of each block that
# exposes one, joined by newlines; any other content is stringified.
def format_turn(msg) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity -- content polymorphism
  speaker = (msg[:role] || msg['role'] || 'unknown').capitalize
  body = msg[:content] || msg['content']
  return "#{speaker}: #{body}" unless body.is_a?(Array)

  pieces = body.filter_map do |block|
    next block.text if block.respond_to?(:text)

    block[:text] || block['text']
  end
  "#{speaker}: #{pieces.join("\n")}"
end
97
+
101
98
  def save_to_db(instincts)
102
99
  db = DB::Connection.instance
103
100
  now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
104
101
 
105
- instincts.each do |inst|
106
- db.execute(
107
- 'INSERT INTO instincts (id, project_path, pattern, context_tags, confidence, decay_rate, times_applied, times_helpful, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
108
- [
109
- SecureRandom.uuid,
110
- inst[:project_path],
111
- inst[:pattern],
112
- JSON.generate(inst[:context_tags]),
113
- inst[:confidence],
114
- inst[:decay_rate],
115
- inst[:times_applied],
116
- inst[:times_helpful],
117
- now,
118
- now
119
- ]
120
- )
121
- end
102
+ instincts.each { |inst| insert_instinct(db, inst, now) }
122
103
  rescue StandardError => e
123
104
  warn "[Learning::Extractor] Failed to save instincts: #{e.message}"
124
105
  end
125
106
 
126
- def parse_response(response)
107
+ def insert_instinct(db, inst, now)
108
+ db.execute(
109
+ <<~SQL.tr("\n", ' ').strip,
110
+ INSERT INTO instincts (id, project_path, pattern, context_tags,
111
+ confidence, decay_rate, times_applied, times_helpful,
112
+ created_at, updated_at)
113
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
114
+ SQL
115
+ [
116
+ SecureRandom.uuid, inst[:project_path], inst[:pattern],
117
+ JSON.generate(inst[:context_tags]), inst[:confidence],
118
+ inst[:decay_rate], inst[:times_applied], inst[:times_helpful],
119
+ now, now
120
+ ]
121
+ )
122
+ end
123
+
124
+ def parse_response(response) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity -- response parsing with multiple fallbacks
127
125
  return [] if response.nil?
128
126
 
129
- text = extract_text(response)
127
+ text = if response.respond_to?(:content)
128
+ response.content.find do |b|
129
+ b.respond_to?(:text)
130
+ end&.text
131
+ else
132
+ response.is_a?(Hash) ? response.dig('content', 0, 'text') : nil
133
+ end
130
134
  return [] if text.nil? || text.empty?
131
135
 
132
- # Extract JSON array from response, handling markdown code blocks
133
136
  json_str = text[/\[.*\]/m]
134
- return [] if json_str.nil?
137
+ return [] unless json_str
135
138
 
136
139
  parsed = JSON.parse(json_str)
137
- return [] unless parsed.is_a?(Array)
138
-
139
- parsed
140
+ parsed.is_a?(Array) ? parsed : []
140
141
  rescue JSON::ParserError => e
141
142
  warn "[Learning::Extractor] Failed to parse extraction response: #{e.message}"
142
143
  []
143
144
  end
144
145
 
145
- def extract_text(response)
146
- if response.respond_to?(:content)
147
- block = response.content.find { |b| b.respond_to?(:text) }
148
- block&.text
149
- elsif response.is_a?(Hash)
150
- response.dig('content', 0, 'text')
151
- end
152
- end
153
-
154
146
  def normalize_pattern(raw, project_path)
155
147
  type = raw['type'].to_s
156
148
  pattern = raw['pattern'].to_s.strip
@@ -173,17 +165,8 @@ module RubynCode
173
165
  }
174
166
  end
175
167
 
176
- # Different pattern types decay at different rates.
177
- # Project-specific knowledge decays slower; workarounds decay faster.
178
168
  def decay_rate_for_type(type)
179
- case type
180
- when 'project_specific' then 0.02
181
- when 'error_resolution' then 0.03
182
- when 'debugging_technique' then 0.04
183
- when 'user_correction' then 0.05
184
- when 'workaround' then 0.07
185
- else 0.05
186
- end
169
+ DECAY_RATES.fetch(type, 0.05)
187
170
  end
188
171
  end
189
172
  end
@@ -28,26 +28,21 @@ module RubynCode
28
28
  rows = fetch_instincts(db, project_path)
29
29
  return '' if rows.empty?
30
30
 
31
- instincts = rows.map { |row| row_to_instinct(row) }
31
+ instincts = build_and_filter(rows, context_tags, max_instincts)
32
+ return '' if instincts.empty?
32
33
 
33
- # Apply time-based decay to get current confidence
34
- now = Time.now
35
- instincts = instincts.map { |inst| InstinctMethods.apply_decay(inst, now) }
34
+ format_instincts(instincts)
35
+ end
36
36
 
37
- # Filter below minimum confidence
38
- instincts = instincts.select { |inst| inst.confidence >= MIN_CONFIDENCE }
37
+ def build_and_filter(rows, context_tags, max_instincts)
38
+ now = Time.now
39
+ instincts = rows
40
+ .map { |row| InstinctMethods.apply_decay(row_to_instinct(row), now) }
41
+ .select { |inst| inst.confidence >= MIN_CONFIDENCE }
39
42
 
40
- # Filter by context tags if provided
41
43
  instincts = filter_by_tags(instincts, context_tags) unless context_tags.empty?
42
44
 
43
- # Sort by confidence descending and take top N
44
- instincts = instincts
45
- .sort_by { |inst| -inst.confidence }
46
- .first(max_instincts)
47
-
48
- return '' if instincts.empty?
49
-
50
- format_instincts(instincts)
45
+ instincts.sort_by { |inst| -inst.confidence }.first(max_instincts)
51
46
  end
52
47
 
53
48
  private
@@ -64,19 +59,23 @@ module RubynCode
64
59
 
65
60
  def row_to_instinct(row)
66
61
  Instinct.new(
67
- id: row['id'],
68
- project_path: row['project_path'],
69
- pattern: row['pattern'],
70
- context_tags: parse_tags(row['context_tags']),
71
- confidence: row['confidence'].to_f,
72
- decay_rate: row['decay_rate'].to_f,
73
- times_applied: row['times_applied'].to_i,
74
- times_helpful: row['times_helpful'].to_i,
62
+ **core_instinct_attrs(row),
63
+ **numeric_instinct_attrs(row),
75
64
  created_at: parse_time(row['created_at']),
76
65
  updated_at: parse_time(row['updated_at'])
77
66
  )
78
67
  end
79
68
 
69
# Identity and text attributes of an instinct row, keyed by symbol.
# The context_tags column is passed through parse_tags.
def core_instinct_attrs(row)
  attrs = %w[id project_path pattern].to_h { |column| [column.to_sym, row[column]] }
  attrs[:context_tags] = parse_tags(row['context_tags'])
  attrs
end
73
+
74
# Numeric attributes of an instinct row, keyed by symbol: confidence and
# decay_rate coerced with to_f, the usage counters coerced with to_i.
def numeric_instinct_attrs(row)
  scores = %w[confidence decay_rate].to_h { |column| [column.to_sym, row[column].to_f] }
  counters = %w[times_applied times_helpful].to_h { |column| [column.to_sym, row[column].to_i] }
  scores.merge(counters)
end
78
+
80
79
  def parse_tags(tags)
81
80
  case tags
82
81
  when String