codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,263 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'set'
5
+ require_relative 'session_flow_document'
6
+
7
+ module CodebaseIndex
8
+ module SessionTracer
9
# Assembles a context tree from captured session requests against the extracted index.
#
# Does NOT require Rails — reads from a store + on-disk extracted index.
#
# Algorithm:
#   1. Load requests from store for session_id
#   2. For each request, look up its controller unit via the index reader
#   3. Expand dependencies via the reader's dependency graph; units typed
#      "job"/"mailer" are recorded as async side effects instead of context
#   4. Deduplicate units across steps (source is pooled once, every step
#      references the units it touches by identifier)
#   5. Enforce the token budget by truncating source, most recently pooled first
#   6. Build SessionFlowDocument
#
# @example
#   assembler = SessionFlowAssembler.new(store: store, reader: reader)
#   doc = assembler.assemble("abc123", budget: 8000, depth: 1)
#   puts doc.to_context
#
# rubocop:disable Metrics/ClassLength
class SessionFlowAssembler
  # Unit types treated as asynchronous side effects rather than context units.
  ASYNC_TYPES = %w[job mailer].to_set.freeze

  # Flat per-unit token overhead added for tags/metadata when estimating.
  UNIT_OVERHEAD_TOKENS = 20

  # @param store [Store] Session trace store (must respond to #read)
  # @param reader [MCP::IndexReader] Index reader; must respond to #find_unit
  #   and #dependency_graph
  def initialize(store:, reader:)
    @store = store
    @reader = reader
  end

  # Assemble a context tree for a session.
  #
  # @param session_id [String] The session to assemble
  # @param budget [Integer] Maximum token budget (default: 8000)
  # @param depth [Integer] Expansion depth (0=metadata only, 1=direct deps, 2+=transitive deps)
  # @return [SessionFlowDocument] The assembled document
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
  def assemble(session_id, budget: 8000, depth: 1)
    requests = @store.read(session_id)
    return empty_document(session_id) if requests.empty?

    steps = []
    context_pool = {}
    side_effects = []
    dependency_map = {}
    seen_units = Set.new

    requests.each_with_index do |req, idx|
      step = build_step(req, idx)
      steps << step

      # Depth 0 produces the timeline only; no unit lookups at all.
      next if depth.zero?

      controller_id = req['controller']
      next unless controller_id

      # Pool the controller source the first time it is seen (Set#add? is nil on repeats).
      unit = @reader.find_unit(controller_id)
      context_pool[controller_id] = unit_summary(unit) if unit && seen_units.add?(controller_id)
      step[:unit_refs] = [controller_id]

      # An unindexed controller has no dependency edges to expand.
      next unless unit

      deps = resolve_dependencies(controller_id, seen_units, context_pool,
                                  side_effects, step, dependency_map, depth)
      # Union keeps order and avoids repeating the controller on a self-edge.
      step[:unit_refs] |= deps
    end

    token_count = apply_budget(context_pool, budget)

    SessionFlowDocument.new(
      session_id: session_id,
      steps: steps,
      context_pool: context_pool,
      side_effects: side_effects,
      dependency_map: dependency_map,
      token_count: token_count
    )
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength

  private

  # Build a timeline step from a request record.
  #
  # @param req [Hash] Request data from store (string keys)
  # @param index [Integer] Step index
  # @return [Hash] Step hash with empty :unit_refs and :side_effects
  def build_step(req, index)
    {
      index: index,
      method: req['method'],
      path: req['path'],
      controller: req['controller'],
      action: req['action'],
      status: req['status'],
      duration_ms: req['duration_ms'],
      unit_refs: [],
      side_effects: []
    }
  end

  # Resolve the direct dependencies of a unit, separating synchronous
  # dependencies from async side effects.
  #
  # Every resolvable non-async dependency is returned so the calling step can
  # reference it, even when its source was already pooled by an earlier step;
  # only the pooling (and transitive expansion) is deduplicated.
  #
  # @param unit_id [String] Unit whose dependencies are resolved
  # @param seen_units [Set<String>] Identifiers already pooled (mutated)
  # @param context_pool [Hash] Accumulator for unit summaries (mutated)
  # @param side_effects [Array<Hash>] Session-wide side effects (mutated)
  # @param step [Hash] The timeline step being built (its :side_effects is mutated)
  # @param dependency_map [Hash] Accumulator for dependency edges (mutated)
  # @param depth [Integer] Requested expansion depth
  # @return [Array<String>] Non-async dependency identifiers touched by this step
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists
  def resolve_dependencies(unit_id, seen_units, context_pool,
                           side_effects, step, dependency_map, depth)
    dep_ids = @reader.dependency_graph.dependencies_of(unit_id)
    resolved = []
    referenced = []

    dep_ids.each do |dep_id|
      dep_unit = @reader.find_unit(dep_id)
      next unless dep_unit

      # Remember the edge here instead of looking every unit up a second time.
      resolved << dep_id

      dep_type = dep_unit['type']&.to_s
      if ASYNC_TYPES.include?(dep_type)
        effect = {
          type: dep_type.to_sym,
          identifier: dep_id,
          trigger_step: "#{step[:controller]}##{step[:action]}"
        }
        side_effects << effect
        step[:side_effects] << effect
        next
      end

      referenced << dep_id
      next unless seen_units.add?(dep_id)

      context_pool[dep_id] = unit_summary(dep_unit)

      # Depth 2+: expand transitive dependencies
      expand_transitive(dep_id, seen_units, context_pool, dependency_map, depth - 1) if depth >= 2
    end

    # The edge list includes async dependencies as well as synchronous ones.
    dependency_map[unit_id] = resolved if resolved.any?

    referenced.uniq
  end
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/ParameterLists

  # Expand transitive dependencies (depth 2+).
  #
  # @param unit_id [String] Unit to expand from
  # @param seen_units [Set<String>] Already-seen unit identifiers (mutated)
  # @param context_pool [Hash] Accumulator for unit data (mutated)
  # @param dependency_map [Hash] Accumulator for dependency edges (mutated)
  # @param remaining_depth [Integer] Remaining expansion depth
  # @return [void]
  def expand_transitive(unit_id, seen_units, context_pool, dependency_map, remaining_depth)
    return if remaining_depth <= 0

    resolved_deps = []

    @reader.dependency_graph.dependencies_of(unit_id).each do |dep_id|
      dep_unit = @reader.find_unit(dep_id)
      next unless dep_unit

      resolved_deps << dep_id
      # Already pooled units are recorded as edges but not expanded again,
      # which also stops cycles in the graph from recursing forever.
      next unless seen_units.add?(dep_id)

      context_pool[dep_id] = unit_summary(dep_unit)
      expand_transitive(dep_id, seen_units, context_pool, dependency_map, remaining_depth - 1)
    end

    dependency_map[unit_id] = resolved_deps if resolved_deps.any?
  end

  # Extract a summary hash from a full unit data hash.
  #
  # @param unit [Hash] Full unit data from the index reader (string keys)
  # @return [Hash] Summary with :type, :file_path, :source_code
  def unit_summary(unit)
    {
      type: unit['type'],
      file_path: unit['file_path'],
      source_code: unit['source_code']
    }
  end

  # Apply the token budget by replacing source code with a short placeholder,
  # starting from the most recently pooled unit and stopping as soon as the
  # estimate fits.
  #
  # NOTE(review): pooling order is request order with dependencies interleaved,
  # so "last added" only approximates "lowest priority" (controllers of later
  # requests are truncated before dependencies of earlier ones).
  #
  # @param context_pool [Hash] Unit data to budget (unit hashes are mutated)
  # @param budget [Integer] Maximum tokens
  # @return [Integer] Estimated token count after truncation
  def apply_budget(context_pool, budget)
    total = estimate_tokens(context_pool)
    return total if total <= budget

    context_pool.values.reverse_each do |unit|
      break if total <= budget

      source = unit[:source_code]
      next unless source

      source_tokens = estimate_token_count(source)
      placeholder = "# source truncated (#{source_tokens} tokens)"
      placeholder_tokens = estimate_token_count(placeholder)
      # Swapping a tiny snippet for a longer placeholder would grow the total.
      next if placeholder_tokens >= source_tokens

      unit[:source_code] = placeholder
      total += placeholder_tokens - source_tokens
    end

    total
  end

  # Estimate total tokens for the context pool.
  #
  # @param context_pool [Hash] Unit data
  # @return [Integer] Estimated token count including per-unit overhead
  def estimate_tokens(context_pool)
    context_pool.values.sum do |unit|
      estimate_token_count(unit[:source_code].to_s) + UNIT_OVERHEAD_TOKENS
    end
  end

  # Estimate token count for a string.
  # Uses project convention: (string.length / 4.0).ceil
  #
  # @param text [String] Text to estimate
  # @return [Integer] Estimated token count
  def estimate_token_count(text)
    (text.length / 4.0).ceil
  end

  # Build an empty document for sessions with no requests.
  #
  # @param session_id [String]
  # @return [SessionFlowDocument]
  def empty_document(session_id)
    SessionFlowDocument.new(session_id: session_id)
  end
end
# rubocop:enable Metrics/ClassLength
262
+ end
263
+ end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module CodebaseIndex
6
+ module SessionTracer
7
# Value object representing an assembled session flow trace.
#
# Contains a two-level structure:
# - **Timeline** — ordered steps with unit_refs and side_effects (lightweight)
# - **Context pool** — deduplicated unit data (heavy, included once each)
#
# @example
#   doc = SessionFlowDocument.new(
#     session_id: "abc123",
#     steps: [...],
#     context_pool: { "OrdersController" => { ... } },
#     generated_at: Time.now.utc.iso8601
#   )
#   doc.to_h         # => JSON-serializable Hash
#   doc.to_markdown  # => human-readable document
#   doc.to_context   # => LLM XML format
#
# rubocop:disable Metrics/ClassLength
class SessionFlowDocument
  # strftime pattern yielding the same text as Time#iso8601 for a UTC time.
  # Used so the default timestamp does not depend on the `time` stdlib being
  # loaded (Time#iso8601 is not a core method before Ruby 3.4).
  ISO8601_UTC_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
  private_constant :ISO8601_UTC_FORMAT

  attr_reader :session_id, :steps, :context_pool, :side_effects,
              :dependency_map, :token_count, :generated_at

  # @param session_id [String] The session identifier
  # @param steps [Array<Hash>] Ordered timeline steps
  # @param context_pool [Hash<String, Hash>] Deduplicated unit data keyed by identifier
  # @param side_effects [Array<Hash>] Async side effects (jobs, mailers)
  # @param dependency_map [Hash<String, Array<String>>] Unit -> dependency identifiers
  # @param token_count [Integer] Estimated total tokens
  # @param generated_at [String, nil] ISO8601 timestamp (defaults to now, UTC)
  # rubocop:disable Metrics/ParameterLists
  def initialize(session_id:, steps: [], context_pool: {}, side_effects: [],
                 dependency_map: {}, token_count: 0, generated_at: nil)
    @session_id = session_id
    @steps = steps
    @context_pool = context_pool
    @side_effects = side_effects
    @dependency_map = dependency_map
    @token_count = token_count
    @generated_at = generated_at || Time.now.utc.strftime(ISO8601_UTC_FORMAT)
  end
  # rubocop:enable Metrics/ParameterLists

  # Serialize to a JSON-compatible Hash.
  #
  # @return [Hash]
  def to_h
    {
      session_id: @session_id,
      generated_at: @generated_at,
      token_count: @token_count,
      steps: @steps,
      context_pool: @context_pool,
      side_effects: @side_effects,
      dependency_map: @dependency_map
    }
  end

  # Reconstruct from a serialized Hash.
  #
  # Accepts symbol or string keys (e.g. after a JSON round trip). Field names
  # are symbolized, but the unit identifiers keying :context_pool and
  # :dependency_map are data, not field names, and are restored as Strings so
  # they keep matching the identifiers listed in each step's :unit_refs.
  #
  # @param data [Hash] Previously serialized document data
  # @return [SessionFlowDocument]
  def self.from_h(data)
    fields = data.each_with_object({}) { |(key, value), acc| acc[key.to_sym] = value }

    new(
      session_id: fields[:session_id],
      steps: deep_symbolize_keys(fields[:steps] || []),
      context_pool: restore_identifier_map(fields[:context_pool]) { |unit| deep_symbolize_keys(unit) },
      side_effects: deep_symbolize_keys(fields[:side_effects] || []),
      dependency_map: restore_identifier_map(fields[:dependency_map]),
      token_count: fields[:token_count] || 0,
      generated_at: fields[:generated_at]
    )
  end

  # Render as human-readable Markdown.
  #
  # Sections: header, timeline, then side effects, code units and
  # dependencies (each only when non-empty).
  #
  # @return [String]
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  def to_markdown
    lines = []
    lines << "## Session: #{@session_id}"
    lines << "_Generated at #{@generated_at} | #{@steps.size} requests | ~#{@token_count} tokens_"
    lines << ''

    # Timeline
    lines << '### Timeline'
    lines << ''
    @steps.each.with_index(1) do |step, number|
      status = step[:status] || '?'
      duration = step[:duration_ms] ? " (#{step[:duration_ms]}ms)" : ''
      lines << "#{number}. #{step[:method]} #{step[:path]} → " \
               "#{step[:controller]}##{step[:action]} [#{status}]#{duration}"
    end
    lines << ''

    # Side effects
    if @side_effects.any?
      lines << '### Side Effects'
      lines << ''
      @side_effects.each do |effect|
        lines << "- #{effect[:type]}: #{effect[:identifier]} (triggered by #{effect[:trigger_step]})"
      end
      lines << ''
    end

    # Context pool
    if @context_pool.any?
      lines << '### Code Units'
      lines << ''
      @context_pool.each do |identifier, unit|
        type = unit[:type] || 'unknown'
        file_path = unit[:file_path]
        lines << "#### #{identifier} (#{type})"
        lines << "_#{file_path}_" if file_path
        lines << ''
        next unless unit[:source_code]

        lines << '```ruby'
        lines << unit[:source_code]
        lines << '```'
        lines << ''
      end
    end

    # Dependencies
    if @dependency_map.any?
      lines << '### Dependencies'
      lines << ''
      @dependency_map.each do |unit_id, deps|
        lines << "- #{unit_id} → #{deps.join(', ')}"
      end
      lines << ''
    end

    lines.join("\n")
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

  # Render as LLM-consumable XML-style context.
  #
  # Values are interpolated verbatim (no XML escaping), so the result is meant
  # for prompt context rather than for a strict XML parser.
  #
  # @return [String]
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  def to_context
    lines = []
    lines << "<session_context session_id=\"#{@session_id}\" requests=\"#{@steps.size}\" " \
             "tokens=\"#{@token_count}\" units=\"#{@context_pool.size}\">"

    # Timeline
    lines << '<session_timeline>'
    @steps.each.with_index(1) do |step, number|
      status = step[:status] || '?'
      duration = step[:duration_ms] ? ", #{step[:duration_ms]}ms" : ''
      lines << "#{number}. #{step[:method]} #{step[:path]} → " \
               "#{step[:controller]}##{step[:action]} (#{status}#{duration})"
    end
    lines << '</session_timeline>'

    # Units
    @context_pool.each do |identifier, unit|
      type = unit[:type] || 'unknown'
      file_path = unit[:file_path] || 'unknown'
      lines << %(<unit identifier="#{identifier}" type="#{type}" file="#{file_path}">)
      lines << (unit[:source_code] || '# source not available')
      lines << '</unit>'
    end

    # Side effects
    if @side_effects.any?
      lines << '<side_effects>'
      @side_effects.each do |effect|
        lines << "#{effect[:identifier]} (triggered by #{effect[:trigger_step]}, #{effect[:type]})"
      end
      lines << '</side_effects>'
    end

    # Dependencies
    if @dependency_map.any?
      lines << '<dependencies>'
      @dependency_map.each do |unit_id, deps|
        lines << "#{unit_id} → #{deps.join(', ')}"
      end
      lines << '</dependencies>'
    end

    lines << '</session_context>'
    lines.join("\n")
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

  # Recursively convert Hash keys to Symbols inside nested Hashes/Arrays.
  #
  # @api private
  # @param obj [Object] Hash, Array, or leaf value
  # @return [Object] Copy with symbolized Hash keys; leaves are returned as-is
  def self.deep_symbolize_keys(obj)
    case obj
    when Hash
      obj.each_with_object({}) do |(key, value), result|
        result[key.to_sym] = deep_symbolize_keys(value)
      end
    when Array
      obj.map { |item| deep_symbolize_keys(item) }
    else
      obj
    end
  end
  private_class_method :deep_symbolize_keys

  # Rebuild a Hash keyed by unit identifier, forcing the keys to Strings.
  # When a block is given, each value is passed through it.
  #
  # @api private
  # @param map [Hash, nil] Identifier-keyed Hash (nil is treated as empty)
  # @return [Hash<String, Object>]
  def self.restore_identifier_map(map)
    (map || {}).each_with_object({}) do |(identifier, value), restored|
      restored[identifier.to_s] = block_given? ? yield(value) : value
    end
  end
  private_class_method :restore_identifier_map
end
# rubocop:enable Metrics/ClassLength
222
+ end
223
+ end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require_relative 'store'
5
+
6
+ module CodebaseIndex
7
+ module SessionTracer
8
# SolidCache-backed session store.
#
# Stores one JSON blob per session under a prefixed cache key (read-modify-write)
# plus a separate JSON index listing the known session IDs. `expires_in` is
# applied to session blobs only; the index itself is written without expiry
# and pruned lazily by #sessions.
#
# @example
#   store = SolidCacheStore.new(cache: SolidCache::Store.new, expires_in: 3600)
#   store.record("abc123", { controller: "OrdersController", action: "create" })
#
class SolidCacheStore < Store
  KEY_PREFIX = 'codebase_index:session:'
  INDEX_KEY = 'codebase_index:session_index'

  # @param cache [ActiveSupport::Cache::Store] A SolidCache (or compatible) cache instance;
  #   must respond to #read, #write, #delete and #exist?
  # @param expires_in [Integer, nil] Expiry time in seconds (nil = no expiry)
  def initialize(cache:, expires_in: nil)
    super()
    @cache = cache
    @expires_in = expires_in
  end

  # Append a request record to a session (read-modify-write).
  #
  # NOTE: Not atomic — concurrent writes to the same session may lose data.
  # Acceptable for development tracing. For high-concurrency tracing, use
  # RedisStore (RPUSH is atomic) or FileStore (LOCK_EX).
  #
  # @param session_id [String] The session identifier
  # @param request_data [Hash] Request metadata to store
  # @return [void]
  def record(session_id, request_data)
    # Go through #read so an unparseable blob is treated as an empty session
    # (as #read already does) instead of raising on every later request.
    requests = read(session_id)
    requests << request_data

    write_opts = @expires_in ? { expires_in: @expires_in } : {}
    @cache.write(session_key(session_id), JSON.generate(requests), **write_opts)

    update_index(session_id)
  end

  # Read all request records for a session.
  #
  # @param session_id [String] The session identifier
  # @return [Array<Hash>] Request records in insertion order (string keys);
  #   empty when the session is missing or its blob cannot be parsed
  def read(session_id)
    raw = @cache.read(session_key(session_id))
    return [] unless raw

    JSON.parse(raw)
  rescue JSON::ParserError
    []
  end

  # List session summaries.
  #
  # Index entries whose session blob no longer exists (e.g. expired) are
  # pruned from the index as a side effect.
  #
  # NOTE(review): the index is in first-seen order and `first(limit)` takes
  # from its start, so with more than `limit` sessions the oldest ones are
  # returned — confirm whether "recent" should mean the newest instead.
  #
  # @param limit [Integer] Maximum number of sessions to return
  # @return [Array<Hash>] Summaries with 'session_id', 'request_count',
  #   'first_request' and 'last_request' (string keys)
  def sessions(limit: 20)
    index = read_index
    active = index.select { |id| @cache.exist?(session_key(id)) }

    # Clean up expired entries from the index
    write_index(active) if active.size != index.size

    active.first(limit).map do |session_id|
      requests = read(session_id)
      {
        'session_id' => session_id,
        'request_count' => requests.size,
        'first_request' => requests.first&.fetch('timestamp', nil),
        'last_request' => requests.last&.fetch('timestamp', nil)
      }
    end
  end

  # Remove all data for a single session.
  #
  # @param session_id [String] The session identifier
  # @return [void]
  def clear(session_id)
    @cache.delete(session_key(session_id))
    index = read_index
    index.delete(session_id)
    write_index(index)
  end

  # Remove all session data known to the index, then the index itself.
  #
  # @return [void]
  def clear_all
    read_index.each { |id| @cache.delete(session_key(id)) }
    @cache.delete(INDEX_KEY)
  end

  private

  # @param session_id [String]
  # @return [String] Cache key for this session (prefix + sanitized ID)
  def session_key(session_id)
    "#{KEY_PREFIX}#{sanitize_session_id(session_id)}"
  end

  # Read the session index (list of known session IDs).
  #
  # @return [Array<String>] Empty when the index is missing or unparseable
  def read_index
    raw = @cache.read(INDEX_KEY)
    return [] unless raw

    JSON.parse(raw)
  rescue JSON::ParserError
    []
  end

  # Write the session index (no expiry is set on it).
  #
  # @param ids [Array<String>]
  # @return [void]
  def write_index(ids)
    @cache.write(INDEX_KEY, JSON.generate(ids))
  end

  # Add a session ID to the index if not already present.
  #
  # @param session_id [String]
  # @return [void]
  def update_index(session_id)
    index = read_index
    return if index.include?(session_id)

    index << session_id
    write_index(index)
  end
end
144
+ end
145
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodebaseIndex
4
+ module SessionTracer
5
# Abstract base class for session trace stores.
#
# Every public method here raises NotImplementedError; a concrete store
# overrides all of them:
# - `record(session_id, request_data)` — append a request record
# - `read(session_id)` — return all requests for a session, oldest first
# - `sessions(limit:)` — return session summaries
# - `clear(session_id)` — remove a single session
# - `clear_all` — remove all sessions
#
# Subclasses also inherit the private #sanitize_session_id helper.
#
# @abstract Subclass and implement the required methods.
class Store
  # Matches every character that is not an ASCII letter, digit, underscore or hyphen.
  UNSAFE_SESSION_ID_CHARS = /[^a-zA-Z0-9_-]/
  private_constant :UNSAFE_SESSION_ID_CHARS

  # Append one request record to the given session.
  #
  # @param session_id [String] The session identifier
  # @param request_data [Hash] Request metadata to store
  # @return [void]
  # @raise [NotImplementedError] unless overridden by the subclass
  def record(session_id, request_data)
    raise NotImplementedError, "#{self.class}#record must be implemented"
  end

  # Fetch every request record stored for the given session.
  #
  # @param session_id [String] The session identifier
  # @return [Array<Hash>] Request records, oldest first
  # @raise [NotImplementedError] unless overridden by the subclass
  def read(session_id)
    raise NotImplementedError, "#{self.class}#read must be implemented"
  end

  # Summarize stored sessions.
  #
  # @param limit [Integer] Maximum number of sessions to return
  # @return [Array<Hash>] One summary per session (session id, request count,
  #   first and last request timestamps)
  # @raise [NotImplementedError] unless overridden by the subclass
  def sessions(limit: 20)
    raise NotImplementedError, "#{self.class}#sessions must be implemented"
  end

  # Delete everything stored for one session.
  #
  # @param session_id [String] The session identifier
  # @return [void]
  # @raise [NotImplementedError] unless overridden by the subclass
  def clear(session_id)
    raise NotImplementedError, "#{self.class}#clear must be implemented"
  end

  # Delete the data of every session.
  #
  # @return [void]
  # @raise [NotImplementedError] unless overridden by the subclass
  def clear_all
    raise NotImplementedError, "#{self.class}#clear_all must be implemented"
  end

  private

  # Make a session ID safe to embed in cache keys or filenames by replacing
  # each disallowed character with an underscore. Distinct IDs can therefore
  # map to the same sanitized value (e.g. "a/b" and "a_b").
  #
  # @param session_id [#to_s] Raw session identifier
  # @return [String] Identifier containing only letters, digits, hyphens and underscores
  def sanitize_session_id(session_id)
    raw = session_id.to_s
    raw.gsub(UNSAFE_SESSION_ID_CHARS, '_')
  end
end
66
+ end
67
+ end