codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'set'
|
|
5
|
+
require_relative 'session_flow_document'
|
|
6
|
+
|
|
7
|
+
module CodebaseIndex
|
|
8
|
+
module SessionTracer
|
|
9
|
+
# Assembles a context tree from captured session requests against the extracted index.
|
|
10
|
+
#
|
|
11
|
+
# Does NOT require Rails — reads from a store + on-disk extracted index.
|
|
12
|
+
#
|
|
13
|
+
# Algorithm:
|
|
14
|
+
# 1. Load requests from store for session_id
|
|
15
|
+
# 2. For each request, resolve "Controller#action" via IndexReader
|
|
16
|
+
# 3. Expand dependencies via DependencyGraph — filter :job/:mailer as async side effects
|
|
17
|
+
# 4. Deduplicate units across steps (include source once, reference by identifier)
|
|
18
|
+
# 5. Token budget allocation with priority-based truncation
|
|
19
|
+
# 6. Build SessionFlowDocument
|
|
20
|
+
#
|
|
21
|
+
# @example
|
|
22
|
+
# assembler = SessionFlowAssembler.new(store: store, reader: reader)
|
|
23
|
+
# doc = assembler.assemble("abc123", budget: 8000, depth: 1)
|
|
24
|
+
# puts doc.to_context
|
|
25
|
+
#
|
|
26
|
+
# rubocop:disable Metrics/ClassLength
|
|
27
|
+
class SessionFlowAssembler
  # Unit types reported as async side effects instead of being added to the
  # context pool.
  ASYNC_TYPES = %w[job mailer].to_set.freeze

  # @param store [Store] Session trace store
  # @param reader [MCP::IndexReader] Index reader for unit lookups
  def initialize(store:, reader:)
    @store = store
    @reader = reader
  end

  # Assemble a context tree for a session.
  #
  # Every request becomes a timeline step. Unless depth is 0, the step's
  # controller is looked up in the index and its dependencies are expanded
  # into a shared, deduplicated context pool.
  #
  # @param session_id [String] The session to assemble
  # @param budget [Integer] Maximum token budget (default: 8000)
  # @param depth [Integer] Expansion depth (0=metadata only, 1=direct deps, 2+=full flow)
  # @return [SessionFlowDocument] The assembled document
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
  def assemble(session_id, budget: 8000, depth: 1)
    requests = @store.read(session_id)
    return empty_document(session_id) if requests.empty?

    steps = []
    context_pool = {}
    side_effects = []
    dependency_map = {}
    seen_units = Set.new

    requests.each_with_index do |req, idx|
      step = build_step(req, idx)
      steps << step

      next if depth.zero?

      controller_id = req['controller']
      next unless controller_id

      # The controller is referenced by the step even when the index has no
      # unit for it; only resolvable controllers are expanded further.
      unit = @reader.find_unit(controller_id)
      step[:unit_refs] = [controller_id]
      next unless unit

      # Set#add? returns nil when the id was already present, so the source
      # of each unit enters the pool only once.
      context_pool[controller_id] = unit_summary(unit) if seen_units.add?(controller_id)

      deps = resolve_dependencies(controller_id, seen_units, context_pool,
                                  side_effects, step, dependency_map, depth)
      step[:unit_refs].concat(deps)
    end

    token_count = apply_budget(context_pool, budget)

    SessionFlowDocument.new(
      session_id: session_id,
      steps: steps,
      context_pool: context_pool,
      side_effects: side_effects,
      dependency_map: dependency_map,
      token_count: token_count
    )
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength

  private

  # Build a timeline step from a request record.
  #
  # @param req [Hash] Request data from store (string keys)
  # @param index [Integer] Step index
  # @return [Hash] Step hash with empty :unit_refs and :side_effects lists
  def build_step(req, index)
    {
      index: index,
      method: req['method'],
      path: req['path'],
      controller: req['controller'],
      action: req['action'],
      status: req['status'],
      duration_ms: req['duration_ms'],
      unit_refs: [],
      side_effects: []
    }
  end

  # Resolve dependencies for a unit, separating sync deps from async side effects.
  #
  # Dependencies whose type is in ASYNC_TYPES are recorded as side effects
  # (globally and on the step). Other dependencies are added to the context
  # pool the first time they are seen. Every dependency the reader can
  # resolve is recorded in the dependency map for unit_id.
  #
  # @param unit_id [String] Unit whose dependencies are resolved
  # @param seen_units [Set<String>] Already-seen unit identifiers (mutated)
  # @param context_pool [Hash] Accumulator for unit data (mutated)
  # @param side_effects [Array<Hash>] Accumulator for async side effects (mutated)
  # @param step [Hash] Timeline step that triggered the expansion (mutated)
  # @param dependency_map [Hash] Accumulator for dependency edges (mutated)
  # @param depth [Integer] Requested expansion depth
  # @return [Array<String>] Non-async dependency identifiers newly added to the pool
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/ParameterLists, Metrics/PerceivedComplexity
  def resolve_dependencies(unit_id, seen_units, context_pool,
                           side_effects, step, dependency_map, depth)
    resolved = []
    added = []

    @reader.dependency_graph.dependencies_of(unit_id).each do |dep_id|
      dep_unit = @reader.find_unit(dep_id)
      next unless dep_unit

      # Collected here so the dependency map needs no second lookup pass.
      resolved << dep_id
      dep_type = dep_unit['type']&.to_s

      if ASYNC_TYPES.include?(dep_type)
        effect = {
          type: dep_type.to_sym,
          identifier: dep_id,
          trigger_step: "#{step[:controller]}##{step[:action]}"
        }
        side_effects << effect
        step[:side_effects] << effect
        next
      end

      next unless seen_units.add?(dep_id)

      context_pool[dep_id] = unit_summary(dep_unit)
      added << dep_id

      # Depth 2+: expand transitive dependencies
      expand_transitive(dep_id, seen_units, context_pool, dependency_map, depth - 1) if depth >= 2
    end

    dependency_map[unit_id] = resolved if resolved.any?

    added
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/ParameterLists, Metrics/PerceivedComplexity

  # Expand transitive dependencies (depth 2+).
  #
  # Unlike resolve_dependencies, this does not separate async types: every
  # resolvable, unseen dependency is added to the context pool.
  #
  # @param unit_id [String] Unit to expand from
  # @param seen_units [Set<String>] Already-seen unit identifiers
  # @param context_pool [Hash] Accumulator for unit data
  # @param dependency_map [Hash] Accumulator for dependency edges
  # @param remaining_depth [Integer] Remaining expansion depth
  # @return [void]
  def expand_transitive(unit_id, seen_units, context_pool, dependency_map, remaining_depth)
    return if remaining_depth <= 0

    resolved_deps = []

    @reader.dependency_graph.dependencies_of(unit_id).each do |dep_id|
      dep_unit = @reader.find_unit(dep_id)
      next unless dep_unit

      resolved_deps << dep_id
      next unless seen_units.add?(dep_id)

      context_pool[dep_id] = unit_summary(dep_unit)
      expand_transitive(dep_id, seen_units, context_pool, dependency_map, remaining_depth - 1)
    end

    dependency_map[unit_id] = resolved_deps if resolved_deps.any?
  end

  # Extract a summary hash from a full unit data hash.
  #
  # @param unit [Hash] Full unit data from IndexReader (string keys)
  # @return [Hash] Summary with :type, :file_path, :source_code
  def unit_summary(unit)
    {
      type: unit['type'],
      file_path: unit['file_path'],
      source_code: unit['source_code']
    }
  end

  # Apply token budget by replacing unit source code with a short placeholder.
  #
  # Units are visited in reverse insertion order (last added first) until the
  # estimated total fits the budget. A unit is left untouched when the
  # placeholder would not be smaller than its source, because truncating it
  # would increase the total while discarding content.
  #
  # @param context_pool [Hash] Unit data to budget (summaries are mutated)
  # @param budget [Integer] Maximum tokens
  # @return [Integer] Estimated token count after truncation
  def apply_budget(context_pool, budget)
    total = estimate_tokens(context_pool)
    return total if total <= budget

    context_pool.keys.reverse_each do |id|
      break if total <= budget

      unit = context_pool[id]
      source = unit[:source_code]
      next unless source

      source_tokens = estimate_token_count(source)
      placeholder = "# source truncated (#{source_tokens} tokens)"
      placeholder_tokens = estimate_token_count(placeholder)
      next if placeholder_tokens >= source_tokens

      unit[:source_code] = placeholder
      total += placeholder_tokens - source_tokens
    end

    [total, 0].max
  end

  # Estimate total tokens for the context pool.
  #
  # @param context_pool [Hash] Unit data
  # @return [Integer] Estimated token count
  def estimate_tokens(context_pool)
    context_pool.values.sum do |unit|
      source = unit[:source_code] || ''
      estimate_token_count(source) + 20 # overhead for tags/metadata
    end
  end

  # Estimate token count for a string.
  # Uses project convention: (string.length / 4.0).ceil
  # See docs/TOKEN_BENCHMARK.md — conservative floor (~10.6% overestimate).
  #
  # @param text [String] Text to estimate
  # @return [Integer] Estimated token count
  def estimate_token_count(text)
    (text.length / 4.0).ceil
  end

  # Build an empty document for sessions with no requests.
  #
  # @param session_id [String]
  # @return [SessionFlowDocument]
  def empty_document(session_id)
    SessionFlowDocument.new(session_id: session_id)
  end
end
|
|
261
|
+
# rubocop:enable Metrics/ClassLength
|
|
262
|
+
end
|
|
263
|
+
end
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module SessionTracer
|
|
7
|
+
# Value object representing an assembled session flow trace.
|
|
8
|
+
#
|
|
9
|
+
# Contains a two-level structure:
|
|
10
|
+
# - **Timeline** — ordered steps with unit_refs and side_effects (lightweight)
|
|
11
|
+
# - **Context pool** — deduplicated ExtractedUnit data (heavy, included once each)
|
|
12
|
+
#
|
|
13
|
+
# Follows the FlowDocument pattern for serialization and rendering.
|
|
14
|
+
#
|
|
15
|
+
# @example
|
|
16
|
+
# doc = SessionFlowDocument.new(
|
|
17
|
+
# session_id: "abc123",
|
|
18
|
+
# steps: [...],
|
|
19
|
+
# context_pool: { "OrdersController" => { ... } },
|
|
20
|
+
# generated_at: Time.now.utc.iso8601
|
|
21
|
+
# )
|
|
22
|
+
# doc.to_h # => JSON-serializable Hash
|
|
23
|
+
# doc.to_markdown # => human-readable document
|
|
24
|
+
# doc.to_context # => LLM XML format
|
|
25
|
+
#
|
|
26
|
+
# rubocop:disable Metrics/ClassLength
|
|
27
|
+
class SessionFlowDocument
  # strftime pattern that yields the same text as Time#iso8601 for a UTC time
  # (e.g. "2024-01-01T12:00:00Z"). Used for the default timestamp so it does
  # not depend on the `time` standard library having been required.
  TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

  attr_reader :session_id, :steps, :context_pool, :side_effects,
              :dependency_map, :token_count, :generated_at

  # @param session_id [String] The session identifier
  # @param steps [Array<Hash>] Ordered timeline steps
  # @param context_pool [Hash<String, Hash>] Deduplicated unit data keyed by identifier
  # @param side_effects [Array<Hash>] Async side effects (jobs, mailers)
  # @param dependency_map [Hash<String, Array<String>>] Unit -> dependency identifiers
  # @param token_count [Integer] Estimated total tokens
  # @param generated_at [String, nil] ISO8601 timestamp (defaults to now, UTC)
  # rubocop:disable Metrics/ParameterLists
  def initialize(session_id:, steps: [], context_pool: {}, side_effects: [],
                 dependency_map: {}, token_count: 0, generated_at: nil)
    @session_id = session_id
    @steps = steps
    @context_pool = context_pool
    @side_effects = side_effects
    @dependency_map = dependency_map
    @token_count = token_count
    @generated_at = generated_at || Time.now.utc.strftime(TIMESTAMP_FORMAT)
  end
  # rubocop:enable Metrics/ParameterLists

  # Serialize to a JSON-compatible Hash.
  #
  # @return [Hash]
  def to_h
    {
      session_id: @session_id,
      generated_at: @generated_at,
      token_count: @token_count,
      steps: @steps,
      context_pool: @context_pool,
      side_effects: @side_effects,
      dependency_map: @dependency_map
    }
  end

  # Reconstruct from a serialized Hash.
  #
  # Handles both symbol and string keys for JSON round-trip compatibility.
  # Field names (top-level keys, step keys, side-effect keys, unit summary
  # keys) are symbolized because the renderers read them as symbols. Unit
  # identifiers used as keys of the context pool and dependency map are data,
  # not field names, so they are kept as Strings.
  #
  # @param data [Hash] Previously serialized document data
  # @return [SessionFlowDocument]
  def self.from_h(data)
    fields = data.each_with_object({}) { |(key, value), out| out[key.to_sym] = value }

    new(
      session_id: fields[:session_id],
      steps: deep_symbolize_keys(fields[:steps] || []),
      context_pool: restore_context_pool(fields[:context_pool] || {}),
      side_effects: deep_symbolize_keys(fields[:side_effects] || []),
      dependency_map: restore_dependency_map(fields[:dependency_map] || {}),
      token_count: fields[:token_count] || 0,
      generated_at: fields[:generated_at]
    )
  end

  # Render as human-readable Markdown.
  #
  # Sections for side effects, code units and dependencies are emitted only
  # when the corresponding collection is non-empty.
  #
  # @return [String]
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  def to_markdown
    lines = [
      "## Session: #{@session_id}",
      "_Generated at #{@generated_at} | #{@steps.size} requests | ~#{@token_count} tokens_",
      '',
      '### Timeline',
      ''
    ]

    @steps.each.with_index(1) do |step, number|
      status = step[:status] || '?'
      duration = step[:duration_ms] ? " (#{step[:duration_ms]}ms)" : ''
      lines << "#{number}. #{step[:method]} #{step[:path]} → " \
               "#{step[:controller]}##{step[:action]} [#{status}]#{duration}"
    end
    lines << ''

    if @side_effects.any?
      lines << '### Side Effects'
      lines << ''
      @side_effects.each do |effect|
        lines << "- #{effect[:type]}: #{effect[:identifier]} (triggered by #{effect[:trigger_step]})"
      end
      lines << ''
    end

    if @context_pool.any?
      lines << '### Code Units'
      lines << ''
      @context_pool.each do |identifier, unit|
        lines << "#### #{identifier} (#{unit[:type] || 'unknown'})"
        lines << "_#{unit[:file_path]}_" if unit[:file_path]
        lines << ''
        next unless unit[:source_code]

        lines << '```ruby'
        lines << unit[:source_code]
        lines << '```'
        lines << ''
      end
    end

    if @dependency_map.any?
      lines << '### Dependencies'
      lines << ''
      @dependency_map.each do |unit_id, deps|
        lines << "- #{unit_id} → #{deps.join(', ')}"
      end
      lines << ''
    end

    lines.join("\n")
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

  # Render as LLM-consumable XML context.
  #
  # Follows the format from docs/CONTEXT_AND_CHUNKING.md. Values are
  # interpolated as-is; no XML escaping is applied.
  #
  # @return [String]
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  def to_context
    lines = []
    lines << "<session_context session_id=\"#{@session_id}\" requests=\"#{@steps.size}\" " \
             "tokens=\"#{@token_count}\" units=\"#{@context_pool.size}\">"

    lines << '<session_timeline>'
    @steps.each.with_index(1) do |step, number|
      status = step[:status] || '?'
      duration = step[:duration_ms] ? ", #{step[:duration_ms]}ms" : ''
      lines << "#{number}. #{step[:method]} #{step[:path]} → " \
               "#{step[:controller]}##{step[:action]} (#{status}#{duration})"
    end
    lines << '</session_timeline>'

    @context_pool.each do |identifier, unit|
      type = unit[:type] || 'unknown'
      file_path = unit[:file_path] || 'unknown'
      lines << %(<unit identifier="#{identifier}" type="#{type}" file="#{file_path}">)
      lines << (unit[:source_code] || '# source not available')
      lines << '</unit>'
    end

    if @side_effects.any?
      lines << '<side_effects>'
      @side_effects.each do |effect|
        lines << "#{effect[:identifier]} (triggered by #{effect[:trigger_step]}, #{effect[:type]})"
      end
      lines << '</side_effects>'
    end

    if @dependency_map.any?
      lines << '<dependencies>'
      @dependency_map.each do |unit_id, deps|
        lines << "#{unit_id} → #{deps.join(', ')}"
      end
      lines << '</dependencies>'
    end

    lines << '</session_context>'
    lines.join("\n")
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

  # Recursively convert Hash keys to Symbols, descending into Arrays.
  #
  # @api private
  def self.deep_symbolize_keys(obj)
    case obj
    when Hash
      obj.each_with_object({}) do |(key, value), result|
        result[key.to_sym] = deep_symbolize_keys(value)
      end
    when Array
      obj.map { |item| deep_symbolize_keys(item) }
    else
      obj
    end
  end
  private_class_method :deep_symbolize_keys

  # Rebuild the context pool: String identifiers as keys, symbolized unit
  # summaries as values.
  #
  # @api private
  def self.restore_context_pool(pool)
    pool.each_with_object({}) do |(identifier, unit), result|
      result[identifier.to_s] = deep_symbolize_keys(unit)
    end
  end
  private_class_method :restore_context_pool

  # Rebuild the dependency map with String identifiers as keys; the
  # dependency lists are kept unchanged.
  #
  # @api private
  def self.restore_dependency_map(map)
    map.each_with_object({}) do |(identifier, deps), result|
      result[identifier.to_s] = deps
    end
  end
  private_class_method :restore_dependency_map
end
|
|
221
|
+
# rubocop:enable Metrics/ClassLength
|
|
222
|
+
end
|
|
223
|
+
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require_relative 'store'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module SessionTracer
|
|
8
|
+
# SolidCache-backed session store.
|
|
9
|
+
#
|
|
10
|
+
# Uses SolidCache key-value storage with `expires_in`. Single JSON blob
|
|
11
|
+
# per session (read-modify-write pattern). Requires the `solid_cache` gem.
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# store = SolidCacheStore.new(cache: SolidCache::Store.new, expires_in: 3600)
|
|
15
|
+
# store.record("abc123", { controller: "OrdersController", action: "create" })
|
|
16
|
+
#
|
|
17
|
+
class SolidCacheStore < Store
|
|
18
|
+
KEY_PREFIX = 'codebase_index:session:'
|
|
19
|
+
INDEX_KEY = 'codebase_index:session_index'
|
|
20
|
+
|
|
21
|
+
# @param cache [ActiveSupport::Cache::Store] A SolidCache (or compatible) cache instance
|
|
22
|
+
# @param expires_in [Integer, nil] Expiry time in seconds (nil = no expiry)
|
|
23
|
+
def initialize(cache:, expires_in: nil)
  super()
  # Expiry (in seconds) is passed to every cache write when present.
  @expires_in = expires_in
  @cache = cache
end
|
|
28
|
+
|
|
29
|
+
# Append a request record to a session (read-modify-write).
|
|
30
|
+
#
|
|
31
|
+
# NOTE: Not atomic — concurrent writes to the same session may lose data.
|
|
32
|
+
# Acceptable for development tracing. For high-concurrency tracing, use
|
|
33
|
+
# RedisStore (RPUSH is atomic) or FileStore (LOCK_EX).
|
|
34
|
+
#
|
|
35
|
+
# @param session_id [String] The session identifier
|
|
36
|
+
# @param request_data [Hash] Request metadata to store
|
|
37
|
+
# @return [void]
|
|
38
|
+
def record(session_id, request_data)
  # Load through #read so a corrupt cached blob is treated as an empty
  # session (the same way readers see it) instead of raising
  # JSON::ParserError on every subsequent write for this session.
  requests = read(session_id)
  requests << request_data

  # Only pass expires_in when configured; nil means "no expiry".
  write_opts = @expires_in ? { expires_in: @expires_in } : {}
  @cache.write(session_key(session_id), JSON.generate(requests), **write_opts)

  update_index(session_id)
end
|
|
49
|
+
|
|
50
|
+
# Read all request records for a session.
|
|
51
|
+
#
|
|
52
|
+
# @param session_id [String] The session identifier
|
|
53
|
+
# @return [Array<Hash>] Request records, oldest first
|
|
54
|
+
def read(session_id)
|
|
55
|
+
key = session_key(session_id)
|
|
56
|
+
raw = @cache.read(key)
|
|
57
|
+
return [] unless raw
|
|
58
|
+
|
|
59
|
+
JSON.parse(raw)
|
|
60
|
+
rescue JSON::ParserError
|
|
61
|
+
[]
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# List recent session summaries.
|
|
65
|
+
#
|
|
66
|
+
# @param limit [Integer] Maximum number of sessions to return
|
|
67
|
+
# @return [Array<Hash>] Session summaries
|
|
68
|
+
def sessions(limit: 20)
|
|
69
|
+
index = read_index
|
|
70
|
+
active = index.select { |id| @cache.exist?(session_key(id)) }
|
|
71
|
+
|
|
72
|
+
# Clean up expired entries from the index
|
|
73
|
+
write_index(active) if active.size != index.size
|
|
74
|
+
|
|
75
|
+
active.first(limit).map do |session_id|
|
|
76
|
+
requests = read(session_id)
|
|
77
|
+
{
|
|
78
|
+
'session_id' => session_id,
|
|
79
|
+
'request_count' => requests.size,
|
|
80
|
+
'first_request' => requests.first&.fetch('timestamp', nil),
|
|
81
|
+
'last_request' => requests.last&.fetch('timestamp', nil)
|
|
82
|
+
}
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Remove all data for a single session.
|
|
87
|
+
#
|
|
88
|
+
# @param session_id [String] The session identifier
|
|
89
|
+
# @return [void]
|
|
90
|
+
def clear(session_id)
|
|
91
|
+
@cache.delete(session_key(session_id))
|
|
92
|
+
index = read_index
|
|
93
|
+
index.delete(session_id)
|
|
94
|
+
write_index(index)
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Remove all session data.
|
|
98
|
+
#
|
|
99
|
+
# @return [void]
|
|
100
|
+
def clear_all
|
|
101
|
+
index = read_index
|
|
102
|
+
index.each { |id| @cache.delete(session_key(id)) }
|
|
103
|
+
@cache.delete(INDEX_KEY)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
private
|
|
107
|
+
|
|
108
|
+
# @param session_id [String]
|
|
109
|
+
# @return [String] Cache key for this session
|
|
110
|
+
def session_key(session_id)
|
|
111
|
+
"#{KEY_PREFIX}#{sanitize_session_id(session_id)}"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Read the session index (list of known session IDs).
|
|
115
|
+
#
|
|
116
|
+
# @return [Array<String>]
|
|
117
|
+
def read_index
|
|
118
|
+
raw = @cache.read(INDEX_KEY)
|
|
119
|
+
return [] unless raw
|
|
120
|
+
|
|
121
|
+
JSON.parse(raw)
|
|
122
|
+
rescue JSON::ParserError
|
|
123
|
+
[]
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Write the session index.
|
|
127
|
+
#
|
|
128
|
+
# @param ids [Array<String>]
|
|
129
|
+
def write_index(ids)
|
|
130
|
+
@cache.write(INDEX_KEY, JSON.generate(ids))
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Add a session ID to the index if not already present.
|
|
134
|
+
#
|
|
135
|
+
# @param session_id [String]
|
|
136
|
+
def update_index(session_id)
|
|
137
|
+
index = read_index
|
|
138
|
+
return if index.include?(session_id)
|
|
139
|
+
|
|
140
|
+
index << session_id
|
|
141
|
+
write_index(index)
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module SessionTracer
    # Base class that spells out the contract shared by all session-trace stores.
    #
    # Every public method here raises NotImplementedError; a concrete store
    # overrides each of them:
    # - `record(session_id, request_data)` — append a request record
    # - `read(session_id)` — return all requests for a session, ordered by timestamp
    # - `sessions(limit:)` — return recent session summaries
    # - `clear(session_id)` — remove a single session
    # - `clear_all` — remove all sessions
    #
    # @abstract Subclass and override every public method.
    class Store
      # Append a request record to a session.
      #
      # @param session_id [String] The session identifier
      # @param request_data [Hash] Request metadata to store
      # @return [void]
      # @raise [NotImplementedError] always, unless overridden
      def record(session_id, request_data)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented"
      end

      # Read all request records for a session, ordered by timestamp.
      #
      # @param session_id [String] The session identifier
      # @return [Array<Hash>] Request records, oldest first
      # @raise [NotImplementedError] always, unless overridden
      def read(session_id)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented"
      end

      # List recent session summaries.
      #
      # @param limit [Integer] Maximum number of sessions to return
      # @return [Array<Hash>] Session summaries, each carrying session_id,
      #   request_count, first_request and last_request entries
      # @raise [NotImplementedError] always, unless overridden
      def sessions(limit: 20)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented"
      end

      # Remove all data for a single session.
      #
      # @param session_id [String] The session identifier
      # @return [void]
      # @raise [NotImplementedError] always, unless overridden
      def clear(session_id)
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented"
      end

      # Remove all session data.
      #
      # @return [void]
      # @raise [NotImplementedError] always, unless overridden
      def clear_all
        raise NotImplementedError, "#{self.class}##{__method__} must be implemented"
      end

      private

      # Make a session ID safe to embed in keys/filenames.
      #
      # The ID is converted with `to_s`, then every character other than
      # ASCII letters, digits, underscore and hyphen is replaced by `_`.
      #
      # @param session_id [String] Raw session identifier
      # @return [String] Sanitized identifier (alphanumeric, hyphens, underscores only)
      def sanitize_session_id(session_id)
        disallowed = /[^a-zA-Z0-9_-]/
        session_id.to_s.gsub(disallowed, '_')
      end
    end
  end
end
|