codebase_index 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -0
- data/README.md +95 -300
- data/exe/codebase-index-mcp +3 -31
- data/exe/codebase-index-mcp-http +3 -31
- data/lib/codebase_index/ast/method_extractor.rb +3 -8
- data/lib/codebase_index/ast/node.rb +28 -0
- data/lib/codebase_index/ast/parser.rb +53 -92
- data/lib/codebase_index/builder.rb +67 -4
- data/lib/codebase_index/cache/cache_middleware.rb +199 -0
- data/lib/codebase_index/cache/cache_store.rb +264 -0
- data/lib/codebase_index/cache/redis_cache_store.rb +116 -0
- data/lib/codebase_index/cache/solid_cache_store.rb +111 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +29 -24
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/job_adapter.rb +68 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +7 -40
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +7 -40
- data/lib/codebase_index/console/bridge.rb +7 -0
- data/lib/codebase_index/console/console_response_renderer.rb +3 -7
- data/lib/codebase_index/console/embedded_executor.rb +2 -1
- data/lib/codebase_index/console/server.rb +1 -4
- data/lib/codebase_index/dependency_graph.rb +28 -19
- data/lib/codebase_index/embedding/indexer.rb +18 -8
- data/lib/codebase_index/embedding/openai.rb +27 -6
- data/lib/codebase_index/embedding/provider.rb +29 -2
- data/lib/codebase_index/evaluation/evaluator.rb +5 -12
- data/lib/codebase_index/extractor.rb +40 -44
- data/lib/codebase_index/extractors/action_cable_extractor.rb +9 -36
- data/lib/codebase_index/extractors/callback_analyzer.rb +22 -8
- data/lib/codebase_index/extractors/controller_extractor.rb +3 -93
- data/lib/codebase_index/extractors/decorator_extractor.rb +7 -14
- data/lib/codebase_index/extractors/engine_extractor.rb +20 -1
- data/lib/codebase_index/extractors/graphql_extractor.rb +4 -29
- data/lib/codebase_index/extractors/job_extractor.rb +11 -6
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -31
- data/lib/codebase_index/extractors/mailer_extractor.rb +15 -85
- data/lib/codebase_index/extractors/manager_extractor.rb +1 -15
- data/lib/codebase_index/extractors/model_extractor.rb +20 -53
- data/lib/codebase_index/extractors/phlex_extractor.rb +8 -8
- data/lib/codebase_index/extractors/policy_extractor.rb +1 -24
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -17
- data/lib/codebase_index/extractors/serializer_extractor.rb +12 -7
- data/lib/codebase_index/extractors/service_extractor.rb +1 -38
- data/lib/codebase_index/extractors/shared_utility_methods.rb +183 -1
- data/lib/codebase_index/extractors/validator_extractor.rb +3 -17
- data/lib/codebase_index/extractors/view_component_extractor.rb +10 -9
- data/lib/codebase_index/filename_utils.rb +32 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +1 -4
- data/lib/codebase_index/formatting/base.rb +0 -10
- data/lib/codebase_index/graph_analyzer.rb +1 -1
- data/lib/codebase_index/mcp/bootstrapper.rb +58 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +35 -34
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +29 -29
- data/lib/codebase_index/mcp/server.rb +59 -68
- data/lib/codebase_index/mcp/tool_response_renderer.rb +23 -0
- data/lib/codebase_index/notion/client.rb +2 -2
- data/lib/codebase_index/notion/mapper.rb +1 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +3 -11
- data/lib/codebase_index/notion/mappers/model_mapper.rb +20 -23
- data/lib/codebase_index/notion/mappers/shared.rb +22 -0
- data/lib/codebase_index/observability/health_check.rb +0 -2
- data/lib/codebase_index/observability/structured_logger.rb +12 -30
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -7
- data/lib/codebase_index/resilience/index_validator.rb +3 -21
- data/lib/codebase_index/retrieval/context_assembler.rb +19 -7
- data/lib/codebase_index/retrieval/query_classifier.rb +14 -12
- data/lib/codebase_index/retrieval/ranker.rb +6 -2
- data/lib/codebase_index/retrieval/search_executor.rb +8 -19
- data/lib/codebase_index/retriever.rb +1 -9
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +5 -25
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +6 -7
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +58 -53
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +11 -7
- data/lib/codebase_index/session_tracer/file_store.rb +1 -8
- data/lib/codebase_index/session_tracer/redis_store.rb +1 -7
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +4 -13
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +1 -7
- data/lib/codebase_index/session_tracer/store.rb +14 -0
- data/lib/codebase_index/storage/metadata_store.rb +37 -10
- data/lib/codebase_index/storage/pgvector.rb +37 -5
- data/lib/codebase_index/storage/qdrant.rb +39 -6
- data/lib/codebase_index/storage/vector_store.rb +11 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +14 -10
- data/lib/codebase_index/token_utils.rb +19 -0
- data/lib/codebase_index/version.rb +1 -1
- data/lib/codebase_index.rb +25 -6
- data/lib/tasks/codebase_index.rake +2 -2
- metadata +11 -2
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'json'
|
|
4
|
-
|
|
4
|
+
require_relative '../filename_utils'
|
|
5
5
|
|
|
6
6
|
module CodebaseIndex
|
|
7
7
|
module Resilience
|
|
@@ -18,6 +18,8 @@ module CodebaseIndex
|
|
|
18
18
|
# report = validator.validate
|
|
19
19
|
# puts report.errors if !report.valid?
|
|
20
20
|
class IndexValidator
|
|
21
|
+
include CodebaseIndex::FilenameUtils
|
|
22
|
+
|
|
21
23
|
# Report produced by {#validate}.
|
|
22
24
|
#
|
|
23
25
|
# @!attribute [r] valid?
|
|
@@ -160,26 +162,6 @@ module CodebaseIndex
|
|
|
160
162
|
warnings << "Stale file not in index: #{type_name}/#{basename}"
|
|
161
163
|
end
|
|
162
164
|
end
|
|
163
|
-
|
|
164
|
-
# Convert an identifier to a safe filename (legacy format, mirrors Extractor#safe_filename).
|
|
165
|
-
#
|
|
166
|
-
# @param identifier [String] The unit identifier (e.g., "Admin::UsersController")
|
|
167
|
-
# @return [String] A filesystem-safe filename (e.g., "Admin__UsersController.json")
|
|
168
|
-
def safe_filename(identifier)
|
|
169
|
-
"#{identifier.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')}.json"
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
# Convert an identifier to a collision-safe filename (current format).
|
|
173
|
-
# Mirrors {Extractor#collision_safe_filename} — appends a short SHA256 digest
|
|
174
|
-
# to disambiguate identifiers that normalize to the same safe_filename.
|
|
175
|
-
#
|
|
176
|
-
# @param identifier [String] The unit identifier
|
|
177
|
-
# @return [String] Collision-safe filename (e.g., "Admin__UsersController_a1b2c3d4.json")
|
|
178
|
-
def collision_safe_filename(identifier)
|
|
179
|
-
base = identifier.gsub('::', '__').gsub(/[^a-zA-Z0-9_-]/, '_')
|
|
180
|
-
digest = Digest::SHA256.hexdigest(identifier)[0, 8]
|
|
181
|
-
"#{base}_#{digest}.json"
|
|
182
|
-
end
|
|
183
165
|
end
|
|
184
166
|
end
|
|
185
167
|
end
|
|
@@ -54,6 +54,9 @@ module CodebaseIndex
|
|
|
54
54
|
sources = []
|
|
55
55
|
tokens_used = 0
|
|
56
56
|
|
|
57
|
+
# Pre-fetch all candidate metadata in one batch query
|
|
58
|
+
@unit_cache = @metadata_store.find_batch(candidates.map(&:identifier))
|
|
59
|
+
|
|
57
60
|
# 1. Structural context (always first if provided)
|
|
58
61
|
tokens_used = add_structural_section(sections, structural_context, tokens_used, effective_budget)
|
|
59
62
|
|
|
@@ -141,7 +144,7 @@ module CodebaseIndex
|
|
|
141
144
|
|
|
142
145
|
# Append a single candidate to the section. Returns updated tokens_used, or nil to stop.
|
|
143
146
|
def append_candidate(parts, sources, candidate, budget, tokens_used)
|
|
144
|
-
unit = @
|
|
147
|
+
unit = @unit_cache[candidate.identifier]
|
|
145
148
|
return tokens_used unless unit
|
|
146
149
|
|
|
147
150
|
text = format_unit(unit, candidate)
|
|
@@ -165,10 +168,10 @@ module CodebaseIndex
|
|
|
165
168
|
# @param candidate [Candidate] The search candidate
|
|
166
169
|
# @return [String]
|
|
167
170
|
def format_unit(unit, _candidate)
|
|
168
|
-
identifier = unit
|
|
169
|
-
type = unit
|
|
170
|
-
file_path = unit
|
|
171
|
-
source = unit
|
|
171
|
+
identifier = unit_field(unit, :identifier)
|
|
172
|
+
type = unit_field(unit, :type)
|
|
173
|
+
file_path = unit_field(unit, :file_path)
|
|
174
|
+
source = unit_field(unit, :source_code) || ''
|
|
172
175
|
|
|
173
176
|
<<~UNIT.strip
|
|
174
177
|
## #{identifier} (#{type})
|
|
@@ -184,14 +187,23 @@ module CodebaseIndex
|
|
|
184
187
|
def build_source_attribution(candidate, unit, truncated: false)
|
|
185
188
|
attribution = {
|
|
186
189
|
identifier: candidate.identifier,
|
|
187
|
-
type: unit
|
|
190
|
+
type: unit_field(unit, :type),
|
|
188
191
|
score: candidate.score,
|
|
189
|
-
file_path: unit
|
|
192
|
+
file_path: unit_field(unit, :file_path)
|
|
190
193
|
}
|
|
191
194
|
attribution[:truncated] = true if truncated
|
|
192
195
|
attribution
|
|
193
196
|
end
|
|
194
197
|
|
|
198
|
+
# Read a field from a unit hash, accepting either symbol or string keys.
|
|
199
|
+
#
|
|
200
|
+
# @param unit [Hash]
|
|
201
|
+
# @param key [Symbol]
|
|
202
|
+
# @return [Object, nil]
|
|
203
|
+
def unit_field(unit, key)
|
|
204
|
+
unit[key] || unit[key.to_s]
|
|
205
|
+
end
|
|
206
|
+
|
|
195
207
|
# Check if a candidate is framework source.
|
|
196
208
|
#
|
|
197
209
|
# @param candidate [Candidate]
|
|
@@ -87,28 +87,30 @@ module CodebaseIndex
|
|
|
87
87
|
# @param query [String]
|
|
88
88
|
# @return [Symbol]
|
|
89
89
|
def detect_intent(query)
|
|
90
|
-
INTENT_PATTERNS
|
|
91
|
-
return intent if query.match?(pattern)
|
|
92
|
-
end
|
|
93
|
-
:understand # default
|
|
90
|
+
match_first(INTENT_PATTERNS, query, default: :understand)
|
|
94
91
|
end
|
|
95
92
|
|
|
96
93
|
# @param query [String]
|
|
97
94
|
# @return [Symbol]
|
|
98
95
|
def detect_scope(query)
|
|
99
|
-
SCOPE_PATTERNS
|
|
100
|
-
return scope if query.match?(pattern)
|
|
101
|
-
end
|
|
102
|
-
:focused # default
|
|
96
|
+
match_first(SCOPE_PATTERNS, query, default: :focused)
|
|
103
97
|
end
|
|
104
98
|
|
|
105
99
|
# @param query [String]
|
|
106
100
|
# @return [Symbol, nil]
|
|
107
101
|
def detect_target_type(query)
|
|
108
|
-
TARGET_PATTERNS
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
102
|
+
match_first(TARGET_PATTERNS, query, default: nil)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Match query against a hash of {key => pattern}, returning the first matching key.
|
|
106
|
+
#
|
|
107
|
+
# @param patterns [Hash{Symbol => Regexp}]
|
|
108
|
+
# @param query [String]
|
|
109
|
+
# @param default [Object] value if no pattern matches
|
|
110
|
+
# @return [Object]
|
|
111
|
+
def match_first(patterns, query, default:)
|
|
112
|
+
patterns.each { |key, pattern| return key if query.match?(pattern) }
|
|
113
|
+
default
|
|
112
114
|
end
|
|
113
115
|
|
|
114
116
|
# @param query [String]
|
|
@@ -102,8 +102,9 @@ module CodebaseIndex
|
|
|
102
102
|
#
|
|
103
103
|
# @return [Array<Candidate>]
|
|
104
104
|
def rebuild_rrf_candidates(candidates, rrf_scores, metadata_map)
|
|
105
|
+
original_by_id = candidates.index_by(&:identifier)
|
|
105
106
|
rrf_scores.sort_by { |_id, score| -score }.map do |identifier, score|
|
|
106
|
-
original =
|
|
107
|
+
original = original_by_id[identifier]
|
|
107
108
|
build_candidate(
|
|
108
109
|
identifier: identifier,
|
|
109
110
|
score: score,
|
|
@@ -119,8 +120,11 @@ module CodebaseIndex
|
|
|
119
120
|
# @param classification [QueryClassifier::Classification]
|
|
120
121
|
# @return [Array<Hash>]
|
|
121
122
|
def score_candidates(candidates, classification)
|
|
123
|
+
# Batch-fetch all metadata in one query instead of per-candidate lookups
|
|
124
|
+
unit_map = @metadata_store.find_batch(candidates.map(&:identifier))
|
|
125
|
+
|
|
122
126
|
candidates.map do |candidate|
|
|
123
|
-
unit =
|
|
127
|
+
unit = unit_map[candidate.identifier]
|
|
124
128
|
|
|
125
129
|
{
|
|
126
130
|
candidate: candidate,
|
|
@@ -33,26 +33,11 @@ module CodebaseIndex
|
|
|
33
33
|
|
|
34
34
|
# Strategy mapping from (intent, scope) → strategy.
|
|
35
35
|
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
# Framework intent always uses :keyword against framework sources.
|
|
36
|
+
# Covers pinpoint overrides for locate/reference (:direct).
|
|
37
|
+
# Trace and framework intents are handled before this map is consulted.
|
|
39
38
|
STRATEGY_MAP = {
|
|
40
|
-
# [intent, scope] => strategy
|
|
41
|
-
# Pinpoint
|
|
42
39
|
%i[locate pinpoint] => :direct,
|
|
43
|
-
%i[reference pinpoint] => :direct
|
|
44
|
-
|
|
45
|
-
# Trace always uses graph
|
|
46
|
-
%i[trace pinpoint] => :graph,
|
|
47
|
-
%i[trace focused] => :graph,
|
|
48
|
-
%i[trace exploratory] => :graph,
|
|
49
|
-
%i[trace comprehensive] => :graph,
|
|
50
|
-
|
|
51
|
-
# Framework always keyword
|
|
52
|
-
%i[framework pinpoint] => :keyword,
|
|
53
|
-
%i[framework focused] => :keyword,
|
|
54
|
-
%i[framework exploratory] => :keyword,
|
|
55
|
-
%i[framework comprehensive] => :keyword
|
|
40
|
+
%i[reference pinpoint] => :direct
|
|
56
41
|
}.freeze
|
|
57
42
|
|
|
58
43
|
# @param vector_store [Storage::VectorStore::Interface] Vector store adapter
|
|
@@ -93,7 +78,11 @@ module CodebaseIndex
|
|
|
93
78
|
intent = classification.intent
|
|
94
79
|
scope = classification.scope
|
|
95
80
|
|
|
96
|
-
#
|
|
81
|
+
# Intent-level overrides (apply regardless of scope)
|
|
82
|
+
return :graph if intent == :trace
|
|
83
|
+
return :keyword if intent == :framework
|
|
84
|
+
|
|
85
|
+
# Pinpoint overrides for locate/reference
|
|
97
86
|
mapped = STRATEGY_MAP[[intent, scope]]
|
|
98
87
|
return mapped if mapped
|
|
99
88
|
|
|
@@ -140,7 +140,7 @@ module CodebaseIndex
|
|
|
140
140
|
return nil if total.zero?
|
|
141
141
|
|
|
142
142
|
type_counts = STRUCTURAL_TYPES.filter_map do |type|
|
|
143
|
-
count =
|
|
143
|
+
count = @metadata_store.find_by_type(type).size
|
|
144
144
|
"#{count} #{type}s" if count.positive?
|
|
145
145
|
end
|
|
146
146
|
|
|
@@ -148,13 +148,5 @@ module CodebaseIndex
|
|
|
148
148
|
rescue StandardError
|
|
149
149
|
nil
|
|
150
150
|
end
|
|
151
|
-
|
|
152
|
-
# Count units of a given type in the metadata store.
|
|
153
|
-
#
|
|
154
|
-
# @param type [String] The unit type to count
|
|
155
|
-
# @return [Integer] Number of units of this type
|
|
156
|
-
def count_by_type(type)
|
|
157
|
-
@metadata_store.find_by_type(type).size
|
|
158
|
-
end
|
|
159
151
|
end
|
|
160
152
|
end
|
|
@@ -18,6 +18,7 @@ module CodebaseIndex
|
|
|
18
18
|
#
|
|
19
19
|
class ClassAnalyzer
|
|
20
20
|
include FqnBuilder
|
|
21
|
+
include Ast::SourceSpan
|
|
21
22
|
|
|
22
23
|
# @param parser [Ast::Parser, nil] Parser instance (creates default if nil)
|
|
23
24
|
def initialize(parser: nil)
|
|
@@ -43,9 +44,9 @@ module CodebaseIndex
|
|
|
43
44
|
|
|
44
45
|
case node.type
|
|
45
46
|
when :class
|
|
46
|
-
|
|
47
|
+
process_definition(node, :ruby_class, source, file_path, namespace_stack, units)
|
|
47
48
|
when :module
|
|
48
|
-
|
|
49
|
+
process_definition(node, :ruby_module, source, file_path, namespace_stack, units)
|
|
49
50
|
else
|
|
50
51
|
(node.children || []).each do |child|
|
|
51
52
|
extract_definitions(child, source, file_path, namespace_stack, units)
|
|
@@ -53,14 +54,6 @@ module CodebaseIndex
|
|
|
53
54
|
end
|
|
54
55
|
end
|
|
55
56
|
|
|
56
|
-
def process_class(node, source, file_path, namespace_stack, units)
|
|
57
|
-
process_definition(node, :ruby_class, source, file_path, namespace_stack, units)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def process_module(node, source, file_path, namespace_stack, units)
|
|
61
|
-
process_definition(node, :ruby_module, source, file_path, namespace_stack, units)
|
|
62
|
-
end
|
|
63
|
-
|
|
64
57
|
def process_definition(node, type, source, file_path, namespace_stack, units)
|
|
65
58
|
name = node.method_name
|
|
66
59
|
fqn = build_fqn(name, namespace_stack)
|
|
@@ -144,13 +137,7 @@ module CodebaseIndex
|
|
|
144
137
|
|
|
145
138
|
# Count def and defs nodes in body children (non-recursive — only direct methods).
|
|
146
139
|
def count_methods(body_children)
|
|
147
|
-
count
|
|
148
|
-
body_children.each do |child|
|
|
149
|
-
next unless child.is_a?(Ast::Node)
|
|
150
|
-
|
|
151
|
-
count += 1 if %i[def defs].include?(child.type)
|
|
152
|
-
end
|
|
153
|
-
count
|
|
140
|
+
body_children.count { |child| child.is_a?(Ast::Node) && %i[def defs].include?(child.type) }
|
|
154
141
|
end
|
|
155
142
|
|
|
156
143
|
# Build the constant name from a :const node (may have receiver for namespaced).
|
|
@@ -163,14 +150,7 @@ module CodebaseIndex
|
|
|
163
150
|
|
|
164
151
|
# Extract source text for a node using line range.
|
|
165
152
|
def extract_source(node, source)
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
lines = source.lines
|
|
169
|
-
start_idx = node.line - 1
|
|
170
|
-
end_idx = node.end_line - 1
|
|
171
|
-
return nil if start_idx.negative? || end_idx >= lines.length
|
|
172
|
-
|
|
173
|
-
lines[start_idx..end_idx].join
|
|
153
|
+
extract_source_span(source, node.line, node.end_line)
|
|
174
154
|
end
|
|
175
155
|
|
|
176
156
|
# Build dependency list from superclass, includes, and extends.
|
|
@@ -21,6 +21,11 @@ module CodebaseIndex
|
|
|
21
21
|
CONSTRUCTION_METHODS = %w[new].freeze
|
|
22
22
|
SERIALIZATION_METHODS = %w[to_h to_json to_a serialize as_json].freeze
|
|
23
23
|
DESERIALIZATION_METHODS = %w[from_json parse].freeze
|
|
24
|
+
CATEGORY_BY_METHOD = [
|
|
25
|
+
*CONSTRUCTION_METHODS.map { |m| [m, :construction] },
|
|
26
|
+
*SERIALIZATION_METHODS.map { |m| [m, :serialization] },
|
|
27
|
+
*DESERIALIZATION_METHODS.map { |m| [m, :deserialization] }
|
|
28
|
+
].to_h.freeze
|
|
24
29
|
|
|
25
30
|
# @param parser [Ast::Parser, nil] Parser instance (creates default if nil)
|
|
26
31
|
def initialize(parser: nil)
|
|
@@ -65,13 +70,7 @@ module CodebaseIndex
|
|
|
65
70
|
end
|
|
66
71
|
|
|
67
72
|
def categorize(method_name)
|
|
68
|
-
|
|
69
|
-
:construction
|
|
70
|
-
elsif SERIALIZATION_METHODS.include?(method_name)
|
|
71
|
-
:serialization
|
|
72
|
-
elsif DESERIALIZATION_METHODS.include?(method_name)
|
|
73
|
-
:deserialization
|
|
74
|
-
end
|
|
73
|
+
CATEGORY_BY_METHOD[method_name]
|
|
75
74
|
end
|
|
76
75
|
end
|
|
77
76
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
3
5
|
module CodebaseIndex
|
|
4
6
|
module RubyAnalyzer
|
|
5
7
|
# Renders Mermaid-format diagrams from extracted units, dependency graphs,
|
|
@@ -25,31 +27,24 @@ module CodebaseIndex
|
|
|
25
27
|
lines = ['graph TD']
|
|
26
28
|
return lines.join("\n") if units.nil? || units.empty?
|
|
27
29
|
|
|
28
|
-
seen_nodes =
|
|
29
|
-
|
|
30
|
+
seen_nodes = Set.new
|
|
31
|
+
seen_edges = Set.new
|
|
30
32
|
|
|
31
33
|
units.each do |unit|
|
|
32
34
|
node_id = sanitize_id(unit.identifier)
|
|
33
|
-
|
|
34
|
-
seen_nodes[node_id] = true
|
|
35
|
-
lines << " #{node_id}[\"#{escape_label(unit.identifier)}\"]"
|
|
36
|
-
end
|
|
35
|
+
lines << " #{node_id}[\"#{escape_label(unit.identifier)}\"]" if seen_nodes.add?(node_id)
|
|
37
36
|
|
|
38
37
|
(unit.dependencies || []).each do |dep|
|
|
39
38
|
target = dep[:target] || dep['target']
|
|
40
39
|
next unless target
|
|
41
40
|
|
|
42
41
|
target_id = sanitize_id(target)
|
|
43
|
-
|
|
44
|
-
seen_nodes[target_id] = true
|
|
45
|
-
lines << " #{target_id}[\"#{escape_label(target)}\"]"
|
|
46
|
-
end
|
|
42
|
+
lines << " #{target_id}[\"#{escape_label(target)}\"]" if seen_nodes.add?(target_id)
|
|
47
43
|
|
|
48
44
|
via = dep[:via] || dep['via']
|
|
49
45
|
edge_key = "#{node_id}->#{target_id}"
|
|
50
|
-
next
|
|
46
|
+
next unless seen_edges.add?(edge_key)
|
|
51
47
|
|
|
52
|
-
edges << edge_key
|
|
53
48
|
lines << if via
|
|
54
49
|
" #{node_id} -->|#{via}| #{target_id}"
|
|
55
50
|
else
|
|
@@ -95,16 +90,14 @@ module CodebaseIndex
|
|
|
95
90
|
end
|
|
96
91
|
|
|
97
92
|
# Render edges
|
|
98
|
-
seen_edges =
|
|
93
|
+
seen_edges = Set.new
|
|
99
94
|
edges.each do |source, targets|
|
|
100
|
-
|
|
101
|
-
targets.each do |target|
|
|
95
|
+
Array(targets).each do |target|
|
|
102
96
|
next unless nodes.key?(target)
|
|
103
97
|
|
|
104
98
|
edge_key = "#{sanitize_id(source)}->#{sanitize_id(target)}"
|
|
105
|
-
next
|
|
99
|
+
next unless seen_edges.add?(edge_key)
|
|
106
100
|
|
|
107
|
-
seen_edges << edge_key
|
|
108
101
|
lines << " #{sanitize_id(source)} --> #{sanitize_id(target)}"
|
|
109
102
|
end
|
|
110
103
|
end
|
|
@@ -123,15 +116,14 @@ module CodebaseIndex
|
|
|
123
116
|
lines = ['flowchart TD']
|
|
124
117
|
return lines.join("\n") if units.nil? || units.empty?
|
|
125
118
|
|
|
126
|
-
seen_nodes =
|
|
119
|
+
seen_nodes = Set.new
|
|
127
120
|
|
|
128
121
|
units.each do |unit|
|
|
129
122
|
transformations = unit.metadata[:data_transformations] || unit.metadata['data_transformations']
|
|
130
123
|
next unless transformations.is_a?(Array) && transformations.any?
|
|
131
124
|
|
|
132
125
|
node_id = sanitize_id(unit.identifier)
|
|
133
|
-
|
|
134
|
-
seen_nodes[node_id] = true
|
|
126
|
+
if seen_nodes.add?(node_id)
|
|
135
127
|
shape = dataflow_shape(transformations)
|
|
136
128
|
lines << " #{node_id}#{shape}"
|
|
137
129
|
end
|
|
@@ -144,10 +136,7 @@ module CodebaseIndex
|
|
|
144
136
|
category = (t[:category] || t['category'])&.to_s
|
|
145
137
|
method_name = t[:method] || t['method']
|
|
146
138
|
|
|
147
|
-
|
|
148
|
-
seen_nodes[receiver_id] = true
|
|
149
|
-
lines << " #{receiver_id}[\"#{escape_label(receiver)}\"]"
|
|
150
|
-
end
|
|
139
|
+
lines << " #{receiver_id}[\"#{escape_label(receiver)}\"]" if seen_nodes.add?(receiver_id)
|
|
151
140
|
|
|
152
141
|
label = [category, method_name].compact.join(': ')
|
|
153
142
|
lines << " #{node_id} -->|#{label}| #{receiver_id}"
|
|
@@ -200,41 +189,57 @@ module CodebaseIndex
|
|
|
200
189
|
# Analysis summary
|
|
201
190
|
sections << '## Analysis Summary'
|
|
202
191
|
sections << ''
|
|
203
|
-
|
|
204
|
-
stats = analysis[:stats] || analysis['stats'] || {}
|
|
205
|
-
sections << "- **Orphans:** #{stats[:orphan_count] || stats['orphan_count'] || 0}"
|
|
206
|
-
sections << "- **Dead ends:** #{stats[:dead_end_count] || stats['dead_end_count'] || 0}"
|
|
207
|
-
sections << "- **Hubs:** #{stats[:hub_count] || stats['hub_count'] || 0}"
|
|
208
|
-
sections << "- **Cycles:** #{stats[:cycle_count] || stats['cycle_count'] || 0}"
|
|
209
|
-
|
|
210
|
-
hubs = analysis[:hubs] || analysis['hubs'] || []
|
|
211
|
-
if hubs.any?
|
|
212
|
-
sections << ''
|
|
213
|
-
sections << '### Top Hubs'
|
|
214
|
-
sections << ''
|
|
215
|
-
hubs.first(5).each do |hub|
|
|
216
|
-
id = hub[:identifier] || hub['identifier']
|
|
217
|
-
count = hub[:dependent_count] || hub['dependent_count']
|
|
218
|
-
sections << "- #{id} (#{count} dependents)"
|
|
219
|
-
end
|
|
220
|
-
end
|
|
221
|
-
|
|
222
|
-
cycles = analysis[:cycles] || analysis['cycles'] || []
|
|
223
|
-
if cycles.any?
|
|
224
|
-
sections << ''
|
|
225
|
-
sections << '### Cycles'
|
|
226
|
-
sections << ''
|
|
227
|
-
cycles.each do |cycle|
|
|
228
|
-
sections << "- #{cycle.join(' -> ')}"
|
|
229
|
-
end
|
|
230
|
-
end
|
|
231
|
-
end
|
|
192
|
+
sections.concat(render_stats_section(analysis))
|
|
232
193
|
|
|
233
194
|
sections.join("\n")
|
|
234
195
|
end
|
|
235
196
|
|
|
236
197
|
private
|
|
237
198
|
|
|
199
|
+
# Render the Analysis Summary section lines for a given analysis hash.
|
|
200
|
+
#
|
|
201
|
+
# @param analysis [Hash, nil] Graph analysis report from GraphAnalyzer#analyze
|
|
202
|
+
# @return [Array<String>] Lines to append to the architecture document
|
|
203
|
+
def render_stats_section(analysis)
|
|
204
|
+
lines = []
|
|
205
|
+
return lines unless analysis
|
|
206
|
+
|
|
207
|
+
stats = analysis[:stats] || analysis['stats'] || {}
|
|
208
|
+
lines << "- **Orphans:** #{stats[:orphan_count] || stats['orphan_count'] || 0}"
|
|
209
|
+
lines << "- **Dead ends:** #{stats[:dead_end_count] || stats['dead_end_count'] || 0}"
|
|
210
|
+
lines << "- **Hubs:** #{stats[:hub_count] || stats['hub_count'] || 0}"
|
|
211
|
+
lines << "- **Cycles:** #{stats[:cycle_count] || stats['cycle_count'] || 0}"
|
|
212
|
+
|
|
213
|
+
hubs = analysis[:hubs] || analysis['hubs'] || []
|
|
214
|
+
lines.concat(render_hubs_section(hubs))
|
|
215
|
+
|
|
216
|
+
cycles = analysis[:cycles] || analysis['cycles'] || []
|
|
217
|
+
if cycles.any?
|
|
218
|
+
lines << ''
|
|
219
|
+
lines << '### Cycles'
|
|
220
|
+
lines << ''
|
|
221
|
+
cycles.each { |cycle| lines << "- #{cycle.join(' -> ')}" }
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
lines
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
# Render the Top Hubs subsection lines.
|
|
228
|
+
#
|
|
229
|
+
# @param hubs [Array<Hash>] Hub entries with :identifier and :dependent_count keys
|
|
230
|
+
# @return [Array<String>] Lines to append, or empty array if no hubs
|
|
231
|
+
def render_hubs_section(hubs)
|
|
232
|
+
return [] unless hubs.any?
|
|
233
|
+
|
|
234
|
+
lines = ['', '### Top Hubs', '']
|
|
235
|
+
hubs.first(5).each do |hub|
|
|
236
|
+
id = hub[:identifier] || hub['identifier']
|
|
237
|
+
count = hub[:dependent_count] || hub['dependent_count']
|
|
238
|
+
lines << "- #{id} (#{count} dependents)"
|
|
239
|
+
end
|
|
240
|
+
lines
|
|
241
|
+
end
|
|
242
|
+
|
|
238
243
|
# Sanitize an identifier for use as a Mermaid node ID.
|
|
239
244
|
#
|
|
240
245
|
# Replaces characters that Mermaid cannot use in node IDs with underscores.
|
|
@@ -64,19 +64,19 @@ module CodebaseIndex
|
|
|
64
64
|
|
|
65
65
|
traces = grouped[key]
|
|
66
66
|
|
|
67
|
-
calls = traces.select { |t| t
|
|
68
|
-
returns = traces.select { |t| t
|
|
67
|
+
calls = traces.select { |t| fetch_key(t, :event) == 'call' }
|
|
68
|
+
returns = traces.select { |t| fetch_key(t, :event) == 'return' }
|
|
69
69
|
|
|
70
70
|
callers = calls.filter_map do |t|
|
|
71
|
-
caller_class = t
|
|
72
|
-
caller_method = t
|
|
71
|
+
caller_class = fetch_key(t, :caller_class)
|
|
72
|
+
caller_method = fetch_key(t, :caller_method)
|
|
73
73
|
next unless caller_class
|
|
74
74
|
|
|
75
75
|
{ 'caller_class' => caller_class, 'caller_method' => caller_method }
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
return_types = returns.filter_map do |t|
|
|
79
|
-
t
|
|
79
|
+
fetch_key(t, :return_class)
|
|
80
80
|
end.uniq
|
|
81
81
|
|
|
82
82
|
unit.metadata[:trace] = {
|
|
@@ -90,11 +90,15 @@ module CodebaseIndex
|
|
|
90
90
|
class << self
|
|
91
91
|
private
|
|
92
92
|
|
|
93
|
+
def fetch_key(hash, key)
|
|
94
|
+
hash[key.to_s] || hash[key.to_sym]
|
|
95
|
+
end
|
|
96
|
+
|
|
93
97
|
def group_traces(trace_data)
|
|
94
98
|
grouped = Hash.new { |h, k| h[k] = [] }
|
|
95
99
|
trace_data.each do |trace|
|
|
96
|
-
class_name = trace
|
|
97
|
-
method_name = trace
|
|
100
|
+
class_name = fetch_key(trace, :class_name)
|
|
101
|
+
method_name = fetch_key(trace, :method_name)
|
|
98
102
|
next unless class_name && method_name
|
|
99
103
|
|
|
100
104
|
key = "#{class_name}##{method_name}"
|
|
@@ -71,14 +71,7 @@ module CodebaseIndex
|
|
|
71
71
|
|
|
72
72
|
files.first(limit).map do |file|
|
|
73
73
|
session_id = File.basename(file, '.jsonl')
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
{
|
|
77
|
-
'session_id' => session_id,
|
|
78
|
-
'request_count' => requests.size,
|
|
79
|
-
'first_request' => requests.first&.fetch('timestamp', nil),
|
|
80
|
-
'last_request' => requests.last&.fetch('timestamp', nil)
|
|
81
|
-
}
|
|
74
|
+
session_summary(session_id, read(session_id))
|
|
82
75
|
end
|
|
83
76
|
end
|
|
84
77
|
|
|
@@ -72,13 +72,7 @@ module CodebaseIndex
|
|
|
72
72
|
expired.each { |id| @redis.srem(SESSIONS_KEY, id) } if expired.any?
|
|
73
73
|
|
|
74
74
|
active.first(limit).map do |session_id|
|
|
75
|
-
|
|
76
|
-
{
|
|
77
|
-
'session_id' => session_id,
|
|
78
|
-
'request_count' => requests.size,
|
|
79
|
-
'first_request' => requests.first&.fetch('timestamp', nil),
|
|
80
|
-
'last_request' => requests.last&.fetch('timestamp', nil)
|
|
81
|
-
}
|
|
75
|
+
session_summary(session_id, read(session_id))
|
|
82
76
|
end
|
|
83
77
|
end
|
|
84
78
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'json'
|
|
4
4
|
require 'set'
|
|
5
|
+
require_relative '../token_utils'
|
|
5
6
|
require_relative 'session_flow_document'
|
|
6
7
|
|
|
7
8
|
module CodebaseIndex
|
|
@@ -220,10 +221,10 @@ module CodebaseIndex
|
|
|
220
221
|
source = unit[:source_code]
|
|
221
222
|
next unless source
|
|
222
223
|
|
|
223
|
-
source_tokens =
|
|
224
|
+
source_tokens = TokenUtils.estimate_tokens(source)
|
|
224
225
|
unit[:source_code] = "# source truncated (#{source_tokens} tokens)"
|
|
225
226
|
total -= source_tokens
|
|
226
|
-
total +=
|
|
227
|
+
total += TokenUtils.estimate_tokens(unit[:source_code])
|
|
227
228
|
end
|
|
228
229
|
|
|
229
230
|
[total, 0].max
|
|
@@ -236,20 +237,10 @@ module CodebaseIndex
|
|
|
236
237
|
def estimate_tokens(context_pool)
|
|
237
238
|
context_pool.values.sum do |unit|
|
|
238
239
|
source = unit[:source_code] || ''
|
|
239
|
-
|
|
240
|
+
TokenUtils.estimate_tokens(source) + 20 # overhead for tags/metadata
|
|
240
241
|
end
|
|
241
242
|
end
|
|
242
243
|
|
|
243
|
-
# Estimate token count for a string.
|
|
244
|
-
# Uses project convention: (string.length / 4.0).ceil
|
|
245
|
-
# See docs/TOKEN_BENCHMARK.md — conservative floor (~10.6% overestimate).
|
|
246
|
-
#
|
|
247
|
-
# @param text [String] Text to estimate
|
|
248
|
-
# @return [Integer] Estimated token count
|
|
249
|
-
def estimate_token_count(text)
|
|
250
|
-
(text.length / 4.0).ceil
|
|
251
|
-
end
|
|
252
|
-
|
|
253
244
|
# Build an empty document for sessions with no requests.
|
|
254
245
|
#
|
|
255
246
|
# @param session_id [String]
|
|
@@ -73,13 +73,7 @@ module CodebaseIndex
|
|
|
73
73
|
write_index(active) if active.size != index.size
|
|
74
74
|
|
|
75
75
|
active.first(limit).map do |session_id|
|
|
76
|
-
|
|
77
|
-
{
|
|
78
|
-
'session_id' => session_id,
|
|
79
|
-
'request_count' => requests.size,
|
|
80
|
-
'first_request' => requests.first&.fetch('timestamp', nil),
|
|
81
|
-
'last_request' => requests.last&.fetch('timestamp', nil)
|
|
82
|
-
}
|
|
76
|
+
session_summary(session_id, read(session_id))
|
|
83
77
|
end
|
|
84
78
|
end
|
|
85
79
|
|
|
@@ -62,6 +62,20 @@ module CodebaseIndex
|
|
|
62
62
|
def sanitize_session_id(session_id)
|
|
63
63
|
session_id.to_s.gsub(/[^a-zA-Z0-9_-]/, '_')
|
|
64
64
|
end
|
|
65
|
+
|
|
66
|
+
# Build a session summary hash from a session ID and its requests.
|
|
67
|
+
#
|
|
68
|
+
# @param session_id [String]
|
|
69
|
+
# @param requests [Array<Hash>]
|
|
70
|
+
# @return [Hash]
|
|
71
|
+
def session_summary(session_id, requests)
|
|
72
|
+
{
|
|
73
|
+
'session_id' => session_id,
|
|
74
|
+
'request_count' => requests.size,
|
|
75
|
+
'first_request' => requests.first&.fetch('timestamp', nil),
|
|
76
|
+
'last_request' => requests.last&.fetch('timestamp', nil)
|
|
77
|
+
}
|
|
78
|
+
end
|
|
65
79
|
end
|
|
66
80
|
end
|
|
67
81
|
end
|