codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'fileutils'
|
|
5
|
+
require_relative 'flow_assembler'
|
|
6
|
+
|
|
7
|
+
module CodebaseIndex
  # Orchestrates pre-computation of request flow maps for all controller actions.
  #
  # After the dependency graph is built, FlowPrecomputer iterates controller units,
  # runs FlowAssembler for each action, and writes one JSON flow document per
  # action (plus a `flow_index.json` lookup file) under `<output_dir>/flows`.
  #
  # @example
  #   precomputer = FlowPrecomputer.new(units: all_units, graph: dep_graph, output_dir: out)
  #   flow_map = precomputer.precompute
  #   flow_map["OrdersController#create"] #=> "/tmp/codebase_index/flows/OrdersController_create.json"
  #
  class FlowPrecomputer
    # Default maximum recursion depth for flow assembly
    DEFAULT_MAX_DEPTH = 3

    # @param units [Array<ExtractedUnit>] All extracted units
    # @param graph [DependencyGraph] The dependency graph
    # @param output_dir [String] Base output directory
    # @param max_depth [Integer] Maximum flow assembly depth
    def initialize(units:, graph:, output_dir:, max_depth: DEFAULT_MAX_DEPTH)
      @units = units
      @graph = graph
      @output_dir = output_dir
      @max_depth = max_depth
      @flows_dir = File.join(output_dir, 'flows')
    end

    # Pre-compute flow documents for all controller actions.
    #
    # Actions whose flow cannot be assembled or written are logged and skipped;
    # they do not appear in the returned map. Each controller unit that produced
    # at least one flow gets `metadata[:flow_paths]` set to `{ action => path }`.
    #
    # @return [Hash{String => String}] Map of entry_point to flow file path
    def precompute
      FileUtils.mkdir_p(@flows_dir)

      assembler = FlowAssembler.new(graph: @graph, extracted_dir: @output_dir)
      flow_map = {}

      controller_units.each do |unit|
        # Metadata may be keyed by symbol or by string.
        actions = unit.metadata[:actions] || unit.metadata['actions'] || []

        unit_flow_paths = actions.each_with_object({}) do |action, paths|
          entry_point = "#{unit.identifier}##{action}"
          flow_path = assemble_and_write(assembler, entry_point, unit.identifier, action)
          next unless flow_path

          flow_map[entry_point] = flow_path
          paths[action] = flow_path
        end

        unit.metadata[:flow_paths] = unit_flow_paths if unit_flow_paths.any?
      end

      write_flow_index(flow_map)

      flow_map
    end

    private

    # Filter units to only controllers.
    #
    # @return [Array<ExtractedUnit>]
    def controller_units
      @units.select { |u| u.type.to_s == 'controller' }
    end

    # Assemble a flow for one entry point and write the JSON file.
    #
    # @param assembler [FlowAssembler]
    # @param entry_point [String]
    # @param controller_id [String]
    # @param action [String]
    # @return [String, nil] The written file path, or nil on failure
    def assemble_and_write(assembler, entry_point, controller_id, action)
      flow = assembler.assemble(entry_point, max_depth: @max_depth)

      # Namespace separators are replaced so the identifier is a flat file name.
      filename = "#{controller_id.gsub('::', '__')}_#{action}.json"
      flow_path = File.join(@flows_dir, filename)

      File.write(flow_path, JSON.pretty_generate(flow.to_h))

      flow_path
    rescue StandardError => e
      log_failure(entry_point, e)
      nil
    end

    # Report a failed flow without letting the report itself raise.
    #
    # Uses Rails.logger when it is available and falls back to Kernel#warn
    # otherwise, so a missing Rails constant cannot turn a skipped action
    # into an aborted run.
    #
    # @param entry_point [String]
    # @param error [StandardError]
    # @return [void]
    def log_failure(entry_point, error)
      message = "[CodebaseIndex] Flow precompute failed for #{entry_point}: #{error.message}"
      logger = defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

      if logger
        logger.error(message)
      else
        warn(message)
      end
    end

    # Write the flow index mapping entry points to file paths.
    #
    # @param flow_map [Hash{String => String}]
    def write_flow_index(flow_map)
      index_path = File.join(@flows_dir, 'flow_index.json')
      File.write(index_path, JSON.pretty_generate(flow_map))
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Formatting
    # Abstract parent of every formatting adapter.
    #
    # An adapter turns an AssembledContext into text shaped for one consumer
    # (a particular LLM family or a human reader). Concrete adapters override
    # {#format}; calling it on this class always raises.
    #
    # @abstract Subclass and override {#format} to implement.
    #
    # @example
    #   class MyAdapter < Base
    #     def format(assembled_context)
    #       "Content: #{assembled_context.context}"
    #     end
    #   end
    #
    class Base
      # Render an assembled context. Must be overridden.
      #
      # @param _assembled_context [CodebaseIndex::Retrieval::AssembledContext]
      # @return [String] Formatted output
      # @raise [NotImplementedError] always, unless a subclass overrides it
      def format(_assembled_context)
        message = "#{self.class}#format must be implemented"
        raise NotImplementedError, message
      end

      private

      # Rough token estimate: one token per four characters, rounded up.
      #
      # @param text [String]
      # @return [Integer]
      def estimate_tokens(text)
        text.length.fdiv(4).ceil
      end
    end
  end
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
  module Formatting
    # Formats assembled context as XML for Claude models.
    #
    # Produces structured XML with:
    # - `<codebase-context>` root element
    # - `<meta>` tag with token usage and budget
    # - `<content>` section with indented, XML-escaped context text
    # - `<sources>` section with self-closing `<source>` elements
    #
    # @example
    #   adapter = ClaudeAdapter.new
    #   xml = adapter.format(assembled_context)
    #   # => "<codebase-context>\n <meta tokens=\"42\" budget=\"8000\" />\n..."
    #
    class ClaudeAdapter < Base
      # Characters that must not appear raw in XML text or double-quoted
      # attribute values, mapped to their predefined entities.
      XML_ESCAPES = {
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;'
      }.freeze

      # Format assembled context as XML for Claude.
      #
      # @param assembled_context [CodebaseIndex::Retrieval::AssembledContext]
      # @return [String] XML-formatted context
      def format(assembled_context)
        parts = []
        parts << '<codebase-context>'
        parts << " <meta tokens=\"#{assembled_context.tokens_used}\" budget=\"#{assembled_context.budget}\" />"
        parts << format_content(assembled_context.context)
        parts << format_sources(assembled_context.sources)
        parts << '</codebase-context>'
        parts.join("\n")
      end

      private

      # Format the content section with indentation.
      #
      # An empty context yields an empty `<content>` element.
      #
      # @param context [String]
      # @return [String]
      def format_content(context)
        lines = []
        lines << ' <content>'
        lines << indent(escape_xml(context), 4) unless context.empty?
        lines << ' </content>'
        lines.join("\n")
      end

      # Format the sources section.
      #
      # @param sources [Array<Hash>]
      # @return [String]
      def format_sources(sources)
        entries = sources.map { |source| " <source #{source_attributes(source)} />" }
        [' <sources>', *entries, ' </sources>'].join("\n")
      end

      # Build attribute string for a source element.
      #
      # Every value is stringified and escaped so that none can terminate
      # its quoted attribute early.
      #
      # @param source [Hash] expects :identifier, :type, :score and :file_path keys
      # @return [String]
      def source_attributes(source)
        [
          "identifier=\"#{escape_xml(source[:identifier].to_s)}\"",
          "type=\"#{escape_xml(source[:type].to_s)}\"",
          "score=\"#{escape_xml(source[:score].to_s)}\"",
          "file=\"#{escape_xml(source[:file_path].to_s)}\""
        ].join(' ')
      end

      # Indent every line of text by the given number of spaces.
      #
      # @param text [String]
      # @param spaces [Integer]
      # @return [String]
      def indent(text, spaces)
        prefix = ' ' * spaces
        text.lines.map { |line| "#{prefix}#{line.chomp}" }.join("\n")
      end

      # Escape XML special characters.
      #
      # Replaces `&`, `<`, `>` and `"` with their predefined entities in a
      # single pass, so an ampersand produced by one replacement is never
      # escaped a second time.
      #
      # @param text [String]
      # @return [String]
      def escape_xml(text)
        text.gsub(/[&<>"]/, XML_ESCAPES)
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
  module Formatting
    # Renders assembled context as plain text for any LLM.
    #
    # The output consists of:
    # - a `=== CODEBASE CONTEXT ===` header
    # - a token usage line
    # - the context, set off by `---` dividers
    # - one bracketed line per source
    #
    # @example
    #   adapter = GenericAdapter.new
    #   text = adapter.format(assembled_context)
    #   # => "=== CODEBASE CONTEXT ===\nTokens: 42 / 8000\n..."
    #
    class GenericAdapter < Base
      # Render the assembled context as newline-joined plain text.
      #
      # @param assembled_context [CodebaseIndex::Retrieval::AssembledContext]
      # @return [String] Plain text formatted context
      def format(assembled_context)
        preamble = [
          '=== CODEBASE CONTEXT ===',
          "Tokens: #{assembled_context.tokens_used} / #{assembled_context.budget}",
          '---',
          assembled_context.context
        ]
        (preamble + format_sources(assembled_context.sources)).join("\n")
      end

      private

      # Build the trailing sources section in bracket notation.
      #
      # @param sources [Array<Hash>]
      # @return [Array<String>] A `---` divider followed by one line per
      #   source, or an empty array when there are no sources
      def format_sources(sources)
        return [] if sources.empty?

        entries = sources.map do |src|
          "[Source: #{src[:identifier]} (#{src[:type]}) \u2014 score: #{src[:score]}]"
        end
        ['---', *entries]
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
  module Formatting
    # Formats assembled context as Markdown for GPT models.
    #
    # Produces Markdown with:
    # - `## Codebase Context` heading
    # - Token usage in bold
    # - Content in a fenced Ruby code block
    # - Sources as a bullet list
    #
    # @example
    #   adapter = GptAdapter.new
    #   md = adapter.format(assembled_context)
    #   # => "## Codebase Context\n\n**Tokens:** 42/8000\n..."
    #
    class GptAdapter < Base
      # Shortest fence Markdown accepts for a fenced code block.
      MIN_FENCE_LENGTH = 3

      # Format assembled context as Markdown for GPT.
      #
      # @param assembled_context [CodebaseIndex::Retrieval::AssembledContext]
      # @return [String] Markdown-formatted context
      def format(assembled_context)
        context = assembled_context.context
        fence = code_fence(context)

        parts = []
        parts << '## Codebase Context'
        parts << ''
        parts << "**Tokens:** #{assembled_context.tokens_used}/#{assembled_context.budget}"
        parts << ''
        parts << '---'
        parts << ''
        parts << "#{fence}ruby"
        parts << context
        parts << fence
        parts.concat(format_sources(assembled_context.sources))
        parts.join("\n")
      end

      private

      # Pick a backtick fence that the content cannot close by accident.
      #
      # The fence is one backtick longer than the longest backtick run in the
      # text and never shorter than three, so content without such runs is
      # still wrapped in a plain three-backtick fence.
      #
      # @param text [String, nil]
      # @return [String]
      def code_fence(text)
        longest_run = text.to_s.scan(/`+/).map(&:length).max || 0
        '`' * [MIN_FENCE_LENGTH, longest_run + 1].max
      end

      # Format sources as a Markdown bullet list.
      #
      # @param sources [Array<Hash>]
      # @return [Array<String>] Lines to append; empty when there are no sources
      def format_sources(sources)
        return [] if sources.empty?

        bullets = sources.map do |source|
          "- **#{source[:identifier]}** (#{source[:type]}) \u2014 " \
            "score: #{source[:score]}, file: #{source[:file_path]}"
        end
        ['', '### Sources', '', *bullets]
      end
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'base'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
  module Formatting
    # Renders assembled context for people, using box-drawing characters.
    #
    # The output consists of:
    # - a boxed title drawn with Unicode box characters
    # - a token usage summary
    # - the context text
    # - one decorated entry per source
    #
    # @example
    #   adapter = HumanAdapter.new
    #   output = adapter.format(assembled_context)
    #
    class HumanAdapter < Base
      HEADER_WIDTH = 50

      # Render the assembled context for human-readable display.
      #
      # @param assembled_context [CodebaseIndex::Retrieval::AssembledContext]
      # @return [String] Box-drawing formatted context
      def format(assembled_context)
        sections = format_header(assembled_context)
        sections << ''
        body = assembled_context.context
        sections << body unless body.empty?
        sections.concat(format_sources(assembled_context.sources))
        sections.join("\n")
      end

      private

      # Build the boxed title followed by the token usage line.
      #
      # The box is at least HEADER_WIDTH wide and grows to fit the title or
      # the token line if either is longer.
      #
      # @param assembled_context [CodebaseIndex::Retrieval::AssembledContext]
      # @return [Array<String>]
      def format_header(assembled_context)
        heading = 'Codebase Context'
        usage = "Tokens: #{assembled_context.tokens_used} / #{assembled_context.budget}"
        inner = [HEADER_WIDTH, heading.length + 4, usage.length + 4].max
        rule = '═' * inner

        [
          "\u2554#{rule}\u2557",
          "\u2551 #{heading.ljust(inner - 2)} \u2551",
          "\u255A#{rule}\u255D",
          usage
        ]
      end

      # Build the sources section.
      #
      # @param sources [Array<Hash>]
      # @return [Array<String>] Empty when there are no sources
      def format_sources(sources)
        return [] if sources.empty?

        ['', 'Sources:'] + sources.flat_map { |entry| format_source_entry(entry) }
      end

      # Build the two display lines for one source: a ruled heading carrying
      # the score, then the file path.
      #
      # @param source [Hash]
      # @return [Array<String>]
      def format_source_entry(source)
        label = "\u2500\u2500 #{source[:identifier]} (#{source[:type]}) "
        # Pad with at least one rule character, even when the label is long.
        filler = "\u2500" * [1, HEADER_WIDTH - label.length - 12].max
        ["#{label}#{filler} score: #{source[:score]}", " #{source[:file_path]}"]
      end
    end
  end
end
|