codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# @see CodebaseIndex
|
|
4
|
+
module CodebaseIndex
  # Fallback base error so this file can be loaded on its own; the definition
  # is skipped when CodebaseIndex::Error already exists.
  class Error < StandardError; end unless defined?(CodebaseIndex::Error)

  module Console
    # Raised when a model or column name is not present in the registry.
    class ValidationError < CodebaseIndex::Error; end

    # Validates model names and column names against the Rails schema.
    #
    # In production, validates against AR::Base.descendants and model.column_names.
    # Accepts an injectable registry for testing without Rails.
    #
    # Names are normally Strings. A Symbol is accepted too: it is matched
    # as-is first, and against its String form when no exact entry exists.
    # No other type is coerced, so the check stays a strict allow-list.
    #
    # @example
    #   validator = ModelValidator.new(registry: { 'User' => %w[id email name] })
    #   validator.validate_model!('User')           # => true
    #   validator.validate_model!('Hacker')         # => raises ValidationError
    #   validator.validate_column!('User', 'email') # => true
    #
    class ModelValidator
      # @param registry [Hash<String, Array<String>>] Model name => column names mapping
      def initialize(registry:)
        @registry = registry
      end

      # Validate that a model name is known.
      #
      # @param model_name [String, Symbol]
      # @return [true]
      # @raise [ValidationError] if model is unknown
      def validate_model!(model_name)
        return true if @registry.key?(registry_key(model_name))

        raise ValidationError, "Unknown model: #{model_name}. Available: #{@registry.keys.sort.join(', ')}"
      end

      # Validate that a column exists on a model.
      #
      # @param model_name [String, Symbol]
      # @param column_name [String, Symbol]
      # @return [true]
      # @raise [ValidationError] if the model or the column is unknown
      def validate_column!(model_name, column_name)
        validate_model!(model_name)
        columns = registered_columns(model_name)
        return true if column_known?(columns, column_name)

        raise ValidationError,
              "Unknown column '#{column_name}' on #{model_name}. Available: #{columns.sort.join(', ')}"
      end

      # Validate multiple columns at once.
      #
      # @param model_name [String, Symbol]
      # @param column_names [Array<String>, String, nil] a single name is treated as a
      #   one-element list; nil is treated as an empty list
      # @return [true]
      # @raise [ValidationError] if any column is unknown
      def validate_columns!(model_name, column_names) # rubocop:disable Naming/PredicateMethod
        # Array() keeps a nil or scalar argument from surfacing as NoMethodError.
        Array(column_names).each { |col| validate_column!(model_name, col) }
        true
      end

      # List all known model names.
      #
      # @return [Array<String>] registry keys in sorted order
      def model_names
        @registry.keys.sort
      end

      # List columns for a model.
      #
      # @param model_name [String, Symbol]
      # @return [Array<String>] sorted column names (empty when the registry holds nil)
      # @raise [ValidationError] if model is unknown
      def columns_for(model_name)
        validate_model!(model_name)
        registered_columns(model_name).sort
      end

      private

      # Pick the registry key to use for +model_name+: the value itself, unless
      # it is a Symbol with no exact entry, in which case its String form.
      def registry_key(model_name)
        return model_name unless model_name.is_a?(Symbol)

        @registry.key?(model_name) ? model_name : model_name.to_s
      end

      # Column list registered for the model, coerced so a nil entry reads as empty.
      def registered_columns(model_name)
        Array(@registry[registry_key(model_name)])
      end

      # True when +column_name+ is listed, either exactly or (Symbols only) by String form.
      def column_known?(columns, column_name)
        return true if columns.include?(column_name)

        column_name.is_a?(Symbol) && columns.include?(column_name.to_s)
      end
    end
  end
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Minimal stand-in used when ActiveRecord has not been loaded, so that
# ActiveRecord::Rollback can still be referenced and raised.
unless defined?(ActiveRecord::Rollback)
  module ActiveRecord
    Rollback = Class.new(StandardError)
  end
end
|
|
9
|
+
|
|
10
|
+
module CodebaseIndex
|
|
11
|
+
module Console
|
|
12
|
+
# Wraps tool execution in a rolled-back transaction with statement timeout.
|
|
13
|
+
#
|
|
14
|
+
# Safety layers:
|
|
15
|
+
# - Every query runs inside a transaction that is always rolled back
|
|
16
|
+
# - Statement timeout prevents runaway queries
|
|
17
|
+
# - Column redaction replaces sensitive values with "[REDACTED]"
|
|
18
|
+
#
|
|
19
|
+
# @example
|
|
20
|
+
# ctx = SafeContext.new(connection: conn, timeout_ms: 5000, redacted_columns: %w[ssn])
|
|
21
|
+
# ctx.execute { |c| c.execute("SELECT count(*) FROM users") }
|
|
22
|
+
#
|
|
23
|
+
class SafeContext
|
|
24
|
+
# @param connection [Object] Database connection (or mock)
|
|
25
|
+
# @param timeout_ms [Integer] Statement timeout in milliseconds
|
|
26
|
+
# @param redacted_columns [Array<String>] Column names whose values should be redacted
|
|
27
|
+
def initialize(connection:, timeout_ms: 5000, redacted_columns: [])
|
|
28
|
+
@connection = connection
|
|
29
|
+
@timeout_ms = timeout_ms
|
|
30
|
+
@redacted_columns = redacted_columns.map(&:to_s)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Execute a block within a rolled-back transaction with statement timeout.
|
|
34
|
+
#
|
|
35
|
+
# The transaction is always rolled back to ensure read-only behavior.
|
|
36
|
+
#
|
|
37
|
+
# @yield [connection] The database connection
|
|
38
|
+
# @return [Object] The block's return value
|
|
39
|
+
def execute
|
|
40
|
+
result = nil
|
|
41
|
+
@connection.transaction do
|
|
42
|
+
set_timeout
|
|
43
|
+
result = yield(@connection)
|
|
44
|
+
raise ActiveRecord::Rollback
|
|
45
|
+
end
|
|
46
|
+
result
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Replace values of redacted columns with "[REDACTED]".
|
|
50
|
+
#
|
|
51
|
+
# @param hash [Hash] Record attributes
|
|
52
|
+
# @param _model_name [String] Model name (reserved for per-model redaction rules)
|
|
53
|
+
# @return [Hash] Redacted copy of the hash
|
|
54
|
+
def redact(hash, _model_name = nil)
|
|
55
|
+
return hash if @redacted_columns.empty?
|
|
56
|
+
|
|
57
|
+
hash.transform_keys(&:to_s).each_with_object({}) do |(key, value), redacted|
|
|
58
|
+
redacted[key] = @redacted_columns.include?(key) ? '[REDACTED]' : value
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# Set statement timeout on the connection.
|
|
65
|
+
#
|
|
66
|
+
# PostgreSQL uses SET statement_timeout (applies to all statement types).
|
|
67
|
+
# MySQL uses SET max_execution_time (applies to SELECT only — MySQL limitation:
|
|
68
|
+
# DDL and DML statements cannot be time-limited via this variable).
|
|
69
|
+
def set_timeout(connection = @connection, timeout_ms = @timeout_ms)
|
|
70
|
+
adapter = connection.adapter_name.downcase
|
|
71
|
+
if adapter.include?('mysql')
|
|
72
|
+
connection.execute("SET max_execution_time = #{timeout_ms.to_i}")
|
|
73
|
+
else
|
|
74
|
+
connection.execute("SET statement_timeout = '#{timeout_ms.to_i}ms'")
|
|
75
|
+
end
|
|
76
|
+
rescue StandardError
|
|
77
|
+
# Unsupported adapter — timeout enforcement is best-effort
|
|
78
|
+
nil
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|