codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'retriever'
|
|
4
|
+
require_relative 'storage/vector_store'
|
|
5
|
+
require_relative 'storage/pgvector'
|
|
6
|
+
require_relative 'storage/qdrant'
|
|
7
|
+
require_relative 'storage/metadata_store'
|
|
8
|
+
require_relative 'storage/graph_store'
|
|
9
|
+
require_relative 'embedding/provider'
|
|
10
|
+
require_relative 'embedding/openai'
|
|
11
|
+
|
|
12
|
+
module CodebaseIndex
|
|
13
|
+
# Builder reads a {Configuration} and instantiates the appropriate adapters,
|
|
14
|
+
# returning a fully wired {Retriever} ready for use.
|
|
15
|
+
#
|
|
16
|
+
# Named presets are provided for common deployment scenarios. All presets can
|
|
17
|
+
# be further customized with a block passed to {CodebaseIndex.configure_with_preset}.
|
|
18
|
+
#
|
|
19
|
+
# @example Using a preset
|
|
20
|
+
# CodebaseIndex.configure_with_preset(:local)
|
|
21
|
+
# result = CodebaseIndex.retrieve("How does the User model work?")
|
|
22
|
+
#
|
|
23
|
+
# @example Using a preset with block customization
|
|
24
|
+
# CodebaseIndex.configure_with_preset(:production) do |config|
|
|
25
|
+
# config.embedding_options = { api_key: ENV['OPENAI_API_KEY'] }
|
|
26
|
+
# config.vector_store_options = { url: ENV['QDRANT_URL'], collection: 'myapp' }
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
29
|
+
class Builder
  # Adapter type selections for each named preset.
  #
  # :local      — in-memory vectors and graph, SQLite metadata, Ollama embeddings
  # :postgresql — pgvector vectors, SQLite metadata, in-memory graph, OpenAI embeddings
  # :production — Qdrant vectors, SQLite metadata, in-memory graph, OpenAI embeddings
  PRESETS = {
    local: {
      vector_store: :in_memory,
      metadata_store: :sqlite,
      graph_store: :in_memory,
      embedding_provider: :ollama
    },
    postgresql: {
      vector_store: :pgvector,
      metadata_store: :sqlite,
      graph_store: :in_memory,
      embedding_provider: :openai
    },
    production: {
      vector_store: :qdrant,
      metadata_store: :sqlite,
      graph_store: :in_memory,
      embedding_provider: :openai
    }
  }.freeze

  # Create a fresh {Configuration} and assign every adapter type of the
  # named preset through the configuration's writer methods.
  #
  # @param name [Symbol] Preset name — a key of {PRESETS}
  # @return [Configuration] New configuration carrying the preset's adapter types
  # @raise [ArgumentError] if +name+ is not a key of {PRESETS}
  def self.preset_config(name)
    adapters = PRESETS.fetch(name) do
      raise ArgumentError, "Unknown preset: #{name}. Valid: #{PRESETS.keys.join(', ')}"
    end

    Configuration.new.tap do |config|
      adapters.each_pair { |attribute, adapter| config.public_send("#{attribute}=", adapter) }
    end
  end

  # @param config [Configuration] Configuration the adapter types and options are read from
  def initialize(config = CodebaseIndex.configuration)
    @config = config
  end

  # Instantiate all four adapters and hand them to a new {Retriever}.
  #
  # @return [Retriever] Retriever wired with the configured adapters
  def build_retriever
    adapters = {
      vector_store: build_vector_store,
      metadata_store: build_metadata_store,
      graph_store: build_graph_store,
      embedding_provider: build_embedding_provider
    }
    Retriever.new(**adapters)
  end

  # Create the vector store adapter selected by +config.vector_store+.
  # The pgvector and Qdrant adapters receive +config.vector_store_options+
  # as keyword arguments.
  #
  # @return [Storage::VectorStore::Interface] Vector store adapter instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_vector_store
    kind = @config.vector_store
    case kind
    when :in_memory
      Storage::VectorStore::InMemory.new
    when :pgvector
      Storage::VectorStore::Pgvector.new(**keyword_options(@config.vector_store_options))
    when :qdrant
      Storage::VectorStore::Qdrant.new(**keyword_options(@config.vector_store_options))
    else
      raise ArgumentError, "Unknown vector_store: #{kind}"
    end
  end

  # Create the embedding provider selected by +config.embedding_provider+,
  # passing +config.embedding_options+ as keyword arguments.
  #
  # @return [Embedding::Provider::Interface] Embedding provider instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_embedding_provider
    kind = @config.embedding_provider
    case kind
    when :openai
      Embedding::Provider::OpenAI.new(**keyword_options(@config.embedding_options))
    when :ollama
      Embedding::Provider::Ollama.new(**keyword_options(@config.embedding_options))
    else
      raise ArgumentError, "Unknown embedding_provider: #{kind}"
    end
  end

  private

  # Create the metadata store adapter selected by +config.metadata_store+.
  # The SQLite adapter receives +config.metadata_store_options+ as keyword arguments.
  #
  # @return [Storage::MetadataStore::Interface] Metadata store adapter instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_metadata_store
    kind = @config.metadata_store
    case kind
    when :in_memory
      Storage::MetadataStore::InMemory.new
    when :sqlite
      Storage::MetadataStore::SQLite.new(**keyword_options(@config.metadata_store_options))
    else
      raise ArgumentError, "Unknown metadata_store: #{kind}"
    end
  end

  # Create the graph store adapter selected by +config.graph_store+.
  #
  # @return [Storage::GraphStore::Interface] Graph store adapter instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_graph_store
    kind = @config.graph_store
    case kind
    when :in_memory
      Storage::GraphStore::Memory.new
    else
      raise ArgumentError, "Unknown graph_store: #{kind}"
    end
  end

  # Substitute an empty hash for unset (nil/false) options so they can be splatted.
  #
  # @param options [Hash, nil]
  # @return [Hash]
  def keyword_options(options)
    options || {}
  end
end
|
|
137
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module Chunking
|
|
7
|
+
# A single semantic chunk extracted from an ExtractedUnit.
|
|
8
|
+
#
|
|
9
|
+
# Chunks represent meaningful subsections of a code unit — associations,
|
|
10
|
+
# callbacks, validations, individual actions, etc. Each chunk is independently
|
|
11
|
+
# embeddable and retrievable, with a back-reference to its parent unit.
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# chunk = Chunk.new(
|
|
15
|
+
# content: "has_many :posts\nhas_many :comments",
|
|
16
|
+
# chunk_type: :associations,
|
|
17
|
+
# parent_identifier: "User",
|
|
18
|
+
# parent_type: :model
|
|
19
|
+
# )
|
|
20
|
+
# chunk.token_count # => 9
|
|
21
|
+
# chunk.identifier # => "User#associations"
|
|
22
|
+
#
|
|
23
|
+
class Chunk
  attr_reader :content, :chunk_type, :parent_identifier, :parent_type, :metadata

  # @param content [String, nil] The chunk's source code or text
  # @param chunk_type [Symbol] Semantic type (:summary, :associations, :callbacks, etc.)
  # @param parent_identifier [String] Identifier of the parent ExtractedUnit
  # @param parent_type [Symbol] Type of the parent unit (:model, :controller, etc.)
  # @param metadata [Hash] Optional chunk-specific metadata
  def initialize(content:, chunk_type:, parent_identifier:, parent_type:, metadata: {})
    @content = content
    @chunk_type = chunk_type
    @parent_identifier = parent_identifier
    @parent_type = parent_type
    @metadata = metadata
  end

  # Estimated token count: one token per four characters, rounded up.
  # The result is memoized. Nil content counts as zero tokens, matching
  # the nil handling in {#empty?}.
  #
  # @return [Integer]
  def token_count
    @token_count ||= (content.to_s.length / 4.0).ceil
  end

  # SHA256 hex digest of the content for change detection (memoized).
  # Nil content is hashed as the empty string so that {#to_h} works for
  # chunks that {#empty?} reports as empty.
  #
  # @return [String]
  def content_hash
    @content_hash ||= Digest::SHA256.hexdigest(content.to_s)
  end

  # Identifier in the form "<parent_identifier>#<chunk_type>".
  #
  # @return [String]
  def identifier
    "#{parent_identifier}##{chunk_type}"
  end

  # Whether the chunk has no meaningful content (nil or whitespace only).
  #
  # @return [Boolean]
  def empty?
    content.nil? || content.strip.empty?
  end

  # Serialize to hash for JSON output.
  #
  # @return [Hash]
  def to_h
    {
      content: content,
      chunk_type: chunk_type,
      parent_identifier: parent_identifier,
      parent_type: parent_type,
      identifier: identifier,
      token_count: token_count,
      content_hash: content_hash,
      metadata: metadata
    }
  end
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'chunk'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module Chunking
|
|
7
|
+
# Splits ExtractedUnits into semantic chunks based on unit type.
|
|
8
|
+
#
|
|
9
|
+
# Models are split by: summary, associations, validations, callbacks,
|
|
10
|
+
# scopes, methods. Controllers are split by: summary (filters), per-action.
|
|
11
|
+
# Other types are always returned as a single :whole chunk.
|
|
12
|
+
#
|
|
13
|
+
# Units below the token threshold are returned as a single :whole chunk.
|
|
14
|
+
#
|
|
15
|
+
# @example
|
|
16
|
+
# chunker = SemanticChunker.new(threshold: 200)
|
|
17
|
+
# chunks = chunker.chunk(extracted_unit)
|
|
18
|
+
# chunks.map(&:chunk_type) # => [:summary, :associations, :validations, :methods]
|
|
19
|
+
#
|
|
20
|
+
class SemanticChunker
  # Units whose estimated token count is at or below this value stay whole.
  DEFAULT_THRESHOLD = 200

  # @param threshold [Integer] Token count above which type-specific chunking applies
  def initialize(threshold: DEFAULT_THRESHOLD)
    @threshold = threshold
  end

  # Split an ExtractedUnit into semantic chunks.
  #
  # Returns no chunks when the unit has no source code (nil or whitespace
  # only). Units at or below the threshold, and units whose type is neither
  # :model nor :controller, come back as one :whole chunk.
  #
  # @param unit [ExtractedUnit] The unit to chunk
  # @return [Array<Chunk>] Ordered list of chunks
  def chunk(unit)
    return [] if blank_source?(unit)
    return [build_whole_chunk(unit)] unless unit.estimated_tokens > @threshold

    specialized = specialized_chunker(unit)
    specialized ? specialized.chunk : [build_whole_chunk(unit)]
  end

  private

  # @param unit [ExtractedUnit]
  # @return [Boolean] true when the unit's source code is nil or whitespace only
  def blank_source?(unit)
    unit.source_code.nil? || unit.source_code.strip.empty?
  end

  # Pick the type-specific chunker for a unit, if one exists.
  #
  # @param unit [ExtractedUnit]
  # @return [ModelChunker, ControllerChunker, nil]
  def specialized_chunker(unit)
    case unit.type
    when :model then ModelChunker.new(unit)
    when :controller then ControllerChunker.new(unit)
    end
  end

  # Wrap the unit's entire source code in a single :whole chunk.
  #
  # @param unit [ExtractedUnit]
  # @return [Chunk]
  def build_whole_chunk(unit)
    Chunk.new(
      parent_type: unit.type,
      parent_identifier: unit.identifier,
      chunk_type: :whole,
      content: unit.source_code
    )
  end
end
|
|
59
|
+
|
|
60
|
+
# Chunks a model unit by semantic sections: summary, associations,
|
|
61
|
+
# validations, callbacks, scopes, methods.
|
|
62
|
+
#
|
|
63
|
+
# @api private
|
|
64
|
+
class ModelChunker
  ASSOCIATION_PATTERN = /^\s*(has_many|has_one|belongs_to|has_and_belongs_to_many)\b/
  VALIDATION_PATTERN = /^\s*validates?\b/
  CALLBACK_ACTIONS = '(save|create|update|destroy|validation|action|commit|rollback|find|initialize|touch)'
  CALLBACK_PATTERN = /^\s*(before_|after_|around_)#{CALLBACK_ACTIONS}\b/
  SCOPE_PATTERN = /^\s*scope\s+:/
  METHOD_PATTERN = /^\s*def\s+/
  PRIVATE_PATTERN = /^\s*(private|protected)\s*$/

  # The patterns below are matched against a stripped line of code.
  # A line that opens a block needing its own `end`: a leading block
  # keyword, a `do` block, or a block keyword used as an assigned value.
  BLOCK_OPEN_PATTERN = /\A(?:def|if|unless|case|while|until|for|begin)\b|\bdo\b|=\s*(?:if|unless|case|begin)\b/
  # A line that starts by closing a block (`end`, `end.map`, `end # note`).
  BLOCK_CLOSE_PATTERN = /\Aend\b/
  # A line whose code finishes by opening a `do` block (`end.each do |x|`).
  TRAILING_DO_PATTERN = /\bdo\b(?:\s*\|[^|]*\|)?\s*\z/
  # Endless method definition (`def name = expr`, `def name(args) = expr`).
  ENDLESS_METHOD_PATTERN = /\Adef\s+(?:self\.)?\w+[?!]?(?:\s*\([^)]*\))?\s+=\s/
  # Method definition closed on its own line (`def name; body; end`);
  # an `end` that only appears after a `#` is not counted.
  SINGLE_LINE_METHOD_PATTERN = /\A[^#]*\bend\s*\z/

  SECTION_PATTERNS = {
    associations: ASSOCIATION_PATTERN,
    validations: VALIDATION_PATTERN,
    callbacks: CALLBACK_PATTERN,
    scopes: SCOPE_PATTERN
  }.freeze

  SEMANTIC_SECTIONS = %i[associations validations callbacks scopes].freeze

  # @param unit [ExtractedUnit]
  def initialize(unit)
    @unit = unit
  end

  # Split the unit's source into one chunk per non-empty section, in the
  # order: summary, associations, validations, callbacks, scopes, methods.
  #
  # @return [Array<Chunk>]
  def chunk
    sections = classify_lines(@unit.source_code.lines)
    build_chunks(sections).reject(&:empty?)
  end

  private

  # @param sections [Hash<Symbol, Array<String>>]
  # @return [Array<Chunk>]
  def build_chunks(sections)
    ordered = [:summary, *SEMANTIC_SECTIONS, :methods]
    ordered.reject { |type| sections[type].empty? }
           .map { |type| build_chunk(type, sections[type].join) }
  end

  # Classify each line into a semantic section.
  #
  # @param lines [Array<String>]
  # @return [Hash<Symbol, Array<String>>]
  def classify_lines(lines)
    state = { sections: empty_sections, current: :summary, in_method: false, depth: 0 }
    lines.each do |line|
      if state[:in_method]
        track_method_line(state, line)
      else
        classify_line(state, line)
      end
    end

    state[:sections]
  end

  # @return [Hash<Symbol, Array<String>>]
  def empty_sections
    { summary: [], associations: [], validations: [], callbacks: [], scopes: [], methods: [] }
  end

  # Record a line of a method body and leave method state once the `end`
  # matching the opening `def` has been seen.
  def track_method_line(state, line)
    state[:sections][:methods] << line
    update_method_depth(state, line)
    return if state[:depth].positive?

    state[:in_method] = false
    state[:depth] = 0
  end

  # Adjust the block nesting depth for one line of a method body.
  def update_method_depth(state, line)
    state[:depth] += depth_delta(line)
  end

  # Net change in block nesting caused by a line: +1 when it opens a block,
  # -1 when it closes one, 0 otherwise.
  #
  # This is a line-based heuristic, not a parser: keywords inside strings,
  # heredocs or trailing comments can still be miscounted.
  #
  # @param line [String]
  # @return [Integer]
  def depth_delta(line)
    code = line.strip
    # Comment-only lines never change nesting, even if they mention `do`/`end`.
    return 0 if code.empty? || code.start_with?('#')

    if code.match?(BLOCK_CLOSE_PATTERN)
      # `end.each do |x|` closes one block and opens the next: net zero.
      code.match?(TRAILING_DO_PATTERN) ? 0 : -1
    elsif code.match?(BLOCK_OPEN_PATTERN) && !code.match?(/\bend\b/)
      # An opener that also contains `end` is a one-liner (`if a then b end`).
      1
    else
      0
    end
  end

  # Classify a single non-method line.
  def classify_line(state, line)
    section = detect_semantic_section(line)
    if section
      state[:current] = section
      state[:sections][section] << line
    elsif line.match?(PRIVATE_PATTERN)
      state[:sections][:methods] << line
    elsif line.match?(METHOD_PATTERN)
      start_method(state, line)
    else
      assign_fallback(state, line)
    end
  end

  # Detect which semantic section a line belongs to, if any.
  #
  # @return [Symbol, nil] the section name, or nil if no pattern matched
  def detect_semantic_section(line)
    SECTION_PATTERNS.each do |section, pattern|
      return section if line.match?(pattern)
    end
    nil
  end

  # Record a `def` line. Method state is only entered when the definition
  # continues on later lines; one-line and endless definitions are complete
  # already and must not swallow the lines that follow them.
  def start_method(state, line)
    state[:sections][:methods] << line
    return if single_line_method?(line)

    state[:in_method] = true
    state[:depth] = 1
  end

  # @param line [String] a line matching METHOD_PATTERN
  # @return [Boolean] true when the definition opens and closes on this line
  def single_line_method?(line)
    code = line.strip
    code.match?(ENDLESS_METHOD_PATTERN) || code.match?(SINGLE_LINE_METHOD_PATTERN)
  end

  # Lines that match no pattern: blank lines, comments and anything seen
  # before the first semantic section go to the summary; other lines are
  # treated as a continuation of the most recent semantic section.
  def assign_fallback(state, line)
    if state[:current] == :summary || line.strip.empty? || line.match?(/^\s*#/)
      state[:sections][:summary] << line
    else
      state[:sections][state[:current]] << line
    end
  end

  # @return [Chunk]
  def build_chunk(chunk_type, content)
    Chunk.new(
      content: content,
      chunk_type: chunk_type,
      parent_identifier: @unit.identifier,
      parent_type: @unit.type
    )
  end
end
|
|
195
|
+
|
|
196
|
+
# Chunks a controller unit by actions: summary (class + filters),
|
|
197
|
+
# then one chunk per public action method.
|
|
198
|
+
#
|
|
199
|
+
# @api private
|
|
200
|
+
class ControllerChunker
  FILTER_PATTERN = /^\s*(before_action|after_action|around_action|skip_before_action)\b/
  METHOD_PATTERN = /^\s*def\s+/
  PRIVATE_PATTERN = /^\s*(private|protected)\s*$/

  # The patterns below are matched against a stripped line of code.
  # A line that opens a block needing its own `end`: a leading block
  # keyword, a `do` block, or a block keyword used as an assigned value.
  BLOCK_OPEN_PATTERN = /\A(?:def|if|unless|case|while|until|for|begin)\b|\bdo\b|=\s*(?:if|unless|case|begin)\b/
  # A line that starts by closing a block (`end`, `end.map`, `end # note`).
  BLOCK_CLOSE_PATTERN = /\Aend\b/
  # A line whose code finishes by opening a `do` block (`end.each do |x|`).
  TRAILING_DO_PATTERN = /\bdo\b(?:\s*\|[^|]*\|)?\s*\z/
  # Endless method definition (`def name = expr`, `def name(args) = expr`).
  ENDLESS_METHOD_PATTERN = /\Adef\s+(?:self\.)?\w+[?!]?(?:\s*\([^)]*\))?\s+=\s/
  # Method definition closed on its own line (`def name; body; end`);
  # an `end` that only appears after a `#` is not counted.
  SINGLE_LINE_METHOD_PATTERN = /\A[^#]*\bend\s*\z/

  # @param unit [ExtractedUnit]
  def initialize(unit)
    @unit = unit
  end

  # Split the unit's source into a :summary chunk followed by one
  # :action_<name> chunk per method defined before `private`/`protected`.
  # Chunks without meaningful content are dropped.
  #
  # @return [Array<Chunk>]
  def chunk
    state = parse_lines(@unit.source_code.lines)
    build_chunks(state).reject(&:empty?)
  end

  private

  # Parse controller lines into summary + action buffers.
  #
  # @param lines [Array<String>]
  # @return [Hash]
  def parse_lines(lines)
    state = { summary: [], actions: {}, current_action: nil, depth: 0, in_private: false }
    lines.each do |line|
      if state[:current_action]
        track_action_line(state, line)
      else
        classify_controller_line(state, line)
      end
    end

    state
  end

  # Record a line of the current action and leave action state once the
  # `end` matching the opening `def` has been seen.
  def track_action_line(state, line)
    state[:actions][state[:current_action]] << line
    state[:depth] += depth_delta(line)
    return if state[:depth].positive?

    state[:current_action] = nil
    state[:depth] = 0
  end

  # Net change in block nesting caused by a line: +1 when it opens a block,
  # -1 when it closes one, 0 otherwise.
  #
  # This is a line-based heuristic, not a parser: keywords inside strings,
  # heredocs or trailing comments can still be miscounted.
  #
  # @param line [String]
  # @return [Integer]
  def depth_delta(line)
    code = line.strip
    # Comment-only lines never change nesting, even if they mention `do`/`end`.
    return 0 if code.empty? || code.start_with?('#')

    if code.match?(BLOCK_CLOSE_PATTERN)
      # `end.each do |x|` closes one block and opens the next: net zero.
      code.match?(TRAILING_DO_PATTERN) ? 0 : -1
    elsif code.match?(BLOCK_OPEN_PATTERN) && !code.match?(/\bend\b/)
      # An opener that also contains `end` is a one-liner (`if a then b end`).
      1
    else
      0
    end
  end

  # Route a line seen outside an action: a visibility keyword switches to
  # private mode, a `def` before that point starts an action, and everything
  # else (including methods after `private`/`protected`) goes to the summary.
  def classify_controller_line(state, line)
    if line.match?(PRIVATE_PATTERN)
      state[:in_private] = true
      state[:summary] << line
    elsif !state[:in_private] && line.match?(METHOD_PATTERN)
      start_action(state, line)
    else
      state[:summary] << line
    end
  end

  # Open a buffer for the action named on a `def` line. Action state is only
  # entered when the definition continues on later lines; one-line and
  # endless definitions are complete already.
  def start_action(state, line)
    action_name = line[/def\s+(\w+)/, 1]
    state[:actions][action_name] = [line]
    return if single_line_method?(line)

    state[:current_action] = action_name
    state[:depth] = 1
  end

  # @param line [String] a line matching METHOD_PATTERN
  # @return [Boolean] true when the definition opens and closes on this line
  def single_line_method?(line)
    code = line.strip
    code.match?(ENDLESS_METHOD_PATTERN) || code.match?(SINGLE_LINE_METHOD_PATTERN)
  end

  # @param state [Hash]
  # @return [Array<Chunk>]
  def build_chunks(state)
    chunks = []
    chunks << build_chunk(:summary, state[:summary].join) if state[:summary].any?

    state[:actions].each do |action_name, action_lines|
      chunks << build_chunk(:"action_#{action_name}", action_lines.join)
    end

    chunks
  end

  # @return [Chunk]
  def build_chunk(chunk_type, content)
    Chunk.new(
      content: content,
      chunk_type: chunk_type,
      parent_identifier: @unit.identifier,
      parent_type: @unit.type
    )
  end
end
|
|
289
|
+
end
|
|
290
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module Console
|
|
5
|
+
module Adapters
|
|
6
|
+
# Cache adapter that auto-detects the active cache store.
|
|
7
|
+
#
|
|
8
|
+
# Supports Redis, Solid Cache, memory, and file cache stores.
|
|
9
|
+
# Detection checks Rails.cache class name first, then falls back
|
|
10
|
+
# to checking for SolidCache constant.
|
|
11
|
+
#
|
|
12
|
+
# @example
|
|
13
|
+
# CacheAdapter.detect # => :redis
|
|
14
|
+
# CacheAdapter.stats # => { tool: 'cache_stats', params: {} }
|
|
15
|
+
#
|
|
16
|
+
module CacheAdapter
  # Fragments of a cache store class name mapped to the backend they indicate.
  STORE_PATTERNS = {
    'RedisCacheStore' => :redis,
    'MemoryStore' => :memory,
    'FileStore' => :file
  }.freeze

  module_function

  # Detect the active cache store backend.
  #
  # The class name of +Rails.cache+ is compared against {STORE_PATTERNS}
  # first; when nothing matches (or Rails/its cache is unavailable) the
  # presence of the SolidCache constant decides the result.
  #
  # @return [Symbol] One of :redis, :solid_cache, :memory, :file, :unknown
  def detect
    if defined?(::Rails) && ::Rails.respond_to?(:cache) && ::Rails.cache
      store_class = ::Rails.cache.class.name.to_s
      _fragment, backend = STORE_PATTERNS.find { |fragment, _| store_class.include?(fragment) }
      return backend if backend
    end

    defined?(::SolidCache) ? :solid_cache : :unknown
  end

  # Build the bridge request for cache store statistics.
  #
  # @param namespace [String, nil] Cache namespace filter; omitted from the params when nil
  # @return [Hash] Bridge request
  def stats(namespace: nil)
    params = { namespace: namespace }.compact
    { tool: 'cache_stats', params: params }
  end

  # Build the bridge request for cache store info.
  #
  # @return [Hash] Bridge request
  def info
    { tool: 'cache_info', params: {} }
  end
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Console
    module Adapters
      # Job backend adapter for GoodJob.
      #
      # Builds bridge requests for GoodJob queue stats, failure listing,
      # job lookup, scheduled jobs, and retry operations.
      #
      # @example
      #   adapter = GoodJobAdapter.new
      #   adapter.queue_stats # => { tool: 'good_job_queue_stats', params: {} }
      #
      class GoodJobAdapter
        # Smallest limit forwarded to the bridge.
        MIN_LIMIT = 1
        # Largest limit forwarded to the bridge.
        MAX_LIMIT = 100

        # Check if GoodJob is available in the current environment.
        #
        # @return [Boolean]
        def self.available?
          defined?(::GoodJob) ? true : false
        end

        # Get queue statistics (sizes, latencies).
        #
        # @return [Hash] Bridge request
        def queue_stats
          { tool: 'good_job_queue_stats', params: {} }
        end

        # List recent job failures.
        #
        # @param limit [Integer] Max failures (default: 10, min: 1, max: 100)
        # @return [Hash] Bridge request
        def recent_failures(limit: 10)
          { tool: 'good_job_recent_failures', params: { limit: bounded_limit(limit) } }
        end

        # Find a job by its ID.
        #
        # @param id [Object] GoodJob job ID
        # @return [Hash] Bridge request
        def find_job(id:)
          { tool: 'good_job_find_job', params: { id: id } }
        end

        # List scheduled jobs.
        #
        # @param limit [Integer] Max jobs (default: 20, min: 1, max: 100)
        # @return [Hash] Bridge request
        def scheduled_jobs(limit: 20)
          { tool: 'good_job_scheduled_jobs', params: { limit: bounded_limit(limit) } }
        end

        # Retry a failed job.
        #
        # @param id [Object] GoodJob job ID
        # @return [Hash] Bridge request
        def retry_job(id:)
          { tool: 'good_job_retry_job', params: { id: id } }
        end

        private

        # Clamp a requested limit into MIN_LIMIT..MAX_LIMIT so that zero or
        # negative values are never forwarded to the bridge.
        #
        # @param limit [Integer] Requested limit
        # @return [Integer] Limit within the allowed bounds
        def bounded_limit(limit)
          limit.clamp(MIN_LIMIT, MAX_LIMIT)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Console
    module Adapters
      # Job backend adapter for Sidekiq.
      #
      # Builds bridge requests for Sidekiq queue stats, failure listing,
      # job lookup, scheduled jobs, and retry operations.
      #
      # @example
      #   adapter = SidekiqAdapter.new
      #   adapter.queue_stats # => { tool: 'sidekiq_queue_stats', params: {} }
      #
      class SidekiqAdapter
        # Smallest limit forwarded to the bridge.
        MIN_LIMIT = 1
        # Largest limit forwarded to the bridge.
        MAX_LIMIT = 100

        # Check if Sidekiq is available in the current environment.
        #
        # @return [Boolean]
        def self.available?
          defined?(::Sidekiq) ? true : false
        end

        # Get queue statistics (sizes, latencies).
        #
        # @return [Hash] Bridge request
        def queue_stats
          { tool: 'sidekiq_queue_stats', params: {} }
        end

        # List recent job failures.
        #
        # @param limit [Integer] Max failures (default: 10, min: 1, max: 100)
        # @return [Hash] Bridge request
        def recent_failures(limit: 10)
          { tool: 'sidekiq_recent_failures', params: { limit: bounded_limit(limit) } }
        end

        # Find a job by its ID.
        #
        # @param id [String] Sidekiq job ID
        # @return [Hash] Bridge request
        def find_job(id:)
          { tool: 'sidekiq_find_job', params: { id: id } }
        end

        # List scheduled jobs.
        #
        # @param limit [Integer] Max jobs (default: 20, min: 1, max: 100)
        # @return [Hash] Bridge request
        def scheduled_jobs(limit: 20)
          { tool: 'sidekiq_scheduled_jobs', params: { limit: bounded_limit(limit) } }
        end

        # Retry a failed job.
        #
        # @param id [String] Sidekiq job ID
        # @return [Hash] Bridge request
        def retry_job(id:)
          { tool: 'sidekiq_retry_job', params: { id: id } }
        end

        private

        # Clamp a requested limit into MIN_LIMIT..MAX_LIMIT so that zero or
        # negative values are never forwarded to the bridge.
        #
        # @param limit [Integer] Requested limit
        # @return [Integer] Limit within the allowed bounds
        def bounded_limit(limit)
          limit.clamp(MIN_LIMIT, MAX_LIMIT)
        end
      end
    end
  end
end
|