codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module MCP
    # Base class for rendering MCP tool responses in different output formats.
    #
    # Subclasses implement tool-specific render methods (render_lookup, render_search, etc.)
    # and a render_default fallback. The dispatch uses convention: tool name maps to method name.
    #
    # @example
    #   renderer = ToolResponseRenderer.for(:markdown)
    #   renderer.render(:lookup, unit_data)
    #
    class ToolResponseRenderer
      VALID_FORMATS = %i[claude markdown plain json].freeze

      # Factory method to build the appropriate renderer for a format.
      #
      # @param format [Symbol] One of :claude, :markdown, :plain, :json
      # @return [ToolResponseRenderer] A renderer instance
      # @raise [ArgumentError] if format is unknown
      def self.for(format)
        # Loaded lazily at call time rather than at the top of the file.
        # NOTE(review): presumably because the renderer files reference this
        # base class and would otherwise form a circular require — confirm.
        require_relative 'renderers/markdown_renderer'
        require_relative 'renderers/claude_renderer'
        require_relative 'renderers/plain_renderer'
        require_relative 'renderers/json_renderer'

        case format
        when :claude then Renderers::ClaudeRenderer.new
        when :markdown then Renderers::MarkdownRenderer.new
        when :plain then Renderers::PlainRenderer.new
        when :json then Renderers::JsonRenderer.new
        else raise ArgumentError, "Unknown format: #{format.inspect}. Valid: #{VALID_FORMATS.inspect}"
        end
      end

      # Render a tool response. Dispatches to render_<tool_name> if defined
      # (public or private), otherwise falls back to render_default.
      #
      # @param tool_name [Symbol, String] The tool name
      # @param data [Object] The tool result data
      # @param opts [Hash] Additional rendering options (forwarded to tool-specific
      #   renderers only; render_default receives just the data)
      # @return [String] Rendered response text
      def render(tool_name, data, **opts)
        method_name = :"render_#{tool_name}"

        # A tool literally named "default" resolves to render_default, which takes
        # no options; route it through the fallback so opts are never forwarded to it.
        return render_default(data) if method_name == :render_default || !respond_to?(method_name, true)

        send(method_name, data, **opts)
      end

      # Default rendering — subclasses must implement.
      #
      # @param data [Object] The data to render
      # @return [String] Rendered text
      # @raise [NotImplementedError] always, in this base class
      def render_default(data)
        raise NotImplementedError, "#{self.class}#render_default must be implemented"
      end
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  # Caches ActiveRecord model names and builds a precompiled regex
  # for scanning source code for model references.
  #
  # Avoids O(n*m) per-extractor iteration of ActiveRecord::Base.descendants.
  # Invalidated per extraction run (call .reset! before a new run).
  #
  # @example
  #   CodebaseIndex::ModelNameCache.model_names
  #   # => ["User", "Order", "Product", ...]
  #
  #   CodebaseIndex::ModelNameCache.model_names_regex
  #   # => /\b(?:User|Order|Product|...)\b/
  #
  module ModelNameCache
    class << self
      # Memoized list of model names; an empty list is cached too, so call
      # {reset!} if ActiveRecord is loaded after the first access.
      #
      # @return [Array<String>] All named AR model descendant names
      def model_names
        @model_names ||= compute_model_names
      end

      # @return [Regexp] Precompiled regex matching any model name as a whole word
      def model_names_regex
        @model_names_regex ||= build_regex
      end

      # Clear cache (call at the start of each extraction run)
      #
      # @return [void]
      def reset!
        @model_names = nil
        @model_names_regex = nil
      end

      private

      # Collect unique names of ActiveRecord::Base descendants, skipping
      # classes whose name is nil (anonymous classes).
      #
      # @return [Array<String>] empty when ActiveRecord::Base is not defined
      def compute_model_names
        return [] unless defined?(ActiveRecord::Base)

        ActiveRecord::Base.descendants.filter_map(&:name).uniq
      end

      # Build the whole-word alternation over all cached model names.
      #
      # @return [Regexp]
      def build_regex
        names = model_names
        return /(?!)/ if names.empty? # never-matching regex

        # Regexp.union escapes each name and joins with "|"; only its source is
        # embedded so the resulting pattern stays a plain (?:...) group.
        /\b(?:#{Regexp.union(names).source})\b/
      end
    end
  end
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'uri'
|
|
6
|
+
require 'codebase_index'
|
|
7
|
+
require_relative 'rate_limiter'
|
|
8
|
+
|
|
9
|
+
module CodebaseIndex
  module Notion
    # Thin wrapper around the Notion REST API (v2022-06-28).
    #
    # Uses Net::HTTP (stdlib) for zero external dependencies. All requests are
    # throttled through a {RateLimiter} to respect Notion's 3 req/sec limit.
    #
    # @example
    #   client = Client.new(api_token: "secret_...")
    #   client.create_page(database_id: "db-uuid", properties: { ... })
    #   client.query_database(database_id: "db-uuid", filter: { ... })
    #
    class Client # rubocop:disable Metrics/ClassLength
      BASE_URL = 'https://api.notion.com/v1'
      NOTION_VERSION = '2022-06-28'
      MAX_RETRIES = 3
      DEFAULT_TIMEOUT = 30

      # @param api_token [String] Notion integration API token
      # @param rate_limiter [RateLimiter] Rate limiter instance (default: 3 req/sec)
      # @raise [ArgumentError] if api_token is nil or empty
      def initialize(api_token:, rate_limiter: RateLimiter.new)
        raise ArgumentError, 'api_token is required' if api_token.nil? || api_token.to_s.empty?

        @api_token = api_token
        @rate_limiter = rate_limiter
      end

      # Create a page in a Notion database.
      #
      # @param database_id [String] Target database UUID
      # @param properties [Hash] Page properties in Notion API format
      # @param children [Array<Hash>] Optional page content blocks
      # @return [Hash] Created page data
      def create_page(database_id:, properties:, children: [])
        body = {
          parent: { database_id: database_id },
          properties: properties
        }
        body[:children] = children if children.any?

        request(:post, 'pages', body)
      end

      # Update an existing page's properties.
      #
      # @param page_id [String] Page UUID to update
      # @param properties [Hash] Properties to update
      # @return [Hash] Updated page data
      def update_page(page_id:, properties:)
        request(:patch, "pages/#{page_id}", { properties: properties })
      end

      # Query a database with optional filter and sort.
      #
      # @param database_id [String] Database UUID
      # @param filter [Hash, nil] Notion filter object
      # @param sorts [Array<Hash>, nil] Notion sort objects
      # @return [Hash] Query results with 'results', 'has_more', 'next_cursor'
      def query_database(database_id:, filter: nil, sorts: nil)
        body = {}
        body[:filter] = filter if filter
        body[:sorts] = sorts if sorts

        request(:post, "databases/#{database_id}/query", body)
      end

      # Query all pages from a database, auto-paginating.
      #
      # @param database_id [String] Database UUID
      # @param filter [Hash, nil] Notion filter object
      # @return [Array<Hash>] All matching pages
      def query_all(database_id:, filter: nil)
        all_results = []
        cursor = nil

        loop do
          body = {}
          body[:filter] = filter if filter
          body[:start_cursor] = cursor if cursor

          response = request(:post, "databases/#{database_id}/query", body)
          all_results.concat(response['results'] || [])

          cursor = response['next_cursor']
          # Stop when the API reports no more pages. Also stop if it claims more
          # pages but gives no cursor: without one the next request would re-fetch
          # the first page and the loop would never terminate.
          break unless response['has_more'] && cursor
        end

        all_results
      end

      # Find a page by its title property value.
      #
      # @param database_id [String] Database UUID
      # @param title [String] Title text to search for
      # @return [Hash, nil] First matching page or nil
      def find_page_by_title(database_id:, title:)
        response = query_database(
          database_id: database_id,
          filter: {
            property: 'title',
            title: { equals: title }
          }
        )

        (response['results'] || []).first
      end

      private

      # Execute an HTTP request against the Notion API.
      #
      # @param method [Symbol] HTTP method (:post, :patch, :get)
      # @param path [String] API path (appended to BASE_URL)
      # @param body [Hash, nil] Request body
      # @return [Hash] Parsed JSON response
      # @raise [CodebaseIndex::Error] on non-success responses (after retries for 429)
      def request(method, path, body = nil)
        retries = 0

        loop do
          response = execute_with_retry(method, path, body, retries)

          return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

          if response.code == '429' && retries < MAX_RETRIES
            retries += 1
            sleep(retry_delay(response, retries))
            next
          end

          raise_api_error(response)
        end
      end

      # Seconds to wait before retrying a rate-limited (429) request.
      #
      # Uses the Retry-After header when it is a non-negative number of seconds.
      # A missing, non-numeric (e.g. HTTP-date) or negative value falls back to a
      # linear backoff of `retries` seconds instead of retrying immediately.
      #
      # @param response [Net::HTTPResponse] The 429 response
      # @param retries [Integer] Number of the retry about to be made (1-based)
      # @return [Float]
      def retry_delay(response, retries)
        seconds = Float(response['Retry-After'], exception: false)
        (seconds.nil? || seconds.negative?) ? retries.to_f : seconds
      end

      # Execute HTTP with rate limiting and network error retry.
      #
      # Network errors are retried with exponential backoff (2, 4 seconds) until
      # MAX_RETRIES attempts have failed. The last parameter is accepted but unused.
      #
      # @return [Net::HTTPResponse]
      # @raise [CodebaseIndex::Error] on persistent network failures
      def execute_with_retry(method, path, body, _retries)
        attempts = 0
        begin
          @rate_limiter.throttle { execute_http(method, path, body) }
        rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED => e
          attempts += 1
          raise CodebaseIndex::Error, "Network error after #{attempts} retries: #{e.message}" if attempts >= MAX_RETRIES

          sleep(2**attempts)
          retry
        end
      end

      # Raise a descriptive error from a non-success Notion response.
      #
      # @param response [Net::HTTPResponse]
      # @raise [CodebaseIndex::Error]
      def raise_api_error(response)
        raise CodebaseIndex::Error, "Notion API error #{response.code}: #{error_message_from(response.body)}"
      end

      # Extract a human-readable message from an error response body.
      #
      # Tolerates a nil body and JSON that is not an object, so the caller always
      # raises CodebaseIndex::Error rather than a TypeError from parsing.
      #
      # @param raw_body [String, nil] Raw response body
      # @return [String]
      def error_message_from(raw_body)
        parsed = JSON.parse(raw_body.to_s)
        (parsed['message'] if parsed.is_a?(Hash)) || 'Unknown error'
      rescue JSON::ParserError
        "Unparseable response body: #{raw_body&.slice(0, 200)}"
      end

      # Perform the raw HTTP request over TLS with open/read timeouts applied.
      #
      # @param method [Symbol] HTTP method
      # @param path [String] API path
      # @param body [Hash, nil] Request body
      # @return [Net::HTTPResponse]
      def execute_http(method, path, body)
        uri = URI("#{BASE_URL}/#{path}")
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = true
        http.open_timeout = DEFAULT_TIMEOUT
        http.read_timeout = DEFAULT_TIMEOUT

        req = build_request(method, uri, body)
        http.request(req)
      end

      # Build an HTTP request object with headers.
      #
      # @param method [Symbol] HTTP method
      # @param uri [URI] Full request URI
      # @param body [Hash, nil] Request body
      # @return [Net::HTTPRequest]
      # @raise [ArgumentError] if the HTTP method is not :post, :patch or :get
      def build_request(method, uri, body)
        req = case method
              when :post then Net::HTTP::Post.new(uri)
              when :patch then Net::HTTP::Patch.new(uri)
              when :get then Net::HTTP::Get.new(uri)
              else raise ArgumentError, "Unsupported HTTP method: #{method}"
              end

        req['Authorization'] = "Bearer #{@api_token}"
        req['Notion-Version'] = NOTION_VERSION
        req['Content-Type'] = 'application/json'
        req.body = JSON.generate(body) if body

        req
      end
    end
  end
end
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'codebase_index'
|
|
4
|
+
require_relative 'client'
|
|
5
|
+
require_relative 'mapper'
|
|
6
|
+
require_relative 'rate_limiter'
|
|
7
|
+
|
|
8
|
+
module CodebaseIndex
|
|
9
|
+
module Notion
|
|
10
|
+
# Orchestrates syncing CodebaseIndex extraction data to Notion databases.
|
|
11
|
+
#
|
|
12
|
+
# Reads extraction output from disk via IndexReader, maps model and column data
|
|
13
|
+
# to Notion page properties, and pushes via the Notion API. All syncs are idempotent —
|
|
14
|
+
# existing pages are updated, new pages are created.
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# exporter = Exporter.new(index_dir: "tmp/codebase_index")
|
|
18
|
+
# stats = exporter.sync_all
|
|
19
|
+
# # => { data_models: 10, columns: 45, errors: [] }
|
|
20
|
+
#
|
|
21
|
+
class Exporter # rubocop:disable Metrics/ClassLength
|
|
22
|
+
# @param index_dir [String] Path to extraction output directory
|
|
23
|
+
# @param config [Configuration] CodebaseIndex configuration (default: global config)
|
|
24
|
+
# @param client [Client, nil] Notion API client (auto-created from config if nil)
|
|
25
|
+
# @param reader [Object, nil] IndexReader instance (auto-created from index_dir if nil)
|
|
26
|
+
# @raise [ConfigurationError] if notion_api_token is not configured
|
|
27
|
+
def initialize(index_dir:, config: CodebaseIndex.configuration, client: nil, reader: nil)
|
|
28
|
+
api_token = config.notion_api_token
|
|
29
|
+
raise ConfigurationError, 'notion_api_token is required for Notion export' unless api_token
|
|
30
|
+
|
|
31
|
+
@database_ids = config.notion_database_ids || {}
|
|
32
|
+
@client = client || Client.new(api_token: api_token)
|
|
33
|
+
@reader = reader || build_reader(index_dir)
|
|
34
|
+
@page_id_cache = {}
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Sync all configured databases. Idempotent — safe to re-run.
|
|
38
|
+
#
|
|
39
|
+
# @return [Hash] { data_models: Integer, columns: Integer, errors: Array<String> }
|
|
40
|
+
def sync_all
|
|
41
|
+
model_stats = @database_ids[:data_models] ? sync_data_models : empty_stats
|
|
42
|
+
column_stats = @database_ids[:columns] && @database_ids[:data_models] ? sync_columns : empty_stats
|
|
43
|
+
|
|
44
|
+
all_errors = model_stats[:errors] + column_stats[:errors]
|
|
45
|
+
|
|
46
|
+
{
|
|
47
|
+
data_models: model_stats[:synced],
|
|
48
|
+
columns: column_stats[:synced],
|
|
49
|
+
errors: cap_errors(all_errors)
|
|
50
|
+
}
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Sync model units to the Data Models Notion database.
|
|
54
|
+
#
|
|
55
|
+
# @return [Hash] { synced: Integer, errors: Array<String> }
|
|
56
|
+
def sync_data_models
|
|
57
|
+
database_id = @database_ids[:data_models]
|
|
58
|
+
return empty_stats unless database_id
|
|
59
|
+
|
|
60
|
+
migration_dates = load_migration_dates
|
|
61
|
+
sync_units('model', database_id, 'Table Name') do |unit_data|
|
|
62
|
+
properties = Mappers::ModelMapper.new.map(unit_data)
|
|
63
|
+
enrich_with_migration_date(properties, migration_dates)
|
|
64
|
+
properties
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Sync column data to the Columns Notion database.
|
|
69
|
+
#
|
|
70
|
+
# @return [Hash] { synced: Integer, errors: Array<String> }
|
|
71
|
+
def sync_columns
|
|
72
|
+
database_id = @database_ids[:columns]
|
|
73
|
+
return empty_stats unless database_id
|
|
74
|
+
|
|
75
|
+
synced = 0
|
|
76
|
+
errors = []
|
|
77
|
+
|
|
78
|
+
each_model_unit do |entry, unit_data|
|
|
79
|
+
synced_count, unit_errors = sync_model_columns(entry, unit_data, database_id)
|
|
80
|
+
synced += synced_count
|
|
81
|
+
errors.concat(unit_errors)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
{ synced: synced, errors: errors }
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
MAX_ERRORS = 100
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
|
|
91
|
+
# Sync all units of a type, yielding each for property mapping.
|
|
92
|
+
#
|
|
93
|
+
# @param type [String] Unit type to list
|
|
94
|
+
# @param database_id [String] Notion database UUID
|
|
95
|
+
# @param title_property [String] Name of the title property
|
|
96
|
+
# @yield [Hash] Unit data hash, expects Notion properties hash back
|
|
97
|
+
# @return [Hash] { synced: Integer, errors: Array<String> }
|
|
98
|
+
def sync_units(type, database_id, title_property)
|
|
99
|
+
synced = 0
|
|
100
|
+
errors = []
|
|
101
|
+
|
|
102
|
+
@reader.list_units(type: type).each do |entry|
|
|
103
|
+
unit_data = @reader.find_unit(entry['identifier'])
|
|
104
|
+
next unless unit_data
|
|
105
|
+
|
|
106
|
+
begin
|
|
107
|
+
properties = yield(unit_data)
|
|
108
|
+
title_value = extract_title_text(properties[title_property])
|
|
109
|
+
page_id = upsert_page(database_id: database_id, title_value: title_value, properties: properties)
|
|
110
|
+
@page_id_cache[entry['identifier']] = page_id
|
|
111
|
+
synced += 1
|
|
112
|
+
rescue StandardError => e
|
|
113
|
+
errors << "#{entry['identifier']}: #{e.message}"
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
{ synced: synced, errors: errors }
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# Iterate over loaded model units.
|
|
121
|
+
#
|
|
122
|
+
# @yield [Hash, Hash] Index entry and full unit data
|
|
123
|
+
def each_model_unit
|
|
124
|
+
@reader.list_units(type: 'model').each do |entry|
|
|
125
|
+
unit_data = @reader.find_unit(entry['identifier'])
|
|
126
|
+
next unless unit_data
|
|
127
|
+
|
|
128
|
+
yield(entry, unit_data)
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Sync columns for a single model.
|
|
133
|
+
#
|
|
134
|
+
# @return [Array(Integer, Array<String>)] Count of synced columns and errors
|
|
135
|
+
def sync_model_columns(entry, unit_data, database_id)
|
|
136
|
+
parent_page_id = @page_id_cache[entry['identifier']]
|
|
137
|
+
columns = unit_data.dig('metadata', 'columns') || []
|
|
138
|
+
validations = unit_data.dig('metadata', 'validations') || []
|
|
139
|
+
mapper = Mappers::ColumnMapper.new
|
|
140
|
+
synced = 0
|
|
141
|
+
errors = []
|
|
142
|
+
|
|
143
|
+
columns.each do |column|
|
|
144
|
+
properties = mapper.map(column, model_identifier: entry['identifier'],
|
|
145
|
+
validations: validations, parent_page_id: parent_page_id)
|
|
146
|
+
upsert_page(database_id: database_id, title_value: column['name'], properties: properties)
|
|
147
|
+
synced += 1
|
|
148
|
+
rescue StandardError => e
|
|
149
|
+
errors << "#{entry['identifier']}.#{column['name']}: #{e.message}"
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
[synced, errors]
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
# Enrich model properties with migration date if available.
|
|
156
|
+
#
|
|
157
|
+
# @param properties [Hash] Notion properties hash (mutated)
|
|
158
|
+
# @param migration_dates [Hash] { table_name => date_string }
|
|
159
|
+
def enrich_with_migration_date(properties, migration_dates)
|
|
160
|
+
table_name = extract_title_text(properties['Table Name'])
|
|
161
|
+
return unless migration_dates[table_name]
|
|
162
|
+
|
|
163
|
+
properties['Last Schema Change'] = { date: { start: migration_dates[table_name] } }
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Load migration units and compute latest change dates per table.
|
|
167
|
+
#
|
|
168
|
+
# @return [Hash<String, String>] { table_name => latest_date }
|
|
169
|
+
def load_migration_dates
|
|
170
|
+
mapper = Mappers::MigrationMapper.new
|
|
171
|
+
units = @reader.list_units(type: 'migration').filter_map { |e| @reader.find_unit(e['identifier']) }
|
|
172
|
+
mapper.latest_changes(units)
|
|
173
|
+
rescue StandardError
|
|
174
|
+
{}
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Upsert a Notion page: find by title, update if exists, create if not.
|
|
178
|
+
#
|
|
179
|
+
# @return [String] Notion page ID
|
|
180
|
+
def upsert_page(database_id:, title_value:, properties:)
|
|
181
|
+
existing = @client.find_page_by_title(database_id: database_id, title: title_value)
|
|
182
|
+
|
|
183
|
+
if existing
|
|
184
|
+
@client.update_page(page_id: existing['id'], properties: properties)
|
|
185
|
+
existing['id']
|
|
186
|
+
else
|
|
187
|
+
result = @client.create_page(database_id: database_id, properties: properties)
|
|
188
|
+
result['id']
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
# @return [Hash]
|
|
193
|
+
def empty_stats
|
|
194
|
+
{ synced: 0, errors: [] }
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
# Limit an error list to MAX_ERRORS entries.
#
# Lists within the limit are returned as-is (same object). Longer lists are
# cut to the first MAX_ERRORS entries followed by one summary line stating how
# many entries were dropped.
#
# @param errors [Array<String>]
# @return [Array<String>]
def cap_errors(errors)
  overflow = errors.size - MAX_ERRORS
  return errors unless overflow.positive?

  [*errors.take(MAX_ERRORS), "... and #{overflow} more errors"]
end
|
|
206
|
+
|
|
207
|
+
# Read the text content of the first title item of a Notion title property.
#
# @param title_prop [Hash, nil] title property shaped like
#   { title: [{ text: { content: "..." } }] }
# @return [String] the content, or an empty string when the property is nil
#   or any level of the path is missing
def extract_title_text(title_prop)
  return '' if title_prop.nil?

  content = title_prop.dig(:title, 0, :text, :content)
  content || ''
end
|
|
211
|
+
|
|
212
|
+
# Instantiate the MCP index reader for an index directory.
#
# The reader file is required here, at call time, rather than at load time.
#
# @param index_dir [String] directory handed to IndexReader.new
# @return [Object] IndexReader
def build_reader(index_dir)
  require_relative '../mcp/index_reader'

  reader_class = CodebaseIndex::MCP::IndexReader
  reader_class.new(index_dir)
end
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'mappers/model_mapper'
|
|
4
|
+
require_relative 'mappers/column_mapper'
|
|
5
|
+
require_relative 'mappers/migration_mapper'
|
|
6
|
+
|
|
7
|
+
module CodebaseIndex
  module Notion
    # Dispatcher for Notion mappers. Returns the appropriate mapper for a unit type.
    #
    # @example
    #   mapper = Mapper.for("model")
    #   properties = mapper.map(unit_data)
    #
    class Mapper
      # Unit type name => mapper class. Keys are Strings.
      REGISTRY = {
        'model' => Mappers::ModelMapper,
        'column' => Mappers::ColumnMapper,
        'migration' => Mappers::MigrationMapper
      }.freeze

      # Get a mapper instance for a unit type.
      #
      # The type is coerced with +to_s+ before the lookup, so a Symbol such as
      # +:model+ resolves to the same mapper as "model" instead of silently
      # returning nil.
      #
      # @param type [String, Symbol] Unit type name (e.g. "model", "column", "migration")
      # @return [Object, nil] New mapper instance, or nil if the type is not supported
      def self.for(type)
        REGISTRY[type.to_s]&.new
      end

      # List all supported unit types.
      #
      # @return [Array<String>]
      def self.supported_types
        REGISTRY.keys
      end
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Notion
    module Mappers
      # Maps individual column metadata to Notion page properties for the Columns database.
      #
      # Each column from a model's metadata becomes a separate Notion page, optionally
      # linked to its parent Data Models page via a relation property.
      #
      # @example
      #   mapper = ColumnMapper.new
      #   properties = mapper.map(column, model_identifier: "User", validations: [...], parent_page_id: "page-123")
      #
      class ColumnMapper
        # Longest rich_text content this mapper emits; longer text is truncated.
        MAX_RICH_TEXT_LENGTH = 2000

        # Marker appended to truncated text. Its length is counted against
        # MAX_RICH_TEXT_LENGTH so truncated content never exceeds the limit.
        TRUNCATION_SUFFIX = '...'
        private_constant :TRUNCATION_SUFFIX

        # Map a single column to Notion Columns page properties.
        #
        # @param column [Hash] Column hash from metadata["columns"] (name, type, null, default)
        # @param model_identifier [String] Parent model name (accepted for context, currently unused)
        # @param validations [Array<Hash>, nil] Model-level validations to match against this column;
        #   nil is treated as "no validations"
        # @param parent_page_id [String, nil] Notion page ID of the Data Models parent page
        # @return [Hash] Notion page properties hash
        def map(column, model_identifier: nil, validations: [], parent_page_id: nil) # rubocop:disable Lint/UnusedMethodArgument
          properties = {
            'Column Name' => { title: [{ text: { content: column['name'] } }] },
            'Data Type' => { select: { name: column['type'] } },
            # Compared against true so a missing/nil flag becomes an unchecked box.
            'Nullable' => { checkbox: column['null'] == true },
            'Default Value' => rich_text_property(column['default']),
            'Validation Rules' => rich_text_property(format_validation_rules(column['name'], validations))
          }

          # The relation is only emitted when the parent page is known.
          properties['Table'] = { relation: [{ id: parent_page_id }] } if parent_page_id

          properties
        end

        private

        # Find and format validations matching this column name.
        #
        # @param column_name [String]
        # @param validations [Array<Hash>, nil]
        # @return [String] Comma-separated validation types, or "None" when nothing matches
        def format_validation_rules(column_name, validations)
          # A caller may pass nil explicitly (e.g. metadata without validations).
          matched = (validations || []).select { |v| v['attribute'] == column_name }
          return 'None' if matched.empty?

          matched.map { |v| v['type'] }.join(', ')
        end

        # Build a Notion rich_text property, truncating over-long content.
        #
        # @param text [#to_s] Value to render; nil becomes an empty string
        # @return [Hash]
        def rich_text_property(text)
          content = text.to_s
          if content.length > MAX_RICH_TEXT_LENGTH
            kept = MAX_RICH_TEXT_LENGTH - TRUNCATION_SUFFIX.length
            content = "#{content[0, kept]}#{TRUNCATION_SUFFIX}"
          end
          { rich_text: [{ text: { content: content } }] }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Notion
    module Mappers
      # Derives, for every table touched by a migration, the newest
      # +extracted_at+ value found among the migration ExtractedUnits.
      #
      # The result feeds the "most recent schema change" date on Data Models pages.
      #
      # @example
      #   mapper = MigrationMapper.new
      #   changes = mapper.latest_changes(migration_units)
      #   # => { "users" => "2026-02-20T10:00:00Z", "posts" => "2026-01-15T09:00:00Z" }
      #
      class MigrationMapper
        # Compute the latest migration date for each affected table.
        #
        # Units without an +extracted_at+ value are ignored. Values are compared
        # with +>+, so they must be mutually comparable and sort chronologically
        # (presumably uniformly formatted timestamp strings; confirm with the
        # extractor).
        #
        # @param migration_units [Array<Hash>] Parsed migration ExtractedUnit JSONs
        # @return [Hash<String, String>] Table name to latest extracted_at timestamp
        def latest_changes(migration_units)
          latest_by_table = {}

          migration_units.each do |unit|
            timestamp = unit['extracted_at']
            next unless timestamp

            affected_tables(unit).each do |table|
              known = latest_by_table[table]
              latest_by_table[table] = timestamp if known.nil? || timestamp > known
            end
          end

          latest_by_table
        end

        private

        # Tables listed under metadata.tables_affected; empty when either level is absent.
        #
        # @param unit [Hash]
        # @return [Array<String>]
        def affected_tables(unit)
          metadata = unit['metadata'] || {}
          metadata['tables_affected'] || []
        end
      end
    end
  end
end
|