codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module MCP
    # Abstract renderer for MCP tool responses; one subclass exists per output format.
    #
    # A subclass supplies tool-specific methods named render_<tool> (for example
    # render_lookup or render_search) together with a render_default fallback.
    # {#render} routes every call by that naming convention.
    #
    # @example
    #   renderer = ToolResponseRenderer.for(:markdown)
    #   renderer.render(:lookup, unit_data)
    #
    class ToolResponseRenderer
      VALID_FORMATS = %i[claude markdown plain json].freeze

      # Build the renderer that corresponds to an output format.
      #
      # The concrete renderer files are required inside this method, i.e. on
      # the first call rather than when this file itself is loaded.
      #
      # @param format [Symbol] One of :claude, :markdown, :plain, :json
      # @return [ToolResponseRenderer] A renderer instance
      # @raise [ArgumentError] if format is unknown
      def self.for(format)
        require_relative 'renderers/markdown_renderer'
        require_relative 'renderers/claude_renderer'
        require_relative 'renderers/plain_renderer'
        require_relative 'renderers/json_renderer'

        renderer_class =
          case format
          when :claude then Renderers::ClaudeRenderer
          when :markdown then Renderers::MarkdownRenderer
          when :plain then Renderers::PlainRenderer
          when :json then Renderers::JsonRenderer
          end
        return renderer_class.new if renderer_class

        raise ArgumentError, "Unknown format: #{format.inspect}. Valid: #{VALID_FORMATS.inspect}"
      end

      # Render the result of a tool call.
      #
      # Looks for a method named render_<tool_name> (private methods count) and
      # calls it with the data and options; when no such method exists the data
      # alone is handed to render_default.
      #
      # @param tool_name [Symbol, String] The tool name
      # @param data [Object] The tool result data
      # @param opts [Hash] Additional rendering options (only passed to tool-specific methods)
      # @return [String] Rendered response text
      def render(tool_name, data, **opts)
        handler = :"render_#{tool_name}"
        return render_default(data) unless respond_to?(handler, true)

        send(handler, data, **opts)
      end

      # Fallback rendering hook. The base implementation always raises, so
      # every concrete renderer has to override it.
      #
      # @param data [Object] The data to render
      # @return [String] Rendered text
      # @raise [NotImplementedError] when not overridden
      def render_default(data)
        raise NotImplementedError, "#{self.class}#render_default must be implemented"
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  # Memoizes the names of ActiveRecord models together with a regex that
  # matches any of those names as a whole word in source text.
  #
  # Both values are computed on first access and held in module-level instance
  # variables, which avoids O(n*m) per-extractor iteration of
  # ActiveRecord::Base.descendants. The cache lives until .reset! is called
  # (do that before each new extraction run).
  #
  # @example
  #   CodebaseIndex::ModelNameCache.model_names
  #   # => ["User", "Order", "Product", ...]
  #
  #   CodebaseIndex::ModelNameCache.model_names_regex
  #   # => /\b(?:User|Order|Product|...)\b/
  #
  module ModelNameCache
    class << self
      # @return [Array<String>] Unique names of all named AR model descendants
      def model_names
        return @model_names if @model_names

        @model_names = compute_model_names
      end

      # @return [Regexp] Regex matching any cached model name as a whole word
      def model_names_regex
        return @model_names_regex if @model_names_regex

        @model_names_regex = build_regex
      end

      # Drop both cached values so the next access recomputes them
      # (call at the start of each extraction run).
      def reset!
        @model_names_regex = nil
        @model_names = nil
      end

      private

      # Collect model names; yields an empty list when ActiveRecord is not loaded.
      # Descendants whose name is nil (anonymous classes) are skipped.
      def compute_model_names
        if defined?(ActiveRecord::Base)
          named = ActiveRecord::Base.descendants.filter_map(&:name)
          named.uniq
        else
          []
        end
      end

      # Assemble the word-bounded alternation of all model names.
      def build_regex
        known = model_names
        return /(?!)/ if known.empty? # never-matching regex

        # Regexp.union escapes each name and joins them with "|".
        Regexp.new("\\b(?:#{Regexp.union(known).source})\\b")
      end
    end
  end
end
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'uri'
6
+ require 'codebase_index'
7
+ require_relative 'rate_limiter'
8
+
9
module CodebaseIndex
  module Notion
    # Thin wrapper around the Notion REST API (v2022-06-28).
    #
    # Uses Net::HTTP (stdlib) for zero external dependencies. All requests are
    # throttled through a {RateLimiter} to respect Notion's 3 req/sec limit.
    #
    # @example
    #   client = Client.new(api_token: "secret_...")
    #   client.create_page(database_id: "db-uuid", properties: { ... })
    #   client.query_database(database_id: "db-uuid", filter: { ... })
    #
    class Client # rubocop:disable Metrics/ClassLength
      BASE_URL = 'https://api.notion.com/v1'
      NOTION_VERSION = '2022-06-28'
      MAX_RETRIES = 3
      DEFAULT_TIMEOUT = 30

      # @param api_token [String] Notion integration API token
      # @param rate_limiter [RateLimiter] Rate limiter instance (default: 3 req/sec)
      # @raise [ArgumentError] if api_token is nil or empty
      def initialize(api_token:, rate_limiter: RateLimiter.new)
        raise ArgumentError, 'api_token is required' if api_token.nil? || api_token.to_s.empty?

        @api_token = api_token
        @rate_limiter = rate_limiter
      end

      # Create a page in a Notion database.
      #
      # @param database_id [String] Target database UUID
      # @param properties [Hash] Page properties in Notion API format
      # @param children [Array<Hash>, nil] Optional page content blocks (nil is treated like "none")
      # @return [Hash] Created page data
      def create_page(database_id:, properties:, children: [])
        body = {
          parent: { database_id: database_id },
          properties: properties
        }
        body[:children] = children if children&.any?

        request(:post, 'pages', body)
      end

      # Update an existing page's properties.
      #
      # @param page_id [String] Page UUID to update
      # @param properties [Hash] Properties to update
      # @return [Hash] Updated page data
      def update_page(page_id:, properties:)
        request(:patch, "pages/#{page_id}", { properties: properties })
      end

      # Query a database with optional filter and sort.
      #
      # @param database_id [String] Database UUID
      # @param filter [Hash, nil] Notion filter object
      # @param sorts [Array<Hash>, nil] Notion sort objects
      # @return [Hash] Query results with 'results', 'has_more', 'next_cursor'
      def query_database(database_id:, filter: nil, sorts: nil)
        body = {}
        body[:filter] = filter if filter
        body[:sorts] = sorts if sorts

        request(:post, "databases/#{database_id}/query", body)
      end

      # Query all pages from a database, auto-paginating.
      #
      # @param database_id [String] Database UUID
      # @param filter [Hash, nil] Notion filter object
      # @return [Array<Hash>] All matching pages
      def query_all(database_id:, filter: nil)
        all_results = []
        cursor = nil

        loop do
          body = {}
          body[:filter] = filter if filter
          body[:start_cursor] = cursor if cursor

          response = request(:post, "databases/#{database_id}/query", body)
          all_results.concat(response['results'] || [])

          cursor = response['next_cursor']
          # Also stop when more pages are announced but no cursor is supplied:
          # without a cursor the next request would fetch the first page again, forever.
          break unless response['has_more'] && cursor
        end

        all_results
      end

      # Find a page by its title property value.
      #
      # @param database_id [String] Database UUID
      # @param title [String] Title text to search for
      # @return [Hash, nil] First matching page or nil
      def find_page_by_title(database_id:, title:)
        response = query_database(
          database_id: database_id,
          filter: {
            property: 'title',
            title: { equals: title }
          }
        )

        results = response['results'] || []
        results.first
      end

      private

      # Execute an HTTP request against the Notion API.
      #
      # A 429 response is retried up to MAX_RETRIES times, sleeping between
      # attempts; any other non-success response raises immediately.
      #
      # @param method [Symbol] HTTP method (:post, :patch, :get)
      # @param path [String] API path (appended to BASE_URL)
      # @param body [Hash, nil] Request body
      # @return [Hash] Parsed JSON response
      # @raise [CodebaseIndex::Error] on non-success responses (after retries for 429)
      def request(method, path, body = nil)
        retries = 0

        loop do
          response = execute_with_retry(method, path, body, retries)

          return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

          if response.code == '429' && retries < MAX_RETRIES
            retries += 1
            sleep(retry_delay(response, retries))
            next
          end

          raise_api_error(response)
        end
      end

      # Seconds to wait before retrying a rate-limited request.
      #
      # Honors a numeric Retry-After header. When the header is missing, not a
      # number (e.g. an HTTP-date), negative or non-finite, falls back to the
      # attempt number so the retry never fires without any back-off.
      #
      # @param response [Net::HTTPResponse] The 429 response
      # @param retries [Integer] Number of the retry about to be made (1-based)
      # @return [Float]
      def retry_delay(response, retries)
        header = response['Retry-After']
        seconds = Float(header, exception: false) if header
        return seconds if seconds && seconds.finite? && !seconds.negative?

        retries.to_f
      end

      # Execute HTTP with rate limiting and network error retry.
      #
      # Timeouts and connection resets/refusals are retried with exponential
      # back-off (2**attempt seconds) until MAX_RETRIES attempts have failed.
      #
      # @return [Net::HTTPResponse]
      # @raise [CodebaseIndex::Error] on persistent network failures
      def execute_with_retry(method, path, body, _retries)
        attempts = 0
        begin
          @rate_limiter.throttle { execute_http(method, path, body) }
        rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED => e
          attempts += 1
          raise CodebaseIndex::Error, "Network error after #{attempts} retries: #{e.message}" if attempts >= MAX_RETRIES

          sleep(2**attempts)
          retry
        end
      end

      # Raise a descriptive error from a non-success Notion response.
      #
      # @param response [Net::HTTPResponse]
      # @raise [CodebaseIndex::Error]
      def raise_api_error(response)
        raise CodebaseIndex::Error, "Notion API error #{response.code}: #{api_error_message(response.body)}"
      end

      # Extract a human-readable message from an error response body.
      #
      # Tolerates a missing body and JSON that is valid but not an object, so
      # the caller always ends up raising CodebaseIndex::Error rather than an
      # incidental TypeError.
      #
      # @param raw_body [String, nil] Raw response body
      # @return [String]
      def api_error_message(raw_body)
        text = raw_body.to_s # JSON.parse(nil) would raise TypeError
        parsed = JSON.parse(text)
        message = parsed['message'] if parsed.is_a?(Hash)
        message || 'Unknown error'
      rescue JSON::ParserError
        "Unparseable response body: #{text.slice(0, 200)}"
      end

      # Perform the raw HTTP request.
      #
      # @param method [Symbol] HTTP method
      # @param path [String] API path
      # @param body [Hash, nil] Request body
      # @return [Net::HTTPResponse]
      def execute_http(method, path, body)
        uri = URI("#{BASE_URL}/#{path}")
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = true
        http.open_timeout = DEFAULT_TIMEOUT
        http.read_timeout = DEFAULT_TIMEOUT

        req = build_request(method, uri, body)
        http.request(req)
      end

      # Build an HTTP request object with headers.
      #
      # @param method [Symbol] HTTP method
      # @param uri [URI] Full request URI
      # @param body [Hash, nil] Request body
      # @return [Net::HTTPRequest]
      # @raise [ArgumentError] if the HTTP method is not :post, :patch or :get
      def build_request(method, uri, body)
        req = case method
              when :post then Net::HTTP::Post.new(uri)
              when :patch then Net::HTTP::Patch.new(uri)
              when :get then Net::HTTP::Get.new(uri)
              else raise ArgumentError, "Unsupported HTTP method: #{method}"
              end

        req['Authorization'] = "Bearer #{@api_token}"
        req['Notion-Version'] = NOTION_VERSION
        req['Content-Type'] = 'application/json'
        req.body = JSON.generate(body) if body

        req
      end
    end
  end
end
@@ -0,0 +1,219 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'codebase_index'
4
+ require_relative 'client'
5
+ require_relative 'mapper'
6
+ require_relative 'rate_limiter'
7
+
8
module CodebaseIndex
  module Notion
    # Orchestrates syncing CodebaseIndex extraction data to Notion databases.
    #
    # Reads extraction output from disk via IndexReader, maps model and column data
    # to Notion page properties, and pushes via the Notion API. All syncs are idempotent —
    # existing pages are updated, new pages are created.
    #
    # @example
    #   exporter = Exporter.new(index_dir: "tmp/codebase_index")
    #   stats = exporter.sync_all
    #   # => { data_models: 10, columns: 45, errors: [] }
    #
    class Exporter # rubocop:disable Metrics/ClassLength
      # Upper bound on the number of error strings returned by {#sync_all}.
      MAX_ERRORS = 100

      # Relation property that links a column page to its model page.
      # Must stay in sync with the property name written by Mappers::ColumnMapper.
      PARENT_RELATION_PROPERTY = 'Table'

      # @param index_dir [String] Path to extraction output directory
      # @param config [Configuration] CodebaseIndex configuration (default: global config)
      # @param client [Client, nil] Notion API client (auto-created from config if nil)
      # @param reader [Object, nil] IndexReader instance (auto-created from index_dir if nil)
      # @raise [ConfigurationError] if notion_api_token is not configured
      def initialize(index_dir:, config: CodebaseIndex.configuration, client: nil, reader: nil)
        api_token = config.notion_api_token
        raise ConfigurationError, 'notion_api_token is required for Notion export' unless api_token

        @database_ids = config.notion_database_ids || {}
        @client = client || Client.new(api_token: api_token)
        @reader = reader || build_reader(index_dir)
        # Unit identifier => Notion page id, filled while syncing models and
        # read back when linking columns to their model page.
        @page_id_cache = {}
      end

      # Sync all configured databases. Idempotent — safe to re-run.
      #
      # Columns are only synced when both the columns and the data_models
      # database ids are configured.
      #
      # @return [Hash] { data_models: Integer, columns: Integer, errors: Array<String> }
      def sync_all
        model_stats = @database_ids[:data_models] ? sync_data_models : empty_stats
        column_stats = @database_ids[:columns] && @database_ids[:data_models] ? sync_columns : empty_stats

        all_errors = model_stats[:errors] + column_stats[:errors]

        {
          data_models: model_stats[:synced],
          columns: column_stats[:synced],
          errors: cap_errors(all_errors)
        }
      end

      # Sync model units to the Data Models Notion database.
      #
      # @return [Hash] { synced: Integer, errors: Array<String> }
      def sync_data_models
        database_id = @database_ids[:data_models]
        return empty_stats unless database_id

        migration_dates = load_migration_dates
        sync_units('model', database_id, 'Table Name') do |unit_data|
          properties = Mappers::ModelMapper.new.map(unit_data)
          enrich_with_migration_date(properties, migration_dates)
          properties
        end
      end

      # Sync column data to the Columns Notion database.
      #
      # Column pages are linked to (and looked up within) their model's page
      # when that page id is known from a preceding {#sync_data_models} run on
      # this instance.
      #
      # @return [Hash] { synced: Integer, errors: Array<String> }
      def sync_columns
        database_id = @database_ids[:columns]
        return empty_stats unless database_id

        synced = 0
        errors = []

        each_model_unit do |entry, unit_data|
          synced_count, unit_errors = sync_model_columns(entry, unit_data, database_id)
          synced += synced_count
          errors.concat(unit_errors)
        end

        { synced: synced, errors: errors }
      end

      private

      # Sync all units of a type, yielding each for property mapping.
      #
      # A failure while mapping or upserting one unit is recorded and does not
      # abort the remaining units.
      #
      # @param type [String] Unit type to list
      # @param database_id [String] Notion database UUID
      # @param title_property [String] Name of the title property
      # @yield [Hash] Unit data hash, expects Notion properties hash back
      # @return [Hash] { synced: Integer, errors: Array<String> }
      def sync_units(type, database_id, title_property)
        synced = 0
        errors = []

        @reader.list_units(type: type).each do |entry|
          unit_data = @reader.find_unit(entry['identifier'])
          next unless unit_data

          begin
            properties = yield(unit_data)
            title_value = extract_title_text(properties[title_property])
            page_id = upsert_page(database_id: database_id, title_value: title_value, properties: properties)
            @page_id_cache[entry['identifier']] = page_id
            synced += 1
          rescue StandardError => e
            errors << "#{entry['identifier']}: #{e.message}"
          end
        end

        { synced: synced, errors: errors }
      end

      # Iterate over loaded model units, skipping entries the reader cannot resolve.
      #
      # @yield [Hash, Hash] Index entry and full unit data
      def each_model_unit
        @reader.list_units(type: 'model').each do |entry|
          unit_data = @reader.find_unit(entry['identifier'])
          next unless unit_data

          yield(entry, unit_data)
        end
      end

      # Sync columns for a single model.
      #
      # @param entry [Hash] Index entry of the model (reads 'identifier')
      # @param unit_data [Hash] Full model unit (reads metadata.columns / metadata.validations)
      # @param database_id [String] Columns database UUID
      # @return [Array(Integer, Array<String>)] Count of synced columns and errors
      def sync_model_columns(entry, unit_data, database_id)
        parent_page_id = @page_id_cache[entry['identifier']]
        columns = unit_data.dig('metadata', 'columns') || []
        validations = unit_data.dig('metadata', 'validations') || []
        mapper = Mappers::ColumnMapper.new
        synced = 0
        errors = []

        columns.each do |column|
          properties = mapper.map(column, model_identifier: entry['identifier'],
                                          validations: validations, parent_page_id: parent_page_id)
          # Pass the parent so same-named columns of different models
          # ("id", "created_at", ...) resolve to separate pages.
          upsert_page(database_id: database_id, title_value: column['name'],
                      properties: properties, parent_page_id: parent_page_id)
          synced += 1
        rescue StandardError => e
          errors << "#{entry['identifier']}.#{column['name']}: #{e.message}"
        end

        [synced, errors]
      end

      # Enrich model properties with migration date if available.
      #
      # @param properties [Hash] Notion properties hash (mutated)
      # @param migration_dates [Hash] { table_name => date_string }
      def enrich_with_migration_date(properties, migration_dates)
        table_name = extract_title_text(properties['Table Name'])
        return unless migration_dates[table_name]

        properties['Last Schema Change'] = { date: { start: migration_dates[table_name] } }
      end

      # Load migration units and compute latest change dates per table.
      #
      # Best-effort: any failure yields an empty hash so the model sync
      # proceeds without schema-change dates.
      #
      # @return [Hash<String, String>] { table_name => latest_date }
      def load_migration_dates
        mapper = Mappers::MigrationMapper.new
        units = @reader.list_units(type: 'migration').filter_map { |e| @reader.find_unit(e['identifier']) }
        mapper.latest_changes(units)
      rescue StandardError
        {}
      end

      # Upsert a Notion page: find the existing page, update it if present, create it if not.
      #
      # @param database_id [String] Notion database UUID
      # @param title_value [String] Title text identifying the page
      # @param properties [Hash] Notion properties to write
      # @param parent_page_id [String, nil] When given, only a page related to
      #   this parent counts as the existing page
      # @return [String] Notion page ID
      def upsert_page(database_id:, title_value:, properties:, parent_page_id: nil)
        existing = find_existing_page(database_id, title_value, parent_page_id)

        if existing
          @client.update_page(page_id: existing['id'], properties: properties)
          existing['id']
        else
          result = @client.create_page(database_id: database_id, properties: properties)
          result['id']
        end
      end

      # Look up the page an upsert should update.
      #
      # Without a parent the lookup is by title only. With a parent the query
      # additionally requires the page's parent relation to contain that page
      # id; a title-only match would hand back whichever model's column page
      # happens to share the name, and the models would overwrite each other.
      #
      # @return [Hash, nil] First matching page or nil
      def find_existing_page(database_id, title_value, parent_page_id)
        return @client.find_page_by_title(database_id: database_id, title: title_value) unless parent_page_id

        response = @client.query_database(
          database_id: database_id,
          filter: {
            and: [
              { property: 'title', title: { equals: title_value } },
              { property: PARENT_RELATION_PROPERTY, relation: { contains: parent_page_id } }
            ]
          }
        )
        (response['results'] || []).first
      end

      # @return [Hash] Zeroed stats in the shape returned by the sync methods
      def empty_stats
        { synced: 0, errors: [] }
      end

      # Cap the error list at MAX_ERRORS entries, appending a summary line
      # that states how many were dropped.
      #
      # @param errors [Array<String>]
      # @return [Array<String>]
      def cap_errors(errors)
        return errors if errors.size <= MAX_ERRORS

        errors.first(MAX_ERRORS) + ["... and #{errors.size - MAX_ERRORS} more errors"]
      end

      # Pull the plain text out of a Notion title property (symbol-keyed hash).
      #
      # @param title_prop [Hash, nil]
      # @return [String] Title text, or '' when absent
      def extract_title_text(title_prop)
        title_prop&.dig(:title, 0, :text, :content) || ''
      end

      # IndexReader is required lazily, only when no reader was injected.
      #
      # @return [Object] IndexReader
      def build_reader(index_dir)
        require_relative '../mcp/index_reader'
        CodebaseIndex::MCP::IndexReader.new(index_dir)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'mappers/model_mapper'
4
+ require_relative 'mappers/column_mapper'
5
+ require_relative 'mappers/migration_mapper'
6
+
7
module CodebaseIndex
  module Notion
    # Lookup point that hands out the Notion mapper matching a unit type.
    #
    # @example
    #   mapper = Mapper.for("model")
    #   properties = mapper.map(unit_data)
    #
    class Mapper
      # Unit type name => mapper class.
      REGISTRY = {
        'model' => Mappers::ModelMapper,
        'column' => Mappers::ColumnMapper,
        'migration' => Mappers::MigrationMapper
      }.freeze

      class << self
        # Instantiate the mapper registered for a unit type.
        #
        # @param type [String] Unit type name (e.g. "model", "column", "migration")
        # @return [Object, nil] A new mapper instance, or nil when the type has no mapper
        def for(type)
          mapper_class = REGISTRY[type]
          mapper_class ? mapper_class.new : nil
        end

        # Names of every unit type that has a registered mapper.
        #
        # @return [Array<String>]
        def supported_types
          REGISTRY.keys
        end
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Notion
    module Mappers
      # Turns one column's metadata into the page properties of a Columns database entry.
      #
      # Every column of a model becomes its own Notion page; when the id of the
      # model's Data Models page is supplied, the page is linked to it through a
      # relation property.
      #
      # @example
      #   mapper = ColumnMapper.new
      #   properties = mapper.map(column, model_identifier: "User", validations: [...], parent_page_id: "page-123")
      #
      class ColumnMapper
        MAX_RICH_TEXT_LENGTH = 2000

        # Build the Notion Columns page properties for a single column.
        #
        # @param column [Hash] Column hash from metadata["columns"] (name, type, null, default)
        # @param model_identifier [String] Parent model name (accepted for context; currently unused)
        # @param validations [Array<Hash>] Model-level validations to match against this column
        # @param parent_page_id [String, nil] Notion page ID of the Data Models parent page
        # @return [Hash] Notion page properties hash
        def map(column, model_identifier: nil, validations: [], parent_page_id: nil) # rubocop:disable Lint/UnusedMethodArgument
          column_name = column['name']
          page = {
            'Column Name' => { title: [{ text: { content: column_name } }] },
            'Data Type' => { select: { name: column['type'] } },
            # Only a literal true counts as nullable.
            'Nullable' => { checkbox: column['null'] == true },
            'Default Value' => rich_text_property(column['default'].to_s),
            'Validation Rules' => rich_text_property(format_validation_rules(column_name, validations))
          }
          return page unless parent_page_id

          page.merge('Table' => { relation: [{ id: parent_page_id }] })
        end

        private

        # List the types of the validations declared on this column.
        #
        # @param column_name [String]
        # @param validations [Array<Hash>]
        # @return [String] Comma-separated validation types, or 'None'
        def format_validation_rules(column_name, validations)
          rule_types = validations.each_with_object([]) do |validation, found|
            found << validation['type'] if validation['attribute'] == column_name
          end

          rule_types.empty? ? 'None' : rule_types.join(', ')
        end

        # Wrap text in a Notion rich_text property, truncating with a trailing
        # "..." so the content never exceeds MAX_RICH_TEXT_LENGTH characters.
        #
        # @param text [String]
        # @return [Hash]
        def rich_text_property(text)
          body = text.to_s
          if body.length > MAX_RICH_TEXT_LENGTH
            kept = body[0, MAX_RICH_TEXT_LENGTH - 3]
            body = "#{kept}..."
          end

          { rich_text: [{ text: { content: body } }] }
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Notion
    module Mappers
      # Derives, per table, the most recent timestamp among the migration
      # ExtractedUnits that touch it.
      #
      # Used to update Data Models pages with the most recent schema change date.
      #
      # @example
      #   mapper = MigrationMapper.new
      #   changes = mapper.latest_changes(migration_units)
      #   # => { "users" => "2026-02-20T10:00:00Z", "posts" => "2026-01-15T09:00:00Z" }
      #
      class MigrationMapper
        # Compute the latest migration date for each affected table.
        #
        # Units without an extracted_at value are ignored, as are units that
        # list no tables under metadata.tables_affected. Timestamps are compared
        # with the values' own > operator (plain string ordering for strings).
        #
        # @param migration_units [Array<Hash>] Parsed migration ExtractedUnit JSONs
        # @return [Hash<String, String>] Table name to latest extracted_at timestamp
        def latest_changes(migration_units)
          latest = {}

          migration_units.each do |unit|
            stamp = unit['extracted_at']
            next unless stamp

            metadata = unit['metadata'] || {}
            (metadata['tables_affected'] || []).each do |table|
              known = latest[table]
              latest[table] = stamp if known.nil? || stamp > known
            end
          end

          latest
        end
      end
    end
  end
end