codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
@@ -1,223 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'shared_utility_methods'
4
- require_relative 'shared_dependency_scanner'
5
-
6
- module CodebaseIndex
7
- module Extractors
8
- # PunditExtractor handles Pundit authorization policy extraction.
9
- #
10
- # Specifically targets Pundit convention: classes in `app/policies/`
11
- # that inherit from ApplicationPolicy or follow Pundit patterns
12
- # (user/record attrs, action? methods). This is distinct from the
13
- # generic PolicyExtractor which handles domain eligibility policies.
14
- #
15
- # @example
16
- # extractor = PunditExtractor.new
17
- # units = extractor.extract_all
18
- # post_policy = units.find { |u| u.identifier == "PostPolicy" }
19
- #
20
- class PunditExtractor
21
- include SharedUtilityMethods
22
- include SharedDependencyScanner
23
-
24
- # Directories to scan for Pundit policies
25
- PUNDIT_DIRECTORIES = %w[
26
- app/policies
27
- ].freeze
28
-
29
- # Standard Pundit action methods
30
- PUNDIT_ACTIONS = %w[index? show? create? new? update? edit? destroy?].freeze
31
-
32
# Resolve the configured policy directories against Rails.root and
# remember only the ones that exist on disk.
def initialize
  candidates = PUNDIT_DIRECTORIES.map { |relative| Rails.root.join(relative) }
  @directories = candidates.select { |path| path.directory? }
end
36
-
37
- # Extract all Pundit policy classes
38
- #
39
- # @return [Array<ExtractedUnit>] List of Pundit policy units
40
def extract_all
  @directories.each_with_object([]) do |dir, units|
    Dir[dir.join('**/*.rb')].each do |file|
      # extract_pundit_file yields nil for files that are not Pundit policies.
      unit = extract_pundit_file(file)
      units << unit if unit
    end
  end
end
47
-
48
- # Extract a single Pundit policy file
49
- #
50
- # @param file_path [String] Path to the policy file
51
- # @return [ExtractedUnit, nil] The extracted unit or nil if not a Pundit policy
52
def extract_pundit_file(file_path)
  contents = File.read(file_path)
  policy_class = extract_class_name(file_path, contents)

  # Bail out when no class name is available or the source shows no Pundit markers.
  return nil unless policy_class && pundit_policy?(contents)

  ExtractedUnit.new(
    type: :pundit_policy,
    identifier: policy_class,
    file_path: file_path
  ).tap do |unit|
    unit.namespace = extract_namespace(policy_class)
    unit.source_code = annotate_source(contents, policy_class)
    unit.metadata = extract_metadata(contents, policy_class)
    unit.dependencies = extract_dependencies(contents, policy_class)
  end
rescue StandardError => e
  # Best effort: a failing file is logged and skipped.
  Rails.logger.error("Failed to extract Pundit policy #{file_path}: #{e.message}")
  nil
end
75
-
76
- private
77
-
78
- # ──────────────────────────────────────────────────────────────────────
79
- # Class Discovery
80
- # ──────────────────────────────────────────────────────────────────────
81
-
82
- # Extract class name from source or infer from file path.
83
- #
84
- # @param file_path [String]
85
- # @param source [String]
86
- # @return [String, nil]
87
def extract_class_name(file_path, source)
  # Prefer the first class declared in the source itself.
  declared = source[/^\s*class\s+([\w:]+)/, 1]
  return declared if declared

  # Fallback: derive a constant name from the path below app/policies.
  # Only the trailing ".rb" extension is removed; a ".rb" occurring earlier
  # in the path (e.g. in a directory name) must not be touched.
  file_path
    .sub("#{Rails.root}/", '')
    .sub(%r{\Aapp/policies/}, '')
    .delete_suffix('.rb')
    .split('/')
    .map { |segment| segment.split('_').map(&:capitalize).join }
    .join('::')
end
98
-
99
- # Detect whether this is a Pundit policy.
100
- #
101
- # @param source [String] Ruby source code
102
- # @return [Boolean]
103
def pundit_policy?(source)
  # 1. Inherits from ApplicationPolicy.
  return true if source.match?(/< ApplicationPolicy/)
  # 2. Exposes both user and record readers.
  return true if source.match?(/attr_reader\s+:user/) && source.match?(/attr_reader.*:record/)

  # 3. Initializer takes user first and at least one predicate method is defined.
  source.match?(/def\s+initialize\s*\(\s*user\s*,/) && source.match?(/def\s+\w+\?/)
end
108
-
109
- # ──────────────────────────────────────────────────────────────────────
110
- # Source Annotation
111
- # ──────────────────────────────────────────────────────────────────────
112
-
113
- # @param source [String]
114
- # @param class_name [String]
115
- # @return [String]
116
def annotate_source(source, class_name)
  model_label = infer_model(class_name).to_s
  action_list = detect_authorization_actions(source).join(', ')

  # Comment banner summarising the policy, followed by a blank line and the raw source.
  banner = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Pundit Policy: #{class_name.ljust(53)}║",
    "# ║ Model: #{model_label.ljust(61)}║",
    "# ║ Actions: #{action_list.ljust(59)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝'
  ]

  "#{banner.join("\n")}\n\n#{source}\n"
end
130
-
131
- # ──────────────────────────────────────────────────────────────────────
132
- # Metadata Extraction
133
- # ──────────────────────────────────────────────────────────────────────
134
-
135
- # @param source [String]
136
- # @param class_name [String]
137
- # @return [Hash]
138
def extract_metadata(source, class_name)
  detected = detect_authorization_actions(source)
  # Lines of code: non-blank lines that are not pure comments.
  code_lines = source.each_line.count do |raw|
    text = raw.strip
    !text.empty? && !text.start_with?('#')
  end

  {
    model: infer_model(class_name),
    authorization_actions: detected,
    standard_actions: detected & PUNDIT_ACTIONS,
    custom_actions: detected - PUNDIT_ACTIONS,
    # match? already returns a strict boolean.
    has_scope_class: source.match?(/class\s+Scope\b/),
    inherits_application_policy: source.match?(/< ApplicationPolicy/),
    public_methods: extract_public_methods(source),
    class_methods: extract_class_methods(source),
    loc: code_lines,
    method_count: source.scan(/def\s+(?:self\.)?\w+/).size
  }
end
153
-
154
- # Detect authorization action methods (public methods ending in ?).
155
- #
156
- # @param source [String]
157
- # @return [Array<String>]
158
def detect_authorization_actions(source)
  actions = []
  restricted = false # true while below a bare `private` / `protected`
  scope_depth = 0    # > 0 while inside the nested Scope class

  source.each_line do |line|
    stripped = line.strip
    # Blank lines and comment lines can neither define actions nor change nesting.
    next if stripped.empty? || stripped.start_with?('#')

    # Everything inside `class Scope ... end` is skipped. Depth tracking
    # counts def/if/case/... openers as well as `end`, so the first method's
    # `end` inside Scope no longer terminates the skip prematurely.
    if scope_depth.positive? || stripped.match?(/class\s+Scope\b/)
      scope_depth += scope_nesting_delta(stripped)
      scope_depth = 0 if scope_depth.negative?
      next
    end

    case stripped
    when 'private', 'protected' then restricted = true
    when 'public' then restricted = false
    end
    next if restricted

    actions << ::Regexp.last_match(1) if stripped =~ /def\s+(\w+\?)/
  end

  actions.uniq
end

# Net change in block nesting contributed by one stripped source line.
# Line-based heuristic (not a parser): counts keyword openers at the start
# of the line, trailing `do` blocks and `x = if/case/begin` assignments,
# minus standalone `end` keywords.
#
# @param stripped [String] a single source line without surrounding whitespace
# @return [Integer] openers minus closers
def scope_nesting_delta(stripped)
  opens = 0
  # Endless methods (`def foo = expr`) have no matching `end`.
  endless_def = stripped.match?(/\Adef\s+[\w.]+[?!]?\s*(?:\([^)]*\))?\s*=\s/)
  keyword_led = stripped.match?(/\A(?:class|module|def|if|unless|case|while|until|for|begin)\b/)
  opens += 1 if keyword_led && !endless_def
  # `while cond do` is a single construct; do not count its `do` separately.
  loop_led = stripped.match?(/\A(?:while|until|for)\b/)
  opens += 1 if !loop_led && stripped.match?(/\bdo\b\s*(?:\|[^|]*\|)?\s*\z/)
  opens += 1 if stripped.match?(/=\s*(?:if|unless|case|begin)\b/)
  # Ignore `.end`, `:end`, `end:` and similar non-keyword occurrences.
  closes = stripped.scan(/(?<![\w.:@$])end\b(?![?!:=])/).size
  opens - closes
end
195
-
196
- # Infer the model name from the policy class name.
197
- #
198
- # @param class_name [String]
199
- # @return [String]
200
def infer_model(class_name)
  # Drop any namespace, then a trailing "Policy" suffix.
  class_name.split('::').last.delete_suffix('Policy')
end
204
-
205
- # ──────────────────────────────────────────────────────────────────────
206
- # Dependency Extraction
207
- # ──────────────────────────────────────────────────────────────────────
208
-
209
- # @param source [String]
210
- # @param class_name [String]
211
- # @return [Array<Hash>]
212
def extract_dependencies(source, class_name)
  # The policy always depends on the model it authorizes.
  authorized = { type: :model, target: infer_model(class_name), via: :authorization }
  scanned = scan_model_dependencies(source) + scan_service_dependencies(source)

  # First occurrence wins when type and target coincide.
  [authorized, *scanned].uniq { |dep| dep.values_at(:type, :target) }
end
221
- end
222
- end
223
- end
@@ -1,473 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Extractors
5
- # RailsSourceExtractor indexes selected parts of the Rails framework
6
- # and key gems for version-specific accuracy.
7
- #
8
- # This enables queries like "what options does has_many support" or
9
- # "how does Rails implement callbacks" to return accurate answers
10
- # for the exact versions in use.
11
- #
12
- # Only high-value, frequently-referenced code is indexed to avoid bloat.
13
- #
14
- # @example
15
- # extractor = RailsSourceExtractor.new
16
- # units = extractor.extract_all
17
- # # Returns units for ActiveRecord associations, callbacks, etc.
18
- #
19
- class RailsSourceExtractor
20
- # High-value Rails paths to index
21
- RAILS_PATHS = {
22
- 'activerecord' => [
23
- 'lib/active_record/associations',
24
- 'lib/active_record/callbacks.rb',
25
- 'lib/active_record/validations',
26
- 'lib/active_record/relation',
27
- 'lib/active_record/querying.rb',
28
- 'lib/active_record/scoping',
29
- 'lib/active_record/transactions.rb',
30
- 'lib/active_record/persistence.rb',
31
- 'lib/active_record/attribute_methods',
32
- 'lib/active_record/enum.rb',
33
- 'lib/active_record/store.rb',
34
- 'lib/active_record/nested_attributes.rb'
35
- ],
36
- 'actionpack' => [
37
- 'lib/action_controller/metal',
38
- 'lib/action_controller/callbacks.rb',
39
- 'lib/abstract_controller/callbacks.rb',
40
- 'lib/action_controller/rendering.rb',
41
- 'lib/action_controller/redirecting.rb',
42
- 'lib/action_controller/params_wrapper.rb'
43
- ],
44
- 'activesupport' => [
45
- 'lib/active_support/callbacks.rb',
46
- 'lib/active_support/concern.rb',
47
- 'lib/active_support/configurable.rb',
48
- 'lib/active_support/core_ext/module/delegation.rb',
49
- 'lib/active_support/core_ext/object/inclusion.rb'
50
- ],
51
- 'activejob' => [
52
- 'lib/active_job/callbacks.rb',
53
- 'lib/active_job/enqueuing.rb',
54
- 'lib/active_job/execution.rb',
55
- 'lib/active_job/exceptions.rb'
56
- ],
57
- 'actionmailer' => [
58
- 'lib/action_mailer/base.rb',
59
- 'lib/action_mailer/delivery_methods.rb',
60
- 'lib/action_mailer/callbacks.rb'
61
- ]
62
- }.freeze
63
-
64
- # Common gems worth indexing (configure based on project)
65
- GEM_CONFIGS = {
66
- 'devise' => {
67
- paths: ['lib/devise/models', 'lib/devise/controllers', 'lib/devise/strategies'],
68
- priority: :high
69
- },
70
- 'pundit' => {
71
- paths: ['lib/pundit.rb', 'lib/pundit'],
72
- priority: :high
73
- },
74
- 'sidekiq' => {
75
- paths: ['lib/sidekiq/worker.rb', 'lib/sidekiq/job.rb', 'lib/sidekiq/client.rb'],
76
- priority: :high
77
- },
78
- 'activeadmin' => {
79
- paths: ['lib/active_admin/dsl.rb', 'lib/active_admin/resource_dsl.rb'],
80
- priority: :medium
81
- },
82
- 'cancancan' => {
83
- paths: ['lib/cancan/ability.rb', 'lib/cancan/controller_additions.rb'],
84
- priority: :high
85
- },
86
- 'friendly_id' => {
87
- paths: ['lib/friendly_id'],
88
- priority: :medium
89
- },
90
- 'paper_trail' => {
91
- paths: ['lib/paper_trail/has_paper_trail.rb', 'lib/paper_trail/model_config.rb'],
92
- priority: :medium
93
- },
94
- 'aasm' => {
95
- paths: ['lib/aasm'],
96
- priority: :high
97
- },
98
- 'phlex' => {
99
- paths: ['lib/phlex'],
100
- priority: :high
101
- },
102
- 'dry-monads' => {
103
- paths: ['lib/dry/monads'],
104
- priority: :medium
105
- }
106
- }.freeze
107
-
108
# Capture the running Rails version and start with an empty gem-version
# table, which extract_gem_sources fills in as gems are located.
def initialize
  @gem_versions = {}
  @rails_version = Rails.version
end
112
-
113
- # Extract Rails framework and gem source
114
- #
115
- # @return [Array<ExtractedUnit>] List of framework/gem source units
116
def extract_all
  # Framework units first, then configured gem units; nil entries are dropped.
  [*extract_rails_sources, *extract_gem_sources].compact
end
127
-
128
- # ──────────────────────────────────────────────────────────────────────
129
- # Rails Framework Extraction
130
- # ──────────────────────────────────────────────────────────────────────
131
-
132
- # Extract only Rails framework sources
133
def extract_rails_sources
  RAILS_PATHS.each_with_object([]) do |(component, relative_paths), collected|
    root = find_gem_path(component)
    next unless root

    relative_paths.each do |relative_path|
      target = root.join(relative_path)
      # A configured entry is either a directory (walked recursively) or a
      # single file; entries that do not exist are skipped.
      files =
        if target.directory?
          Dir[target.join('**/*.rb')]
        elsif target.exist?
          [target.to_s]
        else
          []
        end

      files.each do |file|
        unit = extract_framework_file(component, file)
        collected << unit if unit
      end
    end
  end
end
157
-
158
- # ──────────────────────────────────────────────────────────────────────
159
- # Gem Source Extraction
160
- # ──────────────────────────────────────────────────────────────────────
161
-
162
- # Extract gem sources
163
def extract_gem_sources
  GEM_CONFIGS.each_with_object([]) do |(gem_name, config), collected|
    root = find_gem_path(gem_name)
    next unless root

    # Record the version before extracting: extract_gem_file reads it back.
    @gem_versions[gem_name] = gem_version(gem_name)

    config[:paths].each do |relative_path|
      target = root.join(relative_path)
      files =
        if target.directory?
          Dir[target.join('**/*.rb')]
        elsif target.exist?
          [target.to_s]
        else
          []
        end

      files.each do |file|
        unit = extract_gem_file(gem_name, config[:priority], file)
        collected << unit if unit
      end
    end
  end
end
189
-
190
- private
191
-
192
- # ──────────────────────────────────────────────────────────────────────
193
- # Gem Discovery
194
- # ──────────────────────────────────────────────────────────────────────
195
-
196
# Locate the installation directory of a gem.
#
# @param gem_name [String]
# @return [Pathname, nil] the gem directory, or nil when RubyGems has no spec for the gem
def find_gem_path(gem_name)
  Pathname.new(Gem::Specification.find_by_name(gem_name).gem_dir)
rescue Gem::MissingSpecError
  nil
end
202
-
203
# Look up the installed version of a gem.
#
# @param gem_name [String]
# @return [String] the version string, or 'unknown' when it cannot be determined
def gem_version(gem_name)
  Gem::Specification.find_by_name(gem_name).version.to_s
rescue Gem::MissingSpecError, StandardError
  # Gem::MissingSpecError descends from LoadError (a ScriptError), so it is
  # not covered by StandardError and has to be listed explicitly.
  'unknown'
end
208
-
209
- # ──────────────────────────────────────────────────────────────────────
210
- # File Extraction
211
- # ──────────────────────────────────────────────────────────────────────
212
-
213
# Build a :rails_source unit for a single framework file.
#
# @param component [String] name of the Rails component gem the file belongs to
# @param file_path [String] path of the file to read
# @return [ExtractedUnit, nil] nil when reading or analysis raises
def extract_framework_file(component, file_path)
  contents = File.read(file_path)
  # Path below the ".../gems/<dir>/" prefix.
  relative = file_path.sub(%r{.*/gems/[^/]+/}, '')

  ExtractedUnit.new(
    type: :rails_source,
    identifier: "rails/#{component}/#{relative}",
    file_path: file_path
  ).tap do |unit|
    unit.source_code = annotate_framework_source(contents, component, relative)

    # Computed once and reused for both the metadata and the importance rating.
    api_methods = extract_public_api(contents)
    dsl = extract_dsl_methods(contents)

    unit.metadata = {
      rails_version: @rails_version,
      component: component,
      relative_path: relative,
      defined_modules: extract_module_names(contents),
      defined_classes: extract_class_names(contents),
      public_methods: api_methods,
      dsl_methods: dsl,
      option_definitions: extract_option_definitions(contents),
      is_public_api: public_api_file?(relative),
      importance: rate_importance(relative, contents, public_methods: api_methods, dsl_methods: dsl)
    }
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract Rails source #{file_path}: #{e.message}")
  nil
end
255
-
256
# Build a :gem_source unit for a single gem file.
#
# @param gem_name [String]
# @param priority [Symbol] priority taken from the gem's configuration entry
# @param file_path [String] path of the file to read
# @return [ExtractedUnit, nil] nil when reading or analysis raises
def extract_gem_file(gem_name, priority, file_path)
  contents = File.read(file_path)
  # Path below the ".../gems/<dir>/" prefix.
  relative = file_path.sub(%r{.*/gems/[^/]+/}, '')

  ExtractedUnit.new(
    type: :gem_source,
    identifier: "gems/#{gem_name}/#{relative}",
    file_path: file_path
  ).tap do |unit|
    unit.source_code = annotate_gem_source(contents, gem_name, relative)
    unit.metadata = {
      gem_name: gem_name,
      gem_version: @gem_versions[gem_name],
      relative_path: relative,
      priority: priority,
      defined_modules: extract_module_names(contents),
      defined_classes: extract_class_names(contents),
      public_methods: extract_public_api(contents),
      mixins_provided: extract_mixins(contents),
      configuration_options: extract_configuration(contents)
    }
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract gem source #{file_path}: #{e.message}")
  nil
end
289
-
290
- # ──────────────────────────────────────────────────────────────────────
291
- # Source Annotation
292
- # ──────────────────────────────────────────────────────────────────────
293
-
294
# Prefix framework source with a comment banner naming the Rails version,
# the component and the file.
#
# @param source [String]
# @param component [String]
# @param relative [String]
# @return [String] banner, blank line, source, trailing newline
def annotate_framework_source(source, component, relative)
  banner = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Rails #{@rails_version} - #{component.ljust(55)}║",
    "# ║ File: #{relative.ljust(62)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝'
  ]

  "#{banner.join("\n")}\n\n#{source}\n"
end
304
-
305
# Prefix gem source with a comment banner naming the gem, its recorded
# version ('unknown' when none was recorded) and the file.
#
# @param source [String]
# @param gem_name [String]
# @param relative [String]
# @return [String] banner, blank line, source, trailing newline
def annotate_gem_source(source, gem_name, relative)
  resolved_version = @gem_versions[gem_name] || 'unknown'
  # Padding shrinks as the gem name grows.
  padded_version = resolved_version.ljust(55 - gem_name.length)

  banner = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Gem: #{gem_name} v#{padded_version}║",
    "# ║ File: #{relative.ljust(62)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝'
  ]

  "#{banner.join("\n")}\n\n#{source}\n"
end
317
-
318
- # ──────────────────────────────────────────────────────────────────────
319
- # Code Analysis
320
- # ──────────────────────────────────────────────────────────────────────
321
-
322
# Names of modules opened at the start of a line, in order of first appearance.
#
# @param source [String]
# @return [Array<String>]
def extract_module_names(source)
  source.scan(/^\s*module\s+([\w:]+)/).map(&:first).uniq
end
325
-
326
# Names of classes opened at the start of a line, in order of first appearance.
#
# @param source [String]
# @return [Array<String>]
def extract_class_names(source)
  source.scan(/^\s*class\s+([\w:]+)/).map(&:first).uniq
end
329
-
330
# Collect the methods a source file exposes publicly, using a line-based scan.
#
# A method is skipped when it sits below a bare `private` or `protected`
# keyword (until a bare `public`), when its `def` is prefixed inline with
# `private` / `protected` / `private_class_method`, when the line is a
# comment (e.g. a documentation example), or when its name, ignoring a
# `self.` prefix, starts with an underscore.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] one hash per method with :name, :signature
#   (parenthesised parameter list or nil) and :class_method (Boolean)
def extract_public_api(source)
  methods = []
  hidden = false

  source.each_line do |line|
    stripped = line.strip
    next if stripped.start_with?('#')

    case stripped
    when 'private', 'protected' then hidden = true
    when 'public' then hidden = false
    end
    next if hidden
    # Inline visibility modifiers apply to this one definition only.
    next if stripped.match?(/\A(?:private|protected|private_class_method)\s+def\b/)
    next unless stripped =~ /def\s+((?:self\.)?\w+[?!=]?)(\(.*?\))?/

    method_name = ::Regexp.last_match(1)
    signature = ::Regexp.last_match(2)
    next if method_name.delete_prefix('self.').start_with?('_')

    methods << {
      name: method_name,
      signature: signature,
      class_method: method_name.start_with?('self.')
    }
  end

  methods
end
355
-
356
- # Extract DSL-style methods (like has_many, validates, etc.)
357
def extract_dsl_methods(source)
  # Tried in order:
  #   1. class methods whose definition line carries a comment mentioning DSL
  #   2. methods followed by a ":call-seq:" comment
  #   3. the first method defined after each `class_methods do`
  dsl_patterns = [
    /def\s+self\.(\w+).*?#.*?DSL/i,
    /def\s+(\w+)\(.*?\)\s*#\s*:call-seq:/,
    /class_methods\s+do.*?def\s+(\w+)/m
  ]

  dsl_patterns.flat_map { |pattern| source.scan(pattern).flatten }.uniq
end
371
-
372
- # Extract option hashes and their documentation
373
def extract_option_definitions(source)
  # Array constants named *_OPTIONS or VALID_*; their symbol entries become the values.
  from_constants = source.scan(/(\w+_OPTIONS|VALID_\w+)\s*=\s*\[(.*?)\]/m).map do |const, values|
    { constant: const, values: values.scan(/:(\w+)/).flatten }
  end

  # Comment lines shaped like "# name - description".
  from_comments = source.scan(/# (\w+) - (.+)$/).map do |opt, desc|
    { name: opt, description: desc }
  end

  from_constants + from_comments
end
391
-
392
- # ──────────────────────────────────────────────────────────────────────
393
- # Importance Rating
394
- # ──────────────────────────────────────────────────────────────────────
395
-
396
- # Determine if this is a public API file worth prioritizing
397
def public_api_file?(relative_path)
  # A path qualifies as soon as any one of these alternatives matches.
  public_pattern = Regexp.union(
    %r{associations/builder},
    /callbacks\.rb$/,
    /validations\.rb$/,
    /base\.rb$/,
    %r{/metal/[^/]+\.rb$}
  )

  relative_path.match?(public_pattern)
end
408
-
409
- # Rate importance for retrieval ranking
410
def rate_importance(relative_path, source, public_methods: nil, dsl_methods: nil)
  # Reuse analysis results supplied by the caller; compute them otherwise.
  api_size = (public_methods || extract_public_api(source)).size
  dsl_present = (dsl_methods || extract_dsl_methods(source)).any?

  # Each satisfied criterion contributes its weight to the score.
  points = [
    [3, relative_path.match?(/associations|callbacks|validations/)],
    [2, api_size > 10],
    [2, dsl_present],
    [1, source.include?('# Options:')]
  ].sum { |weight, satisfied| satisfied ? weight : 0 }

  if points <= 2
    :low
  elsif points <= 5
    :medium
  else
    :high
  end
end
433
-
434
- # ──────────────────────────────────────────────────────────────────────
435
- # Gem-Specific Analysis
436
- # ──────────────────────────────────────────────────────────────────────
437
-
438
- # Extract mixin modules provided by a gem
439
def extract_mixins(source)
  # Modules followed (anywhere later in the file) by a `def self.included` hook.
  hook_based = source.scan(/module\s+(\w+).*?def\s+self\.included/m).map(&:first)

  # ActiveSupport::Concern pattern: when the file extends Concern and later
  # opens `module ClassMethods`, report the first module that precedes an
  # `extend ActiveSupport::Concern`.
  concern_based = []
  if source.match?(/extend\s+ActiveSupport::Concern.*?module\s+ClassMethods/m)
    owner = source.match(/module\s+(\w+).*?extend\s+ActiveSupport::Concern/m)
    concern_based << owner[1] if owner
  end

  (hook_based + concern_based).uniq
end
454
-
455
- # Extract configuration options provided by a gem
456
def extract_configuration(source)
  # `config.<name> =` assignments (Railtie-style configuration).
  assigned = source.scan(/config\.(\w+)\s*=/).flatten
  # Class-level settings declared via mattr_accessor / cattr_accessor.
  declared = source.scan(/(?:mattr|cattr)_accessor\s+:(\w+)/).flatten

  (assigned + declared).uniq
end
471
- end
472
- end
473
- end