woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
data/exe/woods-console ADDED
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Embedded console MCP server — runs inside a Rails environment.
5
+ #
6
+ # Usage (via rake, recommended):
7
+ # bundle exec rake woods:console
8
+ #
9
+ # Usage (via rails runner):
10
+ # bundle exec rails runner "$(bundle show woods)/exe/woods-console"
11
+ #
12
+ # The rake task captures stdout before Rails boots and passes the fd via
13
+ # $woods_protocol_out. When run via rails runner, this script
14
+ # captures stdout itself to keep MCP protocol clean.
15
+
16
+ # Check if the rake task already captured stdout for us.
17
+ protocol_out = $woods_protocol_out # rubocop:disable Style/GlobalVars
18
+
19
+ unless protocol_out
20
+ # Running via rails runner — capture stdout ourselves (keep a duplicate for the protocol).
21
+ protocol_out = $stdout.dup
22
+ $stdout.reopen($stderr) # until the restore below, writes to $stdout go to stderr
23
+ end
24
+
25
+ require 'woods/console/server'
26
+
27
+ # Eager-load the app so ActiveRecord::Base.descendants below includes every model class.
28
+ Rails.application.eager_load!
29
+ # Registry: model name => column names, for each non-abstract model whose table exists.
30
+ registry = ActiveRecord::Base.descendants.each_with_object({}) do |model, hash|
31
+ next if model.abstract_class?
32
+ next unless model.table_exists?
33
+
34
+ hash[model.name] = model.column_names
35
+ rescue StandardError # a model that raises during these checks is left out of the registry
36
+ next
37
+ end
38
+
39
+ validator = Woods::Console::ModelValidator.new(registry: registry)
40
+ safe_context = Woods::Console::SafeContext.new(connection: ActiveRecord::Base.connection)
41
+ # Use the configured redacted columns when Woods exposes a configuration; otherwise none.
42
+ redacted_columns = if Woods.respond_to?(:configuration) && Woods.configuration
43
+ Array(Woods.configuration.console_redacted_columns)
44
+ else
45
+ []
46
+ end
47
+
48
+ server = Woods::Console::Server.build_embedded(
49
+ model_validator: validator,
50
+ safe_context: safe_context,
51
+ redacted_columns: redacted_columns
52
+ )
53
+
54
+ # Restore the protocol output for MCP transport, then close the spare handle.
55
+ $stdout.reopen(protocol_out)
56
+ protocol_out.close unless protocol_out.closed?
57
+
58
+ transport = MCP::Server::Transports::StdioTransport.new(server)
59
+ transport.open
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Console MCP server for querying live Rails application state.
5
+ #
6
+ # Usage:
7
+ # woods-console-mcp
8
+ # WOODS_CONSOLE_CONFIG=/path/to/console.yml woods-console-mcp
9
+ #
10
+ # Connects to a Rails application via a bridge process (Docker exec, direct,
11
+ # or SSH) and exposes read-only query tools via the Model Context Protocol
12
+ # (stdio transport).
13
+
14
+ require 'yaml'
15
+ require_relative '../lib/woods/console/server'
16
+
17
+ config_path = ENV.fetch('WOODS_CONSOLE_CONFIG', File.expand_path('~/.woods/console.yml'))
18
+ config = File.exist?(config_path) ? YAML.safe_load_file(config_path) : {}
19
+
20
+ server = Woods::Console::Server.build(config: config)
21
+ transport = MCP::Server::Transports::StdioTransport.new(server)
22
+ transport.open
data/exe/woods-mcp ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # MCP server for querying Woods extraction output.
5
+ #
6
+ # Usage:
7
+ # woods-mcp [INDEX_DIR]
8
+ # WOODS_DIR=/path/to/output woods-mcp
9
+ #
10
+ # Reads JSON files from the extraction output directory and exposes
11
+ # them via the Model Context Protocol (stdio transport).
12
+ # Does NOT require Rails — only reads pre-extracted data.
13
+
14
+ require_relative '../lib/woods'
15
+ require_relative '../lib/woods/dependency_graph'
16
+ require_relative '../lib/woods/graph_analyzer'
17
+ require_relative '../lib/woods/mcp/server'
18
+ require_relative '../lib/woods/mcp/bootstrapper'
19
+ require_relative '../lib/woods/embedding/text_preparer'
20
+ require_relative '../lib/woods/embedding/indexer'
21
+
22
+ index_dir = Woods::MCP::Bootstrapper.resolve_index_dir(ARGV)
23
+ retriever = Woods::MCP::Bootstrapper.build_retriever
24
+ snapshot_store = Woods::MCP::Bootstrapper.build_snapshot_store(index_dir)
25
+
26
+ server = Woods::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
27
+
28
+ # Pin protocol version for broad client compatibility (Claude Code, Cursor, etc.)
29
+ if ENV['MCP_PROTOCOL_VERSION']
30
+ server.configuration = MCP::Configuration.new(protocol_version: ENV['MCP_PROTOCOL_VERSION'])
31
+ end
32
+
33
+ transport = MCP::Server::Transports::StdioTransport.new(server)
34
+ transport.open
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # MCP server for querying Woods extraction output over HTTP.
5
+ #
6
+ # Usage:
7
+ # woods-mcp-http [INDEX_DIR]
8
+ # WOODS_DIR=/path/to/output woods-mcp-http
9
+ #
10
+ # Reads JSON files from the extraction output directory and exposes
11
+ # them via the Model Context Protocol (Streamable HTTP transport).
12
+ # Requires the `rackup` gem and a Rack-compatible server (e.g., puma).
13
+
14
+ require 'rackup'
15
+ require_relative '../lib/woods'
16
+ require_relative '../lib/woods/dependency_graph'
17
+ require_relative '../lib/woods/graph_analyzer'
18
+ require_relative '../lib/woods/mcp/server'
19
+ require_relative '../lib/woods/mcp/bootstrapper'
20
+ require_relative '../lib/woods/embedding/text_preparer'
21
+ require_relative '../lib/woods/embedding/indexer'
22
+
23
+ index_dir = Woods::MCP::Bootstrapper.resolve_index_dir(ARGV)
24
+ retriever = Woods::MCP::Bootstrapper.build_retriever
25
+ snapshot_store = Woods::MCP::Bootstrapper.build_snapshot_store(index_dir)
26
+
27
+ port = (ENV['PORT'] || 9292).to_i
28
+ host = ENV['HOST'] || 'localhost'
29
+
30
+ server = Woods::MCP::Server.build(index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store)
31
+ transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
32
+ server.transport = transport
33
+
34
+ app = proc { |env| transport.handle_request(Rack::Request.new(env)) }
35
+
36
+ warn "Woods MCP HTTP server starting on http://#{host}:#{port}"
37
+ Rackup::Handler.default.run(app, Port: port, Host: host)
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env bash
2
+ # Self-healing wrapper for the Woods MCP server.
3
+ # Ensures Ruby dependencies are installed, validates the index directory,
4
+ # and starts the stdio MCP server.
5
+ #
6
+ # Usage (direct):
7
+ # woods-mcp-start /path/to/index_dir
8
+ #
9
+ # Usage (.mcp.json):
10
+ # {
11
+ # "command": "${HOME}/work/woods/exe/woods-mcp-start",
12
+ # "args": ["${HOME}/my-rails-app/tmp/woods"]
13
+ # }
14
+ #
15
+ # All diagnostic output goes to stderr to keep stdio clean for MCP protocol.
16
+
17
+ set -euo pipefail
18
+
19
+ script_dir="$(cd "$(dirname "$0")" && pwd)"
20
+ gem_root="$(cd "$script_dir/.." && pwd)"
21
+ index_dir="${1:-${WOODS_DIR:-}}"
22
+
23
+ # --- Validate index directory: must be given, exist, and hold a manifest.json ---
24
+ if [[ -z "$index_dir" ]]; then
25
+ printf '%s\n' "Error: No index directory specified." >&2
26
+ printf '%s\n' "Usage: woods-mcp-start /path/to/index_dir" >&2
27
+ exit 1
28
+ fi
29
+
30
+ if [[ ! -d "$index_dir" ]]; then
31
+ printf '%s\n' "Error: Index directory does not exist: $index_dir" >&2
32
+ printf '%s\n' "Run extraction first: bundle exec rake woods:extract" >&2
33
+ exit 1
34
+ fi
35
+
36
+ if [[ ! -f "$index_dir/manifest.json" ]]; then
37
+ printf '%s\n' "Error: No manifest.json in: $index_dir" >&2
38
+ printf '%s\n' "Run extraction first: bundle exec rake woods:extract" >&2
39
+ exit 1
40
+ fi
41
+
42
+ # --- Ensure Ruby dependencies are installed (bundler output never reaches stdout) ---
43
+ export BUNDLE_GEMFILE="${gem_root}/Gemfile"
44
+
45
+ if ! bundle check &> /dev/null; then
46
+ printf '%s\n' "Installing woods dependencies..." >&2
47
+ if ! bundle install --quiet 1>&2; then
48
+ printf '%s\n' "Error: bundle install failed. Check Ruby version and network." >&2
49
+ exit 1
50
+ fi
51
+ printf '%s\n' "Dependencies installed." >&2
52
+ fi
53
+
54
+ # --- Pin MCP protocol version for Claude Code compatibility (caller's value wins) ---
55
+ export MCP_PROTOCOL_VERSION="${MCP_PROTOCOL_VERSION:-2024-11-05}"
56
+
57
+ # --- Replace this shell with the stdio MCP server ---
58
+ exec bundle exec ruby "${gem_root}/exe/woods-mcp" "$index_dir"
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+ require 'rails/generators/active_record'
5
+
6
+ module Woods
7
+ module Generators
8
+ # Rails generator that creates a migration for Woods tables.
9
+ #
10
+ # Usage:
11
+ # rails generate woods:install
12
+ #
13
+ # Creates a migration with woods_units, woods_edges, and
14
+ # woods_embeddings tables. Works with PostgreSQL, MySQL, and SQLite.
15
+ #
16
+ class InstallGenerator < Rails::Generators::Base
17
+ include ActiveRecord::Generators::Migration
18
+
19
+ source_root File.expand_path('templates', __dir__)
20
+
21
+ desc 'Creates a migration for Woods tables (units, edges, embeddings)'
22
+
23
+ # @return [void]
24
+ def create_migration_file
25
+ migration_template(
26
+ 'create_woods_tables.rb.erb',
27
+ 'db/migrate/create_woods_tables.rb'
28
+ )
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+ require 'rails/generators/active_record'
5
+
6
+ module Woods
7
+ module Generators
8
+ # Rails generator that adds pgvector support to Woods.
9
+ #
10
+ # Requires the pgvector PostgreSQL extension. Adds a native vector column
11
+ # and HNSW index to the woods_embeddings table.
12
+ #
13
+ # Usage:
14
+ # rails generate woods:pgvector
15
+ # rails generate woods:pgvector --dimensions 3072
16
+ #
17
+ class PgvectorGenerator < Rails::Generators::Base
18
+ include ActiveRecord::Generators::Migration
19
+
20
+ source_root File.expand_path('templates', __dir__)
21
+
22
+ desc 'Adds pgvector native vector column and HNSW index to woods_embeddings'
23
+
24
+ class_option :dimensions, type: :numeric, default: 1536,
25
+ desc: 'Vector dimensions (1536 for text-embedding-3-small, 3072 for large)'
26
+
27
+ # @return [void]
28
+ def create_migration_file
29
+ @dimensions = options[:dimensions]
30
+ migration_template(
31
+ 'add_pgvector_to_woods.rb.erb',
32
+ 'db/migrate/add_pgvector_to_woods.rb'
33
+ )
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,15 @@
1
+ class AddPgvectorToWoods < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ enable_extension 'vector' unless extension_enabled?('vector')
4
+
5
+ add_column :woods_embeddings, :embedding_vector, :vector,
6
+ limit: <%= @dimensions || 1536 %>, null: true
7
+ <%- if (@dimensions || 1536) <= 2000 -%>
8
+ # HNSW index (cosine distance, vector_cosine_ops) for fast approximate nearest neighbor search
9
+ add_index :woods_embeddings, :embedding_vector, using: :hnsw,
10
+ opclass: :vector_cosine_ops, name: 'idx_woods_embeddings_vector_hnsw'
11
+ <%- else -%>
12
+ # No HNSW index: pgvector cannot index vector columns above 2,000 dimensions.
13
+ <%- end -%>
14
+ end
15
+ end
@@ -0,0 +1,43 @@
1
+ class CreateWoodsTables < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :woods_units do |t|
4
+ t.string :unit_type, null: false
5
+ t.string :identifier, null: false
6
+ t.string :namespace
7
+ t.string :file_path, null: false
8
+ t.text :source_code
9
+ t.string :source_hash
10
+ t.json :metadata
11
+
12
+ t.timestamps
13
+ end
14
+
15
+ add_index :woods_units, :unit_type
16
+ add_index :woods_units, :identifier, unique: true
17
+ add_index :woods_units, :file_path
18
+ # Edges link two woods_units rows; each (source, target, relationship) triple is unique.
19
+ create_table :woods_edges do |t|
20
+ t.references :source, null: false, foreign_key: { to_table: :woods_units }
21
+ t.references :target, null: false, foreign_key: { to_table: :woods_units }
22
+ t.string :relationship, null: false
23
+ t.string :via
24
+
25
+ t.datetime :created_at, null: false
26
+ end
27
+
28
+ add_index :woods_edges, %i[source_id target_id relationship], unique: true,
29
+ name: 'idx_woods_edges_unique'
30
+ # Embeddings belong to a woods_units row and are looked up by content_hash.
31
+ create_table :woods_embeddings do |t|
32
+ t.references :unit, null: false, foreign_key: { to_table: :woods_units }
33
+ t.string :chunk_type
34
+ t.text :embedding, null: false
35
+ t.string :content_hash, null: false
36
+ t.integer :dimensions, null: false
37
+
38
+ t.datetime :created_at, null: false
39
+ end
40
+
41
+ add_index :woods_embeddings, :content_hash
42
+ end
43
+ end