woods 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +186 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +69 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +210 -0
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +771 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +163 -0
  102. data/lib/woods/unblocked/document_builder.rb +326 -0
  103. data/lib/woods/unblocked/exporter.rb +201 -0
  104. data/lib/woods/unblocked/rate_limiter.rb +94 -0
  105. data/lib/woods/util/host_guard.rb +61 -0
  106. data/lib/woods/version.rb +1 -1
  107. data/lib/woods.rb +130 -6
  108. metadata +73 -4
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Console
5
+ # Shape-aware Layer 3 (column + EAV) redaction for console tool responses.
6
+ #
7
+ # Extracted from {Server} so the response-redaction logic can live next to
8
+ # the {ResponseContext} that invokes it and be unit-tested without the
9
+ # full server construction path.
10
+ #
11
+ # Redaction is shape-aware:
12
+ # - {record: Hash} (find)
13
+ # - {records: [Hash]} (sample, recent)
14
+ # - {columns: [...], rows: [[...]]} (sql, query)
15
+ # - {columns: [...], values: [...|[...]]} (pluck)
16
+ # - Plain Hash (redact top-level keys)
17
+ # - Array<Hash> (redact each hash)
18
+ module Redactor
19
+ # Data-shape keys used by console tool responses. When any of these keys
20
+ # appear at the top of a Hash result we treat the value as row data and
21
+ # descend into it instead of redacting at the envelope level.
22
+ #
23
+ # Full recursive descent is intentionally NOT used here. Some tools return
24
+ # Hashes whose keys happen to be column names but whose values are metadata
25
+ # objects, not row data — e.g. `console_schema` returns
26
+ # {columns: {col_name => {type:..., null:...}}}. Recursing into that Hash
27
+ # would incorrectly replace schema metadata with "[REDACTED]" whenever a
28
+ # column name matches a redacted_columns entry. Keeping a closed list of
29
+ # envelope keys that carry actual row data is therefore the safer choice.
30
+ #
31
+ # When adding a new Tier 2/3 tool that returns row data under a new envelope
32
+ # key, add that key here AND add a matching `when` branch in
33
+ # `redact_envelope_value` that applies the appropriate redaction strategy.
34
+ DATA_ENVELOPE_KEYS = %w[record records rows values associations].freeze
35
+
36
+ module_function
37
+
38
+ # Apply SafeContext column redaction to a result value.
39
+ #
40
+ # @param result [Object] The result from the bridge or embedded executor
41
+ # @param ctx [SafeContext] The context with redacted_columns configured
42
+ # @return [Object] Redacted result, same shape as input
43
+ def apply(result, ctx)
44
+ case result
45
+ when Array
46
+ result.map { |item| item.is_a?(Hash) ? apply(item, ctx) : item }
47
+ when Hash
48
+ redact_hash(result, ctx)
49
+ else
50
+ result
51
+ end
52
+ end
53
+
54
+ def redact_hash(hash, ctx)
55
+ string_keyed = hash.transform_keys(&:to_s)
56
+ return ctx.redact(string_keyed) unless (string_keyed.keys & DATA_ENVELOPE_KEYS).any?
57
+
58
+ plan = positional_plan(string_keyed['columns'], ctx)
59
+ string_keyed.each_with_object({}) do |(key, value), out|
60
+ out[key] = redact_envelope_value(key, value, plan, ctx)
61
+ end
62
+ end
63
+
64
+ def redact_envelope_value(key, value, plan, ctx)
65
+ case key
66
+ when 'record' then value.is_a?(Hash) ? ctx.redact(value) : value
67
+ when 'records' then redact_hash_array(value, ctx)
68
+ when 'rows', 'values' then redact_positional(value, plan)
69
+ when 'associations' then redact_association_map(value, ctx)
70
+ else value
71
+ end
72
+ end
73
+
74
+ def redact_hash_array(value, ctx)
75
+ Array(value).map { |row| row.is_a?(Hash) ? ctx.redact(row) : row }
76
+ end
77
+
78
+ # Redact an associations map returned by console_data_snapshot.
79
+ #
80
+ # The associations payload has the shape:
81
+ # { "assoc_name" => [Hash, ...], ... }
82
+ # Each value is an Array of record Hashes. We redact each record
83
+ # the same way we handle `records` (column-name + EAV rules).
84
+ def redact_association_map(value, ctx)
85
+ return value unless value.is_a?(Hash)
86
+
87
+ value.each_with_object({}) do |(assoc_name, assoc_records), out|
88
+ out[assoc_name] = redact_hash_array(assoc_records, ctx)
89
+ end
90
+ end
91
+
92
+ # Precompute everything needed to redact positional rows for a given
93
+ # `columns` header: the column-name mask plus any EAV key-value rules
94
+ # resolved to column indexes.
95
+ def positional_plan(columns, ctx)
96
+ { mask: positional_mask(columns, ctx),
97
+ kv_rules: positional_kv_rules(columns, ctx) }
98
+ end
99
+
100
+ # Precompute the positional redaction mask from a `columns` header.
101
+ # Returns nil when there is nothing to redact so callers can short-circuit.
102
+ def positional_mask(columns, ctx)
103
+ return nil unless columns.is_a?(Array)
104
+
105
+ redacted = ctx.redacted_columns
106
+ return nil if redacted.empty?
107
+
108
+ mask = columns.map { |name| redacted.include?(name.to_s) }
109
+ mask.any? ? mask : nil
110
+ end
111
+
112
+ # Resolve EAV patterns against a `columns` header into concrete index
113
+ # pairs. A rule only fires when both key_column and value_column are
114
+ # present in the header, and costs nothing per row otherwise.
115
+ def positional_kv_rules(columns, ctx)
116
+ return [] unless columns.is_a?(Array)
117
+
118
+ index = columns.each_with_index.to_h { |name, idx| [name.to_s, idx] }
119
+ ctx.redacted_key_values.filter_map do |pattern|
120
+ key_idx = index[pattern['key_column']]
121
+ val_idx = index[pattern['value_column']]
122
+ next unless key_idx && val_idx
123
+
124
+ { key_idx: key_idx, val_idx: val_idx, sensitive: pattern['sensitive_keys'] }
125
+ end
126
+ end
127
+
128
+ # Redact positional row data using a precomputed plan. Handles both
129
+ # nested arrays (multi-column pluck, sql/query rows) and flat scalar
130
+ # arrays (pluck with a single column — Rails collapses the result).
131
+ def redact_positional(rows, plan)
132
+ return rows unless rows.is_a?(Array)
133
+ return rows if plan[:mask].nil? && plan[:kv_rules].empty?
134
+
135
+ rows.map do |row|
136
+ row.is_a?(Array) ? redact_row(row, plan) : redact_scalar(row, plan[:mask])
137
+ end
138
+ end
139
+
140
+ def redact_row(row, plan)
141
+ result = apply_mask(row, plan[:mask])
142
+ plan[:kv_rules].each do |rule|
143
+ result[rule[:val_idx]] = '[REDACTED]' if rule[:sensitive].include?(row[rule[:key_idx]].to_s)
144
+ end
145
+ result
146
+ end
147
+
148
+ def apply_mask(row, mask)
149
+ return row.dup unless mask
150
+
151
+ row.each_with_index.map { |value, idx| mask[idx] ? '[REDACTED]' : value }
152
+ end
153
+
154
+ def redact_scalar(value, mask)
155
+ return value unless mask
156
+
157
+ mask.first ? '[REDACTED]' : value
158
+ end
159
+ end
160
+ end
161
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+ require_relative 'redactor'
5
+
6
+ module Woods
7
+ module Console
8
+ # Bundles the three Console response-safety layers the Server threads
9
+ # through every tool definition:
10
+ #
11
+ # - Layer 1 (TableGate) — reject tool calls that touch blocked tables.
12
+ # - Layer 2 (CredentialScanner) — redact credential-shaped substrings in the
13
+ # final response tree, regardless of where they arrived.
14
+ # - Layer 3 (SafeContext) — operator-configured column + EAV redaction.
15
+ #
16
+ # Exposed as tell-don't-ask commands: {#enforce!}, {#redact}, {#scan}. Callers
17
+ # never need to ask which layers are configured — a {NullResponseContext} is
18
+ # returned from {.build} when every layer is absent, and its commands are
19
+ # no-ops that return their input unchanged.
20
+ #
21
+ # Ordering (applied in Server#send_to_bridge, after the bridge responds):
22
+ # Layer 3 (columns/EAV) -> Layer 2 (credential scan) -> response emitted.
23
+ # Layer 1 runs earlier, before tool dispatch.
24
+ class ResponseContext
25
+ attr_reader :safe_ctx, :table_gate, :credential_scanner
26
+
27
+ # @return [ResponseContext, NullResponseContext] NullResponseContext
28
+ # when every layer is absent so callers never receive nil.
29
+ def self.build(safe_ctx: nil, table_gate: nil, credential_scanner: nil)
30
+ gate_inactive = table_gate.nil? || !table_gate.active?
31
+ if safe_ctx.nil? && gate_inactive && credential_scanner.nil?
32
+ NullResponseContext.instance
33
+ else
34
+ new(safe_ctx: safe_ctx, table_gate: table_gate, credential_scanner: credential_scanner)
35
+ end
36
+ end
37
+
38
+ def initialize(safe_ctx:, table_gate:, credential_scanner:)
39
+ @safe_ctx = safe_ctx
40
+ @table_gate = table_gate
41
+ @credential_scanner = credential_scanner
42
+ end
43
+
44
+ # True for real response contexts; false for {NullResponseContext}.
45
+ def present?
46
+ true
47
+ end
48
+
49
+ # Run the Layer 1 blocked-table gate against the arguments a tool was
50
+ # invoked with. Tools may arrive at tables through five different
51
+ # arg shapes — SQL string, model name, raw table, joined associations,
52
+ # or a single association name — so the gate checks every variant that's
53
+ # present. A no-op when the gate is nil.
54
+ #
55
+ # @param args [Hash] Tool arguments (symbol keys from MCP dispatch)
56
+ # @raise [TableGateError] if any referenced identifier is blocked
57
+ def enforce!(args) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
58
+ return unless @table_gate
59
+
60
+ @table_gate.check_sql!(args[:sql]) if args[:sql]
61
+ @table_gate.check_model!(args[:model]) if args[:model]
62
+ @table_gate.check_table!(args[:table]) if args[:table]
63
+ @table_gate.check_joins!(args[:model], args[:joins]) if args[:model] && args[:joins]
64
+ return unless args[:model] && args[:association]
65
+
66
+ @table_gate.check_association!(args[:model], args[:association])
67
+ end
68
+
69
+ # Apply Layer 3 (column + EAV) redaction to a result value. Shape-aware —
70
+ # see {Redactor.apply} for the supported envelope keys.
71
+ #
72
+ # @param result [Object]
73
+ # @return [Object] Redacted result, same shape as input.
74
+ def redact(result)
75
+ return result unless @safe_ctx
76
+
77
+ Redactor.apply(result, @safe_ctx)
78
+ end
79
+
80
+ # Run Layer 2 (credential scanner) over a value and return the scanned
81
+ # form alongside any hit counts. Callers decide whether to log the
82
+ # counts — the context deliberately does not assume access to a logger.
83
+ #
84
+ # @param value [Object]
85
+ # @return [Array(Object, Hash)] [scanned_value, counts_by_pattern]
86
+ def scan(value)
87
+ return [value, {}] unless @credential_scanner
88
+
89
+ @credential_scanner.scan(value)
90
+ end
91
+ end
92
+
93
+ # Null variant returned by {ResponseContext.build} when every layer is
94
+ # absent. Exposes the same public surface so callers never need `&.` nil-guards.
95
+ class NullResponseContext
96
+ include Singleton
97
+
98
+ def present?
99
+ false
100
+ end
101
+
102
+ def safe_ctx
103
+ nil
104
+ end
105
+
106
+ def table_gate
107
+ nil
108
+ end
109
+
110
+ def credential_scanner
111
+ nil
112
+ end
113
+
114
+ def enforce!(_args)
115
+ nil
116
+ end
117
+
118
+ def redact(result)
119
+ result
120
+ end
121
+
122
+ def scan(value)
123
+ [value, {}]
124
+ end
125
+ end
126
+ end
127
+ end
@@ -13,70 +13,267 @@ module Woods
13
13
  #
14
14
  # Safety layers:
15
15
  # - Every query runs inside a transaction that is always rolled back
16
- # - Statement timeout prevents runaway queries
16
+ # - Statement timeout uses `SET LOCAL` so it cannot leak to the next
17
+ # pool consumer
17
18
  # - Column redaction replaces sensitive values with "[REDACTED]"
18
19
  #
19
- # @example
20
+ # == What SafeContext does NOT cover
21
+ #
22
+ # The rolled-back transaction is a strong guardrail but not absolute.
23
+ # Known escape paths — callers of {#execute} should assume anything
24
+ # below is effectively live:
25
+ # - `ActiveJob` / `ActionMailer` async deliveries. Earlier versions
26
+ # tried to swap the global queue_adapter/delivery_method to `:test`
27
+ # for the block's duration, but those settings are process-wide
28
+ # class state: in a Puma worker serving both the host app and the
29
+ # Console MCP, a concurrent host request would briefly see the
30
+ # test adapter and silently drop real jobs / mail. We now leave
31
+ # them alone — treat callback-triggered enqueues / deliveries as
32
+ # live.
33
+ # - `after_rollback` callbacks (fire on rollback, can still enqueue
34
+ # jobs or call external services).
35
+ # - `Thread.new` / `Fiber.new` inside the block — they lease a fresh
36
+ # connection outside the transaction.
37
+ # - Direct HTTP egress (Net::HTTP, Faraday, HTTP gem, ...).
38
+ # - File I/O / shell-outs initiated from within AR callbacks.
39
+ # - Writes through a different pool or shard than the one this
40
+ # SafeContext was built with.
41
+ # - `raw_connection.execute` on some adapters when the adapter's
42
+ # transaction bookkeeping is out-of-band.
43
+ #
44
+ # Treat SafeContext as "rolls back the database", not "prevents every
45
+ # side effect" — operators must still apply the upstream defenses
46
+ # (TableGate, SqlValidator, EvalGuard, BearerAuth).
47
+ #
48
+ # Two construction modes are supported:
49
+ #
50
+ # - `connection:` — wraps the supplied connection in a single-use pool
51
+ # adapter so the execution path is identical to the `pool:` form.
52
+ # Useful in tests and for callers that already manage their own
53
+ # connection lifecycle (e.g. bridge mode, `exe/woods-console`).
54
+ # - `pool:` — each call to {#execute} leases a fresh connection via
55
+ # `pool.with_connection { |c| ... }`, so the connection is returned
56
+ # to the pool immediately after the block. The leased connection is
57
+ # also exposed via `Thread.current[:woods_console_leased_connection]`
58
+ # so dispatch handlers (e.g. EmbeddedExecutor#active_connection) can
59
+ # reuse the same connection without re-leasing.
60
+ #
61
+ # In both forms the connection is resolved *per {#execute} call* —
62
+ # SafeContext never holds a connection ivar. This is the key invariant
63
+ # for multi-DB / sharded hosts: if you supply a shard pool (or shard
64
+ # connection), the rolled-back transaction is opened on that shard's
65
+ # connection, not on the default pool.
66
+ #
67
+ # @example connection: form
20
68
  # ctx = SafeContext.new(connection: conn, timeout_ms: 5000, redacted_columns: %w[ssn])
21
69
  # ctx.execute { |c| c.execute("SELECT count(*) FROM users") }
22
70
  #
71
+ # @example pool: form (per-request lease)
72
+ # ctx = SafeContext.new(pool: ActiveRecord::Base.connection_pool)
73
+ # ctx.execute { |c| c.select_all("SELECT count(*) FROM users") }
74
+ #
75
+ # @example Shard pool — rollback covers the shard
76
+ # shard_pool = ShardedModel.connection_pool
77
+ # ctx = SafeContext.new(pool: shard_pool)
78
+ # ctx.execute { |c| c.select_all("SELECT * FROM shard_table") }
79
+ #
80
+ # @example Key-value (EAV) redaction
81
+ # ctx = SafeContext.new(
82
+ # connection: conn,
83
+ # redacted_key_values: [
84
+ # { key_column: 'key', value_column: 'value',
85
+ # sensitive_keys: %w[stripe_access_token oauth_token] }
86
+ # ]
87
+ # )
88
+ #
23
89
  class SafeContext
24
- # @param connection [Object] Database connection (or mock)
90
+ # Thread-local key that exposes the connection currently leased for
91
+ # the in-flight #execute block. Handlers should prefer this over
92
+ # acquiring their own connection so every request stays on a single
93
+ # leased connection inside the rolled-back transaction.
94
+ LEASED_CONNECTION_KEY = :woods_console_leased_connection
95
+
96
+ # Thin adapter that makes a bare connection look like a connection pool
97
+ # with a `with_connection` interface. Used internally when callers pass
98
+ # `connection:` so that {#execute} always flows through a single code
99
+ # path regardless of construction form.
100
+ #
101
+ # @api private
102
+ SingleConnectionPool = Struct.new(:connection) do
103
+ # @yield [Object] the wrapped connection
104
+ def with_connection(&block)
105
+ block.call(connection)
106
+ end
107
+ end
108
+
109
+ # @return [Array<String>] Column names whose values are replaced with "[REDACTED]"
110
+ attr_reader :redacted_columns
111
+
112
+ # @return [Array<Hash>] Normalized EAV redaction patterns. Each entry has
113
+ # string keys: 'key_column', 'value_column', 'sensitive_keys'.
114
+ attr_reader :redacted_key_values
115
+
116
+ # @param connection [Object, nil] Database connection (or mock).
117
+ # Mutually exclusive with `pool:` — pass one or the other (or
118
+ # neither, if this SafeContext is only being used for #redact).
119
+ # The connection is wrapped in {SingleConnectionPool} so execution
120
+ # always flows through `pool.with_connection`.
121
+ # @param pool [#with_connection, nil] Connection pool to lease from
122
+ # per request. Each {#execute} call wraps `pool.with_connection`.
25
123
  # @param timeout_ms [Integer] Statement timeout in milliseconds
26
124
  # @param redacted_columns [Array<String>] Column names whose values should be redacted
27
- def initialize(connection:, timeout_ms: 5000, redacted_columns: [])
28
- @connection = connection
125
+ # @param redacted_key_values [Array<Hash>] EAV-style redaction patterns.
126
+ # Each pattern: {key_column: 'key', value_column: 'value',
127
+ # sensitive_keys: %w[stripe_access_token ...]}. When a row's
128
+ # `key_column` cell matches one of `sensitive_keys`, the same row's
129
+ # `value_column` cell is replaced with "[REDACTED]".
130
+ def initialize(connection: nil, pool: nil, timeout_ms: 5000,
131
+ redacted_columns: [], redacted_key_values: [])
132
+ @pool = pool || (connection && SingleConnectionPool.new(connection))
29
133
  @timeout_ms = timeout_ms
30
134
  @redacted_columns = redacted_columns.map(&:to_s)
135
+ @redacted_key_values = normalize_key_value_patterns(redacted_key_values)
31
136
  end
32
137
 
33
138
  # Execute a block within a rolled-back transaction with statement timeout.
34
139
  #
35
140
  # The transaction is always rolled back to ensure read-only behavior.
141
+ # A fresh connection is leased from the pool on every call via
142
+ # `pool.with_connection`. The leased connection is published as
143
+ # `Thread.current[LEASED_CONNECTION_KEY]` for the duration of the
144
+ # block and cleared in `ensure` (even on exceptions) so dispatch
145
+ # handlers can pick it up without re-leasing.
36
146
  #
37
147
  # @yield [connection] The database connection
38
148
  # @return [Object] The block's return value
39
- def execute
40
- result = nil
41
- @connection.transaction do
42
- set_timeout
43
- result = yield(@connection)
44
- raise ActiveRecord::Rollback
45
- end
46
- result
149
+ # @raise [ArgumentError] when neither `connection:` nor `pool:` was
150
+ # supplied at construction time. Deferred to #execute so callers
151
+ # that only use #redact can construct with neither.
152
+ def execute(&block)
153
+ raise ArgumentError, 'SafeContext#execute requires connection: or pool: at construction time' unless @pool
154
+
155
+ # NOTE: on async side effects: earlier iterations of SafeContext
156
+ # tried to swap `ActiveJob::Base.queue_adapter` / `ActionMailer::Base
157
+ # .delivery_method` to `:test` for the duration of this block.
158
+ # That's unsafe — those settings are process-wide class state, so
159
+ # any concurrent request served by the SAME Puma worker (the host
160
+ # app running alongside the Console MCP) would race and briefly
161
+ # see the test adapter, silently dropping real jobs and mail.
162
+ # The gap is documented in the class docstring instead; operators
163
+ # must treat callback-triggered enqueues / deliveries as live.
164
+ @pool.with_connection { |conn| run_with_timeout(conn, &block) }
47
165
  end
48
166
 
49
167
  # Replace values of redacted columns with "[REDACTED]".
50
168
  #
169
+ # Runs column-name redaction first, then EAV key-value redaction — a row
170
+ # like {key: "stripe_access_token", value: "sk_live_..."} has its `value`
171
+ # column replaced when "stripe_access_token" is in the sensitive_keys
172
+ # list, regardless of whether `value` itself is in redacted_columns.
173
+ #
51
174
  # @param hash [Hash] Record attributes
52
175
  # @param _model_name [String] Model name (reserved for per-model redaction rules)
53
176
  # @return [Hash] Redacted copy of the hash
54
177
  def redact(hash, _model_name = nil)
55
- return hash if @redacted_columns.empty?
178
+ return hash if @redacted_columns.empty? && @redacted_key_values.empty?
56
179
 
57
- hash.transform_keys(&:to_s).each_with_object({}) do |(key, value), redacted|
58
- redacted[key] = @redacted_columns.include?(key) ? '[REDACTED]' : value
180
+ redacted = hash.transform_keys(&:to_s).each_with_object({}) do |(key, value), out|
181
+ out[key] = @redacted_columns.include?(key) ? '[REDACTED]' : value
59
182
  end
183
+ apply_key_value_redaction(redacted)
60
184
  end
61
185
 
62
186
  private
63
187
 
188
+ # Wrap one connection in a rolled-back transaction with timeout, and
189
+ # publish it via Thread.current so handlers can reuse it. Always
190
+ # clears the thread-local in ensure so a raise mid-block cannot leak
191
+ # a stale connection reference into the next request on this thread.
192
+ def run_with_timeout(connection)
193
+ previous = Thread.current[LEASED_CONNECTION_KEY]
194
+ Thread.current[LEASED_CONNECTION_KEY] = connection
195
+ result = nil
196
+ connection.transaction do
197
+ set_timeout(connection)
198
+ result = yield(connection)
199
+ raise ActiveRecord::Rollback
200
+ end
201
+ result
202
+ ensure
203
+ Thread.current[LEASED_CONNECTION_KEY] = previous
204
+ end
205
+
206
+ def normalize_key_value_patterns(patterns)
207
+ Array(patterns).filter_map { |pattern| normalize_pattern(pattern) }
208
+ end
209
+
210
+ def normalize_pattern(pattern)
211
+ key_col = fetch_pattern_string(pattern, :key_column)
212
+ val_col = fetch_pattern_string(pattern, :value_column)
213
+ sensitive = Array(pattern[:sensitive_keys] || pattern['sensitive_keys']).map(&:to_s)
214
+ return if key_col.nil? || val_col.nil? || sensitive.empty?
215
+
216
+ { 'key_column' => key_col, 'value_column' => val_col, 'sensitive_keys' => sensitive }
217
+ end
218
+
219
+ def fetch_pattern_string(pattern, key)
220
+ (pattern[key] || pattern[key.to_s])&.to_s
221
+ end
222
+
223
+ def apply_key_value_redaction(hash)
224
+ @redacted_key_values.each do |pattern|
225
+ key_col = pattern['key_column']
226
+ val_col = pattern['value_column']
227
+ next unless hash.key?(key_col) && hash.key?(val_col)
228
+ next unless pattern['sensitive_keys'].include?(hash[key_col].to_s)
229
+
230
+ hash[val_col] = '[REDACTED]'
231
+ end
232
+ hash
233
+ end
234
+
64
235
  # Set statement timeout on the connection.
65
236
  #
66
- # PostgreSQL uses SET statement_timeout (applies to all statement types).
67
- # MySQL uses SET max_execution_time (applies to SELECT only — MySQL limitation:
68
- # DDL and DML statements cannot be time-limited via this variable).
69
- def set_timeout(connection = @connection, timeout_ms = @timeout_ms)
237
+ # PostgreSQL uses `SET LOCAL statement_timeout` so the setting is
238
+ # scoped to the surrounding transaction and is automatically
239
+ # discarded on rollback. Without LOCAL the timeout would persist on
240
+ # the pooled connection and bleed into the next consumer (host app
241
+ # request, background job, etc.). Safe here because every #execute
242
+ # is wrapped in a transaction.
243
+ #
244
+ # MySQL uses `SET max_execution_time` (applies to SELECT only — DDL
245
+ # and DML statements cannot be time-limited via this variable).
246
+ def set_timeout(connection, timeout_ms = @timeout_ms)
70
247
  adapter = connection.adapter_name.downcase
71
248
  if adapter.include?('mysql')
72
249
  connection.execute("SET max_execution_time = #{timeout_ms.to_i}")
73
250
  else
74
- connection.execute("SET statement_timeout = '#{timeout_ms.to_i}ms'")
251
+ connection.execute("SET LOCAL statement_timeout = '#{timeout_ms.to_i}ms'")
75
252
  end
76
- rescue StandardError
77
- # Unsupported adapter timeout enforcement is best-effort
253
+ rescue StandardError => e
254
+ # Unsupported adapter (SQLite, Trilogy on unsupported version, Oracle) —
255
+ # timeout enforcement is best-effort, but operators need to know their
256
+ # rollback fence is narrower than advertised. Log once per adapter via
257
+ # Rails.logger when available; otherwise swallow as before.
258
+ warn_timeout_unsupported(adapter, e)
78
259
  nil
79
260
  end
261
+
262
+ def warn_timeout_unsupported(adapter, error)
263
+ return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
264
+
265
+ @warned_adapters ||= {}
266
+ return if @warned_adapters[adapter]
267
+
268
+ @warned_adapters[adapter] = true
269
+ Rails.logger.warn(
270
+ '[Woods::Console::SafeContext] statement timeout not supported on ' \
271
+ "adapter #{adapter.inspect}: #{error.class}: #{error.message}. " \
272
+ 'Queries will run without a per-statement time limit.'
273
+ )
274
+ rescue StandardError
275
+ # Last-resort swallow — never let telemetry failure break execution.
276
+ end
80
277
  end
81
278
  end
82
279
  end