woods 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +186 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +69 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +210 -0
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +771 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +163 -0
- data/lib/woods/unblocked/document_builder.rb +326 -0
- data/lib/woods/unblocked/exporter.rb +201 -0
- data/lib/woods/unblocked/rate_limiter.rb +94 -0
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +130 -6
- metadata +73 -4
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
  module Console
    # Shape-aware Layer 3 (column + EAV) redaction for console tool responses.
    #
    # Extracted from {Server} so the response-redaction logic can live next to
    # the {ResponseContext} that invokes it and be unit-tested without the
    # full server construction path.
    #
    # Redaction is shape-aware:
    #   - {record: Hash}                          (find)
    #   - {records: [Hash]}                       (sample, recent)
    #   - {columns: [...], rows: [[...]]}         (sql, query)
    #   - {columns: [...], values: [...|[...]]}   (pluck)
    #   - {associations: {name => [Hash]}}        (data snapshot)
    #   - Plain Hash                              (redact top-level keys)
    #   - Array<Hash>                             (redact each hash)
    #
    # The `ctx` argument used throughout must respond to `redact(hash)`,
    # `redacted_columns` and `redacted_key_values` (see {SafeContext}).
    module Redactor
      # Data-shape keys used by console tool responses. When any of these keys
      # appear at the top of a Hash result we treat the value as row data and
      # descend into it instead of redacting at the envelope level.
      #
      # Full recursive descent is intentionally NOT used here. Some tools return
      # Hashes whose keys happen to be column names but whose values are metadata
      # objects, not row data — e.g. `console_schema` returns
      # {columns: {col_name => {type:..., null:...}}}. Recursing into that Hash
      # would incorrectly replace schema metadata with "[REDACTED]" whenever a
      # column name matches a redacted_columns entry. Keeping a closed list of
      # envelope keys that carry actual row data is therefore the safer choice.
      #
      # When adding a new Tier 2/3 tool that returns row data under a new envelope
      # key, add that key here AND add a matching `when` branch in
      # `redact_envelope_value` that applies the appropriate redaction strategy.
      DATA_ENVELOPE_KEYS = %w[record records rows values associations].freeze

      module_function

      # Apply SafeContext column redaction to a result value.
      #
      # Arrays are walked one level deep: Hash elements are redacted, any other
      # element is passed through untouched. Non-Hash, non-Array results are
      # returned as-is.
      #
      # @param result [Object] The result from the bridge or embedded executor
      # @param ctx [SafeContext] The context with redacted_columns configured
      # @return [Object] Redacted result, same shape as input
      def apply(result, ctx)
        case result
        when Array
          result.map { |item| item.is_a?(Hash) ? apply(item, ctx) : item }
        when Hash
          redact_hash(result, ctx)
        else
          result
        end
      end

      # Redact a single Hash result.
      #
      # Keys are stringified first. A Hash without any data-envelope key is
      # handed to `ctx.redact` as one record; otherwise each envelope value is
      # redacted with the strategy matching its key and every other key is
      # copied through unchanged.
      #
      # @param hash [Hash] Tool result
      # @param ctx [SafeContext]
      # @return [Hash] Redacted copy
      def redact_hash(hash, ctx)
        string_keyed = hash.transform_keys(&:to_s)
        return ctx.redact(string_keyed) unless (string_keyed.keys & DATA_ENVELOPE_KEYS).any?

        # The positional plan depends only on the `columns` header, so compute
        # it once per envelope rather than once per row.
        plan = positional_plan(string_keyed['columns'], ctx)
        string_keyed.each_with_object({}) do |(key, value), out|
          out[key] = redact_envelope_value(key, value, plan, ctx)
        end
      end

      # Dispatch one envelope entry to the redaction strategy for its key.
      #
      # @param key [String] Stringified envelope key
      # @param value [Object] Value stored under that key
      # @param plan [Hash] Result of {positional_plan}
      # @param ctx [SafeContext]
      # @return [Object] Redacted value (unchanged for non-data keys)
      def redact_envelope_value(key, value, plan, ctx)
        case key
        when 'record' then value.is_a?(Hash) ? ctx.redact(value) : value
        when 'records' then redact_hash_array(value, ctx)
        when 'rows', 'values' then redact_positional(value, plan)
        when 'associations' then redact_association_map(value, ctx)
        else value
        end
      end

      # Redact every record Hash in a collection.
      #
      # A bare Hash is treated as a single record and redacted in place of the
      # collection. Passing it through `Array()` would turn it into
      # `[key, value]` pairs, none of which are Hashes, so the record would be
      # returned unredacted and in the wrong shape.
      #
      # @param value [Array<Hash>, Hash, nil] Records to redact
      # @param ctx [SafeContext]
      # @return [Array, Hash] Redacted records; `nil` becomes `[]`
      def redact_hash_array(value, ctx)
        return ctx.redact(value) if value.is_a?(Hash)

        Array(value).map { |row| row.is_a?(Hash) ? ctx.redact(row) : row }
      end

      # Redact an associations map returned by console_data_snapshot.
      #
      # The associations payload has the shape:
      #   { "assoc_name" => [Hash, ...], ... }
      # Each value is an Array of record Hashes. We redact each record
      # the same way we handle `records` (column-name + EAV rules).
      #
      # @param value [Hash, Object] Associations map; non-Hash values are returned as-is
      # @param ctx [SafeContext]
      # @return [Hash, Object]
      def redact_association_map(value, ctx)
        return value unless value.is_a?(Hash)

        value.each_with_object({}) do |(assoc_name, assoc_records), out|
          out[assoc_name] = redact_hash_array(assoc_records, ctx)
        end
      end

      # Precompute everything needed to redact positional rows for a given
      # `columns` header: the column-name mask plus any EAV key-value rules
      # resolved to column indexes.
      #
      # @param columns [Array, Object] Column header from the envelope
      # @param ctx [SafeContext]
      # @return [Hash] `{ mask: Array<Boolean>|nil, kv_rules: Array<Hash> }`
      def positional_plan(columns, ctx)
        { mask: positional_mask(columns, ctx),
          kv_rules: positional_kv_rules(columns, ctx) }
      end

      # Precompute the positional redaction mask from a `columns` header.
      # Returns nil when there is nothing to redact so callers can short-circuit.
      #
      # @param columns [Array, Object]
      # @param ctx [SafeContext]
      # @return [Array<Boolean>, nil] One flag per column, true = redact
      def positional_mask(columns, ctx)
        return nil unless columns.is_a?(Array)

        redacted = ctx.redacted_columns
        return nil if redacted.empty?

        mask = columns.map { |name| redacted.include?(name.to_s) }
        mask.any? ? mask : nil
      end

      # Resolve EAV patterns against a `columns` header into concrete index
      # pairs. A rule only fires when both key_column and value_column are
      # present in the header, and costs nothing per row otherwise.
      #
      # @param columns [Array, Object]
      # @param ctx [SafeContext]
      # @return [Array<Hash>] Entries of `{ key_idx:, val_idx:, sensitive: }`
      def positional_kv_rules(columns, ctx)
        return [] unless columns.is_a?(Array)

        index = columns.each_with_index.to_h { |name, idx| [name.to_s, idx] }
        ctx.redacted_key_values.filter_map do |pattern|
          key_idx = index[pattern['key_column']]
          val_idx = index[pattern['value_column']]
          next unless key_idx && val_idx

          { key_idx: key_idx, val_idx: val_idx, sensitive: pattern['sensitive_keys'] }
        end
      end

      # Redact positional row data using a precomputed plan. Handles both
      # nested arrays (multi-column pluck, sql/query rows) and flat scalar
      # arrays (pluck with a single column — Rails collapses the result).
      #
      # @param rows [Array, Object] Row data; non-Array values are returned as-is
      # @param plan [Hash] Result of {positional_plan}
      # @return [Array, Object]
      def redact_positional(rows, plan)
        return rows unless rows.is_a?(Array)
        return rows if plan[:mask].nil? && plan[:kv_rules].empty?

        rows.map do |row|
          row.is_a?(Array) ? redact_row(row, plan) : redact_scalar(row, plan[:mask])
        end
      end

      # Redact one positional row: column mask first, then EAV rules.
      #
      # EAV rules read the key cell from the ORIGINAL row so a key column that
      # was itself masked still triggers redaction of its value column.
      #
      # @param row [Array]
      # @param plan [Hash]
      # @return [Array] New array; the input row is not mutated
      def redact_row(row, plan)
        result = apply_mask(row, plan[:mask])
        plan[:kv_rules].each do |rule|
          result[rule[:val_idx]] = '[REDACTED]' if rule[:sensitive].include?(row[rule[:key_idx]].to_s)
        end
        result
      end

      # Replace masked positions of a row with "[REDACTED]".
      #
      # @param row [Array]
      # @param mask [Array<Boolean>, nil]
      # @return [Array] A copy of the row (always a new Array)
      def apply_mask(row, mask)
        return row.dup unless mask

        row.each_with_index.map { |value, idx| mask[idx] ? '[REDACTED]' : value }
      end

      # Redact a scalar cell from a flat (single-column) result. Only the first
      # mask entry is consulted because a flat row carries exactly one column.
      #
      # @param value [Object]
      # @param mask [Array<Boolean>, nil]
      # @return [Object]
      def redact_scalar(value, mask)
        return value unless mask

        mask.first ? '[REDACTED]' : value
      end
    end
  end
end
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
require_relative 'redactor'
|
|
5
|
+
|
|
6
|
+
module Woods
  module Console
    # Bundles the three Console response-safety layers the Server threads
    # through every tool definition:
    #
    #   - Layer 1 (TableGate) — reject tool calls that touch blocked tables.
    #   - Layer 2 (CredentialScanner) — redact credential-shaped substrings in the
    #     final response tree, regardless of where they arrived.
    #   - Layer 3 (SafeContext) — operator-configured column + EAV redaction.
    #
    # Exposed as tell-don't-ask commands: {#enforce!}, {#redact}, {#scan}. Callers
    # never need to ask which layers are configured — a {NullResponseContext} is
    # returned from {.build} when every layer is absent, and its commands are
    # no-ops that return their input unchanged.
    #
    # Ordering (applied in Server#send_to_bridge, after the bridge responds):
    #   Layer 3 (columns/EAV) -> Layer 2 (credential scan) -> response emitted.
    # Layer 1 runs earlier, before tool dispatch.
    class ResponseContext
      attr_reader :safe_ctx, :table_gate, :credential_scanner

      # Build a context for the supplied layers.
      #
      # A table gate that is present but reports `active?` as false counts as
      # absent when deciding whether to return the null object; if another
      # layer is present, the gate is still passed through unchanged.
      #
      # @param safe_ctx [SafeContext, nil] Layer 3
      # @param table_gate [#active?, nil] Layer 1
      # @param credential_scanner [#scan, nil] Layer 2
      # @return [ResponseContext, NullResponseContext] NullResponseContext
      #   when every layer is absent so callers never receive nil.
      def self.build(safe_ctx: nil, table_gate: nil, credential_scanner: nil)
        gate_inactive = table_gate.nil? || !table_gate.active?
        if safe_ctx.nil? && gate_inactive && credential_scanner.nil?
          NullResponseContext.instance
        else
          new(safe_ctx: safe_ctx, table_gate: table_gate, credential_scanner: credential_scanner)
        end
      end

      # @param safe_ctx [SafeContext, nil]
      # @param table_gate [Object, nil]
      # @param credential_scanner [Object, nil]
      def initialize(safe_ctx:, table_gate:, credential_scanner:)
        @safe_ctx = safe_ctx
        @table_gate = table_gate
        @credential_scanner = credential_scanner
      end

      # True for real response contexts; false for {NullResponseContext}.
      def present?
        true
      end

      # Run the Layer 1 blocked-table gate against the arguments a tool was
      # invoked with. Tools may arrive at tables through five different
      # arg shapes — SQL string, model name, raw table, joined associations,
      # or a single association name — so the gate checks every variant that's
      # present. A no-op when the gate is nil.
      #
      # Arguments are looked up by symbol key first and by string key second,
      # so a caller that forwards string-keyed arguments (e.g. freshly parsed
      # JSON) cannot bypass the gate by accident. `nil` args are treated as an
      # empty argument set.
      #
      # @param args [Hash, nil] Tool arguments (symbol keys from MCP dispatch;
      #   string keys are accepted as well)
      # @raise [TableGateError] if any referenced identifier is blocked
      # @return [void]
      def enforce!(args) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
        return unless @table_gate

        args ||= {}
        sql, model, table, joins, association =
          %i[sql model table joins association].map { |key| tool_arg(args, key) }

        @table_gate.check_sql!(sql) if sql
        @table_gate.check_model!(model) if model
        @table_gate.check_table!(table) if table
        @table_gate.check_joins!(model, joins) if model && joins
        return unless model && association

        @table_gate.check_association!(model, association)
      end

      # Apply Layer 3 (column + EAV) redaction to a result value. Shape-aware —
      # see {Redactor.apply} for the supported envelope keys.
      #
      # @param result [Object]
      # @return [Object] Redacted result, same shape as input.
      def redact(result)
        return result unless @safe_ctx

        Redactor.apply(result, @safe_ctx)
      end

      # Run Layer 2 (credential scanner) over a value and return the scanned
      # form alongside any hit counts. Callers decide whether to log the
      # counts — the context deliberately does not assume access to a logger.
      #
      # @param value [Object]
      # @return [Array(Object, Hash)] [scanned_value, counts_by_pattern]
      def scan(value)
        return [value, {}] unless @credential_scanner

        @credential_scanner.scan(value)
      end

      private

      # Read a tool argument by symbol key, falling back to its string form.
      #
      # @param args [Hash]
      # @param key [Symbol]
      # @return [Object, nil]
      def tool_arg(args, key)
        args[key] || args[key.to_s]
      end
    end

    # Null variant returned by {ResponseContext.build} when every layer is
    # absent. Exposes the same public surface so callers never need &.nil-guards.
    class NullResponseContext
      include Singleton

      # Always false: no safety layer is configured.
      def present?
        false
      end

      # @return [nil]
      def safe_ctx
        nil
      end

      # @return [nil]
      def table_gate
        nil
      end

      # @return [nil]
      def credential_scanner
        nil
      end

      # No gate configured, so nothing is ever rejected.
      #
      # @return [nil]
      def enforce!(_args)
        nil
      end

      # @param result [Object]
      # @return [Object] The input, unchanged.
      def redact(result)
        result
      end

      # @param value [Object]
      # @return [Array(Object, Hash)] The input and an empty counts Hash.
      def scan(value)
        [value, {}]
      end
    end
  end
end
|
|
@@ -13,70 +13,267 @@ module Woods
|
|
|
13
13
|
#
|
|
14
14
|
# Safety layers:
|
|
15
15
|
# - Every query runs inside a transaction that is always rolled back
|
|
16
|
-
# - Statement timeout
|
|
16
|
+
# - Statement timeout uses `SET LOCAL` so it cannot leak to the next
|
|
17
|
+
# pool consumer
|
|
17
18
|
# - Column redaction replaces sensitive values with "[REDACTED]"
|
|
18
19
|
#
|
|
19
|
-
#
|
|
20
|
+
# == What SafeContext does NOT cover
|
|
21
|
+
#
|
|
22
|
+
# The rolled-back transaction is a strong guardrail but not absolute.
|
|
23
|
+
# Known escape paths — callers of {#execute} should assume anything
|
|
24
|
+
# below is effectively live:
|
|
25
|
+
# - `ActiveJob` / `ActionMailer` async deliveries. Earlier versions
|
|
26
|
+
# tried to swap the global queue_adapter/delivery_method to `:test`
|
|
27
|
+
# for the block's duration, but those settings are process-wide
|
|
28
|
+
# class state: in a Puma worker serving both the host app and the
|
|
29
|
+
# Console MCP, a concurrent host request would briefly see the
|
|
30
|
+
# test adapter and silently drop real jobs / mail. We now leave
|
|
31
|
+
# them alone — treat callback-triggered enqueues / deliveries as
|
|
32
|
+
# live.
|
|
33
|
+
# - `after_rollback` callbacks (fire on rollback, can still enqueue
|
|
34
|
+
# jobs or call external services).
|
|
35
|
+
# - `Thread.new` / `Fiber.new` inside the block — they lease a fresh
|
|
36
|
+
# connection outside the transaction.
|
|
37
|
+
# - Direct HTTP egress (Net::HTTP, Faraday, HTTP gem, ...).
|
|
38
|
+
# - File I/O / shell-outs initiated from within AR callbacks.
|
|
39
|
+
# - Writes through a different pool or shard than the one this
|
|
40
|
+
# SafeContext was built with.
|
|
41
|
+
# - `raw_connection.execute` on some adapters when the adapter's
|
|
42
|
+
# transaction bookkeeping is out-of-band.
|
|
43
|
+
#
|
|
44
|
+
# Treat SafeContext as "rolls back the database", not "prevents every
|
|
45
|
+
# side effect" — operators must still apply the upstream defenses
|
|
46
|
+
# (TableGate, SqlValidator, EvalGuard, BearerAuth).
|
|
47
|
+
#
|
|
48
|
+
# Two construction modes are supported:
|
|
49
|
+
#
|
|
50
|
+
# - `connection:` — wraps the supplied connection in a single-use pool
|
|
51
|
+
# adapter so the execution path is identical to the `pool:` form.
|
|
52
|
+
# Useful in tests and for callers that already manage their own
|
|
53
|
+
# connection lifecycle (e.g. bridge mode, `exe/woods-console`).
|
|
54
|
+
# - `pool:` — each call to {#execute} leases a fresh connection via
|
|
55
|
+
# `pool.with_connection { |c| ... }`, so the connection is returned
|
|
56
|
+
# to the pool immediately after the block. The leased connection is
|
|
57
|
+
# also exposed via `Thread.current[:woods_console_leased_connection]`
|
|
58
|
+
# so dispatch handlers (e.g. EmbeddedExecutor#active_connection) can
|
|
59
|
+
# reuse the same connection without re-leasing.
|
|
60
|
+
#
|
|
61
|
+
# In both forms the connection is resolved *per {#execute} call* —
|
|
62
|
+
# SafeContext never holds a connection ivar. This is the key invariant
|
|
63
|
+
# for multi-DB / sharded hosts: if you supply a shard pool (or shard
|
|
64
|
+
# connection), the rolled-back transaction is opened on that shard's
|
|
65
|
+
# connection, not on the default pool.
|
|
66
|
+
#
|
|
67
|
+
# @example connection: form
|
|
20
68
|
# ctx = SafeContext.new(connection: conn, timeout_ms: 5000, redacted_columns: %w[ssn])
|
|
21
69
|
# ctx.execute { |c| c.execute("SELECT count(*) FROM users") }
|
|
22
70
|
#
|
|
71
|
+
# @example pool: form (per-request lease)
|
|
72
|
+
# ctx = SafeContext.new(pool: ActiveRecord::Base.connection_pool)
|
|
73
|
+
# ctx.execute { |c| c.select_all("SELECT count(*) FROM users") }
|
|
74
|
+
#
|
|
75
|
+
# @example Shard pool — rollback covers the shard
|
|
76
|
+
# shard_pool = ShardedModel.connection_pool
|
|
77
|
+
# ctx = SafeContext.new(pool: shard_pool)
|
|
78
|
+
# ctx.execute { |c| c.select_all("SELECT * FROM shard_table") }
|
|
79
|
+
#
|
|
80
|
+
# @example Key-value (EAV) redaction
|
|
81
|
+
# ctx = SafeContext.new(
|
|
82
|
+
# connection: conn,
|
|
83
|
+
# redacted_key_values: [
|
|
84
|
+
# { key_column: 'key', value_column: 'value',
|
|
85
|
+
# sensitive_keys: %w[stripe_access_token oauth_token] }
|
|
86
|
+
# ]
|
|
87
|
+
# )
|
|
88
|
+
#
|
|
23
89
|
class SafeContext
  # Key under which the connection currently leased for the in-flight
  # #execute block is published on `Thread.current`. Handlers should prefer
  # this over acquiring their own connection so every request stays on a
  # single leased connection inside the rolled-back transaction.
  # (Ruby's `Thread#[]` storage is fiber-local.)
  LEASED_CONNECTION_KEY = :woods_console_leased_connection

  # Thin adapter that makes a bare connection look like a connection pool
  # with a `with_connection` interface. Used internally when callers pass
  # `connection:` so that {#execute} always flows through a single code
  # path regardless of construction form.
  #
  # @api private
  SingleConnectionPool = Struct.new(:connection) do
    # @yield [Object] the wrapped connection
    def with_connection(&block)
      block.call(connection)
    end
  end

  # @return [Array<String>] Column names whose values are replaced with "[REDACTED]"
  attr_reader :redacted_columns

  # @return [Array<Hash>] Normalized EAV redaction patterns. Each entry has
  #   string keys: 'key_column', 'value_column', 'sensitive_keys'.
  attr_reader :redacted_key_values

  # @param connection [Object, nil] Database connection (or mock).
  #   Mutually exclusive with `pool:` — pass one or the other (or
  #   neither, if this SafeContext is only being used for #redact).
  #   When both are given, `pool:` takes precedence.
  #   The connection is wrapped in {SingleConnectionPool} so execution
  #   always flows through `pool.with_connection`.
  # @param pool [#with_connection, nil] Connection pool to lease from
  #   per request. Each {#execute} call wraps `pool.with_connection`.
  # @param timeout_ms [Integer] Statement timeout in milliseconds
  # @param redacted_columns [Array<String>, String, Symbol, nil] Column names
  #   whose values should be redacted. A single name or nil is accepted.
  # @param redacted_key_values [Array<Hash>, Hash, nil] EAV-style redaction
  #   patterns (a single pattern Hash is accepted).
  #   Each pattern: {key_column: 'key', value_column: 'value',
  #   sensitive_keys: %w[stripe_access_token ...]}. When a row's
  #   `key_column` cell matches one of `sensitive_keys`, the same row's
  #   `value_column` cell is replaced with "[REDACTED]".
  def initialize(connection: nil, pool: nil, timeout_ms: 5000,
                 redacted_columns: [], redacted_key_values: [])
    @pool = pool || (connection && SingleConnectionPool.new(connection))
    @timeout_ms = timeout_ms
    # Array() tolerates nil and a single name, matching how patterns are coerced.
    @redacted_columns = Array(redacted_columns).map(&:to_s)
    @redacted_key_values = normalize_key_value_patterns(redacted_key_values)
  end

  # Execute a block within a rolled-back transaction with statement timeout.
  #
  # The transaction is always rolled back to ensure read-only behavior.
  # A connection is obtained from the pool on every call via
  # `pool.with_connection`. The leased connection is published as
  # `Thread.current[LEASED_CONNECTION_KEY]` for the duration of the
  # block and restored in `ensure` (even on exceptions) so dispatch
  # handlers can pick it up without re-leasing.
  #
  # Async side effects (ActiveJob enqueues, ActionMailer deliveries) are NOT
  # intercepted: swapping the process-wide queue adapter / delivery method
  # for the block's duration would race with concurrent host requests in the
  # same worker. Treat callback-triggered enqueues / deliveries as live.
  #
  # @yield [connection] The database connection
  # @return [Object] The block's return value
  # @raise [ArgumentError] when neither `connection:` nor `pool:` was
  #   supplied at construction time. Deferred to #execute so callers
  #   that only use #redact can construct with neither.
  def execute(&block)
    raise ArgumentError, 'SafeContext#execute requires connection: or pool: at construction time' unless @pool

    @pool.with_connection { |conn| run_with_timeout(conn, &block) }
  end

  # Replace values of redacted columns with "[REDACTED]".
  #
  # Runs column-name redaction first, then EAV key-value redaction — a row
  # like {key: "stripe_access_token", value: "sk_live_..."} has its `value`
  # column replaced when "stripe_access_token" is in the sensitive_keys
  # list, regardless of whether `value` itself is in redacted_columns.
  #
  # When no redaction rules are configured the input Hash is returned
  # untouched (same object, original keys). Otherwise the result is a new
  # Hash with stringified keys.
  #
  # @param hash [Hash] Record attributes
  # @param _model_name [String] Model name (reserved for per-model redaction rules)
  # @return [Hash] Redacted copy of the hash
  def redact(hash, _model_name = nil)
    return hash if @redacted_columns.empty? && @redacted_key_values.empty?

    redacted = hash.transform_keys(&:to_s).each_with_object({}) do |(key, value), out|
      out[key] = @redacted_columns.include?(key) ? '[REDACTED]' : value
    end
    apply_key_value_redaction(redacted)
  end

  private

  # Wrap one connection in a rolled-back transaction with timeout, and
  # publish it via Thread.current so handlers can reuse it. Always
  # restores the previous value in ensure so a raise mid-block cannot leak
  # a stale connection reference into the next request on this thread.
  #
  # `requires_new: true` matters when the connection already has an open
  # transaction (nested #execute, or a caller-managed `connection:`): without
  # it the inner `ActiveRecord::Rollback` is swallowed by the joined
  # transaction block and nothing is rolled back. With it, the block gets its
  # own (sub)transaction, which is what the raise below undoes.
  #
  # @param connection [Object] Leased connection
  # @yield [connection]
  # @return [Object] The block's return value
  def run_with_timeout(connection)
    previous = Thread.current[LEASED_CONNECTION_KEY]
    Thread.current[LEASED_CONNECTION_KEY] = connection
    result = nil
    connection.transaction(requires_new: true) do
      set_timeout(connection)
      result = yield(connection)
      raise ActiveRecord::Rollback
    end
    result
  ensure
    Thread.current[LEASED_CONNECTION_KEY] = previous
  end

  # Normalize user-supplied EAV patterns, dropping incomplete ones.
  #
  # A single pattern Hash is wrapped explicitly: `Array(hash)` would turn it
  # into `[key, value]` pairs instead of a one-element list.
  #
  # @param patterns [Array<Hash>, Hash, nil]
  # @return [Array<Hash>] String-keyed patterns
  def normalize_key_value_patterns(patterns)
    list = patterns.is_a?(Hash) ? [patterns] : Array(patterns)
    list.filter_map { |pattern| normalize_pattern(pattern) }
  end

  # Convert one pattern to the canonical string-keyed form.
  #
  # @param pattern [Hash] Symbol- or string-keyed pattern
  # @return [Hash, nil] nil when key column, value column, or sensitive keys are missing
  def normalize_pattern(pattern)
    key_col = fetch_pattern_string(pattern, :key_column)
    val_col = fetch_pattern_string(pattern, :value_column)
    sensitive = Array(pattern[:sensitive_keys] || pattern['sensitive_keys']).map(&:to_s)
    return if key_col.nil? || val_col.nil? || sensitive.empty?

    { 'key_column' => key_col, 'value_column' => val_col, 'sensitive_keys' => sensitive }
  end

  # Read a pattern entry by symbol key, falling back to its string form.
  #
  # @return [String, nil]
  def fetch_pattern_string(pattern, key)
    (pattern[key] || pattern[key.to_s])&.to_s
  end

  # Apply EAV rules to an already string-keyed Hash, mutating and returning it.
  # A rule fires only when the Hash has both columns and the key cell's
  # string form is one of the rule's sensitive keys.
  #
  # @param hash [Hash] String-keyed record (freshly built by #redact)
  # @return [Hash] The same Hash
  def apply_key_value_redaction(hash)
    @redacted_key_values.each do |pattern|
      key_col = pattern['key_column']
      val_col = pattern['value_column']
      next unless hash.key?(key_col) && hash.key?(val_col)
      next unless pattern['sensitive_keys'].include?(hash[key_col].to_s)

      hash[val_col] = '[REDACTED]'
    end
    hash
  end

  # Set statement timeout on the connection.
  #
  # PostgreSQL uses `SET LOCAL statement_timeout` so the setting is
  # scoped to the surrounding transaction and is automatically
  # discarded on rollback — without LOCAL the timeout would persist on
  # the pooled connection and bleed into the next consumer (host app
  # request, background job, etc.). Safe here because every #execute
  # is wrapped in a transaction.
  #
  # MySQL uses `SET max_execution_time` (applies to SELECT only — DDL
  # and DML statements cannot be time-limited via this variable).
  # NOTE(review): a plain `SET` in MySQL assigns the session value, so unlike
  # the PostgreSQL branch it is not undone by the rollback — confirm whether
  # the pooled connection should be reset afterwards.
  #
  # Any adapter whose name does not contain "mysql" gets the PostgreSQL form;
  # failures are reported through {#warn_timeout_unsupported} and swallowed.
  #
  # @param connection [Object]
  # @param timeout_ms [Integer]
  # @return [Object, nil] The adapter's execute result, or nil on failure
  def set_timeout(connection, timeout_ms = @timeout_ms)
    adapter = connection.adapter_name.downcase
    if adapter.include?('mysql')
      connection.execute("SET max_execution_time = #{timeout_ms.to_i}")
    else
      connection.execute("SET LOCAL statement_timeout = '#{timeout_ms.to_i}ms'")
    end
  rescue StandardError => e
    # Unsupported adapter (SQLite, Trilogy on unsupported version, Oracle) —
    # timeout enforcement is best-effort, but operators need to know their
    # rollback fence is narrower than advertised. Log once per adapter via
    # Rails.logger when available; otherwise swallow as before.
    warn_timeout_unsupported(adapter, e)
    nil
  end

  # Emit a one-time warning (per adapter name, per SafeContext instance) that
  # the statement timeout could not be applied. Silent when Rails.logger is
  # unavailable.
  #
  # @param adapter [String, nil] Downcased adapter name
  # @param error [StandardError] The failure raised while setting the timeout
  # @return [void]
  def warn_timeout_unsupported(adapter, error)
    return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

    @warned_adapters ||= {}
    return if @warned_adapters[adapter]

    @warned_adapters[adapter] = true
    Rails.logger.warn(
      '[Woods::Console::SafeContext] statement timeout not supported on ' \
      "adapter #{adapter.inspect}: #{error.class}: #{error.message}. " \
      'Queries will run without a per-statement time limit.'
    )
  rescue StandardError
    # Last-resort swallow — never let telemetry failure break execution.
  end
end
|
|
81
278
|
end
|
|
82
279
|
end
|