rack-libinjection 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ci.yml +55 -0
  3. data/CHANGELOG.md +112 -0
  4. data/GET_STARTED.md +418 -0
  5. data/LICENSE-libinjection.txt +33 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +68 -0
  8. data/SECURITY.md +65 -0
  9. data/ext/libinjection/extconf.rb +113 -0
  10. data/ext/libinjection/libinjection_ext.c +1132 -0
  11. data/ext/libinjection/vendor/libinjection/.vendored +5 -0
  12. data/ext/libinjection/vendor/libinjection/COPYING +33 -0
  13. data/ext/libinjection/vendor/libinjection/MIGRATION.md +393 -0
  14. data/ext/libinjection/vendor/libinjection/README.md +251 -0
  15. data/ext/libinjection/vendor/libinjection/src/libinjection.h +70 -0
  16. data/ext/libinjection/vendor/libinjection/src/libinjection_error.h +26 -0
  17. data/ext/libinjection/vendor/libinjection/src/libinjection_html5.c +830 -0
  18. data/ext/libinjection/vendor/libinjection/src/libinjection_html5.h +56 -0
  19. data/ext/libinjection/vendor/libinjection/src/libinjection_sqli.c +2342 -0
  20. data/ext/libinjection/vendor/libinjection/src/libinjection_sqli.h +297 -0
  21. data/ext/libinjection/vendor/libinjection/src/libinjection_sqli_data.h +9651 -0
  22. data/ext/libinjection/vendor/libinjection/src/libinjection_xss.c +1203 -0
  23. data/ext/libinjection/vendor/libinjection/src/libinjection_xss.h +23 -0
  24. data/lib/libinjection/version.rb +6 -0
  25. data/lib/libinjection.rb +31 -0
  26. data/lib/rack/libinjection.rb +586 -0
  27. data/lib/rack-libinjection.rb +3 -0
  28. data/samples/README.md +67 -0
  29. data/samples/libinjection_detect_raw_hot_path.rb +161 -0
  30. data/samples/rack_all_surfaces_hot_path.rb +198 -0
  31. data/samples/rack_params_hot_path.rb +166 -0
  32. data/samples/rack_query_hot_path.rb +176 -0
  33. data/samples/results/.gitkeep +0 -0
  34. data/script/fuzz_smoke.rb +39 -0
  35. data/script/vendor_libs.rb +227 -0
  36. data/test/test_helper.rb +7 -0
  37. data/test/test_libinjection.rb +223 -0
  38. data/test/test_middleware.rb +404 -0
  39. metadata +148 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0203a3f1b8a3447818e725857364461f8f62632cb8f466b6c42178948a02f93d
4
+ data.tar.gz: 84d19b19a25a44987efdd3486d6896efad15e9c9d1ca715431bd612f4b0a580a
5
+ SHA512:
6
+ metadata.gz: 24a29c7bb6ec739b8f35170a721978b55fe2e2b4cbbdfe03c1fde47b8513c3e8b3eb0d5da2274e6cf770533e96d4e0c545c45dbbb62c916f09bcd02601ed211b
7
+ data.tar.gz: 57115bdc3838e5a5d47aaaf7a8e55f0b22a005dcee8042b0f71778a2f29a2a44c3c4bc8d7d8d7ca390694ddd83b831d15f178a9629fa86c1f32e18e6fc054531
@@ -0,0 +1,55 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches: [main]
7
+
8
+ jobs:
9
+ vendor-verify:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: ruby/setup-ruby@v1
14
+ with:
15
+ ruby-version: "3.4"
16
+ bundler-cache: true
17
+ - run: bundle exec rake vendor
18
+ - run: bundle exec rake vendor:verify
19
+
20
+ test:
21
+ needs: vendor-verify
22
+ runs-on: ${{ matrix.os }}
23
+ strategy:
24
+ fail-fast: false
25
+ matrix:
26
+ os: [ubuntu-latest, macos-latest]
27
+ ruby: ["3.3", "3.4", "4.0"]
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+ - uses: ruby/setup-ruby@v1
31
+ with:
32
+ ruby-version: ${{ matrix.ruby }}
33
+ bundler-cache: true
34
+ - run: bundle exec rake vendor
35
+ - run: bundle exec rake compile
36
+ - run: bundle exec rake test
37
+
38
+ sanitizers:
39
+ needs: vendor-verify
40
+ runs-on: ubuntu-latest
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+ - uses: ruby/setup-ruby@v1
44
+ with:
45
+ ruby-version: "3.4"
46
+ bundler-cache: true
47
+ - run: bundle exec rake vendor
48
+ - run: LIBINJECTION_SANITIZE=1 bundle exec rake compile
49
+ - name: Run ASan/UBSan smoke
50
+ env:
51
+ ASAN_OPTIONS: detect_leaks=0:halt_on_error=1:abort_on_error=1
52
+ UBSAN_OPTIONS: halt_on_error=1:abort_on_error=1
53
+ run: |
54
+ export LD_PRELOAD="$(gcc -print-file-name=libasan.so)"
55
+ LIBINJECTION_SANITIZE=1 bundle exec rake security:smoke
data/CHANGELOG.md ADDED
@@ -0,0 +1,112 @@
1
+ # Changelog
2
+
3
+ ## Unreleased
4
+
5
+ ### Security / hardening
6
+
7
+ - Added explicit Rack middleware parser-error policy. `parser_errors: :auto` now reports native libinjection and known Rack parameter/cookie parser errors in report mode and fails closed in block mode; `:report`, `:block`, and `:raise` are available for explicit behavior.
8
+ - Added explicit skipped-input policy. `skipped_inputs: :auto` reports `max_value_bytes` / `max_depth` skips in report mode and fails closed in block mode; `:report`, `:block`, and `:allow` are available for explicit behavior.
9
+ - Narrowed the default ignored params to `authenticity_token`; sensitive values such as `password` are scanned by default while raw values remain absent from notifications.
10
+ - Public low-level scans that release the GVL now copy large input strings into
11
+ a temporary C buffer before scanning. This avoids reading directly from a Ruby
12
+ `String` buffer while another Ruby thread could mutate/reallocate it.
13
+ - `RB_GC_GUARD` placement and native scan cleanup were tightened. Public
14
+ low-level scan methods now release copied C buffers through `rb_ensure`, so
15
+ async Ruby exceptions cannot skip native buffer cleanup.
16
+ - SQLi fingerprint buffer sizing is now tied to the vendored libinjection
17
+ struct field instead of a standalone magic number.
18
+ - Added native edge-case tests for empty strings, nil input, invalid UTF-8,
19
+ binary/null-byte payloads, UTF-16 byte input, large no-GVL inputs, and invalid
20
+ option values.
21
+ - Added `security:smoke` random/binary-input checks and an AddressSanitizer/UBSan
22
+ CI job.
23
+
24
+ ### Fixed
25
+
26
+ - `:path` scanning now checks individual path segments when the full path is not
27
+ classified. This keeps SQLi-like path payloads such as
28
+ `/items/1 OR 1=1--` detectable instead of letting the route prefix hide the
29
+ payload from libinjection. Percent-encoded path values are decoded up to
30
+ `path_decode_depth` times for this fallback; the default is `2`.
31
+ - Empty attack lists stored in `rack.libinjection.attacks` are now mutable per
32
+ request instead of a shared frozen array.
33
+ - Header scanning now supports `ignore_headers` and skips common low-signal
34
+ protocol/browser headers by default.
35
+ - Cookie name scanning is disabled by default and can be enabled with
36
+ `scan_cookie_names: true`; cookie values are still scanned when `:cookies` is
37
+ enabled.
38
+
39
+ ### Performance
40
+
41
+ - Added a native URL-decoded scan primitive, `LibInjection.detect_url_encoded_raw(input, depth, plus_as_space, threat_mask)`, used by Rack `:query` and `:path` surfaces. It scans raw, decoded-once, and decoded-twice variants without Ruby `gsub`, Oniguruma, or intermediate decoded Ruby strings. Small decoded buffers now use stack allocation; heap allocation is reserved for larger decoded inputs.
42
+ - `scan: [:query]`, `scan: [:path]`, and `scan: [:query, :path, :headers]` can now run directly from the Rack env without constructing `Rack::Request`; `:params` and `:cookies` still use Rack parsers to preserve their semantics.
43
+ - Added `scan: [:query]`, a fast raw query-string surface that scans the query string and decoded variants without invoking Rack nested params parsing.
44
+ - Added `threats: [:sqli]` / `threats: [:xss]` middleware modes for deployments that intentionally want to skip one native detector on hot paths.
45
+ - Attack notifications and skipped/error notifications now build request metadata lazily and do no payload work when the notifier is the built-in no-op.
46
+ - Header name normalization no longer uses an unbounded per-middleware cache. Path/query decoding now stays in C and is skipped when the value has no URL-encoded candidate bytes.
47
+ - Added `LibInjection.detect_raw` — a single native primitive that runs SQLi
48
+ and XSS checks in one Ruby->C call and short-circuits XSS when SQLi is
49
+ already detected. Returns the minimum data needed (`nil`, `[:sqli, fp]`,
50
+ or `[:xss, nil]`) with no Result/Hash allocation.
51
+ - Native scans now release the GVL for inputs >= `LI_NOGVL_THRESHOLD` bytes
52
+ (default 1024). The extension uses `rb_nogvl(..., RB_NOGVL_OFFLOAD_SAFE)`
53
+ when the Ruby headers provide that flag, and falls back to
54
+ `rb_thread_call_without_gvl` on older headers.
55
+ - `Rack::LibInjection` middleware rewritten on top of a mutable accumulator:
56
+ no more per-level `flat_map`, no more `path + [key]` allocations, no more
57
+ intermediate hashes per match. Path keys are now built as plain `String`s.
58
+ - Middleware now uses `LibInjection.detect_raw` instead of `sqli_fingerprint`
59
+ + `xss?`, halving the number of Ruby->C boundaries per scanned string.
60
+
61
+ ### Middleware behavior
62
+
63
+ - Notification event names now use dotted namespaces:
64
+ - `rack.libinjection.attack`
65
+ - `rack.libinjection.error`
66
+ - `rack.libinjection.skipped`
67
+ - Supported scan locations are now explicit: `:query`, `:params`, `:path`, `:headers`,
68
+ and `:cookies`. Default remains `scan: [:params]`.
69
+ - Middleware emits skipped-input telemetry for values skipped because of
70
+ `max_value_bytes` or `max_depth` when `notify_skipped: true`; block mode fails closed for skipped input by default.
71
+ - Middleware no longer rescues `StandardError` around scanning. It catches
72
+ `LibInjection::ParserError` and known Rack parameter parsing errors, emits an
73
+ error event, and lets unrelated exceptions propagate.
74
+ - Notifier exceptions are isolated by default (`notifier_errors: :ignore`) so a
75
+ reporting subscriber cannot accidentally turn a request into a 500. Use
76
+ `notifier_errors: :raise` in tests/development when desired.
77
+ - Explicit `logger:` now wins over ActiveSupport auto-detection. Passing both
78
+ `logger:` and `notifier:` remains an `ArgumentError`.
79
+ - `Config.build` now validates scan locations and notifier error mode.
80
+ - Uploaded file names are scanned when upload objects expose `original_filename`
81
+ or `filename`; file contents are not scanned.
82
+
83
+ ### Native / vendoring
84
+
85
+ - Minimum Ruby version is **3.3**. Ruby versions whose headers do not expose
86
+ `RB_NOGVL_OFFLOAD_SAFE` still build through the classic no-GVL fallback; they
87
+ just do not get the Fiber Scheduler offload-safe hint.
88
+ - System libinjection mode now verifies that the runtime libinjection version is
89
+ exactly `4.0.0`, because this binding uses v4 diagnostic structs and token
90
+ fields.
91
+ - The vendored manifest no longer includes a self-hash. Verification now relies
92
+ on the pinned upstream archive SHA-256 plus local tree checksum.
93
+ - The vendor script now downloads with `Net::HTTP`, explicit timeouts, retry
94
+ handling, redirect handling, and streaming writes instead of `URI.open` and
95
+ `remote.read`.
96
+ - `extconf.rb` no longer auto-downloads vendored native sources during build; missing vendored sources fail with an explicit instruction to run `script/vendor_libs.rb`.
97
+
98
+ ### Docs / project hygiene
99
+
100
+ - Added `SECURITY.md` for private vulnerability reporting.
101
+ - README now states the scan surface and JSON-body limitation at the top.
102
+ - GET_STARTED now includes a threat model, skipped input behavior, PII notes,
103
+ GVL/offload notes, notifier hot-path warning, and system-library version guard.
104
+ - Added explicit BSD-3 attribution for vendored libinjection.
105
+
106
+ ## 0.1.0
107
+
108
+ - Initial native binding for libinjection v4.0.0.
109
+ - Added `LibInjection.sqli?`, `LibInjection.sqli_fingerprint`, `LibInjection.xss?`, and `LibInjection.detect`.
110
+ - Added table-driven diagnostic native API: `sqli_result`, `sqli_contexts`, `sqli_tokens`, `sqli_fingerprint_for`, `xss_result`, `xss_contexts`, and `html5_tokens`.
111
+ - Added `Rack::LibInjection` middleware in report/block/off modes.
112
+ - Added pinned `script/vendor_libs.rb` vendoring workflow.
data/GET_STARTED.md ADDED
@@ -0,0 +1,418 @@
1
+ # Get started
2
+
3
+ ## Install
4
+
5
+ Add the gem to your `Gemfile`:
6
+
7
+ ```ruby
8
+ # Gemfile
9
+ gem "rack-libinjection"
10
+ ```
11
+
12
+ The published gem ships vendored `libinjection` sources, so users install
13
+ it with a normal `bundle install`.
14
+
15
+ For source checkouts, vendor pinned upstream sources explicitly before building.
16
+ The extension does not auto-download native code during `extconf.rb`:
17
+
18
+ ```bash
19
+ bundle install
20
+ bundle exec rake vendor
21
+ bundle exec rake compile
22
+ bundle exec rake test
23
+ ```
24
+
25
+ ## Rails middleware
26
+
27
+ Start in report-only mode:
28
+
29
+ ```ruby
30
+ # config/application.rb
31
+ config.middleware.use Rack::LibInjection, mode: :report
32
+ ```
33
+
34
+ Full configuration example:
35
+
36
+ ```ruby
37
+ # config/application.rb
38
+ config.middleware.use Rack::LibInjection,
39
+ mode: :report,
40
+ scan: [:params],
41
+ threats: [:sqli, :xss],
42
+ max_value_bytes: 8192,
43
+ max_depth: 8,
44
+ path_decode_depth: 2,
45
+ ignore_params: %w[authenticity_token],
46
+ ignore_headers: Rack::LibInjection::DEFAULT_IGNORE_HEADERS,
47
+ scan_cookie_names: false,
48
+ parser_errors: :auto,
49
+ notify_skipped: true,
50
+ skipped_inputs: :auto,
51
+ notifier_errors: :ignore
52
+ ```
53
+
54
+ Start in `:report`; switch to `:block` only after you have reviewed the reported events and confirmed that the false-positive rate is acceptable for your traffic.
55
+
56
+ ### Modes
57
+
58
+ - `:report` — detect and notify, then continue request processing.
59
+ - `:block` — return `403` when at least one attack signal is detected. With the default `parser_errors: :auto` and `skipped_inputs: :auto`, Rack/native parser errors and limit-skipped input also fail closed.
60
+ - `:off` — no scanning.
61
+
62
+ The middleware stores attack records in:
63
+
64
+ ```ruby
65
+ request.env["rack.libinjection.attacks"]
66
+ ```
67
+
68
+ Each record has:
69
+
70
+ ```ruby
71
+ {
72
+ type: :sqli, # or :xss
73
+ location: :params, # :query, :params, :path, :headers, or :cookies
74
+ key: "search.q",
75
+ key_name: false, # true when the signal came from a param/cookie key
76
+ fingerprint: "s&1UE", # SQLi only
77
+ bytes: 23
78
+ }
79
+ ```
80
+
81
+ ## Scanning model
82
+
83
+ The default scan surface is deliberately narrow:
84
+
85
+ ```ruby
86
+ scan: [:params]
87
+ ```
88
+
89
+ Supported scan locations:
90
+
91
+ - `:query` — raw Rack query string plus decoded variants. This is the fastest WAF-style signal surface and avoids Rack nested params parsing; URL decoding is performed in the native extension to avoid Ruby `gsub`/regex allocation, but this surface does not provide semantic per-param keys;
92
+ - `:params` — parsed Rack params, including query/form params as provided by Rack;
93
+ - `:path` — `request.path`, with a segment fallback so `/items/1 OR 1=1--` is scanned as both the full path and individual path segments. Percent-encoded path values are decoded inside the native extension up to `path_decode_depth` times for detection; the default is `2` to catch common double-encoding attempts.
94
+ - `:headers` — Rack HTTP headers from `env`, except names in `ignore_headers`;
95
+ - `:cookies` — parsed Rack cookie values. Cookie names are skipped by default and can be enabled with `scan_cookie_names: true`.
96
+
97
+ The middleware scans predictable, bounded input only:
98
+
99
+ - string values inside parsed params, cookies, and non-ignored headers;
100
+ - string keys inside params hashes;
101
+ - multipart upload filenames when an upload object exposes `original_filename`
102
+ or `filename`;
103
+ - values up to `max_value_bytes`;
104
+ - nested structures up to `max_depth`;
105
+ - ignored params and ignored headers are skipped by exact normalized name. The default ignored param list is intentionally narrow (`authenticity_token` only); sensitive values such as `password` are scanned, but raw values are not emitted in attack notifications.
106
+
107
+ Threat classes are configurable:
108
+
109
+ ```ruby
110
+ threats: [:sqli, :xss] # default
111
+ threats: [:sqli] # skip the XSS state machine on clean strings
112
+ threats: [:xss] # skip SQLi fingerprinting
113
+ ```
114
+
115
+ Use single-threat mode only when the missing class is covered elsewhere. The main performance reason is that clean strings otherwise run both SQLi fingerprinting and the XSS state machine.
116
+
117
+ For raw query/path scanning, `path_decode_depth: 0` scans only the raw value. Depth `1` adds one percent-decoded pass, and depth `2` adds a second pass for common double-encoding attempts. The value is hard-capped at `Rack::LibInjection::MAX_PATH_DECODE_DEPTH` to avoid accidental CPU-heavy configurations. Query decoding treats `+` as a space; path decoding keeps `+` literal. Malformed percent escapes are kept literal inside the native decoder instead of raising through Ruby `gsub`/regex machinery.
118
+
119
+ Header scanning is inherently noisy if every header is scanned. By default the
120
+ middleware ignores low-signal protocol/browser headers such as `Accept`, `Host`,
121
+ `Content-Type`, and `Content-Length`. Override `ignore_headers: []` if you really
122
+ want every Rack header scanned.
123
+
124
+ Cookie names are also skipped by default because names are often application
125
+ controlled and can produce false positives (`session_or_token`, feature flags,
126
+ etc.). Enable `scan_cookie_names: true` only if cookie names are attacker
127
+ controlled or security-relevant in your application.
128
+
129
+ There is deliberately no hidden “lazy suspicious prefilter” before
130
+ libinjection. That avoids creating a second bypass-prone mini-WAF layer.
131
+
132
+ ### Parser error policy
133
+
134
+ Vendored libinjection v4 returns parser errors instead of aborting. The Rack middleware treats those errors explicitly:
135
+
136
+ ```ruby
137
+ parser_errors: :auto # default: report in :report mode, block in :block mode
138
+ parser_errors: :report # notify and allow
139
+ parser_errors: :block # notify and return 403
140
+ parser_errors: :raise # re-raise LibInjection::ParserError
141
+ ```
142
+
143
+ For blocking deployments, `:auto` fails closed: native libinjection parser errors and known Rack parameter/cookie parser errors are treated like blocked requests.
144
+
145
+
146
+ Raw JSON body scanning is intentionally not part of the current middleware.
147
+ Rack body rewind, large request bodies, multipart file contents, nested JSON
148
+ depth, and PII-safe logging need separate design. If your API is JSON-only,
149
+ keep this limitation visible in your threat model.
150
+
151
+ ### Skipped input notifications
152
+
153
+ Input skipped because of safety limits is observable through:
154
+
155
+ ```ruby
156
+ rack.libinjection.skipped
157
+ ```
158
+
159
+ The payload includes `reason`, `location`, `key`, `bytes`, and `limit` when
160
+ available. This is intentionally separate from attack notifications: a skipped
161
+ large/deep value is not an attack by itself, but it is useful telemetry when
162
+ someone tries to hide payloads behind configured limits.
163
+
164
+ Skipped input policy is explicit:
165
+
166
+ ```ruby
167
+ skipped_inputs: :auto # default: report in :report mode, block in :block mode
168
+ skipped_inputs: :report # notify and allow
169
+ skipped_inputs: :block # notify and return 403
170
+ skipped_inputs: :allow # allow silently
171
+ ```
172
+
173
+ Disable skipped telemetry if it is too noisy while keeping the same allow/block
174
+ policy:
175
+
176
+ ```ruby
177
+ config.middleware.use Rack::LibInjection, notify_skipped: false
178
+ ```
179
+
180
+ ## Notifications
181
+
182
+ If `ActiveSupport::Notifications` is loaded and no explicit `logger:` or
183
+ `notifier:` is provided, the middleware emits events that you can subscribe to:
184
+
185
+ ```ruby
186
+ ActiveSupport::Notifications.subscribe("rack.libinjection.attack") do |_name, _start, _finish, _id, payload|
187
+ Rails.logger.warn(payload.inspect)
188
+ end
189
+ ```
190
+
191
+ You can also pass a custom notifier:
192
+
193
+ ```ruby
194
+ config.middleware.use Rack::LibInjection,
195
+ notifier: ->(event, payload) { SecurityEvents.write(event, payload) }
196
+ ```
197
+
198
+ Explicit `logger:` wins over the ActiveSupport auto-detection:
199
+
200
+ ```ruby
201
+ config.middleware.use Rack::LibInjection, logger: Rails.logger
202
+ ```
203
+
204
+ `Rack::LibInjection` calls `notifier.call(...)` synchronously inside the
205
+ middleware. A slow subscriber slows down the entire request. Do not perform
206
+ blocking IO inside a subscriber. If you need to persist attack signals to Redis,
207
+ a database, or an external service, push to a background queue and process out
208
+ of band:
209
+
210
+ ```ruby
211
+ ActiveSupport::Notifications.subscribe("rack.libinjection.attack") do |_, _, _, _, payload|
212
+ AttackEventJob.perform_later(payload)
213
+ end
214
+ ```
215
+
216
+ Notifier exceptions are ignored by default so a reporting hook cannot turn a
217
+ request into a 500. To let notifier exceptions propagate during development or tests:
218
+
219
+ ```ruby
220
+ config.middleware.use Rack::LibInjection, notifier_errors: :raise
221
+ ```
222
+
223
+ The middleware also emits `rack.libinjection.error` events when Rack parameter
224
+ parsing or libinjection parser errors occur on the configured scan surface.
225
+ These are rare but useful to monitor.
226
+
227
+ ## Rack::Attack integration
228
+
229
+ Keep `rack-libinjection` in report mode and score IPs yourself:
230
+
231
+ ```ruby
232
+ class LibInjectionTracker
233
+ def self.record(ip, attack)
234
+ Rails.cache.increment("libinjection:#{ip}:score", 1, expires_in: 10.minutes)
235
+ end
236
+
237
+ def self.score(ip)
238
+ Rails.cache.read("libinjection:#{ip}:score").to_i
239
+ end
240
+ end
241
+
242
+ ActiveSupport::Notifications.subscribe("rack.libinjection.attack") do |_name, _start, _finish, _id, payload|
243
+ LibInjectionTracker.record(payload[:ip], payload)
244
+ end
245
+
246
+ Rack::Attack.blocklist("libinjection repeat attackers") do |req|
247
+ LibInjectionTracker.score(req.ip) > 5
248
+ end
249
+ ```
250
+
251
+ ## Threat model and limitations
252
+
253
+ `rack-libinjection` is a signal layer. It helps identify suspicious payloads
254
+ that reached configured Rack surfaces. It is not the control that keeps SQL or
255
+ HTML safe.
256
+
257
+ Still required:
258
+
259
+ - ActiveRecord bind params / parameterized SQL;
260
+ - output escaping and CSP;
261
+ - authorization;
262
+ - rate limiting;
263
+ - upstream WAF/reverse-proxy controls where appropriate.
264
+
265
+ Known limitations:
266
+
267
+ - detection inherits upstream `libinjection` limits and bypasses;
268
+ - only ANSI/MySQL SQLi contexts are exposed by this binding's diagnostic API;
269
+ - the middleware scans what Rack has already parsed; raw query and path values
270
+ are decoded up to `path_decode_depth` times, but params/cookies/headers are not
271
+ recursively decoded by this gem;
272
+ - Unicode normalization is not performed; callers that need NFC/NFKC or
273
+ UTF-16/UTF-32 decoding must normalize before scanning;
274
+ - values beyond `max_value_bytes` and nested values deeper than `max_depth` are not scanned. In report mode they are reported as skipped input by default; in block mode they are blocked by default through `skipped_inputs: :auto`;
275
+ - JSON bodies are not scanned yet;
276
+ - path and IP in notification payloads can contain identifiers, so treat them as
277
+ operational/security telemetry, not PII-free analytics data.
278
+
279
+ ## Low-level API
280
+
281
+ ```ruby
282
+ require "libinjection"
283
+
284
+ LibInjection.sqli?("1 OR 1=1--")
285
+ # => true
286
+
287
+ LibInjection.sqli_fingerprint("1 OR 1=1--")
288
+ # => "..." or nil
289
+
290
+ LibInjection.xss?("<script>alert(1)</script>")
291
+ # => true / false
292
+
293
+ LibInjection.detect("1 OR 1=1--")
294
+ # => #<data LibInjection::Result type=:sqli, detected=true, fingerprint="...">
295
+ ```
296
+
297
+ ### Hot-path primitive
298
+
299
+ `detect_raw` is the low-allocation primitive used internally by the Rack
300
+ middleware. It returns the minimum amount of data needed to decide what to do,
301
+ without allocating a `Result` object.
302
+
303
+ ```ruby
304
+ LibInjection.detect_raw("1 OR 1=1--")
305
+ # => [:sqli, "s&1UE"]
306
+
307
+ LibInjection.detect_raw("<script>alert(1)</script>")
308
+ # => [:xss, nil]
309
+
310
+ LibInjection.detect_raw("hello")
311
+ # => nil
312
+ ```
313
+
314
+ ### Diagnostic API
315
+
316
+ ```ruby
317
+ LibInjection.sqli_result("1 OR 1=1--")
318
+ LibInjection.sqli_contexts("1 OR 1=1--")
319
+ LibInjection.sqli_tokens("1 OR 1=1--")
320
+ LibInjection.sqli_tokens("1 OR 1=1--", fold: true)
321
+ LibInjection.sqli_fingerprint_for("1 OR 1=1--", context: :none_ansi)
322
+
323
+ LibInjection.xss_result("<script>alert(1)</script>")
324
+ LibInjection.xss_contexts("<script>alert(1)</script>")
325
+ LibInjection.html5_tokens("<script>alert(1)</script>")
326
+ ```
327
+
328
+ Available native maps:
329
+
330
+ ```ruby
331
+ LibInjection::SQLI_CONTEXTS
332
+ LibInjection::SQLI_QUOTES
333
+ LibInjection::SQLI_DIALECTS
334
+ LibInjection::SQLI_TOKEN_TYPES
335
+ LibInjection::HTML5_CONTEXTS
336
+ LibInjection::XSS_CONTEXTS
337
+ LibInjection::HTML5_TOKEN_TYPES
338
+ ```
339
+
340
+ Parser errors from libinjection v4 are exposed as
341
+ `LibInjection::ParserError`.
342
+
343
+ ## Vendored libinjection
344
+
345
+ The vendor flow pins upstream C sources by archive SHA-256 and records a tree
346
+ checksum for local verification.
347
+
348
+ ```bash
349
+ ruby script/vendor_libs.rb --sync
350
+ ruby script/vendor_libs.rb --verify
351
+ ```
352
+
353
+ Pinned upstream:
354
+
355
+ - libinjection `v4.0.0`
356
+ - archive SHA-256: `a69d27e3d98608df89203c4e1c00c034fe0f8c723017e4088ab53ce3ff5a9129`
357
+
358
+ The script records a tree checksum in
359
+ `ext/libinjection/vendor/libinjection/.vendored`. This catches accidental local
360
+ changes to vendored files. It is not a cryptographic signature; the upstream
361
+ archive hash above is the external integrity pin.
362
+
363
+ ## System library mode
364
+
365
+ Default mode is vendored. For distro builds:
366
+
367
+ ```bash
368
+ bundle config build.rack-libinjection --use-system-libinjection
369
+ # or
370
+ LIBINJECTION_USE_SYSTEM=1 bundle install
371
+ ```
372
+
373
+ When system mode is used, the extension checks the runtime libinjection version
374
+ at load time and rejects versions other than `4.0.0`; the binding exposes
375
+ diagnostic structs and token fields that are tied to that upstream API.
376
+
377
+ ## Concurrency and the GVL
378
+
379
+ `libinjection` scans are pure CPU work over raw bytes. They do not allocate on
380
+ the heap, do not call into the Ruby C API, and do not touch global state. That
381
+ makes them a good candidate for releasing the GVL during a scan.
382
+
383
+ For inputs of at least `LI_NOGVL_THRESHOLD` bytes (default `1024`), the native
384
+ binding releases the GVL. When the Ruby headers provide
385
+ `RB_NOGVL_OFFLOAD_SAFE`, the binding passes that flag to `rb_nogvl(...)` so a
386
+ Fiber Scheduler can treat the scan as offload-safe. On older Ruby headers that
387
+ only provide the classic no-GVL API, the extension falls back to
388
+ `rb_thread_call_without_gvl(...)`: multi-thread Puma/Sidekiq still benefit, but
389
+ there is no scheduler offload hint.
390
+
391
+ Short inputs are scanned inline under the GVL. The fixed overhead of releasing
392
+ the GVL is larger than the scan itself on small payloads, so the threshold is
393
+ conservative and should be tuned with benchmarks for specific deployments. You
394
+ can override it at build time:
395
+
396
+ ```bash
397
+ bundle config build.rack-libinjection -- --with-cflags="-DLI_NOGVL_THRESHOLD=512"
398
+ ```
399
+
400
+ ### Memory safety contract
401
+
402
+ For public low-level scans that release the GVL, the native binding first
403
+ copies the input bytes into a temporary C buffer. That avoids reading from a
404
+ Ruby `String` buffer while another Ruby thread could mutate or reallocate the
405
+ same object. Short inputs are scanned under the GVL without a copy.
406
+
407
+ The binding scans bytes, not semantic text. It accepts `UTF-8` and
408
+ `ASCII-8BIT`/binary strings, including invalid byte sequences, as byte input. It
409
+ does not implicitly decode UTF-16/UTF-32 into UTF-8. Normalize or transcode
410
+ application text before calling the low-level API if your application accepts
411
+ non-UTF-8 encodings.
412
+
413
+ ## License notes
414
+
415
+ The Ruby gem is MIT-licensed. The vendored upstream `libinjection` sources are
416
+ BSD-3-Clause licensed; the upstream license is included as
417
+ `LICENSE-libinjection.txt` in the packaged gem and
418
+ `ext/libinjection/vendor/libinjection/COPYING` in source checkouts.
@@ -0,0 +1,33 @@
1
+ Copyright (c) 2012-2016, Nick Galbreath
2
+ Copyright (c) 2017-2024, libinjection Contributors
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are
7
+ met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright
10
+ notice, this list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright
13
+ notice, this list of conditions and the following disclaimer in the
14
+ documentation and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+
32
+ https://github.com/libinjection/libinjection
33
+ http://opensource.org/licenses/BSD-3-Clause
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Roman Haydarov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.