rack-libinjection 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +55 -0
- data/CHANGELOG.md +112 -0
- data/GET_STARTED.md +418 -0
- data/LICENSE-libinjection.txt +33 -0
- data/LICENSE.txt +21 -0
- data/README.md +68 -0
- data/SECURITY.md +65 -0
- data/ext/libinjection/extconf.rb +113 -0
- data/ext/libinjection/libinjection_ext.c +1132 -0
- data/ext/libinjection/vendor/libinjection/.vendored +5 -0
- data/ext/libinjection/vendor/libinjection/COPYING +33 -0
- data/ext/libinjection/vendor/libinjection/MIGRATION.md +393 -0
- data/ext/libinjection/vendor/libinjection/README.md +251 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection.h +70 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_error.h +26 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_html5.c +830 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_html5.h +56 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_sqli.c +2342 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_sqli.h +297 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_sqli_data.h +9651 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_xss.c +1203 -0
- data/ext/libinjection/vendor/libinjection/src/libinjection_xss.h +23 -0
- data/lib/libinjection/version.rb +6 -0
- data/lib/libinjection.rb +31 -0
- data/lib/rack/libinjection.rb +586 -0
- data/lib/rack-libinjection.rb +3 -0
- data/samples/README.md +67 -0
- data/samples/libinjection_detect_raw_hot_path.rb +161 -0
- data/samples/rack_all_surfaces_hot_path.rb +198 -0
- data/samples/rack_params_hot_path.rb +166 -0
- data/samples/rack_query_hot_path.rb +176 -0
- data/samples/results/.gitkeep +0 -0
- data/script/fuzz_smoke.rb +39 -0
- data/script/vendor_libs.rb +227 -0
- data/test/test_helper.rb +7 -0
- data/test/test_libinjection.rb +223 -0
- data/test/test_middleware.rb +404 -0
- metadata +148 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 0203a3f1b8a3447818e725857364461f8f62632cb8f466b6c42178948a02f93d
|
|
4
|
+
data.tar.gz: 84d19b19a25a44987efdd3486d6896efad15e9c9d1ca715431bd612f4b0a580a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 24a29c7bb6ec739b8f35170a721978b55fe2e2b4cbbdfe03c1fde47b8513c3e8b3eb0d5da2274e6cf770533e96d4e0c545c45dbbb62c916f09bcd02601ed211b
|
|
7
|
+
data.tar.gz: 57115bdc3838e5a5d47aaaf7a8e55f0b22a005dcee8042b0f71778a2f29a2a44c3c4bc8d7d8d7ca390694ddd83b831d15f178a9629fa86c1f32e18e6fc054531
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches: [main]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
vendor-verify:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: ruby/setup-ruby@v1
|
|
14
|
+
with:
|
|
15
|
+
ruby-version: "3.4"
|
|
16
|
+
bundler-cache: true
|
|
17
|
+
- run: bundle exec rake vendor
|
|
18
|
+
- run: bundle exec rake vendor:verify
|
|
19
|
+
|
|
20
|
+
test:
|
|
21
|
+
needs: vendor-verify
|
|
22
|
+
runs-on: ${{ matrix.os }}
|
|
23
|
+
strategy:
|
|
24
|
+
fail-fast: false
|
|
25
|
+
matrix:
|
|
26
|
+
os: [ubuntu-latest, macos-latest]
|
|
27
|
+
ruby: ["3.3", "3.4", "4.0"]
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v4
|
|
30
|
+
- uses: ruby/setup-ruby@v1
|
|
31
|
+
with:
|
|
32
|
+
ruby-version: ${{ matrix.ruby }}
|
|
33
|
+
bundler-cache: true
|
|
34
|
+
- run: bundle exec rake vendor
|
|
35
|
+
- run: bundle exec rake compile
|
|
36
|
+
- run: bundle exec rake test
|
|
37
|
+
|
|
38
|
+
sanitizers:
|
|
39
|
+
needs: vendor-verify
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v4
|
|
43
|
+
- uses: ruby/setup-ruby@v1
|
|
44
|
+
with:
|
|
45
|
+
ruby-version: "3.4"
|
|
46
|
+
bundler-cache: true
|
|
47
|
+
- run: bundle exec rake vendor
|
|
48
|
+
- run: LIBINJECTION_SANITIZE=1 bundle exec rake compile
|
|
49
|
+
- name: Run ASan/UBSan smoke
|
|
50
|
+
env:
|
|
51
|
+
ASAN_OPTIONS: detect_leaks=0:halt_on_error=1:abort_on_error=1
|
|
52
|
+
UBSAN_OPTIONS: halt_on_error=1:abort_on_error=1
|
|
53
|
+
run: |
|
|
54
|
+
export LD_PRELOAD="$(gcc -print-file-name=libasan.so)"
|
|
55
|
+
LIBINJECTION_SANITIZE=1 bundle exec rake security:smoke
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## Unreleased
|
|
4
|
+
|
|
5
|
+
### Security / hardening
|
|
6
|
+
|
|
7
|
+
- Added explicit Rack middleware parser-error policy. `parser_errors: :auto` now reports native libinjection and known Rack parameter/cookie parser errors in report mode and fails closed in block mode; `:report`, `:block`, and `:raise` are available for explicit behavior.
|
|
8
|
+
- Added explicit skipped-input policy. `skipped_inputs: :auto` reports `max_value_bytes` / `max_depth` skips in report mode and fails closed in block mode; `:report`, `:block`, and `:allow` are available for explicit behavior.
|
|
9
|
+
- Narrowed the default ignored params to `authenticity_token`; sensitive values such as `password` are scanned by default while raw values remain absent from notifications.
|
|
10
|
+
- Public low-level scans that release the GVL now copy large input strings into
|
|
11
|
+
a temporary C buffer before scanning. This avoids reading directly from a Ruby
|
|
12
|
+
`String` buffer while another Ruby thread could mutate/reallocate it.
|
|
13
|
+
- `RB_GC_GUARD` placement and native scan cleanup were tightened. Public
|
|
14
|
+
low-level scan methods now release copied C buffers through `rb_ensure`, so
|
|
15
|
+
async Ruby exceptions cannot skip native buffer cleanup.
|
|
16
|
+
- SQLi fingerprint buffer sizing is now tied to the vendored libinjection
|
|
17
|
+
struct field instead of a standalone magic number.
|
|
18
|
+
- Added native edge-case tests for empty strings, nil input, invalid UTF-8,
|
|
19
|
+
binary/null-byte payloads, UTF-16 byte input, large no-GVL inputs, and invalid
|
|
20
|
+
option values.
|
|
21
|
+
- Added `security:smoke` random/binary-input checks and an AddressSanitizer/UBSan
|
|
22
|
+
CI job.
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
|
|
26
|
+
- `:path` scanning now checks individual path segments when the full path is not
|
|
27
|
+
classified. This keeps SQLi-like path payloads such as
|
|
28
|
+
`/items/1 OR 1=1--` detectable instead of letting the route prefix hide the
|
|
29
|
+
payload from libinjection. Percent-encoded path values are decoded up to
|
|
30
|
+
`path_decode_depth` times for this fallback; the default is `2`.
|
|
31
|
+
- Empty attack lists stored in `rack.libinjection.attacks` are now mutable per
|
|
32
|
+
request instead of a shared frozen array.
|
|
33
|
+
- Header scanning now supports `ignore_headers` and skips common low-signal
|
|
34
|
+
protocol/browser headers by default.
|
|
35
|
+
- Cookie name scanning is disabled by default and can be enabled with
|
|
36
|
+
`scan_cookie_names: true`; cookie values are still scanned when `:cookies` is
|
|
37
|
+
enabled.
|
|
38
|
+
|
|
39
|
+
### Performance
|
|
40
|
+
|
|
41
|
+
- Added a native URL-decoded scan primitive, `LibInjection.detect_url_encoded_raw(input, depth, plus_as_space, threat_mask)`, used by Rack `:query` and `:path` surfaces. It scans raw, decoded-once, and decoded-twice variants without Ruby `gsub`, Oniguruma, or intermediate decoded Ruby strings. Small decoded buffers now use stack allocation; heap allocation is reserved for larger decoded inputs.
|
|
42
|
+
- `scan: [:query]`, `scan: [:path]`, and `scan: [:query, :path, :headers]` can now run directly from the Rack env without constructing `Rack::Request`; `:params` and `:cookies` still use Rack parsers to preserve their semantics.
|
|
43
|
+
- Added `scan: [:query]`, a fast raw query-string surface that scans the query string and decoded variants without invoking Rack nested params parsing.
|
|
44
|
+
- Added `threats: [:sqli]` / `threats: [:xss]` middleware modes for deployments that intentionally want to skip one native detector on hot paths.
|
|
45
|
+
- Attack notifications and skipped/error notifications now build request metadata lazily and do no payload work when the notifier is the built-in no-op.
|
|
46
|
+
- Header name normalization no longer uses an unbounded per-middleware cache. Path/query decoding now stays in C and is skipped when the value has no URL-encoded candidate bytes.
|
|
47
|
+
- Added `LibInjection.detect_raw` — a single native primitive that runs SQLi
|
|
48
|
+
and XSS checks in one Ruby->C call and short-circuits XSS when SQLi is
|
|
49
|
+
already detected. Returns the minimum data needed (`nil`, `[:sqli, fp]`,
|
|
50
|
+
or `[:xss, nil]`) with no Result/Hash allocation.
|
|
51
|
+
- Native scans now release the GVL for inputs >= `LI_NOGVL_THRESHOLD` bytes
|
|
52
|
+
(default 1024). The extension uses `rb_nogvl(..., RB_NOGVL_OFFLOAD_SAFE)`
|
|
53
|
+
when the Ruby headers provide that flag, and falls back to
|
|
54
|
+
`rb_thread_call_without_gvl` on older headers.
|
|
55
|
+
- `Rack::LibInjection` middleware rewritten on top of a mutable accumulator:
|
|
56
|
+
no more per-level `flat_map`, no more `path + [key]` allocations, no more
|
|
57
|
+
intermediate hashes per match. Path keys are now built as plain `String`s.
|
|
58
|
+
- Middleware now uses `LibInjection.detect_raw` instead of `sqli_fingerprint`
|
|
59
|
+
+ `xss?`, halving the number of Ruby->C boundaries per scanned string.
|
|
60
|
+
|
|
61
|
+
### Middleware behavior
|
|
62
|
+
|
|
63
|
+
- Notification event names now use dotted namespaces:
|
|
64
|
+
- `rack.libinjection.attack`
|
|
65
|
+
- `rack.libinjection.error`
|
|
66
|
+
- `rack.libinjection.skipped`
|
|
67
|
+
- Supported scan locations are now explicit: `:query`, `:params`, `:path`, `:headers`,
|
|
68
|
+
and `:cookies`. Default remains `scan: [:params]`.
|
|
69
|
+
- Middleware emits skipped-input telemetry for values skipped because of
|
|
70
|
+
`max_value_bytes` or `max_depth` when `notify_skipped: true`; block mode fails closed for skipped input by default.
|
|
71
|
+
- Middleware no longer rescues `StandardError` around scanning. It catches
|
|
72
|
+
`LibInjection::ParserError` and known Rack parameter parsing errors, emits an
|
|
73
|
+
error event, and lets unrelated exceptions propagate.
|
|
74
|
+
- Notifier exceptions are isolated by default (`notifier_errors: :ignore`) so a
|
|
75
|
+
reporting subscriber cannot accidentally turn a request into a 500. Use
|
|
76
|
+
`notifier_errors: :raise` in tests/development when desired.
|
|
77
|
+
- Explicit `logger:` now wins over ActiveSupport auto-detection. Passing both
|
|
78
|
+
`logger:` and `notifier:` remains an `ArgumentError`.
|
|
79
|
+
- `Config.build` now validates scan locations and notifier error mode.
|
|
80
|
+
- Uploaded file names are scanned when upload objects expose `original_filename`
|
|
81
|
+
or `filename`; file contents are not scanned.
|
|
82
|
+
|
|
83
|
+
### Native / vendoring
|
|
84
|
+
|
|
85
|
+
- Minimum Ruby version is **3.3**. Ruby versions whose headers do not expose
|
|
86
|
+
`RB_NOGVL_OFFLOAD_SAFE` still build through the classic no-GVL fallback; they
|
|
87
|
+
just do not get the Fiber Scheduler offload-safe hint.
|
|
88
|
+
- System libinjection mode now verifies that the runtime libinjection version is
|
|
89
|
+
exactly `4.0.0`, because this binding uses v4 diagnostic structs and token
|
|
90
|
+
fields.
|
|
91
|
+
- The vendored manifest no longer includes a self-hash. Verification now relies
|
|
92
|
+
on the pinned upstream archive SHA-256 plus local tree checksum.
|
|
93
|
+
- The vendor script now downloads with `Net::HTTP`, explicit timeouts, retry
|
|
94
|
+
handling, redirect handling, and streaming writes instead of `URI.open` and
|
|
95
|
+
`remote.read`.
|
|
96
|
+
- `extconf.rb` no longer auto-downloads vendored native sources during build; missing vendored sources fail with an explicit instruction to run `script/vendor_libs.rb`.
|
|
97
|
+
|
|
98
|
+
### Docs / project hygiene
|
|
99
|
+
|
|
100
|
+
- Added `SECURITY.md` for private vulnerability reporting.
|
|
101
|
+
- README now states the scan surface and JSON-body limitation at the top.
|
|
102
|
+
- GET_STARTED now includes a threat model, skipped input behavior, PII notes,
|
|
103
|
+
GVL/offload notes, notifier hot-path warning, and system-library version guard.
|
|
104
|
+
- Added explicit BSD-3 attribution for vendored libinjection.
|
|
105
|
+
|
|
106
|
+
## 0.1.0
|
|
107
|
+
|
|
108
|
+
- Initial native binding for libinjection v4.0.0.
|
|
109
|
+
- Added `LibInjection.sqli?`, `LibInjection.sqli_fingerprint`, `LibInjection.xss?`, and `LibInjection.detect`.
|
|
110
|
+
- Added table-driven diagnostic native API: `sqli_result`, `sqli_contexts`, `sqli_tokens`, `sqli_fingerprint_for`, `xss_result`, `xss_contexts`, and `html5_tokens`.
|
|
111
|
+
- Added `Rack::LibInjection` middleware in report/block/off modes.
|
|
112
|
+
- Added pinned `script/vendor_libs.rb` vendoring workflow.
|
data/GET_STARTED.md
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
# Get started
|
|
2
|
+
|
|
3
|
+
## Install
|
|
4
|
+
|
|
5
|
+
Add the gem to your `Gemfile`:
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
# Gemfile
|
|
9
|
+
gem "rack-libinjection"
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
The published gem ships vendored `libinjection` sources, so users install
|
|
13
|
+
it with a normal `bundle install`.
|
|
14
|
+
|
|
15
|
+
For source checkouts, vendor pinned upstream sources explicitly before building.
|
|
16
|
+
The extension does not auto-download native code during `extconf.rb`:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
bundle install
|
|
20
|
+
bundle exec rake vendor
|
|
21
|
+
bundle exec rake compile
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Rails middleware
|
|
26
|
+
|
|
27
|
+
Start in report-only mode:
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
# config/application.rb
|
|
31
|
+
config.middleware.use Rack::LibInjection, mode: :report
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Full configuration example:
|
|
35
|
+
|
|
36
|
+
```ruby
|
|
37
|
+
# config/application.rb
|
|
38
|
+
config.middleware.use Rack::LibInjection,
|
|
39
|
+
mode: :report,
|
|
40
|
+
scan: [:params],
|
|
41
|
+
threats: [:sqli, :xss],
|
|
42
|
+
max_value_bytes: 8192,
|
|
43
|
+
max_depth: 8,
|
|
44
|
+
path_decode_depth: 2,
|
|
45
|
+
ignore_params: %w[authenticity_token],
|
|
46
|
+
ignore_headers: Rack::LibInjection::DEFAULT_IGNORE_HEADERS,
|
|
47
|
+
scan_cookie_names: false,
|
|
48
|
+
parser_errors: :auto,
|
|
49
|
+
notify_skipped: true,
|
|
50
|
+
skipped_inputs: :auto,
|
|
51
|
+
notifier_errors: :ignore
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Start in `:report`; switch to `:block` only after observing false positives.
|
|
55
|
+
|
|
56
|
+
### Modes
|
|
57
|
+
|
|
58
|
+
- `:report` — detect and notify, then continue request processing.
|
|
59
|
+
- `:block` — return `403` when at least one attack signal is detected. With the default `parser_errors: :auto` and `skipped_inputs: :auto`, Rack/native parser errors and limit-skipped input also fail closed.
|
|
60
|
+
- `:off` — no scanning.
|
|
61
|
+
|
|
62
|
+
The middleware stores attack records in:
|
|
63
|
+
|
|
64
|
+
```ruby
|
|
65
|
+
request.env["rack.libinjection.attacks"]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Each record has:
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
{
|
|
72
|
+
type: :sqli, # or :xss
|
|
73
|
+
location: :params, # :query, :params, :path, :headers, or :cookies
|
|
74
|
+
key: "search.q",
|
|
75
|
+
key_name: false, # true when the signal came from a param/cookie key
|
|
76
|
+
fingerprint: "s&1UE", # SQLi only
|
|
77
|
+
bytes: 23
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Scanning model
|
|
82
|
+
|
|
83
|
+
The default scan surface is deliberately narrow:
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
scan: [:params]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Supported scan locations:
|
|
90
|
+
|
|
91
|
+
- `:query` — raw Rack query string plus decoded variants. This is the fastest WAF-style signal surface and avoids Rack nested params parsing; URL decoding is performed in the native extension to avoid Ruby `gsub`/regex allocation, but this surface does not provide semantic per-param keys;
|
|
92
|
+
- `:params` — parsed Rack params, including query/form params as provided by Rack;
|
|
93
|
+
- `:path` — `request.path`, with a segment fallback so `/items/1 OR 1=1--` is scanned as both the full path and individual path segments. Percent-encoded path values are decoded inside the native extension up to `path_decode_depth` times for detection; the default is `2` to catch common double-encoding attempts.
|
|
94
|
+
- `:headers` — Rack HTTP headers from `env`, except names in `ignore_headers`;
|
|
95
|
+
- `:cookies` — parsed Rack cookie values. Cookie names are skipped by default and can be enabled with `scan_cookie_names: true`.
|
|
96
|
+
|
|
97
|
+
The middleware scans predictable, bounded input only:
|
|
98
|
+
|
|
99
|
+
- string values inside parsed params, cookies, and non-ignored headers;
|
|
100
|
+
- string keys inside params hashes;
|
|
101
|
+
- multipart upload filenames when an upload object exposes `original_filename`
|
|
102
|
+
or `filename`;
|
|
103
|
+
- values up to `max_value_bytes`;
|
|
104
|
+
- nested structures up to `max_depth`;
|
|
105
|
+
- ignored params and ignored headers are skipped by exact normalized name. The default ignored param list is intentionally narrow (`authenticity_token` only); sensitive values such as `password` are scanned, but raw values are not emitted in attack notifications.
|
|
106
|
+
|
|
107
|
+
Threat classes are configurable:
|
|
108
|
+
|
|
109
|
+
```ruby
|
|
110
|
+
threats: [:sqli, :xss] # default
|
|
111
|
+
threats: [:sqli] # skip the XSS state machine on clean strings
|
|
112
|
+
threats: [:xss] # skip SQLi fingerprinting
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Use single-threat mode only when the missing class is covered elsewhere. The main performance reason is that clean strings otherwise run both SQLi fingerprinting and the XSS state machine.
|
|
116
|
+
|
|
117
|
+
For raw query/path scanning, `path_decode_depth: 0` scans only the raw value. Depth `1` adds one percent-decoded pass, and depth `2` adds a second pass for common double-encoding attempts. The value is hard-capped at `Rack::LibInjection::MAX_PATH_DECODE_DEPTH` to avoid accidental CPU-heavy configurations. Query decoding treats `+` as a space; path decoding keeps `+` literal. Malformed percent escapes are kept literal inside the native decoder instead of raising through Ruby `gsub`/regex machinery.
|
|
118
|
+
|
|
119
|
+
Header scanning is inherently noisy if every header is scanned. By default the
|
|
120
|
+
middleware ignores low-signal protocol/browser headers such as `Accept`, `Host`,
|
|
121
|
+
`Content-Type`, and `Content-Length`. Override `ignore_headers: []` if you really
|
|
122
|
+
want every Rack header scanned.
|
|
123
|
+
|
|
124
|
+
Cookie names are also skipped by default because names are often application
|
|
125
|
+
controlled and can produce false positives (`session_or_token`, feature flags,
|
|
126
|
+
etc.). Enable `scan_cookie_names: true` only if cookie names are attacker
|
|
127
|
+
controlled or security-relevant in your application.
|
|
128
|
+
|
|
129
|
+
There is deliberately no hidden “lazy suspicious prefilter” before
|
|
130
|
+
libinjection. That avoids creating a second bypass-prone mini-WAF layer.
|
|
131
|
+
|
|
132
|
+
### Parser error policy
|
|
133
|
+
|
|
134
|
+
Vendored libinjection v4 returns parser errors instead of aborting. The Rack middleware treats those errors explicitly:
|
|
135
|
+
|
|
136
|
+
```ruby
|
|
137
|
+
parser_errors: :auto # default: report in :report mode, block in :block mode
|
|
138
|
+
parser_errors: :report # notify and allow
|
|
139
|
+
parser_errors: :block # notify and return 403
|
|
140
|
+
parser_errors: :raise # re-raise LibInjection::ParserError
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
For blocking deployments, `:auto` fails closed: native libinjection parser errors and known Rack parameter/cookie parser errors are treated like blocked requests.
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
Raw JSON body scanning is intentionally not part of the current middleware.
|
|
147
|
+
Rack body rewind, large request bodies, multipart file contents, nested JSON
|
|
148
|
+
depth, and PII-safe logging need separate design. If your API is JSON-only,
|
|
149
|
+
keep this limitation visible in your threat model.
|
|
150
|
+
|
|
151
|
+
### Skipped input notifications
|
|
152
|
+
|
|
153
|
+
Input skipped because of safety limits is observable through:
|
|
154
|
+
|
|
155
|
+
```ruby
|
|
156
|
+
rack.libinjection.skipped
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
The payload includes `reason`, `location`, `key`, `bytes`, and `limit` when
|
|
160
|
+
available. This is intentionally separate from attack notifications: a skipped
|
|
161
|
+
large/deep value is not an attack by itself, but it is useful telemetry when
|
|
162
|
+
someone tries to hide payloads behind configured limits.
|
|
163
|
+
|
|
164
|
+
Skipped input policy is explicit:
|
|
165
|
+
|
|
166
|
+
```ruby
|
|
167
|
+
skipped_inputs: :auto # default: report in :report mode, block in :block mode
|
|
168
|
+
skipped_inputs: :report # notify and allow
|
|
169
|
+
skipped_inputs: :block # notify and return 403
|
|
170
|
+
skipped_inputs: :allow # allow silently
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Disable skipped telemetry if it is too noisy while keeping the same allow/block
|
|
174
|
+
policy:
|
|
175
|
+
|
|
176
|
+
```ruby
|
|
177
|
+
config.middleware.use Rack::LibInjection, notify_skipped: false
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Notifications
|
|
181
|
+
|
|
182
|
+
If `ActiveSupport::Notifications` is loaded and no explicit `logger:` or
|
|
183
|
+
`notifier:` is provided, the middleware emits `rack.libinjection.attack` events that you can subscribe to:
|
|
184
|
+
|
|
185
|
+
```ruby
|
|
186
|
+
ActiveSupport::Notifications.subscribe("rack.libinjection.attack") do |_name, _start, _finish, _id, payload|
|
|
187
|
+
Rails.logger.warn(payload.inspect)
|
|
188
|
+
end
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
You can also pass a custom notifier:
|
|
192
|
+
|
|
193
|
+
```ruby
|
|
194
|
+
config.middleware.use Rack::LibInjection,
|
|
195
|
+
notifier: ->(event, payload) { SecurityEvents.write(event, payload) }
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Explicit `logger:` wins over the ActiveSupport auto-detection:
|
|
199
|
+
|
|
200
|
+
```ruby
|
|
201
|
+
config.middleware.use Rack::LibInjection, logger: Rails.logger
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
`Rack::LibInjection` calls `notifier.call(...)` synchronously inside the
|
|
205
|
+
middleware. A slow subscriber slows down the entire request. Do not perform
|
|
206
|
+
blocking IO inside a subscriber. If you need to persist attack signals to Redis,
|
|
207
|
+
a database, or an external service, push to a background queue and process out
|
|
208
|
+
of band:
|
|
209
|
+
|
|
210
|
+
```ruby
|
|
211
|
+
ActiveSupport::Notifications.subscribe("rack.libinjection.attack") do |_, _, _, _, payload|
|
|
212
|
+
AttackEventJob.perform_later(payload)
|
|
213
|
+
end
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Notifier exceptions are ignored by default so a reporting hook cannot turn a
|
|
217
|
+
request into a 500. To re-raise notifier exceptions instead during development or tests:
|
|
218
|
+
|
|
219
|
+
```ruby
|
|
220
|
+
config.middleware.use Rack::LibInjection, notifier_errors: :raise
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
The middleware also emits `rack.libinjection.error` events when Rack parameter
|
|
224
|
+
parsing or libinjection parser errors occur on the configured scan surface.
|
|
225
|
+
These are rare but useful to monitor.
|
|
226
|
+
|
|
227
|
+
## Rack::Attack integration
|
|
228
|
+
|
|
229
|
+
Keep `rack-libinjection` in report mode and score IPs yourself:
|
|
230
|
+
|
|
231
|
+
```ruby
|
|
232
|
+
class LibInjectionTracker
|
|
233
|
+
def self.record(ip, attack)
|
|
234
|
+
Rails.cache.increment("libinjection:#{ip}:score", 1, expires_in: 10.minutes)
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
def self.score(ip)
|
|
238
|
+
Rails.cache.read("libinjection:#{ip}:score").to_i
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
ActiveSupport::Notifications.subscribe("rack.libinjection.attack") do |_name, _start, _finish, _id, payload|
|
|
243
|
+
LibInjectionTracker.record(payload[:ip], payload)
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
Rack::Attack.blocklist("libinjection repeat attackers") do |req|
|
|
247
|
+
LibInjectionTracker.score(req.ip) > 5
|
|
248
|
+
end
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Threat model and limitations
|
|
252
|
+
|
|
253
|
+
`rack-libinjection` is a signal layer. It helps identify suspicious payloads
|
|
254
|
+
that reached configured Rack surfaces. It is not the control that keeps SQL or
|
|
255
|
+
HTML safe.
|
|
256
|
+
|
|
257
|
+
Still required:
|
|
258
|
+
|
|
259
|
+
- ActiveRecord bind params / parameterized SQL;
|
|
260
|
+
- output escaping and CSP;
|
|
261
|
+
- authorization;
|
|
262
|
+
- rate limiting;
|
|
263
|
+
- upstream WAF/reverse-proxy controls where appropriate.
|
|
264
|
+
|
|
265
|
+
Known limitations:
|
|
266
|
+
|
|
267
|
+
- detection inherits upstream `libinjection` limits and bypasses;
|
|
268
|
+
- only ANSI/MySQL SQLi contexts are exposed by this binding's diagnostic API;
|
|
269
|
+
- the middleware scans what Rack has already parsed; path values are decoded up
|
|
270
|
+
to `path_decode_depth` times, but params/cookies/headers are not recursively
|
|
271
|
+
decoded by this gem;
|
|
272
|
+
- Unicode normalization is not performed; callers that need NFC/NFKC or
|
|
273
|
+
UTF-16/UTF-32 decoding must normalize before scanning;
|
|
274
|
+
- values beyond `max_value_bytes` and nested values deeper than `max_depth` are not scanned. In report mode they are reported as skipped input by default; in block mode they are blocked by default through `skipped_inputs: :auto`;
|
|
275
|
+
- JSON bodies are not scanned yet;
|
|
276
|
+
- path and IP in notification payloads can contain identifiers, so treat them as
|
|
277
|
+
operational/security telemetry, not PII-free analytics data.
|
|
278
|
+
|
|
279
|
+
## Low-level API
|
|
280
|
+
|
|
281
|
+
```ruby
|
|
282
|
+
require "libinjection"
|
|
283
|
+
|
|
284
|
+
LibInjection.sqli?("1 OR 1=1--")
|
|
285
|
+
# => true
|
|
286
|
+
|
|
287
|
+
LibInjection.sqli_fingerprint("1 OR 1=1--")
|
|
288
|
+
# => "..." or nil
|
|
289
|
+
|
|
290
|
+
LibInjection.xss?("<script>alert(1)</script>")
|
|
291
|
+
# => true / false
|
|
292
|
+
|
|
293
|
+
LibInjection.detect("1 OR 1=1--")
|
|
294
|
+
# => #<data LibInjection::Result type=:sqli, detected=true, fingerprint="...">
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Hot-path primitive
|
|
298
|
+
|
|
299
|
+
`detect_raw` is the low-allocation primitive used internally by the Rack
|
|
300
|
+
middleware. It returns the minimum amount of data needed to decide what to do,
|
|
301
|
+
without allocating a `Result` object.
|
|
302
|
+
|
|
303
|
+
```ruby
|
|
304
|
+
LibInjection.detect_raw("1 OR 1=1--")
|
|
305
|
+
# => [:sqli, "s&1UE"]
|
|
306
|
+
|
|
307
|
+
LibInjection.detect_raw("<script>alert(1)</script>")
|
|
308
|
+
# => [:xss, nil]
|
|
309
|
+
|
|
310
|
+
LibInjection.detect_raw("hello")
|
|
311
|
+
# => nil
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Diagnostic API
|
|
315
|
+
|
|
316
|
+
```ruby
|
|
317
|
+
LibInjection.sqli_result("1 OR 1=1--")
|
|
318
|
+
LibInjection.sqli_contexts("1 OR 1=1--")
|
|
319
|
+
LibInjection.sqli_tokens("1 OR 1=1--")
|
|
320
|
+
LibInjection.sqli_tokens("1 OR 1=1--", fold: true)
|
|
321
|
+
LibInjection.sqli_fingerprint_for("1 OR 1=1--", context: :none_ansi)
|
|
322
|
+
|
|
323
|
+
LibInjection.xss_result("<script>alert(1)</script>")
|
|
324
|
+
LibInjection.xss_contexts("<script>alert(1)</script>")
|
|
325
|
+
LibInjection.html5_tokens("<script>alert(1)</script>")
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
Available native maps:
|
|
329
|
+
|
|
330
|
+
```ruby
|
|
331
|
+
LibInjection::SQLI_CONTEXTS
|
|
332
|
+
LibInjection::SQLI_QUOTES
|
|
333
|
+
LibInjection::SQLI_DIALECTS
|
|
334
|
+
LibInjection::SQLI_TOKEN_TYPES
|
|
335
|
+
LibInjection::HTML5_CONTEXTS
|
|
336
|
+
LibInjection::XSS_CONTEXTS
|
|
337
|
+
LibInjection::HTML5_TOKEN_TYPES
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Parser errors from libinjection v4 are exposed as
|
|
341
|
+
`LibInjection::ParserError`.
|
|
342
|
+
|
|
343
|
+
## Vendored libinjection
|
|
344
|
+
|
|
345
|
+
The vendor flow pins upstream C sources by archive SHA-256 and records a tree
|
|
346
|
+
checksum for local verification.
|
|
347
|
+
|
|
348
|
+
```bash
|
|
349
|
+
ruby script/vendor_libs.rb --sync
|
|
350
|
+
ruby script/vendor_libs.rb --verify
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
Pinned upstream:
|
|
354
|
+
|
|
355
|
+
- libinjection `v4.0.0`
|
|
356
|
+
- archive SHA-256: `a69d27e3d98608df89203c4e1c00c034fe0f8c723017e4088ab53ce3ff5a9129`
|
|
357
|
+
|
|
358
|
+
The script records a tree checksum in
|
|
359
|
+
`ext/libinjection/vendor/libinjection/.vendored`. This catches accidental local
|
|
360
|
+
changes to vendored files. It is not a cryptographic signature; the upstream
|
|
361
|
+
archive hash above is the external integrity pin.
|
|
362
|
+
|
|
363
|
+
## System library mode
|
|
364
|
+
|
|
365
|
+
The default build mode uses the vendored sources. For distro builds that link against a system libinjection:
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
bundle config build.rack-libinjection --use-system-libinjection
|
|
369
|
+
# or
|
|
370
|
+
LIBINJECTION_USE_SYSTEM=1 bundle install
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
When system mode is used, the extension checks the runtime libinjection version
|
|
374
|
+
at load time and rejects versions other than `4.0.0`; the binding exposes
|
|
375
|
+
diagnostic structs and token fields that are tied to that upstream API.
|
|
376
|
+
|
|
377
|
+
## Concurrency and the GVL
|
|
378
|
+
|
|
379
|
+
`libinjection` scans are pure CPU work over raw bytes. They do not allocate on
|
|
380
|
+
the heap, do not call into the Ruby C API, and do not touch global state. That
|
|
381
|
+
makes them a good candidate for releasing the GVL during a scan.
|
|
382
|
+
|
|
383
|
+
For inputs of at least `LI_NOGVL_THRESHOLD` bytes (default `1024`), the native
|
|
384
|
+
binding releases the GVL. When the Ruby headers provide
|
|
385
|
+
`RB_NOGVL_OFFLOAD_SAFE`, the binding passes that flag to `rb_nogvl(...)` so a
|
|
386
|
+
Fiber Scheduler can treat the scan as offload-safe. On older Ruby headers that
|
|
387
|
+
only provide the classic no-GVL API, the extension falls back to
|
|
388
|
+
`rb_thread_call_without_gvl(...)`: multi-thread Puma/Sidekiq still benefit, but
|
|
389
|
+
there is no scheduler offload hint.
|
|
390
|
+
|
|
391
|
+
Short inputs are scanned inline under the GVL. The fixed overhead of releasing
|
|
392
|
+
the GVL is larger than the scan itself on small payloads, so the threshold is
|
|
393
|
+
conservative and should be tuned with benchmarks for specific deployments. You
|
|
394
|
+
can override it at build time:
|
|
395
|
+
|
|
396
|
+
```bash
|
|
397
|
+
bundle config build.rack-libinjection -- --with-cflags="-DLI_NOGVL_THRESHOLD=512"
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
### Memory safety contract
|
|
401
|
+
|
|
402
|
+
For public low-level scans that release the GVL, the native binding first
|
|
403
|
+
copies the input bytes into a temporary C buffer. That avoids reading from a
|
|
404
|
+
Ruby `String` buffer while another Ruby thread could mutate or reallocate the
|
|
405
|
+
same object. Short inputs are scanned under the GVL without a copy.
|
|
406
|
+
|
|
407
|
+
The binding scans bytes, not semantic text. It accepts `UTF-8` and
|
|
408
|
+
`ASCII-8BIT`/binary strings, including invalid byte sequences, as byte input. It
|
|
409
|
+
does not implicitly decode UTF-16/UTF-32 into UTF-8. Normalize or transcode
|
|
410
|
+
application text before calling the low-level API if your application accepts
|
|
411
|
+
non-UTF-8 encodings.
|
|
412
|
+
|
|
413
|
+
## License notes
|
|
414
|
+
|
|
415
|
+
The Ruby gem is MIT-licensed. The vendored upstream `libinjection` sources are
|
|
416
|
+
BSD-3-Clause licensed; the upstream license is included as
|
|
417
|
+
`LICENSE-libinjection.txt` in the packaged gem and
|
|
418
|
+
`ext/libinjection/vendor/libinjection/COPYING` in source checkouts.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
Copyright (c) 2012-2016, Nick Galbreath
|
|
2
|
+
Copyright (c) 2017-2024, libinjection Contributors
|
|
3
|
+
All rights reserved.
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are
|
|
7
|
+
met:
|
|
8
|
+
|
|
9
|
+
1. Redistributions of source code must retain the above copyright
|
|
10
|
+
notice, this list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright
|
|
13
|
+
notice, this list of conditions and the following disclaimer in the
|
|
14
|
+
documentation and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
17
|
+
contributors may be used to endorse or promote products derived from
|
|
18
|
+
this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
21
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
22
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
23
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
24
|
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
25
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
26
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
27
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
28
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
29
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
31
|
+
|
|
32
|
+
https://github.com/libinjection/libinjection
|
|
33
|
+
http://opensource.org/licenses/BSD-3-Clause
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Roman Haydarov
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|