perchfall 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +30 -0
- data/LICENSE.txt +21 -0
- data/README.md +187 -0
- data/lib/perchfall/client.rb +93 -0
- data/lib/perchfall/command_runner.rb +33 -0
- data/lib/perchfall/concurrency_limiter.rb +65 -0
- data/lib/perchfall/console_error.rb +15 -0
- data/lib/perchfall/error_filter.rb +35 -0
- data/lib/perchfall/errors.rb +50 -0
- data/lib/perchfall/ignore_rule.rb +39 -0
- data/lib/perchfall/network_error.rb +15 -0
- data/lib/perchfall/parsers/playwright_json_parser.rb +71 -0
- data/lib/perchfall/playwright_invoker.rb +67 -0
- data/lib/perchfall/report.rb +81 -0
- data/lib/perchfall/url_validator.rb +107 -0
- data/lib/perchfall/version.rb +5 -0
- data/lib/perchfall.rb +56 -0
- data/perchfall.gemspec +44 -0
- data/playwright/check.js +144 -0
- metadata +121 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 5547f2ac3310d1069229f015c558e1c13d2f21883feee453c0f80fd87ea00c80
|
|
4
|
+
data.tar.gz: 2e4a1d91838b8b97609c718cdad6cbe6d94ee6b577c7c165356eaef3d3e740b7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: bd5fc98a14b893767c4bb50aa943b7d0fff41211d06f83089a4a95c7545270e82be02a2761cdfe5643cc5812c92ddabf5260a18160ea99d84ec08376f3cd7741
|
|
7
|
+
data.tar.gz: 2d354eb422ec3196af811201422de3d0323dcdab30a6fc46fe7c7cd422fbd1874e06be0736c9da2f919173588c019140bdaf20a5834d0b5eccd6ca3e7635e4c8
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-03-17
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `Perchfall.run(url:)` — primary public API; returns an immutable `Report` value object
|
|
15
|
+
- `bust_cache:` option (default `true`) appends a `_perchfall=<timestamp>` query parameter to prevent CDN and proxy caching from masking real page state; `report.url` always reflects the original caller URL, not the cache-busted one
|
|
16
|
+
- `scenario_name:` option included in the report for labelling checks
|
|
17
|
+
- `wait_until:` option (`load`, `domcontentloaded`, `networkidle`, `commit`) controls when Playwright considers navigation complete
|
|
18
|
+
- `timeout_ms:` option (default 30 000, max 60 000) for Playwright navigation timeout
|
|
19
|
+
- `Report` value object with `ok?`, `http_status`, `duration_ms`, `network_errors`, `console_errors`, `to_json`
|
|
20
|
+
- `ignored_network_errors` / `ignored_console_errors` on `Report` — errors suppressed by ignore rules are captured, not silently dropped
|
|
21
|
+
- Configurable ignore rules via `ignore:` — `IgnoreRule` supports substring, regex, and wildcard matching on URL/text and failure/type fields
|
|
22
|
+
- Default ignore rule suppresses `net::ERR_ABORTED` (analytics beacons, cancelled prefetches)
|
|
23
|
+
- Typed exception hierarchy: `PageLoadError` (with partial report), `ConcurrencyLimitError`, `InvocationError`, `ScriptError`, `ParseError`
|
|
24
|
+
- Process-wide concurrency limiter (default 5 simultaneous Chromium instances) using Mutex + ConditionVariable — no spinning, slot always released
|
|
25
|
+
- SSRF mitigations: scheme allowlist (`http`/`https` only), literal IP blocklist (loopback, link-local, RFC-1918), DNS resolution check; URL validation always runs against the effective URL sent to Playwright (post cache-bust)
|
|
26
|
+
- Full dependency injection throughout — test suite runs in ~0.4 s with no browser, Node, or network required
|
|
27
|
+
- GitHub Actions CI workflow (unit suite) and manual Playwright smoke check workflow
|
|
28
|
+
|
|
29
|
+
[Unreleased]: https://github.com/beflagrant/perchfall/compare/v0.1.0...HEAD
|
|
30
|
+
[0.1.0]: https://github.com/beflagrant/perchfall/releases/tag/v0.1.0
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Flagrant LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# Perchfall
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/beflagrant/perchfall/actions/workflows/ci.yml/badge.svg)](https://github.com/beflagrant/perchfall/actions/workflows/ci.yml)
|
|
4
|
+
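[![Playwright smoke check](https://github.com/beflagrant/perchfall/actions/workflows/playwright.yml/badge.svg)](https://github.com/beflagrant/perchfall/actions/workflows/playwright.yml)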
|
|
5
|
+
|
|
6
|
+
**Synthetic browser monitoring for Ruby.** Give it a URL; get back a structured report of what a real Chromium browser saw — HTTP status, broken assets, JavaScript errors, and load time. No framework required.
|
|
7
|
+
|
|
8
|
+
```ruby
|
|
9
|
+
report = Perchfall.run(url: "https://example.com")
|
|
10
|
+
|
|
11
|
+
report.ok? # => true
|
|
12
|
+
report.http_status # => 200
|
|
13
|
+
report.duration_ms # => 834
|
|
14
|
+
report.network_errors # => []
|
|
15
|
+
report.console_errors # => []
|
|
16
|
+
report.to_json # => '{"status":"ok","url":"https://example.com",...}'
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Why Perchfall
|
|
22
|
+
|
|
23
|
+
**Uptime monitoring tells you a server is responding. Perchfall tells you the page actually works.**
|
|
24
|
+
|
|
25
|
+
- A `200 OK` doesn't mean your JavaScript loaded.
|
|
26
|
+
- An APM trace doesn't capture a missing CDN asset.
|
|
27
|
+
- A health check endpoint doesn't know your checkout flow is broken.
|
|
28
|
+
|
|
29
|
+
Perchfall runs a headless Chromium browser against your URL and gives you back everything it found: the HTTP status, every failed network request, every JavaScript error logged to the console, and how long it took. The result is an **immutable Ruby value object** you can store, log, or alert on — no database schema imposed, no framework lock-in.
|
|
30
|
+
|
|
31
|
+
Drop it into a Sidekiq job, a Rake task, a CI step, or a plain Ruby script. It works anywhere Ruby runs.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Requirements
|
|
36
|
+
|
|
37
|
+
| Dependency | Version |
|
|
38
|
+
| --- | --- |
|
|
39
|
+
| Ruby | ≥ 3.2 |
|
|
40
|
+
| Node | ≥ 18 |
|
|
41
|
+
| Playwright | installed via npm |
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```sh
|
|
48
|
+
# 1. Add the gem
|
|
49
|
+
bundle add perchfall
|
|
50
|
+
|
|
51
|
+
# 2. Install Playwright (once per machine)
|
|
52
|
+
npm install playwright
|
|
53
|
+
npx playwright install chromium
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quickstart
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
require "perchfall"
|
|
62
|
+
|
|
63
|
+
report = Perchfall.run(url: "https://example.com")
|
|
64
|
+
|
|
65
|
+
if report.ok?
|
|
66
|
+
puts "#{report.url} loaded in #{report.duration_ms}ms"
|
|
67
|
+
else
|
|
68
|
+
puts "Page failed: #{report.network_errors.map(&:failure).join(", ")}"
|
|
69
|
+
end
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Detect broken assets and JS errors
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
report = Perchfall.run(url: "https://example.com")
|
|
76
|
+
|
|
77
|
+
report.network_errors.each do |e|
|
|
78
|
+
puts "#{e.http_method} #{e.url} — #{e.failure}"
|
|
79
|
+
end
|
|
80
|
+
# GET https://example.com/assets/app.js — HTTP 404
|
|
81
|
+
# GET https://cdn.example.com/font.woff — net::ERR_NAME_NOT_RESOLVED
|
|
82
|
+
|
|
83
|
+
report.console_errors.each do |e|
|
|
84
|
+
puts "#{e.type}: #{e.text}"
|
|
85
|
+
end
|
|
86
|
+
# error: Uncaught ReferenceError: Stripe is not defined
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
A page can be `ok: true` (it loaded) and still have broken sub-resources. Perchfall captures both.
|
|
90
|
+
|
|
91
|
+
### Handle page load failures
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
begin
|
|
95
|
+
report = Perchfall.run(url: "https://example.com", timeout_ms: 10_000)
|
|
96
|
+
rescue Perchfall::Errors::PageLoadError => e
|
|
97
|
+
# Page couldn't load at all — a partial report is always attached.
|
|
98
|
+
store_report(e.report.to_json)
|
|
99
|
+
end
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Use in a background job
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
class SyntheticCheckJob
|
|
106
|
+
include Sidekiq::Job
|
|
107
|
+
|
|
108
|
+
def perform(url)
|
|
109
|
+
report = Perchfall.run(url: url)
|
|
110
|
+
SyntheticResult.create!(ok: report.ok?, payload: report.to_json)
|
|
111
|
+
rescue Perchfall::Errors::PageLoadError => e
|
|
112
|
+
SyntheticResult.create!(ok: false, payload: e.report.to_json)
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## What's in a report
|
|
120
|
+
|
|
121
|
+
Every check returns a `Perchfall::Report`:
|
|
122
|
+
|
|
123
|
+
| Field | Type | Description |
|
|
124
|
+
| --- | --- | --- |
|
|
125
|
+
| `ok?` | Boolean | `true` if the page loaded successfully |
|
|
126
|
+
| `http_status` | Integer / nil | HTTP response code |
|
|
127
|
+
| `duration_ms` | Integer | Total time from navigation start to `load` event |
|
|
128
|
+
| `url` | String | The URL checked |
|
|
129
|
+
| `timestamp` | Time | When the check ran (UTC) |
|
|
130
|
+
| `network_errors` | Array | Failed or errored network requests |
|
|
131
|
+
| `console_errors` | Array | JavaScript errors logged to the browser console |
|
|
132
|
+
| `to_json` | String | Full report as JSON |
|
|
133
|
+
|
|
134
|
+
→ [Full report schema and JSON reference](docs/report-schema.md)
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Errors
|
|
139
|
+
|
|
140
|
+
| Exception | When |
|
|
141
|
+
| --- | --- |
|
|
142
|
+
| `ArgumentError` | URL is invalid (bad scheme, internal address), or `wait_until` / `timeout_ms` is not an accepted value |
|
|
143
|
+
| `Perchfall::Errors::PageLoadError` | Page couldn't load; partial report attached at `e.report` |
|
|
144
|
+
| `Perchfall::Errors::ConcurrencyLimitError` | All browser slots are busy; back off and retry |
|
|
145
|
+
| `Perchfall::Errors::InvocationError` | Node isn't installed or not in PATH |
|
|
146
|
+
| `Perchfall::Errors::Error` | Base class — catches any Perchfall error |
|
|
147
|
+
|
|
148
|
+
→ [Full error handling guide](docs/error-handling.md)
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Configuration
|
|
153
|
+
|
|
154
|
+
```ruby
|
|
155
|
+
Perchfall.run(
|
|
156
|
+
url: "https://example.com",
|
|
157
|
+
timeout_ms: 10_000, # default 30_000, max 60_000
|
|
158
|
+
wait_until: "domcontentloaded", # default "load"
|
|
159
|
+
scenario_name: "homepage_smoke" # included in report JSON
|
|
160
|
+
)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
→ [All options and wait_until strategies](docs/configuration.md)
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Further reading
|
|
168
|
+
|
|
169
|
+
- [Rails integration — Sidekiq job, schema, scheduling](docs/rails-integration.md)
|
|
170
|
+
- [Security — SSRF protection, URL validation, ignore rules](docs/security.md)
|
|
171
|
+
- [Architecture decisions](doc/adr/)
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Development
|
|
176
|
+
|
|
177
|
+
```sh
|
|
178
|
+
bundle install
|
|
179
|
+
bundle exec rspec # ~0.4s, no browser or Node required
|
|
180
|
+
bin/console # IRB with perchfall loaded
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## License
|
|
186
|
+
|
|
187
|
+
MIT
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
|
|
5
|
+
module Perchfall
|
|
6
|
+
# The primary entry point for library consumers.
|
|
7
|
+
#
|
|
8
|
+
# Usage (simple):
|
|
9
|
+
# client = Perchfall::Client.new
|
|
10
|
+
# report = client.run(url: "https://example.com")
|
|
11
|
+
#
|
|
12
|
+
# Usage (with options):
|
|
13
|
+
# report = client.run(
|
|
14
|
+
# url: "https://example.com",
|
|
15
|
+
# timeout_ms: 10_000,
|
|
16
|
+
# scenario_name: "homepage_smoke"
|
|
17
|
+
# )
|
|
18
|
+
#
|
|
19
|
+
# Usage (with custom invoker — testing or alternate runtimes):
|
|
20
|
+
# client = Perchfall::Client.new(invoker: MyCustomInvoker.new)
|
|
21
|
+
#
|
|
22
|
+
# Client is intentionally thin. It owns the public method signature
|
|
23
|
+
# and delegates all real work to the invoker.
|
|
24
|
+
class Client
  # Navigation milestones accepted for the +wait_until+ option.
  VALID_WAIT_UNTIL = %w[load domcontentloaded networkidle commit].freeze

  # Largest +timeout_ms+ accepted by #run.
  MAX_TIMEOUT_MS = 60_000

  # @param invoker [#run] performs the browser run and returns a Report
  # @param validator [#validate!] raises when the URL must not be visited
  # @param limiter [#acquire] yields once a concurrency slot is available
  def initialize(
    invoker: PlaywrightInvoker.new,
    validator: UrlValidator.new,
    limiter: Perchfall.default_limiter
  )
    @invoker = invoker
    @validator = validator
    @limiter = limiter
  end

  # Run a synthetic browser check against the given URL.
  #
  # The URL is validated *after* the cache-buster is appended, so the
  # validator always sees exactly what is handed to the invoker. The
  # caller's original URL is passed along separately as +original_url+.
  #
  # @param url [String] the URL to check (required, must be http or https)
  # @param ignore [Array] extra ignore rules, appended to Perchfall::DEFAULT_IGNORE_RULES
  # @param wait_until [String] one of VALID_WAIT_UNTIL (default "load")
  # @param timeout_ms [Integer] ms before Playwright gives up (default 30_000, max MAX_TIMEOUT_MS)
  # @param scenario_name [String, nil] optional label included in the report
  # @param timestamp [Time] override the run timestamp (default Time.now.utc)
  # @param bust_cache [Boolean] append a +_perchfall=<epoch seconds>+ query parameter (default true)
  # @return [Report] on success
  # @raise [ArgumentError] if the URL is not http/https, or wait_until / timeout_ms is invalid
  # @raise [Errors::ConcurrencyLimitError] if the concurrency cap is reached
  # @raise [Errors::InvocationError] if Node could not be started
  # @raise [Errors::ScriptError] if the Node script exited non-zero
  # @raise [Errors::ParseError] if the script output was not valid JSON
  # @raise [Errors::PageLoadError] if the page itself failed to load
  def run(url:, ignore: [], wait_until: "load", timeout_ms: 30_000, scenario_name: nil, timestamp: Time.now.utc, bust_cache: true)
    effective_url = bust_cache ? append_cache_buster(url) : url
    @validator.validate!(effective_url)
    validate_wait_until!(wait_until)
    validate_timeout_ms!(timeout_ms)
    merged_ignore = Perchfall::DEFAULT_IGNORE_RULES + ignore
    @limiter.acquire do
      @invoker.run(
        url: effective_url,
        original_url: url,
        ignore: merged_ignore,
        wait_until: wait_until,
        timeout_ms: timeout_ms,
        scenario_name: scenario_name,
        timestamp: timestamp
      )
    end
  end

  private

  # Raises ArgumentError unless +value+ is one of VALID_WAIT_UNTIL.
  def validate_wait_until!(value)
    return if VALID_WAIT_UNTIL.include?(value)

    raise ArgumentError,
          "wait_until must be one of #{VALID_WAIT_UNTIL.join(", ")}. Got: #{value.inspect}"
  end

  # Returns +url+ with a +_perchfall=<epoch seconds>+ query parameter added.
  #
  # The parameter is inserted *before* any "#fragment": text after "#" is
  # never sent to the server, so appending there would not bust any cache
  # and would corrupt the fragment. Only the part before "#" is inspected
  # when choosing between "?" and "&".
  #
  # Non-String input is returned untouched so that rejecting it is left to
  # the validator rather than failing here with NoMethodError.
  def append_cache_buster(url)
    return url unless url.is_a?(String)

    base, hash_mark, fragment = url.partition("#")
    separator = base.include?("?") ? "&" : "?"
    "#{base}#{separator}_perchfall=#{Time.now.utc.to_i}#{hash_mark}#{fragment}"
  end

  # Raises ArgumentError unless +value+ is an Integer in 1..MAX_TIMEOUT_MS.
  def validate_timeout_ms!(value)
    return if value.is_a?(Integer) && value > 0 && value <= MAX_TIMEOUT_MS

    raise ArgumentError,
          "timeout_ms must be a positive integer no greater than #{MAX_TIMEOUT_MS}. Got: #{value.inspect}"
  end
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "open3"
|
|
4
|
+
|
|
5
|
+
module Perchfall
|
|
6
|
+
# Wraps Open3.capture3 behind an injectable interface.
|
|
7
|
+
#
|
|
8
|
+
# Interface contract (implement this to build a test fake):
|
|
9
|
+
#
|
|
10
|
+
# result = runner.call(command)
|
|
11
|
+
# result.stdout # => String
|
|
12
|
+
# result.stderr # => String
|
|
13
|
+
# result.success? # => Boolean
|
|
14
|
+
# result.exit_status # => Integer
|
|
15
|
+
#
|
|
16
|
+
# Always pass argv arrays, never shell strings — this prevents injection.
|
|
17
|
+
class CommandRunner
  # Outcome of one finished command.
  #
  # exit_status is the process exit code, or nil when the process did not
  # exit normally (Process::Status#exitstatus returns nil in that case,
  # e.g. when the child was killed by a signal).
  Result = Data.define(:stdout, :stderr, :exit_status) do
    # @return [Boolean] true only for a normal exit with status 0; a nil
    #   exit_status counts as failure instead of raising NoMethodError.
    def success?
      exit_status.is_a?(Integer) && exit_status.zero?
    end
  end

  # Runs +command+ to completion and captures its output.
  #
  # @param command [Array<String>] argv array; it is splatted into
  #   Open3.capture3, so no shell is involved
  # @return [Result] captured stdout, stderr and exit status
  def call(command)
    stdout, stderr, status = Open3.capture3(*command)
    Result.new(
      stdout: stdout,
      stderr: stderr,
      exit_status: status.exitstatus
    )
  end
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
# Caps the number of Playwright browser processes that can run simultaneously.
|
|
5
|
+
#
|
|
6
|
+
# Uses a Mutex + ConditionVariable semaphore so threads block (up to
|
|
7
|
+
# timeout_ms) rather than spinning. The slot is always released in an
|
|
8
|
+
# ensure block so a raising caller cannot leak it.
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# limiter = ConcurrencyLimiter.new(limit: 5, timeout_ms: 10_000)
|
|
12
|
+
# limiter.acquire { do_expensive_work }
|
|
13
|
+
#
|
|
14
|
+
# Raises Errors::ConcurrencyLimitError if timeout_ms elapses before a
|
|
15
|
+
# slot is available.
|
|
16
|
+
class ConcurrencyLimiter
  # How long #acquire waits for a free slot when no timeout is given.
  DEFAULT_TIMEOUT_MS = 30_000

  # @param limit [Integer] maximum number of blocks allowed to run at once
  # @param timeout_ms [Numeric] how long #acquire may wait for a slot
  def initialize(limit:, timeout_ms: DEFAULT_TIMEOUT_MS)
    @limit = limit
    @timeout_s = timeout_ms / 1000.0
    @count = 0
    @mutex = Mutex.new
    @condvar = ConditionVariable.new
  end

  # Takes a slot, yields, and gives the slot back even if the block raises.
  # The slot is taken *outside* the begin/ensure, so a failed acquisition
  # never triggers a release.
  #
  # @return [Object] whatever the block returns
  # @raise [Errors::ConcurrencyLimitError] if no slot frees up in time
  def acquire
    acquire_slot!
    begin
      yield
    ensure
      release_slot!
    end
  end

  # @return [Integer] number of slots currently free
  def available_slots
    @mutex.synchronize { @limit - @count }
  end

  private

  # Current reading of the monotonic clock, in seconds.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Waits on the condition variable until a slot is free, then claims it.
  # The deadline is fixed once, so repeated wake-ups cannot extend the wait.
  def acquire_slot!
    @mutex.synchronize do
      give_up_at = monotonic_now + @timeout_s

      until @count < @limit
        time_left = give_up_at - monotonic_now
        if time_left <= 0
          raise Errors::ConcurrencyLimitError,
                "Concurrency limit of #{@limit} reached; timeout of #{@timeout_s}s exceeded"
        end

        @condvar.wait(@mutex, time_left)
      end

      @count += 1
    end
  end

  # Returns a slot and wakes one waiting thread, if any.
  def release_slot!
    @mutex.synchronize do
      @count -= 1
      @condvar.signal
    end
  end
end
|
|
65
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
# Immutable value object representing a browser console error message
|
|
5
|
+
# captured during a Playwright browser run.
|
|
6
|
+
ConsoleError = Data.define(:type, :text, :location) do
  # @return [Hash] the three fields keyed by :type, :text and :location
  def to_h
    { type:, text:, location: }
  end

  # Serialises #to_h; all arguments are forwarded to Hash#to_json.
  def to_json(...)
    to_h.to_json(...)
  end
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
# Applies a unified list of IgnoreRule objects to both NetworkError and ConsoleError arrays.
|
|
5
|
+
#
|
|
6
|
+
# Rules are routed by target:
|
|
7
|
+
# :network — applied only to NetworkError (matched on url + failure)
|
|
8
|
+
# :console — applied only to ConsoleError (matched on text + type)
|
|
9
|
+
# :all — applied to both error types
|
|
10
|
+
class ErrorFilter
  # @param rules [Array<#target, #match?>] ignore rules; each is routed to
  #   the network list, the console list, or both (target :all)
  def initialize(rules:)
    @network_rules = rules_targeting(rules, :network)
    @console_rules = rules_targeting(rules, :console)
  end

  # @param errors [Array<NetworkError>]
  # @return [Hash{Symbol => Array<NetworkError>}] with keys :kept and :ignored
  def filter_network(errors)
    split(errors) do |err|
      @network_rules.any? { |rule| rule.match?(err.url, err.failure) }
    end
  end

  # @param errors [Array<ConsoleError>]
  # @return [Hash{Symbol => Array<ConsoleError>}] with keys :kept and :ignored
  def filter_console(errors)
    split(errors) do |err|
      @console_rules.any? { |rule| rule.match?(err.text, err.type) }
    end
  end

  private

  # Rules whose target is +target+ or :all.
  def rules_targeting(rules, target)
    rules.select { |rule| rule.target == target || rule.target == :all }
  end

  # Splits +errors+ by the block's verdict; a truthy result means "ignore".
  def split(errors, &ignorable)
    ignored, kept = errors.partition { |err| ignorable.call(err) }
    { kept: kept, ignored: ignored }
  end
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
module Errors
  # Root of the gem's exception hierarchy; every class below inherits from it.
  class Error < StandardError; end

  # The Node/Playwright process could not be started.
  class InvocationError < Error; end

  # Carries the exit status of a failed script run.
  # exit_status is readable by callers. stderr is stored but its reader is
  # private, so the raw process output is not part of the public interface.
  class ScriptError < Error
    attr_reader :exit_status

    # @param message [String] human-readable description
    # @param exit_status [Integer, nil] exit status of the script process
    # @param stderr [String, nil] captured standard error (kept private)
    def initialize(message, exit_status: nil, stderr: nil)
      @exit_status = exit_status
      @stderr = stderr
      super(message)
    end

    private attr_reader :stderr
  end

  # The JSON the Node script produced was structurally invalid.
  class ParseError < Error; end

  # No concurrency slot became available before the wait timed out.
  class ConcurrencyLimitError < Error; end

  # The page failed to load. The report that describes the failure is
  # attached so callers can inspect whatever was captured.
  class PageLoadError < Error
    attr_reader :report

    # @param report [#url, #error] the report describing the failed load
    def initialize(report)
      @report = report
      summary = "Page failed to load: #{report.url} — #{report.error}"
      super(summary)
    end
  end
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
# Describes a single error suppression rule applicable to NetworkError,
|
|
5
|
+
# ConsoleError, or both.
|
|
6
|
+
#
|
|
7
|
+
# pattern - String (substring match) or Regexp matched against the primary field:
|
|
8
|
+
# NetworkError#url or ConsoleError#text.
|
|
9
|
+
# type - String (substring match), Regexp, or "*" (wildcard) matched against
|
|
10
|
+
# the secondary field: NetworkError#failure or ConsoleError#type.
|
|
11
|
+
# target - Symbol: :network, :console, or :all — which error type this rule applies to.
|
|
12
|
+
#
|
|
13
|
+
# A rule matches when both pattern and type match their respective values.
|
|
14
|
+
# The filter is responsible for routing rules to the correct error type via target.
|
|
15
|
+
IgnoreRule = Data.define(:pattern, :type, :target) do
  # True when both the primary and the secondary field satisfy this rule.
  #
  # @param primary [String, nil] the primary field value (url or text)
  # @param secondary [String, nil] the secondary field value (failure or type)
  # @return [Boolean]
  def match?(primary, secondary)
    pattern_matches?(primary) && type_matches?(secondary)
  end

  private

  # Matches +pattern+ against the primary field.
  def pattern_matches?(value)
    matcher_accepts?(pattern, value)
  end

  # Matches +type+ against the secondary field; "*" accepts anything,
  # including nil.
  def type_matches?(value)
    return true if type == "*"

    matcher_accepts?(type, value)
  end

  # Regexp matchers use Regexp#match?, anything else is a substring test.
  # A nil value never matches: Regexp#match?(nil) already returns false,
  # and the substring branch now agrees with it instead of raising
  # NoMethodError on nil.include?.
  def matcher_accepts?(matcher, value)
    return matcher.match?(value) if matcher.is_a?(Regexp)
    return false if value.nil?

    value.include?(matcher)
  end
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
# Immutable value object representing a single failed network request
|
|
5
|
+
# captured during a Playwright browser run.
|
|
6
|
+
NetworkError = Data.define(:url, :http_method, :failure) do
  # @return [Hash] the fields as a Hash; http_method is exposed under :method
  def to_h
    { url:, method: http_method, failure: }
  end

  # Serialises #to_h; all arguments are forwarded to Hash#to_json.
  def to_json(...)
    to_h.to_json(...)
  end
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Perchfall
|
|
6
|
+
module Parsers
|
|
7
|
+
# Parses the raw JSON string produced by playwright/check.js into a Report.
|
|
8
|
+
#
|
|
9
|
+
# This is the only place where raw data becomes domain objects.
|
|
10
|
+
# No side effects — pure data transformation, fully unit-testable with strings.
|
|
11
|
+
class PlaywrightJsonParser
|
|
12
|
+
def initialize(filter: ErrorFilter.new(rules: []))
|
|
13
|
+
@filter = filter
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def parse(raw_json, timestamp:, scenario_name: nil, original_url: nil)
|
|
17
|
+
data = JSON.parse(raw_json, symbolize_names: true)
|
|
18
|
+
build_report(data, scenario_name: scenario_name, timestamp: timestamp, original_url: original_url)
|
|
19
|
+
rescue JSON::ParserError => e
|
|
20
|
+
raise Errors::ParseError, "Invalid JSON from Playwright script: #{e.message}"
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def build_report(data, scenario_name:, timestamp:, original_url: nil)
|
|
26
|
+
net_filtered = @filter.filter_network(parse_network_errors(data.fetch(:network_errors, [])))
|
|
27
|
+
console_filtered = @filter.filter_console(parse_console_errors(data.fetch(:console_errors, [])))
|
|
28
|
+
|
|
29
|
+
Report.new(
|
|
30
|
+
status: data.fetch(:status),
|
|
31
|
+
url: original_url || data.fetch(:url),
|
|
32
|
+
duration_ms: data.fetch(:duration_ms),
|
|
33
|
+
http_status: data[:http_status],
|
|
34
|
+
network_errors: net_filtered[:kept],
|
|
35
|
+
ignored_network_errors: net_filtered[:ignored],
|
|
36
|
+
console_errors: console_filtered[:kept],
|
|
37
|
+
ignored_console_errors: console_filtered[:ignored],
|
|
38
|
+
error: data[:error],
|
|
39
|
+
scenario_name: scenario_name,
|
|
40
|
+
timestamp: timestamp
|
|
41
|
+
)
|
|
42
|
+
rescue KeyError => e
|
|
43
|
+
raise Errors::ParseError, "Playwright JSON missing required field: #{e.message}"
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def parse_network_errors(raw)
|
|
47
|
+
raw.map do |item|
|
|
48
|
+
NetworkError.new(
|
|
49
|
+
url: item.fetch(:url),
|
|
50
|
+
http_method: item.fetch(:method),
|
|
51
|
+
failure: item.fetch(:failure)
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
rescue KeyError => e
|
|
55
|
+
raise Errors::ParseError, "Malformed network_error entry: #{e.message}"
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def parse_console_errors(raw)
|
|
59
|
+
raw.map do |item|
|
|
60
|
+
ConsoleError.new(
|
|
61
|
+
type: item.fetch(:type),
|
|
62
|
+
text: item.fetch(:text),
|
|
63
|
+
location: item.fetch(:location)
|
|
64
|
+
)
|
|
65
|
+
end
|
|
66
|
+
rescue KeyError => e
|
|
67
|
+
raise Errors::ParseError, "Malformed console_error entry: #{e.message}"
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Perchfall
|
|
4
|
+
# Knows how to invoke the Playwright Node script and return a Report.
|
|
5
|
+
#
|
|
6
|
+
# Collaborators (all injectable):
|
|
7
|
+
# runner - responds to #call(argv_array) -> Result
|
|
8
|
+
# parser - responds to #parse(raw_json, **opts) -> Report
|
|
9
|
+
# script_path - String path to playwright/check.js
|
|
10
|
+
#
|
|
11
|
+
# PlaywrightInvoker owns the command shape and error-promotion semantics.
|
|
12
|
+
# It does not know how to run a process (runner's job) or parse JSON (parser's job).
|
|
13
|
+
class PlaywrightInvoker
  # Absolute path of playwright/check.js, resolved two directories above
  # this file's directory.
  DEFAULT_SCRIPT_PATH = File.expand_path("../../playwright/check.js", __dir__).freeze

  # @param runner [#call] receives an argv array, returns a result that
  #   responds to stdout, stderr, success? and exit_status
  # @param script_path [String] path of the Node script to run
  def initialize(runner: CommandRunner.new, script_path: DEFAULT_SCRIPT_PATH)
    @runner = runner
    @script_path = script_path
  end

  # Runs the script for +url+ and returns the resulting Report.
  #
  # @param original_url [String, nil] URL to show in the report; falls back to +url+
  # @return [Report] when the report is ok
  # @raise [Errors::InvocationError] if the runner raised
  # @raise [Errors::ScriptError] if the process did not succeed
  # @raise [Errors::PageLoadError] if the parsed report is not ok
  def run(url:, timestamp:, timeout_ms: 30_000, wait_until: "load", scenario_name: nil, ignore: [], original_url: nil)
    parser = build_parser(ignore)
    argv = build_command(url: url, timeout_ms: timeout_ms, wait_until: wait_until)
    outcome = execute(argv)

    parse(
      outcome,
      parser: parser,
      scenario_name: scenario_name,
      timestamp: timestamp,
      original_url: original_url || url
    ).tap { |report| raise_if_page_load_error(report) }
  end

  private

  # A fresh parser whose filter applies exactly +ignore_rules+.
  def build_parser(ignore_rules)
    filter = ErrorFilter.new(rules: ignore_rules)
    Parsers::PlaywrightJsonParser.new(filter: filter)
  end

  # The argv array handed to the runner.
  def build_command(url:, timeout_ms:, wait_until:)
    [
      "node", @script_path,
      "--url", url,
      "--timeout", timeout_ms.to_s,
      "--wait-until", wait_until
    ]
  end

  # Delegates to the runner; any StandardError becomes an InvocationError.
  def execute(command)
    @runner.call(command)
  rescue StandardError => failure
    raise Errors::InvocationError, "Could not start Node process: #{failure.message}"
  end

  # Parses stdout of a successful run; a failed run raises ScriptError.
  def parse(result, parser:, **opts)
    return parser.parse(result.stdout, **opts) if result.success?

    raise Errors::ScriptError.new(
      "Playwright script exited with status #{result.exit_status}",
      exit_status: result.exit_status,
      stderr: result.stderr
    )
  end

  # Promotes a non-ok report to PageLoadError; returns nil otherwise.
  def raise_if_page_load_error(report)
    return if report.ok?

    raise Errors::PageLoadError.new(report)
  end
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
require "time"
|
|
4
|
+
|
|
5
|
+
module Perchfall
|
|
6
|
+
# Immutable value object representing the full result of one synthetic check.
|
|
7
|
+
#
|
|
8
|
+
# Attributes:
|
|
9
|
+
# status - String: "ok" or "error"
|
|
10
|
+
# url - String: the checked URL
|
|
11
|
+
# scenario_name - String or nil: optional label for the check
|
|
12
|
+
# timestamp - Time: when the run was initiated
|
|
13
|
+
# duration_ms - Integer: wall-clock time of the browser run
|
|
14
|
+
# http_status - Integer or nil: HTTP response code, nil if page never loaded
|
|
15
|
+
# network_errors - Array<NetworkError>: failures not matched by any ignore rule
|
|
16
|
+
# ignored_network_errors - Array<NetworkError>: failures suppressed by ignore rules
|
|
17
|
+
# console_errors - Array<ConsoleError>: errors not matched by any ignore rule
|
|
18
|
+
# ignored_console_errors - Array<ConsoleError>: errors suppressed by ignore rules
|
|
19
|
+
# error - String or nil: set only when status == "error"
|
|
20
|
+
class Report
|
|
21
|
+
attr_reader :status, :url, :scenario_name, :timestamp, :duration_ms,
|
|
22
|
+
:http_status, :network_errors, :ignored_network_errors,
|
|
23
|
+
:console_errors, :ignored_console_errors, :error
|
|
24
|
+
|
|
25
|
+
def initialize(
|
|
26
|
+
status:,
|
|
27
|
+
url:,
|
|
28
|
+
duration_ms:,
|
|
29
|
+
http_status:,
|
|
30
|
+
network_errors:,
|
|
31
|
+
console_errors:,
|
|
32
|
+
error:,
|
|
33
|
+
ignored_network_errors: [],
|
|
34
|
+
ignored_console_errors: [],
|
|
35
|
+
scenario_name: nil,
|
|
36
|
+
timestamp: Time.now.utc
|
|
37
|
+
)
|
|
38
|
+
@status = status.freeze
|
|
39
|
+
@url = url.freeze
|
|
40
|
+
@scenario_name = scenario_name&.freeze
|
|
41
|
+
@timestamp = timestamp
|
|
42
|
+
@duration_ms = duration_ms
|
|
43
|
+
@http_status = http_status
|
|
44
|
+
@network_errors = network_errors.freeze
|
|
45
|
+
@ignored_network_errors = ignored_network_errors.freeze
|
|
46
|
+
@console_errors = console_errors.freeze
|
|
47
|
+
@ignored_console_errors = ignored_console_errors.freeze
|
|
48
|
+
@error = error&.freeze
|
|
49
|
+
freeze
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def ok?
|
|
53
|
+
status == "ok"
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def to_h
|
|
57
|
+
{
|
|
58
|
+
status: status,
|
|
59
|
+
url: url,
|
|
60
|
+
scenario_name: scenario_name,
|
|
61
|
+
timestamp: timestamp.iso8601,
|
|
62
|
+
ok: ok?,
|
|
63
|
+
http_status: http_status,
|
|
64
|
+
duration_ms: duration_ms,
|
|
65
|
+
network_errors: network_errors.map(&:to_h),
|
|
66
|
+
ignored_network_errors: ignored_network_errors.map(&:to_h),
|
|
67
|
+
console_errors: console_errors.map(&:to_h),
|
|
68
|
+
ignored_console_errors: ignored_console_errors.map(&:to_h),
|
|
69
|
+
error: error
|
|
70
|
+
}
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def to_json(...)
|
|
74
|
+
to_h.to_json(...)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def ==(other)
|
|
78
|
+
other.is_a?(Report) && to_h == other.to_h
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
require "ipaddr"
|
|
5
|
+
require "resolv"
|
|
6
|
+
|
|
7
|
+
module Perchfall
|
|
8
|
+
# Validates that a URL is safe to pass to Playwright.
|
|
9
|
+
#
|
|
10
|
+
# Three checks are applied in order:
|
|
11
|
+
# 1. Scheme must be http or https.
|
|
12
|
+
# 2. Hostname must not be a known-internal literal address (literal IP or "localhost").
|
|
13
|
+
# 3. Hostname is resolved via DNS; any address in a blocked range is rejected.
|
|
14
|
+
#
|
|
15
|
+
# Check 3 shrinks the DNS rebinding window but does not eliminate it — a TOCTOU
|
|
16
|
+
# race remains between our resolution and Playwright's. Network-level egress
|
|
17
|
+
# filtering (security groups, firewall rules) is still required as the authoritative
|
|
18
|
+
# control when accepting untrusted URLs.
|
|
19
|
+
#
|
|
20
|
+
# The resolver: keyword argument is injectable for testing (pass a fake that
|
|
21
|
+
# responds to #getaddresses(hostname) → Array<String>).
|
|
22
|
+
class UrlValidator
|
|
23
|
+
PERMITTED_SCHEMES = %w[http https].freeze
|
|
24
|
+
|
|
25
|
+
# Blocked as exact hostname strings (case-insensitive).
|
|
26
|
+
BLOCKED_HOSTNAMES = %w[localhost].freeze
|
|
27
|
+
|
|
28
|
+
# Blocked IP ranges. Any literal IPv4 or IPv6 address falling within these
|
|
29
|
+
# ranges is rejected. Order does not matter; all are checked.
|
|
30
|
+
BLOCKED_RANGES = [
|
|
31
|
+
IPAddr.new("127.0.0.0/8"), # IPv4 loopback
|
|
32
|
+
IPAddr.new("::1"), # IPv6 loopback
|
|
33
|
+
IPAddr.new("169.254.0.0/16"), # IPv4 link-local (incl. AWS metadata 169.254.169.254)
|
|
34
|
+
IPAddr.new("fe80::/10"), # IPv6 link-local
|
|
35
|
+
IPAddr.new("10.0.0.0/8"), # RFC-1918
|
|
36
|
+
IPAddr.new("172.16.0.0/12"), # RFC-1918
|
|
37
|
+
IPAddr.new("192.168.0.0/16"), # RFC-1918
|
|
38
|
+
IPAddr.new("0.0.0.0/8"), # unroutable
|
|
39
|
+
].freeze
|
|
40
|
+
|
|
41
|
+
def initialize(resolver: Resolv)
|
|
42
|
+
@resolver = resolver
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def validate!(url)
|
|
46
|
+
uri = parse!(url)
|
|
47
|
+
assert_permitted_scheme!(uri, url)
|
|
48
|
+
assert_not_internal_host!(uri, url)
|
|
49
|
+
assert_not_internal_resolved_addresses!(uri, url)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
def parse!(url)
|
|
55
|
+
URI.parse(url)
|
|
56
|
+
rescue URI::InvalidURIError
|
|
57
|
+
raise ArgumentError, "Invalid URL: #{url.inspect}"
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def assert_permitted_scheme!(uri, url)
|
|
61
|
+
return if PERMITTED_SCHEMES.include?(uri.scheme)
|
|
62
|
+
|
|
63
|
+
raise ArgumentError,
|
|
64
|
+
"URL scheme #{uri.scheme.inspect} is not permitted. " \
|
|
65
|
+
"Only #{PERMITTED_SCHEMES.join(", ")} URLs are accepted. Got: #{url.inspect}"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def assert_not_internal_host!(uri, url)
|
|
69
|
+
host = uri.hostname.to_s.downcase
|
|
70
|
+
|
|
71
|
+
if BLOCKED_HOSTNAMES.include?(host)
|
|
72
|
+
raise ArgumentError, internal_error(url)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
addr = parse_ip(host)
|
|
76
|
+
if addr && blocked_ip?(addr)
|
|
77
|
+
raise ArgumentError, internal_error(url)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def assert_not_internal_resolved_addresses!(uri, url)
|
|
82
|
+
host = uri.hostname.to_s
|
|
83
|
+
# Skip resolution for literal IPs — already checked in assert_not_internal_host!
|
|
84
|
+
return if parse_ip(host)
|
|
85
|
+
|
|
86
|
+
addresses = @resolver.getaddresses(host)
|
|
87
|
+
addresses.each do |address|
|
|
88
|
+
addr = parse_ip(address)
|
|
89
|
+
raise ArgumentError, internal_error(url) if addr && blocked_ip?(addr)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def parse_ip(host)
|
|
94
|
+
IPAddr.new(host)
|
|
95
|
+
rescue IPAddr::InvalidAddressError
|
|
96
|
+
nil
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def blocked_ip?(addr)
|
|
100
|
+
BLOCKED_RANGES.any? { |range| range.include?(addr) }
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def internal_error(url)
|
|
104
|
+
"URL resolves to an internal or reserved address and is not permitted. Got: #{url.inspect}"
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
data/lib/perchfall.rb
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "perchfall/version"
|
|
4
|
+
require_relative "perchfall/errors"
|
|
5
|
+
require_relative "perchfall/network_error"
|
|
6
|
+
require_relative "perchfall/console_error"
|
|
7
|
+
require_relative "perchfall/report"
|
|
8
|
+
require_relative "perchfall/ignore_rule"
|
|
9
|
+
require_relative "perchfall/error_filter"
|
|
10
|
+
require_relative "perchfall/command_runner"
|
|
11
|
+
require_relative "perchfall/concurrency_limiter"
|
|
12
|
+
require_relative "perchfall/url_validator"
|
|
13
|
+
require_relative "perchfall/parsers/playwright_json_parser"
|
|
14
|
+
require_relative "perchfall/playwright_invoker"
|
|
15
|
+
require_relative "perchfall/client"
|
|
16
|
+
|
|
17
|
+
# Perchfall — synthetic browser monitoring via Playwright.
|
|
18
|
+
#
|
|
19
|
+
# Quick start:
|
|
20
|
+
# report = Perchfall.run(url: "https://example.com")
|
|
21
|
+
# report.ok? # => true
|
|
22
|
+
# report.http_status # => 200
|
|
23
|
+
# report.to_json # => '{"status":"ok",...}'
|
|
24
|
+
#
|
|
25
|
+
# For advanced use, inject collaborators:
|
|
26
|
+
# client = Perchfall::Client.new(invoker: MyInvoker.new)
|
|
27
|
+
# report = client.run(url: "https://example.com", scenario_name: "homepage_smoke")
|
|
28
|
+
module Perchfall
  # Ignore rules applied to every run unless the caller supplies more via the
  # ignore: option of Perchfall.run or Client#run.
  # net::ERR_ABORTED is a browser-side abort (analytics beacons, cancelled
  # prefetches) and is never a signal of real page failure.
  DEFAULT_IGNORE_RULES = [
    IgnoreRule.new(target: :network, type: "net::ERR_ABORTED", pattern: //),
  ].freeze

  class << self
    # Shared concurrency limiter (limit: 5), built on first access and memoised
    # on the module. Pass limiter: to Client.new to use a different one.
    #
    # Construction is deferred so that merely requiring the gem does not
    # create the limiter.
    def default_limiter
      @default_limiter ||= ConcurrencyLimiter.new(limit: 5)
    end

    # Convenience entry point: builds a new Client for this call and delegates
    # to Client#run.
    #
    # @param url [String]
    # @param opts [Hash] forwarded to Client#run
    # @return [Report]
    def run(url:, **opts)
      client = Client.new
      client.run(url: url, **opts)
    end
  end
end
|
data/perchfall.gemspec
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/perchfall/version'
|
|
4
|
+
|
|
5
|
+
# Gem specification for perchfall.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = 'perchfall'
  gem.version = Perchfall::VERSION
  gem.authors = ['Jim Remsik']
  gem.email   = %w[jim@beflagrant.com]

  # Descriptive metadata
  gem.summary     = 'Synthetic browser monitoring via Playwright'
  gem.description = <<~TEXT
    Run headless browser checks against a URL using Playwright and receive a
    structured, immutable Ruby report object — framework-agnostic, no persistence.
  TEXT
  gem.homepage = 'https://github.com/beflagrant/perchfall'
  gem.license  = 'MIT'
  gem.metadata = {
    'source_code_uri' => 'https://github.com/beflagrant/perchfall',
    'changelog_uri' => 'https://github.com/beflagrant/perchfall/blob/main/CHANGELOG.md',
    'bug_tracker_uri' => 'https://github.com/beflagrant/perchfall/issues',
    'rubygems_mfa_required' => 'true'
  }

  gem.required_ruby_version = '>= 3.2.0'

  # Packaged files: everything under lib/ and playwright/ plus the top-level
  # documents, with directory entries filtered out of the glob results.
  packaged_patterns = %w[
    lib/**/*
    playwright/**/*
    README.md
    CHANGELOG.md
    LICENSE.txt
    perchfall.gemspec
  ]
  gem.files = Dir.glob(packaged_patterns).reject { |path| File.directory?(path) }

  gem.require_paths = %w[lib]

  # json is in stdlib but declared explicitly so bundler resolves it correctly
  gem.add_dependency 'json', '>= 2.0'

  gem.add_development_dependency 'rspec', '~> 3.13'
  gem.add_development_dependency 'rubocop', '~> 1.70'
  gem.add_development_dependency 'simplecov', '~> 0.22'
end
|
data/playwright/check.js
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// playwright/check.js
|
|
3
|
+
//
|
|
4
|
+
// Runs a single synthetic check against a URL using Playwright.
|
|
5
|
+
// Writes one JSON object to stdout; exits 0 when JSON is trustworthy,
|
|
6
|
+
// exits 1 only for infrastructure failures (Node crash, missing args) where
|
|
7
|
+
// stdout cannot be trusted.
|
|
8
|
+
//
|
|
9
|
+
// Usage:
|
|
10
|
+
// node playwright/check.js --url https://example.com [--timeout 30000] [--wait-until load]
|
|
11
|
+
|
|
12
|
+
"use strict";
|
|
13
|
+
|
|
14
|
+
const { chromium } = require("playwright");
|
|
15
|
+
const { parseArgs } = require("node:util");
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Argument parsing
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
// Parse the CLI flags. strict mode makes parseArgs throw on unknown options.
const { values: args } = parseArgs({
  options: {
    url: { type: "string" },
    timeout: { type: "string", default: "30000" },
    "wait-until": { type: "string", default: "load" },
  },
  strict: true,
});

if (!args.url) {
  process.stderr.write("Error: --url is required\n");
  process.exit(1);
}

const TARGET_URL = args.url;
const TIMEOUT_MS = Number.parseInt(args.timeout, 10);
const WAIT_UNTIL = args["wait-until"];

// A non-numeric or negative --timeout is a bad invocation, not a page failure:
// fail fast on stderr with exit 1, the same way a missing --url is handled,
// instead of handing NaN to Playwright.
if (!Number.isFinite(TIMEOUT_MS) || TIMEOUT_MS < 0) {
  process.stderr.write(
    `Error: --timeout must be a non-negative number of milliseconds, got ${JSON.stringify(args.timeout)}\n`
  );
  process.exit(1);
}
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Helpers
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
/**
 * Serialises one check result as the JSON document written to stdout.
 *
 * The URL always comes from the module-level TARGET_URL. A missing
 * (null/undefined) httpStatus or error is emitted as JSON null.
 *
 * @param {object} fields - status, durationMs, httpStatus, networkErrors,
 *   consoleErrors and error for this run.
 * @returns {string} JSON text with snake_case keys.
 */
function buildResult(fields) {
  const { status, durationMs, httpStatus, networkErrors, consoleErrors, error } = fields;

  const payload = {
    status: status,
    url: TARGET_URL,
    duration_ms: durationMs,
    http_status: httpStatus === undefined || httpStatus === null ? null : httpStatus,
    network_errors: networkErrors,
    console_errors: consoleErrors,
    error: error === undefined || error === null ? null : error,
  };

  return JSON.stringify(payload);
}
|
|
54
|
+
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// Main
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/**
 * Launches headless Chromium, navigates to TARGET_URL and reports the outcome.
 *
 * Exactly one JSON document (see buildResult) is written to stdout and the
 * process then exits with status 0, both when navigation succeeds and when
 * anything inside the try block throws (reported as status "error").
 *
 * The browser is closed BEFORE the process exits. Calling process.exit()
 * inside try/catch would terminate the process immediately and skip the
 * finally block, so the exit is deferred until after cleanup. The exit also
 * waits for the stdout write callback so the JSON is not truncated on
 * platforms where pipe writes are asynchronous.
 */
async function run() {
  // Upper bound on graceful browser shutdown so a wedged browser cannot
  // delay the result indefinitely.
  const CLOSE_TIMEOUT_MS = 5000;

  const startedAt = Date.now();
  const networkErrors = [];
  const consoleErrors = [];
  let browser;
  let output;

  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();

    // Collect requests that failed outright; Playwright supplies the failure
    // text. HTTP 4xx/5xx responses are recorded by the "response" handler below.
    page.on("requestfailed", (request) => {
      networkErrors.push({
        url: request.url(),
        method: request.method(),
        failure: request.failure()?.errorText ?? "unknown",
      });
    });

    // Collect responses with status >= 400 as network errors too.
    page.on("response", (response) => {
      const status = response.status();
      if (status >= 400) {
        networkErrors.push({
          url: response.url(),
          method: response.request().method(),
          failure: `HTTP ${status}`,
        });
      }
    });

    // Collect browser console errors.
    page.on("console", (msg) => {
      if (msg.type() === "error") {
        const loc = msg.location();
        consoleErrors.push({
          type: msg.type(),
          text: msg.text(),
          location: loc ? `${loc.url}:${loc.lineNumber}:${loc.columnNumber}` : "",
        });
      }
    });

    const response = await page.goto(TARGET_URL, {
      timeout: TIMEOUT_MS,
      waitUntil: WAIT_UNTIL,
    });

    output = buildResult({
      status: "ok",
      durationMs: Date.now() - startedAt,
      httpStatus: response ? response.status() : null,
      networkErrors,
      consoleErrors,
      error: null,
    });
  } catch (err) {
    // Page-level failure (timeout, DNS, etc.) — still exit 0 so Ruby reads the JSON.
    output = buildResult({
      status: "error",
      durationMs: Date.now() - startedAt,
      httpStatus: null,
      networkErrors,
      consoleErrors,
      // Keep a message even when something other than an Error was thrown.
      error: err instanceof Error ? err.message : String(err),
    });
  } finally {
    if (browser) {
      // Best-effort close: errors are ignored and the wait is bounded.
      await Promise.race([
        browser.close().catch(() => {}),
        new Promise((resolve) => setTimeout(resolve, CLOSE_TIMEOUT_MS)),
      ]);
    }
  }

  // Exit only after stdout has accepted the whole document.
  await new Promise((resolve) => process.stdout.write(output, resolve));
  process.exit(0);
}
|
|
139
|
+
|
|
140
|
+
// Last-resort handler: if run() itself rejects, nothing on stdout can be
// trusted, so describe the failure on stderr and exit with status 1.
run().catch(function reportUnhandled(err) {
  const detail = err.stack ?? err.message;
  process.stderr.write(`Unhandled error: ${detail}\n`);
  process.exit(1);
});
|
metadata
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: perchfall
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Jim Remsik
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: json
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '2.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '2.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: rspec
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '3.13'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '3.13'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: rubocop
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '1.70'
|
|
47
|
+
type: :development
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '1.70'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: simplecov
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0.22'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '0.22'
|
|
68
|
+
description: |
|
|
69
|
+
Run headless browser checks against a URL using Playwright and receive a
|
|
70
|
+
structured, immutable Ruby report object — framework-agnostic, no persistence.
|
|
71
|
+
email:
|
|
72
|
+
- jim@beflagrant.com
|
|
73
|
+
executables: []
|
|
74
|
+
extensions: []
|
|
75
|
+
extra_rdoc_files: []
|
|
76
|
+
files:
|
|
77
|
+
- CHANGELOG.md
|
|
78
|
+
- LICENSE.txt
|
|
79
|
+
- README.md
|
|
80
|
+
- lib/perchfall.rb
|
|
81
|
+
- lib/perchfall/client.rb
|
|
82
|
+
- lib/perchfall/command_runner.rb
|
|
83
|
+
- lib/perchfall/concurrency_limiter.rb
|
|
84
|
+
- lib/perchfall/console_error.rb
|
|
85
|
+
- lib/perchfall/error_filter.rb
|
|
86
|
+
- lib/perchfall/errors.rb
|
|
87
|
+
- lib/perchfall/ignore_rule.rb
|
|
88
|
+
- lib/perchfall/network_error.rb
|
|
89
|
+
- lib/perchfall/parsers/playwright_json_parser.rb
|
|
90
|
+
- lib/perchfall/playwright_invoker.rb
|
|
91
|
+
- lib/perchfall/report.rb
|
|
92
|
+
- lib/perchfall/url_validator.rb
|
|
93
|
+
- lib/perchfall/version.rb
|
|
94
|
+
- perchfall.gemspec
|
|
95
|
+
- playwright/check.js
|
|
96
|
+
homepage: https://github.com/beflagrant/perchfall
|
|
97
|
+
licenses:
|
|
98
|
+
- MIT
|
|
99
|
+
metadata:
|
|
100
|
+
source_code_uri: https://github.com/beflagrant/perchfall
|
|
101
|
+
changelog_uri: https://github.com/beflagrant/perchfall/blob/main/CHANGELOG.md
|
|
102
|
+
bug_tracker_uri: https://github.com/beflagrant/perchfall/issues
|
|
103
|
+
rubygems_mfa_required: 'true'
|
|
104
|
+
rdoc_options: []
|
|
105
|
+
require_paths:
|
|
106
|
+
- lib
|
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
|
+
requirements:
|
|
109
|
+
- - ">="
|
|
110
|
+
- !ruby/object:Gem::Version
|
|
111
|
+
version: 3.2.0
|
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - ">="
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: '0'
|
|
117
|
+
requirements: []
|
|
118
|
+
rubygems_version: 4.0.3
|
|
119
|
+
specification_version: 4
|
|
120
|
+
summary: Synthetic browser monitoring via Playwright
|
|
121
|
+
test_files: []
|