perchfall 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -1
- data/README.md +10 -6
- data/lib/perchfall/client.rb +60 -13
- data/lib/perchfall/parsers/playwright_json_parser.rb +5 -4
- data/lib/perchfall/playwright_invoker.rb +7 -5
- data/lib/perchfall/report.rb +7 -3
- data/lib/perchfall/version.rb +1 -1
- data/playwright/check.js +33 -3
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 86b85c771d240f7c1ac7c255c6882a1d7cd1c5da2eb242bb681327612f079ae5
|
|
4
|
+
data.tar.gz: 1f1281e0e17c1e5ec91c841bc271990c85c752a2a97c3c5980d631d943d71460
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b552f554e7843c489c0c46905800f2f6f8a69bd9b577502262d8dde9a99f4cbf5e23e3a268f908e879ea65aced9587abce86753658d0e878d439d6987a12f1b6
|
|
7
|
+
data.tar.gz: 4504904e4ea3a877f51ca6f07b2a86ea2030f5ed42ae8d3519a6d39b77da28869a4af95a9a19d64a02dba8aec39811928722e9857862dcdedd52045e38bbec62
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.0] - 2026-03-19
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `cache_profile:` option on `Perchfall.run` — replaces `bust_cache:` with four named profiles:
|
|
15
|
+
- `:query_bust` (default) — appends `?_pf=<unix_timestamp>` to force a cold fetch
|
|
16
|
+
- `:warm` — no URL mutation, no extra headers; measures real-user warm-cache experience
|
|
17
|
+
- `:no_cache` — sets `Cache-Control: no-cache` on all requests (main document + sub-resources)
|
|
18
|
+
- `:no_store` — sets `Cache-Control: no-store, no-cache` and `Pragma: no-cache`
|
|
19
|
+
- Custom Hash form: `cache_profile: { headers: { "Cache-Control" => "max-age=0" } }`
|
|
20
|
+
- `report.cache_profile` — cache profile is stored on the `Report` and included in `to_h` / `to_json`
|
|
21
|
+
- `--headers` argument to `playwright/check.js` — extra HTTP headers applied via `page.setExtraHTTPHeaders`
|
|
22
|
+
- `check.js` integration specs (15 examples, tagged `:js`); excluded from default run, opt-in via `RUN_JS_SPECS=true`
|
|
23
|
+
- `check-js.yml` GitHub Actions workflow — runs automatically when `playwright/check.js` or its specs change; caches Playwright Chromium binary keyed on `package-lock.json`
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
|
|
27
|
+
- Renamed cache-bust query parameter from `_perchfall=` to `_pf=` (shorter, less intrusive in logs)
|
|
28
|
+
- Validation order: `cache_profile` → `wait_until` → `timeout_ms` → URL validation — invalid params now raise before the effective URL is built
|
|
29
|
+
- `check.js` now writes a `status: "error"` JSON result (exit 0) for malformed or non-object `--headers` instead of crashing
|
|
30
|
+
|
|
31
|
+
### Breaking Changes
|
|
32
|
+
|
|
33
|
+
- `bust_cache:` keyword argument removed. Migrate: `bust_cache: false` → `cache_profile: :warm`; `bust_cache: true` → `cache_profile: :query_bust` (or omit — it is the default)
|
|
34
|
+
- Cache-bust query parameter renamed from `_perchfall=` to `_pf=` — update any log filters or URL allow-lists
|
|
35
|
+
|
|
36
|
+
### Security
|
|
37
|
+
|
|
38
|
+
- Custom `cache_profile` headers validated against a `FORBIDDEN_HEADERS` denylist (`Authorization`, `Cookie`, `Set-Cookie`, `Host`, `X-Forwarded-For`, `X-Forwarded-Host`, `X-Real-IP`) — these cannot be injected via the custom Hash form
|
|
39
|
+
|
|
10
40
|
## [0.1.0] - 2026-03-17
|
|
11
41
|
|
|
12
42
|
### Added
|
|
@@ -26,5 +56,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
26
56
|
- Full dependency injection throughout — test suite runs in ~0.4 s with no browser, Node, or network required
|
|
27
57
|
- GitHub Actions CI workflow (unit suite) and manual Playwright smoke check workflow
|
|
28
58
|
|
|
29
|
-
[Unreleased]: https://github.com/beflagrant/perchfall/compare/v0.1.0...HEAD
|
|
59
|
+
[Unreleased]: https://github.com/beflagrant/perchfall/compare/v0.2.0...HEAD
|
|
60
|
+
[0.2.0]: https://github.com/beflagrant/perchfall/compare/v0.1.0...v0.2.0
|
|
30
61
|
[0.1.0]: https://github.com/beflagrant/perchfall/releases/tag/v0.1.0
|
data/README.md
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://github.com/beflagrant/perchfall/actions/workflows/ci.yml)
|
|
4
4
|
[](https://github.com/beflagrant/perchfall/actions/workflows/playwright.yml)
|
|
5
|
+
[](https://badge.fury.io/rb/perchfall)
|
|
5
6
|
|
|
6
7
|
**Synthetic browser monitoring for Ruby.** Give it a URL; get back a structured report of what a real Chromium browser saw — HTTP status, broken assets, JavaScript errors, and load time. No framework required.
|
|
7
8
|
|
|
@@ -127,6 +128,7 @@ Every check returns a `Perchfall::Report`:
|
|
|
127
128
|
| `duration_ms` | Integer | Total time from navigation start to `load` event |
|
|
128
129
|
| `url` | String | The URL checked |
|
|
129
130
|
| `timestamp` | Time | When the check ran (UTC) |
|
|
131
|
+
| `cache_profile` | Symbol / nil | Cache profile used (`:query_bust`, `:warm`, `:no_cache`, `:no_store`) |
|
|
130
132
|
| `network_errors` | Array | Failed or errored network requests |
|
|
131
133
|
| `console_errors` | Array | JavaScript errors logged to the browser console |
|
|
132
134
|
| `to_json` | String | Full report as JSON |
|
|
@@ -154,13 +156,14 @@ Every check returns a `Perchfall::Report`:
|
|
|
154
156
|
```ruby
|
|
155
157
|
Perchfall.run(
|
|
156
158
|
url: "https://example.com",
|
|
157
|
-
timeout_ms: 10_000,
|
|
158
|
-
wait_until: "domcontentloaded",
|
|
159
|
-
scenario_name: "homepage_smoke"
|
|
159
|
+
timeout_ms: 10_000, # default 30_000, max 60_000
|
|
160
|
+
wait_until: "domcontentloaded", # default "load"
|
|
161
|
+
scenario_name: "homepage_smoke", # included in report JSON
|
|
162
|
+
cache_profile: :no_cache # default :query_bust
|
|
160
163
|
)
|
|
161
164
|
```
|
|
162
165
|
|
|
163
|
-
→ [All options and wait_until strategies](docs/configuration.md)
|
|
166
|
+
→ [All options, cache profiles, and wait_until strategies](docs/configuration.md)
|
|
164
167
|
|
|
165
168
|
---
|
|
166
169
|
|
|
@@ -176,8 +179,9 @@ Perchfall.run(
|
|
|
176
179
|
|
|
177
180
|
```sh
|
|
178
181
|
bundle install
|
|
179
|
-
bundle exec rspec
|
|
180
|
-
|
|
182
|
+
bundle exec rspec # ~0.5s, no browser or Node required (208 examples)
|
|
183
|
+
RUN_JS_SPECS=true bundle exec rspec # includes check.js integration specs (223 examples)
|
|
184
|
+
bin/console # IRB with perchfall loaded
|
|
181
185
|
```
|
|
182
186
|
|
|
183
187
|
---
|
data/lib/perchfall/client.rb
CHANGED
|
@@ -24,6 +24,26 @@ module Perchfall
|
|
|
24
24
|
class Client
|
|
25
25
|
VALID_WAIT_UNTIL = %w[load domcontentloaded networkidle commit].freeze
|
|
26
26
|
|
|
27
|
+
CACHE_PROFILES = {
|
|
28
|
+
query_bust: { bust_url: true, headers: {}.freeze }.freeze,
|
|
29
|
+
warm: { bust_url: false, headers: {}.freeze }.freeze,
|
|
30
|
+
no_cache: { bust_url: false, headers: { "Cache-Control" => "no-cache" }.freeze }.freeze,
|
|
31
|
+
no_store: { bust_url: false, headers: { "Cache-Control" => "no-store, no-cache", "Pragma" => "no-cache" }.freeze }.freeze
|
|
32
|
+
}.freeze
|
|
33
|
+
|
|
34
|
+
# Headers that could carry credentials, impersonate infrastructure, or
|
|
35
|
+
# manipulate routing. Rejected in custom cache profiles to prevent
|
|
36
|
+
# accidental or malicious injection into all page-load requests.
|
|
37
|
+
FORBIDDEN_HEADERS = %w[
|
|
38
|
+
authorization
|
|
39
|
+
cookie
|
|
40
|
+
set-cookie
|
|
41
|
+
host
|
|
42
|
+
x-forwarded-for
|
|
43
|
+
x-forwarded-host
|
|
44
|
+
x-real-ip
|
|
45
|
+
].freeze
|
|
46
|
+
|
|
27
47
|
def initialize(
|
|
28
48
|
invoker: PlaywrightInvoker.new,
|
|
29
49
|
validator: UrlValidator.new,
|
|
@@ -48,22 +68,26 @@ module Perchfall
|
|
|
48
68
|
# @raise [Errors::ParseError] if the script output was not valid JSON
|
|
49
69
|
# @raise [Errors::PageLoadError] if the page itself failed to load
|
|
50
70
|
|
|
51
|
-
def run(url:, ignore: [], wait_until: "load", timeout_ms: 30_000, scenario_name: nil, timestamp: Time.now.utc, bust_cache: true)
|
|
52
|
-
|
|
53
|
-
@validator.validate!(effective_url)
|
|
71
|
+
def run(url:, ignore: [], wait_until: "load", timeout_ms: 30_000, scenario_name: nil, timestamp: Time.now.utc, cache_profile: :query_bust)
|
|
72
|
+
profile = resolve_cache_profile!(cache_profile)
|
|
54
73
|
validate_wait_until!(wait_until)
|
|
55
74
|
validate_timeout_ms!(timeout_ms)
|
|
75
|
+
effective_url = profile[:bust_url] ? append_cache_buster(url) : url
|
|
76
|
+
@validator.validate!(effective_url)
|
|
56
77
|
merged_ignore = Perchfall::DEFAULT_IGNORE_RULES + ignore
|
|
78
|
+
invoker_opts = {
|
|
79
|
+
url: effective_url,
|
|
80
|
+
original_url: url,
|
|
81
|
+
ignore: merged_ignore,
|
|
82
|
+
wait_until: wait_until,
|
|
83
|
+
timeout_ms: timeout_ms,
|
|
84
|
+
scenario_name: scenario_name,
|
|
85
|
+
timestamp: timestamp,
|
|
86
|
+
cache_profile: cache_profile
|
|
87
|
+
}
|
|
88
|
+
invoker_opts[:extra_headers] = profile[:headers] unless profile[:headers].empty?
|
|
57
89
|
@limiter.acquire do
|
|
58
|
-
@invoker.run(
|
|
59
|
-
url: effective_url,
|
|
60
|
-
original_url: url,
|
|
61
|
-
ignore: merged_ignore,
|
|
62
|
-
wait_until: wait_until,
|
|
63
|
-
timeout_ms: timeout_ms,
|
|
64
|
-
scenario_name: scenario_name,
|
|
65
|
-
timestamp: timestamp
|
|
66
|
-
)
|
|
90
|
+
@invoker.run(**invoker_opts)
|
|
67
91
|
end
|
|
68
92
|
end
|
|
69
93
|
|
|
@@ -76,9 +100,32 @@ module Perchfall
|
|
|
76
100
|
"wait_until must be one of #{VALID_WAIT_UNTIL.join(", ")}. Got: #{value.inspect}"
|
|
77
101
|
end
|
|
78
102
|
|
|
103
|
+
def resolve_cache_profile!(profile)
|
|
104
|
+
if profile.is_a?(Symbol)
|
|
105
|
+
CACHE_PROFILES.fetch(profile) do
|
|
106
|
+
raise ArgumentError, "cache_profile must be one of #{CACHE_PROFILES.keys.join(", ")} or a Hash with :headers. Got: #{profile.inspect}"
|
|
107
|
+
end
|
|
108
|
+
else
|
|
109
|
+
headers = profile.fetch(:headers, {})
|
|
110
|
+
validate_custom_headers!(headers)
|
|
111
|
+
{ bust_url: false, headers: headers }
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def validate_custom_headers!(headers)
|
|
116
|
+
headers.each_key do |name|
|
|
117
|
+
if FORBIDDEN_HEADERS.include?(name.to_s.downcase)
|
|
118
|
+
raise ArgumentError,
|
|
119
|
+
"cache_profile contains a forbidden header: #{name.inspect}. " \
|
|
120
|
+
"Headers that carry credentials or influence routing (#{FORBIDDEN_HEADERS.join(", ")}) " \
|
|
121
|
+
"may not be set via cache_profile."
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
79
126
|
def append_cache_buster(url)
|
|
80
127
|
separator = url.include?("?") ? "&" : "?"
|
|
81
|
-
"#{url}#{separator}_perchfall=#{Time.now.utc.to_i}"
|
|
128
|
+
"#{url}#{separator}_pf=#{Time.now.utc.to_i}"
|
|
82
129
|
end
|
|
83
130
|
|
|
84
131
|
MAX_TIMEOUT_MS = 60_000
|
|
data/lib/perchfall/parsers/playwright_json_parser.rb
CHANGED
|
@@ -13,16 +13,16 @@ module Perchfall
|
|
|
13
13
|
@filter = filter
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def parse(raw_json, timestamp:, scenario_name: nil, original_url: nil)
|
|
16
|
+
def parse(raw_json, timestamp:, scenario_name: nil, original_url: nil, cache_profile: nil)
|
|
17
17
|
data = JSON.parse(raw_json, symbolize_names: true)
|
|
18
|
-
build_report(data, scenario_name: scenario_name, timestamp: timestamp, original_url: original_url)
|
|
18
|
+
build_report(data, scenario_name: scenario_name, timestamp: timestamp, original_url: original_url, cache_profile: cache_profile)
|
|
19
19
|
rescue JSON::ParserError => e
|
|
20
20
|
raise Errors::ParseError, "Invalid JSON from Playwright script: #{e.message}"
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
private
|
|
24
24
|
|
|
25
|
-
def build_report(data, scenario_name:, timestamp:, original_url: nil)
|
|
25
|
+
def build_report(data, scenario_name:, timestamp:, original_url: nil, cache_profile: nil)
|
|
26
26
|
net_filtered = @filter.filter_network(parse_network_errors(data.fetch(:network_errors, [])))
|
|
27
27
|
console_filtered = @filter.filter_console(parse_console_errors(data.fetch(:console_errors, [])))
|
|
28
28
|
|
|
@@ -37,7 +37,8 @@ module Perchfall
|
|
|
37
37
|
ignored_console_errors: console_filtered[:ignored],
|
|
38
38
|
error: data[:error],
|
|
39
39
|
scenario_name: scenario_name,
|
|
40
|
-
timestamp: timestamp
|
|
40
|
+
timestamp: timestamp,
|
|
41
|
+
cache_profile: cache_profile
|
|
41
42
|
)
|
|
42
43
|
rescue KeyError => e
|
|
43
44
|
raise Errors::ParseError, "Playwright JSON missing required field: #{e.message}"
|
|
data/lib/perchfall/playwright_invoker.rb
CHANGED
|
@@ -24,10 +24,10 @@ module Perchfall
|
|
|
24
24
|
@script_path = script_path
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
def run(url:, timestamp:, timeout_ms: 30_000, wait_until: "load", scenario_name: nil, ignore: [], original_url: nil)
|
|
27
|
+
def run(url:, timestamp:, timeout_ms: 30_000, wait_until: "load", scenario_name: nil, ignore: [], original_url: nil, extra_headers: {}, cache_profile: nil)
|
|
28
28
|
parser = build_parser(ignore)
|
|
29
|
-
result = execute(build_command(url: url, timeout_ms: timeout_ms, wait_until: wait_until))
|
|
30
|
-
report = parse(result, parser: parser, scenario_name: scenario_name, timestamp: timestamp, original_url: original_url || url)
|
|
29
|
+
result = execute(build_command(url: url, timeout_ms: timeout_ms, wait_until: wait_until, extra_headers: extra_headers))
|
|
30
|
+
report = parse(result, parser: parser, scenario_name: scenario_name, timestamp: timestamp, original_url: original_url || url, cache_profile: cache_profile)
|
|
31
31
|
raise_if_page_load_error(report)
|
|
32
32
|
report
|
|
33
33
|
end
|
|
@@ -38,8 +38,10 @@ module Perchfall
|
|
|
38
38
|
Parsers::PlaywrightJsonParser.new(filter: ErrorFilter.new(rules: ignore_rules))
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
-
def build_command(url:, timeout_ms:, wait_until:)
|
|
42
|
-
["node", @script_path, "--url", url, "--timeout", timeout_ms.to_s, "--wait-until", wait_until]
|
|
41
|
+
def build_command(url:, timeout_ms:, wait_until:, extra_headers: {})
|
|
42
|
+
cmd = ["node", @script_path, "--url", url, "--timeout", timeout_ms.to_s, "--wait-until", wait_until]
|
|
43
|
+
cmd += ["--headers", extra_headers.to_json] unless extra_headers.empty?
|
|
44
|
+
cmd
|
|
43
45
|
end
|
|
44
46
|
|
|
45
47
|
def execute(command)
|
data/lib/perchfall/report.rb
CHANGED
|
@@ -17,10 +17,11 @@ module Perchfall
|
|
|
17
17
|
# console_errors - Array<ConsoleError>: errors not matched by any ignore rule
|
|
18
18
|
# ignored_console_errors - Array<ConsoleError>: errors suppressed by ignore rules
|
|
19
19
|
# error - String or nil: set only when status == "error"
|
|
20
|
+
# cache_profile - Symbol or nil: the cache profile used for this run
|
|
20
21
|
class Report
|
|
21
22
|
attr_reader :status, :url, :scenario_name, :timestamp, :duration_ms,
|
|
22
23
|
:http_status, :network_errors, :ignored_network_errors,
|
|
23
|
-
:console_errors, :ignored_console_errors, :error
|
|
24
|
+
:console_errors, :ignored_console_errors, :error, :cache_profile
|
|
24
25
|
|
|
25
26
|
def initialize(
|
|
26
27
|
status:,
|
|
@@ -33,7 +34,8 @@ module Perchfall
|
|
|
33
34
|
ignored_network_errors: [],
|
|
34
35
|
ignored_console_errors: [],
|
|
35
36
|
scenario_name: nil,
|
|
36
|
-
timestamp: Time.now.utc
|
|
37
|
+
timestamp: Time.now.utc,
|
|
38
|
+
cache_profile: nil
|
|
37
39
|
)
|
|
38
40
|
@status = status.freeze
|
|
39
41
|
@url = url.freeze
|
|
@@ -46,6 +48,7 @@ module Perchfall
|
|
|
46
48
|
@console_errors = console_errors.freeze
|
|
47
49
|
@ignored_console_errors = ignored_console_errors.freeze
|
|
48
50
|
@error = error&.freeze
|
|
51
|
+
@cache_profile = cache_profile
|
|
49
52
|
freeze
|
|
50
53
|
end
|
|
51
54
|
|
|
@@ -66,7 +69,8 @@ module Perchfall
|
|
|
66
69
|
ignored_network_errors: ignored_network_errors.map(&:to_h),
|
|
67
70
|
console_errors: console_errors.map(&:to_h),
|
|
68
71
|
ignored_console_errors: ignored_console_errors.map(&:to_h),
|
|
69
|
-
error: error
|
|
72
|
+
error: error,
|
|
73
|
+
cache_profile: cache_profile
|
|
70
74
|
}
|
|
71
75
|
end
|
|
72
76
|
|
data/lib/perchfall/version.rb
CHANGED
data/playwright/check.js
CHANGED
|
@@ -23,6 +23,7 @@ const { values: args } = parseArgs({
|
|
|
23
23
|
url: { type: "string" },
|
|
24
24
|
timeout: { type: "string", default: "30000" },
|
|
25
25
|
"wait-until": { type: "string", default: "load" },
|
|
26
|
+
headers: { type: "string", default: "{}" },
|
|
26
27
|
},
|
|
27
28
|
strict: true,
|
|
28
29
|
});
|
|
@@ -32,9 +33,34 @@ if (!args.url) {
|
|
|
32
33
|
process.exit(1);
|
|
33
34
|
}
|
|
34
35
|
|
|
35
|
-
const TARGET_URL
|
|
36
|
-
const TIMEOUT_MS
|
|
37
|
-
const WAIT_UNTIL
|
|
36
|
+
const TARGET_URL = args.url;
|
|
37
|
+
const TIMEOUT_MS = parseInt(args.timeout, 10);
|
|
38
|
+
const WAIT_UNTIL = args["wait-until"];
|
|
39
|
+
|
|
40
|
+
let EXTRA_HEADERS;
|
|
41
|
+
try {
|
|
42
|
+
const parsed = JSON.parse(args.headers);
|
|
43
|
+
if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
|
|
44
|
+
throw new TypeError("--headers must be a JSON object, got: " + args.headers);
|
|
45
|
+
}
|
|
46
|
+
for (const [key, value] of Object.entries(parsed)) {
|
|
47
|
+
if (typeof value !== "string") {
|
|
48
|
+
throw new TypeError(`--headers value for "${key}" must be a string, got ${typeof value}`);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
EXTRA_HEADERS = parsed;
|
|
52
|
+
} catch (err) {
|
|
53
|
+
process.stdout.write(JSON.stringify({
|
|
54
|
+
status: "error",
|
|
55
|
+
url: TARGET_URL,
|
|
56
|
+
duration_ms: 0,
|
|
57
|
+
http_status: null,
|
|
58
|
+
network_errors: [],
|
|
59
|
+
console_errors: [],
|
|
60
|
+
error: "Invalid --headers: " + err.message,
|
|
61
|
+
}));
|
|
62
|
+
process.exit(0);
|
|
63
|
+
}
|
|
38
64
|
|
|
39
65
|
// ---------------------------------------------------------------------------
|
|
40
66
|
// Helpers
|
|
@@ -66,6 +92,10 @@ async function run() {
|
|
|
66
92
|
browser = await chromium.launch({ headless: true });
|
|
67
93
|
const page = await browser.newPage();
|
|
68
94
|
|
|
95
|
+
if (Object.keys(EXTRA_HEADERS).length > 0) {
|
|
96
|
+
await page.setExtraHTTPHeaders(EXTRA_HEADERS);
|
|
97
|
+
}
|
|
98
|
+
|
|
69
99
|
// Collect failed network requests (4xx/5xx responses + connection failures).
|
|
70
100
|
page.on("requestfailed", (request) => {
|
|
71
101
|
networkErrors.push({
|