@jayfarei/lazyanalytics 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +67 -9
- package/cli/dist/commands/active.d.ts +2 -0
- package/cli/dist/commands/active.js +30 -0
- package/cli/dist/commands/base.js +9 -83
- package/cli/dist/commands/setup.js +15 -2
- package/cli/dist/index.js +11 -0
- package/cli/dist/lib/api.d.ts +10 -0
- package/cli/dist/lib/api.js +78 -3
- package/cli/dist/lib/wrangler.d.ts +2 -0
- package/cli/dist/lib/wrangler.js +26 -0
- package/dist/worker.js +724 -52
- package/package.json +1 -1
- package/skill/SKILL.md +15 -1
- package/templates/wrangler.toml +3 -0
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,38 @@ All notable changes to this project are documented here. The format is based on
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres
|
|
5
5
|
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
|
+
## [0.3.0] - 2026-06-13
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- Bounce rate and average session duration endpoints/CLI commands backed by salted fixed-window session hashes and best-effort dwell beacons.
|
|
12
|
+
- Scheduled R2 aggregate rollups and blended `/api/history` / `lazyanalytics history` reads for ranges beyond Analytics Engine's live retention.
|
|
13
|
+
- Setup flags for archive configuration, including `--no-archive` and `--archive-bucket`.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
|
|
17
|
+
- Tracker now sends one pagehide/hidden dwell beacon while staying below 2KB.
|
|
18
|
+
- Query responses document sampled bounce as unreliable by returning `bounce_rate: null` plus a warning.
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
|
|
22
|
+
- `/api/history` queried the live (≤90d) window with one Analytics Engine subrequest per day, exceeding the Workers per-invocation subrequest limit on large ranges; it now aggregates the whole live span in a single query.
|
|
23
|
+
- The scheduled rollup backfilled 88 days oldest-first in one run (far over the subrequest limit); it now rolls up newest-first within a bounded window, capped per invocation, and self-heals across daily runs.
|
|
24
|
+
|
|
25
|
+
## [0.2.0] - 2026-06-13
|
|
26
|
+
|
|
27
|
+
### Added
|
|
28
|
+
|
|
29
|
+
- `/api/active` and `lazyanalytics active` for current active visitors.
|
|
30
|
+
- `/api/crawlers` and `lazyanalytics crawlers` for opt-in JS-executing AI crawler/agent analytics.
|
|
31
|
+
- `/api/channels` and `lazyanalytics channels` for pageview-scoped acquisition channels.
|
|
32
|
+
- Shared `traffic_class` and `event_type` filters in `buildWhereClause`, so existing human pageview endpoints exclude AI crawler rows and dwell rows.
|
|
33
|
+
|
|
34
|
+
### Changed
|
|
35
|
+
|
|
36
|
+
- Beacon writes now include the 16-blob slot map for traffic class, channel, session, crawler metadata, and event type.
|
|
37
|
+
- CLI API fetching/table formatting is shared across base commands and dedicated commands.
|
|
38
|
+
|
|
7
39
|
## [0.1.0] - 2026-06-12
|
|
8
40
|
|
|
9
41
|
Initial public release.
|
package/README.md
CHANGED
|
@@ -37,6 +37,33 @@ echo "<read-only-token>" | CLOUDFLARE_API_TOKEN=<token> CLOUDFLARE_ACCOUNT_ID=<a
|
|
|
37
37
|
|
|
38
38
|
The script is <2KB, sets no cookies, strips query strings and fragments in the browser, sends only the referrer *domain*, and tracks SPA navigations. `data-site-id` must match a site in the worker's `ALLOWED_SITES`. Print snippets anytime with `lazyanalytics snippet [--site example.com]`.
|
|
39
39
|
|
|
40
|
+
### Installing it (prompt for a coding agent)
|
|
41
|
+
|
|
42
|
+
Copy this into the coding agent for the site you want to track (replace `YOUR_SITE` and the worker URL — `lazyanalytics snippet --site YOUR_SITE` prints the exact tag):
|
|
43
|
+
|
|
44
|
+
```text
|
|
45
|
+
Add the lazyanalytics tracking snippet to this site so it loads on every page.
|
|
46
|
+
|
|
47
|
+
Insert this tag, exactly once, into the global <head> (Astro: the base layout's
|
|
48
|
+
<head>; Next.js App Router: app/layout.tsx; Next.js Pages: pages/_document.tsx
|
|
49
|
+
<Head>; plain HTML: the shared header/partial):
|
|
50
|
+
|
|
51
|
+
<script defer id="analytics" data-site-id="YOUR_SITE"
|
|
52
|
+
src="https://lazyanalytics.YOUR-SUBDOMAIN.workers.dev/tracker.js"></script>
|
|
53
|
+
|
|
54
|
+
Rules:
|
|
55
|
+
- It must appear once per page, site-wide. If a script with id="analytics"
|
|
56
|
+
already exists, leave it as-is.
|
|
57
|
+
- Do NOT add a cookie/consent banner for it — it sets no cookies and collects
|
|
58
|
+
no personal data.
|
|
59
|
+
- data-site-id must exactly match the site registered in the worker's
|
|
60
|
+
ALLOWED_SITES (otherwise beacons are rejected).
|
|
61
|
+
- The script is async/defer, <2KB, and tracks SPA route changes automatically.
|
|
62
|
+
|
|
63
|
+
After it ships, confirm a page load was recorded:
|
|
64
|
+
npx @jayfarei/lazyanalytics stats --site YOUR_SITE --period today
|
|
65
|
+
```
|
|
66
|
+
|
|
40
67
|
## CLI reference
|
|
41
68
|
|
|
42
69
|
Install globally (`npm i -g @jayfarei/lazyanalytics`) or use `npx @jayfarei/lazyanalytics`.
|
|
@@ -45,7 +72,7 @@ Install globally (`npm i -g @jayfarei/lazyanalytics`) or use `npx @jayfarei/lazy
|
|
|
45
72
|
|
|
46
73
|
| Command | What it does |
|
|
47
74
|
| ------- | ------------ |
|
|
48
|
-
| `setup` | Deploy the worker and configure the CLI. Flags: `--sites <csv>`, `--account-id <id>`, `--name <worker-name>` (default `lazyanalytics`), `--rotate-secrets`, `-y/--yes`. Needs `CLOUDFLARE_API_TOKEN` (env or hidden prompt). |
|
|
75
|
+
| `setup` | Deploy the worker and configure the CLI. Flags: `--sites <csv>`, `--account-id <id>`, `--name <worker-name>` (default `lazyanalytics`), `--track-ai-crawlers`, `--no-archive`, `--rotate-secrets`, `-y/--yes`. Needs `CLOUDFLARE_API_TOKEN` (env or hidden prompt). |
|
|
49
76
|
| `sites list` | List tracked sites via the worker's `/api/sites` endpoint. |
|
|
50
77
|
| `sites add <domain>` | Add a site to `ALLOWED_SITES` in the scaffolded `wrangler.toml` and redeploy. Needs `CLOUDFLARE_API_TOKEN`. |
|
|
51
78
|
| `sites remove <domain>` | Remove a site and redeploy (refuses to remove the last site). |
|
|
@@ -58,9 +85,15 @@ Install globally (`npm i -g @jayfarei/lazyanalytics`) or use `npx @jayfarei/lazy
|
|
|
58
85
|
|
|
59
86
|
```bash
|
|
60
87
|
lazyanalytics stats --site example.com --period 7d
|
|
88
|
+
lazyanalytics active --site example.com --window 5
|
|
61
89
|
lazyanalytics pages --site example.com --period 30d --limit 5
|
|
62
90
|
lazyanalytics referrers --site example.com
|
|
63
91
|
lazyanalytics geo --site example.com --period 30d
|
|
92
|
+
lazyanalytics channels --site example.com
|
|
93
|
+
lazyanalytics crawlers --site example.com --type operator
|
|
94
|
+
lazyanalytics bounce --site example.com --period 30d
|
|
95
|
+
lazyanalytics duration --site example.com --period 30d
|
|
96
|
+
lazyanalytics history --site example.com --dimension pages --days 180
|
|
64
97
|
lazyanalytics browsers --site example.com --type os # browser | os | device
|
|
65
98
|
lazyanalytics timeseries --site example.com --unit day # hour | day
|
|
66
99
|
```
|
|
@@ -73,7 +106,7 @@ lazyanalytics timeseries --site example.com --unit day # hour | day
|
|
|
73
106
|
| `--json` | | default | JSON output (for agents) |
|
|
74
107
|
| `--table` | | | Human-readable table |
|
|
75
108
|
|
|
76
|
-
Exit codes: `0` data returned, `1` error, `2` success but empty, `3` config/auth error.
|
|
109
|
+
Exit codes: `0` data returned, `1` error, `2` success but empty, `3` config/auth error. `active` returns `0` on a successful 200 even when `active_visitors` is `0`.
|
|
77
110
|
|
|
78
111
|
## HTTP API reference
|
|
79
112
|
|
|
@@ -82,16 +115,22 @@ All `/api/*` endpoints require `Authorization: Bearer <API_SECRET>` (constant-ti
|
|
|
82
115
|
| Endpoint | Auth | Description |
|
|
83
116
|
| -------- | ---- | ----------- |
|
|
84
117
|
| `GET /api/stats` | yes | Pageviews, approximate daily visitors, avg screen width. Params: `site` (required), `period`. |
|
|
118
|
+
| `GET /api/active` | yes | Active visitors and recent pageviews in the last N minutes. Params: `site`, `window` (1-60, default 5). |
|
|
85
119
|
| `GET /api/pages` | yes | Top pages. Params: `site`, `period`, `limit`. |
|
|
86
120
|
| `GET /api/referrers` | yes | Top external referrer domains. Params: `site`, `period`, `limit`. |
|
|
87
121
|
| `GET /api/geo` | yes | Country breakdown. Params: `site`, `period`, `limit`. |
|
|
122
|
+
| `GET /api/channels` | yes | Pageview-scoped acquisition channels. Params: `site`, `period`. |
|
|
123
|
+
| `GET /api/crawlers` | yes | JS-executing AI crawler/agent breakdown. Params: `site`, `period`, `limit`, `type` (`name`\|`operator`\|`class`). Requires `TRACK_AI_CRAWLERS=true` to collect rows. |
|
|
124
|
+
| `GET /api/bounce` | yes | Approximate session bounce rate. Params: `site`, `period`. Returns `bounce_rate: null` with a warning when sampled. |
|
|
125
|
+
| `GET /api/duration` | yes | Average session duration in seconds. Params: `site`, `period`. Uses best-effort pagehide dwell beacons. |
|
|
126
|
+
| `GET /api/history` | yes | Long-term stats from live AE plus R2 daily rollups. Params: `site`, `dimension`, `days` or `from`+`to`. |
|
|
88
127
|
| `GET /api/browsers` | yes | Browser/OS/device breakdown. Params: `site`, `period`, `limit`, `type` (`browser`\|`os`\|`device`). |
|
|
89
128
|
| `GET /api/timeseries` | yes | Pageviews over time. Params: `site`, `period`, `unit` (`hour`\|`day`). |
|
|
90
129
|
| `GET /api/sites` | yes | Tracked sites: `{ "data": [{"site": "example.com"}], "meta": {"count": 1} }`. |
|
|
91
|
-
| `POST /collect` | no | Beacon ingest. Body: `{ "sid", "url", "ref?", "sw?", "us?", "um?" }`. Returns 204. Bots get 204 but are not recorded; beacons are dropped (204) if the worker has no `ALLOWED_SITES` or `HASH_SALT` configured; unknown `sid` returns 400. CORS-enabled. |
|
|
130
|
+
| `POST /collect` | no | Beacon ingest. Body: `{ "sid", "url", "ref?", "sw?", "us?", "um?", "t?", "em?" }`. Returns 204. Bots get 204 but are not recorded; AI crawler beacons are recorded only when enabled; beacons are dropped (204) if the worker has no `ALLOWED_SITES` or `HASH_SALT` configured; unknown `sid` returns 400. CORS-enabled. |
|
|
92
131
|
| `GET /tracker.js` | no | Serves the tracking script. |
|
|
93
132
|
| `GET /dashboard` | no (page) | Built-in dashboard UI. The page is public, but data loads only after you enter the API token in-page; the token is kept in `sessionStorage` (cleared when the tab closes). You can hand off a session via `https://.../dashboard#token=<API_SECRET>`; the fragment is consumed and immediately stripped from the URL. |
|
|
94
|
-
| `GET /health` | no | `{ "status": "ok", "version": "0.1.0", "timestamp": "..." }`. |
|
|
133
|
+
| `GET /health` | no | `{ "status": "ok", "version": "0.3.0", "timestamp": "..." }`. |
|
|
95
134
|
|
|
96
135
|
Example:
|
|
97
136
|
|
|
@@ -120,6 +159,11 @@ curl -H "Authorization: Bearer $ANALYTICS_API_TOKEN" \
|
|
|
120
159
|
┌──────────▼───────────────────────────┐
|
|
121
160
|
│ Cloudflare Analytics Engine │
|
|
122
161
|
│ (ClickHouse-backed, 90-day) │
|
|
162
|
+
└──────────────────────────────────────┘
|
|
163
|
+
│ daily aggregate rollups
|
|
164
|
+
▼
|
|
165
|
+
┌──────────────────────────────────────┐
|
|
166
|
+
│ R2 archive (optional) │
|
|
123
167
|
└──────────────────────────────────────┘
|
|
124
168
|
▲
|
|
125
169
|
│ HTTPS + bearer token
|
|
@@ -129,9 +173,10 @@ curl -H "Authorization: Bearer $ANALYTICS_API_TOKEN" \
|
|
|
129
173
|
└────────────────────────┘
|
|
130
174
|
```
|
|
131
175
|
|
|
132
|
-
1. **Collection**: the tracker sends a beacon on page load and SPA navigations with the page URL (already stripped), referrer domain, screen width, and UTM source/medium.
|
|
133
|
-
2. **Processing**: the worker filters bots,
|
|
176
|
+
1. **Collection**: the tracker sends a beacon on page load and SPA navigations with the page URL (already stripped), referrer domain, screen width, and UTM source/medium; it also sends one best-effort dwell beacon when the page is hidden or unloaded.
|
|
177
|
+
2. **Processing**: the worker filters generic bots, optionally classifies JS-executing AI agents, classifies acquisition channel server-side, computes salted daily visitor and 30-minute session hashes, and writes one data point to Analytics Engine.
|
|
134
178
|
3. **Querying**: `/api/*` translates HTTP params into sampling-aware SQL (`SUM(_sample_interval)`, never `COUNT(*)`).
|
|
179
|
+
4. **Archiving**: the scheduled handler writes daily aggregate JSON rollups to R2 so `/api/history` can blend archive days with live Analytics Engine days.
|
|
135
180
|
|
|
136
181
|
## Data model
|
|
137
182
|
|
|
@@ -146,10 +191,24 @@ One Analytics Engine data point per pageview:
|
|
|
146
191
|
| `blob4` | Country code (`CF-IPCountry`) | `US` |
|
|
147
192
|
| `blob5` / `blob6` / `blob7` | Browser / OS / device | `Chrome` / `macOS` / `desktop` |
|
|
148
193
|
| `blob8` / `blob9` | UTM source / medium | `twitter` / `social` |
|
|
194
|
+
| `blob10` | Traffic class: empty string for human, `ai` for tracked AI agents | `ai` |
|
|
195
|
+
| `blob11` | Pageview-scoped channel | `Organic Search` |
|
|
196
|
+
| `blob12` | Session hash: salted 30-minute fixed-window hash | `8bd4...` |
|
|
197
|
+
| `blob13` / `blob14` / `blob15` | AI crawler name / operator / class | `ChatGPT-User` / `OpenAI` / `user` |
|
|
198
|
+
| `blob16` | Event type: `pv` pageview or `eng` dwell beacon | `pv` |
|
|
149
199
|
| `double1` | Count (always 1) | `1` |
|
|
150
200
|
| `double2` | Screen width | `1440` |
|
|
201
|
+
| `double4` | Engagement milliseconds from the dwell beacon | `2500` |
|
|
202
|
+
|
|
203
|
+
**Sampling note**: Analytics Engine downsamples high-volume data. All count queries use `SUM(_sample_interval)` for correct estimates, and `meta.sampled` tells you when an answer is an estimate rather than an exact count. Bounce rate returns `null` when sampled because single-page session detection becomes biased.
|
|
204
|
+
|
|
205
|
+
**Metric caveats**:
|
|
151
206
|
|
|
152
|
-
|
|
207
|
+
- Active visitors can lag by seconds to minutes because Analytics Engine is eventually consistent.
|
|
208
|
+
- Channels are pageview-scoped, not session-scoped. SPA navigations without an external referrer can inflate Direct compared with GA4/Plausible.
|
|
209
|
+
- AI crawler analytics only covers agents that execute JavaScript and send `/collect` beacons. Raw non-JS crawlers such as many training bots are invisible.
|
|
210
|
+
- Sessions use fixed 30-minute slots, not sliding inactivity windows. A long visit crossing a slot boundary can split into two sessions and inflate bounce rate.
|
|
211
|
+
- Long-range history sums daily approximate visitors from archived rollups; it is not a cross-day unique visitor count.
|
|
153
212
|
|
|
154
213
|
## Privacy
|
|
155
214
|
|
|
@@ -200,9 +259,8 @@ The repo contains a `cli/bin/analytics` bash wrapper that routes requests throug
|
|
|
200
259
|
|
|
201
260
|
## Limitations
|
|
202
261
|
|
|
203
|
-
-
|
|
262
|
+
- Analytics Engine data point retention is 90 days; optional R2 history stores daily aggregate rollups only.
|
|
204
263
|
- Visitor counts are approximations (NAT/VPN undercounts, shared devices overcount).
|
|
205
|
-
- No bounce rate or session duration (Analytics Engine has no JOINs).
|
|
206
264
|
- Data points can take seconds to minutes to become queryable.
|
|
207
265
|
|
|
208
266
|
## Contributing & license
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { fetchApi, formatTable } from '../lib/api.js';
|
|
3
|
+
export function activeCommand() {
|
|
4
|
+
const cmd = new Command('active');
|
|
5
|
+
cmd
|
|
6
|
+
.description('Current active visitors in the last N minutes')
|
|
7
|
+
.requiredOption('-s, --site <site>', 'Site to query (e.g., example.com)')
|
|
8
|
+
.option('-w, --window <minutes>', 'Active window in minutes, clamped server-side to 1-60', '5')
|
|
9
|
+
.option('--json', 'Output as JSON (default)', true)
|
|
10
|
+
.option('--table', 'Output as human-readable table')
|
|
11
|
+
.action(async (opts) => {
|
|
12
|
+
try {
|
|
13
|
+
const result = await fetchApi('active', { site: opts.site, window: opts.window });
|
|
14
|
+
if (opts.table) {
|
|
15
|
+
console.log(formatTable([result.data]));
|
|
16
|
+
if (result.meta.sampled) {
|
|
17
|
+
console.log('\n(data may be sampled)');
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
else {
|
|
21
|
+
console.log(JSON.stringify(result, null, 2));
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
catch (e) {
|
|
25
|
+
console.error(`Error: ${e instanceof Error ? e.message : 'Unknown error'}`);
|
|
26
|
+
process.exit(1);
|
|
27
|
+
}
|
|
28
|
+
});
|
|
29
|
+
return cmd;
|
|
30
|
+
}
|
|
@@ -1,87 +1,5 @@
|
|
|
1
1
|
import { Command } from 'commander';
|
|
2
|
-
import { readFileSync, existsSync } from 'node:fs';
|
|
3
|
-
import { resolve } from 'node:path';
|
|
4
|
-
import tls from 'node:tls';
|
|
5
|
-
import { loadEnv } from '../lib/env.js';
|
|
6
|
-
// Advanced/experimental: trust an extra proxy CA cert at runtime.
|
|
7
|
-
// Opt-in only via ANALYTICS_PROXY_CA=<path> — never loaded implicitly from
|
|
8
|
-
// the working directory. NODE_EXTRA_CA_CERTS must be set before Node boots,
|
|
9
|
-
// so we patch the secure context here instead.
|
|
10
|
-
function loadProxyCA(path) {
|
|
11
|
-
if (!existsSync(path))
|
|
12
|
-
return;
|
|
13
|
-
try {
|
|
14
|
-
const cert = readFileSync(path, 'utf-8');
|
|
15
|
-
const origCreateSecureContext = tls.createSecureContext;
|
|
16
|
-
tls.createSecureContext = function (options = {}) {
|
|
17
|
-
const ctx = origCreateSecureContext.call(this, options);
|
|
18
|
-
ctx.context.addCACert(cert);
|
|
19
|
-
return ctx;
|
|
20
|
-
};
|
|
21
|
-
}
|
|
22
|
-
catch { /* ignore */ }
|
|
23
|
-
}
|
|
24
|
-
if (process.env.HTTPS_PROXY && process.env.ANALYTICS_PROXY_CA) {
|
|
25
|
-
loadProxyCA(resolve(process.env.ANALYTICS_PROXY_CA));
|
|
26
|
-
}
|
|
27
|
-
function getConfig() {
|
|
28
|
-
loadEnv();
|
|
29
|
-
const apiUrl = process.env.ANALYTICS_API_URL;
|
|
30
|
-
const apiToken = process.env.ANALYTICS_API_TOKEN;
|
|
31
|
-
const proxyMode = !!process.env.HTTPS_PROXY;
|
|
32
|
-
if (!apiUrl) {
|
|
33
|
-
console.error('Error: ANALYTICS_API_URL is not configured.');
|
|
34
|
-
console.error('Example: https://lazyanalytics.YOUR-SUBDOMAIN.workers.dev');
|
|
35
|
-
console.error('');
|
|
36
|
-
console.error('Run "lazyanalytics setup" to deploy and configure, or set');
|
|
37
|
-
console.error('ANALYTICS_API_URL and ANALYTICS_API_TOKEN in the environment.');
|
|
38
|
-
process.exit(3);
|
|
39
|
-
}
|
|
40
|
-
// In proxy mode (HTTPS_PROXY set), the proxy injects the Authorization header.
|
|
41
|
-
if (!apiToken && !proxyMode) {
|
|
42
|
-
console.error('Error: No authentication configured.');
|
|
43
|
-
console.error('');
|
|
44
|
-
console.error('Set ANALYTICS_API_TOKEN in the environment, or run');
|
|
45
|
-
console.error('"lazyanalytics setup" to write it to ~/.config/lazyanalytics/.env.');
|
|
46
|
-
process.exit(3);
|
|
47
|
-
}
|
|
48
|
-
return { apiUrl: apiUrl.replace(/\/$/, ''), apiToken: apiToken || '', proxyMode };
|
|
49
|
-
}
|
|
50
|
-
async function fetchApi(endpoint, params) {
|
|
51
|
-
const { apiUrl, apiToken, proxyMode } = getConfig();
|
|
52
|
-
const url = new URL(`${apiUrl}/api/${endpoint}`);
|
|
53
|
-
for (const [k, v] of Object.entries(params)) {
|
|
54
|
-
if (v)
|
|
55
|
-
url.searchParams.set(k, v);
|
|
56
|
-
}
|
|
57
|
-
// In proxy mode, the credential proxy injects the Authorization header.
|
|
58
|
-
// In direct mode, we add it ourselves.
|
|
59
|
-
const headers = {};
|
|
60
|
-
if (!proxyMode && apiToken) {
|
|
61
|
-
headers.Authorization = `Bearer ${apiToken}`;
|
|
62
|
-
}
|
|
63
|
-
const response = await fetch(url.toString(), { headers });
|
|
64
|
-
if (response.status === 401) {
|
|
65
|
-
console.error('Error: Authentication failed. Check your ANALYTICS_API_TOKEN.');
|
|
66
|
-
process.exit(3);
|
|
67
|
-
}
|
|
68
|
-
if (!response.ok) {
|
|
69
|
-
const body = await response.text();
|
|
70
|
-
console.error(`Error: API returned ${response.status}: ${body}`);
|
|
71
|
-
process.exit(1);
|
|
72
|
-
}
|
|
73
|
-
return response.json();
|
|
74
|
-
}
|
|
75
|
-
function formatTable(data) {
|
|
76
|
-
if (data.length === 0)
|
|
77
|
-
return '(no data)';
|
|
78
|
-
const keys = Object.keys(data[0]);
|
|
79
|
-
const widths = keys.map((k) => Math.max(k.length, ...data.map((row) => String(row[k] ?? '').length)));
|
|
80
|
-
const header = keys.map((k, i) => k.padEnd(widths[i])).join(' ');
|
|
81
|
-
const sep = widths.map((w) => '-'.repeat(w)).join(' ');
|
|
82
|
-
const rows = data.map((row) => keys.map((k, i) => String(row[k] ?? '').padEnd(widths[i])).join(' '));
|
|
83
|
-
return [header, sep, ...rows].join('\n');
|
|
84
|
-
}
|
|
2
|
+
import { fetchApi, formatTable } from '../lib/api.js';
|
|
85
3
|
export function makeCommand(name, description) {
|
|
86
4
|
const cmd = new Command(name);
|
|
87
5
|
cmd
|
|
@@ -103,6 +21,14 @@ export function makeCommand(name, description) {
|
|
|
103
21
|
params.type = opts.type;
|
|
104
22
|
if (opts.unit)
|
|
105
23
|
params.unit = opts.unit;
|
|
24
|
+
if (opts.dimension)
|
|
25
|
+
params.dimension = opts.dimension;
|
|
26
|
+
if (opts.days)
|
|
27
|
+
params.days = opts.days;
|
|
28
|
+
if (opts.from)
|
|
29
|
+
params.from = opts.from;
|
|
30
|
+
if (opts.to)
|
|
31
|
+
params.to = opts.to;
|
|
106
32
|
const result = await fetchApi(name, params);
|
|
107
33
|
if (opts.table) {
|
|
108
34
|
const arr = Array.isArray(result.data) ? result.data : [result.data];
|
|
@@ -6,7 +6,7 @@ import { CONFIG_ENV_PATH, WORKER_SCAFFOLD_DIR, loadEnv, readEnvFile, writeEnvFil
|
|
|
6
6
|
import { packagedWorkerBundle, packagedWranglerTemplate } from '../lib/paths.js';
|
|
7
7
|
import { isValidDomain, prompt, promptHidden, trackingSnippet } from '../lib/prompt.js';
|
|
8
8
|
import { readAllowedSites } from '../lib/scaffold.js';
|
|
9
|
-
import { parseWorkersDevUrl, wranglerDeploy, wranglerSecretPut } from '../lib/wrangler.js';
|
|
9
|
+
import { parseWorkersDevUrl, wranglerDeploy, wranglerR2BucketCreate, wranglerSecretPut } from '../lib/wrangler.js';
|
|
10
10
|
function fail(message) {
|
|
11
11
|
console.error(`Error: ${message}`);
|
|
12
12
|
// Exit 3 = missing/invalid configuration, matching the other commands.
|
|
@@ -35,6 +35,10 @@ export function setupCommand() {
|
|
|
35
35
|
.option('--sites <sites>', 'Comma-separated list of site domains to track')
|
|
36
36
|
.option('--account-id <id>', 'Cloudflare account ID (32-char hex)')
|
|
37
37
|
.option('--name <name>', 'Worker name', 'lazyanalytics')
|
|
38
|
+
.option('--track-ai-crawlers', 'Store JS-executing AI crawler beacons separately')
|
|
39
|
+
.option('--archive', 'Enable daily R2 rollups for history beyond Analytics Engine retention', true)
|
|
40
|
+
.option('--no-archive', 'Do not create or bind the R2 archive bucket')
|
|
41
|
+
.option('--archive-bucket <name>', 'R2 bucket name for daily rollups', 'lazyanalytics-archive')
|
|
38
42
|
.option('--rotate-secrets', 'Regenerate API_SECRET and HASH_SALT instead of reusing them')
|
|
39
43
|
.option('-y, --yes', 'Non-interactive mode: never prompt, fail if input is missing')
|
|
40
44
|
.action(async (opts) => {
|
|
@@ -87,13 +91,22 @@ export function setupCommand() {
|
|
|
87
91
|
const templatePath = packagedWranglerTemplate();
|
|
88
92
|
if (!existsSync(templatePath))
|
|
89
93
|
fail(`wrangler.toml template not found at ${templatePath}`);
|
|
94
|
+
const archiveConfig = opts.archive === false
|
|
95
|
+
? ''
|
|
96
|
+
: `[[r2_buckets]]\nbinding = "ARCHIVE"\nbucket_name = "${opts.archiveBucket}"\n\n[triggers]\ncrons = ["5 1 * * *"]\n`;
|
|
90
97
|
const toml = readFileSync(templatePath, 'utf-8')
|
|
91
98
|
.replace(/__WORKER_NAME__/g, opts.name)
|
|
92
|
-
.replace(/__ALLOWED_SITES__/g, sites.join(','));
|
|
99
|
+
.replace(/__ALLOWED_SITES__/g, sites.join(','))
|
|
100
|
+
.replace(/__TRACK_AI_CRAWLERS__/g, opts.trackAiCrawlers ? 'true' : 'false')
|
|
101
|
+
.replace(/__ARCHIVE_CONFIG__/g, archiveConfig);
|
|
93
102
|
writeFileSync(wranglerTomlPath, toml);
|
|
94
103
|
console.log(`Scaffolded worker in ${WORKER_SCAFFOLD_DIR}`);
|
|
95
104
|
// --- Deploy ---
|
|
96
105
|
const wranglerEnv = { CLOUDFLARE_API_TOKEN: apiToken, CLOUDFLARE_ACCOUNT_ID: accountId };
|
|
106
|
+
if (opts.archive !== false) {
|
|
107
|
+
console.log('\nEnsuring R2 archive bucket exists...');
|
|
108
|
+
await wranglerR2BucketCreate(WORKER_SCAFFOLD_DIR, wranglerEnv, opts.archiveBucket);
|
|
109
|
+
}
|
|
97
110
|
console.log('\nDeploying worker with wrangler...');
|
|
98
111
|
const deployOutput = await wranglerDeploy(WORKER_SCAFFOLD_DIR, wranglerEnv);
|
|
99
112
|
let workerUrl = parseWorkersDevUrl(deployOutput);
|
package/cli/dist/index.js
CHANGED
|
@@ -7,6 +7,7 @@ import { sitesCommand } from './commands/sites.js';
|
|
|
7
7
|
import { snippetCommand } from './commands/snippet.js';
|
|
8
8
|
import { skillCommand } from './commands/skill.js';
|
|
9
9
|
import { configCommand } from './commands/config.js';
|
|
10
|
+
import { activeCommand } from './commands/active.js';
|
|
10
11
|
import { packageVersion } from './lib/paths.js';
|
|
11
12
|
const program = new Command();
|
|
12
13
|
program
|
|
@@ -17,9 +18,19 @@ program
|
|
|
17
18
|
'programmatic consumption.')
|
|
18
19
|
.version(packageVersion());
|
|
19
20
|
program.addCommand(makeCommand('stats', 'Aggregate statistics (pageviews, approx visitors, avg screen width)'));
|
|
21
|
+
program.addCommand(activeCommand());
|
|
20
22
|
program.addCommand(makeCommand('pages', 'Top pages by view count'));
|
|
21
23
|
program.addCommand(makeCommand('referrers', 'Top referrer domains'));
|
|
22
24
|
program.addCommand(makeCommand('geo', 'Geographic breakdown by country'));
|
|
25
|
+
program.addCommand(makeCommand('channels', 'Acquisition channel breakdown (pageview-scoped)'));
|
|
26
|
+
program.addCommand(makeCommand('bounce', 'Bounce rate (% single-pageview sessions)'));
|
|
27
|
+
program.addCommand(makeCommand('duration', 'Average session duration in seconds'));
|
|
28
|
+
program.addCommand(makeCommand('history', 'Long-term stats blending live AE (<=90d) with R2 archives (>90d)')
|
|
29
|
+
.option('--dimension <dimension>', 'Dimension: totals, pages, referrers, geo, browsers', 'totals')
|
|
30
|
+
.option('--days <days>', 'Lookback days, can exceed 90')
|
|
31
|
+
.option('--from <date>', 'Start date YYYY-MM-DD')
|
|
32
|
+
.option('--to <date>', 'End date YYYY-MM-DD'));
|
|
33
|
+
program.addCommand(makeCommand('crawlers', 'AI agent breakdown (training, search, user-triggered)').option('--type <type>', 'Breakdown type: name, operator, class', 'name'));
|
|
23
34
|
program.addCommand(makeCommand('browsers', 'Browser, OS, or device breakdown').option('--type <type>', 'Breakdown type: browser, os, device', 'browser'));
|
|
24
35
|
program.addCommand(makeCommand('timeseries', 'Pageview timeseries').option('--unit <unit>', 'Time bucket: hour, day', 'day'));
|
|
25
36
|
program.addCommand(usageCommand());
|
package/cli/dist/lib/api.d.ts
CHANGED
|
@@ -3,7 +3,17 @@ export interface ApiConfig {
|
|
|
3
3
|
apiToken: string;
|
|
4
4
|
proxyMode: boolean;
|
|
5
5
|
}
|
|
6
|
+
export interface ApiResponse {
|
|
7
|
+
data: unknown;
|
|
8
|
+
meta: {
|
|
9
|
+
site: string;
|
|
10
|
+
period: string;
|
|
11
|
+
sampled: boolean;
|
|
12
|
+
};
|
|
13
|
+
}
|
|
6
14
|
/** Resolve API URL + token from env/config. Returns null if not configured. */
|
|
7
15
|
export declare function getApiConfig(): ApiConfig | null;
|
|
16
|
+
export declare function fetchApi(endpoint: string, params: Record<string, string>): Promise<ApiResponse>;
|
|
17
|
+
export declare function formatTable(data: Record<string, unknown>[]): string;
|
|
8
18
|
/** Fetch the list of tracked sites from the worker's /api/sites endpoint. */
|
|
9
19
|
export declare function fetchSites(config: ApiConfig): Promise<string[]>;
|
package/cli/dist/lib/api.js
CHANGED
|
@@ -1,4 +1,28 @@
|
|
|
1
1
|
import { loadEnv } from './env.js';
|
|
2
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
3
|
+
import { resolve } from 'node:path';
|
|
4
|
+
import tls from 'node:tls';
|
|
5
|
+
// Advanced/experimental: trust an extra proxy CA cert at runtime.
|
|
6
|
+
// Opt-in only via ANALYTICS_PROXY_CA=<path>.
|
|
7
|
+
function loadProxyCA(path) {
|
|
8
|
+
if (!existsSync(path))
|
|
9
|
+
return;
|
|
10
|
+
try {
|
|
11
|
+
const cert = readFileSync(path, 'utf-8');
|
|
12
|
+
const origCreateSecureContext = tls.createSecureContext;
|
|
13
|
+
tls.createSecureContext = function (options = {}) {
|
|
14
|
+
const ctx = origCreateSecureContext.call(this, options);
|
|
15
|
+
ctx.context.addCACert(cert);
|
|
16
|
+
return ctx;
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
catch {
|
|
20
|
+
/* ignore */
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
if (process.env.HTTPS_PROXY && process.env.ANALYTICS_PROXY_CA) {
|
|
24
|
+
loadProxyCA(resolve(process.env.ANALYTICS_PROXY_CA));
|
|
25
|
+
}
|
|
2
26
|
/** Resolve API URL + token from env/config. Returns null if not configured. */
|
|
3
27
|
export function getApiConfig() {
|
|
4
28
|
loadEnv();
|
|
@@ -9,13 +33,64 @@ export function getApiConfig() {
|
|
|
9
33
|
return null;
|
|
10
34
|
return { apiUrl: apiUrl.replace(/\/$/, ''), apiToken, proxyMode };
|
|
11
35
|
}
|
|
12
|
-
|
|
13
|
-
|
|
36
|
+
function requireApiConfig() {
|
|
37
|
+
const config = getApiConfig();
|
|
38
|
+
if (!config) {
|
|
39
|
+
console.error('Error: ANALYTICS_API_URL is not configured.');
|
|
40
|
+
console.error('Example: https://lazyanalytics.YOUR-SUBDOMAIN.workers.dev');
|
|
41
|
+
console.error('');
|
|
42
|
+
console.error('Run "lazyanalytics setup" to deploy and configure, or set');
|
|
43
|
+
console.error('ANALYTICS_API_URL and ANALYTICS_API_TOKEN in the environment.');
|
|
44
|
+
process.exit(3);
|
|
45
|
+
}
|
|
46
|
+
return config;
|
|
47
|
+
}
|
|
48
|
+
function authHeaders(config) {
|
|
14
49
|
const headers = {};
|
|
15
50
|
if (!config.proxyMode && config.apiToken) {
|
|
16
51
|
headers.Authorization = `Bearer ${config.apiToken}`;
|
|
17
52
|
}
|
|
18
|
-
|
|
53
|
+
return headers;
|
|
54
|
+
}
|
|
55
|
+
export async function fetchApi(endpoint, params) {
|
|
56
|
+
const config = requireApiConfig();
|
|
57
|
+
if (!config.apiToken && !config.proxyMode) {
|
|
58
|
+
console.error('Error: No authentication configured.');
|
|
59
|
+
console.error('');
|
|
60
|
+
console.error('Set ANALYTICS_API_TOKEN in the environment, or run');
|
|
61
|
+
console.error('"lazyanalytics setup" to write it to ~/.config/lazyanalytics/.env.');
|
|
62
|
+
process.exit(3);
|
|
63
|
+
}
|
|
64
|
+
const url = new URL(`${config.apiUrl}/api/${endpoint}`);
|
|
65
|
+
for (const [k, v] of Object.entries(params)) {
|
|
66
|
+
if (v)
|
|
67
|
+
url.searchParams.set(k, v);
|
|
68
|
+
}
|
|
69
|
+
const response = await fetch(url.toString(), { headers: authHeaders(config) });
|
|
70
|
+
if (response.status === 401) {
|
|
71
|
+
console.error('Error: Authentication failed. Check your ANALYTICS_API_TOKEN.');
|
|
72
|
+
process.exit(3);
|
|
73
|
+
}
|
|
74
|
+
if (!response.ok) {
|
|
75
|
+
const body = await response.text();
|
|
76
|
+
console.error(`Error: API returned ${response.status}: ${body}`);
|
|
77
|
+
process.exit(1);
|
|
78
|
+
}
|
|
79
|
+
return response.json();
|
|
80
|
+
}
|
|
81
|
+
export function formatTable(data) {
|
|
82
|
+
if (data.length === 0)
|
|
83
|
+
return '(no data)';
|
|
84
|
+
const keys = Object.keys(data[0]);
|
|
85
|
+
const widths = keys.map((k) => Math.max(k.length, ...data.map((row) => String(row[k] ?? '').length)));
|
|
86
|
+
const header = keys.map((k, i) => k.padEnd(widths[i])).join(' ');
|
|
87
|
+
const sep = widths.map((w) => '-'.repeat(w)).join(' ');
|
|
88
|
+
const rows = data.map((row) => keys.map((k, i) => String(row[k] ?? '').padEnd(widths[i])).join(' '));
|
|
89
|
+
return [header, sep, ...rows].join('\n');
|
|
90
|
+
}
|
|
91
|
+
/** Fetch the list of tracked sites from the worker's /api/sites endpoint. */
|
|
92
|
+
export async function fetchSites(config) {
|
|
93
|
+
const res = await fetch(`${config.apiUrl}/api/sites`, { headers: authHeaders(config) });
|
|
19
94
|
if (res.status === 401) {
|
|
20
95
|
throw new Error('Authentication failed. Check your ANALYTICS_API_TOKEN.');
|
|
21
96
|
}
|
|
@@ -13,5 +13,7 @@ export declare function wranglerDeploy(cwd: string, env: WranglerEnv): Promise<s
|
|
|
13
13
|
* via stdin. The value is never echoed or logged.
|
|
14
14
|
*/
|
|
15
15
|
export declare function wranglerSecretPut(cwd: string, env: WranglerEnv, name: string, value: string): Promise<void>;
|
|
16
|
+
/** Create an R2 bucket if it does not already exist. */
|
|
17
|
+
export declare function wranglerR2BucketCreate(cwd: string, env: WranglerEnv, bucket: string): Promise<void>;
|
|
16
18
|
/** Parse the workers.dev URL from wrangler deploy output. Returns null if absent. */
|
|
17
19
|
export declare function parseWorkersDevUrl(deployOutput: string): string | null;
|
package/cli/dist/lib/wrangler.js
CHANGED
|
@@ -58,6 +58,32 @@ export function wranglerSecretPut(cwd, env, name, value) {
|
|
|
58
58
|
});
|
|
59
59
|
});
|
|
60
60
|
}
|
|
61
|
+
/** Create an R2 bucket if it does not already exist. */
|
|
62
|
+
export function wranglerR2BucketCreate(cwd, env, bucket) {
|
|
63
|
+
return new Promise((resolvePromise, reject) => {
|
|
64
|
+
const child = spawn(NPX, ['wrangler@4', 'r2', 'bucket', 'create', bucket], {
|
|
65
|
+
cwd,
|
|
66
|
+
env: wranglerProcessEnv(env),
|
|
67
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
68
|
+
});
|
|
69
|
+
let output = '';
|
|
70
|
+
child.stdout.on('data', (chunk) => {
|
|
71
|
+
output += chunk.toString();
|
|
72
|
+
process.stderr.write(chunk);
|
|
73
|
+
});
|
|
74
|
+
child.stderr.on('data', (chunk) => {
|
|
75
|
+
output += chunk.toString();
|
|
76
|
+
process.stderr.write(chunk);
|
|
77
|
+
});
|
|
78
|
+
child.on('error', reject);
|
|
79
|
+
child.on('close', (code) => {
|
|
80
|
+
if (code === 0 || /already exists/i.test(output))
|
|
81
|
+
resolvePromise();
|
|
82
|
+
else
|
|
83
|
+
reject(new Error(`wrangler r2 bucket create ${bucket} exited with code ${code}`));
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
}
|
|
61
87
|
/** Parse the workers.dev URL from wrangler deploy output. Returns null if absent. */
|
|
62
88
|
export function parseWorkersDevUrl(deployOutput) {
|
|
63
89
|
const match = deployOutput.match(/https:\/\/[a-z0-9-]+(\.[a-z0-9-]+)*\.workers\.dev/i);
|