emdash 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{apply-B4MsLM-w.mjs → apply-5uslYdUu.mjs} +174 -17
- package/dist/apply-5uslYdUu.mjs.map +1 -0
- package/dist/astro/index.d.mts +4 -4
- package/dist/astro/index.mjs +7 -3
- package/dist/astro/index.mjs.map +1 -1
- package/dist/astro/middleware/auth.d.mts +4 -4
- package/dist/astro/middleware/redirect.mjs +1 -1
- package/dist/astro/middleware/request-context.mjs +6 -1
- package/dist/astro/middleware/request-context.mjs.map +1 -1
- package/dist/astro/middleware.mjs +13 -12
- package/dist/astro/middleware.mjs.map +1 -1
- package/dist/astro/types.d.mts +13 -4
- package/dist/astro/types.d.mts.map +1 -1
- package/dist/cli/index.mjs +4 -4
- package/dist/{content-BsBoyj8G.mjs → content-D7J5y73J.mjs} +27 -1
- package/dist/{content-BsBoyj8G.mjs.map → content-D7J5y73J.mjs.map} +1 -1
- package/dist/db/index.d.mts +2 -2
- package/dist/db/index.mjs +1 -1
- package/dist/{index-BYv0mB9g.d.mts → index-De6_Xv3v.d.mts} +77 -3
- package/dist/index-De6_Xv3v.d.mts.map +1 -0
- package/dist/index.d.mts +4 -4
- package/dist/index.mjs +7 -7
- package/dist/media/local-runtime.d.mts +4 -4
- package/dist/plugins/adapt-sandbox-entry.d.mts +4 -4
- package/dist/{query-Bk_3vKvU.mjs → query-g4Ug-9j9.mjs} +3 -3
- package/dist/{query-Bk_3vKvU.mjs.map → query-g4Ug-9j9.mjs.map} +1 -1
- package/dist/{redirect-7lGhLBNZ.mjs → redirect-CN0Rt9Ob.mjs} +66 -10
- package/dist/redirect-CN0Rt9Ob.mjs.map +1 -0
- package/dist/{runner-Fl2NcUUz.d.mts → runner-BR2xKwhn.d.mts} +2 -2
- package/dist/{runner-Fl2NcUUz.d.mts.map → runner-BR2xKwhn.d.mts.map} +1 -1
- package/dist/{runner-Cd-_WyDo.mjs → runner-tQ7BJ4T7.mjs} +211 -134
- package/dist/runner-tQ7BJ4T7.mjs.map +1 -0
- package/dist/runtime.d.mts +4 -4
- package/dist/{search-DI4bM2w9.mjs → search-B0effn3j.mjs} +117 -23
- package/dist/search-B0effn3j.mjs.map +1 -0
- package/dist/seed/index.d.mts +2 -2
- package/dist/seed/index.mjs +3 -3
- package/dist/{taxonomies-DbrKzDju.mjs → taxonomies-K2z0Uhnj.mjs} +2 -2
- package/dist/{taxonomies-DbrKzDju.mjs.map → taxonomies-K2z0Uhnj.mjs.map} +1 -1
- package/dist/{types-8xrvl_68.d.mts → types-C2v0c34j.d.mts} +10 -1
- package/dist/{types-8xrvl_68.d.mts.map → types-C2v0c34j.d.mts.map} +1 -1
- package/dist/{validate-CaLH1Ia2.d.mts → validate-kM8Pjuf7.d.mts} +2 -2
- package/dist/{validate-CaLH1Ia2.d.mts.map → validate-kM8Pjuf7.d.mts.map} +1 -1
- package/dist/version-BnTKdfam.mjs +7 -0
- package/dist/{version-Uaf2ynPX.mjs.map → version-BnTKdfam.mjs.map} +1 -1
- package/package.json +5 -5
- package/src/api/handlers/content.ts +2 -0
- package/src/api/schemas/content.ts +8 -0
- package/src/astro/integration/font-provider.ts +3 -1
- package/src/astro/integration/index.ts +2 -0
- package/src/astro/integration/runtime.ts +55 -1
- package/src/astro/routes/admin.astro +14 -7
- package/src/astro/routes/api/auth/magic-link/send.ts +2 -1
- package/src/astro/routes/api/auth/passkey/options.ts +2 -1
- package/src/astro/routes/api/auth/signup/request.ts +26 -8
- package/src/astro/routes/api/comments/[collection]/[contentId]/index.ts +10 -6
- package/src/astro/routes/api/content/[collection]/[id]/compare.ts +1 -1
- package/src/astro/routes/api/content/[collection]/[id]/preview-url.ts +1 -1
- package/src/astro/routes/api/content/[collection]/[id]/revisions.ts +1 -1
- package/src/astro/routes/api/content/[collection]/[id]/translations.ts +26 -0
- package/src/astro/routes/api/content/[collection]/[id].ts +30 -2
- package/src/astro/routes/api/content/[collection]/index.ts +19 -1
- package/src/astro/routes/api/content/[collection]/trash.ts +1 -1
- package/src/astro/routes/api/import/wordpress-plugin/analyze.ts +4 -3
- package/src/astro/routes/api/import/wordpress-plugin/execute.ts +4 -3
- package/src/astro/routes/api/manifest.ts +7 -0
- package/src/astro/routes/api/oauth/device/code.ts +2 -1
- package/src/astro/routes/api/oauth/device/token.ts +2 -1
- package/src/astro/routes/api/setup/admin-verify.ts +30 -5
- package/src/astro/routes/api/setup/admin.ts +32 -8
- package/src/astro/routes/api/setup/index.ts +5 -2
- package/src/astro/types.ts +9 -0
- package/src/auth/rate-limit.ts +50 -22
- package/src/auth/setup-nonce.ts +22 -0
- package/src/auth/trusted-proxy.ts +92 -0
- package/src/database/migrations/035_bounded_404_log.ts +112 -0
- package/src/database/migrations/runner.ts +2 -0
- package/src/database/repositories/content.ts +39 -0
- package/src/database/repositories/options.ts +25 -0
- package/src/database/repositories/redirect.ts +111 -8
- package/src/database/types.ts +9 -0
- package/src/emdash-runtime.ts +3 -1
- package/src/import/registry.ts +4 -3
- package/src/import/ssrf.ts +253 -12
- package/src/mcp/server.ts +76 -3
- package/src/plugins/context.ts +15 -3
- package/src/plugins/manager.ts +6 -0
- package/src/plugins/request-meta.ts +66 -15
- package/src/plugins/routes.ts +3 -1
- package/src/seed/apply.ts +26 -0
- package/src/visual-editing/toolbar.ts +6 -1
- package/dist/apply-B4MsLM-w.mjs.map +0 -1
- package/dist/index-BYv0mB9g.d.mts.map +0 -1
- package/dist/redirect-7lGhLBNZ.mjs.map +0 -1
- package/dist/runner-Cd-_WyDo.mjs.map +0 -1
- package/dist/search-DI4bM2w9.mjs.map +0 -1
- package/dist/version-Uaf2ynPX.mjs +0 -7
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve the list of client-IP headers the operator trusts.
|
|
3
|
+
*
|
|
4
|
+
* Resolution order:
|
|
5
|
+
* 1. `config.trustedProxyHeaders` — explicit opt-in via astro.config.mjs.
|
|
6
|
+
* An empty array is respected (means "trust nothing, ignore env").
|
|
7
|
+
* 2. `EMDASH_TRUSTED_PROXY_HEADERS` env var — comma-separated header names.
|
|
8
|
+
* 3. `[]` — default, no trusted headers.
|
|
9
|
+
*
|
|
10
|
+
* Operators must only set this when they control the reverse proxy.
|
|
11
|
+
* Untrusted clients can set any header they like; trusting headers from
|
|
12
|
+
* an open network defeats rate limiting.
|
|
13
|
+
*
|
|
14
|
+
* Header names are returned lowercased because HTTP header lookups are
|
|
15
|
+
* case-insensitive.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import type { EmDashConfig } from "../astro/integration/runtime.js";
|
|
19
|
+
|
|
20
|
+
/**
 * RFC 7230 token — the characters allowed in an HTTP header name. Names are
 * lowercased before being matched, so only `a-z` is listed. A name that
 * fails this pattern would make `Headers.get()` throw a TypeError at
 * runtime, which would otherwise surface as a 500 from every auth route.
 */
const HEADER_NAME_PATTERN = /^[!#$%&'*+\-.^_`|~0-9a-z]+$/;

/**
 * Normalise a list of header names: trim, lowercase, drop empty entries and
 * drop anything that is not a valid RFC 7230 token.
 *
 * The list can originate from a plain-JS config file, so entries are also
 * checked at runtime: anything that is not a string is skipped instead of
 * throwing on `.trim()`.
 *
 * @param names - Candidate header names, in any case, possibly padded.
 * @returns The surviving names, lowercased, in their original order.
 */
export function normalizeTrustedHeaders(names: readonly string[]): string[] {
	const accepted: string[] = [];
	for (const candidate of names) {
		// The static type says string, but the value is operator-supplied.
		if (typeof candidate !== "string") continue;
		const name = candidate.trim().toLowerCase();
		if (name.length > 0 && HEADER_NAME_PATTERN.test(name)) {
			accepted.push(name);
		}
	}
	return accepted;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Report whether `name` consists solely of characters accepted by
 * `HEADER_NAME_PATTERN`. The pattern only lists lowercase letters, so the
 * caller is expected to lowercase the name first.
 */
function isValidHeaderName(name: string): boolean {
	const matchesToken = HEADER_NAME_PATTERN.test(name);
	return matchesToken;
}
|
|
42
|
+
|
|
43
|
+
/** Cache for the env-derived value. `null` means "not yet parsed". */
|
|
44
|
+
let _envCache: string[] | null = null;
|
|
45
|
+
|
|
46
|
+
/**
 * Test-only hook: forget the memoised env-derived header list so that the
 * next lookup parses the environment again.
 */
export function _resetTrustedProxyHeadersCache(): void {
	_envCache = null;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Resolve the trusted header list from the `EMDASH_TRUSTED_PROXY_HEADERS`
 * environment variable (comma-separated header names).
 *
 * The parsed result is memoised in `_envCache`, so the environment is read
 * at most once until `_resetTrustedProxyHeadersCache()` is called. A missing
 * or empty variable yields `[]`.
 *
 * @returns The normalised (trimmed, lowercased, validated) header names.
 */
function getEnvTrustedHeaders(): string[] {
	if (_envCache !== null) return _envCache;
	let raw: string | undefined;
	try {
		// Prefer process.env so SSR/container deployments can override this
		// value at runtime (Vite/Astro inline import.meta.env at build time,
		// which locks the value into the bundle). Fall back to import.meta.env
		// for bundler-managed environments where process.env isn't populated.
		// eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- import.meta.env shape varies by bundler
		const importMetaEnv = (import.meta as unknown as { env?: Record<string, string | undefined> })
			.env;
		// `||` (not `??`) on purpose: an empty process.env value falls through
		// to the bundler-provided one.
		raw =
			(typeof process !== "undefined" ? process.env?.EMDASH_TRUSTED_PROXY_HEADERS : undefined) ||
			importMetaEnv?.EMDASH_TRUSTED_PROXY_HEADERS;
	} catch {
		// Any failure while probing the environment is treated as "not set".
		raw = undefined;
	}
	// Share the exact normalisation rules used for every other header list
	// instead of re-implementing trim/lowercase/validate here.
	_envCache = raw ? normalizeTrustedHeaders(raw.split(",")) : [];
	return _envCache;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Return the lowercased list of headers to trust for client-IP resolution.
 *
 * When `config.trustedProxyHeaders` is explicitly set (even to `[]`), it
 * wins and the environment is not consulted. Otherwise the value falls
 * through to the env var, and finally to `[]`.
 *
 * @param config - Resolved integration config; may be absent.
 * @returns Normalised header names (trimmed, lowercased, invalid ones dropped).
 */
export function getTrustedProxyHeaders(config: EmDashConfig | null | undefined): string[] {
	if (config && config.trustedProxyHeaders !== undefined) {
		// Reuse the shared normaliser so the config path and the env path
		// cannot drift apart in how they validate names.
		return normalizeTrustedHeaders(config.trustedProxyHeaders);
	}
	return getEnvTrustedHeaders();
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { Kysely } from "kysely";
|
|
2
|
+
import { sql } from "kysely";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Migration: Bounded 404 logging
|
|
6
|
+
*
|
|
7
|
+
* Hardens `_emdash_404_log` against unauthenticated DoS. Previously every 404
|
|
8
|
+
* inserted a new row, so an attacker could grow the table without bound.
|
|
9
|
+
*
|
|
10
|
+
* Changes:
|
|
11
|
+
* - Adds `hits` (default 1, NOT NULL)
|
|
12
|
+
* - Adds `last_seen_at` (nullable; SQLite can't add NOT NULL with a
|
|
13
|
+
* non-constant default to a populated table, so the column is nullable
|
|
14
|
+
* at the schema level and backfilled from `created_at` for existing rows;
|
|
15
|
+
* new inserts via `log404` always set it)
|
|
16
|
+
* - Deduplicates existing rows by path, keeping the most recent row per
|
|
17
|
+
* path and summing hits
|
|
18
|
+
* - Adds a UNIQUE index on `path` so upsert semantics work
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
 * Apply the migration: add the `hits` and `last_seen_at` columns, collapse
 * duplicate rows so each path appears once, then swap the plain path index
 * for a unique one and index `last_seen_at`.
 *
 * The statements run strictly in the order written; the unique index can
 * only be created once the duplicates have been removed.
 */
export async function up(db: Kysely<unknown>): Promise<void> {
	const table = "_emdash_404_log";

	// Step 1: new columns. `hits` can be NOT NULL because its default is a
	// constant.
	await db.schema
		.alterTable(table)
		.addColumn("hits", "integer", (col) => col.notNull().defaultTo(1))
		.execute();

	// `last_seen_at` is added as a nullable column and populated afterwards:
	// SQLite refuses a NOT NULL column with a non-constant default on a table
	// that already has rows. The application layer sets it on later inserts.
	await db.schema.alterTable(table).addColumn("last_seen_at", "text").execute();

	// Seed last_seen_at from created_at for every pre-existing row.
	await sql`
		UPDATE _emdash_404_log
		SET last_seen_at = created_at
		WHERE last_seen_at IS NULL
	`.execute(db);

	// Step 2: collapse duplicates per path.
	// The newest row of each path (rn = 1) becomes the keeper and receives
	// the total row count for that path as `hits` plus the freshest
	// created_at as `last_seen_at`. ROW_NUMBER window functions keep the
	// statement valid on both SQLite (3.25+) and Postgres; a GROUP BY form
	// was accepted by SQLite but rejected by Postgres because `id` was
	// neither grouped nor aggregated.
	await sql`
		WITH ranked AS (
			SELECT
				id,
				path,
				ROW_NUMBER() OVER (
					PARTITION BY path
					ORDER BY created_at DESC, id DESC
				) AS rn,
				COUNT(*) OVER (PARTITION BY path) AS path_count,
				MAX(created_at) OVER (PARTITION BY path) AS latest_created_at
			FROM _emdash_404_log
		)
		UPDATE _emdash_404_log
		SET
			hits = (SELECT path_count FROM ranked WHERE ranked.id = _emdash_404_log.id),
			last_seen_at = (SELECT latest_created_at FROM ranked WHERE ranked.id = _emdash_404_log.id)
		WHERE id IN (SELECT id FROM ranked WHERE rn = 1)
	`.execute(db);

	// Remove every row that is not the keeper of its path (rn > 1).
	await sql`
		DELETE FROM _emdash_404_log
		WHERE id IN (
			SELECT id FROM (
				SELECT
					id,
					ROW_NUMBER() OVER (
						PARTITION BY path
						ORDER BY created_at DESC, id DESC
					) AS rn
				FROM _emdash_404_log
			) AS ranked
			WHERE rn > 1
		)
	`.execute(db);

	// Step 3: the unique path index that upserts rely on.
	await db.schema
		.createIndex("idx_404_log_path_unique")
		.on(table)
		.column("path")
		.unique()
		.execute();

	// The earlier non-unique index is redundant now; the unique one serves
	// the same lookups.
	await db.schema.dropIndex("idx_404_log_path").execute();

	// Step 4: index last_seen_at, which eviction orders by.
	await db.schema
		.createIndex("idx_404_log_last_seen")
		.on(table)
		.column("last_seen_at")
		.execute();
}
|
|
102
|
+
|
|
103
|
+
/**
 * Revert the migration: drop both indexes created by `up`, recreate the
 * original non-unique path index, then drop the two added columns.
 * Rows merged by `up` are not split apart again.
 */
export async function down(db: Kysely<unknown>): Promise<void> {
	const table = "_emdash_404_log";

	// Indexes go first, before the columns they cover are removed.
	await db.schema.dropIndex("idx_404_log_last_seen").execute();
	await db.schema.dropIndex("idx_404_log_path_unique").execute();

	// Bring back the plain (non-unique) index on path.
	await db.schema.createIndex("idx_404_log_path").on(table).column("path").execute();

	// Finally remove the columns, one ALTER TABLE per column.
	await db.schema.alterTable(table).dropColumn("last_seen_at").execute();
	await db.schema.alterTable(table).dropColumn("hits").execute();
}
|
|
@@ -35,6 +35,7 @@ import * as m031 from "./031_bylines.js";
|
|
|
35
35
|
import * as m032 from "./032_rate_limits.js";
|
|
36
36
|
import * as m033 from "./033_optimize_content_indexes.js";
|
|
37
37
|
import * as m034 from "./034_published_at_index.js";
|
|
38
|
+
import * as m035 from "./035_bounded_404_log.js";
|
|
38
39
|
|
|
39
40
|
const MIGRATIONS: Readonly<Record<string, Migration>> = Object.freeze({
|
|
40
41
|
"001_initial": m001,
|
|
@@ -70,6 +71,7 @@ const MIGRATIONS: Readonly<Record<string, Migration>> = Object.freeze({
|
|
|
70
71
|
"032_rate_limits": m032,
|
|
71
72
|
"033_optimize_content_indexes": m033,
|
|
72
73
|
"034_published_at_index": m034,
|
|
74
|
+
"035_bounded_404_log": m035,
|
|
73
75
|
});
|
|
74
76
|
|
|
75
77
|
/** Total number of registered migrations. Exported for use in tests. */
|
|
@@ -1031,6 +1031,45 @@ export class ContentRepository {
|
|
|
1031
1031
|
return updated;
|
|
1032
1032
|
}
|
|
1033
1033
|
|
|
1034
|
+
/**
 * Point a content item's `draft_revision_id` at the given revision.
 *
 * Intended for seed/import flows that stage a revision's data before
 * promoting it with `publish()`.
 *
 * Before writing, the method confirms that the content item can be found,
 * that the revision can be found, and that the revision's collection and
 * entry match the arguments — otherwise the content row could end up
 * referencing a missing or unrelated revision. The UPDATE itself is also
 * restricted to rows that are not soft-deleted.
 *
 * @param type - Collection the content item belongs to.
 * @param id - Content item id.
 * @param revisionId - Revision to record as the draft.
 * @throws EmDashValidationError when the item or revision is not found, or
 *   when the revision belongs to a different item.
 */
async setDraftRevision(type: string, id: string, revisionId: string): Promise<void> {
	const tableName = getTableName(type);
	const now = new Date().toISOString();

	const item = await this.findById(type, id);
	if (!item) {
		throw new EmDashValidationError("Content item not found");
	}

	const revision = await new RevisionRepository(this.db).findById(revisionId);
	if (!revision) {
		throw new EmDashValidationError("Revision not found");
	}

	const ownedByItem = revision.collection === type && revision.entryId === id;
	if (!ownedByItem) {
		throw new EmDashValidationError("Revision does not belong to the specified content item");
	}

	// The table name is dynamic, so it goes through sql.ref; every value is
	// passed as a bound parameter.
	await sql`
		UPDATE ${sql.ref(tableName)}
		SET draft_revision_id = ${revisionId},
			updated_at = ${now}
		WHERE id = ${id}
		AND deleted_at IS NULL
	`.execute(this.db);
}
|
|
1072
|
+
|
|
1034
1073
|
/**
|
|
1035
1074
|
* Discard pending draft changes
|
|
1036
1075
|
*
|
|
@@ -55,6 +55,31 @@ export class OptionsRepository {
|
|
|
55
55
|
.execute();
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
+
/**
 * Store `value` under `name` only when no option row with that name exists.
 *
 * The check and the write are a single `INSERT ... ON CONFLICT DO NOTHING`
 * statement, so concurrent callers cannot both pass the existence check.
 *
 * @param name - Option name (the conflict target).
 * @param value - Value to store; serialised with `JSON.stringify`.
 * @returns `true` if a row was inserted, `false` if a row was already
 *   present — whatever its stored value, including an empty string or null.
 */
async setIfAbsent<T = unknown>(name: string, value: T): Promise<boolean> {
	const record: OptionTable = { name, value: JSON.stringify(value) };

	const outcome = await this.db
		.insertInto("options")
		.values(record)
		.onConflict((conflict) => conflict.column("name").doNothing())
		.executeTakeFirst();

	// When the conflict branch fires and nothing is written, the reported
	// row count is 0; a missing count is treated the same way.
	const rowsWritten = outcome.numInsertedOrUpdatedRows ?? 0n;
	return rowsWritten > 0n;
}
|
|
82
|
+
|
|
58
83
|
/**
|
|
59
84
|
* Delete an option
|
|
60
85
|
*/
|
|
@@ -11,6 +11,32 @@ import { currentTimestampValue } from "../dialect-helpers.js";
|
|
|
11
11
|
import type { Database, RedirectTable } from "../types.js";
|
|
12
12
|
import { encodeCursor, decodeCursor, type FindManyResult } from "./types.js";
|
|
13
13
|
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Bounded 404 logging
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Hard cap on rows stored in `_emdash_404_log`. When exceeded, the oldest
|
|
20
|
+
* rows (by `last_seen_at`) are evicted on insert. Prevents an unauthenticated
|
|
21
|
+
* attacker from growing the table without bound by requesting unique URLs.
|
|
22
|
+
*/
|
|
23
|
+
export const MAX_404_LOG_ROWS = 10_000;
|
|
24
|
+
|
|
25
|
+
/** Max stored length for the `Referer` header — truncated on insert. */
|
|
26
|
+
export const REFERRER_MAX_LENGTH = 512;
|
|
27
|
+
|
|
28
|
+
/** Max stored length for the `User-Agent` header — truncated on insert. */
|
|
29
|
+
export const USER_AGENT_MAX_LENGTH = 256;
|
|
30
|
+
|
|
31
|
+
/**
 * Truncate a header-derived string to at most `max` UTF-16 code units.
 *
 * `null` and `undefined` both come back as `null`. Empty strings stay empty
 * (the caller decides whether to coerce them). If the cut would land in the
 * middle of a surrogate pair, the dangling high surrogate is dropped as
 * well, so the result never ends in half of a character and may be one
 * unit shorter than `max`.
 *
 * @param value - Raw value, possibly absent.
 * @param max - Maximum number of UTF-16 code units to keep.
 * @returns The (possibly shortened) string, or `null` when `value` is absent.
 */
function truncateOrNull(value: string | null | undefined, max: number): string | null {
	if (value === null || value === undefined) return null;
	if (value.length <= max) return value;

	let end = max;
	// 0xD800–0xDBFF are high surrogates: the first half of a two-unit
	// character. Keeping one as the final unit would store a lone surrogate.
	const lastKept = value.charCodeAt(end - 1);
	if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
		end -= 1;
	}
	return value.slice(0, end);
}
|
|
39
|
+
|
|
14
40
|
// ---------------------------------------------------------------------------
|
|
15
41
|
// Types
|
|
16
42
|
// ---------------------------------------------------------------------------
|
|
@@ -369,22 +395,97 @@ export class RedirectRepository {
|
|
|
369
395
|
|
|
370
396
|
// --- 404 log ------------------------------------------------------------
|
|
371
397
|
|
|
398
|
+
/**
 * Record a 404 hit for `entry.path`.
 *
 * Dedups by path: a repeat hit increments `hits` and refreshes
 * `last_seen_at`, `referrer`, `user_agent` and `ip` on the existing row
 * instead of inserting a new one. Referrer and user-agent are truncated to
 * `REFERRER_MAX_LENGTH` / `USER_AGENT_MAX_LENGTH` so a client can't inflate
 * storage with huge headers. After the upsert the row cap is enforced via
 * `enforce404Cap()`.
 *
 * Errors are not caught here; they propagate to the caller.
 *
 * @param entry - The missed path plus optional request metadata. Absent
 *   metadata is stored as `null`.
 */
async log404(entry: {
	path: string;
	referrer?: string | null;
	userAgent?: string | null;
	ip?: string | null;
}): Promise<void> {
	const now = new Date().toISOString();
	const referrer = truncateOrNull(entry.referrer, REFERRER_MAX_LENGTH);
	const userAgent = truncateOrNull(entry.userAgent, USER_AGENT_MAX_LENGTH);
	const ip = entry.ip ?? null;

	// Atomic upsert by path. The UNIQUE index on `path` makes this safe
	// under concurrency: two requests for the same new path can't both
	// insert — the second one takes the conflict branch and increments
	// hits instead of failing with a uniqueness error.
	await this.db
		.insertInto("_emdash_404_log")
		.values({
			id: ulid(),
			path: entry.path,
			referrer,
			user_agent: userAgent,
			ip,
			hits: 1,
			last_seen_at: now,
			created_at: now,
		})
		.onConflict((oc) =>
			oc.column("path").doUpdateSet({
				// Table-qualified on purpose: inside ON CONFLICT DO UPDATE,
				// PostgreSQL sees both the existing row and `excluded`, and
				// rejects a bare `hits` as an ambiguous column reference.
				// SQLite accepts the qualified form as well.
				hits: sql`${sql.ref("_emdash_404_log.hits")} + 1`,
				last_seen_at: now,
				referrer,
				user_agent: userAgent,
				ip,
			}),
		)
		.execute();

	// Enforce the row cap. Under the cap this costs one COUNT(*) query;
	// over it, the oldest rows are evicted. A repeat hit updates a row in
	// place and so does not grow the table.
	await this.enforce404Cap();
}
|
|
454
|
+
|
|
455
|
+
/**
 * Trim `_emdash_404_log` back down to MAX_404_LOG_ROWS rows.
 *
 * Rows are evicted in ascending `last_seen_at` order (ties broken by `id`),
 * so a path that is still being hit survives even if it was first recorded
 * long ago. Does nothing when the table is at or below the cap.
 *
 * Private — invoked by `log404` after each upsert.
 */
private async enforce404Cap(): Promise<void> {
	const tally = await this.db
		.selectFrom("_emdash_404_log")
		.select((eb) => eb.fn.countAll<number>().as("c"))
		.executeTakeFirst();

	const total = Number(tally?.c ?? 0);
	const overflow = total - MAX_404_LOG_ROWS;
	if (overflow <= 0) return;

	// The victims are chosen by a subquery rather than fetched into JS and
	// sent back as bind parameters, so the statement stays the same size no
	// matter how far over the cap the table is (e.g. an install that grew
	// past the limit before the cap existed).
	const victims = this.db
		.selectFrom("_emdash_404_log")
		.select("id")
		.orderBy("last_seen_at", "asc")
		.orderBy("id", "asc")
		.limit(overflow);

	await this.db.deleteFrom("_emdash_404_log").where("id", "in", victims).execute();
}
|
|
390
491
|
|
|
@@ -438,6 +539,10 @@ export class RedirectRepository {
|
|
|
438
539
|
}
|
|
439
540
|
|
|
440
541
|
async get404Summary(limit = 50): Promise<NotFoundSummary[]> {
|
|
542
|
+
// Since rows are now deduped by path, each path has exactly one row
|
|
543
|
+
// with `hits` as the running count and `last_seen_at` as the latest
|
|
544
|
+
// timestamp. The subquery for `top_referrer` collapses to a simple
|
|
545
|
+
// pick of the row's stored referrer (the most recent one seen).
|
|
441
546
|
const rows = await sql<{
|
|
442
547
|
path: string;
|
|
443
548
|
count: number;
|
|
@@ -446,14 +551,12 @@ export class RedirectRepository {
|
|
|
446
551
|
}>`
|
|
447
552
|
SELECT
|
|
448
553
|
path,
|
|
449
|
-
|
|
450
|
-
MAX(
|
|
554
|
+
SUM(hits) as count,
|
|
555
|
+
MAX(last_seen_at) as last_seen,
|
|
451
556
|
(
|
|
452
557
|
SELECT referrer FROM _emdash_404_log AS inner_log
|
|
453
558
|
WHERE inner_log.path = _emdash_404_log.path
|
|
454
559
|
AND referrer IS NOT NULL AND referrer != ''
|
|
455
|
-
GROUP BY referrer
|
|
456
|
-
ORDER BY COUNT(*) DESC
|
|
457
560
|
LIMIT 1
|
|
458
561
|
) as top_referrer
|
|
459
562
|
FROM _emdash_404_log
|
package/src/database/types.ts
CHANGED
|
@@ -466,6 +466,15 @@ export interface NotFoundLogTable {
|
|
|
466
466
|
referrer: string | null;
|
|
467
467
|
user_agent: string | null;
|
|
468
468
|
ip: string | null;
|
|
469
|
+
hits: number;
|
|
470
|
+
/**
|
|
471
|
+
* Migration 035 adds this as a nullable column (SQLite can't add a
|
|
472
|
+
* NOT NULL column with a non-constant default to an existing table).
|
|
473
|
+
* The `log404` upsert always writes a value, so new and updated rows
|
|
474
|
+
* always have one, but existing rows pre-migration were backfilled
|
|
475
|
+
* without a NOT NULL constraint. Typed as nullable to match the schema.
|
|
476
|
+
*/
|
|
477
|
+
last_seen_at: string | null;
|
|
469
478
|
created_at: string;
|
|
470
479
|
}
|
|
471
480
|
|
package/src/emdash-runtime.ts
CHANGED
|
@@ -19,6 +19,7 @@ import type {
|
|
|
19
19
|
} from "./astro/integration/runtime.js";
|
|
20
20
|
import type { EmDashManifest, ManifestCollection } from "./astro/types.js";
|
|
21
21
|
import { getAuthMode } from "./auth/mode.js";
|
|
22
|
+
import { getTrustedProxyHeaders } from "./auth/trusted-proxy.js";
|
|
22
23
|
import { isSqlite } from "./database/dialect-helpers.js";
|
|
23
24
|
import { kyselyLogOption } from "./database/instrumentation.js";
|
|
24
25
|
import { runMigrations } from "./database/migrations/runner.js";
|
|
@@ -2080,6 +2081,7 @@ export class EmDashRuntime {
|
|
|
2080
2081
|
const routeRegistry = new PluginRouteRegistry({
|
|
2081
2082
|
db: this.db,
|
|
2082
2083
|
emailPipeline: this.email ?? undefined,
|
|
2084
|
+
trustedProxyHeaders: getTrustedProxyHeaders(this.config),
|
|
2083
2085
|
});
|
|
2084
2086
|
routeRegistry.register(trustedPlugin);
|
|
2085
2087
|
|
|
@@ -2321,7 +2323,7 @@ export class EmDashRuntime {
|
|
|
2321
2323
|
|
|
2322
2324
|
try {
|
|
2323
2325
|
const headers = sanitizeHeadersForSandbox(request.headers);
|
|
2324
|
-
const meta = extractRequestMeta(request);
|
|
2326
|
+
const meta = extractRequestMeta(request, this.config);
|
|
2325
2327
|
const result = await plugin.invokeRoute(routeName, body, {
|
|
2326
2328
|
url: request.url,
|
|
2327
2329
|
method: request.method,
|
package/src/import/registry.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Manages available import sources and provides URL probing.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import {
|
|
7
|
+
import { resolveAndValidateExternalUrl } from "./ssrf.js";
|
|
8
8
|
import type { ImportSource, ProbeResult, SourceProbeResult } from "./types.js";
|
|
9
9
|
|
|
10
10
|
// Regex pattern for URL normalization
|
|
@@ -63,8 +63,9 @@ export async function probeUrl(url: string): Promise<ProbeResult> {
|
|
|
63
63
|
// Remove trailing slash for consistency
|
|
64
64
|
normalizedUrl = normalizedUrl.replace(TRAILING_SLASHES_PATTERN, "");
|
|
65
65
|
|
|
66
|
-
// SSRF: reject internal/private network targets
|
|
67
|
-
|
|
66
|
+
// SSRF: reject internal/private network targets. DNS resolution
|
|
67
|
+
// catches hostnames that resolve to private addresses.
|
|
68
|
+
await resolveAndValidateExternalUrl(normalizedUrl);
|
|
68
69
|
|
|
69
70
|
const results: SourceProbeResult[] = [];
|
|
70
71
|
const urlSources = getUrlSources();
|