emdash 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/dist/{apply-B4MsLM-w.mjs → apply-5uslYdUu.mjs} +174 -17
  2. package/dist/apply-5uslYdUu.mjs.map +1 -0
  3. package/dist/astro/index.d.mts +4 -4
  4. package/dist/astro/index.mjs +7 -3
  5. package/dist/astro/index.mjs.map +1 -1
  6. package/dist/astro/middleware/auth.d.mts +4 -4
  7. package/dist/astro/middleware/redirect.mjs +1 -1
  8. package/dist/astro/middleware/request-context.mjs +6 -1
  9. package/dist/astro/middleware/request-context.mjs.map +1 -1
  10. package/dist/astro/middleware.mjs +13 -12
  11. package/dist/astro/middleware.mjs.map +1 -1
  12. package/dist/astro/types.d.mts +13 -4
  13. package/dist/astro/types.d.mts.map +1 -1
  14. package/dist/cli/index.mjs +4 -4
  15. package/dist/{content-BsBoyj8G.mjs → content-D7J5y73J.mjs} +27 -1
  16. package/dist/{content-BsBoyj8G.mjs.map → content-D7J5y73J.mjs.map} +1 -1
  17. package/dist/db/index.d.mts +2 -2
  18. package/dist/db/index.mjs +1 -1
  19. package/dist/{index-BYv0mB9g.d.mts → index-De6_Xv3v.d.mts} +77 -3
  20. package/dist/index-De6_Xv3v.d.mts.map +1 -0
  21. package/dist/index.d.mts +4 -4
  22. package/dist/index.mjs +7 -7
  23. package/dist/media/local-runtime.d.mts +4 -4
  24. package/dist/plugins/adapt-sandbox-entry.d.mts +4 -4
  25. package/dist/{query-Bk_3vKvU.mjs → query-g4Ug-9j9.mjs} +3 -3
  26. package/dist/{query-Bk_3vKvU.mjs.map → query-g4Ug-9j9.mjs.map} +1 -1
  27. package/dist/{redirect-7lGhLBNZ.mjs → redirect-CN0Rt9Ob.mjs} +66 -10
  28. package/dist/redirect-CN0Rt9Ob.mjs.map +1 -0
  29. package/dist/{runner-Fl2NcUUz.d.mts → runner-BR2xKwhn.d.mts} +2 -2
  30. package/dist/{runner-Fl2NcUUz.d.mts.map → runner-BR2xKwhn.d.mts.map} +1 -1
  31. package/dist/{runner-Cd-_WyDo.mjs → runner-tQ7BJ4T7.mjs} +211 -134
  32. package/dist/runner-tQ7BJ4T7.mjs.map +1 -0
  33. package/dist/runtime.d.mts +4 -4
  34. package/dist/{search-DI4bM2w9.mjs → search-B0effn3j.mjs} +117 -23
  35. package/dist/search-B0effn3j.mjs.map +1 -0
  36. package/dist/seed/index.d.mts +2 -2
  37. package/dist/seed/index.mjs +3 -3
  38. package/dist/{taxonomies-DbrKzDju.mjs → taxonomies-K2z0Uhnj.mjs} +2 -2
  39. package/dist/{taxonomies-DbrKzDju.mjs.map → taxonomies-K2z0Uhnj.mjs.map} +1 -1
  40. package/dist/{types-8xrvl_68.d.mts → types-C2v0c34j.d.mts} +10 -1
  41. package/dist/{types-8xrvl_68.d.mts.map → types-C2v0c34j.d.mts.map} +1 -1
  42. package/dist/{validate-CaLH1Ia2.d.mts → validate-kM8Pjuf7.d.mts} +2 -2
  43. package/dist/{validate-CaLH1Ia2.d.mts.map → validate-kM8Pjuf7.d.mts.map} +1 -1
  44. package/dist/version-BnTKdfam.mjs +7 -0
  45. package/dist/{version-Uaf2ynPX.mjs.map → version-BnTKdfam.mjs.map} +1 -1
  46. package/package.json +5 -5
  47. package/src/api/handlers/content.ts +2 -0
  48. package/src/api/schemas/content.ts +8 -0
  49. package/src/astro/integration/font-provider.ts +3 -1
  50. package/src/astro/integration/index.ts +2 -0
  51. package/src/astro/integration/runtime.ts +55 -1
  52. package/src/astro/routes/admin.astro +14 -7
  53. package/src/astro/routes/api/auth/magic-link/send.ts +2 -1
  54. package/src/astro/routes/api/auth/passkey/options.ts +2 -1
  55. package/src/astro/routes/api/auth/signup/request.ts +26 -8
  56. package/src/astro/routes/api/comments/[collection]/[contentId]/index.ts +10 -6
  57. package/src/astro/routes/api/content/[collection]/[id]/compare.ts +1 -1
  58. package/src/astro/routes/api/content/[collection]/[id]/preview-url.ts +1 -1
  59. package/src/astro/routes/api/content/[collection]/[id]/revisions.ts +1 -1
  60. package/src/astro/routes/api/content/[collection]/[id]/translations.ts +26 -0
  61. package/src/astro/routes/api/content/[collection]/[id].ts +30 -2
  62. package/src/astro/routes/api/content/[collection]/index.ts +19 -1
  63. package/src/astro/routes/api/content/[collection]/trash.ts +1 -1
  64. package/src/astro/routes/api/import/wordpress-plugin/analyze.ts +4 -3
  65. package/src/astro/routes/api/import/wordpress-plugin/execute.ts +4 -3
  66. package/src/astro/routes/api/manifest.ts +7 -0
  67. package/src/astro/routes/api/oauth/device/code.ts +2 -1
  68. package/src/astro/routes/api/oauth/device/token.ts +2 -1
  69. package/src/astro/routes/api/setup/admin-verify.ts +30 -5
  70. package/src/astro/routes/api/setup/admin.ts +32 -8
  71. package/src/astro/routes/api/setup/index.ts +5 -2
  72. package/src/astro/types.ts +9 -0
  73. package/src/auth/rate-limit.ts +50 -22
  74. package/src/auth/setup-nonce.ts +22 -0
  75. package/src/auth/trusted-proxy.ts +92 -0
  76. package/src/database/migrations/035_bounded_404_log.ts +112 -0
  77. package/src/database/migrations/runner.ts +2 -0
  78. package/src/database/repositories/content.ts +39 -0
  79. package/src/database/repositories/options.ts +25 -0
  80. package/src/database/repositories/redirect.ts +111 -8
  81. package/src/database/types.ts +9 -0
  82. package/src/emdash-runtime.ts +3 -1
  83. package/src/import/registry.ts +4 -3
  84. package/src/import/ssrf.ts +253 -12
  85. package/src/mcp/server.ts +76 -3
  86. package/src/plugins/context.ts +15 -3
  87. package/src/plugins/manager.ts +6 -0
  88. package/src/plugins/request-meta.ts +66 -15
  89. package/src/plugins/routes.ts +3 -1
  90. package/src/seed/apply.ts +26 -0
  91. package/src/visual-editing/toolbar.ts +6 -1
  92. package/dist/apply-B4MsLM-w.mjs.map +0 -1
  93. package/dist/index-BYv0mB9g.d.mts.map +0 -1
  94. package/dist/redirect-7lGhLBNZ.mjs.map +0 -1
  95. package/dist/runner-Cd-_WyDo.mjs.map +0 -1
  96. package/dist/search-DI4bM2w9.mjs.map +0 -1
  97. package/dist/version-Uaf2ynPX.mjs +0 -7
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Resolve the list of client-IP headers the operator trusts.
3
+ *
4
+ * Resolution order:
5
+ * 1. `config.trustedProxyHeaders` — explicit opt-in via astro.config.mjs.
6
+ * An empty array is respected (means "trust nothing, ignore env").
7
+ * 2. `EMDASH_TRUSTED_PROXY_HEADERS` env var — comma-separated header names.
8
+ * 3. `[]` — default, no trusted headers.
9
+ *
10
+ * Operators must only set this when they control the reverse proxy.
11
+ * Untrusted clients can set any header they like; trusting headers from
12
+ * an open network defeats rate limiting.
13
+ *
14
+ * Header names are returned lowercased because HTTP header lookups are
15
+ * case-insensitive.
16
+ */
17
+
18
+ import type { EmDashConfig } from "../astro/integration/runtime.js";
19
+
20
/**
 * Lowercase RFC 7230 `token` matcher, i.e. the shape of a usable HTTP header
 * name. The character class has no uppercase range, so a name must be
 * lowercased before it is tested. A name that fails this check would make
 * `Headers.get()` throw a TypeError at runtime, which would otherwise
 * surface as a 500 from every auth route.
 */
const HEADER_NAME_PATTERN = /^[!#$%&'*+\-.^_`|~0-9a-z]+$/;

/**
 * Canonicalise a list of header names: each entry is trimmed and lowercased,
 * then kept only if it is non-empty and a valid RFC 7230 token. Input order
 * is preserved and duplicates are not removed.
 *
 * @param names - Raw header names, e.g. from config or a pre-resolved list.
 * @returns A new array containing only the usable, lowercased names.
 */
export function normalizeTrustedHeaders(names: readonly string[]): string[] {
	const accepted: string[] = [];
	for (const rawName of names) {
		const candidate = rawName.trim().toLowerCase();
		if (candidate.length > 0 && HEADER_NAME_PATTERN.test(candidate)) {
			accepted.push(candidate);
		}
	}
	return accepted;
}
38
+
39
/**
 * Report whether `name` matches `HEADER_NAME_PATTERN`. The name is tested
 * exactly as given — no trimming or lowercasing happens here.
 */
function isValidHeaderName(name: string): boolean {
	const tokenMatch = HEADER_NAME_PATTERN.exec(name);
	return tokenMatch !== null;
}
42
+
43
/**
 * Memoised result of parsing `EMDASH_TRUSTED_PROXY_HEADERS`. `null` marks
 * "not parsed yet"; any array (including an empty one) is a parsed result.
 */
let _envCache: string[] | null = null;

/** Test-only hook: forget the memoised value so the next lookup re-reads the env. */
export function _resetTrustedProxyHeadersCache(): void {
	_envCache = null;
}

/**
 * Parse the comma-separated `EMDASH_TRUSTED_PROXY_HEADERS` env var into a
 * list of lowercased, valid header names. The result is computed once and
 * then served from `_envCache`.
 *
 * @returns The parsed header names, or `[]` when the variable is unset,
 *   empty, or reading it throws.
 */
function getEnvTrustedHeaders(): string[] {
	if (_envCache !== null) {
		return _envCache;
	}

	let configured: string | undefined;
	try {
		// process.env is consulted first so SSR/container deployments can
		// change the value at runtime; Vite/Astro inline import.meta.env at
		// build time, which would freeze it into the bundle. import.meta.env
		// is only the fallback for bundler-managed environments where
		// process.env isn't populated.
		// eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- import.meta.env shape varies by bundler
		const bundlerEnv = (import.meta as unknown as { env?: Record<string, string | undefined> }).env;
		const runtimeValue =
			typeof process !== "undefined" ? process.env?.EMDASH_TRUSTED_PROXY_HEADERS : undefined;
		configured = runtimeValue || bundlerEnv?.EMDASH_TRUSTED_PROXY_HEADERS;
	} catch {
		configured = undefined;
	}

	const parsed: string[] = [];
	if (configured) {
		for (const piece of configured.split(",")) {
			const candidate = piece.trim().toLowerCase();
			if (candidate.length > 0 && isValidHeaderName(candidate)) {
				parsed.push(candidate);
			}
		}
	}

	_envCache = parsed;
	return parsed;
}
78
+
79
/**
 * Return the lowercased list of headers to trust for client-IP resolution.
 *
 * Resolution order:
 *  1. `config.trustedProxyHeaders`, whenever it is explicitly set. An empty
 *     array is respected and short-circuits the env lookup.
 *  2. The `EMDASH_TRUSTED_PROXY_HEADERS` env var (via `getEnvTrustedHeaders`).
 *  3. `[]` when neither is provided.
 *
 * The config list goes through `normalizeTrustedHeaders` — the same exported
 * helper offered to callers holding a pre-resolved list — so the
 * trim/lowercase/validate rules are defined in exactly one place.
 *
 * @param config - Resolved EmDash config; `null`/`undefined` means "no config".
 * @returns Normalised header names; entries that are empty or not valid
 *   RFC 7230 tokens are dropped.
 */
export function getTrustedProxyHeaders(config: EmDashConfig | null | undefined): string[] {
	if (config && config.trustedProxyHeaders !== undefined) {
		return normalizeTrustedHeaders(config.trustedProxyHeaders);
	}
	return getEnvTrustedHeaders();
}
@@ -0,0 +1,112 @@
1
+ import type { Kysely } from "kysely";
2
+ import { sql } from "kysely";
3
+
4
+ /**
5
+ * Migration: Bounded 404 logging
6
+ *
7
+ * Hardens `_emdash_404_log` against unauthenticated DoS. Previously every 404
8
+ * inserted a new row, so an attacker could grow the table without bound.
9
+ *
10
+ * Changes:
11
+ * - Adds `hits` (default 1, NOT NULL)
12
+ * - Adds `last_seen_at` (nullable; SQLite can't add NOT NULL with a
13
+ * non-constant default to a populated table, so the column is nullable
14
+ * at the schema level and backfilled from `created_at` for existing rows;
15
+ * new inserts via `log404` always set it)
16
+ * - Deduplicates existing rows by path, keeping the most recent row per
17
+ * path and summing hits
18
+ * - Adds a UNIQUE index on `path` so upsert semantics work
19
+ */
20
+
21
/**
 * Apply migration 035: add `hits` and `last_seen_at` to `_emdash_404_log`,
 * collapse existing rows to one per path, and swap the plain `path` index
 * for a UNIQUE one so the table can be upserted by path.
 *
 * @param db - Kysely handle the migration runs against.
 */
export async function up(db: Kysely<unknown>): Promise<void> {
	const table = "_emdash_404_log";

	// 1. New columns. `hits` can be NOT NULL because its default is constant.
	await db.schema
		.alterTable(table)
		.addColumn("hits", "integer", (column) => column.notNull().defaultTo(1))
		.execute();

	// `last_seen_at` is added as nullable: SQLite rejects a NOT NULL column
	// with a non-constant default on a populated table. Existing rows are
	// filled in below; later inserts are expected to set the value themselves.
	await db.schema.alterTable(table).addColumn("last_seen_at", "text").execute();

	// Seed last_seen_at from created_at for every pre-existing row.
	const backfillLastSeen = sql`
		UPDATE _emdash_404_log
		SET last_seen_at = created_at
		WHERE last_seen_at IS NULL
	`;
	await backfillLastSeen.execute(db);

	// 2. Deduplicate by path. The newest row per path (rn = 1) becomes the
	//    keeper and receives the per-path row count as `hits` plus the newest
	//    created_at as `last_seen_at`. ROW_NUMBER keeps the statement valid on
	//    both SQLite (3.25+, 2018) and Postgres; the earlier GROUP BY form was
	//    accepted by SQLite but rejected by Postgres because `id` was neither
	//    grouped nor aggregated.
	const rollUpOntoKeepers = sql`
		WITH ranked AS (
			SELECT
				id,
				path,
				ROW_NUMBER() OVER (
					PARTITION BY path
					ORDER BY created_at DESC, id DESC
				) AS rn,
				COUNT(*) OVER (PARTITION BY path) AS path_count,
				MAX(created_at) OVER (PARTITION BY path) AS latest_created_at
			FROM _emdash_404_log
		)
		UPDATE _emdash_404_log
		SET
			hits = (SELECT path_count FROM ranked WHERE ranked.id = _emdash_404_log.id),
			last_seen_at = (SELECT latest_created_at FROM ranked WHERE ranked.id = _emdash_404_log.id)
		WHERE id IN (SELECT id FROM ranked WHERE rn = 1)
	`;
	await rollUpOntoKeepers.execute(db);

	// Remove everything except the keeper (the freshest row) for each path.
	const deleteNonKeepers = sql`
		DELETE FROM _emdash_404_log
		WHERE id IN (
			SELECT id FROM (
				SELECT
					id,
					ROW_NUMBER() OVER (
						PARTITION BY path
						ORDER BY created_at DESC, id DESC
					) AS rn
				FROM _emdash_404_log
			) AS ranked
			WHERE rn > 1
		)
	`;
	await deleteNonKeepers.execute(db);

	// 3. UNIQUE index on path — this is what gives upserts a conflict target.
	await db.schema
		.createIndex("idx_404_log_path_unique")
		.on(table)
		.column("path")
		.unique()
		.execute();

	// The old non-unique index is redundant now; the unique one serves the
	// same lookups.
	await db.schema.dropIndex("idx_404_log_path").execute();

	// 4. Index last_seen_at so eviction can order by it.
	await db.schema
		.createIndex("idx_404_log_last_seen")
		.on(table)
		.column("last_seen_at")
		.execute();
}
+
103
/**
 * Revert migration 035: drop the two indexes it created, recreate the
 * original non-unique `path` index, then drop the `last_seen_at` and `hits`
 * columns. Rows removed by the dedup step in `up` are not restored.
 *
 * @param db - Kysely handle the migration runs against.
 */
export async function down(db: Kysely<unknown>): Promise<void> {
	const table = "_emdash_404_log";

	// Indexes go first, in the same order as they are listed here.
	for (const indexName of ["idx_404_log_last_seen", "idx_404_log_path_unique"]) {
		await db.schema.dropIndex(indexName).execute();
	}

	// Bring back the plain (non-unique) path index.
	await db.schema.createIndex("idx_404_log_path").on(table).column("path").execute();

	// Columns are dropped one statement at a time, last_seen_at before hits.
	for (const columnName of ["last_seen_at", "hits"]) {
		await db.schema.alterTable(table).dropColumn(columnName).execute();
	}
}
@@ -35,6 +35,7 @@ import * as m031 from "./031_bylines.js";
35
35
  import * as m032 from "./032_rate_limits.js";
36
36
  import * as m033 from "./033_optimize_content_indexes.js";
37
37
  import * as m034 from "./034_published_at_index.js";
38
+ import * as m035 from "./035_bounded_404_log.js";
38
39
 
39
40
  const MIGRATIONS: Readonly<Record<string, Migration>> = Object.freeze({
40
41
  "001_initial": m001,
@@ -70,6 +71,7 @@ const MIGRATIONS: Readonly<Record<string, Migration>> = Object.freeze({
70
71
  "032_rate_limits": m032,
71
72
  "033_optimize_content_indexes": m033,
72
73
  "034_published_at_index": m034,
74
+ "035_bounded_404_log": m035,
73
75
  });
74
76
 
75
77
  /** Total number of registered migrations. Exported for use in tests. */
@@ -1031,6 +1031,45 @@ export class ContentRepository {
1031
1031
  return updated;
1032
1032
  }
1033
1033
 
1034
/**
 * Point a content item's `draft_revision_id` at `revisionId` and bump its
 * `updated_at`.
 *
 * Intended for seed/import paths that stage a revision's data before
 * promoting it to live via `publish()`.
 *
 * Three checks run before the write: the item must be returned by
 * `findById`, the revision must be returned by the revision repository, and
 * the revision's `collection`/`entryId` must equal `type`/`id`. The UPDATE
 * itself only touches rows whose `deleted_at` is NULL. Together these keep
 * the content row from pointing at a missing or unrelated revision.
 *
 * @throws EmDashValidationError when the item or revision is not found, or
 *   when the revision belongs to a different item.
 */
async setDraftRevision(type: string, id: string, revisionId: string): Promise<void> {
	const tableName = getTableName(type);
	const now = new Date().toISOString();

	const item = await this.findById(type, id);
	if (!item) {
		throw new EmDashValidationError("Content item not found");
	}

	const revision = await new RevisionRepository(this.db).findById(revisionId);
	if (!revision) {
		throw new EmDashValidationError("Revision not found");
	}

	const ownedByItem = revision.collection === type && revision.entryId === id;
	if (!ownedByItem) {
		throw new EmDashValidationError("Revision does not belong to the specified content item");
	}

	const repoint = sql`
		UPDATE ${sql.ref(tableName)}
		SET draft_revision_id = ${revisionId},
		updated_at = ${now}
		WHERE id = ${id}
		AND deleted_at IS NULL
	`;
	await repoint.execute(this.db);
}
1072
+
1034
1073
  /**
1035
1074
  * Discard pending draft changes
1036
1075
  *
@@ -55,6 +55,31 @@ export class OptionsRepository {
55
55
  .execute();
56
56
  }
57
57
 
58
/**
 * Insert an option only when no row with `name` exists yet.
 *
 * The check and the write are a single `INSERT ... ON CONFLICT DO NOTHING`
 * statement, so concurrent callers cannot both pass an "is it there?" test
 * and then both write.
 *
 * @param name - Option key; the conflict target.
 * @param value - Stored as `JSON.stringify(value)`.
 * @returns `true` if this call inserted the row, `false` if a row with that
 *   name was already present — whatever its stored value, including an
 *   empty string or null.
 */
async setIfAbsent<T = unknown>(name: string, value: T): Promise<boolean> {
	const candidate: OptionTable = { name, value: JSON.stringify(value) };

	const outcome = await this.db
		.insertInto("options")
		.values(candidate)
		.onConflict((conflict) => conflict.column("name").doNothing())
		.executeTakeFirst();

	// When the conflict branch fires and does nothing, the reported
	// inserted-or-updated count is 0; a missing count is treated the same way.
	const insertedRows = outcome.numInsertedOrUpdatedRows ?? 0n;
	return insertedRows > 0n;
}
82
+
58
83
  /**
59
84
  * Delete an option
60
85
  */
@@ -11,6 +11,32 @@ import { currentTimestampValue } from "../dialect-helpers.js";
11
11
  import type { Database, RedirectTable } from "../types.js";
12
12
  import { encodeCursor, decodeCursor, type FindManyResult } from "./types.js";
13
13
 
14
+ // ---------------------------------------------------------------------------
15
+ // Bounded 404 logging
16
+ // ---------------------------------------------------------------------------
17
+
18
/**
 * Upper bound on the number of rows kept in `_emdash_404_log`. Once the
 * table is over this size, the rows with the oldest `last_seen_at` are
 * evicted on insert, so an unauthenticated client requesting endless unique
 * URLs cannot grow the table without limit.
 */
export const MAX_404_LOG_ROWS = 10_000;

/** Longest `Referer` value that is stored; anything longer is cut on insert. */
export const REFERRER_MAX_LENGTH = 512;

/** Longest `User-Agent` value that is stored; anything longer is cut on insert. */
export const USER_AGENT_MAX_LENGTH = 256;

/**
 * Clamp a header-derived string to at most `max` characters.
 *
 * @param value - The raw value; `null` and `undefined` both yield `null`.
 * @param max - Maximum number of characters to keep.
 * @returns `null` for a missing value, otherwise the value itself or its
 *   first `max` characters. An empty string is returned unchanged — turning
 *   it into `null` is left to the caller.
 */
function truncateOrNull(value: string | null | undefined, max: number): string | null {
	if (value == null) {
		return null;
	}
	if (value.length > max) {
		return value.slice(0, max);
	}
	return value;
}
39
+
14
40
  // ---------------------------------------------------------------------------
15
41
  // Types
16
42
  // ---------------------------------------------------------------------------
@@ -369,22 +395,97 @@ export class RedirectRepository {
369
395
 
370
396
  // --- 404 log ------------------------------------------------------------
371
397
 
398
/**
 * Record one 404 hit for `entry.path`.
 *
 * The write is an upsert keyed on `path`: the first hit inserts a row with
 * `hits = 1`; every later hit increments `hits`, refreshes `last_seen_at`
 * and overwrites `referrer`, `user_agent` and `ip` with the values from the
 * latest request. Referrer and user-agent are clamped to
 * REFERRER_MAX_LENGTH / USER_AGENT_MAX_LENGTH first so oversized headers
 * cannot inflate storage. After the upsert the row cap is enforced, which
 * evicts the rows with the oldest `last_seen_at` when the table is over
 * MAX_404_LOG_ROWS.
 *
 * Invoked by the public redirect middleware on every 404. Errors are not
 * caught here; they propagate to the middleware, which swallows them.
 */
async log404(entry: {
	path: string;
	referrer?: string | null;
	userAgent?: string | null;
	ip?: string | null;
}): Promise<void> {
	const seenAt = new Date().toISOString();
	const boundedReferrer = truncateOrNull(entry.referrer, REFERRER_MAX_LENGTH);
	const boundedUserAgent = truncateOrNull(entry.userAgent, USER_AGENT_MAX_LENGTH);
	const clientIp = entry.ip ?? null;

	// Single-statement upsert. With the UNIQUE index on `path`, two
	// concurrent requests for the same new path cannot both insert: the
	// loser lands in the conflict branch and bumps `hits` rather than
	// failing with a uniqueness error.
	await this.db
		.insertInto("_emdash_404_log")
		.values({
			id: ulid(),
			path: entry.path,
			referrer: boundedReferrer,
			user_agent: boundedUserAgent,
			ip: clientIp,
			hits: 1,
			last_seen_at: seenAt,
			created_at: seenAt,
		})
		.onConflict((conflict) =>
			conflict.column("path").doUpdateSet({
				hits: sql`hits + 1`,
				last_seen_at: seenAt,
				referrer: boundedReferrer,
				user_agent: boundedUserAgent,
				ip: clientIp,
			}),
		)
		.execute();

	// Keep the table within its cap. Repeat hits only update a row, so the
	// cap check finds nothing to evict in that case.
	await this.enforce404Cap();
}
454
+
455
/**
 * Trim `_emdash_404_log` back to MAX_404_LOG_ROWS when it has grown past
 * that size. Victims are chosen by ascending `last_seen_at` (ties broken by
 * ascending `id`), so a path that is still being hit survives even if it
 * was first recorded long ago.
 *
 * Private — `log404` calls this after each upsert.
 */
private async enforce404Cap(): Promise<void> {
	const tally = await this.db
		.selectFrom("_emdash_404_log")
		.select((eb) => eb.fn.countAll<number>().as("c"))
		.executeTakeFirst();

	const overflow = Number(tally?.c ?? 0) - MAX_404_LOG_ROWS;
	if (overflow <= 0) {
		return;
	}

	// The victims are selected by a subquery instead of being fetched into
	// JS and sent back as bind parameters. That keeps the statement the same
	// size no matter how far over the cap the table is — relevant for
	// installs whose table grew large before the cap existed.
	const oldestRows = this.db
		.selectFrom("_emdash_404_log")
		.select("id")
		.orderBy("last_seen_at", "asc")
		.orderBy("id", "asc")
		.limit(overflow);

	await this.db.deleteFrom("_emdash_404_log").where("id", "in", oldestRows).execute();
}
390
491
 
@@ -438,6 +539,10 @@ export class RedirectRepository {
438
539
  }
439
540
 
440
541
  async get404Summary(limit = 50): Promise<NotFoundSummary[]> {
542
+ // Since rows are now deduped by path, each path has exactly one row
543
+ // with `hits` as the running count and `last_seen_at` as the latest
544
+ // timestamp. The subquery for `top_referrer` collapses to a simple
545
+ // pick of the row's stored referrer (the most recent one seen).
441
546
  const rows = await sql<{
442
547
  path: string;
443
548
  count: number;
@@ -446,14 +551,12 @@ export class RedirectRepository {
446
551
  }>`
447
552
  SELECT
448
553
  path,
449
- COUNT(*) as count,
450
- MAX(created_at) as last_seen,
554
+ SUM(hits) as count,
555
+ MAX(last_seen_at) as last_seen,
451
556
  (
452
557
  SELECT referrer FROM _emdash_404_log AS inner_log
453
558
  WHERE inner_log.path = _emdash_404_log.path
454
559
  AND referrer IS NOT NULL AND referrer != ''
455
- GROUP BY referrer
456
- ORDER BY COUNT(*) DESC
457
560
  LIMIT 1
458
561
  ) as top_referrer
459
562
  FROM _emdash_404_log
@@ -466,6 +466,15 @@ export interface NotFoundLogTable {
466
466
  referrer: string | null;
467
467
  user_agent: string | null;
468
468
  ip: string | null;
469
+ hits: number;
470
+ /**
471
+ * Migration 035 adds this as a nullable column (SQLite can't add a
472
+ * NOT NULL column with a non-constant default to an existing table).
473
+ * The `log404` upsert always writes a value, so new and updated rows
474
+ * always have one, but existing rows pre-migration were backfilled
475
+ * without a NOT NULL constraint. Typed as nullable to match the schema.
476
+ */
477
+ last_seen_at: string | null;
469
478
  created_at: string;
470
479
  }
471
480
 
@@ -19,6 +19,7 @@ import type {
19
19
  } from "./astro/integration/runtime.js";
20
20
  import type { EmDashManifest, ManifestCollection } from "./astro/types.js";
21
21
  import { getAuthMode } from "./auth/mode.js";
22
+ import { getTrustedProxyHeaders } from "./auth/trusted-proxy.js";
22
23
  import { isSqlite } from "./database/dialect-helpers.js";
23
24
  import { kyselyLogOption } from "./database/instrumentation.js";
24
25
  import { runMigrations } from "./database/migrations/runner.js";
@@ -2080,6 +2081,7 @@ export class EmDashRuntime {
2080
2081
  const routeRegistry = new PluginRouteRegistry({
2081
2082
  db: this.db,
2082
2083
  emailPipeline: this.email ?? undefined,
2084
+ trustedProxyHeaders: getTrustedProxyHeaders(this.config),
2083
2085
  });
2084
2086
  routeRegistry.register(trustedPlugin);
2085
2087
 
@@ -2321,7 +2323,7 @@ export class EmDashRuntime {
2321
2323
 
2322
2324
  try {
2323
2325
  const headers = sanitizeHeadersForSandbox(request.headers);
2324
- const meta = extractRequestMeta(request);
2326
+ const meta = extractRequestMeta(request, this.config);
2325
2327
  const result = await plugin.invokeRoute(routeName, body, {
2326
2328
  url: request.url,
2327
2329
  method: request.method,
@@ -4,7 +4,7 @@
4
4
  * Manages available import sources and provides URL probing.
5
5
  */
6
6
 
7
- import { validateExternalUrl } from "./ssrf.js";
7
+ import { resolveAndValidateExternalUrl } from "./ssrf.js";
8
8
  import type { ImportSource, ProbeResult, SourceProbeResult } from "./types.js";
9
9
 
10
10
  // Regex pattern for URL normalization
@@ -63,8 +63,9 @@ export async function probeUrl(url: string): Promise<ProbeResult> {
63
63
  // Remove trailing slash for consistency
64
64
  normalizedUrl = normalizedUrl.replace(TRAILING_SLASHES_PATTERN, "");
65
65
 
66
- // SSRF: reject internal/private network targets
67
- validateExternalUrl(normalizedUrl);
66
+ // SSRF: reject internal/private network targets. DNS resolution
67
+ // catches hostnames that resolve to private addresses.
68
+ await resolveAndValidateExternalUrl(normalizedUrl);
68
69
 
69
70
  const results: SourceProbeResult[] = [];
70
71
  const urlSources = getUrlSources();