@jant/core 0.3.42 → 0.3.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/bin/commands/import-site.js +1 -1
  2. package/bin/commands/search-reindex.js +175 -0
  3. package/bin/lib/hugo-markdown.js +102 -0
  4. package/bin/lib/site-pull-media.js +1 -4
  5. package/dist/app-Ctl0T0zO.js +5 -0
  6. package/dist/{app-Cu3lveYI.js → app-GbfwoeDJ.js} +630 -123
  7. package/dist/client/.vite/manifest.json +1 -1
  8. package/dist/client/_assets/{client-auth-BRFl5zQA.js → client-auth-CXILhW1b.js} +144 -144
  9. package/dist/{env-wCpMcNXs.js → env-CgaH9Mut.js} +1 -1
  10. package/dist/{github-api-CficQztC.js → github-api-BkRWnqMx.js} +1 -1
  11. package/dist/{github-app-F4qZ05xk.js → github-app-WeadXMb8.js} +1 -1
  12. package/dist/{github-sync-zohnA9qv.js → github-sync-7y_nTXx1.js} +41 -14
  13. package/dist/index.js +5 -5
  14. package/dist/node.js +5 -5
  15. package/dist/{url-FvvgARU9.js → url-umUptr5z.js} +30 -1
  16. package/package.json +1 -1
  17. package/src/__tests__/helpers/app.ts +15 -4
  18. package/src/app.tsx +8 -0
  19. package/src/client/tiptap/__tests__/insert-paragraph-around.test.ts +228 -0
  20. package/src/client/tiptap/extensions.ts +3 -0
  21. package/src/client/tiptap/insert-paragraph-around.ts +79 -0
  22. package/src/db/migrations/0018_yummy_franklin_richards.sql +6 -0
  23. package/src/db/migrations/meta/0018_snapshot.json +2225 -0
  24. package/src/db/migrations/meta/_journal.json +8 -1
  25. package/src/db/migrations/pg/0016_familiar_lionheart.sql +6 -0
  26. package/src/db/migrations/pg/meta/0016_snapshot.json +2840 -0
  27. package/src/db/migrations/pg/meta/_journal.json +8 -1
  28. package/src/db/pg/schema.ts +18 -0
  29. package/src/db/schema.ts +23 -0
  30. package/src/index.ts +1 -2
  31. package/src/lib/__tests__/hosted-signin.test.ts +30 -0
  32. package/src/lib/__tests__/navigation.test.ts +4 -20
  33. package/src/lib/__tests__/rate-limit-d1.test.ts +82 -0
  34. package/src/lib/__tests__/rate-limit-memory.test.ts +69 -0
  35. package/src/lib/__tests__/summary.test.ts +140 -0
  36. package/src/lib/__tests__/view.test.ts +66 -0
  37. package/src/lib/feed.ts +70 -34
  38. package/src/lib/hosted-signin.ts +9 -3
  39. package/src/lib/navigation.ts +11 -12
  40. package/src/lib/post-meta.ts +20 -2
  41. package/src/lib/rate-limit-d1.ts +99 -0
  42. package/src/lib/rate-limit-memory.ts +105 -0
  43. package/src/lib/rate-limit.ts +63 -0
  44. package/src/lib/render.tsx +9 -0
  45. package/src/lib/resolve-config.ts +9 -0
  46. package/src/lib/summary.ts +42 -7
  47. package/src/lib/url.ts +34 -0
  48. package/src/lib/view.ts +42 -8
  49. package/src/middleware/__tests__/auth.test.ts +44 -4
  50. package/src/middleware/__tests__/rate-limit.test.ts +113 -0
  51. package/src/middleware/__tests__/session.test.ts +85 -0
  52. package/src/middleware/auth.ts +62 -25
  53. package/src/middleware/rate-limit.ts +54 -0
  54. package/src/middleware/session.ts +36 -0
  55. package/src/routes/__tests__/compose.test.ts +1 -1
  56. package/src/routes/api/__tests__/search.test.ts +48 -0
  57. package/src/routes/api/__tests__/upload-multipart.test.ts +11 -4
  58. package/src/routes/api/internal/search-reindex.ts +40 -0
  59. package/src/routes/api/search.ts +13 -0
  60. package/src/routes/auth/dev.ts +1 -1
  61. package/src/routes/auth/signin.tsx +23 -5
  62. package/src/routes/dash/settings.tsx +3 -5
  63. package/src/routes/feed/__tests__/sitemap.test.ts +320 -4
  64. package/src/routes/feed/sitemap.ts +208 -33
  65. package/src/routes/pages/__tests__/page-canonical.test.ts +101 -0
  66. package/src/routes/pages/home.tsx +24 -15
  67. package/src/routes/pages/page.tsx +34 -0
  68. package/src/routes/pages/partials.tsx +4 -15
  69. package/src/runtime/cloudflare.ts +4 -0
  70. package/src/runtime/node.ts +16 -0
  71. package/src/services/__tests__/post.test.ts +205 -0
  72. package/src/services/__tests__/search.test.ts +44 -0
  73. package/src/services/export.ts +9 -2
  74. package/src/services/post.ts +200 -2
  75. package/src/types/app-context.ts +20 -0
  76. package/src/types/config.ts +8 -0
  77. package/src/types/props.ts +0 -7
  78. package/src/ui/layouts/BaseLayout.tsx +9 -0
  79. package/dist/app-DzCB4yOp.js +0 -5
@@ -3,10 +3,15 @@ import {
3
3
  getHostedControlPlaneProviderLabel as getConfiguredHostedControlPlaneProviderLabel,
4
4
  getSiteResolutionMode,
5
5
  } from "./env.js";
6
+ import { isSafeInternalRedirect } from "./url.js";
6
7
 
7
- function getHostedAdminContinuationPath(publicRequestUrl: string): string {
8
+ function getHostedAdminContinuationPath(
9
+ publicRequestUrl: string,
10
+ redirect?: string,
11
+ ): string {
8
12
  const currentHost = new URL(publicRequestUrl).host;
9
- return `/auth/handoff/start?host=${encodeURIComponent(currentHost)}&redirect=${encodeURIComponent("/")}`;
13
+ const safeRedirect = isSafeInternalRedirect(redirect) ? redirect : "/";
14
+ return `/auth/handoff/start?host=${encodeURIComponent(currentHost)}&redirect=${encodeURIComponent(safeRedirect)}`;
10
15
  }
11
16
 
12
17
  function buildHostedControlPlaneUrl(
@@ -31,11 +36,12 @@ function buildHostedControlPlaneUrl(
31
36
  export function getHostedControlPlaneSigninUrl(
32
37
  env: object | undefined | null,
33
38
  publicRequestUrl: string,
39
+ redirect?: string,
34
40
  ): string | null {
35
41
  return buildHostedControlPlaneUrl(
36
42
  env,
37
43
  "/auth/handoff/start",
38
- getHostedAdminContinuationPath(publicRequestUrl).replace(
44
+ getHostedAdminContinuationPath(publicRequestUrl, redirect).replace(
39
45
  /^\/auth\/handoff\/start/,
40
46
  "",
41
47
  ),
@@ -60,8 +60,15 @@ export function getHomeDefaultViewFromNavItems(
60
60
  * });
61
61
  * ```
62
62
  */
63
- export async function getNavigationData(c: Context): Promise<NavigationData> {
64
- const items = await c.var.services.navItems.list();
63
+ export async function getNavigationData(
64
+ c: Context,
65
+ options?: { preloadedItems?: NavItem[] },
66
+ ): Promise<NavigationData> {
67
+ // Callers that already fetched nav items (e.g. home route, which needs
68
+ // `homeDefaultView` before deciding which timeline to assemble) can pass
69
+ // them in to avoid a redundant DB round-trip.
70
+ const items =
71
+ options?.preloadedItems ?? (await c.var.services.navItems.list());
65
72
  const currentPath = c.var.publicPath;
66
73
  const appConfig = c.var.appConfig;
67
74
 
@@ -86,17 +93,9 @@ export async function getNavigationData(c: Context): Promise<NavigationData> {
86
93
  // Render footer markdown
87
94
  const siteFooterHtml = siteFooter ? renderMarkdown(siteFooter) : undefined;
88
95
 
89
- // Check auth status (needed for compose button and system nav items)
90
- let isAuthenticated = false;
96
+ // Auth state is populated once per request by `attachSession` middleware.
97
+ const isAuthenticated = c.var.isAuthenticated;
91
98
  let collections: Collection[] = [];
92
- try {
93
- const session = await c.var.auth.api.getSession({
94
- headers: c.req.raw.headers,
95
- });
96
- isAuthenticated = !!session?.user;
97
- } catch {
98
- // Not authenticated
99
- }
100
99
 
101
100
  // Compute freshness for collection nav items
102
101
  const collectionNavIds: string[] = [];
@@ -1,9 +1,27 @@
1
1
  import type { Post } from "../types.js";
2
2
  import { extractDisplayDomain } from "./url.js";
3
+ import { extractBodyText } from "./summary.js";
3
4
 
4
5
  const TITLE_MAX_CHARS = 72;
5
6
  const DESCRIPTION_MAX_CHARS = 160;
6
7
 
8
+ /**
9
+ * Derive a clean plain-text projection of the body for human-facing meta.
10
+ *
11
+ * We cannot reuse `post.bodyText` here: that column is written with
12
+ * `includeLinkHrefs: true` so inline link URLs land in the FTS index, which
13
+ * pollutes the stored text with trailing URLs. Re-derive from the source
14
+ * TipTap JSON (`post.body`) without that option.
15
+ */
16
+ function getCleanBodyText(post: Post): string {
17
+ // Prefer re-derivation from `post.body` (TipTap JSON). Fall back to
18
+ // `post.bodyText` only when `body` is absent (legacy rows / fixtures);
19
+ // without link marks in the source, there is no URL pollution to worry
20
+ // about.
21
+ if (post.body) return extractBodyText(post.body) ?? "";
22
+ return post.bodyText ?? "";
23
+ }
24
+
7
25
  function normalizeText(text: string | null | undefined): string {
8
26
  return (text ?? "").replace(/\s+/g, " ").trim();
9
27
  }
@@ -49,7 +67,7 @@ function getTitleCandidate(post: Post): string {
49
67
  const summarySnippet = getFirstParagraph(post.summary);
50
68
  if (summarySnippet) return clipText(summarySnippet, TITLE_MAX_CHARS);
51
69
 
52
- const bodySnippet = getFirstParagraph(post.bodyText);
70
+ const bodySnippet = getFirstParagraph(getCleanBodyText(post));
53
71
  if (bodySnippet) return clipText(bodySnippet, TITLE_MAX_CHARS);
54
72
 
55
73
  if (post.format === "link" && post.url) {
@@ -68,7 +86,7 @@ function getDescriptionCandidate(post: Post): string {
68
86
  const summaryText = normalizeText(post.summary);
69
87
  if (summaryText) return clipText(summaryText, DESCRIPTION_MAX_CHARS);
70
88
 
71
- const bodyText = normalizeText(post.bodyText);
89
+ const bodyText = normalizeText(getCleanBodyText(post));
72
90
  if (bodyText) return clipText(bodyText, DESCRIPTION_MAX_CHARS);
73
91
 
74
92
  const quoteText = normalizeText(post.quoteText);
@@ -0,0 +1,99 @@
1
+ /**
2
+ * D1 Sliding-Window Rate Limiter
3
+ *
4
+ * Used by the Cloudflare Workers runtime where isolates are ephemeral and
5
+ * memory-based limiters would silently drop state between requests. Each
6
+ * check performs one SELECT (over a two-row range) plus one UPSERT — both
7
+ * hit a composite primary key so the round-trips are cheap.
8
+ *
9
+ * Algorithm: two-window weighted counter. The previous window's tally
10
+ * decays linearly as the current window fills, giving a smoother limit
11
+ * than a naive fixed window (which would allow a 2x burst at the
12
+ * boundary) while remaining a single-key storage primitive.
13
+ *
14
+ * For Node deployments use `createMemoryRateLimiter` instead.
15
+ */
16
+
17
+ import { and, eq, inArray, lt, sql } from "drizzle-orm";
18
+ import type { Database } from "../db/index.js";
19
+ import type { DatabaseSchema } from "../db/schema-bundle.js";
20
+ import type {
21
+ RateLimitCheckOptions,
22
+ RateLimitResult,
23
+ RateLimiter,
24
+ } from "./rate-limit.js";
25
+
26
+ /**
27
+ * Probability of running opportunistic cleanup on any given write.
28
+ * 1% strikes a balance between bounded table growth and per-request cost.
29
+ */
30
+ const CLEANUP_PROBABILITY = 0.01;
31
+
32
+ export function createD1RateLimiter(
33
+ db: Database,
34
+ schema: DatabaseSchema,
35
+ now: () => number = () => Math.floor(Date.now() / 1000),
36
+ ): RateLimiter {
37
+ const { rateLimit } = schema;
38
+
39
+ return {
40
+ async check(
41
+ key: string,
42
+ opts: RateLimitCheckOptions,
43
+ ): Promise<RateLimitResult> {
44
+ const { limit, windowSec } = opts;
45
+ const nowSec = now();
46
+ const currentWindow = Math.floor(nowSec / windowSec) * windowSec;
47
+ const previousWindow = currentWindow - windowSec;
48
+
49
+ const rows = await db
50
+ .select({
51
+ windowStart: rateLimit.windowStart,
52
+ count: rateLimit.count,
53
+ })
54
+ .from(rateLimit)
55
+ .where(
56
+ and(
57
+ eq(rateLimit.key, key),
58
+ inArray(rateLimit.windowStart, [currentWindow, previousWindow]),
59
+ ),
60
+ );
61
+
62
+ let currentCount = 0;
63
+ let previousCount = 0;
64
+ for (const row of rows) {
65
+ if (row.windowStart === currentWindow) currentCount = row.count;
66
+ else if (row.windowStart === previousWindow) previousCount = row.count;
67
+ }
68
+
69
+ const elapsed = nowSec - currentWindow;
70
+ const prevWeight = 1 - elapsed / windowSec;
71
+ const estimate = previousCount * prevWeight + currentCount;
72
+
73
+ if (estimate >= limit) {
74
+ // Don't record the rejected hit — otherwise a sustained flood
75
+ // would keep increasing `count` past the limit for no benefit.
76
+ const retryAfterSec = Math.max(1, windowSec - elapsed);
77
+ return { ok: false, retryAfterSec };
78
+ }
79
+
80
+ await db
81
+ .insert(rateLimit)
82
+ .values({ key, windowStart: currentWindow, count: 1 })
83
+ .onConflictDoUpdate({
84
+ target: [rateLimit.key, rateLimit.windowStart],
85
+ set: { count: sql`${rateLimit.count} + 1` },
86
+ });
87
+
88
+ // Opportunistic cleanup: keep the table bounded without writing
89
+ // a DELETE on every request.
90
+ if (Math.random() < CLEANUP_PROBABILITY) {
91
+ await db
92
+ .delete(rateLimit)
93
+ .where(lt(rateLimit.windowStart, currentWindow - windowSec * 2));
94
+ }
95
+
96
+ return { ok: true };
97
+ },
98
+ };
99
+ }
@@ -0,0 +1,105 @@
1
+ /**
2
+ * In-Memory Rate Limiter
3
+ *
4
+ * Used by the Node runtime. The server process is long-lived and
5
+ * single-instance, so a local `Map` is reliable and avoids unnecessary DB
6
+ * round-trips. Uses the classic sliding-window-counter algorithm (two
7
+ * aligned buckets with a weighted estimate) for smooth limiting without
8
+ * the 2x boundary burst of a fixed window.
9
+ *
10
+ * On Cloudflare Workers use `createD1RateLimiter` instead — isolates are
11
+ * ephemeral and cannot share memory across requests.
12
+ */
13
+
14
+ import type {
15
+ RateLimitCheckOptions,
16
+ RateLimitResult,
17
+ RateLimiter,
18
+ } from "./rate-limit.js";
19
+
20
+ interface Bucket {
21
+ /** Unix seconds, aligned to the start of the current window. */
22
+ windowStart: number;
23
+ /** Hits recorded in the current window. */
24
+ count: number;
25
+ /** Hits recorded in the previous window (used for weighted estimate). */
26
+ prevCount: number;
27
+ }
28
+
29
+ /**
30
+ * Number of live keys after which we prune old buckets on the next write.
31
+ * Keeps the Map bounded under abuse without paying for eager sweeps.
32
+ */
33
+ const SWEEP_THRESHOLD = 10_000;
34
+
35
+ /**
36
+ * Creates an isolated in-memory limiter. Tests construct one per test app;
37
+ * the Node runtime holds a single module-level instance across requests.
38
+ *
39
+ * `now` is injectable so tests can assert window-rollover behavior without
40
+ * relying on real time.
41
+ */
42
+ export function createMemoryRateLimiter(
43
+ now: () => number = () => Math.floor(Date.now() / 1000),
44
+ ): RateLimiter {
45
+ const buckets = new Map<string, Bucket>();
46
+
47
+ function sweep(nowSec: number) {
48
+ if (buckets.size < SWEEP_THRESHOLD) return;
49
+ // Ideally we would drop buckets older than two windows, but a bucket
50
+ // does not record the window size it was created with, so a per-key
51
+ // expiry cannot be computed here. Callers in practice use a single,
52
+ // short window size, so we approximate: drop any bucket whose window
53
+ // started more than 10 minutes ago. This is only safe while every
54
+ // window is at most ~5 minutes; longer windows need a larger cutoff.
55
+ const cutoff = nowSec - 600;
56
+ for (const [key, bucket] of buckets) {
57
+ if (bucket.windowStart < cutoff) buckets.delete(key);
58
+ }
59
+ }
60
+
61
+ return {
62
+ async check(
63
+ key: string,
64
+ opts: RateLimitCheckOptions,
65
+ ): Promise<RateLimitResult> {
66
+ const { limit, windowSec } = opts;
67
+ const nowSec = now();
68
+ const currentWindow = Math.floor(nowSec / windowSec) * windowSec;
69
+
70
+ let bucket = buckets.get(key);
71
+ if (!bucket) {
72
+ bucket = { windowStart: currentWindow, count: 0, prevCount: 0 };
73
+ buckets.set(key, bucket);
74
+ } else if (bucket.windowStart !== currentWindow) {
75
+ // Roll forward: if exactly one window ago, preserve prev count for
76
+ // the weighted estimate; otherwise treat the gap as cold-start.
77
+ if (bucket.windowStart === currentWindow - windowSec) {
78
+ bucket.prevCount = bucket.count;
79
+ } else {
80
+ bucket.prevCount = 0;
81
+ }
82
+ bucket.count = 0;
83
+ bucket.windowStart = currentWindow;
84
+ }
85
+
86
+ const elapsed = nowSec - currentWindow;
87
+ const prevWeight = 1 - elapsed / windowSec;
88
+ const estimate = bucket.prevCount * prevWeight + bucket.count;
89
+
90
+ if (estimate >= limit) {
91
+ // Suggest waiting until the current window ends. This is
92
+ // deliberately coarse; a more precise retry-after would require
93
+ // computing when the weighted estimate drops back under the
94
+ // limit, which is more complexity than this DoS-mitigation
95
+ // feature warrants.
96
+ const retryAfterSec = Math.max(1, windowSec - elapsed);
97
+ return { ok: false, retryAfterSec };
98
+ }
99
+
100
+ bucket.count += 1;
101
+ sweep(nowSec);
102
+ return { ok: true };
103
+ },
104
+ };
105
+ }
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Rate Limiting Abstraction
3
+ *
4
+ * Shared interface for per-key rate limiting. Runtimes provide their own
5
+ * implementation: Cloudflare Workers uses a D1-backed sliding-window table
6
+ * (ephemeral isolates can't hold memory state), while Node uses an
7
+ * in-process Map (the process is persistent and avoids DB round-trips).
8
+ *
9
+ * Consumers depend only on this interface; they are runtime-agnostic.
10
+ */
11
+
12
+ import type { Context } from "hono";
13
+
14
+ export interface RateLimitCheckOptions {
15
+ /** Max requests allowed within `windowSec`. */
16
+ limit: number;
17
+ /** Sliding window size in seconds. */
18
+ windowSec: number;
19
+ }
20
+
21
+ export interface RateLimitResult {
22
+ /** True when the request is under the limit; the hit has already been counted. */
23
+ ok: boolean;
24
+ /**
25
+ * When `ok` is false, suggested seconds the client should wait before
26
+ * retrying. Implementations may return the full window as a safe default.
27
+ */
28
+ retryAfterSec?: number;
29
+ }
30
+
31
+ export interface RateLimiter {
32
+ /**
33
+ * Records a hit against `key` and reports whether the request is under
34
+ * the configured limit. Implementations must be race-safe enough that
35
+ * concurrent callers cannot durably exceed the limit.
36
+ */
37
+ check(key: string, opts: RateLimitCheckOptions): Promise<RateLimitResult>;
38
+ }
39
+
40
+ /**
41
+ * Extracts the client IP from a Hono request context.
42
+ *
43
+ * On Cloudflare Workers, `cf-connecting-ip` is set by the edge and is
44
+ * authoritative. On Node deployments we fall back to the leftmost
45
+ * `x-forwarded-for` entry, which is the conventional client IP when the
46
+ * app sits behind a single trusted proxy. When neither header is
47
+ * available we return `"unknown"` so all such requests share a bucket —
48
+ * preferable to skipping the rate limit entirely.
49
+ *
50
+ * Note: this helper does not verify proxy trust. It is used for DoS
51
+ * protection, not authentication. If header-forgery resistance becomes
52
+ * important, gate the `x-forwarded-for` branch on `shouldTrustProxy`.
53
+ */
54
+ export function getClientIp(c: Context): string {
55
+ const cf = c.req.header("cf-connecting-ip");
56
+ if (cf) return cf;
57
+ const fwd = c.req.header("x-forwarded-for");
58
+ if (fwd) {
59
+ const first = fwd.split(",")[0]?.trim();
60
+ if (first) return first;
61
+ }
62
+ return "unknown";
63
+ }
@@ -24,6 +24,13 @@ export interface RenderPublicPageOptions {
24
24
  appleTouchHref?: string;
25
25
  /** Optional explicit social image href */
26
26
  socialImageUrl?: string;
27
+ /**
28
+ * Absolute canonical URL for this page. Forwarded to `BaseLayout` and
29
+ * rendered as `<link rel="canonical">`. Only set when the page has a
30
+ * different canonical location (e.g. thread reply pages point back to the
31
+ * thread root).
32
+ */
33
+ canonicalHref?: string;
27
34
  /** Navigation data (from getNavigationData) */
28
35
  navData: NavigationData;
29
36
  /** Page content JSX to render inside SiteLayout */
@@ -66,6 +73,7 @@ export function renderPublicPage(c: Context, options: RenderPublicPageOptions) {
66
73
  faviconHref,
67
74
  appleTouchHref,
68
75
  socialImageUrl,
76
+ canonicalHref,
69
77
  navData,
70
78
  content,
71
79
  sidebar,
@@ -116,6 +124,7 @@ export function renderPublicPage(c: Context, options: RenderPublicPageOptions) {
116
124
  faviconHref={faviconHref}
117
125
  appleTouchHref={appleTouchHref}
118
126
  socialImageUrl={socialImageUrl}
127
+ canonicalHref={canonicalHref}
119
128
  faviconUrl={faviconUrl}
120
129
  faviconVersion={faviconVersion}
121
130
  noindex={noindex}
@@ -228,6 +228,15 @@ export function resolveConfig(
228
228
  siteAvatarUrl,
229
229
  faviconVersion: allSettings["SITE_FAVICON_VERSION"] ?? "",
230
230
 
231
+ // Rate limiting (ENV only). Defaults are conservative enough for a
232
+ // human typing in the search UI but reject bot floods.
233
+ rateLimit: {
234
+ disabled: getEnvString(env, "RATE_LIMIT_DISABLED") === "true",
235
+ searchPerMinute:
236
+ parseInt(getEnvString(env, "RATE_LIMIT_SEARCH_PER_MIN") ?? "30", 10) ||
237
+ 30,
238
+ },
239
+
231
240
  // Settings form placeholders (ENV > Default, without DB)
232
241
  fallbacks: {
233
242
  siteName: resolveFallback("SITE_NAME", env),
@@ -93,15 +93,25 @@ const SEARCHABLE_TYPES = new Set([
93
93
  * matching.
94
94
  *
95
95
  * @param bodyJson - TipTap JSON string (the `body` column)
96
+ * @param options.includeLinkHrefs
97
+ * When `true`, URLs from inline link marks are appended after the link text
98
+ * so they get indexed for search. Default `false` keeps the output clean for
99
+ * plain-text consumers like `toPlainText`/`extractTitle`.
96
100
  * @returns Plain text for FTS indexing, or null if parsing fails or doc is empty
97
101
  *
98
102
  * @example
99
103
  * ```ts
100
104
  * const text = extractBodyText(body);
101
105
  * // "Hello world Some code here"
106
+ *
107
+ * const indexed = extractBodyText(body, { includeLinkHrefs: true });
108
+ * // "See this page https://example.com"
102
109
  * ```
103
110
  */
104
- export function extractBodyText(bodyJson: string): string | null {
111
+ export function extractBodyText(
112
+ bodyJson: string,
113
+ options: { includeLinkHrefs?: boolean } = {},
114
+ ): string | null {
105
115
  let doc: TiptapNode;
106
116
  try {
107
117
  doc = JSON.parse(bodyJson) as TiptapNode;
@@ -111,9 +121,23 @@ export function extractBodyText(bodyJson: string): string | null {
111
121
 
112
122
  if (doc.type !== "doc" || !doc.content) return null;
113
123
 
124
+ const includeLinkHrefs = options.includeLinkHrefs === true;
125
+
114
126
  function collectText(node: TiptapNode): string {
115
127
  if (!SEARCHABLE_TYPES.has(node.type)) return "";
116
- if (node.type === "text") return node.text ?? "";
128
+ if (node.type === "text") {
129
+ const text = node.text ?? "";
130
+ if (!includeLinkHrefs || !node.marks || node.marks.length === 0) {
131
+ return text;
132
+ }
133
+ const hrefs: string[] = [];
134
+ for (const mark of node.marks) {
135
+ if (mark.type !== "link") continue;
136
+ const href = mark.attrs?.href;
137
+ if (typeof href === "string" && href.trim()) hrefs.push(href);
138
+ }
139
+ return hrefs.length > 0 ? `${text} ${hrefs.join(" ")}` : text;
140
+ }
117
141
  if (node.type === "hardBreak") return " ";
118
142
  if (!node.content) return "";
119
143
  return node.content.map(collectText).join(" ");
@@ -190,19 +214,22 @@ export function extractSummary(
190
214
  * @param bodyJson - Tiptap JSON string
191
215
  * @param maxBlocks - Maximum number of top-level blocks to include
192
216
  * @param maxChars - Maximum total plain-text character count
193
- * @returns HTML summary and whether content was truncated, or null
217
+ * @returns HTML summary, whether content was truncated, and the index in
218
+ * `doc.content` where the content after the summary boundary begins, or null.
219
+ * `breakAtIndex` lets callers align the summary with the full-body rendering
220
+ * when splitting at the "read more" boundary (e.g. to insert an anchor).
194
221
  *
195
222
  * @example
196
223
  * ```ts
197
224
  * const result = extractSummaryHtml(body, 5, 500);
198
- * // { html: "<ul><li><p>Item</p></li></ul>", hasMore: true }
225
+ * // { html: "<ul><li><p>Item</p></li></ul>", hasMore: true, breakAtIndex: 1 }
199
226
  * ```
200
227
  */
201
228
  export function extractSummaryHtml(
202
229
  bodyJson: string,
203
230
  maxBlocks: number = 5,
204
231
  maxChars: number = 500,
205
- ): { html: string; hasMore: boolean } | null {
232
+ ): { html: string; hasMore: boolean; breakAtIndex: number } | null {
206
233
  let doc: TiptapNode;
207
234
  try {
208
235
  doc = JSON.parse(bodyJson) as TiptapNode;
@@ -231,15 +258,21 @@ export function extractSummaryHtml(
231
258
  return {
232
259
  html: renderTiptapDocument(subDoc),
233
260
  hasMore: true,
261
+ // Anchor goes in place of the moreBreak marker, so the marker itself
262
+ // is NOT part of the pre-anchor body. It remains in the post-anchor
263
+ // body as an inert HTML comment.
264
+ breakAtIndex: moreBreakIdx,
234
265
  };
235
266
  }
236
267
 
237
268
  // No moreBreak — accumulate blocks up to limits
238
269
  const selected: TiptapNode[] = [];
239
270
  let totalChars = 0;
271
+ let lastSelectedIdx = -1;
240
272
 
241
- for (const node of nodes) {
242
- if (!SUMMARY_BLOCK_TYPES.has(node.type)) continue;
273
+ for (let i = 0; i < nodes.length; i++) {
274
+ const node = nodes[i];
275
+ if (!node || !SUMMARY_BLOCK_TYPES.has(node.type)) continue;
243
276
 
244
277
  const text = extractPlainText(node).trim();
245
278
  if (
@@ -250,6 +283,7 @@ export function extractSummaryHtml(
250
283
 
251
284
  selected.push(node);
252
285
  totalChars += text.length;
286
+ lastSelectedIdx = i;
253
287
  }
254
288
 
255
289
  if (selected.length === 0) return null;
@@ -261,5 +295,6 @@ export function extractSummaryHtml(
261
295
  return {
262
296
  html: renderTiptapDocument(subDoc),
263
297
  hasMore: selected.length < totalContentNodes,
298
+ breakAtIndex: lastSelectedIdx + 1,
264
299
  };
265
300
  }
package/src/lib/url.ts CHANGED
@@ -303,6 +303,40 @@ export function toPublicHref(href: string, sitePathPrefix = ""): string {
303
303
  return toPublicPath(href, sitePathPrefix);
304
304
  }
305
305
 
306
+ /**
307
+ * Check whether a path is a safe same-origin redirect target.
308
+ *
309
+ * Accepts only paths that start with a single `/` (no protocol-relative
310
+ * `//host`, no scheme, no control characters). Callers should use this to
311
+ * validate user-supplied `redirect` query parameters before issuing a
312
+ * `Location` header.
313
+ *
314
+ * @param path - Candidate redirect path
315
+ * @returns `true` when the path is safe to use as an internal redirect
316
+ *
317
+ * @example
318
+ * ```ts
319
+ * isSafeInternalRedirect("/settings") // true
320
+ * isSafeInternalRedirect("//evil.example") // false
321
+ * isSafeInternalRedirect("https://evil.example") // false
322
+ * ```
323
+ */
324
+ export function isSafeInternalRedirect(
325
+ path: string | null | undefined,
326
+ ): path is string {
327
+ if (typeof path !== "string") return false;
328
+ if (!path.startsWith("/")) return false;
329
+ if (path.startsWith("//")) return false;
330
+ // Disallow backslash-prefixed paths (some browsers treat `/\host` as
331
+ // protocol-relative) and control characters that could smuggle headers.
332
+ if (path.startsWith("/\\")) return false;
333
+ for (let i = 0; i < path.length; i += 1) {
334
+ const code = path.charCodeAt(i);
335
+ if (code < 0x20 || code === 0x7f) return false;
336
+ }
337
+ return true;
338
+ }
339
+
306
340
  /**
307
341
  * Remove the site path prefix from a public request pathname.
308
342
  *
package/src/lib/view.ts CHANGED
@@ -35,7 +35,8 @@ import {
35
35
  } from "./time.js";
36
36
  import { getCollectionPagePath } from "./collection-paths.js";
37
37
  import { getMediaUrl, getImageUrl, getPublicUrlForProvider } from "./image.js";
38
- import { extractSummaryHtml } from "./summary.js";
38
+ import { extractSummaryHtml, extractBodyText } from "./summary.js";
39
+ import { renderTiptapDocument } from "./tiptap-render.js";
39
40
  import { highlightText } from "./search-snippet.js";
40
41
  import { toPublicPath } from "./url.js";
41
42
 
@@ -159,8 +160,17 @@ function getPlainSummary(post: PostWithMedia): string | undefined {
159
160
  return normalizePreviewText(post.quoteText);
160
161
  }
161
162
 
163
+ // `post.bodyText` is written with `includeLinkHrefs: true` for FTS search
164
+ // indexing, so it's polluted with trailing link URLs. For human-facing
165
+ // preview text, prefer a clean re-derivation from the source TipTap JSON.
166
+ // Fall back to `post.bodyText` when the body isn't valid JSON (legacy rows
167
+ // or fixtures); that path predates link-href injection and carries no
168
+ // pollution risk.
169
+ const cleanBody = post.body ? extractBodyText(post.body) : null;
170
+
162
171
  return (
163
172
  normalizePreviewText(post.summary) ||
173
+ normalizePreviewText(cleanBody) ||
164
174
  normalizePreviewText(post.bodyText) ||
165
175
  getLegacyBodyPreview(post) ||
166
176
  normalizePreviewText(post.url)
@@ -216,14 +226,38 @@ export function toPostView(
216
226
  summaryHasMore = result.hasMore;
217
227
 
218
228
  // Inject #continue anchor at the excerpt boundary for scroll targeting.
219
- // Both summaryHtml and bodyHtml are rendered by the same renderTiptapJson,
220
- // so the excerpt HTML is a prefix of bodyHtml.
229
+ // The summary HTML is NOT a byte-prefix of bodyHtml — structural nodes
230
+ // like `horizontalRule` and `moreBreak` appear in bodyHtml but are
231
+ // excluded from the summary, so slicing bodyHtml by summary.length lands
232
+ // mid-tag and corrupts the markup. Instead, render the pre-boundary
233
+ // doc slice and splice the anchor at that exact block boundary.
221
234
  if (result.hasMore && post.bodyHtml) {
222
- const pos = result.html.length;
223
- bodyHtmlWithAnchor =
224
- post.bodyHtml.slice(0, pos) +
225
- '<span id="continue"></span>' +
226
- post.bodyHtml.slice(pos);
235
+ try {
236
+ const doc = JSON.parse(post.body) as {
237
+ type?: string;
238
+ content?: unknown[];
239
+ };
240
+ if (
241
+ doc.type === "doc" &&
242
+ Array.isArray(doc.content) &&
243
+ result.breakAtIndex > 0 &&
244
+ result.breakAtIndex <= doc.content.length
245
+ ) {
246
+ const beforeHtml = renderTiptapDocument({
247
+ type: "doc",
248
+ content: doc.content.slice(0, result.breakAtIndex) as never[],
249
+ });
250
+ if (post.bodyHtml.startsWith(beforeHtml)) {
251
+ bodyHtmlWithAnchor =
252
+ beforeHtml +
253
+ '<span id="continue"></span>' +
254
+ post.bodyHtml.slice(beforeHtml.length);
255
+ }
256
+ }
257
+ } catch {
258
+ // Fallback: leave bodyHtml untouched if the split can't be computed
259
+ // safely. Better no anchor than a broken document.
260
+ }
227
261
  }
228
262
  }
229
263
  }