@dineway-ai/plugin-seo-graph 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ import type { PublicPageContext, PageMetadataContribution } from "dineway";
2
+
3
+ import type { SeoSettings } from "./settings.js";
4
+
5
+ /**
6
+ * Generate Open Graph and Twitter Card meta contributions.
7
+ * og:type is "article" for all content types (posts, pages, videos).
8
+ */
9
/**
 * Region overrides for bare language codes whose Open Graph locale cannot
 * be formed by repeating the language code as the region (for example
 * `en` maps to `en_US`, not `en_EN`).
 */
const FIX_LOCALES: Record<string, string> = {
	ca: "ca_ES", en: "en_US", el: "el_GR",
	et: "et_EE", ja: "ja_JP", sq: "sq_AL",
	uk: "uk_UA", vi: "vi_VN", zh: "zh_CN",
};
24
+
25
/**
 * Locale identifiers accepted as `og:locale` values (Facebook/Open Graph
 * `xx_XX` form). Entries are grouped by initial letter; insertion order is
 * unchanged.
 */
const VALID_LOCALES = new Set([
	"af_ZA", "ak_GH", "am_ET", "ar_AR", "as_IN", "ay_BO", "az_AZ",
	"be_BY", "bg_BG", "bn_IN", "bp_IN", "br_FR", "bs_BA",
	"ca_ES", "cb_IQ", "ck_US", "co_FR", "cs_CZ", "cx_PH", "cy_GB",
	"da_DK", "de_DE",
	"el_GR", "em_ZM", "en_GB", "en_PI", "en_UD", "en_US", "eo_EO", "es_ES", "es_LA", "es_MX", "et_EE", "eu_ES",
	"fa_IR", "fb_LT", "ff_NG", "fi_FI", "fo_FO", "fr_CA", "fr_FR", "fy_NL",
	"ga_IE", "gl_ES", "gn_PY", "gu_IN", "gx_GR",
	"ha_NG", "he_IL", "hi_IN", "hr_HR", "ht_HT", "hu_HU", "hy_AM",
	"id_ID", "ig_NG", "ik_US", "is_IS", "it_IT", "iu_CA",
	"ja_JP", "ja_KS", "jv_ID",
	"ka_GE", "kk_KZ", "km_KH", "kn_IN", "ko_KR", "ks_IN", "ku_TR", "ky_KG",
	"la_VA", "lg_UG", "li_NL", "ln_CD", "lo_LA", "lt_LT", "lv_LV",
	"mg_MG", "mi_NZ", "mk_MK", "ml_IN", "mn_MN", "mr_IN", "ms_MY", "mt_MT", "my_MM",
	"nb_NO", "nd_ZW", "ne_NP", "nl_BE", "nl_NL", "nn_NO", "nr_ZA", "ns_ZA", "ny_MW",
	"om_ET", "or_IN",
	"pa_IN", "pl_PL", "ps_AF", "pt_BR", "pt_PT",
	"qc_GT", "qr_GR", "qu_PE", "qz_MM",
	"rm_CH", "ro_RO", "ru_RU", "rw_RW",
	"sa_IN", "sc_IT", "se_NO", "si_LK", "sk_SK", "sl_SI", "sn_ZW", "so_SO", "sq_AL", "sr_RS", "ss_SZ", "st_ZA", "su_ID", "sv_SE", "sw_KE", "sy_SY", "sz_PL",
	"ta_IN", "te_IN", "tg_TJ", "th_TH", "tk_TM", "tl_PH", "tl_ST", "tn_BW", "tr_TR", "ts_ZA", "tt_RU", "tz_MA",
	"uk_UA", "ur_PK", "uz_UZ",
	"ve_ZA", "vi_VN",
	"wo_SN",
	"xh_ZA",
	"yi_DE", "yo_NG",
	"zh_CN", "zh_HK", "zh_TW", "zu_ZA", "zz_TR",
]);
184
+
185
/**
 * Convert a locale tag to a valid Facebook/Open Graph locale.
 *
 * Resolution order:
 * 1. Split on `-` / `_`, lower-case the language and upper-case the first
 *    two-letter region subtag (so `en-gb`, `pt_br` and `zh-Hant-TW` work).
 * 2. If `lang_REGION` is in `VALID_LOCALES`, use it.
 * 3. Otherwise use the `FIX_LOCALES` override for the language, if any
 *    (e.g. `en` -> `en_US`, `ja-XX` -> `ja_JP`).
 * 4. Otherwise try the mirrored `xx_XX` form (e.g. `es` -> `es_ES`).
 * 5. Otherwise use the first entry of `VALID_LOCALES` for that language
 *    (e.g. `da` -> `da_DK`), so languages without an `xx_XX` locale do not
 *    silently become English.
 * 6. Fall back to `en_US`.
 *
 * The language subtag is matched in full, so a three-letter code such as
 * `fil` is never truncated to an unrelated two-letter language.
 *
 * @param locale Locale tag such as `en`, `en-GB` or `pt_BR`.
 * @returns A member of `VALID_LOCALES`, a `FIX_LOCALES` value, or `en_US`.
 */
function toOgLocale(locale: string): string {
	const fallback = "en_US";
	if (!locale) return fallback;

	const subtags = locale.trim().split(/[-_]/);
	const lang = (subtags[0] ?? "").toLowerCase();
	if (!lang) return fallback;

	// Script subtags ("Hant") and numeric regions ("419") are ignored; only a
	// two-letter region can form a Facebook locale.
	const region = subtags
		.slice(1)
		.find((tag) => /^[a-z]{2}$/i.test(tag))
		?.toUpperCase();
	if (region) {
		const exact = `${lang}_${region}`;
		if (VALID_LOCALES.has(exact)) return exact;
	}

	// Own-property check: a plain-object lookup would otherwise resolve
	// inherited keys such as "constructor".
	const fixed = Object.prototype.hasOwnProperty.call(FIX_LOCALES, lang)
		? FIX_LOCALES[lang]
		: undefined;
	if (fixed) return fixed;

	const mirrored = `${lang}_${lang.toUpperCase()}`;
	if (VALID_LOCALES.has(mirrored)) return mirrored;

	// Set iteration follows insertion order, so the result is deterministic.
	const prefix = `${lang}_`;
	for (const candidate of VALID_LOCALES) {
		if (candidate.startsWith(prefix)) return candidate;
	}

	return fallback;
}
216
+
217
/**
 * Pull the account handle out of a Twitter/X profile URL.
 *
 * Only URLs whose host is exactly `twitter.com` or `x.com` (optionally
 * behind a `www.`, `mobile.` or `m.` prefix) are accepted, so unrelated
 * domains that merely end in "x.com" are not mistaken for X profiles.
 * The handle is the first path segment; query strings, fragments,
 * trailing slashes and a leading `@` are discarded.
 *
 * @returns The handle without a leading `@`, or `null` when the URL is not
 *          a Twitter/X profile URL or carries no handle.
 */
function extractTwitterHandle(profileUrl: string): string | null {
	const raw = profileUrl.trim();
	let parsed: URL;
	try {
		// Tolerate scheme-less entries such as "x.com/handle".
		parsed = new URL(/^[a-z][a-z0-9+.-]*:\/\//i.test(raw) ? raw : `https://${raw}`);
	} catch {
		return null;
	}

	const host = parsed.hostname.toLowerCase().replace(/^(?:www|mobile|m)\./, "");
	if (host !== "twitter.com" && host !== "x.com") return null;

	const firstSegment = parsed.pathname.split("/").find((part) => part !== "");
	const handle = firstSegment?.replace(/^@/, "");
	return handle || null;
}

/**
 * Generate Open Graph and Twitter Card meta contributions for a page.
 *
 * - On the `/404` path only `og:site_name` (when available) and `og:locale`
 *   are emitted.
 * - `og:type` is `"article"` when `page.kind === "content"`, otherwise
 *   `"website"`.
 * - `article:*` properties are added for content pages that carry
 *   `articleMeta`.
 * - `twitter:card` is `summary_large_image` when the page has an image,
 *   otherwise `summary`.
 * - `twitter:site` is taken from the first entry in `settings.socials` that
 *   is a Twitter/X profile URL with a handle.
 *
 * @param page Page context supplying path, kind, image, site name and article metadata.
 * @param settings Plugin settings; `socials` is scanned for a Twitter/X profile.
 * @param ogTitle Title for `og:title` / `twitter:title`; skipped when empty.
 * @param description Description for the description tags; skipped when empty or null.
 * @param canonical Canonical URL for `og:url`; skipped when null.
 * @param locale Site/page locale, converted with `toOgLocale`.
 * @returns The contributions in emission order.
 */
export function generateOpengraph(
	page: PublicPageContext,
	settings: SeoSettings,
	ogTitle: string,
	description: string | null,
	canonical: string | null,
	locale: string,
): PageMetadataContribution[] {
	const contributions: PageMetadataContribution[] = [];
	const path = page.path || "/";
	const ogLocale = toOgLocale(locale);

	// Skip most OG tags on 404
	if (path === "/404") {
		if (page.siteName) {
			contributions.push({ kind: "property", property: "og:site_name", content: page.siteName });
		}
		contributions.push({ kind: "property", property: "og:locale", content: ogLocale });
		return contributions;
	}

	// og:type - "article" for content pages, "website" for archives/homepage
	const isContent = page.kind === "content";
	contributions.push({
		kind: "property",
		property: "og:type",
		content: isContent ? "article" : "website",
	});

	if (ogTitle) {
		contributions.push({ kind: "property", property: "og:title", content: ogTitle });
	}
	if (description) {
		contributions.push({ kind: "property", property: "og:description", content: description });
	}
	if (page.image) {
		contributions.push({ kind: "property", property: "og:image", content: page.image });
	}
	if (canonical) {
		contributions.push({ kind: "property", property: "og:url", content: canonical });
	}
	if (page.siteName) {
		contributions.push({ kind: "property", property: "og:site_name", content: page.siteName });
	}
	contributions.push({ kind: "property", property: "og:locale", content: ogLocale });

	// Article meta
	if (isContent && page.articleMeta) {
		if (page.articleMeta.publishedTime) {
			contributions.push({
				kind: "property",
				property: "article:published_time",
				content: page.articleMeta.publishedTime,
			});
		}
		if (page.articleMeta.modifiedTime) {
			contributions.push({
				kind: "property",
				property: "article:modified_time",
				content: page.articleMeta.modifiedTime,
			});
		}
		if (page.articleMeta.author) {
			contributions.push({
				kind: "property",
				property: "article:author",
				content: page.articleMeta.author,
			});
		}
	}

	// Twitter Card
	contributions.push({
		kind: "meta",
		name: "twitter:card",
		content: page.image ? "summary_large_image" : "summary",
	});

	if (ogTitle) {
		contributions.push({ kind: "meta", name: "twitter:title", content: ogTitle });
	}
	if (description) {
		contributions.push({ kind: "meta", name: "twitter:description", content: description });
	}
	if (page.image) {
		contributions.push({ kind: "meta", name: "twitter:image", content: page.image });
	}

	// Twitter site handle: first social URL that really is a Twitter/X profile.
	for (const social of settings.socials) {
		const handle = extractTwitterHandle(social);
		if (handle) {
			contributions.push({ kind: "meta", name: "twitter:site", content: `@${handle}` });
			break;
		}
	}

	return contributions;
}
package/src/robots.ts ADDED
@@ -0,0 +1,29 @@
1
+ import type { PublicPageContext } from "dineway";
2
+
3
/** Snippet/preview directives appended to every emitted robots value. */
const SNIPPET_DIRECTIVES = "max-snippet:-1, max-image-preview:large, max-video-preview:-1";

/** Paths that are always served with `noindex`. */
const NOINDEX_PATHS = new Set(["/search"]);

/**
 * Generate the meta robots value for a page.
 *
 * - Normal pages: `index, follow` + snippet directives
 * - Noindex pages (explicit `page.seo.robots` containing `noindex`, or a
 *   path listed in `NOINDEX_PATHS`): `noindex, follow` + snippet directives
 * - `/404`: omitted entirely (returns `null`)
 *
 * The explicit robots value is compared case-insensitively because robots
 * directives are not case-sensitive (`NOINDEX` and `NoIndex` both count).
 *
 * @param page Page context; `path` defaults to `/` when missing.
 * @returns The robots content string, or `null` when no tag should be emitted.
 */
export function generateRobots(page: PublicPageContext): string | null {
	const path = page.path || "/";

	// 404: omit robots entirely
	if (path === "/404") return null;

	const explicitRobots = (page.seo?.robots || "").toLowerCase();
	const isNoindex = explicitRobots.includes("noindex") || NOINDEX_PATHS.has(path);

	return `${isNoindex ? "noindex" : "index"}, follow, ${SNIPPET_DIRECTIVES}`;
}
@@ -0,0 +1,70 @@
1
+ import { buildArticle as coreBuildArticle } from "@jdevalk/seo-graph-core";
2
+ import type { IdFactory, Reference } from "@jdevalk/seo-graph-core";
3
+ import type { PublicPageContext } from "dineway";
4
+
5
+ import type { SeoSettings } from "../settings.js";
6
+ import { getSiteEntityId } from "./organization.js";
7
+
8
/**
 * Parse an article timestamp.
 *
 * @returns A valid `Date`, or `undefined` when the value is missing/empty
 *          or cannot be parsed (so an `Invalid Date` never reaches the
 *          graph builder).
 */
function parseArticleDate(value: string | number | Date | null | undefined): Date | undefined {
	if (!value) return undefined;
	const parsed = new Date(value);
	return Number.isNaN(parsed.getTime()) ? undefined : parsed;
}

/**
 * Build the `BlogPosting` schema node for a page.
 *
 * Returns `null` (no node) when a required input is missing: an empty
 * `ogTitle`, or a `publishedTime` that is absent or not a parseable date.
 * An unparseable `modifiedTime` is dropped rather than emitted.
 *
 * @param page Page context; supplies `url`, `image` and `articleMeta`.
 * @param settings Plugin settings (person name, copyright year, license URL).
 * @param siteName Fallback author name when `settings.personName` is empty.
 * @param canonical Canonical URL; falls back to `page.url` when null.
 * @param ogTitle Headline of the posting.
 * @param description Description; an empty string is passed when null.
 * @param locale Value for `inLanguage`.
 * @param ids Id factory used to mint `@id` references.
 * @param blogId `@id` of the Blog entity, or null when there is none.
 * @param keywords Optional keywords, joined with `", "`.
 * @param articleSection Optional section name.
 * @returns The node produced by the core `buildArticle`, or `null`.
 */
export function buildArticle(
	page: PublicPageContext,
	settings: SeoSettings,
	siteName: string,
	canonical: string | null,
	ogTitle: string,
	description: string | null,
	locale: string,
	ids: IdFactory,
	blogId: string | null,
	keywords?: string[],
	articleSection?: string,
): Record<string, unknown> | null {
	// Required fields per spec - if missing or invalid, don't output
	const datePublished = parseArticleDate(page.articleMeta?.publishedTime);
	if (!ogTitle || !datePublished) return null;
	const dateModified = parseArticleDate(page.articleMeta?.modifiedTime);

	const pageUrl = canonical || page.url;
	const siteEntityId = getSiteEntityId(settings, ids);
	const webPageRef: Reference = { "@id": ids.webPage(pageUrl) };

	// When a Blog entity exists, link the posting to both WebPage and Blog.
	const isPartOf: Reference | Reference[] = blogId ? [webPageRef, { "@id": blogId }] : webPageRef;

	// The site entity is named as copyright holder only when a copyright
	// year is configured.
	const copyrightHolder: Reference | undefined = settings.copyrightYear
		? { "@id": siteEntityId }
		: undefined;

	return coreBuildArticle(
		{
			url: pageUrl,
			// NOTE(review): cast kept from the original; the value may be an
			// array when blogId is set — confirm the core input type accepts it.
			isPartOf: isPartOf as Reference,
			author: {
				"@id": ids.person,
				name: settings.personName || siteName,
			},
			publisher: { "@id": siteEntityId },
			headline: ogTitle,
			description: description || "",
			datePublished,
			dateModified,
			inLanguage: locale,
			image: page.image ? { "@id": ids.primaryImage(pageUrl) } : undefined,
			copyrightHolder,
			copyrightYear: settings.copyrightYear || undefined,
			license: settings.licenseUrl || undefined,
			keywords: keywords?.length ? keywords.join(", ") : undefined,
			articleSection: articleSection || undefined,
		},
		ids,
		"BlogPosting",
	);
}
@@ -0,0 +1,158 @@
1
+ import type { BreadcrumbItem } from "@jdevalk/seo-graph-core";
2
+ import type { PublicPageContext } from "dineway";
3
+
4
+ import type { BreadcrumbRule, SeoSettings } from "../settings.js";
5
+
6
/** A single trailing slash at the end of a URL. */
const TRAILING_SLASH_RE = /\/$/;
/** Any run of slashes at the start or end of a path. */
const TRIM_SLASHES_RE = /^\/+|\/+$/g;
/** A purely numeric path segment (page number after `page`). */
const PAGINATION_SEGMENT_RE = /^\d+$/;
/** A four-digit path segment, treated as a year. */
const YEAR_SEGMENT_RE = /^\d{4}$/;
/** A one- or two-digit path segment, treated as a month. */
const MONTH_SEGMENT_RE = /^\d{1,2}$/;
/** Runs of dashes/underscores that separate words inside a slug. */
const SEGMENT_SEPARATOR_RE = /[-_]+/g;
/**
 * First character of each word. Unicode-aware: `\b\w` only understands
 * ASCII, which capitalised the letter *after* an accented character
 * (`"über"` -> `"üBer"`). Letters, digits and `_` start a word when they
 * are not preceded by a letter, combining mark, digit or `_`.
 */
const WORD_START_RE = /(?<![\p{L}\p{M}\p{N}_])[\p{L}\p{N}_]/gu;
13
+
14
/**
 * Compute the breadcrumb trail for a page.
 *
 * Returns `null` for the homepage (`/`) and the `/404` page, and whenever
 * the selected strategy yields no usable trail.
 *
 * Strategy selection:
 * 1. A non-empty rule registered under `settings.breadcrumbRules[page.pageType]`
 *    is applied as configured.
 * 2. Otherwise the trail is derived from `page.path`, with labels taken from
 *    `settings.breadcrumbLabels`; derived trails start with a `Home` crumb.
 *
 * `siteUrl` has one trailing slash removed before it is used as the prefix
 * for site-relative crumb URLs. The current page's URL is `page.canonical`
 * when set, otherwise `page.url`.
 */
export function buildBreadcrumbs(
	page: PublicPageContext,
	settings: SeoSettings,
	siteUrl: string,
): BreadcrumbItem[] | null {
	const currentPath = page.path || "/";
	switch (currentPath) {
		case "/":
		case "/404":
			// Neither page gets a trail.
			return null;
		default:
			break;
	}

	const origin = siteUrl.replace(TRAILING_SLASH_RE, "");
	const currentUrl = page.canonical || page.url;
	const matchedRule = settings.breadcrumbRules[page.pageType];

	// No rule (or an empty one) for this page type: fall back to the path.
	if (!matchedRule || matchedRule.length === 0) {
		return derivePath(page, settings, origin, currentUrl);
	}
	return applyRule(matchedRule, page, origin, currentUrl);
}
48
+
49
/**
 * Turn a configured breadcrumb rule into a trail.
 *
 * Each crumb's label is used verbatim, except the placeholder `{title}`,
 * which becomes `page.title` (or an empty string when the page has no
 * title). Each crumb's `href` is resolved with `resolveHref`.
 *
 * @returns The trail, or `null` when the rule produces fewer than two crumbs.
 */
function applyRule(
	rule: BreadcrumbRule,
	page: PublicPageContext,
	baseUrl: string,
	pageUrl: string,
): BreadcrumbItem[] | null {
	const trail: BreadcrumbItem[] = Array.from(rule, (crumb) => {
		const usesTitle = crumb.label === "{title}";
		return {
			name: usesTitle ? page.title || "" : crumb.label,
			url: resolveHref(crumb.href, baseUrl, pageUrl),
		};
	});

	if (trail.length <= 1) return null;
	return trail;
}
63
+
64
/**
 * Resolve a rule crumb's `href` to the URL emitted in the trail.
 *
 * - Missing, empty, or the `{path}` placeholder: the current page URL.
 * - Begins with `/`: treated as site-relative and prefixed with `baseUrl`.
 * - Anything else: passed through untouched (assumed to be absolute already).
 */
function resolveHref(href: string | undefined, baseUrl: string, pageUrl: string): string {
	const pointsAtCurrentPage = !href || href === "{path}";
	if (pointsAtCurrentPage) {
		return pageUrl;
	}
	return href.startsWith("/") ? baseUrl + href : href;
}
75
+
76
/**
 * Derive a breadcrumb trail from the segments of `page.path`.
 *
 * The trail starts with a `Home` crumb pointing at `${baseUrl}/`. Every
 * path segment that `shouldSkipSegment` does not reject adds one crumb:
 *
 * - The final segment is labelled with `page.title` when present and links
 *   to `pageUrl`.
 * - Other segments (and an untitled final segment) are labelled from
 *   `settings.breadcrumbLabels`, falling back to `defaultCleanSegment`.
 *   Their URL is `baseUrl` plus the path accumulated so far, with a
 *   trailing slash only when `page.path` itself ends in one.
 *
 * Skipped segments still contribute to the accumulated path.
 *
 * @returns The trail, or `null` when the path is empty or only `Home` remains.
 */
function derivePath(
	page: PublicPageContext,
	settings: SeoSettings,
	baseUrl: string,
	pageUrl: string,
): BreadcrumbItem[] | null {
	const rawPath = page.path || "";
	const stripped = rawPath.replace(TRIM_SLASHES_RE, "");
	if (stripped === "") return null;

	const parts = stripped.split("/");
	const finalIndex = parts.length - 1;
	// Intermediate crumbs mirror the page's own trailing-slash convention.
	const slashSuffix = rawPath.endsWith("/") ? "/" : "";

	const trail: BreadcrumbItem[] = [{ name: "Home", url: `${baseUrl}/` }];
	let pathSoFar = "";

	parts.forEach((part, position) => {
		pathSoFar = `${pathSoFar}/${part}`;
		if (shouldSkipSegment(part, parts, position)) return;

		const mappedLabel = () => settings.breadcrumbLabels[part] || defaultCleanSegment(part);

		if (position === finalIndex) {
			// The page's own crumb: its title wins, and its URL is the page URL
			// handed in by the caller rather than one rebuilt from segments.
			trail.push({ name: page.title || mappedLabel(), url: pageUrl });
			return;
		}

		trail.push({ name: mappedLabel(), url: `${baseUrl}${pathSoFar}${slashSuffix}` });
	});

	if (trail.length <= 1) return null;
	return trail;
}
119
+
120
/**
 * Decide whether a path segment is noise that should not become a crumb.
 *
 * Skipped segments:
 * - `/page/N` pagination — both the literal `page` (when followed by a
 *   number) and the number that follows it.
 * - Date-archive segments: a four-digit year; a month (1–12) directly
 *   after a year; a day (1–31) directly after such a year/month pair.
 *
 * A bare one- or two-digit segment that is not in date position (for
 * example the `42` in `/products/42`) is kept, so numeric slugs and IDs
 * keep their crumb instead of vanishing from the trail.
 *
 * NOTE(review): any four-digit segment is still treated as a year, as in
 * the original; a four-digit numeric slug would be skipped.
 *
 * @param segment The segment under test.
 * @param all All segments of the path, in order.
 * @param index Position of `segment` within `all`.
 */
function shouldSkipSegment(segment: string, all: string[], index: number): boolean {
	const prev = index > 0 ? all[index - 1] : undefined;
	const next = all[index + 1];

	// /.../page/N — both segments
	if (segment === "page" && next !== undefined && PAGINATION_SEGMENT_RE.test(next)) {
		return true;
	}
	if (prev === "page" && PAGINATION_SEGMENT_RE.test(segment)) {
		return true;
	}

	if (YEAR_SEGMENT_RE.test(segment)) return true;

	// One/two-digit segments only count as month/day in date position.
	if (prev !== undefined && MONTH_SEGMENT_RE.test(segment)) {
		const value = Number(segment);

		// Month directly after a year: /2025/05
		if (YEAR_SEGMENT_RE.test(prev)) {
			return value >= 1 && value <= 12;
		}

		// Day directly after year/month: /2025/05/14
		const beforePrev = index > 1 ? all[index - 2] : undefined;
		if (beforePrev !== undefined && YEAR_SEGMENT_RE.test(beforePrev) && MONTH_SEGMENT_RE.test(prev)) {
			const month = Number(prev);
			return month >= 1 && month <= 12 && value >= 1 && value <= 31;
		}
	}

	return false;
}
151
+
152
/**
 * Fallback label for a path segment with no configured label: separator
 * runs (dashes/underscores) become single spaces, then the first character
 * of each word is upper-cased. `"open-source"` → `"Open Source"`.
 */
function defaultCleanSegment(segment: string): string {
	const spaced = segment.replace(SEGMENT_SEPARATOR_RE, " ");
	const capitalize = (firstChar: string): string => firstChar.toUpperCase();
	return spaced.replace(WORD_START_RE, capitalize);
}
@@ -0,0 +1,69 @@
1
+ import type { PluginContext } from "dineway";
2
+
3
+ import { buildPageUrl } from "../urls.js";
4
+
5
/**
 * A single schema-map row: one live URL together with the content record
 * that backs it.
 */
export interface SchemaMapEntry {
	/** Absolute URL at which the page is served. */
	url: string;

	/** Slug of the collection the record belongs to (e.g. `posts`, `pages`). */
	collection: string;

	/** Last-modified timestamp as an ISO-8601 string. */
	updatedAt: string;
}
16
+
17
/**
 * List every published content URL, for agent/crawler discovery.
 *
 * Returns an empty list when the plugin context has no content API or the
 * site URL is not configured. Otherwise each collection that declares a
 * `urlPattern` is paged through (100 published items per request) and every
 * item with a slug is turned into a URL via `buildPageUrl`; items for which
 * no URL can be built are left out.
 *
 * Locale handling uses the i18n config when i18n is enabled and a config is
 * available, otherwise a single-locale `en` default. `updatedAt` falls back
 * to `createdAt`, then to the Unix epoch.
 */
export async function listSchemaEntries(ctx: PluginContext): Promise<SchemaMapEntry[]> {
	const content = ctx.content;
	if (!content) return [];

	const siteUrl = ctx.site.url;
	if (!siteUrl) return [];

	// Loaded lazily, only once we know there is something to enumerate.
	const { SchemaRegistry, isI18nEnabled, getI18nConfig } = await import("dineway");
	const { getDb } = await import("dineway/runtime");

	const db = await getDb();
	const collections = await new SchemaRegistry(db).listCollections();

	const singleLocaleCfg = { locales: ["en"], defaultLocale: "en", prefixDefaultLocale: false };
	const cfg = isI18nEnabled() && getI18nConfig() ? getI18nConfig()! : singleLocaleCfg;

	const entries: SchemaMapEntry[] = [];

	for (const collection of collections) {
		const { urlPattern } = collection;
		if (!urlPattern) continue;

		let cursor: string | undefined;
		let hasMore = true;
		while (hasMore) {
			const batch = await content.list(collection.slug, {
				limit: 100,
				cursor,
				where: { status: "published" },
			});

			for (const item of batch.items) {
				if (!item.slug) continue;

				const url = buildPageUrl({
					locale: item.locale || cfg.defaultLocale,
					slug: item.slug,
					siteUrl,
					cfg,
					urlPattern,
				});
				if (!url) continue;

				entries.push({
					url,
					collection: collection.slug,
					updatedAt: item.updatedAt || item.createdAt || new Date(0).toISOString(),
				});
			}

			// A falsy cursor marks the last page.
			cursor = batch.cursor;
			hasMore = Boolean(cursor);
		}
	}

	return entries;
}