@morphika/andami 0.5.11 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/app/robots.ts CHANGED
@@ -1,54 +1,73 @@
- import type { MetadataRoute } from "next";
- import { getSiteConfig } from "../lib/config";
-
- const cfg = getSiteConfig();
-
- /**
-  * robots.txt — Controls crawler access and rate.
-  *
-  * Crawl-delay (seconds between requests) is honoured by Bing, Yandex, Baidu
-  * and most well-behaved bots. Googlebot ignores it but respects the rate
-  * configured in Search Console. The 10-second delay drastically reduces
-  * serverless CPU usage from bot traffic on Hobby-tier hosting.
-  *
-  * Aggressive AI scrapers (GPTBot, CCBot, etc.) are blocked entirely.
-  */
- export default function robots(): MetadataRoute.Robots {
-   return {
-     rules: [
-       // Block known AI scrapers / aggressive bots
-       {
-         userAgent: "GPTBot",
-         disallow: ["/"],
-       },
-       {
-         userAgent: "CCBot",
-         disallow: ["/"],
-       },
-       {
-         userAgent: "anthropic-ai",
-         disallow: ["/"],
-       },
-       {
-         userAgent: "ClaudeBot",
-         disallow: ["/"],
-       },
-       {
-         userAgent: "Bytespider",
-         disallow: ["/"],
-       },
-       {
-         userAgent: "PetalBot",
-         disallow: ["/"],
-       },
-       // Default: allow with crawl delay
-       {
-         userAgent: "*",
-         allow: "/",
-         disallow: ["/admin/", "/studio/", "/api/admin/", "/api/"],
-         crawlDelay: 10,
-       },
-     ],
-     sitemap: `${cfg.domain}/sitemap.xml`,
-   };
- }
+ import type { MetadataRoute } from "next";
+ import { getSiteConfig } from "../lib/config";
+
+ const cfg = getSiteConfig();
+
+ /**
+  * robots.txt — Controls crawler access and rate.
+  *
+  * Two-tier AI bot strategy:
+  *
+  * 1. TRAINING bots (block) crawl pages to train LLMs on the content.
+  *    Blocking them protects IP from being absorbed into model weights.
+  *    Examples: GPTBot, CCBot, Google-Extended, ClaudeBot, anthropic-ai.
+  *
+  * 2. CITATION / ON-DEMAND bots (allow) — fetch URLs in real time when a
+  *    user asks an AI assistant a question. Blocking them means we lose
+  *    citation opportunities in ChatGPT, Perplexity, Claude responses.
+  *    Examples: ChatGPT-User, Perplexity-User, Claude-User, OAI-SearchBot.
+  *
+  * The distinction matters because we want LLMs to *cite* us, not *learn
+  * from* us. Each bot announces its own User-Agent string, so per-UA rules
+  * give granular control over both groups.
+  *
+  * Crawl-delay (seconds between requests) is honoured by Bing, Yandex,
+  * Baidu and most well-behaved bots. Googlebot ignores it but respects the
+  * rate configured in Search Console.
+  *
+  * Last reviewed: 2026-05-15.
+  */
+ export default function robots(): MetadataRoute.Robots {
+   return {
+     rules: [
+       // ─────────────────────────────────────────────
+       // TRAINING BOTS — explicitly blocked
+       // ─────────────────────────────────────────────
+       { userAgent: "GPTBot", disallow: ["/"] }, // OpenAI training crawler
+       { userAgent: "CCBot", disallow: ["/"] }, // Common Crawl (feeds most LLMs)
+       { userAgent: "Google-Extended", disallow: ["/"] }, // Google's AI training (separate from Googlebot)
+       { userAgent: "anthropic-ai", disallow: ["/"] }, // Older Anthropic training UA
+       { userAgent: "ClaudeBot", disallow: ["/"] }, // Anthropic Claude training
+       { userAgent: "FacebookBot", disallow: ["/"] }, // Meta/LLaMA training
+       { userAgent: "Applebot-Extended", disallow: ["/"] }, // Apple Intelligence training (separate from Applebot which indexes for Siri/Spotlight — that one is allowed implicitly)
+       { userAgent: "Bytespider", disallow: ["/"] }, // ByteDance / TikTok training crawler
+       { userAgent: "PetalBot", disallow: ["/"] }, // Huawei / Petal Search aggressive crawler
+
+       // ─────────────────────────────────────────────
+       // CITATION / ON-DEMAND BOTS — explicitly allowed
+       // (they're allowed by default anyway, but we list
+       // them to make the policy explicit and to override
+       // any future changes to the catch-all rule.)
+       // ─────────────────────────────────────────────
+       { userAgent: "ChatGPT-User", allow: "/" }, // ChatGPT user-triggered fetches
+       { userAgent: "OAI-SearchBot", allow: "/" }, // OpenAI search index
+       { userAgent: "PerplexityBot", allow: "/" }, // Perplexity index
+       { userAgent: "Perplexity-User", allow: "/" }, // Perplexity user-triggered fetches
+       { userAgent: "Claude-User", allow: "/" }, // Claude user-triggered fetches
+       { userAgent: "Claude-Web", allow: "/" }, // Claude web search
+       { userAgent: "Google-CloudVertexBot", allow: "/" }, // Vertex AI on-demand fetch
+       { userAgent: "YouBot", allow: "/" }, // You.com AI search
+
+       // ─────────────────────────────────────────────
+       // DEFAULT — Googlebot, Bingbot, and everyone else
+       // ─────────────────────────────────────────────
+       {
+         userAgent: "*",
+         allow: "/",
+         disallow: ["/admin/", "/studio/", "/api/admin/", "/api/"],
+         crawlDelay: 10,
+       },
+     ],
+     sitemap: `${cfg.domain}/sitemap.xml`,
+   };
+ }
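
For reference, a rough sketch of the robots.txt this route now serializes, assuming cfg.domain resolves to the placeholder https://example.com. Next.js emits one User-Agent group per rule; only three groups are shown here, and the remaining per-bot entries follow the same pattern:

User-Agent: GPTBot
Disallow: /

User-Agent: ChatGPT-User
Allow: /

User-Agent: *
Allow: /
Disallow: /admin/
Disallow: /studio/
Disallow: /api/admin/
Disallow: /api/
Crawl-delay: 10

Sitemap: https://example.com/sitemap.xml

Because a crawler that matches an explicit group follows only that group, the allowed citation bots are not bound by the catch-all's disallow list or crawl delay; that is standard robots.txt matching behaviour, not something specific to this package.
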
package/app/sitemap.ts CHANGED
@@ -1,48 +1,85 @@
- import type { MetadataRoute } from "next";
- import { client } from "../lib/sanity/client";
- import { allPageSlugsQuery, allProjectSlugsQuery } from "../lib/sanity/queries";
- import { getSiteConfig } from "../lib/config";
-
- const cfg = getSiteConfig();
-
- export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
-   const baseUrl = cfg.domain;
-
-   // Fetch all published slugs
-   const [pageSlugs, projectSlugs] = await Promise.all([
-     client.fetch<string[]>(allPageSlugsQuery).catch(() => [] as string[]),
-     client.fetch<string[]>(allProjectSlugsQuery).catch(() => [] as string[]),
-   ]);
-
-   // Homepage
-   const routes: MetadataRoute.Sitemap = [
-     {
-       url: baseUrl,
-       lastModified: new Date(),
-       changeFrequency: "weekly",
-       priority: 1,
-     },
-   ];
-
-   // Dynamic pages (About, Contact, Archive, etc.)
-   for (const slug of pageSlugs) {
-     routes.push({
-       url: `${baseUrl}/${slug}`,
-       lastModified: new Date(),
-       changeFrequency: "monthly",
-       priority: 0.8,
-     });
-   }
-
-   // Project pages
-   for (const slug of projectSlugs) {
-     routes.push({
-       url: `${baseUrl}/work/${slug}`,
-       lastModified: new Date(),
-       changeFrequency: "monthly",
-       priority: 0.7,
-     });
-   }
-
-   return routes;
- }
+ import type { MetadataRoute } from "next";
+ import { client } from "../lib/sanity/client";
+ import { allPagesForSitemapQuery, allProjectsForSitemapQuery } from "../lib/sanity/queries";
+ import { getSiteConfig } from "../lib/config";
+ import { assetUrl } from "../lib/assets";
+ import { toAbsoluteUrl } from "../lib/seo/site-settings";
+
+ const cfg = getSiteConfig();
+
+ interface PageSitemapEntry {
+   slug: string;
+   updatedAt: string;
+ }
+
+ interface ProjectSitemapEntry extends PageSitemapEntry {
+   thumbnail_path?: string;
+   title?: string;
+   description?: string;
+   published_at?: string;
+ }
+
+ /**
+  * sitemap.xml — Dynamic generation from Sanity content.
+  *
+  * Each entry's <lastmod> uses Sanity's _updatedAt field (not the build time),
+  * which is the more useful SEO signal: Google ignores lastmod values it finds
+  * consistently inaccurate, such as every URL sharing one build timestamp.
+  *
+  * Project entries include their thumbnail in the `images` array — Next.js
+  * emits this as <image:image> nodes per the Google Image Sitemap protocol,
+  * allowing Google Images to index project thumbnails alongside the page URL.
+  *
+  * Pages/projects with metadata.noindex == true are excluded at the GROQ
+  * level (see lib/sanity/queries.ts).
+  */
+ export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
+   const baseUrl = cfg.domain;
+
+   const [pages, projects] = await Promise.all([
+     client.fetch<PageSitemapEntry[]>(allPagesForSitemapQuery).catch(() => [] as PageSitemapEntry[]),
+     client.fetch<ProjectSitemapEntry[]>(allProjectsForSitemapQuery).catch(() => [] as ProjectSitemapEntry[]),
+   ]);
+
+   // Homepage — lastModified derived from the most recent page or project update
+   const allTimestamps = [
+     ...pages.map((p) => p.updatedAt),
+     ...projects.map((p) => p.updatedAt),
+   ].filter(Boolean);
+   const homeLastMod = allTimestamps.length > 0
+     ? new Date(Math.max(...allTimestamps.map((t) => new Date(t).getTime())))
+     : new Date();
+
+   const routes: MetadataRoute.Sitemap = [
+     {
+       url: baseUrl,
+       lastModified: homeLastMod,
+       changeFrequency: "weekly",
+       priority: 1,
+     },
+   ];
+
+   for (const { slug, updatedAt } of pages) {
+     routes.push({
+       url: `${baseUrl}/${slug}`,
+       lastModified: updatedAt ? new Date(updatedAt) : new Date(),
+       changeFrequency: "monthly",
+       priority: 0.8,
+     });
+   }
+
+   for (const { slug, updatedAt, thumbnail_path } of projects) {
+     const thumbnailUrl = thumbnail_path
+       ? toAbsoluteUrl(assetUrl(thumbnail_path), baseUrl)
+       : undefined;
+     routes.push({
+       url: `${baseUrl}/work/${slug}`,
+       lastModified: updatedAt ? new Date(updatedAt) : new Date(),
+       changeFrequency: "monthly",
+       priority: 0.7,
+       ...(thumbnailUrl && { images: [thumbnailUrl] }),
+     });
+   }
+
+   return routes;
+ }
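
The sitemap queries are imported from lib/sanity/queries.ts, which is not part of this diff. As a minimal sketch of the shape the code above expects, allProjectsForSitemapQuery could look roughly like the following; the document type, field names, and the groq tag from next-sanity are assumptions for illustration, not taken from this package:

import { groq } from "next-sanity";

// Hypothetical sketch only; the real query lives in lib/sanity/queries.ts.
// It must return { slug, updatedAt, thumbnail_path?, ... } objects and filter
// out documents flagged metadata.noindex, per the comment in sitemap.ts above.
export const allProjectsForSitemapQuery = groq`
  *[_type == "project" && defined(slug.current) && metadata.noindex != true]{
    "slug": slug.current,
    "updatedAt": _updatedAt,
    "thumbnail_path": thumbnail.asset->path,
    title,
    description,
    "published_at": publishedAt
  }
`;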