npm - @ontosdk/next - Versions diffs - 1.2.0 → 1.3.0 - Mend

@ontosdk/next 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/src/OntoHead.tsx ADDED Viewed

@@ -0,0 +1,59 @@
+'use client';
+import { usePathname } from 'next/navigation';
+/**
+ * OntoHead — auto-discovery tags for AI agents.
+ *
+ * Renders two `<link rel="alternate">` elements:
+ * - a `text/markdown` alternate whose href is `/.onto<route>.md` for the current route
+ * - a `text/plain` alternate pointing at the site-wide `/llms.txt`
+ *
+ * Route mapping: `/` (or an empty pathname) becomes `/.onto/index.md`, and one
+ * trailing slash is dropped, so `/docs/` becomes `/.onto/docs.md`.
+ *
+ * Usage in a Next.js App Router layout:
+ * ```tsx
+ * import { OntoHead } from '@ontosdk/next/components';
+ *
+ * export default function RootLayout({ children }) {
+ *   return (
+ *     <html>
+ *       <head>
+ *         <OntoHead />
+ *       </head>
+ *       <body>{children}</body>
+ *     </html>
+ *   );
+ * }
+ * ```
+ */
+export function OntoHead() {
+    const pathname = usePathname();
+    // The site root has no path segment of its own, so it is stored as "index".
+    const isRoot = pathname === '/' || pathname === '';
+    // Drop one trailing slash so "/docs/" and "/docs" share the same payload file.
+    const withoutTrailingSlash = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
+    const payloadPath = isRoot ? '/index' : withoutTrailingSlash;
+    const markdownHref = `/.onto${payloadPath}.md`;
+    return (
+        <>
+            {/* Markdown alternate for the page currently being rendered */}
+            <link
+                rel="alternate"
+                type="text/markdown"
+                href={markdownHref}
+                title="AI-optimized Markdown version"
+            />
+            {/* llms.txt manifest shared by the whole site */}
+            <link
+                rel="alternate"
+                type="text/plain"
+                href="/llms.txt"
+                title="LLM-readable site manifest"
+            />
+        </>
+    );
+}

package/src/OntoProvider.tsx ADDED Viewed

@@ -0,0 +1,95 @@
+'use client';
+import { usePathname } from 'next/navigation';
+import { ReactNode, useMemo } from 'react';
+import type { OntoConfig, PageType } from './config';
+import { generateSchemaForPageType, serializeSchema } from './schemas';
+export interface OntoProviderProps {
+  /**
+   * The base URL of your site (e.g., 'https://example.com').
+   * One trailing slash is removed before the current pathname is appended;
+   * the result is used for the markdown `<link>` href and as the page URL
+   * handed to the JSON-LD schema generator.
+   */
+  baseUrl: string;
+  /**
+   * Child components; rendered after the injected `<link>` / `<script>` tags.
+   */
+  children: ReactNode;
+  /**
+   * Optional: Onto configuration for automatic JSON-LD schema injection.
+   * When provided, the current pathname is compared against `config.routes`
+   * and a JSON-LD `<script>` is rendered for a matching route whose page type
+   * is not 'default'. When omitted, only the `<link>` tag is rendered.
+   */
+  config?: OntoConfig;
+}
+/**
+ * OntoProvider — wraps the application and injects AI-discovery metadata.
+ *
+ * Always rendered:
+ * - a `<link rel="alternate" type="text/markdown">` whose href is
+ *   `<baseUrl><pathname>?format=md`
+ *
+ * Rendered only when `config` is passed and the current pathname exactly
+ * matches a configured route with a non-default page type:
+ * - a `<script type="application/ld+json">` holding the generated schema
+ *   ('scoring' → methodology schema, 'about' → AboutPage schema)
+ *
+ * Usage in a Next.js App Router layout:
+ * ```tsx
+ * import { OntoProvider } from '@ontosdk/next/provider';
+ * import config from '../onto.config';
+ *
+ * export default function RootLayout({ children }) {
+ *   return (
+ *     <OntoProvider baseUrl="https://example.com" config={config}>
+ *       <html>
+ *         <head />
+ *         <body>{children}</body>
+ *       </html>
+ *     </OntoProvider>
+ *   );
+ * }
+ * ```
+ */
+export function OntoProvider({ baseUrl, children, config }: OntoProviderProps) {
+  const pathname = usePathname();
+  // Remove a single trailing slash so baseUrl + pathname never contains "//".
+  const origin = baseUrl.replace(/\/$/, '');
+  const fullPageUrl = `${origin}${pathname}`;
+  const markdownHref = `${fullPageUrl}?format=md`;
+  // Page type of the first configured route whose path equals the current pathname.
+  const pageType = useMemo<PageType>(() => {
+    const routes = config?.routes;
+    if (!routes) return 'default';
+    for (const route of routes) {
+      if (route.path === pathname) {
+        return route.pageType || 'default';
+      }
+    }
+    return 'default';
+  }, [config, pathname]);
+  // Serialized JSON-LD for this page, or null when nothing should be injected.
+  const jsonLdSchema = useMemo(
+    () =>
+      config && pageType !== 'default'
+        ? serializeSchema(generateSchemaForPageType(pageType, config, fullPageUrl))
+        : null,
+    [config, pageType, fullPageUrl]
+  );
+  return (
+    <>
+      <link
+        rel="alternate"
+        type="text/markdown"
+        href={markdownHref}
+        title="AI-optimized Markdown version"
+      />
+      {jsonLdSchema && (
+        <script
+          type="application/ld+json"
+          dangerouslySetInnerHTML={{ __html: jsonLdSchema }}
+        />
+      )}
+      {children}
+    </>
+  );
+}

package/src/bots.ts CHANGED Viewed

@@ -59,7 +59,10 @@ export const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);
  * Given a raw user-agent string, returns the matched AiBot entry or undefined.
  * Comparison is case-insensitive to handle inconsistent agent casing.
  */
-export function matchBot(userAgent: string): AiBot | undefined {
-    const ua = userAgent.toLowerCase();
-    return AI_BOTS.find(bot => ua.includes(bot.name.toLowerCase()));
+export function matchBot(userAgent: string | null): AiBot | undefined {
+    // A missing or empty user-agent can never identify a bot.
+    if (!userAgent) return undefined;
+    const haystack = userAgent.toLowerCase();
+    // First entry whose lower-cased name occurs anywhere in the user-agent wins.
+    for (const bot of AI_BOTS) {
+        if (haystack.includes(bot.name.toLowerCase())) {
+            return bot;
+        }
+    }
+    return undefined;
 }

package/src/config.ts ADDED Viewed

@@ -0,0 +1,151 @@
+/**
+ * Configuration schema for onto.config.ts
+ * Used to dynamically generate llms.txt and other AI discovery files
+ */
+export type PageType = 'scoring' | 'about' | 'default'; // selects which JSON-LD schema (if any) is injected for a route; 'default' means none
+export interface OntoRoute {
+  /**
+   * The URL path (e.g., '/docs', '/api/reference').
+   * Used as the link text in llms.txt and combined with the site's base URL
+   * to form the link target.
+   */
+  path: string;
+  /**
+   * Description of what this route contains; printed after the link in the
+   * llms.txt "Key Routes" list.
+   */
+  description: string;
+  /**
+   * Optional: Page type for automatic JSON-LD schema injection
+   * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)
+   * - 'about': Injects Organization/AboutPage schema
+   * - 'default': No automatic schema injection
+   */
+  pageType?: PageType;
+}
+export interface OntoConfig {
+  /**
+   * The name of your project or site (required).
+   * Used as the H1 heading in llms.txt.
+   */
+  name: string;
+  /**
+   * A short summary of your project (required).
+   * Displayed as a blockquote in llms.txt, directly below the H1.
+   * Should contain the key information needed to understand the rest of the file.
+   */
+  summary: string;
+  /**
+   * The base URL of your site (e.g., 'https://example.com').
+   * Combined with each route's `path` to build the absolute links in llms.txt.
+   */
+  baseUrl: string;
+  /**
+   * Optional: Additional sections to include in llms.txt.
+   * Each entry becomes a `## <heading>` followed by its content, which may be
+   * any markdown.
+   */
+  sections?: {
+    heading: string;
+    content: string;
+  }[];
+  /**
+   * Key routes that AI agents should know about.
+   * Formatted as a markdown list under a "Key Routes" heading in llms.txt.
+   */
+  routes?: OntoRoute[];
+  /**
+   * Optional: Links to external resources (documentation, API references, etc.).
+   * Formatted as a markdown list under a "Resources" heading in llms.txt;
+   * `description`, when present, is appended after the link.
+   */
+  externalLinks?: {
+    title: string;
+    url: string;
+    description?: string;
+  }[];
+  /**
+   * Optional: Organization information for JSON-LD schemas.
+   * Only `name` is required; the other fields are copied into the generated
+   * schema when they are set.
+   */
+  organization?: {
+    name: string;
+    description?: string;
+    url?: string;
+    logo?: string;
+    foundingDate?: string;
+  };
+}
+/**
+ * Load the `onto.config` module from the current working directory.
+ *
+ * Used by the middleware to generate llms.txt dynamically. Loading is
+ * best-effort: any failure resolves to `null` so the caller can fall back to
+ * a static file. A config that simply does not exist is treated as normal and
+ * stays silent; every other failure (for example a config file that throws
+ * while being evaluated) is logged so a broken config is not mistaken for a
+ * missing one.
+ *
+ * @returns The module's default export when it is truthy, otherwise the
+ *          module object itself; `null` when the config could not be loaded.
+ */
+export async function loadOntoConfig(): Promise<OntoConfig | null> {
+  try {
+    // Resolve relative to the directory the process was started from.
+    const loaded = await import(process.cwd() + '/onto.config');
+    return loaded.default || loaded;
+  } catch (error: unknown) {
+    const code =
+      typeof error === 'object' && error !== null && 'code' in error
+        ? (error as { code?: unknown }).code
+        : undefined;
+    // "Module not found" is the expected outcome for projects without a config.
+    if (code !== 'MODULE_NOT_FOUND' && code !== 'ERR_MODULE_NOT_FOUND') {
+      console.warn('[Onto] Failed to load onto.config:', error);
+    }
+    return null;
+  }
+}
+/**
+ * Generate llms.txt content from an OntoConfig.
+ *
+ * Output layout, in order:
+ * 1. `# <name>` heading
+ * 2. `> <summary>` blockquote
+ * 3. `## Key Routes` list (only when `routes` is non-empty)
+ * 4. `## Resources` list (only when `externalLinks` is non-empty)
+ * 5. one `## <heading>` block per entry in `sections`
+ *
+ * Route links are built by joining `baseUrl` and the route path with exactly
+ * one slash, so a trailing slash on `baseUrl` or a missing leading slash on
+ * the path does not produce malformed URLs. The link text is the route path
+ * exactly as configured.
+ *
+ * @param config - Site configuration to render.
+ * @returns The llms.txt body, trimmed and terminated by a single newline.
+ */
+export function generateLlmsTxt(config: OntoConfig): string {
+  const lines: string[] = [`# ${config.name}`, '', `> ${config.summary}`, ''];
+  // Key Routes section (if provided)
+  if (config.routes && config.routes.length > 0) {
+    // Normalise once: 'https://example.com/' + '/docs' must not become '...com//docs'.
+    const origin = config.baseUrl.replace(/\/+$/, '');
+    lines.push('## Key Routes', '');
+    for (const route of config.routes) {
+      const urlPath = route.path.startsWith('/') ? route.path : `/${route.path}`;
+      lines.push(`- [${route.path}](${origin}${urlPath}): ${route.description}`);
+    }
+    lines.push('');
+  }
+  // External Links section (if provided)
+  if (config.externalLinks && config.externalLinks.length > 0) {
+    lines.push('## Resources', '');
+    for (const link of config.externalLinks) {
+      const entry = `- [${link.title}](${link.url})`;
+      lines.push(link.description ? `${entry}: ${link.description}` : entry);
+    }
+    lines.push('');
+  }
+  // Custom sections (if provided)
+  for (const section of config.sections ?? []) {
+    lines.push(`## ${section.heading}`, '', section.content, '');
+  }
+  return lines.join('\n').trim() + '\n';
+}

package/src/index.ts CHANGED Viewed

@@ -1,3 +1,17 @@
 // We cannot use Webpack plugins reliably in Next.js Turbopack due to WorkerError restrictions.
 // Users must instead run `npx onto-next` as a postbuild script.
 export { extractContent } from './extractor';
+// Runtime helpers from ./config.
+export { loadOntoConfig, generateLlmsTxt } from './config';
+// OntoConfig and OntoRoute are interfaces with no runtime value, so they are
+// re-exported with `export type`; a plain `export { ... }` of a type is rejected
+// under `isolatedModules` and is unsafe for per-file transpilers.
+// The `*Type` aliases are kept for callers that already import them.
+export type {
+  OntoConfig,
+  OntoRoute,
+  PageType,
+  OntoConfig as OntoConfigType,
+  OntoRoute as OntoRouteType
+} from './config';
+export {
+  generateAIOMethodologySchema,
+  generateOrganizationSchema,
+  generateAboutPageSchema,
+  generateSchemaForPageType,
+  serializeSchema
+} from './schemas';
+export type {
+  AIOMethodologySchema,
+  OrganizationSchema,
+  AboutPageSchema
+} from './schemas';

package/src/middleware.ts CHANGED Viewed

@@ -1,20 +1,71 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { AI_BOT_USER_AGENTS, matchBot } from './bots';
+import { loadOntoConfig, generateLlmsTxt } from './config';
 export async function ontoMiddleware(request: NextRequest) {
-    const userAgent = request.headers.get('user-agent') || '';
+    const userAgent = request.headers.get('user-agent');
     const accept = request.headers.get('accept') || '';
     const matched = matchBot(userAgent);
     const isAiBot = !!matched;
     const isMarkdownRequested = accept.includes('text/markdown');
-    // If traffic is identified as an AI Bot, rewrite the URL
+    // If traffic is identified as an AI Bot or markdown is requested
     if (isAiBot || isMarkdownRequested) {
         const url = request.nextUrl.clone();
-        // Ignore internal next.js requests & static assets
-        if (url.pathname.startsWith('/_next') || url.pathname.includes('.')) {
+        // Ignore internal next.js requests & static assets (but not llms.txt)
+        if (url.pathname.startsWith('/_next')) {
+            return NextResponse.next();
+        }
+        // --- llms.txt Auto-Discovery ---
+        // Dynamically generate llms.txt from onto.config.ts
+        if (url.pathname === '/llms.txt') {
+            try {
+                const config = await loadOntoConfig();
+                if (config) {
+                    // Generate llms.txt dynamically from config
+                    const llmsTxtContent = generateLlmsTxt(config);
+                    const response = new NextResponse(llmsTxtContent, {
+                        headers: {
+                            'Content-Type': 'text/plain; charset=utf-8',
+                            'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
+                        }
+                    });
+                    if (matched) {
+                        response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
+                    }
+                    return response;
+                } else {
+                    // Fallback: try to serve static llms.txt from public folder
+                    url.pathname = '/llms.txt';
+                    const response = NextResponse.rewrite(url);
+                    response.headers.set('Content-Type', 'text/plain; charset=utf-8');
+                    response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');
+                    if (matched) {
+                        response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
+                    }
+                    return response;
+                }
+            } catch (error) {
+                console.error('[Onto] Failed to generate llms.txt:', error);
+                // Fallback to static file on error
+                url.pathname = '/llms.txt';
+                const response = NextResponse.rewrite(url);
+                response.headers.set('Content-Type', 'text/plain; charset=utf-8');
+                if (matched) {
+                    response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
+                }
+                return response;
+            }
+        }
+        // Skip other static assets
+        if (url.pathname.includes('.')) {
             return NextResponse.next();
         }
@@ -60,10 +111,8 @@ export async function ontoMiddleware(request: NextRequest) {
             // 2. Dynamic Context Injection
             try {
-                // Fetch the injection from the Control Plane
                 const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {
                     headers: { 'x-onto-key': ONTO_API_KEY },
-                    // Set a strict timeout to keep edge fast
                     signal: AbortSignal.timeout(1500)
                 });
@@ -71,7 +120,6 @@ export async function ontoMiddleware(request: NextRequest) {
                     const { injection } = await injectRes.json();
                     if (injection) {
-                        // To inject, we must fetch the local markdown and append
                         const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;
                         const mdRes = await fetch(localMdUrl);
@@ -98,7 +146,6 @@ export async function ontoMiddleware(request: NextRequest) {
         // Rewrite implicitly serves the target URL transparently to the client.
         const response = NextResponse.rewrite(url);
-        // Attach bot identification headers to the rewrite response
         if (matched) {
             response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
         }
@@ -112,3 +159,4 @@ export async function ontoMiddleware(request: NextRequest) {
 export { AI_BOT_USER_AGENTS, matchBot } from './bots';
 export type { AiBot } from './bots';

package/src/schemas.ts ADDED Viewed

@@ -0,0 +1,186 @@
+/**
+ * JSON-LD Schema generators for automatic structured data injection
+ * Follows Schema.org standards for AI-friendly metadata
+ */
+import { OntoConfig } from './config';
+/**
+ * Shape of the JSON-LD document describing the standard AIO (AI Optimization)
+ * scoring methodology: a Schema.org `HowTo` whose steps are the scoring criteria.
+ *
+ * Based on the Onto scoring algorithm:
+ * - Content Negotiation: 40% (30 points)
+ * - React Tax / Token Efficiency: 35% (30 points)
+ * - Structured Data: 25% (25 points)
+ * - Semantic HTML: Bonus (15 points)
+ *
+ * NOTE(review): the percentages and the point values listed above do not line
+ * up with each other (40/35/25 % vs. 30/30/25 points) — confirm which set is
+ * authoritative.
+ */
+export interface AIOMethodologySchema {
+  '@context': 'https://schema.org';
+  '@type': 'HowTo';
+  name: string;
+  description: string;
+  step: Array<{
+    '@type': 'HowToStep';
+    name: string;
+    text: string;
+    position: number;
+  }>;
+}
+/**
+ * Build the AIO scoring-methodology JSON-LD document.
+ *
+ * The result is a fixed Schema.org `HowTo` with four `HowToStep` entries
+ * (positions 1–4) explaining each scoring criterion to AI agents.
+ *
+ * @param config - Accepted for signature parity with the other generators; not read.
+ * @param pageUrl - Accepted for signature parity with the other generators; not read.
+ * @returns The methodology schema object.
+ */
+export function generateAIOMethodologySchema(
+  config: OntoConfig,
+  pageUrl: string
+): AIOMethodologySchema {
+  // [name, text] pairs in display order; `position` is derived from the index.
+  const criteria: Array<[string, string]> = [
+    [
+      'Content Negotiation',
+      'Check if the site responds to Accept: text/markdown header. Weight: 40%. Penalty: -30 points if missing. This ensures AI bots receive optimized content instead of heavy HTML.'
+    ],
+    [
+      'Token Efficiency (React Tax)',
+      'Measure the ratio of visible text to total HTML size. Weight: 35%. Penalty: -30 points if HTML > 100KB but text < 1KB. Detects JavaScript-heavy sites that are difficult for AI to parse.'
+    ],
+    [
+      'Structured Data',
+      'Verify presence of JSON-LD structured data (Schema.org). Weight: 25%. Penalty: -25 points if missing. Enables AI to confidently extract pricing, products, and entities.'
+    ],
+    [
+      'Semantic HTML',
+      'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. Helps AI agents separate navigation from core content.'
+    ]
+  ];
+  const step = criteria.map(([name, text], index) => ({
+    '@type': 'HowToStep' as const,
+    name,
+    text,
+    position: index + 1
+  }));
+  return {
+    '@context': 'https://schema.org',
+    '@type': 'HowTo',
+    name: 'AIO Score Calculation Methodology',
+    description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on four key metrics.',
+    step
+  };
+}
+/**
+ * Schema.org `Organization` JSON-LD document, used for About pages.
+ * Only `name` is mandatory; the remaining fields are included when known.
+ */
+export interface OrganizationSchema {
+  '@context': 'https://schema.org';
+  '@type': 'Organization';
+  name: string;
+  url?: string;
+  description?: string;
+  logo?: string;
+  foundingDate?: string;
+}
+/**
+ * Build the Organization JSON-LD document from `config.organization`.
+ *
+ * `name` is always copied; `url`, `description`, `logo` and `foundingDate`
+ * are copied (in that order) only when they hold a truthy value.
+ *
+ * @param config - Site configuration; only `organization` is read.
+ * @param pageUrl - Accepted for signature parity with the other generators; not read.
+ * @returns The schema, or `null` when no organization is configured.
+ */
+export function generateOrganizationSchema(
+  config: OntoConfig,
+  pageUrl: string
+): OrganizationSchema | null {
+  const org = config.organization;
+  if (!org) {
+    return null;
+  }
+  const schema: OrganizationSchema = {
+    '@context': 'https://schema.org',
+    '@type': 'Organization',
+    name: org.name
+  };
+  // Order matters: it determines key order in the serialized JSON.
+  const optionalFields = ['url', 'description', 'logo', 'foundingDate'] as const;
+  for (const field of optionalFields) {
+    const value = org[field];
+    if (value) {
+      schema[field] = value;
+    }
+  }
+  return schema;
+}
+/**
+ * Schema.org `AboutPage` JSON-LD document.
+ * Describes the page itself (`name`, `url`, optional `description`) and may
+ * embed the organization it is about as `mainEntity`.
+ */
+export interface AboutPageSchema {
+  '@context': 'https://schema.org';
+  '@type': 'AboutPage';
+  name: string;
+  url: string;
+  description?: string;
+  mainEntity?: OrganizationSchema;
+}
+/**
+ * Build the AboutPage JSON-LD document for a page.
+ *
+ * The page name is `About <config.name>` and the URL is `pageUrl`.
+ * `description` is set from `config.summary` when it is truthy, and
+ * `mainEntity` is set to the Organization schema when one can be generated.
+ *
+ * @param config - Site configuration.
+ * @param pageUrl - Absolute URL of the page the schema describes.
+ * @returns The AboutPage schema object.
+ */
+export function generateAboutPageSchema(
+  config: OntoConfig,
+  pageUrl: string
+): AboutPageSchema {
+  const mainEntity = generateOrganizationSchema(config, pageUrl);
+  // Spread order keeps the serialized key order: description before mainEntity.
+  return {
+    '@context': 'https://schema.org',
+    '@type': 'AboutPage',
+    name: `About ${config.name}`,
+    url: pageUrl,
+    ...(config.summary ? { description: config.summary } : {}),
+    ...(mainEntity ? { mainEntity } : {})
+  };
+}
+/**
+ * Pick and run the schema generator that corresponds to a page type.
+ *
+ * @param pageType - 'scoring' yields the methodology schema, 'about' yields
+ *                   the AboutPage schema; anything else yields nothing.
+ * @param config - Site configuration forwarded to the generator.
+ * @param pageUrl - Absolute page URL forwarded to the generator.
+ * @returns The generated schema object, or `null` when no schema applies.
+ */
+export function generateSchemaForPageType(
+  pageType: 'scoring' | 'about' | 'default',
+  config: OntoConfig,
+  pageUrl: string
+): any | null {
+  if (pageType === 'scoring') {
+    return generateAIOMethodologySchema(config, pageUrl);
+  }
+  if (pageType === 'about') {
+    return generateAboutPageSchema(config, pageUrl);
+  }
+  // 'default' and any unrecognised value: no schema.
+  return null;
+}
+/**
+ * Serialize a schema object into text that is safe to place inside a
+ * `<script type="application/ld+json">` element.
+ *
+ * The output is pretty-printed JSON in which every `<` is written as the JSON
+ * escape `\u003c`. The parsed value is unchanged, but a string such as
+ * `</script>` inside the data can no longer terminate the surrounding script
+ * element when the text is injected as raw HTML.
+ *
+ * @param schema - Schema object to serialize; falsy values mean "no schema".
+ * @returns The escaped JSON text, or `null` when there is nothing to
+ *          serialize (falsy input, or a value JSON cannot represent).
+ */
+export function serializeSchema(schema: any | null): string | null {
+  if (!schema) {
+    return null;
+  }
+  const json: string | undefined = JSON.stringify(schema, null, 2);
+  // JSON.stringify yields undefined for values such as functions or symbols.
+  if (json === undefined) {
+    return null;
+  }
+  return json.replace(/</g, '\\u003c');
+}

package/tsconfig.json CHANGED Viewed

@@ -7,6 +7,7 @@
         "strict": true,
         "skipLibCheck": true,
         "forceConsistentCasingInFileNames": true,
+        "jsx": "react-jsx",
         "outDir": "dist"
     },
     "include": [

package/tsup.config.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { defineConfig } from 'tsup';
 export default defineConfig({
-    entry: ['src/index.ts', 'src/cli.ts', 'src/middleware.ts'],
+    entry: ['src/index.ts', 'src/cli.ts', 'src/middleware.ts', 'src/OntoHead.tsx', 'src/OntoProvider.tsx', 'src/config.ts', 'src/schemas.ts'],
     format: ['cjs', 'esm'],
     dts: true,
     splitting: false,
@@ -10,5 +10,5 @@ export default defineConfig({
     bundle: true,
     outDir: 'dist',
     minify: true,
-    external: ['next'],
+    external: ['next', 'react'],
 });