@adwait12345/telemetry-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
/**
 * Shared types for the Telemetry SDK.
 */
type DetectionConfidence = "certain" | "high" | "medium" | "low";
/**
 * Which detection layer identified the bot.
 * NOTE(review): "no-js" is declared but never produced by detectBot in this
 * build — presumably reserved for client-side detection; confirm before relying on it.
 */
type DetectionMethod = "ua-match" | "header-anomaly" | "no-js" | "http10" | "automation-header";
/** Result of running bot detection over a normalized request. */
interface BotDetectionResult {
    isBot: boolean;
    confidence: DetectionConfidence;
    /** Detection layer that fired; null when no layer matched (isBot false). */
    method: DetectionMethod | null;
    /** e.g. "GPTBot", "ClaudeBot" — null if not matched to a known bot */
    botName: string | null;
    /** e.g. "ai-crawler", "search", "scraper" */
    botCategory: string | null;
}
/** Normalized request data that adapters extract from their framework's request object */
interface NormalizedRequest {
    userAgent: string;
    ip: string;
    path: string;
    method: string;
    referrer: string | null;
    acceptLanguage: string | null;
    acceptEncoding: string | null;
    /** sec-fetch-site header — present in Chrome 80+, Firefox 90+, Safari 15+ */
    secFetchSite: string | null;
    /** HTTP version e.g. "1.0", "1.1", "2.0" */
    httpVersion: string | null;
    /** Any automation-related custom headers the client sent */
    automationHeaders: string[];
}
/** Configuration passed when creating a tracker instance */
interface TelemetryConfig {
    /** Your Telemetry project ID */
    projectId: string;
    /** Telemetry API base URL. Defaults to the hosted service. */
    apiUrl?: string;
    /**
     * Whether to track ALL requests (including human visits) or only bots.
     * Default: false — only sends events for detected bots.
     * Set to true to also track server-side pageviews for humans (useful for SPAs).
     */
    trackAll?: boolean;
    /**
     * Whether to include known search engine bots (Googlebot, Bingbot etc.)
     * Default: true
     */
    trackSearchBots?: boolean;
    /**
     * Routes/paths to ignore. Supports exact strings and regex patterns.
     * e.g. ["/health", /^\/api\//]
     */
    ignorePaths?: (string | RegExp)[];
    /**
     * Custom bot definitions to add on top of the built-in list.
     * Custom patterns are checked before the built-in bots.
     */
    customBots?: Array<{
        name: string;
        pattern: RegExp;
        category?: string;
    }>;
    /** Enable verbose logging for debugging. Default: false */
    debug?: boolean;
    /**
     * Secret key for server-side bot tracking requests.
     * Required because these requests originate from the server and cannot
     * pass domain validation checks via the Origin header.
     */
    serverSecret?: string;
}
/** Payload sent to the Telemetry server-side tracking endpoint */
interface ServerTrackPayload {
    projectId: string;
    path: string;
    method: string;
    referrer: string | null;
    userAgent: string;
    ip: string;
    isBot: boolean;
    botName: string | null;
    botCategory: string | null;
    confidence: DetectionConfidence;
    detectionMethod: DetectionMethod | null;
    /** Discriminator for the ingestion pipeline — always "server-middleware" for this SDK. */
    source: "server-middleware";
    /** Event time as a string — presumably ISO-8601; TODO confirm against the ingestion endpoint. */
    timestamp: string;
}

/**
 * Detects whether a normalized request is from a bot.
 *
 * Detection layers (in order of priority):
 * 1. Automation headers — explicit tooling markers
 * 2. HTTP/1.0 — no modern browser uses this
 * 3. Named bot UA match (AI crawlers, search engines, generic scrapers)
 * 4. Generic bot UA pattern match
 * 5. Header anomaly — claims modern browser but missing sec-fetch / accept-language
 */
declare function detectBot(req: NormalizedRequest, customBots?: Array<{
    name: string;
    pattern: RegExp;
    category?: string;
}>): BotDetectionResult;
/**
 * Checks whether an automation header is present in a plain headers object.
 * Adapters call this to populate NormalizedRequest.automationHeaders.
 */
declare function extractAutomationHeaders(headers: Record<string, string | string[] | undefined>): string[];

/**
 * Known bot definitions with categories and detection metadata.
 * Order matters — more specific patterns are listed first.
 */
type BotCategory = "ai-crawler" | "search" | "scraper" | "monitor" | "unknown";
interface BotDefinition {
    /** Human-readable name shown in the dashboard */
    name: string;
    /** Classification bucket */
    category: BotCategory;
    /** User-Agent regex pattern */
    pattern: RegExp;
    /**
     * Whether the operator publishes official IP ranges.
     * When true, IP cross-check can upgrade confidence to "certain".
     */
    verifiable: boolean;
}
declare const AI_BOTS: BotDefinition[];
declare const SEARCH_BOTS: BotDefinition[];
declare const GENERIC_BOTS: BotDefinition[];
/** All known bots — AI crawlers checked first for best specificity */
declare const ALL_BOTS: BotDefinition[];

/**
 * Sends a server-side payload to the Telemetry backend.
 * This is meant to be fire-and-forget, so it does not throw errors.
 *
 * @param payload The data to track.
 * @param config The Telemetry configuration.
 */
declare function sendToTelemetry(payload: ServerTrackPayload, config: TelemetryConfig): Promise<void>;

export { AI_BOTS, ALL_BOTS, type BotCategory, type BotDefinition, type BotDetectionResult, type DetectionConfidence, type DetectionMethod, GENERIC_BOTS, type NormalizedRequest, SEARCH_BOTS, type ServerTrackPayload, type TelemetryConfig, detectBot, extractAutomationHeaders, sendToTelemetry };
@@ -0,0 +1,140 @@
/**
 * Shared types for the Telemetry SDK.
 */
type DetectionConfidence = "certain" | "high" | "medium" | "low";
/**
 * Which detection layer identified the bot.
 * NOTE(review): "no-js" is declared but never produced by detectBot in this
 * build — presumably reserved for client-side detection; confirm before relying on it.
 */
type DetectionMethod = "ua-match" | "header-anomaly" | "no-js" | "http10" | "automation-header";
/** Result of running bot detection over a normalized request. */
interface BotDetectionResult {
    isBot: boolean;
    confidence: DetectionConfidence;
    /** Detection layer that fired; null when no layer matched (isBot false). */
    method: DetectionMethod | null;
    /** e.g. "GPTBot", "ClaudeBot" — null if not matched to a known bot */
    botName: string | null;
    /** e.g. "ai-crawler", "search", "scraper" */
    botCategory: string | null;
}
/** Normalized request data that adapters extract from their framework's request object */
interface NormalizedRequest {
    userAgent: string;
    ip: string;
    path: string;
    method: string;
    referrer: string | null;
    acceptLanguage: string | null;
    acceptEncoding: string | null;
    /** sec-fetch-site header — present in Chrome 80+, Firefox 90+, Safari 15+ */
    secFetchSite: string | null;
    /** HTTP version e.g. "1.0", "1.1", "2.0" */
    httpVersion: string | null;
    /** Any automation-related custom headers the client sent */
    automationHeaders: string[];
}
/** Configuration passed when creating a tracker instance */
interface TelemetryConfig {
    /** Your Telemetry project ID */
    projectId: string;
    /** Telemetry API base URL. Defaults to the hosted service. */
    apiUrl?: string;
    /**
     * Whether to track ALL requests (including human visits) or only bots.
     * Default: false — only sends events for detected bots.
     * Set to true to also track server-side pageviews for humans (useful for SPAs).
     */
    trackAll?: boolean;
    /**
     * Whether to include known search engine bots (Googlebot, Bingbot etc.)
     * Default: true
     */
    trackSearchBots?: boolean;
    /**
     * Routes/paths to ignore. Supports exact strings and regex patterns.
     * e.g. ["/health", /^\/api\//]
     */
    ignorePaths?: (string | RegExp)[];
    /**
     * Custom bot definitions to add on top of the built-in list.
     * Custom patterns are checked before the built-in bots.
     */
    customBots?: Array<{
        name: string;
        pattern: RegExp;
        category?: string;
    }>;
    /** Enable verbose logging for debugging. Default: false */
    debug?: boolean;
    /**
     * Secret key for server-side bot tracking requests.
     * Required because these requests originate from the server and cannot
     * pass domain validation checks via the Origin header.
     */
    serverSecret?: string;
}
/** Payload sent to the Telemetry server-side tracking endpoint */
interface ServerTrackPayload {
    projectId: string;
    path: string;
    method: string;
    referrer: string | null;
    userAgent: string;
    ip: string;
    isBot: boolean;
    botName: string | null;
    botCategory: string | null;
    confidence: DetectionConfidence;
    detectionMethod: DetectionMethod | null;
    /** Discriminator for the ingestion pipeline — always "server-middleware" for this SDK. */
    source: "server-middleware";
    /** Event time as a string — presumably ISO-8601; TODO confirm against the ingestion endpoint. */
    timestamp: string;
}

/**
 * Detects whether a normalized request is from a bot.
 *
 * Detection layers (in order of priority):
 * 1. Automation headers — explicit tooling markers
 * 2. HTTP/1.0 — no modern browser uses this
 * 3. Named bot UA match (AI crawlers, search engines, generic scrapers)
 * 4. Generic bot UA pattern match
 * 5. Header anomaly — claims modern browser but missing sec-fetch / accept-language
 */
declare function detectBot(req: NormalizedRequest, customBots?: Array<{
    name: string;
    pattern: RegExp;
    category?: string;
}>): BotDetectionResult;
/**
 * Checks whether an automation header is present in a plain headers object.
 * Adapters call this to populate NormalizedRequest.automationHeaders.
 */
declare function extractAutomationHeaders(headers: Record<string, string | string[] | undefined>): string[];

/**
 * Known bot definitions with categories and detection metadata.
 * Order matters — more specific patterns are listed first.
 */
type BotCategory = "ai-crawler" | "search" | "scraper" | "monitor" | "unknown";
interface BotDefinition {
    /** Human-readable name shown in the dashboard */
    name: string;
    /** Classification bucket */
    category: BotCategory;
    /** User-Agent regex pattern */
    pattern: RegExp;
    /**
     * Whether the operator publishes official IP ranges.
     * When true, IP cross-check can upgrade confidence to "certain".
     */
    verifiable: boolean;
}
declare const AI_BOTS: BotDefinition[];
declare const SEARCH_BOTS: BotDefinition[];
declare const GENERIC_BOTS: BotDefinition[];
/** All known bots — AI crawlers checked first for best specificity */
declare const ALL_BOTS: BotDefinition[];

/**
 * Sends a server-side payload to the Telemetry backend.
 * This is meant to be fire-and-forget, so it does not throw errors.
 *
 * @param payload The data to track.
 * @param config The Telemetry configuration.
 */
declare function sendToTelemetry(payload: ServerTrackPayload, config: TelemetryConfig): Promise<void>;

export { AI_BOTS, ALL_BOTS, type BotCategory, type BotDefinition, type BotDetectionResult, type DetectionConfidence, type DetectionMethod, GENERIC_BOTS, type NormalizedRequest, SEARCH_BOTS, type ServerTrackPayload, type TelemetryConfig, detectBot, extractAutomationHeaders, sendToTelemetry };
package/dist/index.js ADDED
@@ -0,0 +1,377 @@
"use strict";
// Bundler-generated (esbuild-style) CommonJS interop helpers — do not edit by hand.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines a lazy, enumerable getter on `target` for every entry in `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies enumerable own properties of `from` onto `to` as getters, skipping
// `except` and anything `to` already defines.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps the export map in an object tagged `__esModule` for ESM interop.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
// Public exports of the package; values are resolved lazily via getters.
var index_exports = {};
__export(index_exports, {
  AI_BOTS: () => AI_BOTS,
  ALL_BOTS: () => ALL_BOTS,
  GENERIC_BOTS: () => GENERIC_BOTS,
  SEARCH_BOTS: () => SEARCH_BOTS,
  detectBot: () => detectBot,
  extractAutomationHeaders: () => extractAutomationHeaders,
  sendToTelemetry: () => sendToTelemetry
});
module.exports = __toCommonJS(index_exports);
33
+ // src/bots.ts
34
+ var AI_BOTS = [
35
+ {
36
+ name: "GPTBot",
37
+ category: "ai-crawler",
38
+ pattern: /GPTBot/i,
39
+ verifiable: true
40
+ // https://openai.com/gptbot
41
+ },
42
+ {
43
+ name: "ClaudeBot",
44
+ category: "ai-crawler",
45
+ pattern: /ClaudeBot|anthropic-ai/i,
46
+ verifiable: true
47
+ // Anthropic publishes IP ranges
48
+ },
49
+ {
50
+ name: "Google-Extended",
51
+ category: "ai-crawler",
52
+ pattern: /Google-Extended/i,
53
+ verifiable: true
54
+ // Gemini/Bard training crawler
55
+ },
56
+ {
57
+ name: "PerplexityBot",
58
+ category: "ai-crawler",
59
+ pattern: /PerplexityBot/i,
60
+ verifiable: false
61
+ },
62
+ {
63
+ name: "Amazonbot",
64
+ category: "ai-crawler",
65
+ pattern: /Amazonbot/i,
66
+ verifiable: true
67
+ },
68
+ {
69
+ name: "Meta-ExternalAgent",
70
+ category: "ai-crawler",
71
+ pattern: /Meta-ExternalAgent/i,
72
+ verifiable: false
73
+ },
74
+ {
75
+ name: "Applebot",
76
+ category: "ai-crawler",
77
+ pattern: /Applebot/i,
78
+ verifiable: true
79
+ },
80
+ {
81
+ name: "YouBot",
82
+ category: "ai-crawler",
83
+ pattern: /YouBot/i,
84
+ verifiable: false
85
+ },
86
+ {
87
+ name: "Bytespider",
88
+ category: "ai-crawler",
89
+ pattern: /Bytespider/i,
90
+ // TikTok / ByteDance — used for AI training
91
+ verifiable: false
92
+ },
93
+ {
94
+ name: "CCBot",
95
+ category: "ai-crawler",
96
+ pattern: /CCBot/i,
97
+ // Common Crawl — dataset used by many LLMs
98
+ verifiable: false
99
+ },
100
+ {
101
+ name: "cohere-ai",
102
+ category: "ai-crawler",
103
+ pattern: /cohere-ai/i,
104
+ verifiable: false
105
+ },
106
+ {
107
+ name: "DuckAssistBot",
108
+ category: "ai-crawler",
109
+ pattern: /DuckAssistBot/i,
110
+ verifiable: false
111
+ },
112
+ {
113
+ name: "Diffbot",
114
+ category: "ai-crawler",
115
+ pattern: /Diffbot/i,
116
+ verifiable: false
117
+ },
118
+ {
119
+ name: "Omgili",
120
+ category: "ai-crawler",
121
+ pattern: /Omgili|omgilibot/i,
122
+ verifiable: false
123
+ },
124
+ {
125
+ name: "ImagesiftBot",
126
+ category: "ai-crawler",
127
+ pattern: /ImagesiftBot/i,
128
+ verifiable: false
129
+ },
130
+ {
131
+ name: "Timpibot",
132
+ category: "ai-crawler",
133
+ pattern: /Timpibot/i,
134
+ verifiable: false
135
+ }
136
+ ];
137
+ var SEARCH_BOTS = [
138
+ {
139
+ name: "Googlebot",
140
+ category: "search",
141
+ pattern: /Googlebot/i,
142
+ verifiable: true
143
+ // Reverse DNS: *.googlebot.com / *.google.com
144
+ },
145
+ {
146
+ name: "Bingbot",
147
+ category: "search",
148
+ pattern: /bingbot/i,
149
+ verifiable: true
150
+ // Reverse DNS: *.search.msn.com
151
+ },
152
+ {
153
+ name: "Slurp",
154
+ category: "search",
155
+ pattern: /Slurp/i,
156
+ // Yahoo Search
157
+ verifiable: false
158
+ },
159
+ {
160
+ name: "DuckDuckBot",
161
+ category: "search",
162
+ pattern: /DuckDuckBot/i,
163
+ verifiable: false
164
+ },
165
+ {
166
+ name: "Baiduspider",
167
+ category: "search",
168
+ pattern: /Baiduspider/i,
169
+ verifiable: false
170
+ },
171
+ {
172
+ name: "YandexBot",
173
+ category: "search",
174
+ pattern: /YandexBot/i,
175
+ verifiable: false
176
+ },
177
+ {
178
+ name: "Sogou",
179
+ category: "search",
180
+ pattern: /Sogou/i,
181
+ verifiable: false
182
+ },
183
+ {
184
+ name: "Exabot",
185
+ category: "search",
186
+ pattern: /Exabot/i,
187
+ verifiable: false
188
+ }
189
+ ];
190
+ var GENERIC_BOTS = [
191
+ { name: "curl", category: "scraper", pattern: /^curl\//i, verifiable: false },
192
+ { name: "wget", category: "scraper", pattern: /^Wget\//i, verifiable: false },
193
+ {
194
+ name: "python-requests",
195
+ category: "scraper",
196
+ pattern: /python-requests/i,
197
+ verifiable: false
198
+ },
199
+ {
200
+ name: "Go-http-client",
201
+ category: "scraper",
202
+ pattern: /Go-http-client/i,
203
+ verifiable: false
204
+ },
205
+ {
206
+ name: "axios",
207
+ category: "scraper",
208
+ pattern: /^axios\//i,
209
+ verifiable: false
210
+ },
211
+ {
212
+ name: "node-fetch",
213
+ category: "scraper",
214
+ pattern: /node-fetch/i,
215
+ verifiable: false
216
+ },
217
+ {
218
+ name: "Scrapy",
219
+ category: "scraper",
220
+ pattern: /Scrapy/i,
221
+ verifiable: false
222
+ },
223
+ {
224
+ name: "UptimeRobot",
225
+ category: "monitor",
226
+ pattern: /UptimeRobot/i,
227
+ verifiable: false
228
+ },
229
+ {
230
+ name: "Pingdom",
231
+ category: "monitor",
232
+ pattern: /Pingdom/i,
233
+ verifiable: false
234
+ },
235
+ {
236
+ name: "StatusCake",
237
+ category: "monitor",
238
+ pattern: /StatusCake/i,
239
+ verifiable: false
240
+ },
241
+ {
242
+ name: "DatadogSynthetics",
243
+ category: "monitor",
244
+ pattern: /DatadogSynthetics/i,
245
+ verifiable: false
246
+ }
247
+ ];
248
+ var ALL_BOTS = [
249
+ ...AI_BOTS,
250
+ ...SEARCH_BOTS,
251
+ ...GENERIC_BOTS
252
+ ];
253
+
254
+ // src/detect.ts
255
+ var AUTOMATION_HEADER_PATTERNS = [
256
+ "x-selenium",
257
+ "x-puppeteer",
258
+ "x-playwright",
259
+ "x-cypress",
260
+ "x-automated",
261
+ "x-bot",
262
+ "x-crawler"
263
+ ];
264
+ var DEFINITIVE_BOT_UA_PATTERNS = [
265
+ /bot/i,
266
+ /crawler/i,
267
+ /spider/i,
268
+ /scraper/i,
269
+ /crawl/i,
270
+ /fetch/i,
271
+ /http_request/i,
272
+ /libwww/i,
273
+ /lwp-/i,
274
+ /python/i,
275
+ /ruby/i,
276
+ /java\//i,
277
+ /perl/i,
278
+ /go-http/i,
279
+ /okhttp/i,
280
+ /headless/i,
281
+ /phantom/i,
282
+ /selenium/i,
283
+ /webdriver/i,
284
+ /puppeteer/i,
285
+ /playwright/i
286
+ ];
287
+ function detectBot(req, customBots = []) {
288
+ const ua = req.userAgent || "";
289
+ if (req.automationHeaders.length > 0) {
290
+ return result(true, "certain", "automation-header", null, null);
291
+ }
292
+ if (req.httpVersion === "1.0") {
293
+ return result(true, "high", "http10", null, null);
294
+ }
295
+ const allBots = [
296
+ ...customBots.map((b) => ({
297
+ name: b.name,
298
+ pattern: b.pattern,
299
+ category: b.category ?? "unknown",
300
+ verifiable: false
301
+ })),
302
+ ...ALL_BOTS
303
+ ];
304
+ for (const bot of allBots) {
305
+ if (bot.pattern.test(ua)) {
306
+ return result(true, "certain", "ua-match", bot.name, bot.category);
307
+ }
308
+ }
309
+ if (!ua || ua.length < 10) {
310
+ return result(true, "high", "ua-match", null, "unknown");
311
+ }
312
+ for (const pattern of DEFINITIVE_BOT_UA_PATTERNS) {
313
+ if (pattern.test(ua)) {
314
+ return result(true, "high", "ua-match", null, "unknown");
315
+ }
316
+ }
317
+ const claimsModernBrowser = /Chrome\/([8-9]\d|1\d\d)|Firefox\/([8-9]\d|1\d\d)|Safari\/1[5-9]/i.test(ua);
318
+ if (claimsModernBrowser) {
319
+ const missingSec = !req.secFetchSite;
320
+ const missingLang = !req.acceptLanguage;
321
+ if (missingSec && missingLang) {
322
+ return result(true, "high", "header-anomaly", null, "unknown");
323
+ }
324
+ if (missingSec || missingLang) {
325
+ return result(true, "medium", "header-anomaly", null, "unknown");
326
+ }
327
+ }
328
+ return result(false, "low", null, null, null);
329
+ }
330
+ function result(isBot, confidence, method, botName, botCategory) {
331
+ return { isBot, confidence, method, botName, botCategory };
332
+ }
333
+ function extractAutomationHeaders(headers) {
334
+ return AUTOMATION_HEADER_PATTERNS.filter((h) => h in headers);
335
+ }
336
+
// src/index.ts
/**
 * Sends a server-side payload to the Telemetry backend.
 * Fire-and-forget: network/HTTP failures never throw; they are logged only
 * when `config.debug` is set.
 *
 * @param {ServerTrackPayload} payload The data to track.
 * @param {TelemetryConfig} config The Telemetry configuration.
 * @returns {Promise<void>}
 */
async function sendToTelemetry(payload, config) {
  // BUG FIX: previously this mutated the caller's `config.apiUrl` to
  // "http://localhost:3001" (a dev leftover) whenever apiUrl was unset, which
  // made the hosted-service fallback dead code and sent production events to
  // localhost. Default to the hosted service without mutating `config`.
  const apiUrl = config.apiUrl ?? "https://telemetry.yourdomain.com";
  const endpoint = `${apiUrl}/v1/track/server-pageview`;
  if (config.debug) {
    console.log(`[Telemetry] Sending payload to ${endpoint}`, payload);
  }
  // The middleware passes a server secret instead of an Origin check. Only
  // attach the header when a secret is configured (avoids "Bearer undefined").
  const headers = config.serverSecret
    ? { "Content-Type": "application/json", "Authorization": `Bearer ${config.serverSecret}` }
    : { "Content-Type": "application/json" };
  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok && config.debug) {
      console.error(`[Telemetry] Failed to send payload: ${response.status} ${response.statusText}`);
      const text = await response.text();
      console.error(`[Telemetry] Response body:`, text);
    }
  } catch (error) {
    // Swallow by design — tracking must never break the host application.
    if (config.debug) {
      console.error("[Telemetry] Error sending payload:", error);
    }
  }
}
// Annotate the CommonJS export names for ESM import in node:
// (dead code by design — the `0 &&` guard never executes; the literal exists
// only so static analysis of this CJS file can discover the named exports)
0 && (module.exports = {
  AI_BOTS,
  ALL_BOTS,
  GENERIC_BOTS,
  SEARCH_BOTS,
  detectBot,
  extractAutomationHeaders,
  sendToTelemetry
});
package/dist/index.mjs ADDED
@@ -0,0 +1,344 @@
1
+ // src/bots.ts
2
+ var AI_BOTS = [
3
+ {
4
+ name: "GPTBot",
5
+ category: "ai-crawler",
6
+ pattern: /GPTBot/i,
7
+ verifiable: true
8
+ // https://openai.com/gptbot
9
+ },
10
+ {
11
+ name: "ClaudeBot",
12
+ category: "ai-crawler",
13
+ pattern: /ClaudeBot|anthropic-ai/i,
14
+ verifiable: true
15
+ // Anthropic publishes IP ranges
16
+ },
17
+ {
18
+ name: "Google-Extended",
19
+ category: "ai-crawler",
20
+ pattern: /Google-Extended/i,
21
+ verifiable: true
22
+ // Gemini/Bard training crawler
23
+ },
24
+ {
25
+ name: "PerplexityBot",
26
+ category: "ai-crawler",
27
+ pattern: /PerplexityBot/i,
28
+ verifiable: false
29
+ },
30
+ {
31
+ name: "Amazonbot",
32
+ category: "ai-crawler",
33
+ pattern: /Amazonbot/i,
34
+ verifiable: true
35
+ },
36
+ {
37
+ name: "Meta-ExternalAgent",
38
+ category: "ai-crawler",
39
+ pattern: /Meta-ExternalAgent/i,
40
+ verifiable: false
41
+ },
42
+ {
43
+ name: "Applebot",
44
+ category: "ai-crawler",
45
+ pattern: /Applebot/i,
46
+ verifiable: true
47
+ },
48
+ {
49
+ name: "YouBot",
50
+ category: "ai-crawler",
51
+ pattern: /YouBot/i,
52
+ verifiable: false
53
+ },
54
+ {
55
+ name: "Bytespider",
56
+ category: "ai-crawler",
57
+ pattern: /Bytespider/i,
58
+ // TikTok / ByteDance — used for AI training
59
+ verifiable: false
60
+ },
61
+ {
62
+ name: "CCBot",
63
+ category: "ai-crawler",
64
+ pattern: /CCBot/i,
65
+ // Common Crawl — dataset used by many LLMs
66
+ verifiable: false
67
+ },
68
+ {
69
+ name: "cohere-ai",
70
+ category: "ai-crawler",
71
+ pattern: /cohere-ai/i,
72
+ verifiable: false
73
+ },
74
+ {
75
+ name: "DuckAssistBot",
76
+ category: "ai-crawler",
77
+ pattern: /DuckAssistBot/i,
78
+ verifiable: false
79
+ },
80
+ {
81
+ name: "Diffbot",
82
+ category: "ai-crawler",
83
+ pattern: /Diffbot/i,
84
+ verifiable: false
85
+ },
86
+ {
87
+ name: "Omgili",
88
+ category: "ai-crawler",
89
+ pattern: /Omgili|omgilibot/i,
90
+ verifiable: false
91
+ },
92
+ {
93
+ name: "ImagesiftBot",
94
+ category: "ai-crawler",
95
+ pattern: /ImagesiftBot/i,
96
+ verifiable: false
97
+ },
98
+ {
99
+ name: "Timpibot",
100
+ category: "ai-crawler",
101
+ pattern: /Timpibot/i,
102
+ verifiable: false
103
+ }
104
+ ];
105
+ var SEARCH_BOTS = [
106
+ {
107
+ name: "Googlebot",
108
+ category: "search",
109
+ pattern: /Googlebot/i,
110
+ verifiable: true
111
+ // Reverse DNS: *.googlebot.com / *.google.com
112
+ },
113
+ {
114
+ name: "Bingbot",
115
+ category: "search",
116
+ pattern: /bingbot/i,
117
+ verifiable: true
118
+ // Reverse DNS: *.search.msn.com
119
+ },
120
+ {
121
+ name: "Slurp",
122
+ category: "search",
123
+ pattern: /Slurp/i,
124
+ // Yahoo Search
125
+ verifiable: false
126
+ },
127
+ {
128
+ name: "DuckDuckBot",
129
+ category: "search",
130
+ pattern: /DuckDuckBot/i,
131
+ verifiable: false
132
+ },
133
+ {
134
+ name: "Baiduspider",
135
+ category: "search",
136
+ pattern: /Baiduspider/i,
137
+ verifiable: false
138
+ },
139
+ {
140
+ name: "YandexBot",
141
+ category: "search",
142
+ pattern: /YandexBot/i,
143
+ verifiable: false
144
+ },
145
+ {
146
+ name: "Sogou",
147
+ category: "search",
148
+ pattern: /Sogou/i,
149
+ verifiable: false
150
+ },
151
+ {
152
+ name: "Exabot",
153
+ category: "search",
154
+ pattern: /Exabot/i,
155
+ verifiable: false
156
+ }
157
+ ];
158
+ var GENERIC_BOTS = [
159
+ { name: "curl", category: "scraper", pattern: /^curl\//i, verifiable: false },
160
+ { name: "wget", category: "scraper", pattern: /^Wget\//i, verifiable: false },
161
+ {
162
+ name: "python-requests",
163
+ category: "scraper",
164
+ pattern: /python-requests/i,
165
+ verifiable: false
166
+ },
167
+ {
168
+ name: "Go-http-client",
169
+ category: "scraper",
170
+ pattern: /Go-http-client/i,
171
+ verifiable: false
172
+ },
173
+ {
174
+ name: "axios",
175
+ category: "scraper",
176
+ pattern: /^axios\//i,
177
+ verifiable: false
178
+ },
179
+ {
180
+ name: "node-fetch",
181
+ category: "scraper",
182
+ pattern: /node-fetch/i,
183
+ verifiable: false
184
+ },
185
+ {
186
+ name: "Scrapy",
187
+ category: "scraper",
188
+ pattern: /Scrapy/i,
189
+ verifiable: false
190
+ },
191
+ {
192
+ name: "UptimeRobot",
193
+ category: "monitor",
194
+ pattern: /UptimeRobot/i,
195
+ verifiable: false
196
+ },
197
+ {
198
+ name: "Pingdom",
199
+ category: "monitor",
200
+ pattern: /Pingdom/i,
201
+ verifiable: false
202
+ },
203
+ {
204
+ name: "StatusCake",
205
+ category: "monitor",
206
+ pattern: /StatusCake/i,
207
+ verifiable: false
208
+ },
209
+ {
210
+ name: "DatadogSynthetics",
211
+ category: "monitor",
212
+ pattern: /DatadogSynthetics/i,
213
+ verifiable: false
214
+ }
215
+ ];
216
+ var ALL_BOTS = [
217
+ ...AI_BOTS,
218
+ ...SEARCH_BOTS,
219
+ ...GENERIC_BOTS
220
+ ];
221
+
222
+ // src/detect.ts
223
+ var AUTOMATION_HEADER_PATTERNS = [
224
+ "x-selenium",
225
+ "x-puppeteer",
226
+ "x-playwright",
227
+ "x-cypress",
228
+ "x-automated",
229
+ "x-bot",
230
+ "x-crawler"
231
+ ];
232
+ var DEFINITIVE_BOT_UA_PATTERNS = [
233
+ /bot/i,
234
+ /crawler/i,
235
+ /spider/i,
236
+ /scraper/i,
237
+ /crawl/i,
238
+ /fetch/i,
239
+ /http_request/i,
240
+ /libwww/i,
241
+ /lwp-/i,
242
+ /python/i,
243
+ /ruby/i,
244
+ /java\//i,
245
+ /perl/i,
246
+ /go-http/i,
247
+ /okhttp/i,
248
+ /headless/i,
249
+ /phantom/i,
250
+ /selenium/i,
251
+ /webdriver/i,
252
+ /puppeteer/i,
253
+ /playwright/i
254
+ ];
255
+ function detectBot(req, customBots = []) {
256
+ const ua = req.userAgent || "";
257
+ if (req.automationHeaders.length > 0) {
258
+ return result(true, "certain", "automation-header", null, null);
259
+ }
260
+ if (req.httpVersion === "1.0") {
261
+ return result(true, "high", "http10", null, null);
262
+ }
263
+ const allBots = [
264
+ ...customBots.map((b) => ({
265
+ name: b.name,
266
+ pattern: b.pattern,
267
+ category: b.category ?? "unknown",
268
+ verifiable: false
269
+ })),
270
+ ...ALL_BOTS
271
+ ];
272
+ for (const bot of allBots) {
273
+ if (bot.pattern.test(ua)) {
274
+ return result(true, "certain", "ua-match", bot.name, bot.category);
275
+ }
276
+ }
277
+ if (!ua || ua.length < 10) {
278
+ return result(true, "high", "ua-match", null, "unknown");
279
+ }
280
+ for (const pattern of DEFINITIVE_BOT_UA_PATTERNS) {
281
+ if (pattern.test(ua)) {
282
+ return result(true, "high", "ua-match", null, "unknown");
283
+ }
284
+ }
285
+ const claimsModernBrowser = /Chrome\/([8-9]\d|1\d\d)|Firefox\/([8-9]\d|1\d\d)|Safari\/1[5-9]/i.test(ua);
286
+ if (claimsModernBrowser) {
287
+ const missingSec = !req.secFetchSite;
288
+ const missingLang = !req.acceptLanguage;
289
+ if (missingSec && missingLang) {
290
+ return result(true, "high", "header-anomaly", null, "unknown");
291
+ }
292
+ if (missingSec || missingLang) {
293
+ return result(true, "medium", "header-anomaly", null, "unknown");
294
+ }
295
+ }
296
+ return result(false, "low", null, null, null);
297
+ }
298
+ function result(isBot, confidence, method, botName, botCategory) {
299
+ return { isBot, confidence, method, botName, botCategory };
300
+ }
301
+ function extractAutomationHeaders(headers) {
302
+ return AUTOMATION_HEADER_PATTERNS.filter((h) => h in headers);
303
+ }
304
+
// src/index.ts
/**
 * Sends a server-side payload to the Telemetry backend.
 * Fire-and-forget: network/HTTP failures never throw; they are logged only
 * when `config.debug` is set.
 *
 * @param {ServerTrackPayload} payload The data to track.
 * @param {TelemetryConfig} config The Telemetry configuration.
 * @returns {Promise<void>}
 */
async function sendToTelemetry(payload, config) {
  // BUG FIX: previously this mutated the caller's `config.apiUrl` to
  // "http://localhost:3001" (a dev leftover) whenever apiUrl was unset, which
  // made the hosted-service fallback dead code and sent production events to
  // localhost. Default to the hosted service without mutating `config`.
  const apiUrl = config.apiUrl ?? "https://telemetry.yourdomain.com";
  const endpoint = `${apiUrl}/v1/track/server-pageview`;
  if (config.debug) {
    console.log(`[Telemetry] Sending payload to ${endpoint}`, payload);
  }
  // The middleware passes a server secret instead of an Origin check. Only
  // attach the header when a secret is configured (avoids "Bearer undefined").
  const headers = config.serverSecret
    ? { "Content-Type": "application/json", "Authorization": `Bearer ${config.serverSecret}` }
    : { "Content-Type": "application/json" };
  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok && config.debug) {
      console.error(`[Telemetry] Failed to send payload: ${response.status} ${response.statusText}`);
      const text = await response.text();
      console.error(`[Telemetry] Response body:`, text);
    }
  } catch (error) {
    // Swallow by design — tracking must never break the host application.
    if (config.debug) {
      console.error("[Telemetry] Error sending payload:", error);
    }
  }
}
// Public API of @adwait12345/telemetry-core (ESM entry point).
export {
  AI_BOTS,
  ALL_BOTS,
  GENERIC_BOTS,
  SEARCH_BOTS,
  detectBot,
  extractAutomationHeaders,
  sendToTelemetry
};
package/package.json ADDED
@@ -0,0 +1,29 @@
{
  "name": "@adwait12345/telemetry-core",
  "version": "0.1.0",
  "description": "Framework-agnostic core for Telemetry SDK — bot detection and server-side tracking",
  "main": "./dist/index.js",
  "module": "./dist/index.mjs",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.mjs",
      "require": "./dist/index.js"
    }
  },
  "files": [
    "dist"
  ],
  "devDependencies": {
    "tsup": "^8.0.0",
    "typescript": "^5.4.0",
    "@types/node": "^20.0.0"
  },
  "scripts": {
    "build": "tsup src/index.ts --format esm,cjs --dts --clean",
    "dev": "tsup src/index.ts --format esm,cjs --dts --watch",
    "typecheck": "tsc --noEmit",
    "clean": "rm -rf dist"
  }
}