@adwait12345/telemetry-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
/**
 * Shared types for the Telemetry SDK.
 */
type DetectionConfidence = "certain" | "high" | "medium" | "low";
/**
 * Which detection layer identified the bot.
 * NOTE(review): "no-js" is declared but never produced by detectBot in this
 * build — presumably reserved for client-side detection; confirm before relying on it.
 */
type DetectionMethod = "ua-match" | "header-anomaly" | "no-js" | "http10" | "automation-header";
/** Result of running bot detection over a normalized request. */
interface BotDetectionResult {
    isBot: boolean;
    confidence: DetectionConfidence;
    /** Detection layer that fired; null when no layer matched (isBot false). */
    method: DetectionMethod | null;
    /** e.g. "GPTBot", "ClaudeBot" — null if not matched to a known bot */
    botName: string | null;
    /** e.g. "ai-crawler", "search", "scraper" */
    botCategory: string | null;
}
/** Normalized request data that adapters extract from their framework's request object */
interface NormalizedRequest {
    userAgent: string;
    ip: string;
    path: string;
    method: string;
    referrer: string | null;
    acceptLanguage: string | null;
    acceptEncoding: string | null;
    /** sec-fetch-site header — present in Chrome 80+, Firefox 90+, Safari 15+ */
    secFetchSite: string | null;
    /** HTTP version e.g. "1.0", "1.1", "2.0" */
    httpVersion: string | null;
    /** Any automation-related custom headers the client sent */
    automationHeaders: string[];
}
/** Configuration passed when creating a tracker instance */
interface TelemetryConfig {
    /** Your Telemetry project ID */
    projectId: string;
    /** Telemetry API base URL. Defaults to the hosted service. */
    apiUrl?: string;
    /**
     * Whether to track ALL requests (including human visits) or only bots.
     * Default: false — only sends events for detected bots.
     * Set to true to also track server-side pageviews for humans (useful for SPAs).
     */
    trackAll?: boolean;
    /**
     * Whether to include known search engine bots (Googlebot, Bingbot etc.)
     * Default: true
     */
    trackSearchBots?: boolean;
    /**
     * Routes/paths to ignore. Supports exact strings and regex patterns.
     * e.g. ["/health", /^\/api\//]
     */
    ignorePaths?: (string | RegExp)[];
    /**
     * Custom bot definitions to add on top of the built-in list.
     * Custom patterns are checked before the built-in bots.
     */
    customBots?: Array<{
        name: string;
        pattern: RegExp;
        category?: string;
    }>;
    /** Enable verbose logging for debugging. Default: false */
    debug?: boolean;
    /**
     * Secret key for server-side bot tracking requests.
     * Required because these requests originate from the server and cannot
     * pass domain validation checks via the Origin header.
     */
    serverSecret?: string;
}
/** Payload sent to the Telemetry server-side tracking endpoint */
interface ServerTrackPayload {
    projectId: string;
    path: string;
    method: string;
    referrer: string | null;
    userAgent: string;
    ip: string;
    isBot: boolean;
    botName: string | null;
    botCategory: string | null;
    confidence: DetectionConfidence;
    detectionMethod: DetectionMethod | null;
    /** Discriminator for the ingestion pipeline — always "server-middleware" for this SDK. */
    source: "server-middleware";
    /** Event time as a string — presumably ISO-8601; TODO confirm against the ingestion endpoint. */
    timestamp: string;
}

/**
 * Detects whether a normalized request is from a bot.
 *
 * Detection layers (in order of priority):
 * 1. Automation headers — explicit tooling markers
 * 2. HTTP/1.0 — no modern browser uses this
 * 3. Named bot UA match (AI crawlers, search engines, generic scrapers)
 * 4. Generic bot UA pattern match
 * 5. Header anomaly — claims modern browser but missing sec-fetch / accept-language
 */
declare function detectBot(req: NormalizedRequest, customBots?: Array<{
    name: string;
    pattern: RegExp;
    category?: string;
}>): BotDetectionResult;
/**
 * Checks whether an automation header is present in a plain headers object.
 * Adapters call this to populate NormalizedRequest.automationHeaders.
 */
declare function extractAutomationHeaders(headers: Record<string, string | string[] | undefined>): string[];

/**
 * Known bot definitions with categories and detection metadata.
 * Order matters — more specific patterns are listed first.
 */
type BotCategory = "ai-crawler" | "search" | "scraper" | "monitor" | "unknown";
interface BotDefinition {
    /** Human-readable name shown in the dashboard */
    name: string;
    /** Classification bucket */
    category: BotCategory;
    /** User-Agent regex pattern */
    pattern: RegExp;
    /**
     * Whether the operator publishes official IP ranges.
     * When true, IP cross-check can upgrade confidence to "certain".
     */
    verifiable: boolean;
}
declare const AI_BOTS: BotDefinition[];
declare const SEARCH_BOTS: BotDefinition[];
declare const GENERIC_BOTS: BotDefinition[];
/** All known bots — AI crawlers checked first for best specificity */
declare const ALL_BOTS: BotDefinition[];

/**
 * Sends a server-side payload to the Telemetry backend.
 * This is meant to be fire-and-forget, so it does not throw errors.
 *
 * @param payload The data to track.
 * @param config The Telemetry configuration.
 */
declare function sendToTelemetry(payload: ServerTrackPayload, config: TelemetryConfig): Promise<void>;

export { AI_BOTS, ALL_BOTS, type BotCategory, type BotDefinition, type BotDetectionResult, type DetectionConfidence, type DetectionMethod, GENERIC_BOTS, type NormalizedRequest, SEARCH_BOTS, type ServerTrackPayload, type TelemetryConfig, detectBot, extractAutomationHeaders, sendToTelemetry };
@@ -0,0 +1,140 @@
/**
 * Shared types for the Telemetry SDK.
 */
type DetectionConfidence = "certain" | "high" | "medium" | "low";
/**
 * Which detection layer identified the bot.
 * NOTE(review): "no-js" is declared but never produced by detectBot in this
 * build — presumably reserved for client-side detection; confirm before relying on it.
 */
type DetectionMethod = "ua-match" | "header-anomaly" | "no-js" | "http10" | "automation-header";
/** Result of running bot detection over a normalized request. */
interface BotDetectionResult {
    isBot: boolean;
    confidence: DetectionConfidence;
    /** Detection layer that fired; null when no layer matched (isBot false). */
    method: DetectionMethod | null;
    /** e.g. "GPTBot", "ClaudeBot" — null if not matched to a known bot */
    botName: string | null;
    /** e.g. "ai-crawler", "search", "scraper" */
    botCategory: string | null;
}
/** Normalized request data that adapters extract from their framework's request object */
interface NormalizedRequest {
    userAgent: string;
    ip: string;
    path: string;
    method: string;
    referrer: string | null;
    acceptLanguage: string | null;
    acceptEncoding: string | null;
    /** sec-fetch-site header — present in Chrome 80+, Firefox 90+, Safari 15+ */
    secFetchSite: string | null;
    /** HTTP version e.g. "1.0", "1.1", "2.0" */
    httpVersion: string | null;
    /** Any automation-related custom headers the client sent */
    automationHeaders: string[];
}
/** Configuration passed when creating a tracker instance */
interface TelemetryConfig {
    /** Your Telemetry project ID */
    projectId: string;
    /** Telemetry API base URL. Defaults to the hosted service. */
    apiUrl?: string;
    /**
     * Whether to track ALL requests (including human visits) or only bots.
     * Default: false — only sends events for detected bots.
     * Set to true to also track server-side pageviews for humans (useful for SPAs).
     */
    trackAll?: boolean;
    /**
     * Whether to include known search engine bots (Googlebot, Bingbot etc.)
     * Default: true
     */
    trackSearchBots?: boolean;
    /**
     * Routes/paths to ignore. Supports exact strings and regex patterns.
     * e.g. ["/health", /^\/api\//]
     */
    ignorePaths?: (string | RegExp)[];
    /**
     * Custom bot definitions to add on top of the built-in list.
     * Custom patterns are checked before the built-in bots.
     */
    customBots?: Array<{
        name: string;
        pattern: RegExp;
        category?: string;
    }>;
    /** Enable verbose logging for debugging. Default: false */
    debug?: boolean;
    /**
     * Secret key for server-side bot tracking requests.
     * Required because these requests originate from the server and cannot
     * pass domain validation checks via the Origin header.
     */
    serverSecret?: string;
}
/** Payload sent to the Telemetry server-side tracking endpoint */
interface ServerTrackPayload {
    projectId: string;
    path: string;
    method: string;
    referrer: string | null;
    userAgent: string;
    ip: string;
    isBot: boolean;
    botName: string | null;
    botCategory: string | null;
    confidence: DetectionConfidence;
    detectionMethod: DetectionMethod | null;
    /** Discriminator for the ingestion pipeline — always "server-middleware" for this SDK. */
    source: "server-middleware";
    /** Event time as a string — presumably ISO-8601; TODO confirm against the ingestion endpoint. */
    timestamp: string;
}

/**
 * Detects whether a normalized request is from a bot.
 *
 * Detection layers (in order of priority):
 * 1. Automation headers — explicit tooling markers
 * 2. HTTP/1.0 — no modern browser uses this
 * 3. Named bot UA match (AI crawlers, search engines, generic scrapers)
 * 4. Generic bot UA pattern match
 * 5. Header anomaly — claims modern browser but missing sec-fetch / accept-language
 */
declare function detectBot(req: NormalizedRequest, customBots?: Array<{
    name: string;
    pattern: RegExp;
    category?: string;
}>): BotDetectionResult;
/**
 * Checks whether an automation header is present in a plain headers object.
 * Adapters call this to populate NormalizedRequest.automationHeaders.
 */
declare function extractAutomationHeaders(headers: Record<string, string | string[] | undefined>): string[];

/**
 * Known bot definitions with categories and detection metadata.
 * Order matters — more specific patterns are listed first.
 */
type BotCategory = "ai-crawler" | "search" | "scraper" | "monitor" | "unknown";
interface BotDefinition {
    /** Human-readable name shown in the dashboard */
    name: string;
    /** Classification bucket */
    category: BotCategory;
    /** User-Agent regex pattern */
    pattern: RegExp;
    /**
     * Whether the operator publishes official IP ranges.
     * When true, IP cross-check can upgrade confidence to "certain".
     */
    verifiable: boolean;
}
declare const AI_BOTS: BotDefinition[];
declare const SEARCH_BOTS: BotDefinition[];
declare const GENERIC_BOTS: BotDefinition[];
/** All known bots — AI crawlers checked first for best specificity */
declare const ALL_BOTS: BotDefinition[];

/**
 * Sends a server-side payload to the Telemetry backend.
 * This is meant to be fire-and-forget, so it does not throw errors.
 *
 * @param payload The data to track.
 * @param config The Telemetry configuration.
 */
declare function sendToTelemetry(payload: ServerTrackPayload, config: TelemetryConfig): Promise<void>;

export { AI_BOTS, ALL_BOTS, type BotCategory, type BotDefinition, type BotDetectionResult, type DetectionConfidence, type DetectionMethod, GENERIC_BOTS, type NormalizedRequest, SEARCH_BOTS, type ServerTrackPayload, type TelemetryConfig, detectBot, extractAutomationHeaders, sendToTelemetry };
package/dist/index.js ADDED
@@ -0,0 +1,377 @@
"use strict";
// Bundler-generated (esbuild-style) CommonJS interop helpers — do not edit by hand.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines a lazy, enumerable getter on `target` for every entry in `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies enumerable own properties of `from` onto `to` as getters, skipping
// `except` and anything `to` already defines.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps the export map in an object tagged `__esModule` for ESM interop.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
// Public exports of the package; values are resolved lazily via getters.
var index_exports = {};
__export(index_exports, {
  AI_BOTS: () => AI_BOTS,
  ALL_BOTS: () => ALL_BOTS,
  GENERIC_BOTS: () => GENERIC_BOTS,
  SEARCH_BOTS: () => SEARCH_BOTS,
  detectBot: () => detectBot,
  extractAutomationHeaders: () => extractAutomationHeaders,
  sendToTelemetry: () => sendToTelemetry
});
module.exports = __toCommonJS(index_exports);
33
+ // src/bots.ts
34
+ var AI_BOTS = [
35
+ {
36
+ name: "GPTBot",
37
+ category: "ai-crawler",
38
+ pattern: /GPTBot/i,
39
+ verifiable: true
40
+ // https://openai.com/gptbot
41
+ },
42
+ {
43
+ name: "ClaudeBot",
44
+ category: "ai-crawler",
45
+ pattern: /ClaudeBot|anthropic-ai/i,
46
+ verifiable: true
47
+ // Anthropic publishes IP ranges
48
+ },
49
+ {
50
+ name: "Google-Extended",
51
+ category: "ai-crawler",
52
+ pattern: /Google-Extended/i,
53
+ verifiable: true
54
+ // Gemini/Bard training crawler
55
+ },
56
+ {
57
+ name: "PerplexityBot",
58
+ category: "ai-crawler",
59
+ pattern: /PerplexityBot/i,
60
+ verifiable: false
61
+ },
62
+ {
63
+ name: "Amazonbot",
64
+ category: "ai-crawler",
65
+ pattern: /Amazonbot/i,
66
+ verifiable: true
67
+ },
68
+ {
69
+ name: "Meta-ExternalAgent",
70
+ category: "ai-crawler",
71
+ pattern: /Meta-ExternalAgent/i,
72
+ verifiable: false
73
+ },
74
+ {
75
+ name: "Applebot",
76
+ category: "ai-crawler",
77
+ pattern: /Applebot/i,
78
+ verifiable: true
79
+ },
80
+ {
81
+ name: "YouBot",
82
+ category: "ai-crawler",
83
+ pattern: /YouBot/i,
84
+ verifiable: false
85
+ },
86
+ {
87
+ name: "Bytespider",
88
+ category: "ai-crawler",
89
+ pattern: /Bytespider/i,
90
+ // TikTok / ByteDance — used for AI training
91
+ verifiable: false
92
+ },
93
+ {
94
+ name: "CCBot",
95
+ category: "ai-crawler",
96
+ pattern: /CCBot/i,
97
+ // Common Crawl — dataset used by many LLMs
98
+ verifiable: false
99
+ },
100
+ {
101
+ name: "cohere-ai",
102
+ category: "ai-crawler",
103
+ pattern: /cohere-ai/i,
104
+ verifiable: false
105
+ },
106
+ {
107
+ name: "DuckAssistBot",
108
+ category: "ai-crawler",
109
+ pattern: /DuckAssistBot/i,
110
+ verifiable: false
111
+ },
112
+ {
113
+ name: "Diffbot",
114
+ category: "ai-crawler",
115
+ pattern: /Diffbot/i,
116
+ verifiable: false
117
+ },
118
+ {
119
+ name: "Omgili",
120
+ category: "ai-crawler",
121
+ pattern: /Omgili|omgilibot/i,
122
+ verifiable: false
123
+ },
124
+ {
125
+ name: "ImagesiftBot",
126
+ category: "ai-crawler",
127
+ pattern: /ImagesiftBot/i,
128
+ verifiable: false
129
+ },
130
+ {
131
+ name: "Timpibot",
132
+ category: "ai-crawler",
133
+ pattern: /Timpibot/i,
134
+ verifiable: false
135
+ }
136
+ ];
137
+ var SEARCH_BOTS = [
138
+ {
139
+ name: "Googlebot",
140
+ category: "search",
141
+ pattern: /Googlebot/i,
142
+ verifiable: true
143
+ // Reverse DNS: *.googlebot.com / *.google.com
144
+ },
145
+ {
146
+ name: "Bingbot",
147
+ category: "search",
148
+ pattern: /bingbot/i,
149
+ verifiable: true
150
+ // Reverse DNS: *.search.msn.com
151
+ },
152
+ {
153
+ name: "Slurp",
154
+ category: "search",
155
+ pattern: /Slurp/i,
156
+ // Yahoo Search
157
+ verifiable: false
158
+ },
159
+ {
160
+ name: "DuckDuckBot",
161
+ category: "search",
162
+ pattern: /DuckDuckBot/i,
163
+ verifiable: false
164
+ },
165
+ {
166
+ name: "Baiduspider",
167
+ category: "search",
168
+ pattern: /Baiduspider/i,
169
+ verifiable: false
170
+ },
171
+ {
172
+ name: "YandexBot",
173
+ category: "search",
174
+ pattern: /YandexBot/i,
175
+ verifiable: false
176
+ },
177
+ {
178
+ name: "Sogou",
179
+ category: "search",
180
+ pattern: /Sogou/i,
181
+ verifiable: false
182
+ },
183
+ {
184
+ name: "Exabot",
185
+ category: "search",
186
+ pattern: /Exabot/i,
187
+ verifiable: false
188
+ }
189
+ ];
190
+ var GENERIC_BOTS = [
191
+ { name: "curl", category: "scraper", pattern: /^curl\//i, verifiable: false },
192
+ { name: "wget", category: "scraper", pattern: /^Wget\//i, verifiable: false },
193
+ {
194
+ name: "python-requests",
195
+ category: "scraper",
196
+ pattern: /python-requests/i,
197
+ verifiable: false
198
+ },
199
+ {
200
+ name: "Go-http-client",
201
+ category: "scraper",
202
+ pattern: /Go-http-client/i,
203
+ verifiable: false
204
+ },
205
+ {
206
+ name: "axios",
207
+ category: "scraper",
208
+ pattern: /^axios\//i,
209
+ verifiable: false
210
+ },
211
+ {
212
+ name: "node-fetch",
213
+ category: "scraper",
214
+ pattern: /node-fetch/i,
215
+ verifiable: false
216
+ },
217
+ {
218
+ name: "Scrapy",
219
+ category: "scraper",
220
+ pattern: /Scrapy/i,
221
+ verifiable: false
222
+ },
223
+ {
224
+ name: "UptimeRobot",
225
+ category: "monitor",
226
+ pattern: /UptimeRobot/i,
227
+ verifiable: false
228
+ },
229
+ {
230
+ name: "Pingdom",
231
+ category: "monitor",
232
+ pattern: /Pingdom/i,
233
+ verifiable: false
234
+ },
235
+ {
236
+ name: "StatusCake",
237
+ category: "monitor",
238
+ pattern: /StatusCake/i,
239
+ verifiable: false
240
+ },
241
+ {
242
+ name: "DatadogSynthetics",
243
+ category: "monitor",
244
+ pattern: /DatadogSynthetics/i,
245
+ verifiable: false
246
+ }
247
+ ];
248
+ var ALL_BOTS = [
249
+ ...AI_BOTS,
250
+ ...SEARCH_BOTS,
251
+ ...GENERIC_BOTS
252
+ ];
253
+
254
+ // src/detect.ts
255
+ var AUTOMATION_HEADER_PATTERNS = [
256
+ "x-selenium",
257
+ "x-puppeteer",
258
+ "x-playwright",
259
+ "x-cypress",
260
+ "x-automated",
261
+ "x-bot",
262
+ "x-crawler"
263
+ ];
264
+ var DEFINITIVE_BOT_UA_PATTERNS = [
265
+ /bot/i,
266
+ /crawler/i,
267
+ /spider/i,
268
+ /scraper/i,
269
+ /crawl/i,
270
+ /fetch/i,
271
+ /http_request/i,
272
+ /libwww/i,
273
+ /lwp-/i,
274
+ /python/i,
275
+ /ruby/i,
276
+ /java\//i,
277
+ /perl/i,
278
+ /go-http/i,
279
+ /okhttp/i,
280
+ /headless/i,
281
+ /phantom/i,
282
+ /selenium/i,
283
+ /webdriver/i,
284
+ /puppeteer/i,
285
+ /playwright/i
286
+ ];
287
+ function detectBot(req, customBots = []) {
288
+ const ua = req.userAgent || "";
289
+ if (req.automationHeaders.length > 0) {
290
+ return result(true, "certain", "automation-header", null, null);
291
+ }
292
+ if (req.httpVersion === "1.0") {
293
+ return result(true, "high", "http10", null, null);
294
+ }
295
+ const allBots = [
296
+ ...customBots.map((b) => ({
297
+ name: b.name,
298
+ pattern: b.pattern,
299
+ category: b.category ?? "unknown",
300
+ verifiable: false
301
+ })),
302
+ ...ALL_BOTS
303
+ ];
304
+ for (const bot of allBots) {
305
+ if (bot.pattern.test(ua)) {
306
+ return result(true, "certain", "ua-match", bot.name, bot.category);
307
+ }
308
+ }
309
+ if (!ua || ua.length < 10) {
310
+ return result(true, "high", "ua-match", null, "unknown");
311
+ }
312
+ for (const pattern of DEFINITIVE_BOT_UA_PATTERNS) {
313
+ if (pattern.test(ua)) {
314
+ return result(true, "high", "ua-match", null, "unknown");
315
+ }
316
+ }
317
+ const claimsModernBrowser = /Chrome\/([8-9]\d|1\d\d)|Firefox\/([8-9]\d|1\d\d)|Safari\/1[5-9]/i.test(ua);
318
+ if (claimsModernBrowser) {
319
+ const missingSec = !req.secFetchSite;
320
+ const missingLang = !req.acceptLanguage;
321
+ if (missingSec && missingLang) {
322
+ return result(true, "high", "header-anomaly", null, "unknown");
323
+ }
324
+ if (missingSec || missingLang) {
325
+ return result(true, "medium", "header-anomaly", null, "unknown");
326
+ }
327
+ }
328
+ return result(false, "low", null, null, null);
329
+ }
330
+ function result(isBot, confidence, method, botName, botCategory) {
331
+ return { isBot, confidence, method, botName, botCategory };
332
+ }
333
+ function extractAutomationHeaders(headers) {
334
+ return AUTOMATION_HEADER_PATTERNS.filter((h) => h in headers);
335
+ }
336
+
// src/index.ts
/**
 * Sends a server-side payload to the Telemetry backend.
 * Fire-and-forget: network/HTTP failures never throw; they are logged only
 * when `config.debug` is set.
 *
 * @param {ServerTrackPayload} payload The data to track.
 * @param {TelemetryConfig} config The Telemetry configuration.
 * @returns {Promise<void>}
 */
async function sendToTelemetry(payload, config) {
  // BUG FIX: previously this mutated the caller's `config.apiUrl` to
  // "http://localhost:3001" (a dev leftover) whenever apiUrl was unset, which
  // made the hosted-service fallback dead code and sent production events to
  // localhost. Default to the hosted service without mutating `config`.
  const apiUrl = config.apiUrl ?? "https://telemetry.yourdomain.com";
  const endpoint = `${apiUrl}/v1/track/server-pageview`;
  if (config.debug) {
    console.log(`[Telemetry] Sending payload to ${endpoint}`, payload);
  }
  // The middleware passes a server secret instead of an Origin check. Only
  // attach the header when a secret is configured (avoids "Bearer undefined").
  const headers = config.serverSecret
    ? { "Content-Type": "application/json", "Authorization": `Bearer ${config.serverSecret}` }
    : { "Content-Type": "application/json" };
  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok && config.debug) {
      console.error(`[Telemetry] Failed to send payload: ${response.status} ${response.statusText}`);
      const text = await response.text();
      console.error(`[Telemetry] Response body:`, text);
    }
  } catch (error) {
    // Swallow by design — tracking must never break the host application.
    if (config.debug) {
      console.error("[Telemetry] Error sending payload:", error);
    }
  }
}
// Annotate the CommonJS export names for ESM import in node:
// (dead code by design — the `0 &&` guard never executes; the literal exists
// only so static analysis of this CJS file can discover the named exports)
0 && (module.exports = {
  AI_BOTS,
  ALL_BOTS,
  GENERIC_BOTS,
  SEARCH_BOTS,
  detectBot,
  extractAutomationHeaders,
  sendToTelemetry
});
package/dist/index.mjs ADDED
@@ -0,0 +1,344 @@
1
+ // src/bots.ts
2
+ var AI_BOTS = [
3
+ {
4
+ name: "GPTBot",
5
+ category: "ai-crawler",
6
+ pattern: /GPTBot/i,
7
+ verifiable: true
8
+ // https://openai.com/gptbot
9
+ },
10
+ {
11
+ name: "ClaudeBot",
12
+ category: "ai-crawler",
13
+ pattern: /ClaudeBot|anthropic-ai/i,
14
+ verifiable: true
15
+ // Anthropic publishes IP ranges
16
+ },
17
+ {
18
+ name: "Google-Extended",
19
+ category: "ai-crawler",
20
+ pattern: /Google-Extended/i,
21
+ verifiable: true
22
+ // Gemini/Bard training crawler
23
+ },
24
+ {
25
+ name: "PerplexityBot",
26
+ category: "ai-crawler",
27
+ pattern: /PerplexityBot/i,
28
+ verifiable: false
29
+ },
30
+ {
31
+ name: "Amazonbot",
32
+ category: "ai-crawler",
33
+ pattern: /Amazonbot/i,
34
+ verifiable: true
35
+ },
36
+ {
37
+ name: "Meta-ExternalAgent",
38
+ category: "ai-crawler",
39
+ pattern: /Meta-ExternalAgent/i,
40
+ verifiable: false
41
+ },
42
+ {
43
+ name: "Applebot",
44
+ category: "ai-crawler",
45
+ pattern: /Applebot/i,
46
+ verifiable: true
47
+ },
48
+ {
49
+ name: "YouBot",
50
+ category: "ai-crawler",
51
+ pattern: /YouBot/i,
52
+ verifiable: false
53
+ },
54
+ {
55
+ name: "Bytespider",
56
+ category: "ai-crawler",
57
+ pattern: /Bytespider/i,
58
+ // TikTok / ByteDance — used for AI training
59
+ verifiable: false
60
+ },
61
+ {
62
+ name: "CCBot",
63
+ category: "ai-crawler",
64
+ pattern: /CCBot/i,
65
+ // Common Crawl — dataset used by many LLMs
66
+ verifiable: false
67
+ },
68
+ {
69
+ name: "cohere-ai",
70
+ category: "ai-crawler",
71
+ pattern: /cohere-ai/i,
72
+ verifiable: false
73
+ },
74
+ {
75
+ name: "DuckAssistBot",
76
+ category: "ai-crawler",
77
+ pattern: /DuckAssistBot/i,
78
+ verifiable: false
79
+ },
80
+ {
81
+ name: "Diffbot",
82
+ category: "ai-crawler",
83
+ pattern: /Diffbot/i,
84
+ verifiable: false
85
+ },
86
+ {
87
+ name: "Omgili",
88
+ category: "ai-crawler",
89
+ pattern: /Omgili|omgilibot/i,
90
+ verifiable: false
91
+ },
92
+ {
93
+ name: "ImagesiftBot",
94
+ category: "ai-crawler",
95
+ pattern: /ImagesiftBot/i,
96
+ verifiable: false
97
+ },
98
+ {
99
+ name: "Timpibot",
100
+ category: "ai-crawler",
101
+ pattern: /Timpibot/i,
102
+ verifiable: false
103
+ }
104
+ ];
105
+ var SEARCH_BOTS = [
106
+ {
107
+ name: "Googlebot",
108
+ category: "search",
109
+ pattern: /Googlebot/i,
110
+ verifiable: true
111
+ // Reverse DNS: *.googlebot.com / *.google.com
112
+ },
113
+ {
114
+ name: "Bingbot",
115
+ category: "search",
116
+ pattern: /bingbot/i,
117
+ verifiable: true
118
+ // Reverse DNS: *.search.msn.com
119
+ },
120
+ {
121
+ name: "Slurp",
122
+ category: "search",
123
+ pattern: /Slurp/i,
124
+ // Yahoo Search
125
+ verifiable: false
126
+ },
127
+ {
128
+ name: "DuckDuckBot",
129
+ category: "search",
130
+ pattern: /DuckDuckBot/i,
131
+ verifiable: false
132
+ },
133
+ {
134
+ name: "Baiduspider",
135
+ category: "search",
136
+ pattern: /Baiduspider/i,
137
+ verifiable: false
138
+ },
139
+ {
140
+ name: "YandexBot",
141
+ category: "search",
142
+ pattern: /YandexBot/i,
143
+ verifiable: false
144
+ },
145
+ {
146
+ name: "Sogou",
147
+ category: "search",
148
+ pattern: /Sogou/i,
149
+ verifiable: false
150
+ },
151
+ {
152
+ name: "Exabot",
153
+ category: "search",
154
+ pattern: /Exabot/i,
155
+ verifiable: false
156
+ }
157
+ ];
158
+ var GENERIC_BOTS = [
159
+ { name: "curl", category: "scraper", pattern: /^curl\//i, verifiable: false },
160
+ { name: "wget", category: "scraper", pattern: /^Wget\//i, verifiable: false },
161
+ {
162
+ name: "python-requests",
163
+ category: "scraper",
164
+ pattern: /python-requests/i,
165
+ verifiable: false
166
+ },
167
+ {
168
+ name: "Go-http-client",
169
+ category: "scraper",
170
+ pattern: /Go-http-client/i,
171
+ verifiable: false
172
+ },
173
+ {
174
+ name: "axios",
175
+ category: "scraper",
176
+ pattern: /^axios\//i,
177
+ verifiable: false
178
+ },
179
+ {
180
+ name: "node-fetch",
181
+ category: "scraper",
182
+ pattern: /node-fetch/i,
183
+ verifiable: false
184
+ },
185
+ {
186
+ name: "Scrapy",
187
+ category: "scraper",
188
+ pattern: /Scrapy/i,
189
+ verifiable: false
190
+ },
191
+ {
192
+ name: "UptimeRobot",
193
+ category: "monitor",
194
+ pattern: /UptimeRobot/i,
195
+ verifiable: false
196
+ },
197
+ {
198
+ name: "Pingdom",
199
+ category: "monitor",
200
+ pattern: /Pingdom/i,
201
+ verifiable: false
202
+ },
203
+ {
204
+ name: "StatusCake",
205
+ category: "monitor",
206
+ pattern: /StatusCake/i,
207
+ verifiable: false
208
+ },
209
+ {
210
+ name: "DatadogSynthetics",
211
+ category: "monitor",
212
+ pattern: /DatadogSynthetics/i,
213
+ verifiable: false
214
+ }
215
+ ];
216
+ var ALL_BOTS = [
217
+ ...AI_BOTS,
218
+ ...SEARCH_BOTS,
219
+ ...GENERIC_BOTS
220
+ ];
221
+
222
+ // src/detect.ts
223
+ var AUTOMATION_HEADER_PATTERNS = [
224
+ "x-selenium",
225
+ "x-puppeteer",
226
+ "x-playwright",
227
+ "x-cypress",
228
+ "x-automated",
229
+ "x-bot",
230
+ "x-crawler"
231
+ ];
232
+ var DEFINITIVE_BOT_UA_PATTERNS = [
233
+ /bot/i,
234
+ /crawler/i,
235
+ /spider/i,
236
+ /scraper/i,
237
+ /crawl/i,
238
+ /fetch/i,
239
+ /http_request/i,
240
+ /libwww/i,
241
+ /lwp-/i,
242
+ /python/i,
243
+ /ruby/i,
244
+ /java\//i,
245
+ /perl/i,
246
+ /go-http/i,
247
+ /okhttp/i,
248
+ /headless/i,
249
+ /phantom/i,
250
+ /selenium/i,
251
+ /webdriver/i,
252
+ /puppeteer/i,
253
+ /playwright/i
254
+ ];
255
+ function detectBot(req, customBots = []) {
256
+ const ua = req.userAgent || "";
257
+ if (req.automationHeaders.length > 0) {
258
+ return result(true, "certain", "automation-header", null, null);
259
+ }
260
+ if (req.httpVersion === "1.0") {
261
+ return result(true, "high", "http10", null, null);
262
+ }
263
+ const allBots = [
264
+ ...customBots.map((b) => ({
265
+ name: b.name,
266
+ pattern: b.pattern,
267
+ category: b.category ?? "unknown",
268
+ verifiable: false
269
+ })),
270
+ ...ALL_BOTS
271
+ ];
272
+ for (const bot of allBots) {
273
+ if (bot.pattern.test(ua)) {
274
+ return result(true, "certain", "ua-match", bot.name, bot.category);
275
+ }
276
+ }
277
+ if (!ua || ua.length < 10) {
278
+ return result(true, "high", "ua-match", null, "unknown");
279
+ }
280
+ for (const pattern of DEFINITIVE_BOT_UA_PATTERNS) {
281
+ if (pattern.test(ua)) {
282
+ return result(true, "high", "ua-match", null, "unknown");
283
+ }
284
+ }
285
+ const claimsModernBrowser = /Chrome\/([8-9]\d|1\d\d)|Firefox\/([8-9]\d|1\d\d)|Safari\/1[5-9]/i.test(ua);
286
+ if (claimsModernBrowser) {
287
+ const missingSec = !req.secFetchSite;
288
+ const missingLang = !req.acceptLanguage;
289
+ if (missingSec && missingLang) {
290
+ return result(true, "high", "header-anomaly", null, "unknown");
291
+ }
292
+ if (missingSec || missingLang) {
293
+ return result(true, "medium", "header-anomaly", null, "unknown");
294
+ }
295
+ }
296
+ return result(false, "low", null, null, null);
297
+ }
298
+ function result(isBot, confidence, method, botName, botCategory) {
299
+ return { isBot, confidence, method, botName, botCategory };
300
+ }
301
+ function extractAutomationHeaders(headers) {
302
+ return AUTOMATION_HEADER_PATTERNS.filter((h) => h in headers);
303
+ }
304
+
// src/index.ts
/**
 * Sends a server-side payload to the Telemetry backend.
 * Fire-and-forget: network/HTTP failures never throw; they are logged only
 * when `config.debug` is set.
 *
 * @param {ServerTrackPayload} payload The data to track.
 * @param {TelemetryConfig} config The Telemetry configuration.
 * @returns {Promise<void>}
 */
async function sendToTelemetry(payload, config) {
  // BUG FIX: previously this mutated the caller's `config.apiUrl` to
  // "http://localhost:3001" (a dev leftover) whenever apiUrl was unset, which
  // made the hosted-service fallback dead code and sent production events to
  // localhost. Default to the hosted service without mutating `config`.
  const apiUrl = config.apiUrl ?? "https://telemetry.yourdomain.com";
  const endpoint = `${apiUrl}/v1/track/server-pageview`;
  if (config.debug) {
    console.log(`[Telemetry] Sending payload to ${endpoint}`, payload);
  }
  // The middleware passes a server secret instead of an Origin check. Only
  // attach the header when a secret is configured (avoids "Bearer undefined").
  const headers = config.serverSecret
    ? { "Content-Type": "application/json", "Authorization": `Bearer ${config.serverSecret}` }
    : { "Content-Type": "application/json" };
  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok && config.debug) {
      console.error(`[Telemetry] Failed to send payload: ${response.status} ${response.statusText}`);
      const text = await response.text();
      console.error(`[Telemetry] Response body:`, text);
    }
  } catch (error) {
    // Swallow by design — tracking must never break the host application.
    if (config.debug) {
      console.error("[Telemetry] Error sending payload:", error);
    }
  }
}
// Public API of @adwait12345/telemetry-core (ESM entry point).
export {
  AI_BOTS,
  ALL_BOTS,
  GENERIC_BOTS,
  SEARCH_BOTS,
  detectBot,
  extractAutomationHeaders,
  sendToTelemetry
};
package/package.json ADDED
@@ -0,0 +1,29 @@
{
  "name": "@adwait12345/telemetry-core",
  "version": "0.1.0",
  "description": "Framework-agnostic core for Telemetry SDK — bot detection and server-side tracking",
  "main": "./dist/index.js",
  "module": "./dist/index.mjs",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.mjs",
      "require": "./dist/index.js"
    }
  },
  "files": [
    "dist"
  ],
  "devDependencies": {
    "tsup": "^8.0.0",
    "typescript": "^5.4.0",
    "@types/node": "^20.0.0"
  },
  "scripts": {
    "build": "tsup src/index.ts --format esm,cjs --dts --clean",
    "dev": "tsup src/index.ts --format esm,cjs --dts --watch",
    "typecheck": "tsc --noEmit",
    "clean": "rm -rf dist"
  }
}