@walkeros/server-transformer-bot 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,113 @@
1
+ # @walkeros/server-transformer-bot
2
+
3
+ Server-side bot and AI-agent detection transformer for walkerOS.
4
+
5
+ Annotates events with `user.botScore` (0-99, higher = more likely a bot), `user.agentScore` (0-99, higher = more likely an AI agent), and optionally `user.agentProduct` (the matched UA substring). Never drops events — downstream destination mappings decide policy.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install @walkeros/server-transformer-bot
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```typescript
16
+ import { startFlow } from '@walkeros/collector';
17
+ import { transformerBot } from '@walkeros/server-transformer-bot';
18
+
19
+ await startFlow({
20
+ sources: {
21
+ express: {
22
+ package: '@walkeros/server-source-express',
23
+ config: {
24
+ ingest: {
25
+ userAgent: 'req.headers.user-agent',
26
+ },
27
+ },
28
+ },
29
+ },
30
+ transformers: {
31
+ bot: { code: transformerBot },
32
+ },
33
+ destinations: {
34
+ ga4: {
35
+ package: '@walkeros/server-destination-google-ga4',
36
+ before: 'bot',
37
+ // mapping can filter: if (event.user.botScore > 50) drop
38
+ },
39
+ },
40
+ });
41
+ ```
42
+
43
+ After the transformer runs, an event from a real browser carries (other event fields omitted):
44
+
45
+ ```json
46
+ {
47
+ "user": {
48
+ "botScore": 0,
49
+ "agentScore": 0
50
+ }
51
+ }
52
+ ```
53
+
54
+ ## What it detects (v1)
55
+
56
+ | Visitor | botScore | agentScore | agentProduct |
57
+ |---|---|---|---|
58
+ | Real browser (Chrome, Firefox, Safari, Edge) | 0 | 0 | — |
59
+ | Empty / missing User-Agent | 70 | 0 | — |
60
+ | curl / wget / python-requests / well-known crawlers | 80 | 0 | — |
61
+ | AI training crawlers (GPTBot, ClaudeBot, CCBot, Bytespider, etc.) | 95 | 95 | e.g. "GPTBot" |
62
+ | AI search-index crawlers (OAI-SearchBot, Claude-SearchBot, PerplexityBot) | 95 | 95 | e.g. "PerplexityBot" |
63
+ | AI user-action agents (ChatGPT-User, Claude-User, Perplexity-User, etc.) | 90 | 95 | e.g. "ChatGPT-User" |
64
+
65
+ ## Output paths
66
+
67
+ All three outputs are configurable via `settings.output`:
68
+
69
+ | Field | Default path | Notes |
70
+ |---|---|---|
71
+ | `botScore` | `user.botScore` | Set to `"ingest.bot.score"` to route to pipeline scratch instead of the event. Set to `""` to skip writing. |
72
+ | `agentScore` | `user.agentScore` | v1 emits 0 or 95 only. |
73
+ | `agentProduct` | (off) | Set to `"user.agentProduct"` or similar to enable. |
74
+
75
+ ## Destination filtering recipes
76
+
77
+ Drop all bots:
78
+
79
+ ```
80
+ event.user.botScore > 50
81
+ ```
82
+
83
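+ Drop crawlers but keep user-action AI traffic (requires `settings.output.agentProduct` to be set, e.g. to `"user.agentProduct"`, because it is off by default):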
84
+
85
+ ```
86
+ event.user.botScore > 50 AND event.user.agentProduct NOT LIKE '%-User'
87
+ ```
88
+
89
+ AI traffic report (requires `settings.output.agentProduct` to be set, because it is off by default):
90
+
91
+ ```
92
+ event.user.agentScore > 50, grouped by event.user.agentProduct
93
+ ```
94
+
95
+ ## Not in v1 (planned for v1.1+)
96
+
97
+ These signals are deferred. The `settings.input` schema reserves the relevant input field names so adding them in v1.1 will not be a breaking change.
98
+
99
+ - **Header consistency heuristics** — `Sec-Fetch-*` missing on Chromium UAs, `Sec-CH-UA` major version mismatch with UA, missing `Accept-Language`. Requires a structured-headers parser, GREASE filtering, and a captured-headers fixture suite to avoid false positives on WebView, Tor, corporate proxies, and old Safari.
100
+ - **ASN / datacenter-IP detection** — bring-your-own lookup function (the package will stay dependency-free; MaxMind GeoLite ASN's CC-BY-SA license precludes embedding).
101
+ - **Reverse DNS verification** for true `verified-bot` status (e.g. confirming Googlebot is actually Google).
102
+ - **Web-side runtime checks** — `navigator.webdriver`, `userAgentData` from a browser source.
103
+ - **Behavioral signals** (rate, session shape) — needs a store.
104
+ - **TLS / JA4 fingerprinting** — not application-layer reachable; would consume an upstream-injected `ja4` header if provided.
105
+ - **agentScore graduation** — v1 emits 0 or 95. v1.1 will use intermediate values (e.g. 70 for unverified UA claim, 99 for IP-reverse-DNS verified).
106
+
107
+ ## Limits
108
+
109
+ Will not catch: residential-proxy + stealth-patched Chrome + realistic behavior; paid CAPTCHA-solver farms (2Captcha residential, etc.); real-browser-as-a-service providers (Bright Data, ScrapingBee, Browserbase, Hyperbrowser, Browserless). For that threat model use Cloudflare Bot Management, DataDome, or HUMAN.
110
+
111
+ ## License
112
+
113
+ MIT
package/dist/dev.d.mts ADDED
@@ -0,0 +1,107 @@
1
+ import * as _walkeros_core_dev from '@walkeros/core/dev';
2
+ import { z } from '@walkeros/core/dev';
3
+ import { Flow, Hint } from '@walkeros/core';
4
+
5
+ /** Zod schema for the transformer settings: an optional `input` object of mapping values and an optional `output` object of string paths. */ declare const SettingsSchema: z.ZodObject<{
6
+ /** Input signal sources; every field is optional and is either a string or a `{ key, value, fn }` object. The schema description in the bundled dev build states that v1 only reads `userAgent` and that the other fields are reserved for v1.1 header heuristics. */ input: z.ZodOptional<z.ZodObject<{
7
+ userAgent: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
8
+ key: z.ZodOptional<z.ZodString>;
9
+ value: z.ZodOptional<z.ZodUnknown>;
10
+ fn: z.ZodOptional<z.ZodString>;
11
+ }, z.core.$strip>]>>;
12
+ ip: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
13
+ key: z.ZodOptional<z.ZodString>;
14
+ value: z.ZodOptional<z.ZodUnknown>;
15
+ fn: z.ZodOptional<z.ZodString>;
16
+ }, z.core.$strip>]>>;
17
+ acceptLanguage: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
18
+ key: z.ZodOptional<z.ZodString>;
19
+ value: z.ZodOptional<z.ZodUnknown>;
20
+ fn: z.ZodOptional<z.ZodString>;
21
+ }, z.core.$strip>]>>;
22
+ acceptEncoding: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
23
+ key: z.ZodOptional<z.ZodString>;
24
+ value: z.ZodOptional<z.ZodUnknown>;
25
+ fn: z.ZodOptional<z.ZodString>;
26
+ }, z.core.$strip>]>>;
27
+ secFetchSite: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
28
+ key: z.ZodOptional<z.ZodString>;
29
+ value: z.ZodOptional<z.ZodUnknown>;
30
+ fn: z.ZodOptional<z.ZodString>;
31
+ }, z.core.$strip>]>>;
32
+ secFetchMode: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
33
+ key: z.ZodOptional<z.ZodString>;
34
+ value: z.ZodOptional<z.ZodUnknown>;
35
+ fn: z.ZodOptional<z.ZodString>;
36
+ }, z.core.$strip>]>>;
37
+ secFetchDest: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
38
+ key: z.ZodOptional<z.ZodString>;
39
+ value: z.ZodOptional<z.ZodUnknown>;
40
+ fn: z.ZodOptional<z.ZodString>;
41
+ }, z.core.$strip>]>>;
42
+ secFetchUser: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
43
+ key: z.ZodOptional<z.ZodString>;
44
+ value: z.ZodOptional<z.ZodUnknown>;
45
+ fn: z.ZodOptional<z.ZodString>;
46
+ }, z.core.$strip>]>>;
47
+ secChUa: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
48
+ key: z.ZodOptional<z.ZodString>;
49
+ value: z.ZodOptional<z.ZodUnknown>;
50
+ fn: z.ZodOptional<z.ZodString>;
51
+ }, z.core.$strip>]>>;
52
+ secChUaMobile: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
53
+ key: z.ZodOptional<z.ZodString>;
54
+ value: z.ZodOptional<z.ZodUnknown>;
55
+ fn: z.ZodOptional<z.ZodString>;
56
+ }, z.core.$strip>]>>;
57
+ secChUaPlatform: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
58
+ key: z.ZodOptional<z.ZodString>;
59
+ value: z.ZodOptional<z.ZodUnknown>;
60
+ fn: z.ZodOptional<z.ZodString>;
61
+ }, z.core.$strip>]>>;
62
+ }, z.core.$strip>>;
63
+ /** Output paths for the bot/agent annotations; each is an optional string. */ output: z.ZodOptional<z.ZodObject<{
64
+ botScore: z.ZodOptional<z.ZodString>;
65
+ agentScore: z.ZodOptional<z.ZodString>;
66
+ agentProduct: z.ZodOptional<z.ZodString>;
67
+ }, z.core.$strip>>;
68
+ }, z.core.$strip>;
69
+ /** Settings object type inferred from `SettingsSchema`. */ type Settings = z.infer<typeof SettingsSchema>;
70
+
71
+ /** JSON Schema form of `SettingsSchema`; the bundled dev build produces it with `zodToSchema(SettingsSchema)`. */ declare const settings: _walkeros_core_dev.JSONSchema;
72
+
73
+ /** Aliases of `Settings`, `SettingsSchema` and `settings`, grouped into the `index$1` namespace that this file's final export statement exposes as `schemas`. */ type index$1_Settings = Settings;
74
+ declare const index$1_SettingsSchema: typeof SettingsSchema;
75
+ declare const index$1_settings: typeof settings;
76
+ declare namespace index$1 {
77
+ export { type index$1_Settings as Settings, index$1_SettingsSchema as SettingsSchema, index$1_settings as settings };
78
+ }
79
+
80
+ /** Real Chrome — expected output sets user.botScore 0 and user.agentScore 0. */
81
+ declare const humanChrome: Flow.StepExample;
82
+ /** GPTBot — OpenAI training crawler; expected output sets user.botScore 95 and user.agentScore 95. */
83
+ declare const gptBotCrawler: Flow.StepExample;
84
+ /** ChatGPT-User — user-action AI agent; expected output sets user.botScore 90 and user.agentScore 95. */
85
+ declare const chatgptUserAgent: Flow.StepExample;
86
+ /** curl — caught by isbot; expected output sets user.botScore 80 and user.agentScore 0. Defined with `public: false`. */
87
+ declare const curlClient: Flow.StepExample;
88
+ /** Empty / missing UA — expected output sets user.botScore 70 and user.agentScore 0 (suspicious; real browsers rarely strip UA). Defined with `public: false`. */
89
+ declare const missingUA: Flow.StepExample;
90
+
91
+ /** Aliases of the five step examples, grouped into the `step` namespace under their original names. */ declare const step_chatgptUserAgent: typeof chatgptUserAgent;
92
+ declare const step_curlClient: typeof curlClient;
93
+ declare const step_gptBotCrawler: typeof gptBotCrawler;
94
+ declare const step_humanChrome: typeof humanChrome;
95
+ declare const step_missingUA: typeof missingUA;
96
+ declare namespace step {
97
+ export { step_chatgptUserAgent as chatgptUserAgent, step_curlClient as curlClient, step_gptBotCrawler as gptBotCrawler, step_humanChrome as humanChrome, step_missingUA as missingUA };
98
+ }
99
+
100
+ /** Alias of the `step` namespace, wrapped in the `index` namespace that this file's final export statement exposes as `examples`. */ declare const index_step: typeof step;
101
+ declare namespace index {
102
+ export { index_step as step };
103
+ }
104
+
105
+ /** Usage hints keyed by topic; the bundled dev build defines the keys `ingest-prerequisite`, `output-routing`, `destination-filtering` and `detection-scope`. */ declare const hints: Hint.Hints;
106
+
107
+ export { index as examples, hints, index$1 as schemas };
package/dist/dev.d.ts ADDED
@@ -0,0 +1,107 @@
1
+ import * as _walkeros_core_dev from '@walkeros/core/dev';
2
+ import { z } from '@walkeros/core/dev';
3
+ import { Flow, Hint } from '@walkeros/core';
4
+
5
+ /** Zod schema for the transformer settings: an optional `input` object of mapping values and an optional `output` object of string paths. */ declare const SettingsSchema: z.ZodObject<{
6
+ /** Input signal sources; every field is optional and is either a string or a `{ key, value, fn }` object. The schema description in the bundled dev build states that v1 only reads `userAgent` and that the other fields are reserved for v1.1 header heuristics. */ input: z.ZodOptional<z.ZodObject<{
7
+ userAgent: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
8
+ key: z.ZodOptional<z.ZodString>;
9
+ value: z.ZodOptional<z.ZodUnknown>;
10
+ fn: z.ZodOptional<z.ZodString>;
11
+ }, z.core.$strip>]>>;
12
+ ip: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
13
+ key: z.ZodOptional<z.ZodString>;
14
+ value: z.ZodOptional<z.ZodUnknown>;
15
+ fn: z.ZodOptional<z.ZodString>;
16
+ }, z.core.$strip>]>>;
17
+ acceptLanguage: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
18
+ key: z.ZodOptional<z.ZodString>;
19
+ value: z.ZodOptional<z.ZodUnknown>;
20
+ fn: z.ZodOptional<z.ZodString>;
21
+ }, z.core.$strip>]>>;
22
+ acceptEncoding: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
23
+ key: z.ZodOptional<z.ZodString>;
24
+ value: z.ZodOptional<z.ZodUnknown>;
25
+ fn: z.ZodOptional<z.ZodString>;
26
+ }, z.core.$strip>]>>;
27
+ secFetchSite: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
28
+ key: z.ZodOptional<z.ZodString>;
29
+ value: z.ZodOptional<z.ZodUnknown>;
30
+ fn: z.ZodOptional<z.ZodString>;
31
+ }, z.core.$strip>]>>;
32
+ secFetchMode: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
33
+ key: z.ZodOptional<z.ZodString>;
34
+ value: z.ZodOptional<z.ZodUnknown>;
35
+ fn: z.ZodOptional<z.ZodString>;
36
+ }, z.core.$strip>]>>;
37
+ secFetchDest: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
38
+ key: z.ZodOptional<z.ZodString>;
39
+ value: z.ZodOptional<z.ZodUnknown>;
40
+ fn: z.ZodOptional<z.ZodString>;
41
+ }, z.core.$strip>]>>;
42
+ secFetchUser: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
43
+ key: z.ZodOptional<z.ZodString>;
44
+ value: z.ZodOptional<z.ZodUnknown>;
45
+ fn: z.ZodOptional<z.ZodString>;
46
+ }, z.core.$strip>]>>;
47
+ secChUa: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
48
+ key: z.ZodOptional<z.ZodString>;
49
+ value: z.ZodOptional<z.ZodUnknown>;
50
+ fn: z.ZodOptional<z.ZodString>;
51
+ }, z.core.$strip>]>>;
52
+ secChUaMobile: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
53
+ key: z.ZodOptional<z.ZodString>;
54
+ value: z.ZodOptional<z.ZodUnknown>;
55
+ fn: z.ZodOptional<z.ZodString>;
56
+ }, z.core.$strip>]>>;
57
+ secChUaPlatform: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
58
+ key: z.ZodOptional<z.ZodString>;
59
+ value: z.ZodOptional<z.ZodUnknown>;
60
+ fn: z.ZodOptional<z.ZodString>;
61
+ }, z.core.$strip>]>>;
62
+ }, z.core.$strip>>;
63
+ /** Output paths for the bot/agent annotations; each is an optional string. */ output: z.ZodOptional<z.ZodObject<{
64
+ botScore: z.ZodOptional<z.ZodString>;
65
+ agentScore: z.ZodOptional<z.ZodString>;
66
+ agentProduct: z.ZodOptional<z.ZodString>;
67
+ }, z.core.$strip>>;
68
+ }, z.core.$strip>;
69
+ /** Settings object type inferred from `SettingsSchema`. */ type Settings = z.infer<typeof SettingsSchema>;
70
+
71
+ /** JSON Schema form of `SettingsSchema`; the bundled dev build produces it with `zodToSchema(SettingsSchema)`. */ declare const settings: _walkeros_core_dev.JSONSchema;
72
+
73
+ /** Aliases of `Settings`, `SettingsSchema` and `settings`, grouped into the `index$1` namespace that this file's final export statement exposes as `schemas`. */ type index$1_Settings = Settings;
74
+ declare const index$1_SettingsSchema: typeof SettingsSchema;
75
+ declare const index$1_settings: typeof settings;
76
+ declare namespace index$1 {
77
+ export { type index$1_Settings as Settings, index$1_SettingsSchema as SettingsSchema, index$1_settings as settings };
78
+ }
79
+
80
+ /** Real Chrome — expected output sets user.botScore 0 and user.agentScore 0. */
81
+ declare const humanChrome: Flow.StepExample;
82
+ /** GPTBot — OpenAI training crawler; expected output sets user.botScore 95 and user.agentScore 95. */
83
+ declare const gptBotCrawler: Flow.StepExample;
84
+ /** ChatGPT-User — user-action AI agent; expected output sets user.botScore 90 and user.agentScore 95. */
85
+ declare const chatgptUserAgent: Flow.StepExample;
86
+ /** curl — caught by isbot; expected output sets user.botScore 80 and user.agentScore 0. Defined with `public: false`. */
87
+ declare const curlClient: Flow.StepExample;
88
+ /** Empty / missing UA — expected output sets user.botScore 70 and user.agentScore 0 (suspicious; real browsers rarely strip UA). Defined with `public: false`. */
89
+ declare const missingUA: Flow.StepExample;
90
+
91
+ /** Aliases of the five step examples, grouped into the `step` namespace under their original names. */ declare const step_chatgptUserAgent: typeof chatgptUserAgent;
92
+ declare const step_curlClient: typeof curlClient;
93
+ declare const step_gptBotCrawler: typeof gptBotCrawler;
94
+ declare const step_humanChrome: typeof humanChrome;
95
+ declare const step_missingUA: typeof missingUA;
96
+ declare namespace step {
97
+ export { step_chatgptUserAgent as chatgptUserAgent, step_curlClient as curlClient, step_gptBotCrawler as gptBotCrawler, step_humanChrome as humanChrome, step_missingUA as missingUA };
98
+ }
99
+
100
+ /** Alias of the `step` namespace, wrapped in the `index` namespace that this file's final export statement exposes as `examples`. */ declare const index_step: typeof step;
101
+ declare namespace index {
102
+ export { index_step as step };
103
+ }
104
+
105
+ /** Usage hints keyed by topic; the bundled dev build defines the keys `ingest-prerequisite`, `output-routing`, `destination-filtering` and `detection-scope`. */ declare const hints: Hint.Hints;
106
+
107
+ export { index as examples, hints, index$1 as schemas };
package/dist/dev.js ADDED
@@ -0,0 +1 @@
1
+ "use strict";var e,t=Object.defineProperty,r=Object.getOwnPropertyDescriptor,o=Object.getOwnPropertyNames,n=Object.prototype.hasOwnProperty,i=(e,r)=>{for(var o in r)t(e,o,{get:r[o],enumerable:!0})},a={};i(a,{examples:()=>d,hints:()=>w,schemas:()=>s}),module.exports=(e=a,((e,i,a,s)=>{if(i&&"object"==typeof i||"function"==typeof i)for(let c of o(i))n.call(e,c)||c===a||t(e,c,{get:()=>i[c],enumerable:!(s=r(i,c))||s.enumerable});return e})(t({},"__esModule",{value:!0}),e));var s={};i(s,{SettingsSchema:()=>p,settings:()=>u});var c=require("@walkeros/core/dev"),l=require("@walkeros/core/dev"),g=l.z.union([l.z.string().describe('Dot-notation path like "ingest.userAgent"'),l.z.object({key:l.z.string().optional(),value:l.z.unknown().optional(),fn:l.z.string().optional()}).describe("Mapping value object")]),p=l.z.object({input:l.z.object({userAgent:g.optional(),ip:g.optional(),acceptLanguage:g.optional(),acceptEncoding:g.optional(),secFetchSite:g.optional(),secFetchMode:g.optional(),secFetchDest:g.optional(),secFetchUser:g.optional(),secChUa:g.optional(),secChUaMobile:g.optional(),secChUaPlatform:g.optional()}).optional().describe("Input signal sources, resolved via getMappingValue against { event, ingest }. v1 only reads userAgent; other fields reserved for v1.1 header heuristics."),output:l.z.object({botScore:l.z.string().optional().describe('Path for bot score (0-99, higher = more bot). Default: "user.botScore". Use "ingest.*" to route to pipeline scratch instead of the event. Empty string or omit = skip.'),agentScore:l.z.string().optional().describe('Path for AI agent score (0-99). v1 emits 0 (no match) or 95 (UA-map match). Default: "user.agentScore".'),agentProduct:l.z.string().optional().describe('Path for matched UA substring (e.g. "ChatGPT-User"). 
Off by default — set to enable.')}).optional().describe("Output paths for bot/agent annotations.")}).describe("Bot detection transformer: annotates events with bot and AI-agent scores."),u=(0,c.zodToSchema)(p),d={};i(d,{step:()=>h});var h={};i(h,{chatgptUserAgent:()=>f,curlClient:()=>A,gptBotCrawler:()=>v,humanChrome:()=>m,missingUA:()=>S});var b={name:"page view",data:{title:"Home",id:"/"},id:"ev-1700000600",trigger:"load",entity:"page",action:"view",timestamp:1700000600,source:{type:"express",platform:"server"}},m={title:"Human visitor (Chrome)",description:"Modern Chrome UA. No bot or agent signals.",in:{...b},out:[["return",{event:{...b,user:{botScore:0,agentScore:0}}}]]},v={title:"GPTBot training crawler",description:"OpenAI training crawler. Both botScore and agentScore are high.",in:{...b,id:"ev-1700000601"},out:[["return",{event:{...b,id:"ev-1700000601",user:{botScore:95,agentScore:95}}}]]},f={title:"ChatGPT-User (user-action AI)",description:"A real human routed an AI to fetch this page. botScore high but lower than crawlers — agentProduct lets destinations keep this traffic.",in:{...b,id:"ev-1700000602"},out:[["return",{event:{...b,id:"ev-1700000602",user:{botScore:90,agentScore:95}}}]]},A={public:!1,description:"curl client — caught by isbot. agentScore zero.",in:{...b,id:"ev-1700000603"},out:[["return",{event:{...b,id:"ev-1700000603",user:{botScore:80,agentScore:0}}}]]},S={public:!1,description:"No User-Agent — baseline 70 (UA stripping is overwhelmingly bots or hardened privacy tools).",in:{...b,id:"ev-1700000604"},out:[["return",{event:{...b,id:"ev-1700000604",user:{botScore:70,agentScore:0}}}]]},w={"ingest-prerequisite":{text:'The bot transformer reads userAgent from ctx.ingest (path "ingest.userAgent" by default). 
The upstream server source must populate it via config.ingest mapping, otherwise the UA is empty and every event scores 70 (baseline for missing UA).',code:[{lang:"json",code:JSON.stringify({sources:{express:{package:"@walkeros/server-source-express",config:{ingest:{userAgent:"req.headers.user-agent"}}}},transformers:{bot:{package:"@walkeros/server-transformer-bot"}}},null,2)}]},"output-routing":{text:"Outputs default to event.user.botScore and event.user.agentScore. Redirect to ingest.* to keep the analytics event clean while still routing on the score downstream. Empty string (or omit) skips writing that field entirely. agentProduct is off by default — set it to enable writing the matched UA substring.",code:[{lang:"json",code:JSON.stringify({transformers:{bot:{package:"@walkeros/server-transformer-bot",config:{settings:{output:{botScore:"ingest.bot.score",agentScore:"ingest.bot.agent",agentProduct:"user.agentProduct"}}}}}},null,2)}]},"destination-filtering":{text:'Recommended destination-mapping recipes. Drop all bots: botScore > 50. Drop crawlers but keep user-action AI traffic: botScore > 50 AND agentProduct NOT LIKE "%-User". AI traffic report: group by agentProduct WHERE agentScore > 50. The transformer never drops events — filtering is always a destination decision.'},"detection-scope":{text:"v1 is UA-only: wraps isbot (curl, wget, headless Chrome defaults, well-known crawlers) plus a curated AI-agent UA map (OpenAI, Anthropic, Perplexity, Mistral, Meta, Google, Apple, Amazon, DuckDuckGo, ByteDance, Common Crawl). It will NOT catch: residential-proxy + stealth Chrome + realistic behavior; reverse-DNS-verified search engines; client-side runtime tells. v1.1 adds header consistency heuristics (Sec-Fetch, Sec-CH-UA, Accept-Language) with proper GREASE handling. For commercial-grade detection use Cloudflare Bot Management, DataDome, or HUMAN."}};//# sourceMappingURL=dev.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/dev.ts","../src/schemas/index.ts","../src/schemas/settings.ts","../src/examples/index.ts","../src/examples/step.ts","../src/hints.ts"],"sourcesContent":["export * as schemas from './schemas';\nexport * as examples from './examples';\nexport { hints } from './hints';\n","import { zodToSchema } from '@walkeros/core/dev';\nimport { SettingsSchema } from './settings';\n\nexport { SettingsSchema, type Settings } from './settings';\nexport const settings = zodToSchema(SettingsSchema);\n","import { z } from '@walkeros/core/dev';\n\nconst MappingValueSchema = z.union([\n z.string().describe('Dot-notation path like \"ingest.userAgent\"'),\n z\n .object({\n key: z.string().optional(),\n value: z.unknown().optional(),\n fn: z.string().optional(),\n })\n .describe('Mapping value object'),\n]);\n\nexport const SettingsSchema = z\n .object({\n input: z\n .object({\n userAgent: MappingValueSchema.optional(),\n ip: MappingValueSchema.optional(),\n acceptLanguage: MappingValueSchema.optional(),\n acceptEncoding: MappingValueSchema.optional(),\n secFetchSite: MappingValueSchema.optional(),\n secFetchMode: MappingValueSchema.optional(),\n secFetchDest: MappingValueSchema.optional(),\n secFetchUser: MappingValueSchema.optional(),\n secChUa: MappingValueSchema.optional(),\n secChUaMobile: MappingValueSchema.optional(),\n secChUaPlatform: MappingValueSchema.optional(),\n })\n .optional()\n .describe(\n 'Input signal sources, resolved via getMappingValue against { event, ingest }. v1 only reads userAgent; other fields reserved for v1.1 header heuristics.',\n ),\n output: z\n .object({\n botScore: z\n .string()\n .optional()\n .describe(\n 'Path for bot score (0-99, higher = more bot). Default: \"user.botScore\". Use \"ingest.*\" to route to pipeline scratch instead of the event. Empty string or omit = skip.',\n ),\n agentScore: z\n .string()\n .optional()\n .describe(\n 'Path for AI agent score (0-99). v1 emits 0 (no match) or 95 (UA-map match). 
Default: \"user.agentScore\".',\n ),\n agentProduct: z\n .string()\n .optional()\n .describe(\n 'Path for matched UA substring (e.g. \"ChatGPT-User\"). Off by default — set to enable.',\n ),\n })\n .optional()\n .describe('Output paths for bot/agent annotations.'),\n })\n .describe(\n 'Bot detection transformer: annotates events with bot and AI-agent scores.',\n );\n\nexport type Settings = z.infer<typeof SettingsSchema>;\n","export * as step from './step';\n","import type { Flow } from '@walkeros/core';\n\nconst baseEvent = {\n name: 'page view',\n data: { title: 'Home', id: '/' },\n id: 'ev-1700000600',\n trigger: 'load',\n entity: 'page',\n action: 'view',\n timestamp: 1700000600,\n source: { type: 'express', platform: 'server' as const },\n};\n\n/** Real Chrome — botScore 0, agentScore 0. */\nexport const humanChrome: Flow.StepExample = {\n title: 'Human visitor (Chrome)',\n description: 'Modern Chrome UA. No bot or agent signals.',\n in: { ...baseEvent },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n user: { botScore: 0, agentScore: 0 },\n },\n },\n ],\n ],\n};\n\n/** GPTBot — training crawler. */\nexport const gptBotCrawler: Flow.StepExample = {\n title: 'GPTBot training crawler',\n description:\n 'OpenAI training crawler. Both botScore and agentScore are high.',\n in: { ...baseEvent, id: 'ev-1700000601' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000601',\n user: { botScore: 95, agentScore: 95 },\n },\n },\n ],\n ],\n};\n\n/** ChatGPT-User — user-action AI agent. */\nexport const chatgptUserAgent: Flow.StepExample = {\n title: 'ChatGPT-User (user-action AI)',\n description:\n 'A real human routed an AI to fetch this page. 
botScore high but lower than crawlers — agentProduct lets destinations keep this traffic.',\n in: { ...baseEvent, id: 'ev-1700000602' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000602',\n user: { botScore: 90, agentScore: 95 },\n },\n },\n ],\n ],\n};\n\n/** curl — caught by isbot. */\nexport const curlClient: Flow.StepExample = {\n public: false,\n description: 'curl client — caught by isbot. agentScore zero.',\n in: { ...baseEvent, id: 'ev-1700000603' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000603',\n user: { botScore: 80, agentScore: 0 },\n },\n },\n ],\n ],\n};\n\n/** Empty / missing UA — score 70 (suspicious; real browsers rarely strip UA). */\nexport const missingUA: Flow.StepExample = {\n public: false,\n description:\n 'No User-Agent — baseline 70 (UA stripping is overwhelmingly bots or hardened privacy tools).',\n in: { ...baseEvent, id: 'ev-1700000604' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000604',\n user: { botScore: 70, agentScore: 0 },\n },\n },\n ],\n ],\n};\n","import type { Hint } from '@walkeros/core';\n\nexport const hints: Hint.Hints = {\n 'ingest-prerequisite': {\n text: 'The bot transformer reads userAgent from ctx.ingest (path \"ingest.userAgent\" by default). The upstream server source must populate it via config.ingest mapping, otherwise the UA is empty and every event scores 70 (baseline for missing UA).',\n code: [\n {\n lang: 'json',\n code: JSON.stringify(\n {\n sources: {\n express: {\n package: '@walkeros/server-source-express',\n config: {\n ingest: {\n userAgent: 'req.headers.user-agent',\n },\n },\n },\n },\n transformers: {\n bot: {\n package: '@walkeros/server-transformer-bot',\n },\n },\n },\n null,\n 2,\n ),\n },\n ],\n },\n 'output-routing': {\n text: 'Outputs default to event.user.botScore and event.user.agentScore. Redirect to ingest.* to keep the analytics event clean while still routing on the score downstream. 
Empty string (or omit) skips writing that field entirely. agentProduct is off by default — set it to enable writing the matched UA substring.',\n code: [\n {\n lang: 'json',\n code: JSON.stringify(\n {\n transformers: {\n bot: {\n package: '@walkeros/server-transformer-bot',\n config: {\n settings: {\n output: {\n botScore: 'ingest.bot.score',\n agentScore: 'ingest.bot.agent',\n agentProduct: 'user.agentProduct',\n },\n },\n },\n },\n },\n },\n null,\n 2,\n ),\n },\n ],\n },\n 'destination-filtering': {\n text: 'Recommended destination-mapping recipes. Drop all bots: botScore > 50. Drop crawlers but keep user-action AI traffic: botScore > 50 AND agentProduct NOT LIKE \"%-User\". AI traffic report: group by agentProduct WHERE agentScore > 50. The transformer never drops events — filtering is always a destination decision.',\n },\n 'detection-scope': {\n text: 'v1 is UA-only: wraps isbot (curl, wget, headless Chrome defaults, well-known crawlers) plus a curated AI-agent UA map (OpenAI, Anthropic, Perplexity, Mistral, Meta, Google, Apple, Amazon, DuckDuckGo, ByteDance, Common Crawl). It will NOT catch: residential-proxy + stealth Chrome + realistic behavior; reverse-DNS-verified search engines; client-side runtime tells. v1.1 adds header consistency heuristics (Sec-Fetch, Sec-CH-UA, Accept-Language) with proper GREASE handling. 
For commercial-grade detection use Cloudflare Bot Management, DataDome, or HUMAN.',\n },\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA,IAAAA,cAA4B;;;ACA5B,iBAAkB;AAElB,IAAM,qBAAqB,aAAE,MAAM;AAAA,EACjC,aAAE,OAAO,EAAE,SAAS,2CAA2C;AAAA,EAC/D,aACG,OAAO;AAAA,IACN,KAAK,aAAE,OAAO,EAAE,SAAS;AAAA,IACzB,OAAO,aAAE,QAAQ,EAAE,SAAS;AAAA,IAC5B,IAAI,aAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,CAAC,EACA,SAAS,sBAAsB;AACpC,CAAC;AAEM,IAAM,iBAAiB,aAC3B,OAAO;AAAA,EACN,OAAO,aACJ,OAAO;AAAA,IACN,WAAW,mBAAmB,SAAS;AAAA,IACvC,IAAI,mBAAmB,SAAS;AAAA,IAChC,gBAAgB,mBAAmB,SAAS;AAAA,IAC5C,gBAAgB,mBAAmB,SAAS;AAAA,IAC5C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,SAAS,mBAAmB,SAAS;AAAA,IACrC,eAAe,mBAAmB,SAAS;AAAA,IAC3C,iBAAiB,mBAAmB,SAAS;AAAA,EAC/C,CAAC,EACA,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,QAAQ,aACL,OAAO;AAAA,IACN,UAAU,aACP,OAAO,EACP,SAAS,EACT;AAAA,MACC;AAAA,IACF;AAAA,IACF,YAAY,aACT,OAAO,EACP,SAAS,EACT;AAAA,MACC;AAAA,IACF;AAAA,IACF,cAAc,aACX,OAAO,EACP,SAAS,EACT;AAAA,MACC;AAAA,IACF;AAAA,EACJ,CAAC,EACA,SAAS,EACT,SAAS,yCAAyC;AACvD,CAAC,EACA;AAAA,EACC;AACF;;;ADvDK,IAAM,eAAW,yBAAY,cAAc;;;AEJlD;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAEA,IAAM,YAAY;AAAA,EAChB,MAAM;AAAA,EACN,MAAM,EAAE,OAAO,QAAQ,IAAI,IAAI;AAAA,EAC/B,IAAI;AAAA,EACJ,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ,EAAE,MAAM,WAAW,UAAU,SAAkB;AACzD;AAGO,IAAM,cAAgC;AAAA,EAC3C,OAAO;AAAA,EACP,aAAa;AAAA,EACb,IAAI,EAAE,GAAG,UAAU;AAAA,EACnB,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,MAAM,EAAE,UAAU,GAAG,YAAY,EAAE;AAAA,QACrC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,gBAAkC;AAAA,EAC7C,OAAO;AAAA,EACP,aACE;AAAA,EACF,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,GAAG;AAAA,QACvC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,mBAAqC;AAAA,EAChD,OAAO;AAAA,EACP,aACE;AA
AA,EACF,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,GAAG;AAAA,QACvC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,aAA+B;AAAA,EAC1C,QAAQ;AAAA,EACR,aAAa;AAAA,EACb,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,EAAE;AAAA,QACtC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,YAA8B;AAAA,EACzC,QAAQ;AAAA,EACR,aACE;AAAA,EACF,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,EAAE;AAAA,QACtC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;;;AC1GO,IAAM,QAAoB;AAAA,EAC/B,uBAAuB;AAAA,IACrB,MAAM;AAAA,IACN,MAAM;AAAA,MACJ;AAAA,QACE,MAAM;AAAA,QACN,MAAM,KAAK;AAAA,UACT;AAAA,YACE,SAAS;AAAA,cACP,SAAS;AAAA,gBACP,SAAS;AAAA,gBACT,QAAQ;AAAA,kBACN,QAAQ;AAAA,oBACN,WAAW;AAAA,kBACb;AAAA,gBACF;AAAA,cACF;AAAA,YACF;AAAA,YACA,cAAc;AAAA,cACZ,KAAK;AAAA,gBACH,SAAS;AAAA,cACX;AAAA,YACF;AAAA,UACF;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EACA,kBAAkB;AAAA,IAChB,MAAM;AAAA,IACN,MAAM;AAAA,MACJ;AAAA,QACE,MAAM;AAAA,QACN,MAAM,KAAK;AAAA,UACT;AAAA,YACE,cAAc;AAAA,cACZ,KAAK;AAAA,gBACH,SAAS;AAAA,gBACT,QAAQ;AAAA,kBACN,UAAU;AAAA,oBACR,QAAQ;AAAA,sBACN,UAAU;AAAA,sBACV,YAAY;AAAA,sBACZ,cAAc;AAAA,oBAChB;AAAA,kBACF;AAAA,gBACF;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EACA,yBAAyB;AAAA,IACvB,MAAM;AAAA,EACR;AAAA,EACA,mBAAmB;AAAA,IACjB,MAAM;AAAA,EACR;AACF;","names":["import_dev"]}
package/dist/dev.mjs ADDED
@@ -0,0 +1 @@
1
+ var e=Object.defineProperty,t=(t,r)=>{for(var o in r)e(t,o,{get:r[o],enumerable:!0})},r={};t(r,{SettingsSchema:()=>a,settings:()=>s});import{zodToSchema as o}from"@walkeros/core/dev";import{z as n}from"@walkeros/core/dev";var i=n.union([n.string().describe('Dot-notation path like "ingest.userAgent"'),n.object({key:n.string().optional(),value:n.unknown().optional(),fn:n.string().optional()}).describe("Mapping value object")]),a=n.object({input:n.object({userAgent:i.optional(),ip:i.optional(),acceptLanguage:i.optional(),acceptEncoding:i.optional(),secFetchSite:i.optional(),secFetchMode:i.optional(),secFetchDest:i.optional(),secFetchUser:i.optional(),secChUa:i.optional(),secChUaMobile:i.optional(),secChUaPlatform:i.optional()}).optional().describe("Input signal sources, resolved via getMappingValue against { event, ingest }. v1 only reads userAgent; other fields reserved for v1.1 header heuristics."),output:n.object({botScore:n.string().optional().describe('Path for bot score (0-99, higher = more bot). Default: "user.botScore". Use "ingest.*" to route to pipeline scratch instead of the event. Empty string or omit = skip.'),agentScore:n.string().optional().describe('Path for AI agent score (0-99). v1 emits 0 (no match) or 95 (UA-map match). Default: "user.agentScore".'),agentProduct:n.string().optional().describe('Path for matched UA substring (e.g. "ChatGPT-User"). Off by default — set to enable.')}).optional().describe("Output paths for bot/agent annotations.")}).describe("Bot detection transformer: annotates events with bot and AI-agent scores."),s=o(a),c={};t(c,{step:()=>l});var l={};t(l,{chatgptUserAgent:()=>d,curlClient:()=>h,gptBotCrawler:()=>u,humanChrome:()=>p,missingUA:()=>b});var g={name:"page view",data:{title:"Home",id:"/"},id:"ev-1700000600",trigger:"load",entity:"page",action:"view",timestamp:1700000600,source:{type:"express",platform:"server"}},p={title:"Human visitor (Chrome)",description:"Modern Chrome UA. 
No bot or agent signals.",in:{...g},out:[["return",{event:{...g,user:{botScore:0,agentScore:0}}}]]},u={title:"GPTBot training crawler",description:"OpenAI training crawler. Both botScore and agentScore are high.",in:{...g,id:"ev-1700000601"},out:[["return",{event:{...g,id:"ev-1700000601",user:{botScore:95,agentScore:95}}}]]},d={title:"ChatGPT-User (user-action AI)",description:"A real human routed an AI to fetch this page. botScore high but lower than crawlers — agentProduct lets destinations keep this traffic.",in:{...g,id:"ev-1700000602"},out:[["return",{event:{...g,id:"ev-1700000602",user:{botScore:90,agentScore:95}}}]]},h={public:!1,description:"curl client — caught by isbot. agentScore zero.",in:{...g,id:"ev-1700000603"},out:[["return",{event:{...g,id:"ev-1700000603",user:{botScore:80,agentScore:0}}}]]},b={public:!1,description:"No User-Agent — baseline 70 (UA stripping is overwhelmingly bots or hardened privacy tools).",in:{...g,id:"ev-1700000604"},out:[["return",{event:{...g,id:"ev-1700000604",user:{botScore:70,agentScore:0}}}]]},m={"ingest-prerequisite":{text:'The bot transformer reads userAgent from ctx.ingest (path "ingest.userAgent" by default). The upstream server source must populate it via config.ingest mapping, otherwise the UA is empty and every event scores 70 (baseline for missing UA).',code:[{lang:"json",code:JSON.stringify({sources:{express:{package:"@walkeros/server-source-express",config:{ingest:{userAgent:"req.headers.user-agent"}}}},transformers:{bot:{package:"@walkeros/server-transformer-bot"}}},null,2)}]},"output-routing":{text:"Outputs default to event.user.botScore and event.user.agentScore. Redirect to ingest.* to keep the analytics event clean while still routing on the score downstream. Empty string (or omit) skips writing that field entirely. 
agentProduct is off by default — set it to enable writing the matched UA substring.",code:[{lang:"json",code:JSON.stringify({transformers:{bot:{package:"@walkeros/server-transformer-bot",config:{settings:{output:{botScore:"ingest.bot.score",agentScore:"ingest.bot.agent",agentProduct:"user.agentProduct"}}}}}},null,2)}]},"destination-filtering":{text:'Recommended destination-mapping recipes. Drop all bots: botScore > 50. Drop crawlers but keep user-action AI traffic: botScore > 50 AND agentProduct NOT LIKE "%-User". AI traffic report: group by agentProduct WHERE agentScore > 50. The transformer never drops events — filtering is always a destination decision.'},"detection-scope":{text:"v1 is UA-only: wraps isbot (curl, wget, headless Chrome defaults, well-known crawlers) plus a curated AI-agent UA map (OpenAI, Anthropic, Perplexity, Mistral, Meta, Google, Apple, Amazon, DuckDuckGo, ByteDance, Common Crawl). It will NOT catch: residential-proxy + stealth Chrome + realistic behavior; reverse-DNS-verified search engines; client-side runtime tells. v1.1 adds header consistency heuristics (Sec-Fetch, Sec-CH-UA, Accept-Language) with proper GREASE handling. For commercial-grade detection use Cloudflare Bot Management, DataDome, or HUMAN."}};export{c as examples,m as hints,r as schemas};//# sourceMappingURL=dev.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/schemas/index.ts","../src/schemas/settings.ts","../src/examples/index.ts","../src/examples/step.ts","../src/hints.ts"],"sourcesContent":["import { zodToSchema } from '@walkeros/core/dev';\nimport { SettingsSchema } from './settings';\n\nexport { SettingsSchema, type Settings } from './settings';\nexport const settings = zodToSchema(SettingsSchema);\n","import { z } from '@walkeros/core/dev';\n\nconst MappingValueSchema = z.union([\n z.string().describe('Dot-notation path like \"ingest.userAgent\"'),\n z\n .object({\n key: z.string().optional(),\n value: z.unknown().optional(),\n fn: z.string().optional(),\n })\n .describe('Mapping value object'),\n]);\n\nexport const SettingsSchema = z\n .object({\n input: z\n .object({\n userAgent: MappingValueSchema.optional(),\n ip: MappingValueSchema.optional(),\n acceptLanguage: MappingValueSchema.optional(),\n acceptEncoding: MappingValueSchema.optional(),\n secFetchSite: MappingValueSchema.optional(),\n secFetchMode: MappingValueSchema.optional(),\n secFetchDest: MappingValueSchema.optional(),\n secFetchUser: MappingValueSchema.optional(),\n secChUa: MappingValueSchema.optional(),\n secChUaMobile: MappingValueSchema.optional(),\n secChUaPlatform: MappingValueSchema.optional(),\n })\n .optional()\n .describe(\n 'Input signal sources, resolved via getMappingValue against { event, ingest }. v1 only reads userAgent; other fields reserved for v1.1 header heuristics.',\n ),\n output: z\n .object({\n botScore: z\n .string()\n .optional()\n .describe(\n 'Path for bot score (0-99, higher = more bot). Default: \"user.botScore\". Use \"ingest.*\" to route to pipeline scratch instead of the event. Empty string or omit = skip.',\n ),\n agentScore: z\n .string()\n .optional()\n .describe(\n 'Path for AI agent score (0-99). v1 emits 0 (no match) or 95 (UA-map match). Default: \"user.agentScore\".',\n ),\n agentProduct: z\n .string()\n .optional()\n .describe(\n 'Path for matched UA substring (e.g. 
\"ChatGPT-User\"). Off by default — set to enable.',\n ),\n })\n .optional()\n .describe('Output paths for bot/agent annotations.'),\n })\n .describe(\n 'Bot detection transformer: annotates events with bot and AI-agent scores.',\n );\n\nexport type Settings = z.infer<typeof SettingsSchema>;\n","export * as step from './step';\n","import type { Flow } from '@walkeros/core';\n\nconst baseEvent = {\n name: 'page view',\n data: { title: 'Home', id: '/' },\n id: 'ev-1700000600',\n trigger: 'load',\n entity: 'page',\n action: 'view',\n timestamp: 1700000600,\n source: { type: 'express', platform: 'server' as const },\n};\n\n/** Real Chrome — botScore 0, agentScore 0. */\nexport const humanChrome: Flow.StepExample = {\n title: 'Human visitor (Chrome)',\n description: 'Modern Chrome UA. No bot or agent signals.',\n in: { ...baseEvent },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n user: { botScore: 0, agentScore: 0 },\n },\n },\n ],\n ],\n};\n\n/** GPTBot — training crawler. */\nexport const gptBotCrawler: Flow.StepExample = {\n title: 'GPTBot training crawler',\n description:\n 'OpenAI training crawler. Both botScore and agentScore are high.',\n in: { ...baseEvent, id: 'ev-1700000601' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000601',\n user: { botScore: 95, agentScore: 95 },\n },\n },\n ],\n ],\n};\n\n/** ChatGPT-User — user-action AI agent. */\nexport const chatgptUserAgent: Flow.StepExample = {\n title: 'ChatGPT-User (user-action AI)',\n description:\n 'A real human routed an AI to fetch this page. botScore high but lower than crawlers — agentProduct lets destinations keep this traffic.',\n in: { ...baseEvent, id: 'ev-1700000602' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000602',\n user: { botScore: 90, agentScore: 95 },\n },\n },\n ],\n ],\n};\n\n/** curl — caught by isbot. */\nexport const curlClient: Flow.StepExample = {\n public: false,\n description: 'curl client — caught by isbot. 
agentScore zero.',\n in: { ...baseEvent, id: 'ev-1700000603' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000603',\n user: { botScore: 80, agentScore: 0 },\n },\n },\n ],\n ],\n};\n\n/** Empty / missing UA — score 70 (suspicious; real browsers rarely strip UA). */\nexport const missingUA: Flow.StepExample = {\n public: false,\n description:\n 'No User-Agent — baseline 70 (UA stripping is overwhelmingly bots or hardened privacy tools).',\n in: { ...baseEvent, id: 'ev-1700000604' },\n out: [\n [\n 'return',\n {\n event: {\n ...baseEvent,\n id: 'ev-1700000604',\n user: { botScore: 70, agentScore: 0 },\n },\n },\n ],\n ],\n};\n","import type { Hint } from '@walkeros/core';\n\nexport const hints: Hint.Hints = {\n 'ingest-prerequisite': {\n text: 'The bot transformer reads userAgent from ctx.ingest (path \"ingest.userAgent\" by default). The upstream server source must populate it via config.ingest mapping, otherwise the UA is empty and every event scores 70 (baseline for missing UA).',\n code: [\n {\n lang: 'json',\n code: JSON.stringify(\n {\n sources: {\n express: {\n package: '@walkeros/server-source-express',\n config: {\n ingest: {\n userAgent: 'req.headers.user-agent',\n },\n },\n },\n },\n transformers: {\n bot: {\n package: '@walkeros/server-transformer-bot',\n },\n },\n },\n null,\n 2,\n ),\n },\n ],\n },\n 'output-routing': {\n text: 'Outputs default to event.user.botScore and event.user.agentScore. Redirect to ingest.* to keep the analytics event clean while still routing on the score downstream. Empty string (or omit) skips writing that field entirely. 
agentProduct is off by default — set it to enable writing the matched UA substring.',\n code: [\n {\n lang: 'json',\n code: JSON.stringify(\n {\n transformers: {\n bot: {\n package: '@walkeros/server-transformer-bot',\n config: {\n settings: {\n output: {\n botScore: 'ingest.bot.score',\n agentScore: 'ingest.bot.agent',\n agentProduct: 'user.agentProduct',\n },\n },\n },\n },\n },\n },\n null,\n 2,\n ),\n },\n ],\n },\n 'destination-filtering': {\n text: 'Recommended destination-mapping recipes. Drop all bots: botScore > 50. Drop crawlers but keep user-action AI traffic: botScore > 50 AND agentProduct NOT LIKE \"%-User\". AI traffic report: group by agentProduct WHERE agentScore > 50. The transformer never drops events — filtering is always a destination decision.',\n },\n 'detection-scope': {\n text: 'v1 is UA-only: wraps isbot (curl, wget, headless Chrome defaults, well-known crawlers) plus a curated AI-agent UA map (OpenAI, Anthropic, Perplexity, Mistral, Meta, Google, Apple, Amazon, DuckDuckGo, ByteDance, Common Crawl). It will NOT catch: residential-proxy + stealth Chrome + realistic behavior; reverse-DNS-verified search engines; client-side runtime tells. v1.1 adds header consistency heuristics (Sec-Fetch, Sec-CH-UA, Accept-Language) with proper GREASE handling. 
For commercial-grade detection use Cloudflare Bot Management, DataDome, or HUMAN.',\n },\n};\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,SAAS,mBAAmB;;;ACA5B,SAAS,SAAS;AAElB,IAAM,qBAAqB,EAAE,MAAM;AAAA,EACjC,EAAE,OAAO,EAAE,SAAS,2CAA2C;AAAA,EAC/D,EACG,OAAO;AAAA,IACN,KAAK,EAAE,OAAO,EAAE,SAAS;AAAA,IACzB,OAAO,EAAE,QAAQ,EAAE,SAAS;AAAA,IAC5B,IAAI,EAAE,OAAO,EAAE,SAAS;AAAA,EAC1B,CAAC,EACA,SAAS,sBAAsB;AACpC,CAAC;AAEM,IAAM,iBAAiB,EAC3B,OAAO;AAAA,EACN,OAAO,EACJ,OAAO;AAAA,IACN,WAAW,mBAAmB,SAAS;AAAA,IACvC,IAAI,mBAAmB,SAAS;AAAA,IAChC,gBAAgB,mBAAmB,SAAS;AAAA,IAC5C,gBAAgB,mBAAmB,SAAS;AAAA,IAC5C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,cAAc,mBAAmB,SAAS;AAAA,IAC1C,SAAS,mBAAmB,SAAS;AAAA,IACrC,eAAe,mBAAmB,SAAS;AAAA,IAC3C,iBAAiB,mBAAmB,SAAS;AAAA,EAC/C,CAAC,EACA,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,QAAQ,EACL,OAAO;AAAA,IACN,UAAU,EACP,OAAO,EACP,SAAS,EACT;AAAA,MACC;AAAA,IACF;AAAA,IACF,YAAY,EACT,OAAO,EACP,SAAS,EACT;AAAA,MACC;AAAA,IACF;AAAA,IACF,cAAc,EACX,OAAO,EACP,SAAS,EACT;AAAA,MACC;AAAA,IACF;AAAA,EACJ,CAAC,EACA,SAAS,EACT,SAAS,yCAAyC;AACvD,CAAC,EACA;AAAA,EACC;AACF;;;ADvDK,IAAM,WAAW,YAAY,cAAc;;;AEJlD;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAEA,IAAM,YAAY;AAAA,EAChB,MAAM;AAAA,EACN,MAAM,EAAE,OAAO,QAAQ,IAAI,IAAI;AAAA,EAC/B,IAAI;AAAA,EACJ,SAAS;AAAA,EACT,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,QAAQ,EAAE,MAAM,WAAW,UAAU,SAAkB;AACzD;AAGO,IAAM,cAAgC;AAAA,EAC3C,OAAO;AAAA,EACP,aAAa;AAAA,EACb,IAAI,EAAE,GAAG,UAAU;AAAA,EACnB,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,MAAM,EAAE,UAAU,GAAG,YAAY,EAAE;AAAA,QACrC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,gBAAkC;AAAA,EAC7C,OAAO;AAAA,EACP,aACE;AAAA,EACF,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,GAAG;AAAA,QACvC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,mBAAqC;AAAA,EAChD,OAAO;AAAA,EACP,aACE;AAAA,EACF,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EAC
xC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,GAAG;AAAA,QACvC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,aAA+B;AAAA,EAC1C,QAAQ;AAAA,EACR,aAAa;AAAA,EACb,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,EAAE;AAAA,QACtC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAGO,IAAM,YAA8B;AAAA,EACzC,QAAQ;AAAA,EACR,aACE;AAAA,EACF,IAAI,EAAE,GAAG,WAAW,IAAI,gBAAgB;AAAA,EACxC,KAAK;AAAA,IACH;AAAA,MACE;AAAA,MACA;AAAA,QACE,OAAO;AAAA,UACL,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,EAAE,UAAU,IAAI,YAAY,EAAE;AAAA,QACtC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;;;AC1GO,IAAM,QAAoB;AAAA,EAC/B,uBAAuB;AAAA,IACrB,MAAM;AAAA,IACN,MAAM;AAAA,MACJ;AAAA,QACE,MAAM;AAAA,QACN,MAAM,KAAK;AAAA,UACT;AAAA,YACE,SAAS;AAAA,cACP,SAAS;AAAA,gBACP,SAAS;AAAA,gBACT,QAAQ;AAAA,kBACN,QAAQ;AAAA,oBACN,WAAW;AAAA,kBACb;AAAA,gBACF;AAAA,cACF;AAAA,YACF;AAAA,YACA,cAAc;AAAA,cACZ,KAAK;AAAA,gBACH,SAAS;AAAA,cACX;AAAA,YACF;AAAA,UACF;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EACA,kBAAkB;AAAA,IAChB,MAAM;AAAA,IACN,MAAM;AAAA,MACJ;AAAA,QACE,MAAM;AAAA,QACN,MAAM,KAAK;AAAA,UACT;AAAA,YACE,cAAc;AAAA,cACZ,KAAK;AAAA,gBACH,SAAS;AAAA,gBACT,QAAQ;AAAA,kBACN,UAAU;AAAA,oBACR,QAAQ;AAAA,sBACN,UAAU;AAAA,sBACV,YAAY;AAAA,sBACZ,cAAc;AAAA,oBAChB;AAAA,kBACF;AAAA,gBACF;AAAA,cACF;AAAA,YACF;AAAA,UACF;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EACA,yBAAyB;AAAA,IACvB,MAAM;AAAA,EACR;AAAA,EACA,mBAAmB;AAAA,IACjB,MAAM;AAAA,EACR;AACF;","names":[]}
@@ -0,0 +1,40 @@
1
+ import { Mapping, Transformer } from '@walkeros/core';
2
+
3
+ /**
4
+ * Input signal sources. Each is a Mapping.Value resolved via getMappingValue
5
+ * against { event, ingest }. Unset keys keep their default path, `ingest.<key>`.
6
+ *
7
+ * v1 only reads `userAgent`. The remaining fields are reserved for v1.1
8
+ * (header consistency heuristics) so the public schema stays stable.
9
+ */
10
+ interface BotInput {
11
+ userAgent?: Mapping.Value; // the only input read in v1; a non-string result is treated as an empty UA
12
+ ip?: Mapping.Value; // reserved for v1.1 (not read in v1), as are all fields below
13
+ acceptLanguage?: Mapping.Value;
14
+ acceptEncoding?: Mapping.Value;
15
+ secFetchSite?: Mapping.Value;
16
+ secFetchMode?: Mapping.Value;
17
+ secFetchDest?: Mapping.Value;
18
+ secFetchUser?: Mapping.Value;
19
+ secChUa?: Mapping.Value;
20
+ secChUaMobile?: Mapping.Value;
21
+ secChUaPlatform?: Mapping.Value;
22
+ }
23
+ /**
24
+ * Output paths. An empty string skips writing that field; an omitted key falls
25
+ * back to its default path. Paths starting with `ingest.` route to context.ingest
26
+ * (pipeline scratch); everything else routes to the event.
27
+ */
28
+ interface BotOutput {
29
+ botScore?: string; // default: "user.botScore"
30
+ agentScore?: string; // default: "user.agentScore"
31
+ agentProduct?: string; // default: "" (off); when a path is set, written only if an AI-agent UA matched
32
+ }
33
+ interface BotSettings { // transformer settings, read from config.settings
34
+ input?: BotInput; // overrides, merged over the default input paths
35
+ output?: BotOutput; // overrides, merged over the default output paths
36
+ }
37
+
38
+ declare const transformerBot: Transformer.Init<Transformer.Types<BotSettings>>; // init function: returns a transformer of type "bot" whose push() annotates events with bot/agent scores
39
+
40
+ export { type BotInput, type BotOutput, type BotSettings, transformerBot as default, transformerBot };
@@ -0,0 +1,40 @@
1
+ import { Mapping, Transformer } from '@walkeros/core';
2
+
3
+ /**
4
+ * Input signal sources. Each is a Mapping.Value resolved via getMappingValue
5
+ * against { event, ingest }. Unset keys keep their default path, `ingest.<key>`.
6
+ *
7
+ * v1 only reads `userAgent`. The remaining fields are reserved for v1.1
8
+ * (header consistency heuristics) so the public schema stays stable.
9
+ */
10
+ interface BotInput {
11
+ userAgent?: Mapping.Value; // the only input read in v1; a non-string result is treated as an empty UA
12
+ ip?: Mapping.Value; // reserved for v1.1 (not read in v1), as are all fields below
13
+ acceptLanguage?: Mapping.Value;
14
+ acceptEncoding?: Mapping.Value;
15
+ secFetchSite?: Mapping.Value;
16
+ secFetchMode?: Mapping.Value;
17
+ secFetchDest?: Mapping.Value;
18
+ secFetchUser?: Mapping.Value;
19
+ secChUa?: Mapping.Value;
20
+ secChUaMobile?: Mapping.Value;
21
+ secChUaPlatform?: Mapping.Value;
22
+ }
23
+ /**
24
+ * Output paths. An empty string skips writing that field; an omitted key falls
25
+ * back to its default path. Paths starting with `ingest.` route to context.ingest
26
+ * (pipeline scratch); everything else routes to the event.
27
+ */
28
+ interface BotOutput {
29
+ botScore?: string; // default: "user.botScore"
30
+ agentScore?: string; // default: "user.agentScore"
31
+ agentProduct?: string; // default: "" (off); when a path is set, written only if an AI-agent UA matched
32
+ }
33
+ interface BotSettings { // transformer settings, read from config.settings
34
+ input?: BotInput; // overrides, merged over the default input paths
35
+ output?: BotOutput; // overrides, merged over the default output paths
36
+ }
37
+
38
+ declare const transformerBot: Transformer.Init<Transformer.Types<BotSettings>>; // init function: returns a transformer of type "bot" whose push() annotates events with bot/agent scores
39
+
40
+ export { type BotInput, type BotOutput, type BotSettings, transformerBot as default, transformerBot };
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ "use strict";var e,t=Object.defineProperty,o=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,c=Object.prototype.hasOwnProperty,a={};((e,o)=>{for(var r in o)t(e,r,{get:o[r],enumerable:!0})})(a,{default:()=>d,transformerBot:()=>d}),module.exports=(e=a,((e,a,n,s)=>{if(a&&"object"==typeof a||"function"==typeof a)for(let p of r(a))c.call(e,p)||p===n||t(e,p,{get:()=>a[p],enumerable:!(s=o(a,p))||s.enumerable});return e})(t({},"__esModule",{value:!0}),e));var n=require("@walkeros/core"),s=require("isbot"),p=[{match:"ChatGPT-User",product:"ChatGPT-User",purpose:"user-action"},{match:"ChatGPT-Agent",product:"ChatGPT-Agent",purpose:"user-action"},{match:"OAI-SearchBot",product:"OAI-SearchBot",purpose:"search-index"},{match:"GPTBot",product:"GPTBot",purpose:"training"},{match:"Claude-SearchBot",product:"Claude-SearchBot",purpose:"search-index"},{match:"Claude-User",product:"Claude-User",purpose:"user-action"},{match:"Claude-Code",product:"Claude-Code",purpose:"user-action"},{match:"ClaudeBot",product:"ClaudeBot",purpose:"training"},{match:"anthropic-ai",product:"anthropic-ai",purpose:"training"},{match:"Perplexity-User",product:"Perplexity-User",purpose:"user-action"},{match:"PerplexityBot",product:"PerplexityBot",purpose:"search-index"},{match:"MistralAI-User",product:"MistralAI-User",purpose:"user-action"},{match:"Meta-ExternalFetcher",product:"Meta-ExternalFetcher",purpose:"user-action"},{match:"Meta-ExternalAgent",product:"Meta-ExternalAgent",purpose:"training"},{match:"Google-CloudVertexBot",product:"Google-CloudVertexBot",purpose:"training"},{match:"Google-Extended",product:"Google-Extended",purpose:"training"},{match:"Applebot-Extended",product:"Applebot-Extended",purpose:"training"},{match:"Amazonbot",product:"Amazonbot",purpose:"training"},{match:"DuckAssistBot",product:"DuckAssistBot",purpose:"user-action"},{match:"Bytespider",product:"Bytespider",purpose:"training"},{match:"CCBot",product:"CCBot",purpose:"training"}];function 
u(e){if(!e)return{botScore:70,agentScore:0,agentProduct:void 0};const t=function(e){const t=e.toLowerCase(),o=p.find(e=>t.includes(e.match.toLowerCase()));return{isBot:!e||(0,s.isbot)(e)||void 0!==o,agent:o?{product:o.product,purpose:o.purpose}:void 0}}(e);let o=0;return t.agent?o="user-action"===t.agent.purpose?90:95:t.isBot&&(o=80),{botScore:o,agentScore:t.agent?95:0,agentProduct:t.agent?.product}}var i={userAgent:"ingest.userAgent",ip:"ingest.ip",acceptLanguage:"ingest.acceptLanguage",acceptEncoding:"ingest.acceptEncoding",secFetchSite:"ingest.secFetchSite",secFetchMode:"ingest.secFetchMode",secFetchDest:"ingest.secFetchDest",secFetchUser:"ingest.secFetchUser",secChUa:"ingest.secChUa",secChUaMobile:"ingest.secChUaMobile",secChUaPlatform:"ingest.secChUaPlatform"},g={botScore:"user.botScore",agentScore:"user.agentScore",agentProduct:""};var d=e=>{const{config:t}=e,o=t.settings??{},r={...i,...o.input??{}},c={...g,...o.output??{}};return{type:"bot",config:t,async push(e,t){const{ingest:o,collector:a}=t,s={event:e,ingest:o},p=await(0,n.getMappingValue)(s,r.userAgent,{collector:a}),i=u("string"==typeof p?p:"");let g=e;const d=(e,t)=>{e&&void 0!==t&&(e.startsWith("ingest.")?function(e,t,o){const r=t.split(".");let c=e;for(let e=0;e<r.length-1;e++){const t=r[e],o=c[t];"object"==typeof o&&null!==o||(c[t]={}),c=c[t]}c[r[r.length-1]]=o}(o,e.slice(7),t):g=(0,n.setByPath)(g,e,t))};return d(c.botScore??"",i.botScore),d(c.agentScore??"",i.agentScore),d(c.agentProduct??"",i.agentProduct),{event:g}}}};//# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts","../src/transformer.ts","../src/detect/ua.ts","../src/data/agents.ts","../src/detect/score.ts"],"sourcesContent":["export { transformerBot } from './transformer';\nexport type { BotSettings, BotInput, BotOutput } from './types';\nexport { transformerBot as default } from './transformer';\n","import type { Transformer } from '@walkeros/core';\nimport { getMappingValue, setByPath } from '@walkeros/core';\nimport { computeScore } from './detect/score';\nimport type { BotInput, BotOutput, BotSettings } from './types';\n\nconst DEFAULT_INPUT: Required<BotInput> = {\n userAgent: 'ingest.userAgent',\n ip: 'ingest.ip',\n acceptLanguage: 'ingest.acceptLanguage',\n acceptEncoding: 'ingest.acceptEncoding',\n secFetchSite: 'ingest.secFetchSite',\n secFetchMode: 'ingest.secFetchMode',\n secFetchDest: 'ingest.secFetchDest',\n secFetchUser: 'ingest.secFetchUser',\n secChUa: 'ingest.secChUa',\n secChUaMobile: 'ingest.secChUaMobile',\n secChUaPlatform: 'ingest.secChUaPlatform',\n};\n\nconst DEFAULT_OUTPUT: Required<BotOutput> = {\n botScore: 'user.botScore',\n agentScore: 'user.agentScore',\n agentProduct: '', // off by default\n};\n\n/**\n * Mutating dot-path setter for ingest writes.\n *\n * We can't use @walkeros/core setByPath here: it clones-and-returns (immutable),\n * but ingest is the pipeline's mutable scratch context. 
We need in-place writes\n * so subsequent transformers in the chain see the values.\n */\nfunction setNestedPath(\n obj: Record<string, unknown>,\n path: string,\n value: unknown,\n): void {\n const keys = path.split('.');\n let cur: Record<string, unknown> = obj;\n for (let i = 0; i < keys.length - 1; i++) {\n const k = keys[i];\n const next = cur[k];\n if (typeof next !== 'object' || next === null) cur[k] = {};\n cur = cur[k] as Record<string, unknown>;\n }\n cur[keys[keys.length - 1]] = value;\n}\n\nexport const transformerBot: Transformer.Init<\n Transformer.Types<BotSettings>\n> = (context) => {\n const { config } = context;\n const settings: BotSettings = config.settings ?? {};\n const input: Required<BotInput> = {\n ...DEFAULT_INPUT,\n ...(settings.input ?? {}),\n };\n const output: BotOutput = {\n ...DEFAULT_OUTPUT,\n ...(settings.output ?? {}),\n };\n\n return {\n // Init's input config type is Partial<Settings>; the instance config type\n // is Settings. Same cast pattern the fingerprint transformer uses.\n type: 'bot',\n config: config as Transformer.Config<Transformer.Types<BotSettings>>,\n\n async push(event, ctx) {\n const { ingest, collector } = ctx;\n const source = { event, ingest };\n\n // v1 only reads userAgent. Other input fields are reserved for v1.1\n // (header heuristics); resolved-but-unused here would be wasteful, so\n // they are intentionally not read yet.\n const uaValue = await getMappingValue(source, input.userAgent, {\n collector,\n });\n const ua = typeof uaValue === 'string' ? uaValue : '';\n const score = computeScore(ua);\n\n let nextEvent = event;\n\n const writeOutput = (path: string, value: unknown) => {\n if (!path || value === undefined) return;\n if (path.startsWith('ingest.')) {\n setNestedPath(ingest, path.slice('ingest.'.length), value);\n } else {\n nextEvent = setByPath(nextEvent, path, value);\n }\n };\n\n writeOutput(output.botScore ?? '', score.botScore);\n writeOutput(output.agentScore ?? 
'', score.agentScore);\n writeOutput(output.agentProduct ?? '', score.agentProduct);\n\n return { event: nextEvent };\n },\n };\n};\n","import { isbot } from 'isbot';\nimport { agents, type AgentEntry } from '../data/agents';\n\nexport interface UAResult {\n isBot: boolean;\n agent?: { product: string; purpose: AgentEntry['purpose'] };\n}\n\nexport function detectUA(ua: string): UAResult {\n const lower = ua.toLowerCase();\n const matched = agents.find((a) => lower.includes(a.match.toLowerCase()));\n return {\n isBot: !ua || isbot(ua) || matched !== undefined,\n agent: matched\n ? { product: matched.product, purpose: matched.purpose }\n : undefined,\n };\n}\n","/**\n * Curated AI agent UA-substring map (2026-Q2).\n *\n * Each entry: substring matched case-insensitively against the User-Agent,\n * a product label written to event.user.agentProduct, and the purpose category.\n *\n * Purpose semantics:\n * - 'training' — crawls for model training; usually filter from analytics\n * - 'search-index' — crawls to power AI search answers; AEO-relevant\n * - 'user-action' — fetch initiated by a human via an AI tool; often kept as traffic\n *\n * Order matters: first-hit wins. 
More-specific entries must precede broader ones.\n *\n * Vendor docs of record (verified 2026-05):\n * OpenAI: https://platform.openai.com/docs/bots\n * Anthropic: https://support.claude.com/en/articles/8896518\n * Perplexity: https://docs.perplexity.ai/guides/bots\n * Meta: https://developers.facebook.com/docs/sharing/webmasters/web-crawlers\n * Google: https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers\n * Apple: https://support.apple.com/en-us/119829\n * DuckDuckGo: https://duckduckgo.com/duckduckbot\n * Common Crawl: https://commoncrawl.org/faq\n * Amazon: https://developer.amazon.com/amazonbot\n *\n * Community cross-reference: https://github.com/ai-robots-txt/ai.robots.txt\n *\n * Reviewed quarterly — see /workspaces/developer/docs/research/2026-05-13-bot-detection.md.\n */\nexport interface AgentEntry {\n match: string;\n product: string;\n purpose: 'training' | 'search-index' | 'user-action';\n}\n\nexport const agents: AgentEntry[] = [\n // --- OpenAI ---\n { match: 'ChatGPT-User', product: 'ChatGPT-User', purpose: 'user-action' },\n { match: 'ChatGPT-Agent', product: 'ChatGPT-Agent', purpose: 'user-action' },\n { match: 'OAI-SearchBot', product: 'OAI-SearchBot', purpose: 'search-index' },\n { match: 'GPTBot', product: 'GPTBot', purpose: 'training' },\n\n // --- Anthropic ---\n // Claude-SearchBot must precede Claude-User (defensive specificity for composite UAs)\n {\n match: 'Claude-SearchBot',\n product: 'Claude-SearchBot',\n purpose: 'search-index',\n },\n { match: 'Claude-User', product: 'Claude-User', purpose: 'user-action' },\n { match: 'Claude-Code', product: 'Claude-Code', purpose: 'user-action' },\n { match: 'ClaudeBot', product: 'ClaudeBot', purpose: 'training' },\n // Legacy: only used by older Anthropic crawlers; kept for back-compat with old logs.\n { match: 'anthropic-ai', product: 'anthropic-ai', purpose: 'training' },\n\n // --- Perplexity ---\n {\n match: 'Perplexity-User',\n product: 'Perplexity-User',\n 
purpose: 'user-action',\n },\n {\n match: 'PerplexityBot',\n product: 'PerplexityBot',\n purpose: 'search-index',\n },\n\n // --- Mistral ---\n {\n match: 'MistralAI-User',\n product: 'MistralAI-User',\n purpose: 'user-action',\n },\n\n // --- Meta ---\n {\n match: 'Meta-ExternalFetcher',\n product: 'Meta-ExternalFetcher',\n purpose: 'user-action',\n },\n {\n match: 'Meta-ExternalAgent',\n product: 'Meta-ExternalAgent',\n purpose: 'training',\n },\n\n // --- Google ---\n {\n match: 'Google-CloudVertexBot',\n product: 'Google-CloudVertexBot',\n purpose: 'training',\n },\n {\n match: 'Google-Extended',\n product: 'Google-Extended',\n purpose: 'training',\n },\n\n // --- Apple ---\n {\n match: 'Applebot-Extended',\n product: 'Applebot-Extended',\n purpose: 'training',\n },\n\n // --- Amazon ---\n { match: 'Amazonbot', product: 'Amazonbot', purpose: 'training' },\n\n // --- DuckDuckGo ---\n {\n match: 'DuckAssistBot',\n product: 'DuckAssistBot',\n purpose: 'user-action',\n },\n\n // --- ByteDance ---\n { match: 'Bytespider', product: 'Bytespider', purpose: 'training' },\n\n // --- Common Crawl ---\n { match: 'CCBot', product: 'CCBot', purpose: 'training' },\n];\n","import { detectUA } from './ua';\n\nexport interface ScoreResult {\n /** 0-99, higher = more bot. v1 emits discrete values: 0, 70, 80, 90, 95. */\n botScore: number;\n /**\n * 0-99, higher = more likely an AI agent. v1 emits only 0 or 95\n * (binary UA-map match). Graduated values (e.g. 70 for unverified UA\n * claim, 99 for IP-reverse-DNS verified) are planned for v1.1.\n */\n agentScore: number;\n /** Matched AI agent UA substring, when one was found. 
*/\n agentProduct?: string;\n}\n\n/**\n * v1: UA-only.\n *\n * botScore baseline:\n * - Empty UA → 70 (real browsers rarely strip UA)\n * - AI training crawler → 95\n * - AI user-action → 90\n * - isbot true → 80\n * - Otherwise → 0\n *\n * Header heuristics (Sec-Fetch missing, Sec-CH-UA major mismatch,\n * Accept-Language stripping) are intentionally deferred to v1.1 —\n * see the README \"Not in v1\" section and the research file.\n */\nexport function computeScore(ua: string): ScoreResult {\n if (!ua) {\n return { botScore: 70, agentScore: 0, agentProduct: undefined };\n }\n\n const uaResult = detectUA(ua);\n\n let botScore = 0;\n if (uaResult.agent) {\n botScore = uaResult.agent.purpose === 'user-action' ? 90 : 95;\n } else if (uaResult.isBot) {\n botScore = 80;\n }\n\n return {\n botScore,\n agentScore: uaResult.agent ? 95 : 0,\n agentProduct: uaResult.agent?.product,\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACCA,kBAA2C;;;ACD3C,mBAAsB;;;ACkCf,IAAM,SAAuB;AAAA;AAAA,EAElC,EAAE,OAAO,gBAAgB,SAAS,gBAAgB,SAAS,cAAc;AAAA,EACzE,EAAE,OAAO,iBAAiB,SAAS,iBAAiB,SAAS,cAAc;AAAA,EAC3E,EAAE,OAAO,iBAAiB,SAAS,iBAAiB,SAAS,eAAe;AAAA,EAC5E,EAAE,OAAO,UAAU,SAAS,UAAU,SAAS,WAAW;AAAA;AAAA;AAAA,EAI1D;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA,EAAE,OAAO,eAAe,SAAS,eAAe,SAAS,cAAc;AAAA,EACvE,EAAE,OAAO,eAAe,SAAS,eAAe,SAAS,cAAc;AAAA,EACvE,EAAE,OAAO,aAAa,SAAS,aAAa,SAAS,WAAW;AAAA;AAAA,EAEhE,EAAE,OAAO,gBAAgB,SAAS,gBAAgB,SAAS,WAAW;AAAA;AAAA,EAGtE;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AA
AA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA,EAAE,OAAO,aAAa,SAAS,aAAa,SAAS,WAAW;AAAA;AAAA,EAGhE;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA,EAAE,OAAO,cAAc,SAAS,cAAc,SAAS,WAAW;AAAA;AAAA,EAGlE,EAAE,OAAO,SAAS,SAAS,SAAS,SAAS,WAAW;AAC1D;;;AD/GO,SAAS,SAAS,IAAsB;AAC7C,QAAM,QAAQ,GAAG,YAAY;AAC7B,QAAM,UAAU,OAAO,KAAK,CAAC,MAAM,MAAM,SAAS,EAAE,MAAM,YAAY,CAAC,CAAC;AACxE,SAAO;AAAA,IACL,OAAO,CAAC,UAAM,oBAAM,EAAE,KAAK,YAAY;AAAA,IACvC,OAAO,UACH,EAAE,SAAS,QAAQ,SAAS,SAAS,QAAQ,QAAQ,IACrD;AAAA,EACN;AACF;;;AEYO,SAAS,aAAa,IAAyB;AACpD,MAAI,CAAC,IAAI;AACP,WAAO,EAAE,UAAU,IAAI,YAAY,GAAG,cAAc,OAAU;AAAA,EAChE;AAEA,QAAM,WAAW,SAAS,EAAE;AAE5B,MAAI,WAAW;AACf,MAAI,SAAS,OAAO;AAClB,eAAW,SAAS,MAAM,YAAY,gBAAgB,KAAK;AAAA,EAC7D,WAAW,SAAS,OAAO;AACzB,eAAW;AAAA,EACb;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,SAAS,QAAQ,KAAK;AAAA,IAClC,cAAc,SAAS,OAAO;AAAA,EAChC;AACF;;;AH3CA,IAAM,gBAAoC;AAAA,EACxC,WAAW;AAAA,EACX,IAAI;AAAA,EACJ,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,cAAc;AAAA,EACd,cAAc;AAAA,EACd,cAAc;AAAA,EACd,cAAc;AAAA,EACd,SAAS;AAAA,EACT,eAAe;AAAA,EACf,iBAAiB;AACnB;AAEA,IAAM,iBAAsC;AAAA,EAC1C,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,cAAc;AAAA;AAChB;AASA,SAAS,cACP,KACA,MACA,OACM;AACN,QAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,MAAI,MAA+B;AACnC,WAAS,IAAI,GAAG,IAAI,KAAK,SAAS,GAAG,KAAK;AACxC,UAAM,IAAI,KAAK,CAAC;AAChB,UAAM,OAAO,IAAI,CAAC;AAClB,QAAI,OAAO,SAAS,YAAY,SAAS,KAAM,KAAI,CAAC,IAAI,CAAC;AACzD,UAAM,IAAI,CAAC;AAAA,EACb;AACA,MAAI,KAAK,KAAK,SAAS,CAAC,CAAC,IAAI;AAC/B;AAEO,IAAM,iBAET,CAAC,YAAY;AACf,QAAM,EAAE,OAAO,IAAI;AACnB,QAAM,WAAwB,OAAO,YAAY,CAAC;AAClD,QAAM,QAA4B;AAAA,IAChC,GAAG;AAAA,IACH,GAAI,SAAS,SAAS,CAAC;AAAA,EACzB;AACA,QAAM,SAAoB;AAAA,IACxB,GAAG;AAAA,IACH,GAAI,SAAS,UAAU,CAAC;AAAA,EAC1B;AAEA,SAAO;AAAA;AAAA;AAAA,IAGL,MAAM;AAAA,IACN;AAAA,IAEA,MAAM,KAAK,OAAO,KAAK;AACrB,YAAM,EAAE,QAAQ,UAAU,IAAI;AAC9B,YAAM,SAAS,EAAE,OAAO,OAAO;AAK/B,YAAM,UAAU,UAAM,6BAAgB,QAAQ,MAAM,WAAW;AAAA,QAC7D;AAAA,MACF,CAAC;AACD,YAAM,KAAK,OAAO,YAAY,WAAW,UAAU;AACnD,YAAM,QAAQ,aAAa,EAAE;AAE7B,UAAI,YAAY;AAEhB,YAAM,cAAc,CAAC,MAAc,UAAmB;AACpD,YAAI,CAAC,QAAQ,UAAU,OAA
W;AAClC,YAAI,KAAK,WAAW,SAAS,GAAG;AAC9B,wBAAc,QAAQ,KAAK,MAAM,UAAU,MAAM,GAAG,KAAK;AAAA,QAC3D,OAAO;AACL,0BAAY,uBAAU,WAAW,MAAM,KAAK;AAAA,QAC9C;AAAA,MACF;AAEA,kBAAY,OAAO,YAAY,IAAI,MAAM,QAAQ;AACjD,kBAAY,OAAO,cAAc,IAAI,MAAM,UAAU;AACrD,kBAAY,OAAO,gBAAgB,IAAI,MAAM,YAAY;AAEzD,aAAO,EAAE,OAAO,UAAU;AAAA,IAC5B;AAAA,EACF;AACF;","names":[]}
package/dist/index.mjs ADDED
@@ -0,0 +1 @@
1
+ /* ES module build of the bot transformer (minified, generated output; the original sources are embedded in the accompanying source map). Exports the same init function as both `default` and `transformerBot`. The init function merges config.settings.input / config.settings.output over the built-in defaults with object spread, so an omitted key keeps its default path. push() resolves the user agent with getMappingValue against { event, ingest } (a non-string result is treated as an empty UA) and scores it: empty UA gives botScore 70 and agentScore 0; a case-insensitive substring hit in the agent table (first match wins) gives agentScore 95 and botScore 90 for purpose "user-action" or 95 otherwise; an isbot() hit alone gives botScore 80; anything else scores 0. Each result is then written to its output path: paths starting with "ingest." are set in place on ctx.ingest (slice(7) drops that prefix; missing or non-object intermediate keys are replaced with {}), every other path goes through setByPath on the event. An empty path or an undefined value is skipped. */ import{getMappingValue as t,setByPath as e}from"@walkeros/core";import{isbot as o}from"isbot";var r=[{match:"ChatGPT-User",product:"ChatGPT-User",purpose:"user-action"},{match:"ChatGPT-Agent",product:"ChatGPT-Agent",purpose:"user-action"},{match:"OAI-SearchBot",product:"OAI-SearchBot",purpose:"search-index"},{match:"GPTBot",product:"GPTBot",purpose:"training"},{match:"Claude-SearchBot",product:"Claude-SearchBot",purpose:"search-index"},{match:"Claude-User",product:"Claude-User",purpose:"user-action"},{match:"Claude-Code",product:"Claude-Code",purpose:"user-action"},{match:"ClaudeBot",product:"ClaudeBot",purpose:"training"},{match:"anthropic-ai",product:"anthropic-ai",purpose:"training"},{match:"Perplexity-User",product:"Perplexity-User",purpose:"user-action"},{match:"PerplexityBot",product:"PerplexityBot",purpose:"search-index"},{match:"MistralAI-User",product:"MistralAI-User",purpose:"user-action"},{match:"Meta-ExternalFetcher",product:"Meta-ExternalFetcher",purpose:"user-action"},{match:"Meta-ExternalAgent",product:"Meta-ExternalAgent",purpose:"training"},{match:"Google-CloudVertexBot",product:"Google-CloudVertexBot",purpose:"training"},{match:"Google-Extended",product:"Google-Extended",purpose:"training"},{match:"Applebot-Extended",product:"Applebot-Extended",purpose:"training"},{match:"Amazonbot",product:"Amazonbot",purpose:"training"},{match:"DuckAssistBot",product:"DuckAssistBot",purpose:"user-action"},{match:"Bytespider",product:"Bytespider",purpose:"training"},{match:"CCBot",product:"CCBot",purpose:"training"}];function c(t){if(!t)return{botScore:70,agentScore:0,agentProduct:void 0};const e=function(t){const e=t.toLowerCase(),c=r.find(t=>e.includes(t.match.toLowerCase()));return{isBot:!t||o(t)||void 0!==c,agent:c?{product:c.product,purpose:c.purpose}:void 0}}(t);let c=0;return e.agent?c="user-action"===e.agent.purpose?90:95:e.isBot&&(c=80),{botScore:c,agentScore:e.agent?95:0,agentProduct:e.agent?.product}}var 
a={userAgent:"ingest.userAgent",ip:"ingest.ip",acceptLanguage:"ingest.acceptLanguage",acceptEncoding:"ingest.acceptEncoding",secFetchSite:"ingest.secFetchSite",secFetchMode:"ingest.secFetchMode",secFetchDest:"ingest.secFetchDest",secFetchUser:"ingest.secFetchUser",secChUa:"ingest.secChUa",secChUaMobile:"ingest.secChUaMobile",secChUaPlatform:"ingest.secChUaPlatform"},n={botScore:"user.botScore",agentScore:"user.agentScore",agentProduct:""};var s=o=>{const{config:r}=o,s=r.settings??{},p={...a,...s.input??{}},u={...n,...s.output??{}};return{type:"bot",config:r,async push(o,r){const{ingest:a,collector:n}=r,s={event:o,ingest:a},i=await t(s,p.userAgent,{collector:n}),d=c("string"==typeof i?i:"");let g=o;const h=(t,o)=>{t&&void 0!==o&&(t.startsWith("ingest.")?function(t,e,o){const r=e.split(".");let c=t;for(let t=0;t<r.length-1;t++){const e=r[t],o=c[e];"object"==typeof o&&null!==o||(c[e]={}),c=c[e]}c[r[r.length-1]]=o}(a,t.slice(7),o):g=e(g,t,o))};return h(u.botScore??"",d.botScore),h(u.agentScore??"",d.agentScore),h(u.agentProduct??"",d.agentProduct),{event:g}}}};export{s as default,s as transformerBot};//# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/transformer.ts","../src/detect/ua.ts","../src/data/agents.ts","../src/detect/score.ts"],"sourcesContent":["import type { Transformer } from '@walkeros/core';\nimport { getMappingValue, setByPath } from '@walkeros/core';\nimport { computeScore } from './detect/score';\nimport type { BotInput, BotOutput, BotSettings } from './types';\n\nconst DEFAULT_INPUT: Required<BotInput> = {\n userAgent: 'ingest.userAgent',\n ip: 'ingest.ip',\n acceptLanguage: 'ingest.acceptLanguage',\n acceptEncoding: 'ingest.acceptEncoding',\n secFetchSite: 'ingest.secFetchSite',\n secFetchMode: 'ingest.secFetchMode',\n secFetchDest: 'ingest.secFetchDest',\n secFetchUser: 'ingest.secFetchUser',\n secChUa: 'ingest.secChUa',\n secChUaMobile: 'ingest.secChUaMobile',\n secChUaPlatform: 'ingest.secChUaPlatform',\n};\n\nconst DEFAULT_OUTPUT: Required<BotOutput> = {\n botScore: 'user.botScore',\n agentScore: 'user.agentScore',\n agentProduct: '', // off by default\n};\n\n/**\n * Mutating dot-path setter for ingest writes.\n *\n * We can't use @walkeros/core setByPath here: it clones-and-returns (immutable),\n * but ingest is the pipeline's mutable scratch context. We need in-place writes\n * so subsequent transformers in the chain see the values.\n */\nfunction setNestedPath(\n obj: Record<string, unknown>,\n path: string,\n value: unknown,\n): void {\n const keys = path.split('.');\n let cur: Record<string, unknown> = obj;\n for (let i = 0; i < keys.length - 1; i++) {\n const k = keys[i];\n const next = cur[k];\n if (typeof next !== 'object' || next === null) cur[k] = {};\n cur = cur[k] as Record<string, unknown>;\n }\n cur[keys[keys.length - 1]] = value;\n}\n\nexport const transformerBot: Transformer.Init<\n Transformer.Types<BotSettings>\n> = (context) => {\n const { config } = context;\n const settings: BotSettings = config.settings ?? {};\n const input: Required<BotInput> = {\n ...DEFAULT_INPUT,\n ...(settings.input ?? 
{}),\n };\n const output: BotOutput = {\n ...DEFAULT_OUTPUT,\n ...(settings.output ?? {}),\n };\n\n return {\n // Init's input config type is Partial<Settings>; the instance config type\n // is Settings. Same cast pattern the fingerprint transformer uses.\n type: 'bot',\n config: config as Transformer.Config<Transformer.Types<BotSettings>>,\n\n async push(event, ctx) {\n const { ingest, collector } = ctx;\n const source = { event, ingest };\n\n // v1 only reads userAgent. Other input fields are reserved for v1.1\n // (header heuristics); resolved-but-unused here would be wasteful, so\n // they are intentionally not read yet.\n const uaValue = await getMappingValue(source, input.userAgent, {\n collector,\n });\n const ua = typeof uaValue === 'string' ? uaValue : '';\n const score = computeScore(ua);\n\n let nextEvent = event;\n\n const writeOutput = (path: string, value: unknown) => {\n if (!path || value === undefined) return;\n if (path.startsWith('ingest.')) {\n setNestedPath(ingest, path.slice('ingest.'.length), value);\n } else {\n nextEvent = setByPath(nextEvent, path, value);\n }\n };\n\n writeOutput(output.botScore ?? '', score.botScore);\n writeOutput(output.agentScore ?? '', score.agentScore);\n writeOutput(output.agentProduct ?? '', score.agentProduct);\n\n return { event: nextEvent };\n },\n };\n};\n","import { isbot } from 'isbot';\nimport { agents, type AgentEntry } from '../data/agents';\n\nexport interface UAResult {\n isBot: boolean;\n agent?: { product: string; purpose: AgentEntry['purpose'] };\n}\n\nexport function detectUA(ua: string): UAResult {\n const lower = ua.toLowerCase();\n const matched = agents.find((a) => lower.includes(a.match.toLowerCase()));\n return {\n isBot: !ua || isbot(ua) || matched !== undefined,\n agent: matched\n ? 
{ product: matched.product, purpose: matched.purpose }\n : undefined,\n };\n}\n","/**\n * Curated AI agent UA-substring map (2026-Q2).\n *\n * Each entry: substring matched case-insensitively against the User-Agent,\n * a product label written to event.user.agentProduct, and the purpose category.\n *\n * Purpose semantics:\n * - 'training' — crawls for model training; usually filter from analytics\n * - 'search-index' — crawls to power AI search answers; AEO-relevant\n * - 'user-action' — fetch initiated by a human via an AI tool; often kept as traffic\n *\n * Order matters: first-hit wins. More-specific entries must precede broader ones.\n *\n * Vendor docs of record (verified 2026-05):\n * OpenAI: https://platform.openai.com/docs/bots\n * Anthropic: https://support.claude.com/en/articles/8896518\n * Perplexity: https://docs.perplexity.ai/guides/bots\n * Meta: https://developers.facebook.com/docs/sharing/webmasters/web-crawlers\n * Google: https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers\n * Apple: https://support.apple.com/en-us/119829\n * DuckDuckGo: https://duckduckgo.com/duckduckbot\n * Common Crawl: https://commoncrawl.org/faq\n * Amazon: https://developer.amazon.com/amazonbot\n *\n * Community cross-reference: https://github.com/ai-robots-txt/ai.robots.txt\n *\n * Reviewed quarterly — see /workspaces/developer/docs/research/2026-05-13-bot-detection.md.\n */\nexport interface AgentEntry {\n match: string;\n product: string;\n purpose: 'training' | 'search-index' | 'user-action';\n}\n\nexport const agents: AgentEntry[] = [\n // --- OpenAI ---\n { match: 'ChatGPT-User', product: 'ChatGPT-User', purpose: 'user-action' },\n { match: 'ChatGPT-Agent', product: 'ChatGPT-Agent', purpose: 'user-action' },\n { match: 'OAI-SearchBot', product: 'OAI-SearchBot', purpose: 'search-index' },\n { match: 'GPTBot', product: 'GPTBot', purpose: 'training' },\n\n // --- Anthropic ---\n // Claude-SearchBot must precede Claude-User (defensive 
specificity for composite UAs)\n {\n match: 'Claude-SearchBot',\n product: 'Claude-SearchBot',\n purpose: 'search-index',\n },\n { match: 'Claude-User', product: 'Claude-User', purpose: 'user-action' },\n { match: 'Claude-Code', product: 'Claude-Code', purpose: 'user-action' },\n { match: 'ClaudeBot', product: 'ClaudeBot', purpose: 'training' },\n // Legacy: only used by older Anthropic crawlers; kept for back-compat with old logs.\n { match: 'anthropic-ai', product: 'anthropic-ai', purpose: 'training' },\n\n // --- Perplexity ---\n {\n match: 'Perplexity-User',\n product: 'Perplexity-User',\n purpose: 'user-action',\n },\n {\n match: 'PerplexityBot',\n product: 'PerplexityBot',\n purpose: 'search-index',\n },\n\n // --- Mistral ---\n {\n match: 'MistralAI-User',\n product: 'MistralAI-User',\n purpose: 'user-action',\n },\n\n // --- Meta ---\n {\n match: 'Meta-ExternalFetcher',\n product: 'Meta-ExternalFetcher',\n purpose: 'user-action',\n },\n {\n match: 'Meta-ExternalAgent',\n product: 'Meta-ExternalAgent',\n purpose: 'training',\n },\n\n // --- Google ---\n {\n match: 'Google-CloudVertexBot',\n product: 'Google-CloudVertexBot',\n purpose: 'training',\n },\n {\n match: 'Google-Extended',\n product: 'Google-Extended',\n purpose: 'training',\n },\n\n // --- Apple ---\n {\n match: 'Applebot-Extended',\n product: 'Applebot-Extended',\n purpose: 'training',\n },\n\n // --- Amazon ---\n { match: 'Amazonbot', product: 'Amazonbot', purpose: 'training' },\n\n // --- DuckDuckGo ---\n {\n match: 'DuckAssistBot',\n product: 'DuckAssistBot',\n purpose: 'user-action',\n },\n\n // --- ByteDance ---\n { match: 'Bytespider', product: 'Bytespider', purpose: 'training' },\n\n // --- Common Crawl ---\n { match: 'CCBot', product: 'CCBot', purpose: 'training' },\n];\n","import { detectUA } from './ua';\n\nexport interface ScoreResult {\n /** 0-99, higher = more bot. v1 emits discrete values: 0, 70, 80, 90, 95. */\n botScore: number;\n /**\n * 0-99, higher = more likely an AI agent. 
v1 emits only 0 or 95\n * (binary UA-map match). Graduated values (e.g. 70 for unverified UA\n * claim, 99 for IP-reverse-DNS verified) are planned for v1.1.\n */\n agentScore: number;\n /** Matched AI agent UA substring, when one was found. */\n agentProduct?: string;\n}\n\n/**\n * v1: UA-only.\n *\n * botScore baseline:\n * - Empty UA → 70 (real browsers rarely strip UA)\n * - AI training crawler → 95\n * - AI user-action → 90\n * - isbot true → 80\n * - Otherwise → 0\n *\n * Header heuristics (Sec-Fetch missing, Sec-CH-UA major mismatch,\n * Accept-Language stripping) are intentionally deferred to v1.1 —\n * see the README \"Not in v1\" section and the research file.\n */\nexport function computeScore(ua: string): ScoreResult {\n if (!ua) {\n return { botScore: 70, agentScore: 0, agentProduct: undefined };\n }\n\n const uaResult = detectUA(ua);\n\n let botScore = 0;\n if (uaResult.agent) {\n botScore = uaResult.agent.purpose === 'user-action' ? 90 : 95;\n } else if (uaResult.isBot) {\n botScore = 80;\n }\n\n return {\n botScore,\n agentScore: uaResult.agent ? 
95 : 0,\n agentProduct: uaResult.agent?.product,\n };\n}\n"],"mappings":";AACA,SAAS,iBAAiB,iBAAiB;;;ACD3C,SAAS,aAAa;;;ACkCf,IAAM,SAAuB;AAAA;AAAA,EAElC,EAAE,OAAO,gBAAgB,SAAS,gBAAgB,SAAS,cAAc;AAAA,EACzE,EAAE,OAAO,iBAAiB,SAAS,iBAAiB,SAAS,cAAc;AAAA,EAC3E,EAAE,OAAO,iBAAiB,SAAS,iBAAiB,SAAS,eAAe;AAAA,EAC5E,EAAE,OAAO,UAAU,SAAS,UAAU,SAAS,WAAW;AAAA;AAAA;AAAA,EAI1D;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA,EAAE,OAAO,eAAe,SAAS,eAAe,SAAS,cAAc;AAAA,EACvE,EAAE,OAAO,eAAe,SAAS,eAAe,SAAS,cAAc;AAAA,EACvE,EAAE,OAAO,aAAa,SAAS,aAAa,SAAS,WAAW;AAAA;AAAA,EAEhE,EAAE,OAAO,gBAAgB,SAAS,gBAAgB,SAAS,WAAW;AAAA;AAAA,EAGtE;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA,EAAE,OAAO,aAAa,SAAS,aAAa,SAAS,WAAW;AAAA;AAAA,EAGhE;AAAA,IACE,OAAO;AAAA,IACP,SAAS;AAAA,IACT,SAAS;AAAA,EACX;AAAA;AAAA,EAGA,EAAE,OAAO,cAAc,SAAS,cAAc,SAAS,WAAW;AAAA;AAAA,EAGlE,EAAE,OAAO,SAAS,SAAS,SAAS,SAAS,WAAW;AAC1D;;;AD/GO,SAAS,SAAS,IAAsB;AAC7C,QAAM,QAAQ,GAAG,YAAY;AAC7B,QAAM,UAAU,OAAO,KAAK,CAAC,MAAM,MAAM,SAAS,EAAE,MAAM,YAAY,CAAC,CAAC;AACxE,SAAO;AAAA,IACL,OAAO,CAAC,MAAM,MAAM,EAAE,KAAK,YAAY;AAAA,IACvC,OAAO,UACH,EAAE,SAAS,QAAQ,SAAS,SAAS,QAAQ,QAAQ,IACrD;AAAA,EACN;AACF;;;AEYO,SAAS,aAAa,IAAyB;AACpD,MAAI,CAAC,IAAI;AACP,WAAO,EAAE,UAAU,IAAI,YAAY,GAAG,cAAc,OAAU;AAAA,EAChE;AAEA,QAAM,WAAW,SAAS,EAAE;AAE5B,MAAI,WAAW;AACf,MAAI,SAAS,OAAO;AAClB,eAAW,SAAS,MAAM,YAAY,gBAAgB,KAAK;AAAA,EAC7D,WAAW,SAAS,OAAO;AACzB,eAAW;AAAA,EACb;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,SAAS,QAAQ,KAAK;AAAA,IAClC,cAAc,SAAS,OAAO;AAAA,EAChC;AACF;;;AH3CA,IAAM,gBAAoC;AAAA,EACxC
,WAAW;AAAA,EACX,IAAI;AAAA,EACJ,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,cAAc;AAAA,EACd,cAAc;AAAA,EACd,cAAc;AAAA,EACd,cAAc;AAAA,EACd,SAAS;AAAA,EACT,eAAe;AAAA,EACf,iBAAiB;AACnB;AAEA,IAAM,iBAAsC;AAAA,EAC1C,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,cAAc;AAAA;AAChB;AASA,SAAS,cACP,KACA,MACA,OACM;AACN,QAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,MAAI,MAA+B;AACnC,WAAS,IAAI,GAAG,IAAI,KAAK,SAAS,GAAG,KAAK;AACxC,UAAM,IAAI,KAAK,CAAC;AAChB,UAAM,OAAO,IAAI,CAAC;AAClB,QAAI,OAAO,SAAS,YAAY,SAAS,KAAM,KAAI,CAAC,IAAI,CAAC;AACzD,UAAM,IAAI,CAAC;AAAA,EACb;AACA,MAAI,KAAK,KAAK,SAAS,CAAC,CAAC,IAAI;AAC/B;AAEO,IAAM,iBAET,CAAC,YAAY;AACf,QAAM,EAAE,OAAO,IAAI;AACnB,QAAM,WAAwB,OAAO,YAAY,CAAC;AAClD,QAAM,QAA4B;AAAA,IAChC,GAAG;AAAA,IACH,GAAI,SAAS,SAAS,CAAC;AAAA,EACzB;AACA,QAAM,SAAoB;AAAA,IACxB,GAAG;AAAA,IACH,GAAI,SAAS,UAAU,CAAC;AAAA,EAC1B;AAEA,SAAO;AAAA;AAAA;AAAA,IAGL,MAAM;AAAA,IACN;AAAA,IAEA,MAAM,KAAK,OAAO,KAAK;AACrB,YAAM,EAAE,QAAQ,UAAU,IAAI;AAC9B,YAAM,SAAS,EAAE,OAAO,OAAO;AAK/B,YAAM,UAAU,MAAM,gBAAgB,QAAQ,MAAM,WAAW;AAAA,QAC7D;AAAA,MACF,CAAC;AACD,YAAM,KAAK,OAAO,YAAY,WAAW,UAAU;AACnD,YAAM,QAAQ,aAAa,EAAE;AAE7B,UAAI,YAAY;AAEhB,YAAM,cAAc,CAAC,MAAc,UAAmB;AACpD,YAAI,CAAC,QAAQ,UAAU,OAAW;AAClC,YAAI,KAAK,WAAW,SAAS,GAAG;AAC9B,wBAAc,QAAQ,KAAK,MAAM,UAAU,MAAM,GAAG,KAAK;AAAA,QAC3D,OAAO;AACL,sBAAY,UAAU,WAAW,MAAM,KAAK;AAAA,QAC9C;AAAA,MACF;AAEA,kBAAY,OAAO,YAAY,IAAI,MAAM,QAAQ;AACjD,kBAAY,OAAO,cAAc,IAAI,MAAM,UAAU;AACrD,kBAAY,OAAO,gBAAgB,IAAI,MAAM,YAAY;AAEzD,aAAO,EAAE,OAAO,UAAU;AAAA,IAC5B;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,557 @@
1
+ {
2
+ "$meta": {
3
+ "package": "@walkeros/server-transformer-bot",
4
+ "version": "4.1.0",
5
+ "type": "transformer",
6
+ "platform": [
7
+ "server"
8
+ ],
9
+ "docs": "https://www.walkeros.io/docs/transformers/bot",
10
+ "source": "https://github.com/elbwalker/walkerOS/tree/main/packages/server/transformers/bot/src"
11
+ },
12
+ "schemas": {
13
+ "settings": {
14
+ "$schema": "http://json-schema.org/draft-07/schema#",
15
+ "type": "object",
16
+ "properties": {
17
+ "input": {
18
+ "description": "Input signal sources, resolved via getMappingValue against { event, ingest }. v1 only reads userAgent; other fields reserved for v1.1 header heuristics.",
19
+ "type": "object",
20
+ "properties": {
21
+ "userAgent": {
22
+ "anyOf": [
23
+ {
24
+ "type": "string",
25
+ "description": "Dot-notation path like \"ingest.userAgent\""
26
+ },
27
+ {
28
+ "type": "object",
29
+ "properties": {
30
+ "key": {
31
+ "type": "string"
32
+ },
33
+ "value": {},
34
+ "fn": {
35
+ "type": "string"
36
+ }
37
+ },
38
+ "additionalProperties": false,
39
+ "description": "Mapping value object"
40
+ }
41
+ ]
42
+ },
43
+ "ip": {
44
+ "anyOf": [
45
+ {
46
+ "type": "string",
47
+ "description": "Dot-notation path like \"ingest.userAgent\""
48
+ },
49
+ {
50
+ "type": "object",
51
+ "properties": {
52
+ "key": {
53
+ "type": "string"
54
+ },
55
+ "value": {},
56
+ "fn": {
57
+ "type": "string"
58
+ }
59
+ },
60
+ "additionalProperties": false,
61
+ "description": "Mapping value object"
62
+ }
63
+ ]
64
+ },
65
+ "acceptLanguage": {
66
+ "anyOf": [
67
+ {
68
+ "type": "string",
69
+ "description": "Dot-notation path like \"ingest.userAgent\""
70
+ },
71
+ {
72
+ "type": "object",
73
+ "properties": {
74
+ "key": {
75
+ "type": "string"
76
+ },
77
+ "value": {},
78
+ "fn": {
79
+ "type": "string"
80
+ }
81
+ },
82
+ "additionalProperties": false,
83
+ "description": "Mapping value object"
84
+ }
85
+ ]
86
+ },
87
+ "acceptEncoding": {
88
+ "anyOf": [
89
+ {
90
+ "type": "string",
91
+ "description": "Dot-notation path like \"ingest.userAgent\""
92
+ },
93
+ {
94
+ "type": "object",
95
+ "properties": {
96
+ "key": {
97
+ "type": "string"
98
+ },
99
+ "value": {},
100
+ "fn": {
101
+ "type": "string"
102
+ }
103
+ },
104
+ "additionalProperties": false,
105
+ "description": "Mapping value object"
106
+ }
107
+ ]
108
+ },
109
+ "secFetchSite": {
110
+ "anyOf": [
111
+ {
112
+ "type": "string",
113
+ "description": "Dot-notation path like \"ingest.userAgent\""
114
+ },
115
+ {
116
+ "type": "object",
117
+ "properties": {
118
+ "key": {
119
+ "type": "string"
120
+ },
121
+ "value": {},
122
+ "fn": {
123
+ "type": "string"
124
+ }
125
+ },
126
+ "additionalProperties": false,
127
+ "description": "Mapping value object"
128
+ }
129
+ ]
130
+ },
131
+ "secFetchMode": {
132
+ "anyOf": [
133
+ {
134
+ "type": "string",
135
+ "description": "Dot-notation path like \"ingest.userAgent\""
136
+ },
137
+ {
138
+ "type": "object",
139
+ "properties": {
140
+ "key": {
141
+ "type": "string"
142
+ },
143
+ "value": {},
144
+ "fn": {
145
+ "type": "string"
146
+ }
147
+ },
148
+ "additionalProperties": false,
149
+ "description": "Mapping value object"
150
+ }
151
+ ]
152
+ },
153
+ "secFetchDest": {
154
+ "anyOf": [
155
+ {
156
+ "type": "string",
157
+ "description": "Dot-notation path like \"ingest.userAgent\""
158
+ },
159
+ {
160
+ "type": "object",
161
+ "properties": {
162
+ "key": {
163
+ "type": "string"
164
+ },
165
+ "value": {},
166
+ "fn": {
167
+ "type": "string"
168
+ }
169
+ },
170
+ "additionalProperties": false,
171
+ "description": "Mapping value object"
172
+ }
173
+ ]
174
+ },
175
+ "secFetchUser": {
176
+ "anyOf": [
177
+ {
178
+ "type": "string",
179
+ "description": "Dot-notation path like \"ingest.userAgent\""
180
+ },
181
+ {
182
+ "type": "object",
183
+ "properties": {
184
+ "key": {
185
+ "type": "string"
186
+ },
187
+ "value": {},
188
+ "fn": {
189
+ "type": "string"
190
+ }
191
+ },
192
+ "additionalProperties": false,
193
+ "description": "Mapping value object"
194
+ }
195
+ ]
196
+ },
197
+ "secChUa": {
198
+ "anyOf": [
199
+ {
200
+ "type": "string",
201
+ "description": "Dot-notation path like \"ingest.userAgent\""
202
+ },
203
+ {
204
+ "type": "object",
205
+ "properties": {
206
+ "key": {
207
+ "type": "string"
208
+ },
209
+ "value": {},
210
+ "fn": {
211
+ "type": "string"
212
+ }
213
+ },
214
+ "additionalProperties": false,
215
+ "description": "Mapping value object"
216
+ }
217
+ ]
218
+ },
219
+ "secChUaMobile": {
220
+ "anyOf": [
221
+ {
222
+ "type": "string",
223
+ "description": "Dot-notation path like \"ingest.userAgent\""
224
+ },
225
+ {
226
+ "type": "object",
227
+ "properties": {
228
+ "key": {
229
+ "type": "string"
230
+ },
231
+ "value": {},
232
+ "fn": {
233
+ "type": "string"
234
+ }
235
+ },
236
+ "additionalProperties": false,
237
+ "description": "Mapping value object"
238
+ }
239
+ ]
240
+ },
241
+ "secChUaPlatform": {
242
+ "anyOf": [
243
+ {
244
+ "type": "string",
245
+ "description": "Dot-notation path like \"ingest.userAgent\""
246
+ },
247
+ {
248
+ "type": "object",
249
+ "properties": {
250
+ "key": {
251
+ "type": "string"
252
+ },
253
+ "value": {},
254
+ "fn": {
255
+ "type": "string"
256
+ }
257
+ },
258
+ "additionalProperties": false,
259
+ "description": "Mapping value object"
260
+ }
261
+ ]
262
+ }
263
+ },
264
+ "additionalProperties": false,
265
+ "title": "input"
266
+ },
267
+ "output": {
268
+ "description": "Output paths for bot/agent annotations.",
269
+ "type": "object",
270
+ "properties": {
271
+ "botScore": {
272
+ "description": "Path for bot score (0-99, higher = more bot). Default: \"user.botScore\". Use \"ingest.*\" to route to pipeline scratch instead of the event. Empty string = skip; omitting the key keeps the default path.",
273
+ "type": "string"
274
+ },
275
+ "agentScore": {
276
+ "description": "Path for AI agent score (0-99). v1 emits 0 (no match) or 95 (UA-map match). Default: \"user.agentScore\".",
277
+ "type": "string"
278
+ },
279
+ "agentProduct": {
280
+ "description": "Path for matched UA substring (e.g. \"ChatGPT-User\"). Off by default — set to enable.",
281
+ "type": "string"
282
+ }
283
+ },
284
+ "additionalProperties": false,
285
+ "title": "output"
286
+ }
287
+ },
288
+ "additionalProperties": false,
289
+ "description": "Bot detection transformer: annotates events with bot and AI-agent scores."
290
+ }
291
+ },
292
+ "examples": {
293
+ "step": {
294
+ "chatgptUserAgent": {
295
+ "title": "ChatGPT-User (user-action AI)",
296
+ "description": "A real human routed an AI to fetch this page. botScore is high (90) but lower than for AI crawlers (95) — agentProduct, when enabled, lets destinations keep this traffic.",
297
+ "in": {
298
+ "name": "page view",
299
+ "data": {
300
+ "title": "Home",
301
+ "id": "/"
302
+ },
303
+ "id": "ev-1700000602",
304
+ "trigger": "load",
305
+ "entity": "page",
306
+ "action": "view",
307
+ "timestamp": 1700000600,
308
+ "source": {
309
+ "type": "express",
310
+ "platform": "server"
311
+ }
312
+ },
313
+ "out": [
314
+ [
315
+ "return",
316
+ {
317
+ "event": {
318
+ "name": "page view",
319
+ "data": {
320
+ "title": "Home",
321
+ "id": "/"
322
+ },
323
+ "id": "ev-1700000602",
324
+ "trigger": "load",
325
+ "entity": "page",
326
+ "action": "view",
327
+ "timestamp": 1700000600,
328
+ "source": {
329
+ "type": "express",
330
+ "platform": "server"
331
+ },
332
+ "user": {
333
+ "botScore": 90,
334
+ "agentScore": 95
335
+ }
336
+ }
337
+ }
338
+ ]
339
+ ]
340
+ },
341
+ "curlClient": {
342
+ "public": false,
343
+ "description": "curl client — caught by isbot. agentScore zero.",
344
+ "in": {
345
+ "name": "page view",
346
+ "data": {
347
+ "title": "Home",
348
+ "id": "/"
349
+ },
350
+ "id": "ev-1700000603",
351
+ "trigger": "load",
352
+ "entity": "page",
353
+ "action": "view",
354
+ "timestamp": 1700000600,
355
+ "source": {
356
+ "type": "express",
357
+ "platform": "server"
358
+ }
359
+ },
360
+ "out": [
361
+ [
362
+ "return",
363
+ {
364
+ "event": {
365
+ "name": "page view",
366
+ "data": {
367
+ "title": "Home",
368
+ "id": "/"
369
+ },
370
+ "id": "ev-1700000603",
371
+ "trigger": "load",
372
+ "entity": "page",
373
+ "action": "view",
374
+ "timestamp": 1700000600,
375
+ "source": {
376
+ "type": "express",
377
+ "platform": "server"
378
+ },
379
+ "user": {
380
+ "botScore": 80,
381
+ "agentScore": 0
382
+ }
383
+ }
384
+ }
385
+ ]
386
+ ]
387
+ },
388
+ "gptBotCrawler": {
389
+ "title": "GPTBot training crawler",
390
+ "description": "OpenAI training crawler. Both botScore and agentScore are high.",
391
+ "in": {
392
+ "name": "page view",
393
+ "data": {
394
+ "title": "Home",
395
+ "id": "/"
396
+ },
397
+ "id": "ev-1700000601",
398
+ "trigger": "load",
399
+ "entity": "page",
400
+ "action": "view",
401
+ "timestamp": 1700000600,
402
+ "source": {
403
+ "type": "express",
404
+ "platform": "server"
405
+ }
406
+ },
407
+ "out": [
408
+ [
409
+ "return",
410
+ {
411
+ "event": {
412
+ "name": "page view",
413
+ "data": {
414
+ "title": "Home",
415
+ "id": "/"
416
+ },
417
+ "id": "ev-1700000601",
418
+ "trigger": "load",
419
+ "entity": "page",
420
+ "action": "view",
421
+ "timestamp": 1700000600,
422
+ "source": {
423
+ "type": "express",
424
+ "platform": "server"
425
+ },
426
+ "user": {
427
+ "botScore": 95,
428
+ "agentScore": 95
429
+ }
430
+ }
431
+ }
432
+ ]
433
+ ]
434
+ },
435
+ "humanChrome": {
436
+ "title": "Human visitor (Chrome)",
437
+ "description": "Modern Chrome UA. No bot or agent signals.",
438
+ "in": {
439
+ "name": "page view",
440
+ "data": {
441
+ "title": "Home",
442
+ "id": "/"
443
+ },
444
+ "id": "ev-1700000600",
445
+ "trigger": "load",
446
+ "entity": "page",
447
+ "action": "view",
448
+ "timestamp": 1700000600,
449
+ "source": {
450
+ "type": "express",
451
+ "platform": "server"
452
+ }
453
+ },
454
+ "out": [
455
+ [
456
+ "return",
457
+ {
458
+ "event": {
459
+ "name": "page view",
460
+ "data": {
461
+ "title": "Home",
462
+ "id": "/"
463
+ },
464
+ "id": "ev-1700000600",
465
+ "trigger": "load",
466
+ "entity": "page",
467
+ "action": "view",
468
+ "timestamp": 1700000600,
469
+ "source": {
470
+ "type": "express",
471
+ "platform": "server"
472
+ },
473
+ "user": {
474
+ "botScore": 0,
475
+ "agentScore": 0
476
+ }
477
+ }
478
+ }
479
+ ]
480
+ ]
481
+ },
482
+ "missingUA": {
483
+ "public": false,
484
+ "description": "No User-Agent — baseline 70 (UA stripping is overwhelmingly bots or hardened privacy tools).",
485
+ "in": {
486
+ "name": "page view",
487
+ "data": {
488
+ "title": "Home",
489
+ "id": "/"
490
+ },
491
+ "id": "ev-1700000604",
492
+ "trigger": "load",
493
+ "entity": "page",
494
+ "action": "view",
495
+ "timestamp": 1700000600,
496
+ "source": {
497
+ "type": "express",
498
+ "platform": "server"
499
+ }
500
+ },
501
+ "out": [
502
+ [
503
+ "return",
504
+ {
505
+ "event": {
506
+ "name": "page view",
507
+ "data": {
508
+ "title": "Home",
509
+ "id": "/"
510
+ },
511
+ "id": "ev-1700000604",
512
+ "trigger": "load",
513
+ "entity": "page",
514
+ "action": "view",
515
+ "timestamp": 1700000600,
516
+ "source": {
517
+ "type": "express",
518
+ "platform": "server"
519
+ },
520
+ "user": {
521
+ "botScore": 70,
522
+ "agentScore": 0
523
+ }
524
+ }
525
+ }
526
+ ]
527
+ ]
528
+ }
529
+ }
530
+ },
531
+ "hints": {
532
+ "ingest-prerequisite": {
533
+ "text": "The bot transformer reads userAgent from ctx.ingest (path \"ingest.userAgent\" by default). The upstream server source must populate it via config.ingest mapping, otherwise the UA is empty and every event scores 70 (baseline for missing UA).",
534
+ "code": [
535
+ {
536
+ "lang": "json",
537
+ "code": "{\n \"sources\": {\n \"express\": {\n \"package\": \"@walkeros/server-source-express\",\n \"config\": {\n \"ingest\": {\n \"userAgent\": \"req.headers.user-agent\"\n }\n }\n }\n },\n \"transformers\": {\n \"bot\": {\n \"package\": \"@walkeros/server-transformer-bot\"\n }\n }\n}"
538
+ }
539
+ ]
540
+ },
541
+ "output-routing": {
542
+ "text": "Outputs default to event.user.botScore and event.user.agentScore. Redirect to ingest.* to keep the analytics event clean while still routing on the score downstream. Setting a path to an empty string skips writing that field entirely; omitting a key keeps its default path. agentProduct is off by default — set it to enable writing the matched UA substring.",
543
+ "code": [
544
+ {
545
+ "lang": "json",
546
+ "code": "{\n \"transformers\": {\n \"bot\": {\n \"package\": \"@walkeros/server-transformer-bot\",\n \"config\": {\n \"settings\": {\n \"output\": {\n \"botScore\": \"ingest.bot.score\",\n \"agentScore\": \"ingest.bot.agent\",\n \"agentProduct\": \"user.agentProduct\"\n }\n }\n }\n }\n }\n}"
547
+ }
548
+ ]
549
+ },
550
+ "destination-filtering": {
551
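+ "text": "Recommended destination-mapping recipes. Drop all bots: botScore > 50. Drop crawlers but keep user-action AI traffic: botScore > 50 AND agentProduct NOT LIKE \"%-User\" (requires output.agentProduct to be enabled; user-action agents whose product does not end in \"-User\" — ChatGPT-Agent, Claude-Code, Meta-ExternalFetcher, DuckAssistBot — must be excluded explicitly). AI traffic report: group by agentProduct WHERE agentScore > 50. The transformer never drops events — filtering is always a destination decision."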
552
+ },
553
+ "detection-scope": {
554
+ "text": "v1 is UA-only: wraps isbot (curl, wget, headless Chrome defaults, well-known crawlers) plus a curated AI-agent UA map (OpenAI, Anthropic, Perplexity, Mistral, Meta, Google, Apple, Amazon, DuckDuckGo, ByteDance, Common Crawl). It will NOT catch: residential-proxy + stealth Chrome + realistic behavior; reverse-DNS-verified search engines; client-side runtime tells. v1.1 adds header consistency heuristics (Sec-Fetch, Sec-CH-UA, Accept-Language) with proper GREASE handling. For commercial-grade detection use Cloudflare Bot Management, DataDome, or HUMAN."
555
+ }
556
+ }
557
+ }
package/package.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "name": "@walkeros/server-transformer-bot",
3
+ "description": "Server-side bot and AI-agent detection transformer for walkerOS",
4
+ "version": "4.1.0",
5
+ "license": "MIT",
6
+ "main": "./dist/index.js",
7
+ "module": "./dist/index.mjs",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "import": "./dist/index.mjs",
13
+ "require": "./dist/index.js"
14
+ },
15
+ "./walkerOS.json": "./dist/walkerOS.json"
16
+ },
17
+ "files": [
18
+ "dist/**",
19
+ "CHANGELOG.md"
20
+ ],
21
+ "scripts": {
22
+ "build": "tsup --silent",
23
+ "clean": "rm -rf .turbo && rm -rf dist",
24
+ "dev": "jest --watchAll --colors",
25
+ "typecheck": "tsc --noEmit",
26
+ "lint": "eslint \"**/*.ts*\"",
27
+ "test": "jest",
28
+ "update": "npx npm-check-updates -u && npm update"
29
+ },
30
+ "dependencies": {
31
+ "@walkeros/core": "4.1.0",
32
+ "isbot": "^5.1.39"
33
+ },
34
+ "devDependencies": {
35
+ "@walkeros/core": "4.1.0"
36
+ },
37
+ "repository": {
38
+ "url": "git+https://github.com/elbwalker/walkerOS.git",
39
+ "directory": "packages/server/transformers/bot"
40
+ },
41
+ "author": "elbwalker <hello@elbwalker.com>",
42
+ "homepage": "https://github.com/elbwalker/walkerOS#readme",
43
+ "bugs": {
44
+ "url": "https://github.com/elbwalker/walkerOS/issues"
45
+ },
46
+ "walkerOS": {
47
+ "type": "transformer",
48
+ "platform": [
49
+ "server"
50
+ ],
51
+ "docs": "https://www.walkeros.io/docs/transformers/bot"
52
+ },
53
+ "keywords": [
54
+ "walkerOS",
55
+ "walkerOS-transformer",
56
+ "transformer",
57
+ "bot",
58
+ "bot-detection",
59
+ "ai-agent"
60
+ ],
61
+ "funding": [
62
+ {
63
+ "type": "GitHub Sponsors",
64
+ "url": "https://github.com/sponsors/elbwalker"
65
+ }
66
+ ]
67
+ }