cdp-edge 2.0.3 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/contracts/agent-versions.json +364 -0
- package/dist/commands/install.js +1 -1
- package/dist/commands/setup.js +326 -111
- package/extracted-skill/tracking-events-generator/INTEGRACAO-COMPLETA.md +89 -0
- package/extracted-skill/tracking-events-generator/MELHORIAS-IMPLEMENTADAS.md +101 -0
- package/extracted-skill/tracking-events-generator/agents/devops-agent.md +11 -0
- package/extracted-skill/tracking-events-generator/agents/intelligence-agent.md +27 -0
- package/extracted-skill/tracking-events-generator/agents/master-orchestrator.md +1 -1
- package/extracted-skill/tracking-events-generator/agents/server-tracking.md +5 -5
- package/extracted-skill/tracking-events-generator/knowledge-base.md +172 -0
- package/package.json +7 -2
- package/server-edge-tracker/INSTALAR.md +27 -3
- package/server-edge-tracker/SEGMENTATION-DOCS.md +69 -0
- package/server-edge-tracker/index.js +791 -0
- package/server-edge-tracker/migrate-v7.sql +64 -0
- package/server-edge-tracker/modules/db.js +531 -0
- package/server-edge-tracker/modules/dispatch/ga4.js +65 -0
- package/server-edge-tracker/modules/dispatch/meta.js +119 -0
- package/server-edge-tracker/modules/dispatch/platforms.js +237 -0
- package/server-edge-tracker/modules/dispatch/tiktok.js +100 -0
- package/server-edge-tracker/modules/dispatch/whatsapp.js +233 -0
- package/server-edge-tracker/modules/intelligence.js +321 -0
- package/server-edge-tracker/modules/ml/bidding.js +245 -0
- package/server-edge-tracker/modules/ml/fraud.js +301 -0
- package/server-edge-tracker/modules/ml/logistic.js +195 -0
- package/server-edge-tracker/modules/ml/ltv.js +420 -0
- package/server-edge-tracker/modules/ml/matchquality.js +176 -0
- package/server-edge-tracker/modules/ml/segmentation.js +316 -0
- package/server-edge-tracker/modules/utils.js +89 -0
- package/server-edge-tracker/schema-indexes.sql +67 -0
- package/server-edge-tracker/worker.js +395 -4
- package/server-edge-tracker/wrangler.toml +15 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CDP Edge — Fraud Detection (Fase 4)
|
|
3
|
+
* checkFraudGate, logFraudSignal, handlers das rotas /api/fraud/*
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { sha256, tryParseJson } from '../utils.js';
|
|
7
|
+
|
|
8
|
+
// ── Listas de detecção ────────────────────────────────────────────────────────
|
|
9
|
+
/**
 * Domains of throwaway / temporary mailbox providers.
 * Looked up with the lower-cased domain part of an email address.
 */
export const DISPOSABLE_EMAIL_DOMAINS = new Set([
  'mailinator.com',
  'guerrillamail.com',
  'tempmail.com',
  'throwaway.email',
  'yopmail.com',
  'sharklasers.com',
  'guerrillamailblock.com',
  'spam4.me',
  '10minutemail.com',
  'trashmail.com',
  'maildrop.cc',
  'fakeinbox.com',
  'dispostable.com',
  'getairmail.com',
  'mailnull.com',
]);

/**
 * Case-insensitive substring pattern for hosting / cloud provider names.
 * In this module it is tested against the ASN organization name of the request.
 */
export const DATACENTER_PATTERNS = new RegExp(
  [
    'amazon',
    'google',
    'microsoft',
    'digitalocean',
    'linode',
    'ovh',
    'vultr',
    'hetzner',
    'contabo',
    'cloudflare',
    'packet',
    'rackspace',
    'leaseweb',
  ].join('|'),
  'i',
);
|
|
17
|
+
|
|
18
|
+
// ── checkFraudGate — roda ANTES de qualquer processamento de evento ────────────
|
|
19
|
+
// Retorna { allowed, score, reasons, action }
|
|
20
|
+
// Falhas no gate = fail-safe (deixa passar)
|
|
21
|
+
/**
 * Scores an incoming event for fraud signals before it is processed.
 *
 * Order of checks: KV blocklist (IP, then fingerprint) which short-circuits
 * with a drop; then additive heuristics (bot score, User-Agent, datacenter
 * ASN, missing Accept-Language, disposable email, per-IP velocity counter).
 * The summed score is capped at 100; >= 80 drops the event, >= 40 flags it.
 *
 * Any unexpected error makes the gate fail open (event allowed) by design.
 * Because of that, attacker-controlled payload fields are normalised up
 * front so a malformed value cannot be used to force the fail-open path.
 *
 * @param {object} env - Worker bindings; `GEO_CACHE` (KV namespace) is optional.
 * @param {Request} request - Incoming request (headers and `cf` metadata are read).
 * @param {object} payload - Parsed event body (`fingerprint`, `email`, `botScore`/`bot_score`).
 * @returns {Promise<{allowed: boolean, score: number, reasons: string[], action: string}>}
 *   `action` is one of 'allowed', 'flagged' or 'dropped'.
 */
export async function checkFraudGate(env, request, payload) {
  const result = { allowed: true, score: 0, reasons: [], action: 'allowed' };

  try {
    // A null/undefined payload must not throw: throwing here means "allowed".
    const data = payload ?? {};
    const ip = request.headers.get('CF-Connecting-IP') || '';
    const ua = request.headers.get('User-Agent') || '';
    const fingerprint = data.fingerprint || '';
    // Only strings can be split into a domain; anything else is treated as absent
    // instead of raising a TypeError that would bypass every check below.
    const email = typeof data.email === 'string' ? data.email : '';
    const botScore = Number.parseInt(data.botScore || data.bot_score || 0, 10);
    const asn = String(request.cf?.asOrganization || '').toLowerCase();
    const acceptLang = request.headers.get('Accept-Language');

    // 1. KV blocklist check — a hit drops the event immediately
    if (env.GEO_CACHE && ip) {
      const ipBlocked = await env.GEO_CACHE.get(`fraud_block:ip:${ip}`);
      if (ipBlocked) {
        return { allowed: false, score: 100, reasons: ['ip_blocklisted'], action: 'dropped' };
      }
    }
    if (env.GEO_CACHE && fingerprint) {
      const fpBlocked = await env.GEO_CACHE.get(`fraud_block:fp:${fingerprint}`);
      if (fpBlocked) {
        return { allowed: false, score: 100, reasons: ['fingerprint_blocklisted'], action: 'dropped' };
      }
    }

    // 2. Client-reported bot score (a non-numeric value parses to NaN and scores nothing)
    if (botScore >= 3) {
      result.score += 60;
      result.reasons.push('bot_score_high');
    } else if (botScore === 2) {
      result.score += 30;
      result.reasons.push('bot_score_medium');
    }

    // 3. Suspicious User-Agent
    if (/headless|phantomjs|selenium|webdriver|curl|python|scrapy|bot|crawler|spider/i.test(ua)) {
      result.score += 40;
      result.reasons.push('suspicious_user_agent');
    }

    // 4. Datacenter IP (decided from the ASN organization name)
    if (ip && DATACENTER_PATTERNS.test(asn)) {
      result.score += 35;
      result.reasons.push('datacenter_ip');
    }

    // 5. Missing Accept-Language header
    if (!acceptLang) {
      result.score += 20;
      result.reasons.push('no_accept_language');
    }

    // 6. Disposable email — the domain is whatever follows the LAST '@'
    if (email) {
      const at = email.lastIndexOf('@');
      const domain = at >= 0 ? email.slice(at + 1).trim().toLowerCase() : '';
      if (domain && DISPOSABLE_EMAIL_DOMAINS.has(domain)) {
        result.score += 25;
        result.reasons.push('disposable_email');
      }
    }

    // 7. Velocity check via KV. Every write re-arms the 1h TTL, so the counter
    //    only expires after an hour without traffic from this IP.
    if (env.GEO_CACHE && ip) {
      const velKey1h = `fraud_velocity:${ip}:h`;
      const velStr = await env.GEO_CACHE.get(velKey1h);
      const vel1h = Number.parseInt(velStr || '0', 10) + 1;

      await env.GEO_CACHE.put(velKey1h, String(vel1h), { expirationTtl: 3600 });

      if (vel1h > 20) {
        result.score += 50;
        result.reasons.push('ip_velocity_very_high');
      } else if (vel1h > 10) {
        result.score += 25;
        result.reasons.push('ip_velocity_high');
      }
    }

    result.score = Math.min(100, result.score);

    // 8. Final decision
    if (result.score >= 80) {
      result.allowed = false;
      result.action = 'dropped';
    } else if (result.score >= 40) {
      result.action = 'flagged';
    }

    return result;
  } catch (err) {
    // Deliberate fail-open: infrastructure errors must not block real traffic.
    console.error('[Fraud] checkFraudGate error:', err.message);
    return { allowed: true, score: 0, reasons: ['gate_error_fallback'], action: 'allowed' };
  }
}
|
|
104
|
+
|
|
105
|
+
// ── logFraudSignal — persiste no D1 em background ────────────────────────────
|
|
106
|
+
/**
 * Persists a non-'allowed' fraud decision to D1.
 *
 * Writes one row to `fraud_signals`; when the event was dropped and an IP is
 * known, it also upserts an aggregate row in `fraud_alerts`. The function is
 * best-effort: it never throws, but every failure is logged so that a broken
 * table or binding does not go unnoticed.
 *
 * @param {object} env - Worker bindings; requires `DB` (D1), `GEO_CACHE` (KV) is optional.
 * @param {Request} request - Request whose headers / `cf` metadata are recorded.
 * @param {object} payload - Event body (`fingerprint`, `email`, `userId`, `eventName`, `eventId`, bot score).
 * @param {{score: number, action: string, reasons: string[]}} fraudResult - Result of the fraud gate.
 * @returns {Promise<void>}
 */
export async function logFraudSignal(env, request, payload, fraudResult) {
  if (!env.DB || fraudResult.action === 'allowed') return;
  try {
    const ip = request.headers.get('CF-Connecting-IP') || '';
    const ua = request.headers.get('User-Agent') || '';
    const fingerprint = payload.fingerprint || '';
    const botScore = Number.parseInt(payload.botScore || payload.bot_score || 0, 10);
    const asn = String(request.cf?.asOrganization || '');
    const country = (request.cf?.country || '');
    const velKey1h = `fraud_velocity:${ip}:h`;
    const vel1h = env.GEO_CACHE
      ? Number.parseInt((await env.GEO_CACHE.get(velKey1h)) || '0', 10)
      : 0;

    // Only a hash of the normalised email is stored; a hashing failure must not
    // prevent the signal itself from being recorded.
    let emailHash = null;
    if (payload.email) {
      try {
        emailHash = await sha256(payload.email.trim().toLowerCase());
      } catch (err) {
        console.error('[Fraud] logFraudSignal email hash error:', err.message);
      }
    }

    await env.DB.prepare(`
      INSERT INTO fraud_signals (
        ip_address, fingerprint, user_id, email_hash, event_name, event_id,
        fraud_score, action_taken, reasons,
        ip_country, ip_asn, user_agent, bot_score, velocity_1h, detected_at
      ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,datetime('now'))
    `).bind(
      ip, fingerprint || null, payload.userId || null, emailHash,
      payload.eventName || null, payload.eventId || null,
      fraudResult.score, fraudResult.action, JSON.stringify(fraudResult.reasons),
      country, asn, ua.substring(0, 255), botScore, vel1h,
    ).run();

    if (fraudResult.action === 'dropped' && ip) {
      try {
        await env.DB.prepare(`
          INSERT INTO fraud_alerts (alert_type, entity_type, entity_value, events_total, events_dropped, peak_score, first_seen, last_seen, top_reasons)
          VALUES ('ip_attack', 'ip', ?, 1, 1, ?, datetime('now'), datetime('now'), ?)
          ON CONFLICT(entity_type, entity_value) DO UPDATE SET
            events_total = events_total + 1,
            events_dropped = events_dropped + 1,
            peak_score = MAX(peak_score, excluded.peak_score),
            last_seen = datetime('now'),
            updated_at = datetime('now')
        `).bind(ip, fraudResult.score, JSON.stringify(fraudResult.reasons)).run();
      } catch (err) {
        // The aggregate is secondary to the signal row: log and carry on.
        console.error('[Fraud] logFraudSignal alert upsert error:', err.message);
      }
    }
  } catch (err) {
    console.error('[Fraud] logFraudSignal error:', err.message);
  }
}
|
|
152
|
+
|
|
153
|
+
// ── GET /api/fraud/alerts ─────────────────────────────────────────────────────
|
|
154
|
+
/**
 * GET /api/fraud/alerts — lists recent fraud signals, highest score first.
 *
 * Query parameters:
 *   - `action`: optional filter on `action_taken`.
 *   - `hours`:  look-back window, positive integer (default 24).
 *   - `limit`:  maximum rows, positive integer (default 50, capped at 200).
 * Absent, non-numeric, zero or negative values fall back to the default. This
 * matters for `limit`: SQLite treats a negative LIMIT as "no limit", which
 * would defeat the cap.
 *
 * @param {object} env - Worker bindings; requires `DB` (D1).
 * @param {Request} request - Incoming request (only `url` is read).
 * @param {object} headers - Headers applied to every response.
 * @returns {Promise<Response>} 200 with the alerts, 503 without DB, 500 on query failure.
 */
export async function handleFraudAlerts(env, request, headers) {
  if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });

  const url = new URL(request.url);
  const action = url.searchParams.get('action') || null;

  // Reads a strictly positive integer query parameter, or the fallback.
  const readPositiveInt = (name, fallback) => {
    const parsed = Number.parseInt(url.searchParams.get(name) ?? '', 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };
  const hours = readPositiveInt('hours', 24);
  const limit = Math.min(readPositiveInt('limit', 50), 200);

  try {
    const cond = action ? 'AND action_taken = ?' : '';
    const bindings = action ? [hours, action, limit] : [hours, limit];

    const result = await env.DB.prepare(`
      SELECT ip_address, fingerprint, event_name, fraud_score, action_taken,
        reasons, ip_country, ip_asn, bot_score, velocity_1h, detected_at
      FROM fraud_signals
      WHERE detected_at >= datetime('now', '-' || ? || ' hours')
      ${cond}
      ORDER BY fraud_score DESC, detected_at DESC
      LIMIT ?
    `).bind(...bindings).all();

    const signals = (result.results || []).map((s) => ({ ...s, reasons: tryParseJson(s.reasons, []) }));
    // Dashboard stats are optional: a missing view yields `stats: null`.
    const stats = await env.DB.prepare(`SELECT * FROM v_fraud_dashboard`).first().catch(() => null);

    return new Response(
      JSON.stringify({ success: true, period_hours: hours, total: signals.length, stats, alerts: signals }),
      { status: 200, headers },
    );
  } catch (err) {
    console.error('[Fraud] alerts error:', err.message);
    return new Response(JSON.stringify({ error: err.message }), { status: 500, headers });
  }
}
|
|
185
|
+
|
|
186
|
+
// ── GET /api/fraud/blocklist ──────────────────────────────────────────────────
|
|
187
|
+
/**
 * GET /api/fraud/blocklist — returns up to 100 currently blocked entities,
 * ordered by how many events each one had dropped.
 *
 * @param {object} env - Worker bindings; requires `DB` (D1).
 * @param {Request} request - Unused; kept for a uniform handler signature.
 * @param {object} headers - Headers applied to every response.
 * @returns {Promise<Response>} 200 with the blocklist, 503 without DB, 500 on query failure.
 */
export async function handleFraudBlocklist(env, request, headers) {
  const send = (status, payload) => new Response(JSON.stringify(payload), { status, headers });

  if (!env.DB) {
    return send(503, { error: 'DB não configurado' });
  }

  try {
    const statement = env.DB.prepare(`
      SELECT entity_type, entity_value, events_total, events_dropped,
        peak_score, first_seen, last_seen, blocked_at, block_expires, top_reasons
      FROM fraud_alerts WHERE is_blocked = 1 ORDER BY events_dropped DESC LIMIT 100
    `);
    const { results } = await statement.all();

    // `top_reasons` is stored as JSON text; decode it per row.
    const blocklist = [];
    for (const row of results || []) {
      blocklist.push({ ...row, top_reasons: tryParseJson(row.top_reasons, []) });
    }

    return send(200, { success: true, total: blocklist.length, blocklist });
  } catch (err) {
    console.error('[Fraud] blocklist error:', err.message);
    return send(500, { error: err.message });
  }
}
|
|
204
|
+
|
|
205
|
+
// ── POST /api/fraud/blocklist/add ─────────────────────────────────────────────
|
|
206
|
+
/**
 * POST /api/fraud/blocklist/add — blocks an IP or fingerprint.
 *
 * Body: `{ entity_type: 'ip'|'fingerprint', entity_value, ttl_hours?, reason? }`.
 * The block is written to KV (what the fraud gate reads) and mirrored to the
 * `fraud_alerts` table. `ttl_hours` must be a positive number; the effective
 * TTL is clamped to [60 seconds, 7 days] and the response reports the
 * effective value rather than the requested one.
 *
 * @param {object} env - Worker bindings; requires `DB` (D1), `GEO_CACHE` (KV) is optional.
 * @param {Request} request - Incoming request with a JSON body.
 * @param {object} headers - Headers applied to every response.
 * @returns {Promise<Response>} 200 on success, 400 on invalid input, 503 without DB, 500 on failure.
 */
export async function handleFraudBlocklistAdd(env, request, headers) {
  const MIN_TTL_SEC = 60; // smallest expirationTtl Workers KV accepts
  const MAX_TTL_SEC = 7 * 24 * 3600;
  const reply = (payload, status) => new Response(JSON.stringify(payload), { status, headers });

  if (!env.DB) return reply({ error: 'DB não configurado' }, 503);

  let body;
  try {
    body = await request.json();
  } catch {
    return reply({ error: 'JSON inválido' }, 400);
  }
  // `null` and primitives are valid JSON but cannot be destructured as a request body.
  if (body === null || typeof body !== 'object') {
    return reply({ error: 'JSON inválido' }, 400);
  }

  const { entity_type, entity_value, ttl_hours = 24, reason = 'manual_block' } = body;
  if (!entity_type || !entity_value) {
    return reply({ error: 'entity_type (ip|fingerprint) e entity_value são obrigatórios' }, 400);
  }
  if (!['ip', 'fingerprint'].includes(entity_type)) {
    return reply({ error: 'entity_type deve ser: ip ou fingerprint' }, 400);
  }

  const requestedHours = Number(ttl_hours);
  if (!Number.isFinite(requestedHours) || requestedHours <= 0) {
    return reply({ error: 'ttl_hours deve ser um número positivo' }, 400);
  }

  try {
    const kvKey = `fraud_block:${entity_type}:${entity_value}`;
    const ttlSec = Math.min(Math.max(Math.round(requestedHours * 3600), MIN_TTL_SEC), MAX_TTL_SEC);
    const effectiveHours = ttlSec / 3600;
    const expiresAt = new Date(Date.now() + ttlSec * 1000).toISOString();

    if (env.GEO_CACHE) {
      await env.GEO_CACHE.put(
        kvKey,
        JSON.stringify({ reason, blocked_at: new Date().toISOString() }),
        { expirationTtl: ttlSec },
      );
    }

    // The D1 row is a mirror for reporting; enforcement happens through KV, so a
    // failure here is logged but does not fail the request.
    try {
      await env.DB.prepare(`
        INSERT INTO fraud_alerts (alert_type, entity_type, entity_value, events_total, events_dropped, peak_score, first_seen, last_seen, is_blocked, blocked_at, block_expires, top_reasons)
        VALUES ('manual', ?, ?, 0, 0, 100, datetime('now'), datetime('now'), 1, datetime('now'), ?, ?)
        ON CONFLICT DO UPDATE SET is_blocked = 1, blocked_at = datetime('now'), block_expires = excluded.block_expires, updated_at = datetime('now')
      `).bind(entity_type, entity_value, expiresAt, JSON.stringify([reason])).run();
    } catch (err) {
      console.error('[Fraud] blocklist add persist error:', err.message);
    }

    return reply({
      success: true,
      entity_type,
      entity_value,
      kv_key: kvKey,
      ttl_hours: effectiveHours,
      expires_at: expiresAt,
      message: `${entity_type} '${entity_value}' bloqueado por ${effectiveHours}h. Efeito imediato via KV.`,
    }, 200);
  } catch (err) {
    console.error('[Fraud] blocklist add error:', err.message);
    return reply({ error: err.message }, 500);
  }
}
|
|
245
|
+
|
|
246
|
+
// ── DELETE /api/fraud/blocklist/remove ───────────────────────────────────────
|
|
247
|
+
/**
 * DELETE /api/fraud/blocklist/remove — lifts a block.
 *
 * Body: `{ entity_type, entity_value }`. Deletes the KV entry read by the
 * fraud gate and marks the matching `fraud_alerts` row as resolved.
 *
 * @param {object} env - Worker bindings; requires `DB` (D1), `GEO_CACHE` (KV) is optional.
 * @param {Request} request - Incoming request with a JSON body.
 * @param {object} headers - Headers applied to every response.
 * @returns {Promise<Response>} 200 on success, 400 on invalid input, 503 without DB, 500 on failure.
 */
export async function handleFraudBlocklistRemove(env, request, headers) {
  const reply = (payload, status) => new Response(JSON.stringify(payload), { status, headers });

  if (!env.DB) return reply({ error: 'DB não configurado' }, 503);

  let body;
  try {
    body = await request.json();
  } catch {
    return reply({ error: 'JSON inválido' }, 400);
  }
  // `null` and primitives parse as JSON but would make the destructuring below throw.
  if (body === null || typeof body !== 'object') {
    return reply({ error: 'JSON inválido' }, 400);
  }

  const { entity_type, entity_value } = body;
  if (!entity_type || !entity_value) {
    return reply({ error: 'entity_type e entity_value são obrigatórios' }, 400);
  }

  try {
    const kvKey = `fraud_block:${entity_type}:${entity_value}`;
    if (env.GEO_CACHE) await env.GEO_CACHE.delete(kvKey);
    await env.DB.prepare(
      `UPDATE fraud_alerts SET is_blocked = 0, resolved_at = datetime('now'), resolved_by = 'manual' WHERE entity_type = ? AND entity_value = ?`,
    ).bind(entity_type, entity_value).run();

    return reply({
      success: true,
      entity_type,
      entity_value,
      message: `${entity_type} '${entity_value}' removido do blocklist. Efeito imediato via KV.`,
    }, 200);
  } catch (err) {
    console.error('[Fraud] blocklist remove error:', err.message);
    return reply({ error: err.message }, 500);
  }
}
|
|
273
|
+
|
|
274
|
+
// ── GET /api/fraud/stats ──────────────────────────────────────────────────────
|
|
275
|
+
/**
 * GET /api/fraud/stats — 24-hour fraud overview.
 *
 * Combines the single row of the `v_fraud_dashboard` view with two
 * aggregations over `fraud_signals`: the ten IPs with the most dropped
 * events, and the number of signals per action.
 *
 * @param {object} env - Worker bindings; requires `DB` (D1).
 * @param {Request} request - Unused; kept for a uniform handler signature.
 * @param {object} headers - Headers applied to every response.
 * @returns {Promise<Response>} 200 with the stats, 503 without DB, 500 on query failure.
 */
export async function handleFraudStats(env, request, headers) {
  const send = (status, payload) => new Response(JSON.stringify(payload), { status, headers });

  if (!env.DB) {
    return send(503, { error: 'DB não configurado' });
  }

  const { DB } = env;

  try {
    const dashboard = await DB.prepare(`SELECT * FROM v_fraud_dashboard`).first();

    const attackersQuery = DB.prepare(`
      SELECT ip_address, COUNT(*) as events, MAX(fraud_score) as peak_score
      FROM fraud_signals
      WHERE detected_at >= datetime('now', '-24 hours') AND action_taken = 'dropped'
      GROUP BY ip_address ORDER BY events DESC LIMIT 10
    `);
    const attackers = await attackersQuery.all();

    const perActionQuery = DB.prepare(`
      SELECT action_taken, COUNT(*) as count FROM fraud_signals
      WHERE detected_at >= datetime('now', '-24 hours')
      GROUP BY action_taken
    `);
    const perAction = await perActionQuery.all();

    return send(200, {
      success: true,
      period: '24h',
      dashboard,
      top_attacking_ips: attackers.results || [],
      by_action: perAction.results || [],
    });
  } catch (err) {
    console.error('[Fraud] stats error:', err.message);
    return send(500, { error: err.message });
  }
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CDP Edge — Logistic Regression (pure JS, sem deps externas)
|
|
3
|
+
* Treina modelo de predição de conversão com dados reais do D1.
|
|
4
|
+
*
|
|
5
|
+
* Features usadas (todas normalizadas 0-1):
|
|
6
|
+
* utm_source, engagement_score, intention_level, recency,
|
|
7
|
+
* has_email, has_phone, is_br, hour_normalized
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// ── Feature Engineering ───────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
// Prior score per traffic source (keys are lower-cased `utm_source` values).
const UTM_SCORES = {
  facebook: 0.90, instagram: 0.90, meta: 0.90,
  google: 0.82, youtube: 0.82,
  tiktok: 0.75,
  email: 0.68, sms: 0.68,
  organic: 0.30,
  direct: 0.20,
};

// Score per lead intention label (keys are lower-cased `intention_level` values).
const INTENTION_SCORES = {
  comprador: 1.00, high_intent: 1.00,
  interessado: 0.60,
  nurture: 0.30,
  curioso: 0.15,
};

/**
 * Returns `table[key]` only when `key` is an own property of `table`.
 * Keys come from user-controlled data, so inherited names such as
 * `constructor` or `__proto__` must not resolve to prototype members.
 */
function lookupScore(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** Clamps a number to the closed interval [0, 1]. */
function clampUnit(value) {
  return Math.min(1, Math.max(0, value));
}

/**
 * Builds the feature vector for one lead row.
 *
 * Output order (matches `featureNames` produced by training):
 *   utm_score, engagement, intention, recency, has_email, has_phone, is_br, hour.
 *
 * @param {object} row - Lead row; reads `utm_source`, `engagement_score`,
 *   `intention_level`, `days_since_lead`, `has_email`, `has_phone`, `is_br`, `hour`.
 * @returns {number[]} Eight features. Engagement and recency are clamped to [0, 1];
 *   hour is divided by 23.
 */
export function extractFeatures(row) {
  const src = (row.utm_source || '').toLowerCase().trim();
  const intention = (row.intention_level || '').toLowerCase().trim();
  const daysSince = row.days_since_lead || 0;

  // Hour 0 (midnight) is a real value; only a missing or non-numeric hour
  // falls back to noon.
  const rawHour = row.hour;
  const hourMissing = rawHour === null || rawHour === undefined || rawHour === ''
    || Number.isNaN(Number(rawHour));
  const hour = hourMissing ? 12 : Number(rawHour);

  return [
    lookupScore(UTM_SCORES, src) ?? (src ? 0.10 : 0.05),   // utm_score: unknown source 0.10, no source 0.05
    clampUnit((row.engagement_score || 0) / 5),            // engagement (0-5 → 0-1)
    lookupScore(INTENTION_SCORES, intention) ?? 0,         // intention
    clampUnit(1 - daysSince / 90),                         // recency (0 = 90+ days ago, 1 = today)
    row.has_email ? 1 : 0,                                 // has_email
    row.has_phone ? 1 : 0,                                 // has_phone
    row.is_br ? 1 : 0,                                     // is_br
    hour / 23,                                             // hour normalized
  ];
}
|
|
44
|
+
|
|
45
|
+
// ── Sigmoid ───────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/**
 * Logistic function 1 / (1 + e^-z), saturated to exactly 0 or 1 once |z|
 * exceeds 20.
 *
 * @param {number} z
 * @returns {number} Value in [0, 1].
 */
function sigmoid(z) {
  const SATURATION = 20;
  if (Math.abs(z) > SATURATION) {
    return z > 0 ? 1 : 0;
  }
  return 1 / (1 + Math.exp(-z));
}
|
|
52
|
+
|
|
53
|
+
/**
 * Weighted sum of `features`. A weight that is missing (or otherwise falsy)
 * for a given position counts as 0, so extra features contribute nothing.
 *
 * @param {number[]} weights
 * @param {number[]} features
 * @returns {number}
 */
function dot(weights, features) {
  let total = 0;
  features.forEach((value, index) => {
    const weight = weights[index] || 0;
    total += weight * value;
  });
  return total;
}
|
|
56
|
+
|
|
57
|
+
// ── Treinamento ───────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
/**
 * Trains a logistic regression with full-batch gradient descent and L2
 * regularization on the weights (the bias is not regularized).
 *
 * Returns `null` instead of a model when the data cannot support training:
 * fewer than 50 samples, or fewer than 3% positive labels.
 *
 * @param {Array<{features: number[], label: number}>} dataset - Labels are 0 or 1.
 * @param {{ iterations?: number, learningRate?: number, lambda?: number }} opts
 *   Defaults: 200 iterations, learning rate 0.1, lambda 0.01. An explicit `0`
 *   is honoured (e.g. `lambda: 0` disables regularization).
 * @returns {{ bias: number, weights: number[], accuracy: number, positiveRate: number,
 *   sampleSize: number, threshold: number, featureNames: string[], trainedAt: string } | null}
 *   `accuracy` is measured on the training set itself.
 */
export function trainLogisticRegression(dataset, opts = {}) {
  if (!dataset || dataset.length < 50) {
    return null; // not enough data
  }

  // `??` rather than `||`: 0 is a legitimate value for each of these options.
  const iterations = opts.iterations ?? 200;
  const learningRate = opts.learningRate ?? 0.1;
  const lambda = opts.lambda ?? 0.01; // L2 regularization strength
  const nFeatures = dataset[0].features.length;

  let bias = 0;
  const weights = new Array(nFeatures).fill(0);

  const positives = dataset.filter((d) => d.label === 1).length;
  const positiveRate = positives / dataset.length;

  // With fewer than 3% positives there are too few conversions to learn from.
  if (positiveRate < 0.03) return null;

  const n = dataset.length;

  for (let iter = 0; iter < iterations; iter++) {
    let dBias = 0;
    const dWeights = new Array(nFeatures).fill(0);

    // Accumulate the log-loss gradient over the whole dataset.
    for (const { features, label } of dataset) {
      const z = dot(weights, features) + bias;
      const error = sigmoid(z) - label;

      dBias += error;
      for (let j = 0; j < nFeatures; j++) {
        dWeights[j] += error * features[j];
      }
    }

    bias -= learningRate * (dBias / n);
    for (let j = 0; j < nFeatures; j++) {
      // L2 term pulls large weights towards zero to limit overfitting.
      weights[j] -= learningRate * ((dWeights[j] / n) + lambda * weights[j]);
    }
  }

  // Training-set accuracy. For imbalanced data the decision threshold is
  // lowered to 1.5x the positive rate, but never below 0.3.
  const threshold = positiveRate > 0.3 ? 0.5 : Math.max(0.3, positiveRate * 1.5);

  let correct = 0;
  for (const { features, label } of dataset) {
    const z = dot(weights, features) + bias;
    const pred = sigmoid(z) >= threshold ? 1 : 0;
    if (pred === label) correct++;
  }

  const accuracy = correct / dataset.length;

  return {
    bias,
    weights,
    accuracy,
    positiveRate,
    sampleSize: dataset.length,
    threshold,
    featureNames: ['utm_score', 'engagement', 'intention', 'recency', 'has_email', 'has_phone', 'is_br', 'hour'],
    trainedAt: new Date().toISOString(),
  };
}
|
|
130
|
+
|
|
131
|
+
// ── Inferência ────────────────────────────────────────────────────────────────
|
|
132
|
+
|
|
133
|
+
/**
 * Turns a trained model and a feature vector into a conversion score.
 *
 * @param {{ bias: number, weights: number[] }} model - Trained parameters.
 * @param {number[]} features - Feature vector in the order used for training.
 * @returns {number} Predicted probability as an integer percentage, 0-100.
 */
export function predictWithWeights(model, features) {
  const { weights, bias } = model;
  const probability = sigmoid(dot(weights, features) + bias);
  return Math.round(100 * probability);
}
|
|
144
|
+
|
|
145
|
+
// ── Helpers de persistência ───────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
/** KV key under which the active model is cached. */
export const LTV_WEIGHTS_KV_KEY = 'ltv_weights_active';

/**
 * Loads the active model: KV cache first, then the most recently trained
 * active row in D1 (`ltv_model_weights`). A model read from D1 is written
 * back to KV with a 7-day TTL.
 *
 * Both sources go through the same validation: a model is only returned when
 * it carries a non-empty `weights` array, so callers never receive an object
 * that would break inference. Lookup failures are logged and yield `null`.
 *
 * @param {object} env - Worker bindings; `GEO_CACHE` (KV) and `DB` (D1) are both optional.
 * @returns {Promise<object|null>} The stored model, or `null` when none is usable.
 */
export async function loadActiveWeights(env) {
  const isUsable = (model) => Array.isArray(model?.weights) && model.weights.length > 0;

  // 1. Try KV (entries are written with a 7-day TTL)
  if (env.GEO_CACHE) {
    try {
      const cached = await env.GEO_CACHE.get(LTV_WEIGHTS_KV_KEY, 'json');
      if (isUsable(cached)) return cached;
    } catch (err) {
      // Cache problems are not fatal: fall through to D1.
      console.error('[LTV] loadActiveWeights KV error:', err.message);
    }
  }

  // 2. Fallback: D1
  if (!env.DB) return null;
  try {
    const row = await env.DB.prepare(
      `SELECT weights_json FROM ltv_model_weights WHERE is_active = 1 ORDER BY trained_at DESC LIMIT 1`
    ).first();
    if (!row?.weights_json) return null;

    const model = JSON.parse(row.weights_json);
    if (!isUsable(model)) return null;

    // Warm the cache for later requests. Fire-and-forget: the result is not
    // awaited and a failed write is ignored.
    if (env.GEO_CACHE) {
      env.GEO_CACHE.put(LTV_WEIGHTS_KV_KEY, JSON.stringify(model), { expirationTtl: 604800 }).catch(() => {});
    }
    return model;
  } catch (err) {
    console.error('[LTV] loadActiveWeights D1 error:', err.message);
    return null;
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Stores `model` as the new active row in `ltv_model_weights`, deactivating
 * whichever row was active before.
 *
 * When the database object exposes `batch()` (as D1 does), both statements are
 * sent as one batch so a failed insert cannot leave the table without an
 * active model. Otherwise the statements run one after the other.
 *
 * This function does not touch the KV cache.
 *
 * @param {object} DB - D1 database binding; a falsy value makes this a no-op.
 * @param {{ sampleSize: number, positiveRate: number, accuracy: number }} model
 *   Trained model; the whole object is serialised into `weights_json`.
 * @returns {Promise<void>}
 */
export async function saveWeights(DB, model) {
  if (!DB || !model) return;
  const now = new Date().toISOString();

  // Deactivate the previous model
  const deactivate = DB.prepare(`UPDATE ltv_model_weights SET is_active = 0 WHERE is_active = 1`);

  // Insert the new one as active
  const insert = DB.prepare(`
    INSERT INTO ltv_model_weights (trained_at, is_active, sample_size, positive_rate, accuracy, weights_json)
    VALUES (?, 1, ?, ?, ?, ?)
  `).bind(
    now,
    model.sampleSize,
    model.positiveRate,
    model.accuracy,
    JSON.stringify(model),
  );

  if (typeof DB.batch === 'function') {
    await DB.batch([deactivate, insert]);
    return;
  }

  await deactivate.run();
  await insert.run();
}
|