cdp-edge 1.23.1 → 1.23.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -21
- package/bin/cdp-edge.js +1 -1
- package/contracts/agent-versions.json +67 -66
- package/dist/commands/install.js +1 -1
- package/dist/commands/server.js +4 -4
- package/extracted-skill/tracking-events-generator/agents/database-agent.md +5 -4
- package/extracted-skill/tracking-events-generator/agents/fraud-detection-agent.md +0 -1
- package/extracted-skill/tracking-events-generator/agents/linkedin-agent.md +1 -1
- package/extracted-skill/tracking-events-generator/agents/ltv-predictor-agent.md +4 -4
- package/extracted-skill/tracking-events-generator/agents/ml-clustering-agent.md +81 -70
- package/extracted-skill/tracking-events-generator/agents/page-analyzer.md +6 -2
- package/extracted-skill/tracking-events-generator/cdpTrack.js +7 -0
- package/extracted-skill/tracking-events-generator/models/lancamento-imobiliario.md +344 -0
- package/extracted-skill/tracking-events-generator/route-intent-capture.js +222 -0
- package/package.json +3 -2
- package/server-edge-tracker/INSTALAR.md +5 -5
- package/server-edge-tracker/index.js +109 -7
- package/server-edge-tracker/modules/db.js +71 -0
- package/server-edge-tracker/modules/dispatch/meta.js +12 -0
- package/server-edge-tracker/modules/ml/fraud.js +1 -16
- package/server-edge-tracker/modules/ml/ltv.js +62 -11
- package/server-edge-tracker/modules/ml/segmentation.js +157 -127
- package/server-edge-tracker/modules/utils.js +78 -0
- package/server-edge-tracker/schema-ltv-feedback.sql +11 -0
- package/server-edge-tracker/wrangler.toml +26 -8
- package/templates/lancamento-imobiliario.md +344 -0
- package/server-edge-tracker/worker.js +0 -4596
|
@@ -8,6 +8,23 @@ import { extractFeatures, predictWithWeights, loadActiveWeights } from './logist
|
|
|
8
8
|
// Cache key para o teste ativo (KV — evita hit no D1 a cada request /track)
|
|
9
9
|
const AB_LTV_CACHE_KEY = 'ab_ltv_active_test';
|
|
10
10
|
|
|
11
|
+
// ── Prompt especializado para imóveis ────────────────────────────────────────
|
|
12
|
+
// Ativado automaticamente quando property_lat/lng estão presentes no payload.
|
|
13
|
+
// Override por A/B test tem prioridade sobre este prompt.
|
|
14
|
+
const REAL_ESTATE_PROMPT = `You are a real estate lead scoring expert for the Brazilian market.
|
|
15
|
+
Reply ONLY with a JSON object {"adjustment": <integer between -15 and 15>} based on the lead data.
|
|
16
|
+
Scoring rules (apply additively):
|
|
17
|
+
- distance_km < 5: +12 (lives nearby, buys fast)
|
|
18
|
+
- distance_km 5-15: +8
|
|
19
|
+
- distance_km 15-30: +3
|
|
20
|
+
- distance_km > 30: 0
|
|
21
|
+
- distance_km unknown: +3 (gave intent signal without geo)
|
|
22
|
+
- event = Schedule or route click: +5 (physical visit intent)
|
|
23
|
+
- scroll_score >= 3 AND time_level = comprador: +4 (deep engagement)
|
|
24
|
+
- hour_brt between 18-22 (weekday): +3 (active decision window)
|
|
25
|
+
- has_phone = true: +2 (reachable for follow-up)
|
|
26
|
+
No explanation. JSON only.`;
|
|
27
|
+
|
|
11
28
|
// ── predictLtv — Heurística em 5 dimensões (0-100 pts) ───────────────────────
|
|
12
29
|
export async function predictLtv(env, payload, request, customSystemPrompt = null) {
|
|
13
30
|
// ── Tentar modelo treinado (regressão logística real) ─────────────────────
|
|
@@ -83,6 +100,25 @@ export async function predictLtv(env, payload, request, customSystemPrompt = nul
|
|
|
83
100
|
if (payload.phone) score += 4;
|
|
84
101
|
if (payload.firstName) score += 2;
|
|
85
102
|
|
|
103
|
+
// 5b. Tipo de evento imobiliário (0–15) — sinais de intenção de compra física
|
|
104
|
+
const evType = (payload.eventType || '').toLowerCase();
|
|
105
|
+
if (evType === 'customizeproduct') score += 15; // simulação de financiamento → intenção máxima
|
|
106
|
+
else if (evType === 'findlocation') score += 10; // viu mapa/localização → visita física iminente
|
|
107
|
+
else if (evType === 'addtowishlist') score += 8; // favoritou → interesse persistente
|
|
108
|
+
|
|
109
|
+
// 6. Proximidade ao imóvel físico (0–15) — apenas quando distância calculada
|
|
110
|
+
const distKm = parseFloat(payload.distanceKm ?? payload.user_distance_km ?? -1);
|
|
111
|
+
if (distKm >= 0) {
|
|
112
|
+
if (distKm < 5) score += 15;
|
|
113
|
+
else if (distKm < 15) score += 10;
|
|
114
|
+
else if (distKm < 30) score += 6;
|
|
115
|
+
else if (distKm < 60) score += 3;
|
|
116
|
+
// > 60km: sem bônus — lead distante precisa de argumento diferente
|
|
117
|
+
} else if (payload.property_lat || payload.propertyLat) {
|
|
118
|
+
// Coords no payload mas distância não resolvida: pequeno bônus por intenção de rota
|
|
119
|
+
score += 3;
|
|
120
|
+
}
|
|
121
|
+
|
|
86
122
|
score = Math.min(100, score);
|
|
87
123
|
|
|
88
124
|
let ltvClass, ltvMultiplier;
|
|
@@ -102,21 +138,36 @@ export async function predictLtv(env, payload, request, customSystemPrompt = nul
|
|
|
102
138
|
let aiAdjustment = 0;
|
|
103
139
|
if (env.AI && score >= 40) {
|
|
104
140
|
try {
|
|
141
|
+
const isRealEstate = !!(payload.property_lat || payload.propertyLat);
|
|
105
142
|
const systemContent = customSystemPrompt ||
|
|
106
|
-
|
|
143
|
+
(isRealEstate
|
|
144
|
+
? REAL_ESTATE_PROMPT
|
|
145
|
+
: 'You are a conversion rate expert. Reply ONLY with a JSON object {"adjustment": <number between -10 and 10>} based on the lead data provided. No explanation.');
|
|
146
|
+
|
|
147
|
+
const userContext = {
|
|
148
|
+
utm_source: payload.utmSource,
|
|
149
|
+
intention: intentionLevel,
|
|
150
|
+
engagement: engScore,
|
|
151
|
+
hour_utc: hour,
|
|
152
|
+
country,
|
|
153
|
+
has_email: !!payload.email,
|
|
154
|
+
has_phone: !!payload.phone,
|
|
155
|
+
};
|
|
156
|
+
if (isRealEstate) {
|
|
157
|
+
userContext.event_type = 'real_estate_schedule';
|
|
158
|
+
userContext.distance_km = payload.distanceKm ?? payload.user_distance_km ?? 'unknown';
|
|
159
|
+
userContext.distance_bucket = payload.distanceBucket ?? 'unknown';
|
|
160
|
+
userContext.scroll_score = payload.scrollScore || payload.scroll_score || 0;
|
|
161
|
+
userContext.time_level = payload.timeLevel || payload.time_level || 'unknown';
|
|
162
|
+
userContext.intent_score = payload.intent_score || 'high';
|
|
163
|
+
userContext.hour_brt = (hour - 3 + 24) % 24; // UTC-3 aproximado
|
|
164
|
+
}
|
|
165
|
+
|
|
107
166
|
const prompt = [
|
|
108
167
|
{ role: 'system', content: systemContent },
|
|
109
|
-
{ role: 'user', content: JSON.stringify(
|
|
110
|
-
utm_source: payload.utmSource,
|
|
111
|
-
intention: intentionLevel,
|
|
112
|
-
engagement: engScore,
|
|
113
|
-
hour_utc: hour,
|
|
114
|
-
country,
|
|
115
|
-
has_email: !!payload.email,
|
|
116
|
-
has_phone: !!payload.phone,
|
|
117
|
-
})},
|
|
168
|
+
{ role: 'user', content: JSON.stringify(userContext) },
|
|
118
169
|
];
|
|
119
|
-
const aiRes = await env.AI.run('@cf/
|
|
170
|
+
const aiRes = await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', { messages: prompt, max_tokens: 32 });
|
|
120
171
|
const parsed = JSON.parse(aiRes.response.trim());
|
|
121
172
|
if (typeof parsed.adjustment === 'number') {
|
|
122
173
|
aiAdjustment = Math.max(-10, Math.min(10, parsed.adjustment));
|
|
@@ -5,14 +5,84 @@
|
|
|
5
5
|
|
|
6
6
|
import { tryParseJson } from '../utils.js';
|
|
7
7
|
|
|
8
|
+
// ── Helpers K-means vetorial ──────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/**
 * Cosine distance between two equal-length numeric vectors.
 *
 * Returns 1 - cos(a, b). A small epsilon is added to the denominator so a
 * zero-norm vector yields a distance of 1 instead of a division by zero.
 *
 * @param {number[]} a - First vector; its length drives the iteration.
 * @param {number[]} b - Second vector, read at the same indices as `a`.
 * @returns {number} Cosine distance (0 for same direction, ~2 for opposite).
 */
function _cosDist(a, b) {
  let dotProduct = 0;
  let sqNormA = 0;
  let sqNormB = 0;
  for (let idx = 0; idx < a.length; idx++) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sqNormA += x * x;
    sqNormB += y * y;
  }
  const denominator = Math.sqrt(sqNormA) * Math.sqrt(sqNormB) + 1e-10;
  return 1 - dotProduct / denominator;
}
|
|
15
|
+
|
|
16
|
+
/**
 * K-means clustering over `vectors` using cosine distance (`_cosDist`).
 *
 * Seeding is distance-weighted (k-means++ style): the first centroid is a
 * random data point, each further centroid is drawn with probability
 * proportional to its distance from the nearest centroid chosen so far.
 *
 * The input vectors are never modified: every centroid is a private copy.
 *
 * @param {number[][]} vectors - Data points, all with the same dimension.
 * @param {number} k - Number of clusters to build.
 * @param {number} [maxIter=25] - Upper bound on assignment/update rounds.
 * @returns {{assignments: number[], centroids: number[][]}} Cluster index per
 *   input vector plus the final centroids. Empty input yields empty arrays.
 */
function _kmeansRun(vectors, k, maxIter = 25) {
  const n = vectors.length;
  if (n === 0) return { assignments: [], centroids: [] };
  const dim = vectors[0].length;
  const randomIndex = () => Math.floor(Math.random() * n);

  // Copy the seed: the update step below writes into centroids, and sharing
  // the array with `vectors` would overwrite a caller-owned data point.
  const centroids = [Array.from(vectors[randomIndex()])];
  while (centroids.length < k) {
    const dists = vectors.map(v => Math.min(...centroids.map(c => _cosDist(v, c))));
    const sum = dists.reduce((a, b) => a + b, 0);
    const r = Math.random() * sum;
    let cumul = 0;
    let picked = -1;
    for (let i = 0; i < n; i++) {
      cumul += dists[i];
      if (cumul >= r) { picked = i; break; }
    }
    // Uniform fallback only when the weighted draw selected nothing
    // (e.g. non-finite distances); exactly one centroid is added per round.
    centroids.push(Array.from(vectors[picked >= 0 ? picked : randomIndex()]));
  }

  const assignments = new Array(n).fill(0);
  for (let iter = 0; iter < maxIter; iter++) {
    // Assignment step: attach every point to its nearest centroid.
    let changed = false;
    for (let i = 0; i < n; i++) {
      let best = 0;
      let bestD = Infinity;
      for (let c = 0; c < k; c++) {
        const d = _cosDist(vectors[i], centroids[c]);
        if (d < bestD) { bestD = d; best = c; }
      }
      if (assignments[i] !== best) { assignments[i] = best; changed = true; }
    }
    if (!changed) break; // converged

    // Update step: move each centroid to the mean of its members.
    for (let c = 0; c < k; c++) {
      const members = vectors.filter((_, i) => assignments[i] === c);
      if (!members.length) continue; // empty cluster keeps its previous centroid
      for (let d = 0; d < dim; d++) {
        centroids[c][d] = members.reduce((s, v) => s + v[d], 0) / members.length;
      }
    }
  }
  return { assignments, centroids };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Mean silhouette coefficient of a clustering, using cosine distance
 * (`_cosDist`), rounded to three decimals.
 *
 * For each point: a = mean distance to the other members of its own cluster,
 * b = smallest mean distance to the members of any other non-empty cluster,
 * s = (b - a) / max(a, b). Points contribute 0 when they sit alone in their
 * cluster, when no other cluster has members, or when max(a, b) is 0, so the
 * result is always a finite number.
 *
 * @param {number[][]} vectors - Data points.
 * @param {number[]} assignments - Cluster index (0..k-1) for each vector.
 * @param {number} k - Number of clusters.
 * @returns {number} Mean silhouette in [-1, 1]; 0 for empty input.
 */
function _silhouette(vectors, assignments, k) {
  const n = vectors.length;
  if (n === 0) return 0;

  // Index the members of every cluster once instead of filtering per point.
  const clusters = Array.from({ length: k }, () => []);
  for (let i = 0; i < n; i++) {
    const members = clusters[assignments[i]];
    if (members) members.push(i);
  }

  // Mean distance from point i to the listed members, excluding i itself.
  const meanDist = (i, members) => {
    let sum = 0;
    let count = 0;
    for (const j of members) {
      if (j === i) continue;
      sum += _cosDist(vectors[i], vectors[j]);
      count++;
    }
    return count ? sum / count : 0;
  };

  let total = 0;
  for (let i = 0; i < n; i++) {
    const ci = assignments[i];
    const own = clusters[ci];
    // A point alone in its cluster has no cohesion to measure: score 0.
    if (!own || own.length < 2) continue;

    const a = meanDist(i, own);
    let b = Infinity;
    for (let c = 0; c < k; c++) {
      if (c === ci || clusters[c].length === 0) continue;
      b = Math.min(b, meanDist(i, clusters[c]));
    }
    if (b === Infinity) continue; // no other populated cluster

    const denom = Math.max(a, b);
    if (denom > 0) total += (b - a) / denom; // a = b = 0 would otherwise be NaN
  }
  return Math.round((total / n) * 1000) / 1000;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Renders a lead row as a comma-separated Portuguese text profile.
 *
 * Missing fields fall back to neutral placeholders. The hour uses a nullish
 * fallback so that a lead captured at midnight (hour 0) is reported as "0h"
 * rather than being replaced by the 12h default.
 *
 * @param {object} l - Lead row with optional fields: predicted_ltv_class,
 *   engagement_score, intention_level, utm_source, utm_medium, country,
 *   state, hour_of_day, is_weekend, days_since_lead.
 * @returns {string} Profile text, fields joined by ", ".
 */
function _buildLeadProfile(l) {
  const parts = [
    `LTV: ${l.predicted_ltv_class || 'desconhecido'}`,
    `engajamento: ${Math.round(l.engagement_score || 0)}`,
    `intenção: ${l.intention_level || 'desconhecida'}`,
    `origem: ${l.utm_source || 'direto'}`,
    `canal: ${l.utm_medium || 'desconhecido'}`,
    `país: ${l.country || 'BR'}`,
    `estado: ${l.state || ''}`,
    `hora: ${l.hour_of_day ?? 12}h`,
    (l.is_weekend ? 'fim-de-semana' : 'dia-útil'),
    `recência: ${l.days_since_lead || 0} dias`,
  ];
  // Every entry is a non-empty string, so no filtering is needed before joining.
  return parts.join(', ');
}
|
|
76
|
+
|
|
8
77
|
// ── POST /api/segmentation/cluster ────────────────────────────────────────────
|
|
78
|
+
// Clustering real: embeddings via Workers AI (@cf/baai/bge-m3) → K-means vetorial → Granite para nomear
|
|
9
79
|
export async function handleSegmentationCluster(env, request, headers) {
|
|
10
80
|
if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });
|
|
11
|
-
if (!env.AI) return new Response(JSON.stringify({ error: 'Workers AI não configurado
|
|
81
|
+
if (!env.AI) return new Response(JSON.stringify({ error: 'Workers AI não configurado' }), { status: 503, headers });
|
|
12
82
|
|
|
13
83
|
const url = new URL(request.url);
|
|
14
84
|
const algorithm = url.searchParams.get('algorithm') || 'kmeans';
|
|
15
|
-
const nClusters = Math.min(10, Math.max(
|
|
85
|
+
const nClusters = Math.min(10, Math.max(2, parseInt(url.searchParams.get('n_clusters') || '5')));
|
|
16
86
|
const clientVertical = url.searchParams.get('vertical') || 'general';
|
|
17
87
|
const forceRecluster = url.searchParams.get('force') === 'true';
|
|
18
88
|
|
|
@@ -21,16 +91,14 @@ export async function handleSegmentationCluster(env, request, headers) {
|
|
|
21
91
|
}
|
|
22
92
|
|
|
23
93
|
try {
|
|
24
|
-
// 1. Cluster recente? Evitar re-clustering desnecessário (< 7 dias)
|
|
25
94
|
if (!forceRecluster) {
|
|
26
95
|
const existing = await env.DB.prepare(`
|
|
27
96
|
SELECT id, created_at, cluster_name FROM ml_segments
|
|
28
97
|
WHERE clustering_algorithm = ? AND is_active = 1 AND client_vertical = ?
|
|
29
98
|
ORDER BY created_at DESC LIMIT 1
|
|
30
99
|
`).bind(algorithm, clientVertical).first();
|
|
31
|
-
|
|
32
100
|
if (existing) {
|
|
33
|
-
const ageDays = (Date.now() - new Date(existing.created_at).getTime()) /
|
|
101
|
+
const ageDays = (Date.now() - new Date(existing.created_at).getTime()) / 864e5;
|
|
34
102
|
if (ageDays < 7) {
|
|
35
103
|
return new Response(JSON.stringify({
|
|
36
104
|
success: true, message: 'Cluster existente ainda válido (< 7 dias). Use ?force=true para re-clustering.',
|
|
@@ -41,7 +109,6 @@ export async function handleSegmentationCluster(env, request, headers) {
|
|
|
41
109
|
}
|
|
42
110
|
}
|
|
43
111
|
|
|
44
|
-
// 2. Extrair leads históricos do D1 (últimos 6 meses, excluindo bots confirmados)
|
|
45
112
|
const leadsRes = await env.DB.prepare(`
|
|
46
113
|
SELECT id, predicted_ltv_class, engagement_score, intention_level,
|
|
47
114
|
country, state, utm_source, utm_medium, bot_score,
|
|
@@ -49,162 +116,125 @@ export async function handleSegmentationCluster(env, request, headers) {
|
|
|
49
116
|
CAST(julianday('now') - julianday(created_at) AS INTEGER) AS days_since_lead,
|
|
50
117
|
CASE WHEN strftime('%w', created_at) IN ('0','6') THEN 1 ELSE 0 END AS is_weekend
|
|
51
118
|
FROM leads
|
|
52
|
-
WHERE created_at >= datetime('now', '-6 months')
|
|
53
|
-
|
|
54
|
-
ORDER BY RANDOM()
|
|
55
|
-
LIMIT 2000
|
|
119
|
+
WHERE created_at >= datetime('now', '-6 months') AND (bot_score IS NULL OR bot_score < 2)
|
|
120
|
+
ORDER BY RANDOM() LIMIT 2000
|
|
56
121
|
`).all();
|
|
57
122
|
|
|
58
123
|
const leads = leadsRes.results || [];
|
|
59
|
-
|
|
60
124
|
if (leads.length < 50) {
|
|
61
|
-
return new Response(JSON.stringify({
|
|
62
|
-
error: 'Dados insuficientes para clustering. Mínimo: 50 leads nos últimos 6 meses.',
|
|
63
|
-
leads_found: leads.length, required: 50,
|
|
64
|
-
}), { status: 400, headers });
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
// 3. Feature Engineering — normalização 0–1
|
|
68
|
-
const features = leads.map(l => ({
|
|
69
|
-
id: l.id,
|
|
70
|
-
ltv: l.predicted_ltv_class === 'High' ? 1 : (l.predicted_ltv_class === 'Medium' ? 0.5 : 0),
|
|
71
|
-
engagement: Math.min((l.engagement_score || 0) / 100, 1),
|
|
72
|
-
intention: l.intention_level === 'comprador' || l.intention_level === 'high_intent' ? 1
|
|
73
|
-
: l.intention_level === 'interessado' ? 0.6
|
|
74
|
-
: l.intention_level === 'curioso' ? 0.3 : 0,
|
|
75
|
-
recency: Math.max(0, 1 - (l.days_since_lead || 0) / 180),
|
|
76
|
-
hour: (l.hour_of_day || 12) / 23,
|
|
77
|
-
is_weekend: l.is_weekend || 0,
|
|
78
|
-
is_br: l.country === 'BR' ? 1 : 0,
|
|
79
|
-
is_paid: ['facebook','google','tiktok','instagram','youtube'].includes((l.utm_source || '').toLowerCase()) ? 1 : 0,
|
|
80
|
-
}));
|
|
81
|
-
|
|
82
|
-
// 4. Prompt para Workers AI
|
|
83
|
-
const sampleSize = Math.min(features.length, 100);
|
|
84
|
-
const sample = features.slice(0, sampleSize);
|
|
85
|
-
|
|
86
|
-
const clusteringPrompt =
|
|
87
|
-
`You are a customer segmentation ML expert. Perform ${algorithm} clustering on ${sampleSize} customers into ${nClusters} segments.
|
|
88
|
-
|
|
89
|
-
Customer features (all normalized 0-1):
|
|
90
|
-
- ltv: predicted lifetime value (0=Low, 0.5=Medium, 1=High)
|
|
91
|
-
- engagement: browser engagement score
|
|
92
|
-
- intention: purchase intention (0=none, 0.3=curious, 0.6=interested, 1=buyer)
|
|
93
|
-
- recency: lead recency (1=today, 0=6 months ago)
|
|
94
|
-
- hour: conversion hour of day
|
|
95
|
-
- is_weekend: converted on weekend (0/1)
|
|
96
|
-
- is_br: lead from Brazil (0/1)
|
|
97
|
-
- is_paid: from paid traffic channel (0/1)
|
|
98
|
-
|
|
99
|
-
Data (${sampleSize} customers): ${JSON.stringify(sample.slice(0, 50))}
|
|
100
|
-
|
|
101
|
-
Return ONLY valid JSON, zero explanation:
|
|
102
|
-
{
|
|
103
|
-
"clusters": [
|
|
104
|
-
{
|
|
105
|
-
"cluster_id": 0,
|
|
106
|
-
"name": "[Nome Descritivo em Português]",
|
|
107
|
-
"size": ${Math.round(sampleSize / nClusters)},
|
|
108
|
-
"percentage": ${Math.round(100 / nClusters)},
|
|
109
|
-
"characteristics": {
|
|
110
|
-
"avg_ltv_class": 0.5,
|
|
111
|
-
"avg_behavior_score": 0.5,
|
|
112
|
-
"avg_engagement_score": 0.5,
|
|
113
|
-
"avg_intention_level": 0.5,
|
|
114
|
-
"avg_days_since_lead": 30,
|
|
115
|
-
"dominant_countries": ["BR"],
|
|
116
|
-
"dominant_states": ["SP", "RJ"],
|
|
117
|
-
"dominant_utm_sources": ["facebook"],
|
|
118
|
-
"top_features": ["ltv", "engagement"]
|
|
119
|
-
},
|
|
120
|
-
"centroid": { "ltv": 0.5, "engagement": 0.5, "intention": 0.5 },
|
|
121
|
-
"action_recommendation": "[Recomendação de campanha específica para este segmento]"
|
|
125
|
+
return new Response(JSON.stringify({ error: 'Dados insuficientes para clustering. Mínimo: 50 leads.', leads_found: leads.length, required: 50 }), { status: 400, headers });
|
|
122
126
|
}
|
|
123
|
-
],
|
|
124
|
-
"silhouette_score": 0.65,
|
|
125
|
-
"total_processed": ${sampleSize}
|
|
126
|
-
}`;
|
|
127
127
|
|
|
128
|
-
// 5. Workers AI
|
|
129
128
|
const startTime = Date.now();
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
const
|
|
135
|
-
|
|
136
|
-
if (!
|
|
129
|
+
const sample = leads.slice(0, 100);
|
|
130
|
+
const profiles = sample.map(_buildLeadProfile);
|
|
131
|
+
|
|
132
|
+
// Embeddings reais via Workers AI (modelo @cf/baai/bge-m3)
|
|
133
|
+
const embRes = await env.AI.run('@cf/baai/bge-m3', { text: profiles });
|
|
134
|
+
const vectors = embRes.data;
|
|
135
|
+
if (!vectors || vectors.length < nClusters) throw new Error(`embeddinggemma retornou ${vectors?.length ?? 0} vetores`);
|
|
136
|
+
|
|
137
|
+
// K-means vetorial real
|
|
138
|
+
const { assignments } = _kmeansRun(vectors, nClusters);
|
|
139
|
+
const silhouetteScore = _silhouette(vectors, assignments, nClusters);
|
|
140
|
+
|
|
141
|
+
// Agregação por cluster para nomear com Granite
|
|
142
|
+
const clusterStats = Array.from({ length: nClusters }, (_, c) => {
|
|
143
|
+
const members = sample.filter((_, i) => assignments[i] === c);
|
|
144
|
+
if (!members.length) return null;
|
|
145
|
+
const ltvMap = { High: 1, Medium: 0.5, Low: 0 };
|
|
146
|
+
const avgLtv = members.reduce((s, l) => s + (ltvMap[l.predicted_ltv_class] ?? 0), 0) / members.length;
|
|
147
|
+
const avgEng = members.reduce((s, l) => s + (l.engagement_score || 0), 0) / members.length;
|
|
148
|
+
const avgDays = members.reduce((s, l) => s + (l.days_since_lead || 0), 0) / members.length;
|
|
149
|
+
const freq = (arr) => arr.length ? [...arr.reduce((m,s) => m.set(s,(m.get(s)||0)+1), new Map())].sort((a,b)=>b[1]-a[1])[0]?.[0] : null;
|
|
150
|
+
return {
|
|
151
|
+
c, size: members.length, pct: Math.round(members.length / sample.length * 100),
|
|
152
|
+
avgLtv, avgEng, avgDays,
|
|
153
|
+
topSource: freq(members.map(l => l.utm_source).filter(Boolean)) || 'direto',
|
|
154
|
+
topState: freq(members.map(l => l.state).filter(Boolean)) || 'BR',
|
|
155
|
+
topIntent: freq(members.map(l => l.intention_level).filter(Boolean)) || 'desconhecida',
|
|
156
|
+
};
|
|
157
|
+
}).filter(Boolean);
|
|
158
|
+
|
|
159
|
+
// Granite apenas para nomear segmentos
|
|
160
|
+
const namingPrompt =
|
|
161
|
+
`Você é especialista em segmentação de clientes. Dê um nome descritivo em português e uma recomendação de campanha para cada segmento. Retorne SOMENTE JSON válido:
|
|
162
|
+
{"segments":[{"cluster_id":0,"name":"...","action":"..."},...]}
|
|
163
|
+
|
|
164
|
+
${clusterStats.map(s => `Cluster ${s.c}: LTV=${s.avgLtv.toFixed(2)}, engajamento=${s.avgEng.toFixed(0)}, intenção="${s.topIntent}", origem="${s.topSource}", estado="${s.topState}", recência=${s.avgDays.toFixed(0)} dias, tamanho=${s.size}`).join('\n')}`;
|
|
165
|
+
|
|
166
|
+
const nameRes = await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', { messages: [{ role: 'user', content: namingPrompt }], max_tokens: 800 });
|
|
167
|
+
let clusterNames = {};
|
|
168
|
+
try {
|
|
169
|
+
const m = (nameRes?.response || '').match(/\{[\s\S]*\}/);
|
|
170
|
+
if (m) (JSON.parse(m[0]).segments || []).forEach(s => { clusterNames[s.cluster_id] = { name: s.name, action: s.action }; });
|
|
171
|
+
} catch { /* usa nomes fallback */ }
|
|
137
172
|
|
|
138
|
-
const
|
|
139
|
-
if (!jsonMatch) throw new Error('Resposta do Workers AI não contém JSON válido');
|
|
140
|
-
const mlResult = JSON.parse(jsonMatch[0]);
|
|
173
|
+
const duration = Date.now() - startTime;
|
|
141
174
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
175
|
+
const clusters = clusterStats.map(s => ({
|
|
176
|
+
cluster_id: s.c,
|
|
177
|
+
name: clusterNames[s.c]?.name || `Segmento ${s.c + 1}`,
|
|
178
|
+
size: s.size, percentage: s.pct,
|
|
179
|
+
action_recommendation: clusterNames[s.c]?.action || '',
|
|
180
|
+
characteristics: {
|
|
181
|
+
avg_ltv_class: s.avgLtv, avg_engagement_score: s.avgEng,
|
|
182
|
+
avg_intention_level: s.avgLtv, avg_days_since_lead: s.avgDays,
|
|
183
|
+
dominant_countries: ['BR'], dominant_states: [s.topState],
|
|
184
|
+
dominant_utm_sources: [s.topSource], top_features: ['ltv', 'engagement', 'intention'],
|
|
185
|
+
},
|
|
186
|
+
}));
|
|
145
187
|
|
|
146
|
-
// 6. Inativar clusters anteriores
|
|
147
188
|
await env.DB.prepare(`UPDATE ml_segments SET is_active = 0 WHERE clustering_algorithm = ? AND client_vertical = ? AND is_active = 1`).bind(algorithm, clientVertical).run();
|
|
148
189
|
|
|
149
|
-
// 7. Persistir novos clusters
|
|
150
190
|
const now = new Date().toISOString();
|
|
151
|
-
for (const cluster of
|
|
152
|
-
const ch = cluster.characteristics
|
|
191
|
+
for (const cluster of clusters) {
|
|
192
|
+
const ch = cluster.characteristics;
|
|
153
193
|
await env.DB.prepare(`
|
|
154
194
|
INSERT INTO ml_segments (
|
|
155
|
-
cluster_id, cluster_name, clustering_algorithm, client_vertical,
|
|
156
|
-
|
|
157
|
-
avg_intention_level, avg_days_since_lead,
|
|
195
|
+
cluster_id, cluster_name, clustering_algorithm, client_vertical, size, percentage,
|
|
196
|
+
avg_ltv_class, avg_behavior_score, avg_engagement_score, avg_intention_level, avg_days_since_lead,
|
|
158
197
|
dominant_countries, dominant_states, dominant_utm_sources, dominant_features,
|
|
159
198
|
silhouette_score, action_recommendations, bid_recommendations, campaign_recommendations,
|
|
160
199
|
is_active, created_at, updated_at
|
|
161
200
|
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?)
|
|
162
201
|
`).bind(
|
|
163
|
-
cluster.cluster_id
|
|
164
|
-
|
|
165
|
-
ch.
|
|
166
|
-
ch.
|
|
167
|
-
|
|
168
|
-
JSON.stringify(
|
|
169
|
-
mlResult.silhouette_score || 0,
|
|
170
|
-
JSON.stringify([cluster.action_recommendation || '']), JSON.stringify([]), JSON.stringify([]),
|
|
202
|
+
cluster.cluster_id, cluster.name, algorithm, clientVertical, cluster.size, cluster.percentage,
|
|
203
|
+
ch.avg_ltv_class, ch.avg_engagement_score, ch.avg_engagement_score, ch.avg_intention_level, ch.avg_days_since_lead,
|
|
204
|
+
JSON.stringify(ch.dominant_countries), JSON.stringify(ch.dominant_states),
|
|
205
|
+
JSON.stringify(ch.dominant_utm_sources), JSON.stringify(ch.top_features),
|
|
206
|
+
silhouetteScore,
|
|
207
|
+
JSON.stringify([cluster.action_recommendation]), JSON.stringify([]), JSON.stringify([]),
|
|
171
208
|
now, now,
|
|
172
209
|
).run();
|
|
173
210
|
}
|
|
174
211
|
|
|
175
|
-
// 8. Log no histórico
|
|
176
212
|
try {
|
|
177
213
|
await env.DB.prepare(`
|
|
178
|
-
INSERT INTO ml_clustering_history (
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
`).bind(
|
|
184
|
-
new Date(startTime).toISOString(), algorithm, leads.length, mlResult.clusters.length,
|
|
185
|
-
duration, Math.ceil(duration * 0.01),
|
|
186
|
-
JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical }),
|
|
187
|
-
JSON.stringify({ clusters: mlResult.clusters.length, silhouette: mlResult.silhouette_score }),
|
|
214
|
+
INSERT INTO ml_clustering_history (clustering_id, started_at, completed_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, parameters, results_summary)
|
|
215
|
+
VALUES (0, ?, datetime('now'), ?, ?, ?, ?, ?, 'completed', ?, ?)
|
|
216
|
+
`).bind(new Date(startTime).toISOString(), algorithm, leads.length, clusters.length, duration, Math.ceil(duration * 0.01),
|
|
217
|
+
JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical, engine: 'embeddinggemma-300m+kmeans' }),
|
|
218
|
+
JSON.stringify({ clusters: clusters.length, silhouette: silhouetteScore }),
|
|
188
219
|
).run();
|
|
189
220
|
} catch (e) { console.error('[Segmentation] history log error:', e.message); }
|
|
190
221
|
|
|
191
222
|
return new Response(JSON.stringify({
|
|
192
|
-
success: true, algorithm,
|
|
193
|
-
|
|
194
|
-
|
|
223
|
+
success: true, algorithm, engine: 'embeddinggemma-300m + kmeans vetorial',
|
|
224
|
+
n_clusters: clusters.length, client_vertical: clientVertical,
|
|
225
|
+
leads_analyzed: leads.length, sample_embedded: sample.length,
|
|
226
|
+
duration_ms: duration, silhouette_score: silhouetteScore,
|
|
227
|
+
clusters, generated_at: now,
|
|
195
228
|
}), { status: 200, headers });
|
|
196
229
|
|
|
197
230
|
} catch (err) {
|
|
198
231
|
console.error('[Segmentation] cluster error:', err.message);
|
|
199
232
|
try {
|
|
200
|
-
if (env.DB)
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
}
|
|
206
|
-
} catch { /* não bloquear a resposta de erro */ }
|
|
207
|
-
|
|
233
|
+
if (env.DB) await env.DB.prepare(`
|
|
234
|
+
INSERT INTO ml_clustering_history (clustering_id, started_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, error_message, parameters, results_summary)
|
|
235
|
+
VALUES (0, datetime('now'), ?, 0, 0, 0, 0, 'failed', ?, ?, '{}')
|
|
236
|
+
`).bind(algorithm, err.message, JSON.stringify({ algorithm, n_clusters: nClusters })).run();
|
|
237
|
+
} catch { /* não bloquear */ }
|
|
208
238
|
return new Response(JSON.stringify({ error: 'Erro ao executar clustering', message: err.message }), { status: 500, headers });
|
|
209
239
|
}
|
|
210
240
|
}
|
|
@@ -86,4 +86,82 @@ export const VALID_EVENT_NAMES = new Set([
|
|
|
86
86
|
'AddToCart','CompleteRegistration','Contact','Schedule',
|
|
87
87
|
'StartTrial','Subscribe','SubmitApplication','Search',
|
|
88
88
|
'video_start','video_25','video_50','video_75','video_complete',
|
|
89
|
+
// Imóveis — intenção de visita física, financiamento e favoritar
|
|
90
|
+
'FindLocation','CustomizeProduct','AddToWishlist',
|
|
89
91
|
]);
|
|
92
|
+
|
|
93
|
+
// ── Funnel taxonomy (funnel depth → funnel_stage values) ─────────────────────
// Single source of truth for interpreting funnel_stage anywhere in the system.
export const FUNNEL_TAXONOMY = {
  top: ['scroll_50', 'time_30s', 'page_view', 'gallery_view', 'AddToWishlist'],
  mid: ['map_view', 'gallery_click', 'price_hover', 'time_3min', 'FindLocation'],
  bottom: ['route_click', 'whatsapp_click', 'cta_hover', 'CustomizeProduct'],
  conversion: ['schedule_confirmed', 'lead_form', 'purchase', 'visit_booked'],
};

// Inverted index: funnel_stage → depth, built once at module load.
// The accumulator has no prototype so that inherited names such as
// "constructor" or "toString" are not reported as known stages.
const _STAGE_TO_DEPTH = Object.entries(FUNNEL_TAXONOMY).reduce((acc, [depth, stages]) => {
  stages.forEach(s => { acc[s] = depth; });
  return acc;
}, Object.create(null));
|
|
107
|
+
|
|
108
|
+
/**
 * Maps a funnel_stage value to its funnel depth and a semantic funnelDepth.
 *
 * Looks the stage up in `_STAGE_TO_DEPTH`; unmapped stages give 'unknown'.
 *   - depth 'bottom'     → funnelDepth 'bottom_intent'     (strong intent)
 *   - depth 'conversion' → funnelDepth 'bottom_conversion' (confirmed action)
 *   - any other depth is returned unchanged as funnelDepth.
 *
 * @param {string} funnel_stage - Stage name, e.g. 'route_click'.
 * @returns {{depth: string, funnelDepth: string}}
 */
export function resolveFunnelStage(funnel_stage) {
  const depth = _STAGE_TO_DEPTH[funnel_stage] || 'unknown';

  let funnelDepth = depth;
  if (depth === 'conversion') {
    funnelDepth = 'bottom_conversion';
  } else if (depth === 'bottom') {
    funnelDepth = 'bottom_intent';
  }

  return { depth, funnelDepth };
}
|
|
120
|
+
|
|
121
|
+
// ── intent_score normalization → 0.0–1.0 ─────────────────────────────────────
// Accepts: label string ('high'/'medium'/'low'), a number on the 0-1 scale, or
// a number on the 0-100 scale (numbers may also arrive as numeric strings).
const _INTENT_STRING_MAP = { high: 0.92, medium: 0.65, low: 0.30 };

/**
 * Normalizes an intent score to a number between 0 and 1 (two decimals).
 *
 * @param {string|number|null|undefined} value - Label, numeric value, or
 *   numeric string. Labels are matched case-insensitively after trimming.
 * @returns {number|null} Normalized score, or null when the value is missing
 *   or cannot be interpreted.
 */
export function resolveIntentScore(value) {
  if (value === null || value === undefined) return null;

  let num;
  if (typeof value === 'string') {
    const text = value.trim();
    const mapped = _INTENT_STRING_MAP[text.toLowerCase()];
    // Only own numeric entries count as labels; inherited keys such as
    // "constructor" must not leak Object.prototype members to the caller.
    if (typeof mapped === 'number') return mapped;
    // Not a label: accept fully numeric strings ("85", "0.4"), reject the rest.
    num = text === '' ? NaN : Number(text);
  } else {
    num = parseFloat(value);
  }

  if (Number.isNaN(num)) return null;
  const normalized = num > 1 ? num / 100 : num; // 0-100 scale → 0-1
  return Math.min(1, Math.max(0, Math.round(normalized * 100) / 100));
}
|
|
133
|
+
|
|
134
|
+
/**
 * Converts a distanceBucket label into a numeric weight for meta_signal.
 *
 * very_close=1.0, close=0.75, nearby=0.5, moderate=0.25, far=0.1.
 * Any other value (missing, unknown label, or a name inherited from
 * Object.prototype such as "constructor") gets the neutral weight 0.3.
 *
 * @param {string} [bucket] - Distance bucket label.
 * @returns {number} Weight between 0.1 and 1.0.
 */
export function distanceBucketWeight(bucket) {
  const map = { very_close: 1.0, close: 0.75, nearby: 0.5, moderate: 0.25, far: 0.1 };
  const weight = map[bucket];
  // Checking the type (not just nullishness) rejects inherited members,
  // which are functions/objects rather than weights.
  return typeof weight === 'number' ? weight : 0.3;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Returns the meta_signal component weights for a given funnel level.
 *
 *   - 'bottom' / 'conversion': behavior dominates (intent + distance).
 *   - 'top': profile dominates (ltv).
 *   - anything else (mid, unknown, missing): balanced default.
 *
 * @param {string} [funnelLevel] - Funnel depth label.
 * @returns {{intent: number, ltv: number, dist: number}} Weights summing to 1.
 */
export function computeMetaSignalWeights(funnelLevel) {
  switch (funnelLevel) {
    case 'bottom':
    case 'conversion':
      return { intent: 0.5, ltv: 0.2, dist: 0.3 };
    case 'top':
      return { intent: 0.2, ltv: 0.6, dist: 0.2 };
    default:
      return { intent: 0.4, ltv: 0.4, dist: 0.2 };
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Quantizes a continuous meta_signal score into a readable bucket.
 *
 * score >= 0.8 → 'hot', score >= 0.6 → 'warm', everything else → 'cold'
 * (including values that fail both comparisons, such as NaN or undefined).
 *
 * @param {number} score - meta_signal value.
 * @returns {'hot'|'warm'|'cold'} Bucket label.
 */
export function metaSignalBucket(score) {
  const label = score >= 0.8
    ? 'hot'
    : score >= 0.6
      ? 'warm'
      : 'cold';
  return label;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
-- CDP Edge — Schema LTV Feedback Loop
|
|
2
|
+
-- Fecha o ciclo preditivo: Purchase real → corrige predicted_ltv_value
|
|
3
|
+
-- Execução: wrangler d1 execute cdp-edge-db --file=schema-ltv-feedback.sql --remote
|
|
4
|
+
--
|
|
5
|
+
-- Idempotência: ALTER TABLE não suporta IF NOT EXISTS no SQLite.
|
|
6
|
+
-- Se a coluna já existir, o comando gera erro mas não afeta dados existentes.
|
|
7
|
+
-- Seguro executar mais de uma vez.
|
|
8
|
+
|
|
9
|
+
ALTER TABLE user_profiles ADD COLUMN real_ltv_value REAL;
|
|
10
|
+
ALTER TABLE user_profiles ADD COLUMN ltv_accuracy REAL; -- 1 - |pred-real|/real (0–1, maior = melhor)
|
|
11
|
+
ALTER TABLE user_profiles ADD COLUMN ltv_feedback_at TEXT; -- timestamp do último feedback
|
|
@@ -4,6 +4,7 @@ name = "server-edge-tracker"
|
|
|
4
4
|
main = "index.js"
|
|
5
5
|
compatibility_date = "2025-01-01"
|
|
6
6
|
compatibility_flags = ["nodejs_compat"]
|
|
7
|
+
workers_dev = true
|
|
7
8
|
|
|
8
9
|
# ── Worker Routes — same-domain tracking (imune a bloqueios) ─────────────────
|
|
9
10
|
# Substituir SEU_DOMINIO pelo domínio do cliente antes do deploy
|
|
@@ -16,18 +17,18 @@ compatibility_flags = ["nodejs_compat"]
|
|
|
16
17
|
# zone_name = "SEU_DOMINIO"
|
|
17
18
|
|
|
18
19
|
[[routes]]
|
|
19
|
-
pattern = "
|
|
20
|
-
zone_name = "
|
|
20
|
+
pattern = "SEU_DOMINIO/track*"
|
|
21
|
+
zone_name = "SEU_DOMINIO"
|
|
21
22
|
|
|
22
23
|
[[routes]]
|
|
23
|
-
pattern = "*.
|
|
24
|
-
zone_name = "
|
|
24
|
+
pattern = "*.SEU_DOMINIO/track*"
|
|
25
|
+
zone_name = "SEU_DOMINIO"
|
|
25
26
|
|
|
26
27
|
# ── Variáveis públicas (não são segredos) ─────────────────────────────────────
|
|
27
28
|
[vars]
|
|
28
|
-
META_PIXEL_ID = "
|
|
29
|
-
GA4_MEASUREMENT_ID = "
|
|
30
|
-
TIKTOK_PIXEL_ID = "
|
|
29
|
+
META_PIXEL_ID = ""
|
|
30
|
+
GA4_MEASUREMENT_ID = ""
|
|
31
|
+
TIKTOK_PIXEL_ID = ""
|
|
31
32
|
SITE_DOMAIN = "SEU_DOMINIO"
|
|
32
33
|
|
|
33
34
|
# ── Banco D1 ──────────────────────────────────────────────────────────────────
|
|
@@ -36,7 +37,7 @@ SITE_DOMAIN = "SEU_DOMINIO"
|
|
|
36
37
|
[[d1_databases]]
|
|
37
38
|
binding = "DB"
|
|
38
39
|
database_name = "cdp-edge-db"
|
|
39
|
-
database_id = "
|
|
40
|
+
database_id = "SEU_DATABASE_ID"
|
|
40
41
|
|
|
41
42
|
# ── Queues — Retry + Dead Letter Queue ───────────────────────────────────────
|
|
42
43
|
# Produtor: worker envia eventos com falha para cdp-edge-retry
|
|
@@ -95,6 +96,22 @@ namespace_id = "1001"
|
|
|
95
96
|
limit = 60
|
|
96
97
|
period = 60
|
|
97
98
|
|
|
99
|
+
# ── Observabilidade — Logs + Traces persistidos no painel Cloudflare ─────────
|
|
100
|
+
[observability]
|
|
101
|
+
enabled = false
|
|
102
|
+
head_sampling_rate = 1
|
|
103
|
+
|
|
104
|
+
[observability.logs]
|
|
105
|
+
enabled = true
|
|
106
|
+
head_sampling_rate = 1
|
|
107
|
+
persist = true
|
|
108
|
+
invocation_logs = true
|
|
109
|
+
|
|
110
|
+
[observability.traces]
|
|
111
|
+
enabled = false
|
|
112
|
+
persist = true
|
|
113
|
+
head_sampling_rate = 1
|
|
114
|
+
|
|
98
115
|
# ── Secrets (NÃO ficam aqui — configurar via CLI) ─────────────────────────────
|
|
99
116
|
# wrangler secret put META_ACCESS_TOKEN ← token Meta CAPI (obrigatório)
|
|
100
117
|
# wrangler secret put GA4_API_SECRET ← secret GA4 Measurement Protocol (obrigatório)
|
|
@@ -107,6 +124,7 @@ period = 60
|
|
|
107
124
|
# wrangler secret put RESEND_API_KEY ← API Key do Resend (resend.com)
|
|
108
125
|
# wrangler secret put RESEND_FROM_EMAIL ← Remetente verificado ex: "CDP Edge <noreply@seudominio.com.br>"
|
|
109
126
|
# wrangler secret put WA_WEBHOOK_VERIFY_TOKEN ← Token de verificação do webhook WhatsApp (você define — qualquer string segura)
|
|
127
|
+
# wrangler secret put WEBHOOK_SECRET_TICTO ← HMAC-SHA256 Ticto
|
|
110
128
|
# wrangler secret put PINTEREST_ACCESS_TOKEN ← Bearer token Pinterest Conversions API
|
|
111
129
|
# wrangler secret put PINTEREST_AD_ACCOUNT_ID ← ID da conta de anúncios Pinterest (ex: 549755813XXX)
|
|
112
130
|
# wrangler secret put REDDIT_ACCESS_TOKEN ← Bearer token Reddit Conversions API
|