cdp-edge 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,14 +5,84 @@
5
5
 
6
6
  import { tryParseJson } from '../utils.js';
7
7
 
8
+ // ── Helpers K-means vetorial ──────────────────────────────────────────────────
9
+
10
/**
 * Cosine distance between two numeric vectors: 1 - cos(angle between them).
 *
 * The result is scale-invariant: multiplying either vector by a positive
 * constant does not change the distance.
 *
 * @param {ArrayLike<number>} a - First vector; its length drives the loop.
 * @param {ArrayLike<number>} b - Second vector, read at the same indices as `a`.
 * @returns {number} A value in [0, 2]: 0 for the same direction, 1 for
 *   orthogonal vectors, 2 for opposite directions. Returns 1 when either
 *   vector has zero norm.
 */
function _cosDist(a, b) {
  let dot = 0;
  let normSqA = 0;
  let normSqB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normSqA += a[i] * a[i];
    normSqB += b[i] * b[i];
  }

  const denom = Math.sqrt(normSqA) * Math.sqrt(normSqB);
  // A zero-norm vector has no direction, so treat it as unrelated (distance 1)
  // instead of dividing by zero.
  if (denom === 0) return 1;

  // Divide by the exact norm product: an additive epsilon in the denominator
  // would dominate for small-magnitude vectors and report identical vectors as
  // far apart. Clamp to absorb floating-point drift just outside [-1, 1].
  const cosine = Math.min(1, Math.max(-1, dot / denom));
  return 1 - cosine;
}
15
+
16
/**
 * Clusters vectors with k-means under cosine distance.
 *
 * Seeding is k-means++-style: the first centroid is a uniformly random input
 * vector, and each further centroid is drawn with probability proportional to
 * its cosine distance from the nearest centroid chosen so far.
 *
 * Each round assigns every vector to its nearest centroid, then moves each
 * centroid to the component-wise mean of its members. Iteration stops when a
 * round changes no assignment or after `maxIter` rounds. A centroid with no
 * members keeps its previous position.
 *
 * @param {Array<ArrayLike<number>>} vectors - Input vectors. They are never
 *   mutated. The dimension is taken from the first vector.
 * @param {number} k - Number of centroids to seed.
 * @param {number} [maxIter=25] - Maximum number of assign/update rounds.
 * @returns {{assignments: number[], centroids: number[][]}} `assignments[i]`
 *   is the centroid index of `vectors[i]`; both arrays are empty when
 *   `vectors` is empty.
 */
function _kmeansRun(vectors, k, maxIter = 25) {
  const n = vectors.length;
  if (n === 0) return { assignments: [], centroids: [] };
  const dim = vectors[0].length;

  // Centroids are always COPIES of input vectors. Sharing the arrays would let
  // the update step below overwrite the caller's data, and two centroids
  // seeded from the same vector would become one shared array.
  const centroids = [Array.from(vectors[Math.floor(Math.random() * n)])];
  while (centroids.length < k) {
    const dists = vectors.map((v) => Math.min(...centroids.map((c) => _cosDist(v, c))));
    const sum = dists.reduce((acc, d) => acc + d, 0);
    const threshold = Math.random() * sum;

    let cumul = 0;
    let pick = -1;
    for (let i = 0; i < n; i++) {
      cumul += dists[i];
      if (cumul >= threshold) {
        pick = i;
        break;
      }
    }
    // Uniform fallback only when the weighted draw selected nothing (e.g. the
    // distances contain NaN). Exactly one centroid is added per round.
    if (pick === -1) pick = Math.floor(Math.random() * n);
    centroids.push(Array.from(vectors[pick]));
  }

  const assignments = new Array(n).fill(0);
  for (let iter = 0; iter < maxIter; iter++) {
    // Assignment step: nearest centroid wins; ties go to the lowest index.
    let changed = false;
    for (let i = 0; i < n; i++) {
      let best = 0;
      let bestDist = Infinity;
      for (let c = 0; c < k; c++) {
        const d = _cosDist(vectors[i], centroids[c]);
        if (d < bestDist) {
          bestDist = d;
          best = c;
        }
      }
      if (assignments[i] !== best) {
        assignments[i] = best;
        changed = true;
      }
    }
    if (!changed) break;

    // Update step: move each non-empty centroid to the mean of its members.
    for (let c = 0; c < k; c++) {
      const members = vectors.filter((_, i) => assignments[i] === c);
      if (members.length === 0) continue;
      for (let d = 0; d < dim; d++) {
        centroids[c][d] = members.reduce((acc, v) => acc + v[d], 0) / members.length;
      }
    }
  }

  return { assignments, centroids };
}
43
+
44
/**
 * Mean silhouette coefficient of a clustering, using cosine distance.
 *
 * For each point, `a` is its mean distance to the other members of its own
 * cluster and `b` is the smallest mean distance to the members of any other
 * non-empty cluster; the point scores `(b - a) / max(a, b)`.
 *
 * A point scores 0 when it is the only member of its cluster (the usual
 * silhouette convention), when no other non-empty cluster exists, or when
 * `a` and `b` are both 0 (which would otherwise be 0/0 = NaN).
 *
 * @param {Array<ArrayLike<number>>} vectors - The clustered vectors.
 * @param {number[]} assignments - Cluster label of each vector, by index.
 * @param {number} k - Number of clusters; labels 0..k-1 are considered when
 *   looking for the nearest other cluster.
 * @returns {number} Mean score rounded to 3 decimals; 0 for empty input.
 */
function _silhouette(vectors, assignments, k) {
  const n = vectors.length;
  if (n === 0) return 0;

  // Per-cluster accumulators, reset for every point.
  const otherSum = Array.from({ length: k }, () => 0);
  const otherCount = Array.from({ length: k }, () => 0);
  let total = 0;

  for (let i = 0; i < n; i++) {
    const own = assignments[i];
    let ownSum = 0;
    let ownCount = 0;
    otherSum.fill(0);
    otherCount.fill(0);

    // Single pass over the other points, bucketing distances by cluster.
    for (let j = 0; j < n; j++) {
      if (j === i) continue;
      const label = assignments[j];
      if (label === own) {
        ownSum += _cosDist(vectors[i], vectors[j]);
        ownCount += 1;
      } else if (Number.isInteger(label) && label >= 0 && label < k) {
        otherSum[label] += _cosDist(vectors[i], vectors[j]);
        otherCount[label] += 1;
      }
    }

    // Singleton cluster: contributes 0 rather than a perfect score of 1.
    if (ownCount === 0) continue;
    const a = ownSum / ownCount;

    let b = Infinity;
    for (let c = 0; c < k; c++) {
      if (otherCount[c] > 0) b = Math.min(b, otherSum[c] / otherCount[c]);
    }
    if (b === Infinity) continue;

    const scale = Math.max(a, b);
    if (scale > 0) total += (b - a) / scale;
  }

  return Math.round((total / n) * 1000) / 1000;
}
61
+
62
/**
 * Builds a one-line, comma-separated textual profile of a lead.
 *
 * Missing fields fall back to fixed defaults (see each line). The state
 * segment is omitted entirely when the lead has no state, so the text never
 * contains an empty `estado: ` fragment.
 *
 * @param {object} l - Lead row. Fields read: predicted_ltv_class,
 *   engagement_score, intention_level, utm_source, utm_medium, country,
 *   state, hour_of_day, is_weekend, days_since_lead.
 * @returns {string} Profile text with Portuguese labels.
 */
function _buildLeadProfile(l) {
  const parts = [
    `LTV: ${l.predicted_ltv_class || 'desconhecido'}`,
    `engajamento: ${Math.round(l.engagement_score || 0)}`,
    `intenção: ${l.intention_level || 'desconhecida'}`,
    `origem: ${l.utm_source || 'direto'}`,
    `canal: ${l.utm_medium || 'desconhecido'}`,
    `país: ${l.country || 'BR'}`,
    // Empty string when the state is unknown, so the filter below drops it.
    l.state ? `estado: ${l.state}` : '',
    // `??` rather than `||`: hour 0 (midnight) is a valid value and must not
    // be replaced by the noon default.
    `hora: ${l.hour_of_day ?? 12}h`,
    l.is_weekend ? 'fim-de-semana' : 'dia-útil',
    `recência: ${l.days_since_lead || 0} dias`,
  ];
  return parts.filter(Boolean).join(', ');
}
76
+
8
77
  // ── POST /api/segmentation/cluster ────────────────────────────────────────────
78
+ // Clustering real: embeddinggemma-300m → K-means vetorial → Granite para nomear
9
79
  export async function handleSegmentationCluster(env, request, headers) {
10
80
  if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });
11
- if (!env.AI) return new Response(JSON.stringify({ error: 'Workers AI não configurado (verifique binding AI no wrangler.toml)' }), { status: 503, headers });
81
+ if (!env.AI) return new Response(JSON.stringify({ error: 'Workers AI não configurado' }), { status: 503, headers });
12
82
 
13
83
  const url = new URL(request.url);
14
84
  const algorithm = url.searchParams.get('algorithm') || 'kmeans';
15
- const nClusters = Math.min(10, Math.max(3, parseInt(url.searchParams.get('n_clusters') || '5')));
85
+ const nClusters = Math.min(10, Math.max(2, parseInt(url.searchParams.get('n_clusters') || '5')));
16
86
  const clientVertical = url.searchParams.get('vertical') || 'general';
17
87
  const forceRecluster = url.searchParams.get('force') === 'true';
18
88
 
@@ -21,16 +91,14 @@ export async function handleSegmentationCluster(env, request, headers) {
21
91
  }
22
92
 
23
93
  try {
24
- // 1. Cluster recente? Evitar re-clustering desnecessário (< 7 dias)
25
94
  if (!forceRecluster) {
26
95
  const existing = await env.DB.prepare(`
27
96
  SELECT id, created_at, cluster_name FROM ml_segments
28
97
  WHERE clustering_algorithm = ? AND is_active = 1 AND client_vertical = ?
29
98
  ORDER BY created_at DESC LIMIT 1
30
99
  `).bind(algorithm, clientVertical).first();
31
-
32
100
  if (existing) {
33
- const ageDays = (Date.now() - new Date(existing.created_at).getTime()) / (1000 * 60 * 60 * 24);
101
+ const ageDays = (Date.now() - new Date(existing.created_at).getTime()) / 864e5;
34
102
  if (ageDays < 7) {
35
103
  return new Response(JSON.stringify({
36
104
  success: true, message: 'Cluster existente ainda válido (< 7 dias). Use ?force=true para re-clustering.',
@@ -41,7 +109,6 @@ export async function handleSegmentationCluster(env, request, headers) {
41
109
  }
42
110
  }
43
111
 
44
- // 2. Extrair leads históricos do D1 (últimos 6 meses, excluindo bots confirmados)
45
112
  const leadsRes = await env.DB.prepare(`
46
113
  SELECT id, predicted_ltv_class, engagement_score, intention_level,
47
114
  country, state, utm_source, utm_medium, bot_score,
@@ -49,162 +116,125 @@ export async function handleSegmentationCluster(env, request, headers) {
49
116
  CAST(julianday('now') - julianday(created_at) AS INTEGER) AS days_since_lead,
50
117
  CASE WHEN strftime('%w', created_at) IN ('0','6') THEN 1 ELSE 0 END AS is_weekend
51
118
  FROM leads
52
- WHERE created_at >= datetime('now', '-6 months')
53
- AND (bot_score IS NULL OR bot_score < 2)
54
- ORDER BY RANDOM()
55
- LIMIT 2000
119
+ WHERE created_at >= datetime('now', '-6 months') AND (bot_score IS NULL OR bot_score < 2)
120
+ ORDER BY RANDOM() LIMIT 2000
56
121
  `).all();
57
122
 
58
123
  const leads = leadsRes.results || [];
59
-
60
124
  if (leads.length < 50) {
61
- return new Response(JSON.stringify({
62
- error: 'Dados insuficientes para clustering. Mínimo: 50 leads nos últimos 6 meses.',
63
- leads_found: leads.length, required: 50,
64
- }), { status: 400, headers });
65
- }
66
-
67
- // 3. Feature Engineering — normalização 0–1
68
- const features = leads.map(l => ({
69
- id: l.id,
70
- ltv: l.predicted_ltv_class === 'High' ? 1 : (l.predicted_ltv_class === 'Medium' ? 0.5 : 0),
71
- engagement: Math.min((l.engagement_score || 0) / 100, 1),
72
- intention: l.intention_level === 'comprador' || l.intention_level === 'high_intent' ? 1
73
- : l.intention_level === 'interessado' ? 0.6
74
- : l.intention_level === 'curioso' ? 0.3 : 0,
75
- recency: Math.max(0, 1 - (l.days_since_lead || 0) / 180),
76
- hour: (l.hour_of_day || 12) / 23,
77
- is_weekend: l.is_weekend || 0,
78
- is_br: l.country === 'BR' ? 1 : 0,
79
- is_paid: ['facebook','google','tiktok','instagram','youtube'].includes((l.utm_source || '').toLowerCase()) ? 1 : 0,
80
- }));
81
-
82
- // 4. Prompt para Workers AI
83
- const sampleSize = Math.min(features.length, 100);
84
- const sample = features.slice(0, sampleSize);
85
-
86
- const clusteringPrompt =
87
- `You are a customer segmentation ML expert. Perform ${algorithm} clustering on ${sampleSize} customers into ${nClusters} segments.
88
-
89
- Customer features (all normalized 0-1):
90
- - ltv: predicted lifetime value (0=Low, 0.5=Medium, 1=High)
91
- - engagement: browser engagement score
92
- - intention: purchase intention (0=none, 0.3=curious, 0.6=interested, 1=buyer)
93
- - recency: lead recency (1=today, 0=6 months ago)
94
- - hour: conversion hour of day
95
- - is_weekend: converted on weekend (0/1)
96
- - is_br: lead from Brazil (0/1)
97
- - is_paid: from paid traffic channel (0/1)
98
-
99
- Data (${sampleSize} customers): ${JSON.stringify(sample.slice(0, 50))}
100
-
101
- Return ONLY valid JSON, zero explanation:
102
- {
103
- "clusters": [
104
- {
105
- "cluster_id": 0,
106
- "name": "[Nome Descritivo em Português]",
107
- "size": ${Math.round(sampleSize / nClusters)},
108
- "percentage": ${Math.round(100 / nClusters)},
109
- "characteristics": {
110
- "avg_ltv_class": 0.5,
111
- "avg_behavior_score": 0.5,
112
- "avg_engagement_score": 0.5,
113
- "avg_intention_level": 0.5,
114
- "avg_days_since_lead": 30,
115
- "dominant_countries": ["BR"],
116
- "dominant_states": ["SP", "RJ"],
117
- "dominant_utm_sources": ["facebook"],
118
- "top_features": ["ltv", "engagement"]
119
- },
120
- "centroid": { "ltv": 0.5, "engagement": 0.5, "intention": 0.5 },
121
- "action_recommendation": "[Recomendação de campanha específica para este segmento]"
125
+ return new Response(JSON.stringify({ error: 'Dados insuficientes para clustering. Mínimo: 50 leads.', leads_found: leads.length, required: 50 }), { status: 400, headers });
122
126
  }
123
- ],
124
- "silhouette_score": 0.65,
125
- "total_processed": ${sampleSize}
126
- }`;
127
127
 
128
- // 5. Workers AI
129
128
  const startTime = Date.now();
130
- const aiRes = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
131
- messages: [{ role: 'user', content: clusteringPrompt }],
132
- max_tokens: 2000,
133
- });
134
- const duration = Date.now() - startTime;
135
-
136
- if (!aiRes?.response) throw new Error('Workers AI não retornou resposta');
129
+ const sample = leads.slice(0, 100);
130
+ const profiles = sample.map(_buildLeadProfile);
131
+
132
+ // Embeddings reais via embeddinggemma-300m
133
+ const embRes = await env.AI.run('@cf/baai/bge-m3', { text: profiles });
134
+ const vectors = embRes.data;
135
+ if (!vectors || vectors.length < nClusters) throw new Error(`embeddinggemma retornou ${vectors?.length ?? 0} vetores`);
136
+
137
+ // K-means vetorial real
138
+ const { assignments } = _kmeansRun(vectors, nClusters);
139
+ const silhouetteScore = _silhouette(vectors, assignments, nClusters);
140
+
141
+ // Agregação por cluster para nomear com Granite
142
+ const clusterStats = Array.from({ length: nClusters }, (_, c) => {
143
+ const members = sample.filter((_, i) => assignments[i] === c);
144
+ if (!members.length) return null;
145
+ const ltvMap = { High: 1, Medium: 0.5, Low: 0 };
146
+ const avgLtv = members.reduce((s, l) => s + (ltvMap[l.predicted_ltv_class] ?? 0), 0) / members.length;
147
+ const avgEng = members.reduce((s, l) => s + (l.engagement_score || 0), 0) / members.length;
148
+ const avgDays = members.reduce((s, l) => s + (l.days_since_lead || 0), 0) / members.length;
149
+ const freq = (arr) => arr.length ? [...arr.reduce((m,s) => m.set(s,(m.get(s)||0)+1), new Map())].sort((a,b)=>b[1]-a[1])[0]?.[0] : null;
150
+ return {
151
+ c, size: members.length, pct: Math.round(members.length / sample.length * 100),
152
+ avgLtv, avgEng, avgDays,
153
+ topSource: freq(members.map(l => l.utm_source).filter(Boolean)) || 'direto',
154
+ topState: freq(members.map(l => l.state).filter(Boolean)) || 'BR',
155
+ topIntent: freq(members.map(l => l.intention_level).filter(Boolean)) || 'desconhecida',
156
+ };
157
+ }).filter(Boolean);
158
+
159
+ // Granite apenas para nomear segmentos
160
+ const namingPrompt =
161
+ `Você é especialista em segmentação de clientes. Dê um nome descritivo em português e uma recomendação de campanha para cada segmento. Retorne SOMENTE JSON válido:
162
+ {"segments":[{"cluster_id":0,"name":"...","action":"..."},...]}
163
+
164
+ ${clusterStats.map(s => `Cluster ${s.c}: LTV=${s.avgLtv.toFixed(2)}, engajamento=${s.avgEng.toFixed(0)}, intenção="${s.topIntent}", origem="${s.topSource}", estado="${s.topState}", recência=${s.avgDays.toFixed(0)} dias, tamanho=${s.size}`).join('\n')}`;
165
+
166
+ const nameRes = await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', { messages: [{ role: 'user', content: namingPrompt }], max_tokens: 800 });
167
+ let clusterNames = {};
168
+ try {
169
+ const m = (nameRes?.response || '').match(/\{[\s\S]*\}/);
170
+ if (m) (JSON.parse(m[0]).segments || []).forEach(s => { clusterNames[s.cluster_id] = { name: s.name, action: s.action }; });
171
+ } catch { /* usa nomes fallback */ }
137
172
 
138
- const jsonMatch = aiRes.response.trim().match(/\{[\s\S]*\}/);
139
- if (!jsonMatch) throw new Error('Resposta do Workers AI não contém JSON válido');
140
- const mlResult = JSON.parse(jsonMatch[0]);
173
+ const duration = Date.now() - startTime;
141
174
 
142
- if (!Array.isArray(mlResult.clusters) || mlResult.clusters.length === 0) {
143
- throw new Error('Workers AI não retornou clusters válidos');
144
- }
175
+ const clusters = clusterStats.map(s => ({
176
+ cluster_id: s.c,
177
+ name: clusterNames[s.c]?.name || `Segmento ${s.c + 1}`,
178
+ size: s.size, percentage: s.pct,
179
+ action_recommendation: clusterNames[s.c]?.action || '',
180
+ characteristics: {
181
+ avg_ltv_class: s.avgLtv, avg_engagement_score: s.avgEng,
182
+ avg_intention_level: s.avgLtv, avg_days_since_lead: s.avgDays,
183
+ dominant_countries: ['BR'], dominant_states: [s.topState],
184
+ dominant_utm_sources: [s.topSource], top_features: ['ltv', 'engagement', 'intention'],
185
+ },
186
+ }));
145
187
 
146
- // 6. Inativar clusters anteriores
147
188
  await env.DB.prepare(`UPDATE ml_segments SET is_active = 0 WHERE clustering_algorithm = ? AND client_vertical = ? AND is_active = 1`).bind(algorithm, clientVertical).run();
148
189
 
149
- // 7. Persistir novos clusters
150
190
  const now = new Date().toISOString();
151
- for (const cluster of mlResult.clusters) {
152
- const ch = cluster.characteristics || {};
191
+ for (const cluster of clusters) {
192
+ const ch = cluster.characteristics;
153
193
  await env.DB.prepare(`
154
194
  INSERT INTO ml_segments (
155
- cluster_id, cluster_name, clustering_algorithm, client_vertical,
156
- size, percentage, avg_ltv_class, avg_behavior_score, avg_engagement_score,
157
- avg_intention_level, avg_days_since_lead,
195
+ cluster_id, cluster_name, clustering_algorithm, client_vertical, size, percentage,
196
+ avg_ltv_class, avg_behavior_score, avg_engagement_score, avg_intention_level, avg_days_since_lead,
158
197
  dominant_countries, dominant_states, dominant_utm_sources, dominant_features,
159
198
  silhouette_score, action_recommendations, bid_recommendations, campaign_recommendations,
160
199
  is_active, created_at, updated_at
161
200
  ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?)
162
201
  `).bind(
163
- cluster.cluster_id || 0, cluster.name || `Segmento ${cluster.cluster_id}`, algorithm, clientVertical,
164
- cluster.size || 0, cluster.percentage || 0,
165
- ch.avg_ltv_class || 0, ch.avg_behavior_score || 0, ch.avg_engagement_score || 0,
166
- ch.avg_intention_level || 0, ch.avg_days_since_lead || 0,
167
- JSON.stringify(ch.dominant_countries || ['BR']), JSON.stringify(ch.dominant_states || []),
168
- JSON.stringify(ch.dominant_utm_sources || []), JSON.stringify(ch.top_features || []),
169
- mlResult.silhouette_score || 0,
170
- JSON.stringify([cluster.action_recommendation || '']), JSON.stringify([]), JSON.stringify([]),
202
+ cluster.cluster_id, cluster.name, algorithm, clientVertical, cluster.size, cluster.percentage,
203
+ ch.avg_ltv_class, ch.avg_engagement_score, ch.avg_engagement_score, ch.avg_intention_level, ch.avg_days_since_lead,
204
+ JSON.stringify(ch.dominant_countries), JSON.stringify(ch.dominant_states),
205
+ JSON.stringify(ch.dominant_utm_sources), JSON.stringify(ch.top_features),
206
+ silhouetteScore,
207
+ JSON.stringify([cluster.action_recommendation]), JSON.stringify([]), JSON.stringify([]),
171
208
  now, now,
172
209
  ).run();
173
210
  }
174
211
 
175
- // 8. Log no histórico
176
212
  try {
177
213
  await env.DB.prepare(`
178
- INSERT INTO ml_clustering_history (
179
- clustering_id, started_at, completed_at, algorithm,
180
- n_leads_processed, n_clusters_created, total_duration_ms,
181
- workers_ai_neurons_used, status, parameters, results_summary
182
- ) VALUES (0, ?, datetime('now'), ?, ?, ?, ?, ?, 'completed', ?, ?)
183
- `).bind(
184
- new Date(startTime).toISOString(), algorithm, leads.length, mlResult.clusters.length,
185
- duration, Math.ceil(duration * 0.01),
186
- JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical }),
187
- JSON.stringify({ clusters: mlResult.clusters.length, silhouette: mlResult.silhouette_score }),
214
+ INSERT INTO ml_clustering_history (clustering_id, started_at, completed_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, parameters, results_summary)
215
+ VALUES (0, ?, datetime('now'), ?, ?, ?, ?, ?, 'completed', ?, ?)
216
+ `).bind(new Date(startTime).toISOString(), algorithm, leads.length, clusters.length, duration, Math.ceil(duration * 0.01),
217
+ JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical, engine: 'embeddinggemma-300m+kmeans' }),
218
+ JSON.stringify({ clusters: clusters.length, silhouette: silhouetteScore }),
188
219
  ).run();
189
220
  } catch (e) { console.error('[Segmentation] history log error:', e.message); }
190
221
 
191
222
  return new Response(JSON.stringify({
192
- success: true, algorithm, n_clusters: mlResult.clusters.length, client_vertical: clientVertical,
193
- leads_analyzed: leads.length, duration_ms: duration, silhouette_score: mlResult.silhouette_score || null,
194
- clusters: mlResult.clusters, generated_at: now,
223
+ success: true, algorithm, engine: 'embeddinggemma-300m + kmeans vetorial',
224
+ n_clusters: clusters.length, client_vertical: clientVertical,
225
+ leads_analyzed: leads.length, sample_embedded: sample.length,
226
+ duration_ms: duration, silhouette_score: silhouetteScore,
227
+ clusters, generated_at: now,
195
228
  }), { status: 200, headers });
196
229
 
197
230
  } catch (err) {
198
231
  console.error('[Segmentation] cluster error:', err.message);
199
232
  try {
200
- if (env.DB) {
201
- await env.DB.prepare(`
202
- INSERT INTO ml_clustering_history (clustering_id, started_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, error_message, parameters, results_summary)
203
- VALUES (0, datetime('now'), ?, 0, 0, 0, 0, 'failed', ?, ?, '{}')
204
- `).bind(algorithm, err.message, JSON.stringify({ algorithm, n_clusters: nClusters })).run();
205
- }
206
- } catch { /* não bloquear a resposta de erro */ }
207
-
233
+ if (env.DB) await env.DB.prepare(`
234
+ INSERT INTO ml_clustering_history (clustering_id, started_at, algorithm, n_leads_processed, n_clusters_created, total_duration_ms, workers_ai_neurons_used, status, error_message, parameters, results_summary)
235
+ VALUES (0, datetime('now'), ?, 0, 0, 0, 0, 'failed', ?, ?, '{}')
236
+ `).bind(algorithm, err.message, JSON.stringify({ algorithm, n_clusters: nClusters })).run();
237
+ } catch { /* não bloquear */ }
208
238
  return new Response(JSON.stringify({ error: 'Erro ao executar clustering', message: err.message }), { status: 500, headers });
209
239
  }
210
240
  }
@@ -86,14 +86,16 @@ export const VALID_EVENT_NAMES = new Set([
86
86
  'AddToCart','CompleteRegistration','Contact','Schedule',
87
87
  'StartTrial','Subscribe','SubmitApplication','Search',
88
88
  'video_start','video_25','video_50','video_75','video_complete',
89
+ // Imóveis — intenção de visita física, financiamento e favoritar
90
+ 'FindLocation','CustomizeProduct','AddToWishlist',
89
91
  ]);
90
92
 
91
93
  // ── Taxonomia de funil (funnel_stage → profundidade semântica) ────────────────
92
94
  // Fonte de verdade para interpretar funnel_stage em qualquer ponto do sistema.
93
95
  export const FUNNEL_TAXONOMY = {
94
- top: ['scroll_50', 'time_30s', 'page_view', 'gallery_view'],
95
- mid: ['map_view', 'gallery_click', 'price_hover', 'time_3min'],
96
- bottom: ['route_click', 'whatsapp_click', 'cta_hover'],
96
+ top: ['scroll_50', 'time_30s', 'page_view', 'gallery_view', 'AddToWishlist'],
97
+ mid: ['map_view', 'gallery_click', 'price_hover', 'time_3min', 'FindLocation'],
98
+ bottom: ['route_click', 'whatsapp_click', 'cta_hover', 'CustomizeProduct'],
97
99
  conversion: ['schedule_confirmed', 'lead_form', 'purchase', 'visit_booked'],
98
100
  };
99
101