npm - cdp-edge - Versions diffs - 2.1.0 → 2.2.1 - Mend

cdp-edge 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/server-edge-tracker/worker.js CHANGED Viewed

@@ -1903,7 +1903,7 @@ async function predictLtv(env, payload, request, customSystemPrompt = null) {
           has_phone: !!payload.phone,
         })},
       ];
-      const aiRes = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', { messages: prompt, max_tokens: 32 });
+      const aiRes = await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', { messages: prompt, max_tokens: 32 });
       const parsed = JSON.parse(aiRes.response.trim());
       if (typeof parsed.adjustment === 'number') {
         aiAdjustment = Math.max(-10, Math.min(10, parsed.adjustment));
@@ -2415,8 +2415,82 @@ function tryParseJson(str, fallback) {
   try { return JSON.parse(str); } catch { return fallback !== undefined ? fallback : null; }
 }
+// ── Helpers K-means vetorial (usado pelo clustering com embeddings) ───────────
/**
 * Cosine distance between two numeric vectors.
 *
 * Both vectors are read at indices `0 .. a.length - 1`, so `b` is expected to
 * be at least as long as `a`.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} `1 - cos(a, b)`, clamped to the range [0, 2]. Returns 1
 *   when either vector has zero norm (no direction to compare).
 */
function _cosDist(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  // Guard the zero-norm case explicitly instead of adding an epsilon to the
  // denominator: an additive epsilon makes the result depend on vector scale
  // (two identical tiny vectors would appear far apart).
  if (denom === 0) return 1;
  // Floating-point rounding can push the ratio slightly outside [-1, 1];
  // clamp so callers never see a negative distance.
  return Math.min(2, Math.max(0, 1 - dot / denom));
}
/**
 * K-means clustering over vectors using cosine distance (`_cosDist`).
 *
 * Seeding follows the k-means++ idea: the first centroid is a uniformly random
 * input vector and each further centroid is drawn with probability
 * proportional to its distance from the nearest centroid chosen so far.
 * Centroids are then refined by alternating assignment and mean-update steps
 * until no assignment changes or `maxIter` iterations have run.
 *
 * The input vectors are never modified: centroids are independent copies.
 *
 * @param {number[][]} vectors - Input vectors, all of the same dimension.
 * @param {number} k - Number of clusters requested.
 * @param {number} [maxIter=25] - Maximum number of assignment/update rounds.
 * @returns {{assignments: number[], centroids: number[][]}} `assignments[i]`
 *   is the centroid index chosen for `vectors[i]`; `centroids` holds the final
 *   centroid vectors. Both are empty when `vectors` is empty.
 */
function _kmeansRun(vectors, k, maxIter = 25) {
  const n = vectors.length;
  if (n === 0) return { assignments: [], centroids: [] };
  const dim = vectors[0].length;
  const randomIndex = () => Math.floor(Math.random() * n);

  // K-means++ style seeding. Each seed is copied (`slice`) so that the
  // centroid update below cannot overwrite the caller's vectors.
  const centroids = [vectors[randomIndex()].slice()];
  while (centroids.length < k) {
    const dists = vectors.map((v) => Math.min(...centroids.map((c) => _cosDist(v, c))));
    const sum = dists.reduce((acc, d) => acc + d, 0);
    const r = Math.random() * sum;
    let cumul = 0;
    let picked = -1;
    for (let i = 0; i < n; i++) {
      cumul += dists[i];
      if (cumul >= r) {
        picked = i;
        break;
      }
    }
    // Fall back to a uniform pick only when the weighted draw selected nothing
    // (e.g. the distances contain NaN), so exactly one seed is added per round.
    if (picked === -1) picked = randomIndex();
    centroids.push(vectors[picked].slice());
  }

  const assignments = new Array(n).fill(0);
  for (let iter = 0; iter < maxIter; iter++) {
    // Assignment step: attach every vector to its nearest centroid.
    let changed = false;
    for (let i = 0; i < n; i++) {
      let best = 0;
      let bestD = Infinity;
      for (let c = 0; c < k; c++) {
        const d = _cosDist(vectors[i], centroids[c]);
        if (d < bestD) {
          bestD = d;
          best = c;
        }
      }
      if (assignments[i] !== best) {
        assignments[i] = best;
        changed = true;
      }
    }
    if (!changed) break;

    // Update step: accumulate per-cluster sums in a single pass, then average.
    const sums = Array.from({ length: k }, () => new Array(dim).fill(0));
    const counts = new Array(k).fill(0);
    for (let i = 0; i < n; i++) {
      const c = assignments[i];
      counts[c] += 1;
      for (let d = 0; d < dim; d++) sums[c][d] += vectors[i][d];
    }
    for (let c = 0; c < k; c++) {
      // An empty cluster keeps its previous centroid.
      if (counts[c] === 0) continue;
      for (let d = 0; d < dim; d++) centroids[c][d] = sums[c][d] / counts[c];
    }
  }
  return { assignments, centroids };
}
/**
 * Mean silhouette coefficient of a clustering, using cosine distance
 * (`_cosDist`), rounded to three decimals.
 *
 * For each point, `a` is its mean distance to the other members of its own
 * cluster and `b` is the smallest mean distance to the members of any other
 * cluster with index in `0 .. k - 1`; the point scores `(b - a) / max(a, b)`.
 * Points contribute 0 when they are alone in their cluster, when no other
 * non-empty cluster exists, or when `max(a, b)` is 0.
 *
 * @param {number[][]} vectors - The clustered vectors.
 * @param {number[]} assignments - `assignments[i]` is the cluster index of `vectors[i]`.
 * @param {number} k - Number of clusters.
 * @returns {number} Average silhouette in [-1, 1]; 0 for empty input.
 */
function _silhouette(vectors, assignments, k) {
  const n = vectors.length;
  if (n === 0) return 0;

  let total = 0;
  for (let i = 0; i < n; i++) {
    const own = assignments[i];
    // One pass over the other points: accumulate distance sums per cluster.
    const otherSum = new Array(k).fill(0);
    const otherCount = new Array(k).fill(0);
    let ownSum = 0;
    let ownCount = 0;
    for (let j = 0; j < n; j++) {
      if (j === i) continue;
      const cj = assignments[j];
      const d = _cosDist(vectors[i], vectors[j]);
      if (cj === own) {
        ownSum += d;
        ownCount += 1;
      } else if (cj >= 0 && cj < k) {
        otherSum[cj] += d;
        otherCount[cj] += 1;
      }
    }

    // Singleton clusters score 0 by convention; otherwise they would count as
    // perfectly clustered and inflate the average.
    if (ownCount === 0) continue;
    const a = ownSum / ownCount;

    let b = Infinity;
    for (let c = 0; c < k; c++) {
      if (otherCount[c] > 0) b = Math.min(b, otherSum[c] / otherCount[c]);
    }
    if (b === Infinity) continue;

    // Avoid 0/0 when the point coincides with every neighbour considered.
    const denom = Math.max(a, b);
    if (denom > 0) total += (b - a) / denom;
  }
  return Math.round((total / n) * 1000) / 1000;
}
/**
 * Builds the one-line textual profile of a lead that is sent to the embedding
 * model.
 *
 * Missing fields fall back to a placeholder value; the state entry is left out
 * entirely when the lead has no state.
 *
 * @param {object} l - Lead record. Fields read: `predicted_ltv_class`,
 *   `engagement_score`, `intention_level`, `utm_source`, `utm_medium`,
 *   `country`, `state`, `hour_of_day`, `is_weekend`, `days_since_lead`.
 * @returns {string} Comma-separated `label: value` description of the lead.
 */
function _buildLeadProfile(l) {
  return [
    `LTV: ${l.predicted_ltv_class || 'desconhecido'}`,
    `engajamento: ${Math.round(l.engagement_score || 0)}`,
    `intenção: ${l.intention_level || 'desconhecida'}`,
    `origem: ${l.utm_source || 'direto'}`,
    `canal: ${l.utm_medium || 'desconhecido'}`,
    `país: ${l.country || 'BR'}`,
    // Yield an empty entry when the state is unknown so filter(Boolean) drops it.
    l.state ? `estado: ${l.state}` : '',
    // `??` rather than `||`: hour 0 (midnight) is a valid value, not "missing".
    `hora: ${l.hour_of_day ?? 12}h`,
    (l.is_weekend ? 'fim-de-semana' : 'dia-útil'),
    `recência: ${l.days_since_lead || 0} dias`,
  ].filter(Boolean).join(', ');
}
 // ── POST /api/segmentation/cluster ───────────────────────────────────────────
-// Executa clustering K-means/DBSCAN/Hierarchical via Workers AI
+// Clustering real com embeddings (embeddinggemma-300m) + K-means vetorial
+// Granite usado apenas para nomear segmentos
 // Requer bindings: DB + AI
 async function handleSegmentationCluster(env, request, headers) {
   if (!env.DB) return new Response(JSON.stringify({ error: 'DB não configurado' }), { status: 503, headers });
@@ -2424,7 +2498,7 @@ async function handleSegmentationCluster(env, request, headers) {
   const url = new URL(request.url);
   const algorithm      = url.searchParams.get('algorithm') || 'kmeans';
-  const nClusters      = Math.min(10, Math.max(3, parseInt(url.searchParams.get('n_clusters') || '5')));
+  const nClusters      = Math.min(10, Math.max(2, parseInt(url.searchParams.get('n_clusters') || '5')));
   const clientVertical = url.searchParams.get('vertical') || 'general';
   const forceRecluster = url.searchParams.get('force') === 'true';
@@ -2480,96 +2554,94 @@ async function handleSegmentationCluster(env, request, headers) {
       }), { status: 400, headers });
     }
-    // 3. Feature Engineering — normalização 0–1
-    const features = leads.map(l => ({
-      id:         l.id,
-      ltv:        l.predicted_ltv_class === 'High' ? 1 : (l.predicted_ltv_class === 'Medium' ? 0.5 : 0),
-      engagement: Math.min((l.engagement_score || 0) / 100, 1),
-      intention:  l.intention_level === 'comprador' || l.intention_level === 'high_intent' ? 1
-                : l.intention_level === 'interessado' ? 0.6
-                : l.intention_level === 'curioso'     ? 0.3 : 0,
-      recency:    Math.max(0, 1 - (l.days_since_lead || 0) / 180),
-      hour:       (l.hour_of_day || 12) / 23,
-      is_weekend: l.is_weekend || 0,
-      is_br:      l.country === 'BR' ? 1 : 0,
-      is_paid:    ['facebook','google','tiktok','instagram','youtube'].includes(
-                    (l.utm_source || '').toLowerCase()) ? 1 : 0,
-    }));
+    const startTime = Date.now();
-    // 4. Prompt para Workers AI
-    const sampleSize = Math.min(features.length, 100);
-    const sample     = features.slice(0, sampleSize);
-    const clusteringPrompt =
-`You are a customer segmentation ML expert. Perform ${algorithm} clustering on ${sampleSize} customers into ${nClusters} segments.
-Customer features (all normalized 0-1):
-- ltv: predicted lifetime value (0=Low, 0.5=Medium, 1=High)
-- engagement: browser engagement score
-- intention: purchase intention (0=none, 0.3=curious, 0.6=interested, 1=buyer)
-- recency: lead recency (1=today, 0=6 months ago)
-- hour: conversion hour of day
-- is_weekend: converted on weekend (0/1)
-- is_br: lead from Brazil (0/1)
-- is_paid: from paid traffic channel (0/1)
-Data (${sampleSize} customers): ${JSON.stringify(sample.slice(0, 50))}
-Return ONLY valid JSON, zero explanation:
-{
-  "clusters": [
-    {
-      "cluster_id": 0,
-      "name": "[Nome Descritivo em Português]",
-      "size": ${Math.round(sampleSize / nClusters)},
-      "percentage": ${Math.round(100 / nClusters)},
-      "characteristics": {
-        "avg_ltv_class": 0.5,
-        "avg_behavior_score": 0.5,
-        "avg_engagement_score": 0.5,
-        "avg_intention_level": 0.5,
-        "avg_days_since_lead": 30,
-        "dominant_countries": ["BR"],
-        "dominant_states": ["SP", "RJ"],
-        "dominant_utm_sources": ["facebook"],
-        "top_features": ["ltv", "engagement"]
-      },
-      "centroid": { "ltv": 0.5, "engagement": 0.5, "intention": 0.5 },
-      "action_recommendation": "[Recomendação de campanha específica para este segmento]"
+    // 3. Gerar perfis textuais e embeddings via embeddinggemma-300m
+    const sample   = leads.slice(0, 100); // max 100 por batch
+    const profiles = sample.map(_buildLeadProfile);
+    const embRes = await env.AI.run('@cf/baai/bge-m3', { text: profiles });
+    const vectors = embRes.data; // float32[][] shape [N, 768]
+    if (!vectors || vectors.length < nClusters) {
+      throw new Error(`embeddinggemma retornou ${vectors?.length ?? 0} vetores — insuficiente para ${nClusters} clusters`);
     }
-  ],
-  "silhouette_score": 0.65,
-  "total_processed": ${sampleSize}
-}`;
-    // 5. Executar via Workers AI
-    const startTime = Date.now();
-    const aiRes = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
-      messages:   [{ role: 'user', content: clusteringPrompt }],
-      max_tokens: 2000,
+    // 4. K-means vetorial real (cosine distance)
+    const { assignments } = _kmeansRun(vectors, nClusters);
+    // 5. Silhouette score real
+    const silhouetteScore = _silhouette(vectors, assignments, nClusters);
+    // 6. Agregar estatísticas por cluster para nomear com Granite
+    const clusterStats = Array.from({ length: nClusters }, (_, c) => {
+      const members = sample.filter((_, i) => assignments[i] === c);
+      if (members.length === 0) return null;
+      const ltvMap = { High: 1, Medium: 0.5, Low: 0 };
+      const avgLtv  = members.reduce((s, l) => s + (ltvMap[l.predicted_ltv_class] ?? 0), 0) / members.length;
+      const avgEng  = members.reduce((s, l) => s + (l.engagement_score || 0), 0) / members.length;
+      const avgDays = members.reduce((s, l) => s + (l.days_since_lead || 0), 0) / members.length;
+      const sources = members.map(l => l.utm_source).filter(Boolean);
+      const states  = members.map(l => l.state).filter(Boolean);
+      const topSource = sources.length ? [...sources.reduce((m, s) => m.set(s, (m.get(s)||0)+1), new Map())].sort((a,b)=>b[1]-a[1])[0]?.[0] : 'direto';
+      const topState  = states.length  ? [...states.reduce((m, s)  => m.set(s, (m.get(s)||0)+1), new Map())].sort((a,b)=>b[1]-a[1])[0]?.[0] : 'BR';
+      const intentions = members.map(l => l.intention_level).filter(Boolean);
+      const topIntent = intentions.length ? [...intentions.reduce((m, s) => m.set(s,(m.get(s)||0)+1), new Map())].sort((a,b)=>b[1]-a[1])[0]?.[0] : 'desconhecida';
+      return { c, size: members.length, pct: Math.round(members.length / sample.length * 100), avgLtv, avgEng, avgDays, topSource, topState, topIntent };
+    }).filter(Boolean);
+    // 7. Usar Granite apenas para nomear e recomendar ação por cluster
+    const namingPrompt =
+`Você é especialista em segmentação de clientes. Dê um nome descritivo em português e uma recomendação de campanha para cada segmento abaixo. Retorne SOMENTE JSON válido:
+{"segments":[{"cluster_id":0,"name":"...","action":"..."},...]}
+Segmentos:
+${clusterStats.map(s => `Cluster ${s.c}: LTV médio=${s.avgLtv.toFixed(2)}, engajamento=${s.avgEng.toFixed(0)}, intenção dominante="${s.topIntent}", origem="${s.topSource}", estado="${s.topState}", recência=${s.avgDays.toFixed(0)} dias, tamanho=${s.size} leads`).join('\n')}`;
+    const nameRes = await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', {
+      messages: [{ role: 'user', content: namingPrompt }],
+      max_tokens: 800,
     });
-    const duration = Date.now() - startTime;
-    if (!aiRes?.response) throw new Error('Workers AI não retornou resposta');
+    let clusterNames = {};
+    try {
+      const m = (nameRes?.response || '').match(/\{[\s\S]*\}/);
+      if (m) {
+        const parsed = JSON.parse(m[0]);
+        (parsed.segments || []).forEach(s => { clusterNames[s.cluster_id] = { name: s.name, action: s.action }; });
+      }
+    } catch { /* usa nomes fallback */ }
-    // 6. Parse do resultado
-    const jsonMatch = aiRes.response.trim().match(/\{[\s\S]*\}/);
-    if (!jsonMatch) throw new Error('Resposta do Workers AI não contém JSON válido');
-    const mlResult = JSON.parse(jsonMatch[0]);
+    const duration = Date.now() - startTime;
-    if (!Array.isArray(mlResult.clusters) || mlResult.clusters.length === 0) {
-      throw new Error('Workers AI não retornou clusters válidos');
-    }
+    // 8. Montar resultado final
+    const clusters = clusterStats.map(s => ({
+      cluster_id:           s.c,
+      name:                 clusterNames[s.c]?.name || `Segmento ${s.c + 1}`,
+      size:                 s.size,
+      percentage:           s.pct,
+      action_recommendation: clusterNames[s.c]?.action || '',
+      characteristics: {
+        avg_ltv_class:        s.avgLtv,
+        avg_engagement_score: s.avgEng,
+        avg_intention_level:  s.avgLtv,
+        avg_days_since_lead:  s.avgDays,
+        dominant_countries:   ['BR'],
+        dominant_states:      [s.topState],
+        dominant_utm_sources: [s.topSource],
+        top_features:         ['ltv', 'engagement', 'intention'],
+      },
+    }));
-    // 7. Inativar clusters anteriores do mesmo algoritmo/vertical
+    // 9. Inativar clusters anteriores do mesmo algoritmo/vertical
     await env.DB.prepare(
       `UPDATE ml_segments SET is_active = 0 WHERE clustering_algorithm = ? AND client_vertical = ? AND is_active = 1`
     ).bind(algorithm, clientVertical).run();
-    // 8. Persistir novos clusters no D1
+    // 10. Persistir novos clusters no D1
     const now = new Date().toISOString();
-    for (const cluster of mlResult.clusters) {
-      const ch = cluster.characteristics || {};
+    for (const cluster of clusters) {
+      const ch = cluster.characteristics;
       await env.DB.prepare(`
         INSERT INTO ml_segments (
           cluster_id, cluster_name, clustering_algorithm, client_vertical,
@@ -2581,23 +2653,23 @@ Return ONLY valid JSON, zero explanation:
           is_active, created_at, updated_at
         ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?)
       `).bind(
-        cluster.cluster_id || 0,
-        cluster.name        || `Segmento ${cluster.cluster_id}`,
+        cluster.cluster_id,
+        cluster.name,
         algorithm,
         clientVertical,
-        cluster.size        || 0,
-        cluster.percentage  || 0,
-        ch.avg_ltv_class    || 0,
-        ch.avg_behavior_score   || 0,
-        ch.avg_engagement_score || 0,
-        ch.avg_intention_level  || 0,
-        ch.avg_days_since_lead  || 0,
-        JSON.stringify(ch.dominant_countries   || ['BR']),
-        JSON.stringify(ch.dominant_states      || []),
-        JSON.stringify(ch.dominant_utm_sources || []),
-        JSON.stringify(ch.top_features         || []),
-        mlResult.silhouette_score             || 0,
-        JSON.stringify([cluster.action_recommendation || '']),
+        cluster.size,
+        cluster.percentage,
+        ch.avg_ltv_class,
+        ch.avg_engagement_score,
+        ch.avg_engagement_score,
+        ch.avg_intention_level,
+        ch.avg_days_since_lead,
+        JSON.stringify(ch.dominant_countries),
+        JSON.stringify(ch.dominant_states),
+        JSON.stringify(ch.dominant_utm_sources),
+        JSON.stringify(ch.top_features),
+        silhouetteScore,
+        JSON.stringify([cluster.action_recommendation]),
         JSON.stringify([]),
         JSON.stringify([]),
         now,
@@ -2605,7 +2677,7 @@ Return ONLY valid JSON, zero explanation:
       ).run();
     }
-    // 9. Log no histórico de clustering
+    // 11. Log no histórico de clustering
     try {
       await env.DB.prepare(`
         INSERT INTO ml_clustering_history (
@@ -2617,23 +2689,25 @@ Return ONLY valid JSON, zero explanation:
         new Date(startTime).toISOString(),
         algorithm,
         leads.length,
-        mlResult.clusters.length,
+        clusters.length,
         duration,
         Math.ceil(duration * 0.01),
-        JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical }),
-        JSON.stringify({ clusters: mlResult.clusters.length, silhouette: mlResult.silhouette_score }),
+        JSON.stringify({ algorithm, n_clusters: nClusters, vertical: clientVertical, engine: 'embeddinggemma-300m+kmeans' }),
+        JSON.stringify({ clusters: clusters.length, silhouette: silhouetteScore }),
       ).run();
     } catch (e) { console.error('[Segmentation] history log error:', e.message); }
     return new Response(JSON.stringify({
       success:          true,
       algorithm,
-      n_clusters:       mlResult.clusters.length,
+      engine:           'embeddinggemma-300m + kmeans vetorial',
+      n_clusters:       clusters.length,
       client_vertical:  clientVertical,
       leads_analyzed:   leads.length,
+      sample_embedded:  sample.length,
       duration_ms:      duration,
-      silhouette_score: mlResult.silhouette_score || null,
-      clusters:         mlResult.clusters,
+      silhouette_score: silhouetteScore,
+      clusters,
       generated_at:     now,
     }), { status: 200, headers });
@@ -2794,14 +2868,6 @@ async function handleSegmentationUpdate(env, request, headers) {
 // Heurístico puro (sem AI) — latência zero no /track
 // ─────────────────────────────────────────────────────────────────────────────
-// Domínios de email descartáveis
-const DISPOSABLE_EMAIL_DOMAINS = new Set([
-  'mailinator.com','guerrillamail.com','tempmail.com','throwaway.email',
-  'yopmail.com','sharklasers.com','guerrillamailblock.com','spam4.me',
-  '10minutemail.com','trashmail.com','maildrop.cc','fakeinbox.com',
-  'dispostable.com','mailnull.com','tempr.email','getnada.com',
-]);
 // ASNs conhecidos de datacenters (evitar falsos negativos em ASNs legítimos)
 const DATACENTER_PATTERNS = /amazon|google|microsoft|digitalocean|linode|ovh|vultr|hetzner|contabo|cloudflare|packet|rackspace|leaseweb/i;
@@ -2854,15 +2920,7 @@ async function checkFraudGate(env, request, payload) {
       result.score += 20; result.reasons.push('no_accept_language');
     }
-    // 6. Email descartável
-    if (email) {
-      const domain = email.split('@')[1]?.toLowerCase();
-      if (domain && DISPOSABLE_EMAIL_DOMAINS.has(domain)) {
-        result.score += 25; result.reasons.push('disposable_email');
-      }
-    }
-    // 7. Velocity check via KV
+    // 6. Velocity check via KV
     if (env.GEO_CACHE && ip) {
       const velKey1h = `fraud_velocity:${ip}:h`;
       const velStr   = await env.GEO_CACHE.get(velKey1h);
@@ -3839,7 +3897,7 @@ export default {
       // Workers AI — ping
       try {
-        await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
+        await env.AI.run('@cf/ibm-granite/granite-4.0-h-micro', {
           messages: [{ role: 'user', content: 'ping' }],
           max_tokens: 1,
         });

package/server-edge-tracker/wrangler.toml CHANGED Viewed

@@ -25,10 +25,10 @@ zone_name = "lancamentosabc.com.br"
 # ── Variáveis públicas (não são segredos) ─────────────────────────────────────
 [vars]
-META_PIXEL_ID      = "SEU_META_PIXEL_ID"
-GA4_MEASUREMENT_ID = "G-XXXXXXXXXX"
-TIKTOK_PIXEL_ID    = "CXXXXXXXXXXXXXXX"
-SITE_DOMAIN        = "SEU_DOMINIO"
+META_PIXEL_ID      = "1583939052660159"
+GA4_MEASUREMENT_ID = "G-G7VEN1MNH1"
+TIKTOK_PIXEL_ID    = "D71D6T3C77U56RM5VF0G"
+SITE_DOMAIN        = "lancamentosabc.com.br"
 # ── Banco D1 ──────────────────────────────────────────────────────────────────
 # Após criar o banco com "wrangler d1 create cdp-edge-db",
@@ -95,6 +95,22 @@ namespace_id = "1001"
 limit  = 60
 period = 60
+# ── Observabilidade — Logs + Traces persistidos no painel Cloudflare ─────────
+[observability]
+enabled            = false
+head_sampling_rate = 1
+[observability.logs]
+enabled            = true
+head_sampling_rate = 1
+persist            = true
+invocation_logs    = true
+[observability.traces]
+enabled            = false
+persist            = true
+head_sampling_rate = 1
 # ── Secrets (NÃO ficam aqui — configurar via CLI) ─────────────────────────────
 # wrangler secret put META_ACCESS_TOKEN     ← token Meta CAPI (obrigatório)
 # wrangler secret put GA4_API_SECRET        ← secret GA4 Measurement Protocol (obrigatório)
@@ -107,6 +123,7 @@ period = 60
 # wrangler secret put RESEND_API_KEY            ← API Key do Resend (resend.com)
 # wrangler secret put RESEND_FROM_EMAIL         ← Remetente verificado ex: "CDP Edge <noreply@seudominio.com.br>"
 # wrangler secret put WA_WEBHOOK_VERIFY_TOKEN   ← Token de verificação do webhook WhatsApp (você define — qualquer string segura)
+# wrangler secret put WEBHOOK_SECRET_TICTO      ← HMAC-SHA256 Ticto
 # wrangler secret put PINTEREST_ACCESS_TOKEN    ← Bearer token Pinterest Conversions API
 # wrangler secret put PINTEREST_AD_ACCOUNT_ID   ← ID da conta de anúncios Pinterest (ex: 549755813XXX)
 # wrangler secret put REDDIT_ACCESS_TOKEN       ← Bearer token Reddit Conversions API