npm - cdp-edge - Versions diffs - 1.2.2 → 1.3.0 - Mend

cdp-edge 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

package/server-edge-tracker/SEGMENTATION-DOCS.md ADDED Viewed

@@ -0,0 +1,513 @@
+# API de Segmentação Dinâmica ML — Documentação Completa
+## Visão Geral
+API REST para segmentação dinâmica de leads via Machine Learning (K-means, DBSCAN, Hierarchical).
+---
+## Endpoints Disponíveis
+| Método | Endpoint | Descrição |
+|---|---|---|
+| **POST** | `/api/segmentation/cluster` | Executar clustering K-means |
+| **GET** | `/api/segmentation/list` | Listar todos os segmentos ativos |
+| **GET** | `/api/segmentation/outliers` | Listar leads anômalos (outliers) |
+| **PUT** | `/api/segmentation/update` | Atualizar recomendações de segmento |
+---
+## 1. POST /api/segmentation/cluster — Executar Clustering
+Executa clustering K-means via Workers AI para criar segmentos de leads.
+### Request Body
+```json
+{
+  "algorithm": "kmeans",              // Opcional: 'kmeans', 'dbscan', 'hierarchical'
+  "n_clusters": 5,                   // Opcional: 3-10 (padrão: 5)
+  "vertical": "curso-online",          // Opcional: vertical do cliente
+  "force": false                        // Opcional: forçar novo clustering
+}
+```
+### Response
+```json
+{
+  "success": true,
+  "cluster_id": 123,
+  "clustering_algorithm": "kmeans",
+  "n_clusters": 5,
+  "client_vertical": "curso-online",
+  "duration_ms": 12543,
+  "clusters": [
+    {
+      "cluster_id": 0,
+      "name": "Segmento 0 - Alto Valor + Alto Engajamento (SP)",
+      "size": 95,
+      "percentage": 0.25,
+      "characteristics": {
+        "avg_ltv": 497.50,
+        "avg_ltv_class": "High",
+        "avg_behavior_score": 75.3,
+        "avg_engagement_score": 82.1,
+        "avg_intention_level": 85.0,
+        "dominant_countries": ["BR", "AR"],
+        "dominant_states": ["SP", "RJ"],
+        "dominant_utm_sources": ["facebook", "google"],
+        "top_features": ["ltv", "behavior_score", "engagement_score"]
+      },
+      "centroid": {
+        "ltv_class": 0.75,
+        "behavior_score": 0.80,
+        "engagement_score": 0.85
+      },
+      "sample_leads": ["lead_451", "lead_892", "lead_1034"]
+    }
+  ],
+  "silhouette_scores": {
+    "overall": 0.62,
+    "by_cluster": [0.71, 0.58, 0.65, 0.60, 0.55]
+  },
+  "convergence": {
+    "iterations": 47,
+    "final_inertia": 1523.45
+  },
+  "generated_at": "2026-04-09T18:45:32.000Z"
+}
+```
+### Exemplo de Uso (cURL)
+```bash
+curl -X POST "https://seudominio.com/api/segmentation/cluster" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "algorithm": "kmeans",
+    "n_clusters": 5,
+    "vertical": "curso-online"
+  }'
+```
+---
+## 2. GET /api/segmentation/list — Listar Segmentos
+Lista todos os segmentos ativos com estatísticas detalhadas.
+### Query Parameters
+| Parâmetro | Tipo | Descrição |
+|---|---|---|
+| `algorithm` | string | Filtro por algoritmo: `kmeans`, `dbscan`, `hierarchical` |
+| `active` | boolean | Filtro por status: `1` (ativo) ou `0` (arquivado) |
+| `limit` | number | Máximo de resultados (padrão: 10) |
+### Exemplo de Request
+```bash
+curl "https://seudominio.com/api/segmentation/list?algorithm=kmeans&active=1&limit=10"
+```
+### Response
+```json
+{
+  "success": true,
+  "count": 5,
+  "segments": [
+    {
+      "id": 45,
+      "cluster_id": 123,
+      "cluster_name": "Segmento 0 - Alto Valor + Alto Engajamento (SP)",
+      "clustering_algorithm": "kmeans",
+      "client_vertical": "curso-online",
+      "size": 95,
+      "percentage": 0.25,
+      "avg_ltv": 497.50,
+      "avg_ltv_class": "High",
+      "avg_behavior_score": 75.3,
+      "avg_engagement_score": 82.1,
+      "avg_intention_level": 85.0,
+      "dominant_countries": ["BR", "AR"],
+      "dominant_states": ["SP", "RJ"],
+      "dominant_features": ["ltv", "behavior_score", "engagement_score"],
+      "silhouette_score": 0.71,
+      "action_recommendations": [
+        "Priorizar remarketing em 24h",
+        "Criar lookalike audience de alto valor"
+      ],
+      "bid_recommendations": [
+        {"adset_id": "456", "recommended_bid": "R$ 18.50", "confidence": 0.85}
+      ],
+      "campaign_recommendations": [
+        {"creative": "VSL A", "audience": "Segmento 0"},
+        {"creative": "VSL B", "audience": "Segmento 0"}
+      ],
+      "is_active": true,
+      "created_at": "2026-04-09T18:45:32.000Z",
+      "updated_at": "2026-04-09T18:45:32.000Z"
+    }
+  ]
+}
+```
+---
+## 3. GET /api/segmentation/outliers — Listar Anomalias
+Lista leads considerados outliers/anomalias (detecção DBSCAN).
+### Query Parameters
+| Parâmetro | Tipo | Descrição |
+|---|---|---|
+| `confidence` | number | Threshold de confiança (padrão: 0.5) |
+| `risk` | string | Filtro por nível: `high`, `medium`, `low` |
+| `limit` | number | Máximo de resultados (padrão: 50) |
+### Exemplo de Request
+```bash
+curl "https://seudominio.com/api/segmentation/outliers?confidence=0.3&risk=high&limit=20"
+```
+### Response
+```json
+{
+  "success": true,
+  "outliers": [
+    {
+      "lead_id": "lead_451",
+      "email": "suspicious@example.com",
+      "first_name": "João",
+      "last_name": "Silva",
+      "ltv_class": "High",
+      "behavior_score": 98,
+      "engagement_score": 85,
+      "intention_level": 90,
+      "days_since_lead": 0,
+      "cluster_id": null,
+      "cluster_name": "Outlier",
+      "confidence": 0.12,
+      "distance_to_centroid": 0.89,
+      "is_outlier": true,
+      "outlier_reason": "behavior_score too high (> 95), instant lead, suspicious",
+      "risk_level": "high",
+      "risk_score": 88,
+      "should_block": true,
+      "actionable_reasons": [
+        "Comportamento extremamente ativo (possível bot)",
+        "Lead instantâneo com engajamento muito alto (suspeito)"
+      ],
+      "assigned_at": "2026-04-09T18:47:15.000Z"
+    }
+  ],
+  "statistics": {
+    "total_outliers": 20,
+    "by_risk_level": {
+      "high": 15,
+      "medium": 3,
+      "low": 2
+    },
+    "by_reason": {
+      "behavior_score too high (> 95)": 12,
+      "instant lead": 8,
+      "unusual geo": 0
+    },
+    "avg_confidence": 0.18
+  },
+  "generated_at": "2026-04-09T18:47:15.000Z"
+}
+```
+---
+## 4. PUT /api/segmentation/update — Atualizar Segmento
+Atualiza recomendações de ações, bids e campanhas de um segmento existente.
+### Request Body
+```json
+{
+  "cluster_id": 45,
+  "action_recommendations": [
+    "Priorizar remarketing em 24h",
+    "Criar lookalike audience de alto valor",
+    "Enriquecer creative com copy específico"
+  ],
+  "bid_recommendations": [
+    {"adset_id": "456", "recommended_bid": "R$ 20.00"},
+    {"adset_id": "789", "recommended_bid": "R$ 18.50"}
+  ],
+  "campaign_recommendations": [
+    {"creative": "VSL Segmentado", "audience": "Segmento 0"}
+  ],
+  "is_active": true
+}
+```
+### Response
+```json
+{
+  "success": true,
+  "cluster_id": 45,
+  "cluster_name": "Segmento 0 - Alto Valor + Alto Engajamento (SP)",
+  "updates_applied": {
+    "action_recommendations": 3,
+    "bid_recommendations": 2,
+    "campaign_recommendations": 1,
+    "is_active": true
+  },
+  "updated_at": "2026-04-09T18:50:23.000Z"
+}
+```
+---
+## Estrutura das Tabelas D1
+### ml_segments — Metadados dos Segmentos
+```sql
+CREATE TABLE ml_segments (
+  id INTEGER PRIMARY KEY,
+  cluster_id INTEGER NOT NULL,
+  cluster_name TEXT NOT NULL,
+  clustering_algorithm TEXT NOT NULL,        -- 'kmeans', 'dbscan', 'hierarchical'
+  client_vertical TEXT,
+  created_at TEXT,
+  updated_at TEXT,
+  -- Estatísticas
+  size INTEGER,                                  -- Nº de leads
+  percentage REAL,                               -- % do total
+  -- Centróides (médias)
+  avg_ltv REAL,
+  avg_ltv_class REAL,
+  avg_behavior_score REAL,
+  avg_engagement_score REAL,
+  avg_intention_level REAL,
+  -- Características dominantes
+  dominant_countries TEXT,                        -- JSON: ["BR", "US"]
+  dominant_states TEXT,                            -- JSON: ["SP", "RJ"]
+  dominant_cities TEXT,                             -- JSON: ["São Paulo", "Rio"]
+  dominant_timezones TEXT,                           -- JSON: ["America/Sao_Paulo"]
+  dominant_utm_sources TEXT,                        -- JSON: ["facebook", "google"]
+  dominant_features TEXT,                            -- JSON: ["ltv", "behavior_score"]
+  -- Métricas de qualidade
+  silhouette_score REAL,                           -- 0-1 (quanto maior, melhor)
+  cohesion REAL,                                    -- Similaridade intra-cluster
+  separation REAL,                                  -- Distância inter-cluster
+  inertia REAL,                                   -- Soma dos quadrados
+  -- Recomendações automáticas
+  action_recommendations TEXT,                       -- JSON array
+  bid_recommendations TEXT,                           -- JSON array
+  campaign_recommendations TEXT,                        -- JSON array
+  -- Metadados
+  is_active INTEGER DEFAULT 1,
+  min_data_points INTEGER,
+  epsilon REAL,                                      -- DBSCAN
+  min_samples INTEGER,                                -- DBSCAN
+  max_depth INTEGER                                   -- Hierarchical
+);
+```
+### ml_segment_members — Associação Lead ↔ Segmento
+```sql
+CREATE TABLE ml_segment_members (
+  id INTEGER PRIMARY KEY,
+  lead_id TEXT NOT NULL,
+  cluster_id INTEGER NOT NULL,
+  clustering_algorithm TEXT NOT NULL,
+  confidence REAL NOT NULL,                          -- 0-1 (quão perto do centroide)
+  distance_to_centroid REAL,
+  updated_at TEXT,
+  assigned_at TEXT,
+  is_outlier INTEGER DEFAULT 0,
+  outlier_reason TEXT,
+  -- Estado do lead no momento da atribuição
+  lead_ltv REAL,
+  lead_ltv_class REAL,
+  lead_behavior_score REAL,
+  lead_engagement_score REAL,
+  lead_intention_level REAL,
+  -- Características
+  lead_country TEXT,
+  lead_state TEXT,
+  lead_city TEXT,
+  UNIQUE(lead_id, cluster_id, clustering_algorithm)
+);
+```
+---
+## Integração com Outros Agentes
+### Com LTV Predictor Agent
+```typescript
+// Enriquecer a predição de LTV com segment_id
+const enrichedLTV = {
+  original_prediction: 497.50,
+  segment_id: 123,
+  segment_name: "Alto Valor + Alto Engajamento (SP)",
+  adjusted_prediction: 612.30,  // +23% baseado no segmento
+  confidence: 0.85
+};
+```
+### Com Dashboard Agent
+```typescript
+// Visualização de segmentos em gráficos
+const segmentChartData = {
+  labels: ["Segmento 0", "Segmento 1", "Segmento 2", "Segmento 3", "Segmento 4"],
+  data: [95, 150, 120, 85, 50],
+  colors: ["#22c55e", "#3b82f6", "#1e40af", "#06b6d4", "#f59e0b"]
+};
+```
+### Com Meta Agent
+```typescript
+// Criar campanhas segmentadas por segmento
+const segmentedCampaigns = [
+  {
+    adset_name: "Segmento 0 - Alto Valor",
+    audience: ml_segment_id_123,
+    creative: "VSL Personalizado Segmento 0",
+    bid_strategy: "high_bid"
+  }
+];
+```
+---
+## Troubleshooting Comum
+### Erro: "Dados insuficientes para clustering"
+**Causa:** Menos de 100 leads nos últimos 6 meses
+**Solução:**
+- Aguardar mais dados acumularem
+- Aumentar `max_data_age_months` para 12
+- Usar `force=true` para clustering com menos dados
+### Erro: "Silhouette Score < 0.3"
+**Causa:** Clusters não são bem separados
+**Solução:**
+- Aumentar `n_clusters` para 7-10
+- Reduzir dimensionalidade (usar features mais importantes)
+- Usar algoritmo `hierarchical` em vez de `kmeans`
+### Erro: "Workers AI timeout"
+**Causa:** Prompt muito longo ou muitos dados
+**Solução:**
+- Reduzir leads por batch (max 500)
+- Simplificar prompt (menos instruções)
+- Aumentar timeout no wrangler.toml
+---
+## Roadmap Futuro
+- [ ] DBSCAN Clustering completo (detecção avançada de outliers)
+- [ ] Hierarchical Clustering (drill-down de clusters)
+- [ ] Auto-feature selection (identificar features mais importantes)
+- [ ] Clustering temporal (leads mudam de segmento ao longo do tempo)
+- [ ] A/B testing automático de segmentos
+- [ ] Dashboard visual de segmentos em gráficos interativos
+---
+*API de Segmentação Dinâmica ML v1.0 — CDP Edge*
+*Data: 9 de Abril de 2026*
+---
+## Integração com LTV Real e Match Quality (Fase 5)
+### Como ml_segments alimenta o modelo LTV
+Os segmentos gerados pelo clustering não são apenas para campanhas — eles são features do modelo LTV treinado semanalmente.
+Quando o cron semanal treina a regressão logística em `ltv_model_weights`, ele inclui o `cluster_id` do lead como feature. Isso significa:
+- Leads do "Segmento Alto Valor + Alto Engajamento (SP)" têm peso positivo maior no modelo
+- O modelo aprende quais segmentos historicamente convertem mais
+- O LTV Score de cada novo `/track` já leva em conta o segmento do usuário
+**Fluxo de dados:**
+```
+ml_segment_members (cluster_id por lead)
+    │
+    └─ JOIN com leads × purchases
+            │
+            └─ Treino semanal da regressão logística
+                    │
+                    └─ ltv_model_weights (is_active=1)
+                            │
+                            └─ Score LTV em cada /track
+```
+### Como bid_recommendations se conecta ao LTV treinado
+O `ltv_model_weights` ativo gera scores mais precisos, que alimentam diretamente as recomendações de bid:
+1. LTV Score do segmento sobe (modelo mais preciso) → `avg_ltv` do segmento é recalculado
+2. Bidding Agent roda `POST /api/bidding/recommend` com o novo `avg_ltv`
+3. `bid_recommendations` é atualizado com bid recomendado para o segmento × plataforma
+4. Você aplica o bid sugerido em Meta/Google Ads
+**Consulta útil — bid atual por segmento:**
+```bash
+curl "https://seudominio.com/api/bidding/status"
+# Retorna: bid recomendado atual por segmento × plataforma
+```
+### Como Match Quality afeta a qualidade dos segmentos
+A tabela `match_quality_log` registra se cada evento que alimentou o D1 tinha email, fbp, etc. Eventos com `has_email = 0` têm Advanced Matching incompleto — a Meta pode não ter conseguido fazer o match com um usuário real.
+Isso significa que `ml_segments` pode conter leads "fantasmas" (usuários que a Meta não reconheceu). Para garantir a qualidade dos segmentos:
+1. Monitore `v_match_quality_24h` para manter `email_rate > 40%`
+2. Se a taxa cair, o Auto-Enrich (Identity Graph) recupera emails de sessões anteriores automaticamente
+3. Leads com email recuperado (`was_email_recovered = 1`) são indistinguíveis dos outros no clustering — têm o mesmo peso
+**Consulta de match quality:**
+```bash
+curl "https://seudominio.com/api/fraud/stats"
+# Inclui métricas de qualidade de sinal junto com dados de fraude
+```
+### Tabelas relacionadas (Fase 5)
+| Tabela | Relação com segmentação |
+|---|---|
+| `ltv_model_weights` | Usa `cluster_id` como feature; melhora scores LTV por segmento |
+| `match_quality_log` | Indica qualidade dos eventos que geraram os leads dos segmentos |
+| `user_profiles` | Auto-Enrich recupera dados antes do dispatch → mais leads com email → melhor clustering |

package/server-edge-tracker/config/utm-mapping.json ADDED Viewed

@@ -0,0 +1,64 @@
+{
+  "version": "1.0.0",
+  "config": {
+    "method": "sha256",
+    "salt": "CDP_EDGE_UTM_SALT",
+    "truncated_length": 8
+  },
+  "mappings": [],
+  "examples": {
+    "imoveis": [
+      {
+        "obfuscated": "8a3f1d2b",
+        "original": "500k-800k",
+        "category": "imovel",
+        "pixel_audience": "AUDIENCE_LOW",
+        "platform_specific": {
+          "meta": { "custom_audience_id": "2385xxx_LOW" },
+          "tiktok": { "pixel_id": "C1xxx_LOW" }
+        }
+      },
+      {
+        "obfuscated": "b4e2a1c9",
+        "original": "800k-1.5M",
+        "category": "imovel",
+        "pixel_audience": "AUDIENCE_MID",
+        "platform_specific": {
+          "meta": { "custom_audience_id": "2385xxx_MID" },
+          "tiktok": { "pixel_id": "C1xxx_MID" }
+        }
+      },
+      {
+        "obfuscated": "d6c3b4e7",
+        "original": "1M-3M",
+        "category": "imovel",
+        "pixel_audience": "AUDIENCE_HIGH",
+        "platform_specific": {
+          "meta": { "custom_audience_id": "2385xxx_HIGH" },
+          "tiktok": { "pixel_id": "C1xxx_HIGH" }
+        }
+      }
+    ],
+    "automotivos": [
+      {
+        "obfuscated": "e7a5b3d1",
+        "original": "50k-100k",
+        "category": "automotivo",
+        "pixel_audience": "AUDIENCE_ENTRY"
+      },
+      {
+        "obfuscated": "f4c8d2a6",
+        "original": "100k-200k",
+        "category": "automotivo",
+        "pixel_audience": "AUDIENCE_MID"
+      },
+      {
+        "obfuscated": "a9b1e4c3",
+        "original": "200k-500k",
+        "category": "automotivo",
+        "pixel_audience": "AUDIENCE_PREMIUM"
+      }
+    ]
+  },
+  "instructions": "Preencha o array 'mappings' com as categorias do seu projeto. Use o comando 'node scripts/sync-agents.js' para sincronizar com o Agente UTM."
+}

package/server-edge-tracker/deploy-client.cjs ADDED Viewed

@@ -0,0 +1,76 @@
+#!/usr/bin/env node
+/**
+ * CDP Edge — deploy-client.js
+ *
+ * Deploy do Worker com variáveis reais do cliente, sem commitar credenciais no repo.
+ * Lê de .client.env (gitignored) e gera um wrangler.deploy.toml temporário.
+ *
+ * Uso:
+ *   node deploy-client.js           → deploy completo
+ *   node deploy-client.js --dry-run → valida sem subir ao Cloudflare
+ *
+ * Setup:
+ *   cp .client.env.example .client.env
+ *   # edite .client.env com os valores do cliente
+ *   node deploy-client.js
+ */
+const fs   = require('fs');
+const path = require('path');
+const { execSync } = require('child_process');
+const ROOT    = __dirname;
+const TOML    = path.join(ROOT, 'wrangler.toml');
+const DEPLOY  = path.join(ROOT, 'wrangler.deploy.toml');
+const ENV     = path.join(ROOT, '.client.env');
+const DRY_RUN = process.argv.includes('--dry-run');
+// ── Carregar .client.env ──────────────────────────────────────────────────────
+if (!fs.existsSync(ENV)) {
+  console.error('\n❌  .client.env não encontrado.');
+  console.error('   cp .client.env.example .client.env  e preencha os valores do cliente.\n');
+  process.exit(1);
+}
+const env = {};
+fs.readFileSync(ENV, 'utf8').split('\n').forEach(line => {
+  const trimmed = line.trim();
+  if (!trimmed || trimmed.startsWith('#')) return;
+  const [key, ...rest] = trimmed.split('=');
+  if (key) env[key.trim()] = rest.join('=').trim();
+});
+const required = ['DATABASE_ID', 'SITE_DOMAIN'];
+const missing  = required.filter(k => !env[k]);
+if (missing.length > 0) {
+  console.error(`\n❌  Variáveis obrigatórias faltando no .client.env: ${missing.join(', ')}\n`);
+  process.exit(1);
+}
+// ── Substituir placeholders no wrangler.toml → wrangler.deploy.toml ───────────
+let toml = fs.readFileSync(TOML, 'utf8');
+toml = toml
+  .replace(/SEU_DATABASE_ID/g,  env.DATABASE_ID)
+  .replace(/SEU_DOMINIO/g,       env.SITE_DOMAIN)
+  .replace(/META_PIXEL_ID\s*=\s*""/,      `META_PIXEL_ID      = "${env.META_PIXEL_ID      || ''}"`)
+  .replace(/GA4_MEASUREMENT_ID\s*=\s*""/,  `GA4_MEASUREMENT_ID = "${env.GA4_MEASUREMENT_ID || ''}"`)
+  .replace(/TIKTOK_PIXEL_ID\s*=\s*""/,     `TIKTOK_PIXEL_ID    = "${env.TIKTOK_PIXEL_ID    || ''}"`);
+fs.writeFileSync(DEPLOY, toml);
+// ── Executar wrangler deploy ──────────────────────────────────────────────────
+const cmd = `wrangler deploy --config wrangler.deploy.toml${DRY_RUN ? ' --dry-run' : ''}`;
+console.log(`\n🚀  ${DRY_RUN ? '[DRY-RUN] ' : ''}Deploying com config do cliente...\n`);
+try {
+  execSync(cmd, { stdio: 'inherit', cwd: ROOT });
+  console.log(`\n✅  Deploy ${DRY_RUN ? '(dry-run) ' : ''}concluído.\n`);
+} catch (err) {
+  console.error('\n❌  Deploy falhou.\n');
+  process.exit(1);
+} finally {
+  // sempre remove o arquivo temporário
+  if (fs.existsSync(DEPLOY)) fs.unlinkSync(DEPLOY);
+  console.log('🧹  wrangler.deploy.toml removido.\n');
+}