cdp-edge 1.13.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ -- Schema Fraud Detection — CDP Edge Quantum Tier
2
+ -- Versão: 1.0
3
+ -- Data: 9 de Abril de 2026
4
+ -- Fase 4 Enterprise-Level
5
+
6
+ -- TABLE: per-event fraud signals (one row per scored event)
7
+ CREATE TABLE IF NOT EXISTS fraud_signals (
8
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
9
+ detected_at TEXT NOT NULL DEFAULT (datetime('now')), -- text timestamp; v_fraud_dashboard filters on it
10
+
11
+ -- Event identifiers
12
+ ip_address TEXT,
13
+ fingerprint TEXT,
14
+ user_id TEXT,
15
+ email_hash TEXT, -- SHA256, no PII
16
+ event_name TEXT,
17
+ event_id TEXT,
18
+
19
+ -- Score and decision
20
+ fraud_score INTEGER NOT NULL, -- 0-100 (not enforced by a CHECK constraint)
21
+ action_taken TEXT NOT NULL, -- 'allowed', 'flagged', 'dropped'
22
+ reasons TEXT NOT NULL, -- JSON array: ["ip_velocity_high", "datacenter_ip", ...]
23
+
24
+ -- Context
25
+ ip_country TEXT,
26
+ ip_asn TEXT,
27
+ user_agent TEXT,
28
+ bot_score INTEGER,
29
+ velocity_1h INTEGER, -- Events from this IP in the last 1h
30
+ velocity_1m INTEGER, -- Events from this IP in the last 1min
31
+
32
+ -- Outcome (filled in later if the lead converts)
33
+ was_real_user INTEGER -- 1 = purchased later (false positive), 0 = confirmed fraud
34
+ );
35
+
36
+ -- TABLE: aggregated alerts (when an IP/fingerprint repeatedly reaches the threshold)
37
+ CREATE TABLE IF NOT EXISTS fraud_alerts (
38
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
39
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
40
+ updated_at TEXT NOT NULL DEFAULT (datetime('now')), -- NOTE(review): only the insert default sets this; no trigger is visible in this listing
41
+
42
+ alert_type TEXT NOT NULL, -- 'ip_attack', 'fingerprint_abuse', 'bot_network'
43
+ entity_type TEXT NOT NULL, -- 'ip', 'fingerprint', 'asn'
44
+ entity_value TEXT NOT NULL, -- The IP, fingerprint or ASN in question
45
+
46
+ -- Attack metrics
47
+ events_total INTEGER NOT NULL DEFAULT 0,
48
+ events_dropped INTEGER NOT NULL DEFAULT 0,
49
+ peak_score INTEGER NOT NULL DEFAULT 0,
50
+ first_seen TEXT NOT NULL DEFAULT (datetime('now')),
51
+ last_seen TEXT NOT NULL DEFAULT (datetime('now')),
52
+
53
+ -- Status
54
+ is_blocked INTEGER NOT NULL DEFAULT 0, -- 1 = present in the KV blocklist
55
+ blocked_at TEXT,
56
+ block_expires TEXT, -- NULL = permanent
57
+ resolved_at TEXT,
58
+ resolved_by TEXT, -- 'auto', 'manual'
59
+
60
+ -- Context
61
+ top_reasons TEXT, -- JSON: most common reasons
62
+ sample_ips TEXT -- JSON: sample of related IPs (for bot networks)
63
+ );
64
+
65
+ -- Indexes for fraud_signals and fraud_alerts
66
+ CREATE INDEX IF NOT EXISTS idx_fraud_signals_ip ON fraud_signals(ip_address);
67
+ CREATE INDEX IF NOT EXISTS idx_fraud_signals_fp ON fraud_signals(fingerprint);
68
+ CREATE INDEX IF NOT EXISTS idx_fraud_signals_score ON fraud_signals(fraud_score);
69
+ CREATE INDEX IF NOT EXISTS idx_fraud_signals_action ON fraud_signals(action_taken);
70
+ CREATE INDEX IF NOT EXISTS idx_fraud_signals_date ON fraud_signals(detected_at); -- matches the detected_at range filter in v_fraud_dashboard
71
+ CREATE INDEX IF NOT EXISTS idx_fraud_alerts_entity ON fraud_alerts(entity_type, entity_value); -- composite: entity_type first, then entity_value
72
+ CREATE INDEX IF NOT EXISTS idx_fraud_alerts_blocked ON fraud_alerts(is_blocked);
73
+ CREATE INDEX IF NOT EXISTS idx_fraud_alerts_date ON fraud_alerts(created_at);
74
+
75
+ -- VIEW: fraud dashboard (last 24h). Always returns exactly one row; the three per-action counters are 0 (not NULL) when the window has no events.
76
+ CREATE VIEW IF NOT EXISTS v_fraud_dashboard AS
77
+ SELECT
78
+ COUNT(*) AS total_events_checked,
79
+ COALESCE(SUM(CASE WHEN action_taken = 'dropped' THEN 1 ELSE 0 END), 0) AS events_dropped, -- SUM() is NULL over zero rows, hence COALESCE
80
+ COALESCE(SUM(CASE WHEN action_taken = 'flagged' THEN 1 ELSE 0 END), 0) AS events_flagged,
81
+ COALESCE(SUM(CASE WHEN action_taken = 'allowed' THEN 1 ELSE 0 END), 0) AS events_allowed,
82
+ ROUND(
83
+ CAST(SUM(CASE WHEN action_taken = 'dropped' THEN 1 ELSE 0 END) AS REAL) /
84
+ NULLIF(COUNT(*), 0) * 100
85
+ , 2) AS fraud_rate_pct, -- dropped / total * 100; intentionally NULL when the window is empty (rate undefined)
86
+ AVG(fraud_score) AS avg_fraud_score,
87
+ MAX(fraud_score) AS peak_fraud_score,
88
+ COUNT(DISTINCT ip_address) AS unique_ips_flagged -- NOTE(review): counts every distinct IP in the window, including 'allowed' events; confirm intent
89
+ FROM fraud_signals
90
+ WHERE detected_at >= datetime('now', '-24 hours'); -- text comparison; assumes detected_at is stored in datetime()'s 'YYYY-MM-DD HH:MM:SS' format, as the column default does
@@ -0,0 +1,219 @@
1
+ -- Schema de Segmentação Dinâmica ML — CDP Edge Quantum Tier
2
+ -- Versão: 1.0
3
+ -- Data: 9 de Abril de 2026
4
+
5
+ -- MAIN TABLE: ML segments (one row per cluster produced by a clustering run)
6
+ CREATE TABLE IF NOT EXISTS ml_segments (
7
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
8
+ cluster_id INTEGER NOT NULL,
9
+ cluster_name TEXT NOT NULL, -- Descriptive name generated by ML
10
+ clustering_algorithm TEXT NOT NULL, -- 'kmeans', 'dbscan', 'hierarchical'
11
+ client_vertical TEXT, -- 'curso-online', 'ecommerce', 'saaS'
12
+ created_at TEXT DEFAULT (datetime('now')),
13
+ updated_at TEXT DEFAULT (datetime('now')),
14
+
15
+ -- Cluster statistics
16
+ size INTEGER NOT NULL, -- Number of leads in the cluster
17
+ percentage REAL NOT NULL, -- Share of the total (e.g. 0.25 = 25%)
18
+
19
+ -- Average characteristics (centroids)
20
+ avg_ltv REAL, -- Average LTV of the cluster
21
+ avg_ltv_class REAL, -- 0=Low, 1=Medium, 2=High (averaged)
22
+ avg_behavior_score REAL, -- Average engagement (0-100)
23
+ avg_engagement_score REAL, -- Average interactions (0-100)
24
+ avg_intention_level REAL, -- Average purchase intent (0-100)
25
+ avg_days_since_lead REAL, -- Average recency in days
26
+ avg_hour_of_day REAL, -- Average hour of day (0-23)
27
+ avg_is_weekend REAL, -- Share that visits on weekends (0-1)
28
+ avg_is_business_hours REAL, -- Share during business hours (0-1)
29
+ avg_bot_score REAL, -- Average human likelihood (0-100, 100 = 100% human)
30
+
31
+ -- Dominant characteristics (top features per cluster)
32
+ dominant_countries TEXT, -- JSON array: ["BR", "US", "AR"]
33
+ dominant_states TEXT, -- JSON array: ["SP", "RJ", "MG"]
34
+ dominant_cities TEXT, -- JSON array: ["São Paulo", "Rio de Janeiro"]
35
+ dominant_timezones TEXT, -- JSON array: ["America/Sao_Paulo", "America/New_York"]
36
+ dominant_utm_sources TEXT, -- JSON array: ["facebook", "google", "tiktok"]
37
+ dominant_utm_mediums TEXT, -- JSON array: ["cpc", "organic", "social"]
38
+ dominant_features TEXT, -- JSON array: ["ltv", "behavior_score", "engagement_score"]
39
+
40
+ -- Clustering quality metrics
41
+ silhouette_score REAL, -- Cohesion vs separation (-1 to 1, > 0.5 = good)
42
+ cohesion REAL, -- Average intra-cluster similarity
43
+ separation REAL, -- Average inter-cluster distance
44
+ inertia REAL, -- Sum of squared distances (lower = better)
45
+
46
+ -- Automatic recommendations generated by ML
47
+ action_recommendations TEXT, -- JSON array: ["Priorizar remarketing", "Aumentar bid", ...]
48
+ bid_recommendations TEXT, -- JSON array: [{"adset_id": "123", "recommended_bid": "R$ 18.50"}, ...]
49
+ campaign_recommendations TEXT, -- JSON array: [{"creative": "VSL A", "audience": "Segment 0"}, ...]
50
+
51
+ -- Control metadata
52
+ is_active INTEGER DEFAULT 1, -- 0 = archived, 1 = active
53
+ min_data_points INTEGER, -- Minimum number of leads needed to form a cluster
54
+ epsilon REAL, -- For DBSCAN (maximum distance)
55
+ min_samples INTEGER, -- For DBSCAN (minimum points per cluster)
56
+ max_depth INTEGER -- For hierarchical (maximum depth)
57
+ );
58
+
59
+ -- Performance indexes for ml_segments
60
+ CREATE INDEX IF NOT EXISTS idx_ml_segments_id ON ml_segments(id); -- NOTE(review): likely redundant; id is already the INTEGER PRIMARY KEY
61
+ CREATE INDEX IF NOT EXISTS idx_ml_segments_cluster ON ml_segments(cluster_id);
62
+ CREATE INDEX IF NOT EXISTS idx_ml_segments_algorithm ON ml_segments(clustering_algorithm);
63
+ CREATE INDEX IF NOT EXISTS idx_ml_segments_created ON ml_segments(created_at); -- v_active_segments orders by created_at
64
+ CREATE INDEX IF NOT EXISTS idx_ml_segments_active ON ml_segments(is_active); -- v_active_segments filters on is_active = 1
65
+ CREATE INDEX IF NOT EXISTS idx_ml_segments_vertical ON ml_segments(client_vertical);
66
+
67
+ -- ASSOCIATION TABLE: segment members (which lead belongs to which segment)
68
+ CREATE TABLE IF NOT EXISTS ml_segment_members (
69
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
70
+ lead_id TEXT NOT NULL, -- ID from the leads table
71
+ cluster_id INTEGER NOT NULL, -- Cluster ID (ml_segments.id); no FOREIGN KEY is declared
72
+ clustering_algorithm TEXT NOT NULL, -- Same algorithm used to create the cluster
73
+ confidence REAL NOT NULL, -- 0-1 (how close to the centroid)
74
+ distance_to_centroid REAL, -- Euclidean distance to the centroid
75
+ updated_at TEXT DEFAULT (datetime('now')),
76
+
77
+ -- Tracking metadata
78
+ assigned_at TEXT DEFAULT (datetime('now')), -- When the lead was assigned to the segment
79
+ is_outlier INTEGER DEFAULT 0, -- 0 = normal, 1 = outlier (DBSCAN)
80
+ outlier_reason TEXT, -- "behavior_score too high", "unusual geo", etc.
81
+
82
+ -- Lead characteristics at assignment time
83
+ lead_ltv REAL, -- Lead LTV at that moment
84
+ lead_ltv_class REAL, -- Lead LTV class
85
+ lead_behavior_score REAL, -- Behavior score at that moment
86
+ lead_engagement_score REAL, -- Engagement score at that moment
87
+ lead_intention_level REAL, -- Intention level at that moment
88
+ lead_days_since_lead REAL, -- Days since lead at that moment
89
+ lead_hour_of_day REAL, -- Hour of day at that moment
90
+ lead_is_weekend INTEGER, -- Whether it was a weekend (0/1)
91
+ lead_is_business_hours INTEGER, -- Whether it was business hours (0/1)
92
+ lead_country TEXT, -- Lead country
93
+ lead_state TEXT, -- Lead state
94
+ lead_city TEXT, -- Lead city
95
+ lead_utm_source TEXT, -- Lead UTM source
96
+ lead_utm_medium TEXT, -- Lead UTM medium
97
+
98
+ -- Composite key to prevent duplicates
99
+ UNIQUE(lead_id, cluster_id, clustering_algorithm)
100
+ );
101
+
102
+ -- Performance indexes for ml_segment_members
103
+ CREATE INDEX IF NOT EXISTS idx_ml_segment_members_lead ON ml_segment_members(lead_id); -- NOTE(review): lead_id is also the leading column of the table's UNIQUE constraint; may be redundant
104
+ CREATE INDEX IF NOT EXISTS idx_ml_segment_members_cluster ON ml_segment_members(cluster_id); -- join key used by v_active_segments
105
+ CREATE INDEX IF NOT EXISTS idx_ml_segment_members_algorithm ON ml_segment_members(clustering_algorithm);
106
+ CREATE INDEX IF NOT EXISTS idx_ml_segment_members_confidence ON ml_segment_members(confidence);
107
+ CREATE INDEX IF NOT EXISTS idx_ml_segment_members_outlier ON ml_segment_members(is_outlier);
108
+ CREATE INDEX IF NOT EXISTS idx_ml_segment_members_assigned ON ml_segment_members(assigned_at);
109
+
110
+ -- CLUSTERING HISTORY TABLE (audit log of clustering runs)
111
+ CREATE TABLE IF NOT EXISTS ml_clustering_history (
112
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
113
+ clustering_id INTEGER NOT NULL, -- ID in ml_segments (NOTE(review): which ml_segments column this refers to is not stated; no FOREIGN KEY is declared)
114
+ started_at TEXT NOT NULL, -- When the clustering started
115
+ completed_at TEXT, -- When it finished
116
+ algorithm TEXT NOT NULL, -- 'kmeans', 'dbscan', 'hierarchical'
117
+ n_leads_processed INTEGER NOT NULL, -- How many leads were processed
118
+ n_clusters_created INTEGER NOT NULL, -- How many clusters were created
119
+ total_duration_ms INTEGER NOT NULL, -- Total duration in milliseconds
120
+ workers_ai_neurons_used INTEGER, -- Neurons used (for billing)
121
+ status TEXT NOT NULL, -- 'completed', 'failed', 'timeout'
122
+ error_message TEXT, -- If it failed, the reason for the error
123
+ parameters TEXT NOT NULL, -- Parameters used (JSON)
124
+ results_summary TEXT, -- Summary of the results (JSON)
125
+ created_at TEXT DEFAULT (datetime('now'))
126
+ );
127
+
128
+ -- Audit indexes for ml_clustering_history
129
+ CREATE INDEX IF NOT EXISTS idx_ml_clustering_history_clustering ON ml_clustering_history(clustering_id);
130
+ CREATE INDEX IF NOT EXISTS idx_ml_clustering_history_started ON ml_clustering_history(started_at);
131
+ CREATE INDEX IF NOT EXISTS idx_ml_clustering_history_algorithm ON ml_clustering_history(algorithm);
132
+ CREATE INDEX IF NOT EXISTS idx_ml_clustering_history_status ON ml_clustering_history(status);
133
+
134
+ -- FEATURE IMPORTANCE TABLE (for future optimization)
135
+ CREATE TABLE IF NOT EXISTS ml_feature_importance (
136
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
137
+ feature_name TEXT NOT NULL, -- 'ltv', 'behavior_score', etc.
138
+ importance_score REAL NOT NULL, -- 0-1 (how important for clustering)
139
+ clustering_algorithm TEXT NOT NULL, -- Algorithm used for the calculation
140
+ variance_explained REAL, -- Share of variance explained
141
+ created_at TEXT DEFAULT (datetime('now')),
142
+ updated_at TEXT DEFAULT (datetime('now'))
143
+ );
144
+
145
+ -- Indexes for ml_feature_importance
146
+ CREATE INDEX IF NOT EXISTS idx_ml_feature_importance_feature ON ml_feature_importance(feature_name);
147
+ CREATE INDEX IF NOT EXISTS idx_ml_feature_importance_algorithm ON ml_feature_importance(clustering_algorithm);
148
+
149
+ -- VIEW: active segments (is_active = 1) with their stored statistics and a live member count
150
+ CREATE VIEW IF NOT EXISTS v_active_segments AS
151
+ SELECT
152
+ ms.id,
153
+ ms.cluster_id,
154
+ ms.cluster_name,
155
+ ms.clustering_algorithm,
156
+ ms.client_vertical,
157
+ ms.size,
158
+ ms.percentage,
159
+ ms.avg_ltv,
160
+ ms.avg_ltv_class,
161
+ ms.avg_behavior_score,
162
+ ms.avg_engagement_score,
163
+ ms.avg_intention_level,
164
+ ms.avg_days_since_lead,
165
+ ms.dominant_countries,
166
+ ms.dominant_states,
167
+ ms.dominant_cities,
168
+ ms.dominant_utm_sources,
169
+ ms.dominant_features,
170
+ ms.silhouette_score,
171
+ ms.cohesion,
172
+ ms.separation,
173
+ ms.inertia,
174
+ ms.action_recommendations,
175
+ ms.bid_recommendations,
176
+ ms.campaign_recommendations,
177
+ COUNT(msm.id) as member_count -- counts matched member rows; 0 when the LEFT JOIN finds none
178
+ FROM ml_segments ms
179
+ LEFT JOIN ml_segment_members msm ON msm.cluster_id = ms.id -- members reference ml_segments.id, not ml_segments.cluster_id
180
+ WHERE ms.is_active = 1
181
+ GROUP BY ms.id -- id is the primary key, so each group is exactly one segment row
182
+ ORDER BY ms.created_at DESC;
183
+
184
+
185
+ -- VIEW: segment members enriched with lead data (leads) and the segment name (ml_segments)
186
+ CREATE VIEW IF NOT EXISTS v_segment_members_enriched AS
187
+ SELECT
188
+ msm.id,
189
+ msm.lead_id,
190
+ msm.cluster_id,
191
+ msm.clustering_algorithm,
192
+ msm.confidence,
193
+ msm.distance_to_centroid,
194
+ msm.is_outlier,
195
+ msm.outlier_reason,
196
+ msm.assigned_at,
197
+ msm.lead_ltv,
198
+ msm.lead_ltv_class,
199
+ msm.lead_behavior_score,
200
+ msm.lead_engagement_score,
201
+ msm.lead_intention_level,
202
+ msm.lead_days_since_lead,
203
+ msm.lead_hour_of_day,
204
+ msm.lead_is_weekend,
205
+ msm.lead_is_business_hours,
206
+ l.email,
207
+ l.first_name,
208
+ l.last_name,
209
+ l.city,
210
+ l.state,
211
+ l.country,
212
+ l.utm_source,
213
+ l.utm_medium,
214
+ l.utm_campaign,
215
+ l.created_at as lead_created_at,
216
+ ms.cluster_name -- NULL when no ml_segments row matches the member's cluster_id
217
+ FROM ml_segment_members msm
218
+ INNER JOIN leads l ON msm.lead_id = l.id -- drops members whose lead row is missing; leads is defined outside this listing
219
+ LEFT JOIN ml_segments ms ON ms.id = msm.cluster_id -- supplies the ms alias used above; same key as v_active_segments
220
+ ORDER BY msm.confidence DESC, msm.assigned_at DESC;