@kaikybrofc/omnizap-system 2.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +534 -0
- package/LICENSE +21 -0
- package/README.md +431 -0
- package/RELEASE-v2.1.2.md +83 -0
- package/app/config/adminIdentity.js +87 -0
- package/app/config/baileysConfig.js +693 -0
- package/app/config/groupUtils.js +388 -0
- package/app/connection/socketController.js +992 -0
- package/app/controllers/messageController.js +354 -0
- package/app/modules/adminModule/groupCommandHandlers.js +1294 -0
- package/app/modules/adminModule/groupEventHandlers.js +355 -0
- package/app/modules/aiModule/catCommand.js +1006 -0
- package/app/modules/broadcastModule/noticeCommand.js +416 -0
- package/app/modules/gameModule/diceCommand.js +67 -0
- package/app/modules/menuModule/common.js +311 -0
- package/app/modules/menuModule/menus.js +59 -0
- package/app/modules/playModule/playCommand.js +1615 -0
- package/app/modules/quoteModule/quoteCommand.js +851 -0
- package/app/modules/rpgPokemonModule/rpgBattleCanvasRenderer.js +786 -0
- package/app/modules/rpgPokemonModule/rpgBattleService.js +2082 -0
- package/app/modules/rpgPokemonModule/rpgBattleService.test.js +760 -0
- package/app/modules/rpgPokemonModule/rpgEvolutionUtils.js +22 -0
- package/app/modules/rpgPokemonModule/rpgPokemonCommand.js +172 -0
- package/app/modules/rpgPokemonModule/rpgPokemonDomain.js +192 -0
- package/app/modules/rpgPokemonModule/rpgPokemonDomain.test.js +93 -0
- package/app/modules/rpgPokemonModule/rpgPokemonEvolution.test.js +46 -0
- package/app/modules/rpgPokemonModule/rpgPokemonMessages.js +746 -0
- package/app/modules/rpgPokemonModule/rpgPokemonRepository.js +1859 -0
- package/app/modules/rpgPokemonModule/rpgPokemonService.js +6738 -0
- package/app/modules/rpgPokemonModule/rpgProfileCanvasRenderer.js +354 -0
- package/app/modules/statsModule/globalRankingCommand.js +65 -0
- package/app/modules/statsModule/noMessageCommand.js +288 -0
- package/app/modules/statsModule/rankingCommand.js +60 -0
- package/app/modules/statsModule/rankingCommon.js +889 -0
- package/app/modules/stickerModule/addStickerMetadata.js +239 -0
- package/app/modules/stickerModule/convertToWebp.js +390 -0
- package/app/modules/stickerModule/stickerCommand.js +454 -0
- package/app/modules/stickerModule/stickerConvertCommand.js +156 -0
- package/app/modules/stickerModule/stickerTextCommand.js +657 -0
- package/app/modules/stickerPackModule/autoPackCollectorRuntime.js +20 -0
- package/app/modules/stickerPackModule/autoPackCollectorService.js +284 -0
- package/app/modules/stickerPackModule/semanticReclassificationEngine.js +466 -0
- package/app/modules/stickerPackModule/semanticReclassificationEngine.test.js +88 -0
- package/app/modules/stickerPackModule/semanticThemeClusterService.js +571 -0
- package/app/modules/stickerPackModule/stickerAssetClassificationRepository.js +449 -0
- package/app/modules/stickerPackModule/stickerAssetRepository.js +400 -0
- package/app/modules/stickerPackModule/stickerAssetReprocessQueueRepository.js +180 -0
- package/app/modules/stickerPackModule/stickerAutoPackByTagsRuntime.js +4078 -0
- package/app/modules/stickerPackModule/stickerClassificationBackgroundRuntime.js +598 -0
- package/app/modules/stickerPackModule/stickerClassificationService.js +588 -0
- package/app/modules/stickerPackModule/stickerMarketplaceDriftService.js +102 -0
- package/app/modules/stickerPackModule/stickerPackCatalogHttp.js +7506 -0
- package/app/modules/stickerPackModule/stickerPackCommandHandlers.js +1095 -0
- package/app/modules/stickerPackModule/stickerPackEngagementRepository.js +108 -0
- package/app/modules/stickerPackModule/stickerPackErrors.js +30 -0
- package/app/modules/stickerPackModule/stickerPackInteractionEventRepository.js +110 -0
- package/app/modules/stickerPackModule/stickerPackItemRepository.js +440 -0
- package/app/modules/stickerPackModule/stickerPackMarketplaceService.js +337 -0
- package/app/modules/stickerPackModule/stickerPackMessageService.js +296 -0
- package/app/modules/stickerPackModule/stickerPackRepository.js +442 -0
- package/app/modules/stickerPackModule/stickerPackService.js +788 -0
- package/app/modules/stickerPackModule/stickerPackServiceRuntime.js +51 -0
- package/app/modules/stickerPackModule/stickerPackUtils.js +97 -0
- package/app/modules/stickerPackModule/stickerStorageService.js +507 -0
- package/app/modules/stickerPackModule/stickerWorkerPipelineRuntime.js +233 -0
- package/app/modules/stickerPackModule/stickerWorkerTaskQueueRepository.js +205 -0
- package/app/modules/systemMetricsModule/pingCommand.js +421 -0
- package/app/modules/tiktokModule/tiktokCommand.js +798 -0
- package/app/modules/userModule/userCommand.js +1217 -0
- package/app/modules/waifuPicsModule/waifuPicsCommand.js +177 -0
- package/app/observability/metrics.js +734 -0
- package/app/services/captchaService.js +492 -0
- package/app/services/dbWriteQueue.js +572 -0
- package/app/services/groupMetadataService.js +279 -0
- package/app/services/lidMapService.js +663 -0
- package/app/services/messagePersistenceService.js +56 -0
- package/app/services/newsBroadcastService.js +351 -0
- package/app/services/pokeApiService.js +398 -0
- package/app/services/queueUtils.js +57 -0
- package/app/services/socketState.js +7 -0
- package/app/store/aiPromptStore.js +38 -0
- package/app/store/groupConfigStore.js +58 -0
- package/app/store/premiumUserStore.js +36 -0
- package/app/utils/antiLink/antiLinkModule.js +804 -0
- package/app/utils/http/getImageBufferModule.js +18 -0
- package/app/utils/json/jsonSanitizer.js +113 -0
- package/app/utils/json/jsonSanitizer.test.js +40 -0
- package/app/utils/logger/loggerModule.js +262 -0
- package/app/utils/systemMetrics/systemMetricsModule.js +91 -0
- package/database/index.js +2052 -0
- package/database/init.js +516 -0
- package/database/migrations/20260203_0001_sticker_packs.sql +54 -0
- package/database/migrations/20260210_0003_rpg_pokemon.sql +58 -0
- package/database/migrations/20260210_0004_rpg_shiny_biome.sql +9 -0
- package/database/migrations/20260210_0005_rpg_missions.sql +14 -0
- package/database/migrations/20260210_0006_rpg_world_pokedex_traits.sql +27 -0
- package/database/migrations/20260210_0007_rpg_raid_pvp.sql +56 -0
- package/database/migrations/20260210_0008_rpg_social_system.sql +195 -0
- package/database/migrations/20260211_0009_rpg_social_xp.sql +36 -0
- package/database/migrations/20260222_0010_remove_message_xp.sql +2 -0
- package/database/migrations/20260226_0011_sticker_asset_classification.sql +17 -0
- package/database/migrations/20260226_0012_sticker_pack_engagement.sql +16 -0
- package/database/migrations/20260226_0013_sticker_marketplace_intelligence.sql +19 -0
- package/database/migrations/20260226_0014_sticker_pack_publish_flow.sql +30 -0
- package/database/migrations/20260226_0014_sticker_worker_queues.sql +42 -0
- package/database/migrations/20260226_0015_sticker_auto_pack_curation_integrity.sql +18 -0
- package/database/migrations/20260226_0016_sticker_web_google_auth_persistence.sql +34 -0
- package/database/migrations/20260226_0017_sticker_web_admin_ban.sql +22 -0
- package/database/migrations/20260226_0018_sticker_web_admin_moderator.sql +18 -0
- package/database/migrations/20260227_0019_sticker_classification_v2_signals.sql +12 -0
- package/database/migrations/20260227_0020_semantic_theme_clusters.sql +35 -0
- package/docker-compose.yml +103 -0
- package/ecosystem.prod.config.cjs +35 -0
- package/eslint.config.js +61 -0
- package/index.js +437 -0
- package/ml/clip_classifier/Dockerfile +16 -0
- package/ml/clip_classifier/README.md +120 -0
- package/ml/clip_classifier/adaptive_scoring.py +40 -0
- package/ml/clip_classifier/classifier.py +654 -0
- package/ml/clip_classifier/embedding_store.py +481 -0
- package/ml/clip_classifier/env_loader.py +15 -0
- package/ml/clip_classifier/llm_label_expander.py +144 -0
- package/ml/clip_classifier/main.py +213 -0
- package/ml/clip_classifier/requirements.txt +10 -0
- package/ml/clip_classifier/similarity_engine.py +74 -0
- package/observability/alert-rules.yml +60 -0
- package/observability/grafana/dashboards/omnizap-mysql.json +136 -0
- package/observability/grafana/dashboards/omnizap-overview.json +170 -0
- package/observability/grafana/provisioning/dashboards/dashboards.yml +11 -0
- package/observability/grafana/provisioning/datasources/datasources.yml +15 -0
- package/observability/loki-config.yml +38 -0
- package/observability/mysql-exporter.cnf +5 -0
- package/observability/mysql-setup.sql +46 -0
- package/observability/prometheus.yml +32 -0
- package/observability/promtail-config.yml +84 -0
- package/package.json +109 -0
- package/public/api-docs/index.html +144 -0
- package/public/css/github-project-panel.css +297 -0
- package/public/css/stickers-admin.css +1272 -0
- package/public/css/styles.css +671 -0
- package/public/index.html +1311 -0
- package/public/js/apps/apiDocsApp.js +310 -0
- package/public/js/apps/createPackApp.js +2069 -0
- package/public/js/apps/homeApp.js +396 -0
- package/public/js/apps/stickersAdminApp.js +1744 -0
- package/public/js/apps/stickersApp.js +4830 -0
- package/public/js/catalog.js +1019 -0
- package/public/js/github-panel/components/CommitList.js +34 -0
- package/public/js/github-panel/components/ErrorState.js +16 -0
- package/public/js/github-panel/components/GithubProjectPanel.js +106 -0
- package/public/js/github-panel/components/ReleaseList.js +38 -0
- package/public/js/github-panel/components/SkeletonPanel.js +22 -0
- package/public/js/github-panel/components/StatCard.js +15 -0
- package/public/js/github-panel/index.js +15 -0
- package/public/js/github-panel/useGithubRepoData.js +154 -0
- package/public/js/github-panel/vendor/react.js +11 -0
- package/public/js/runtime/react-runtime.js +19 -0
- package/public/licenca/index.html +106 -0
- package/public/stickers/admin/index.html +23 -0
- package/public/stickers/create/index.html +47 -0
- package/public/stickers/index.html +48 -0
- package/public/termos-de-uso/index.html +125 -0
- package/scripts/cache-bust.mjs +107 -0
- package/scripts/deploy.sh +458 -0
- package/scripts/github-deploy-notify.mjs +174 -0
- package/scripts/release.sh +129 -0
|
@@ -0,0 +1,571 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
|
|
3
|
+
import OpenAI from 'openai';
|
|
4
|
+
|
|
5
|
+
import { executeQuery, TABLES } from '../../../database/index.js';
|
|
6
|
+
import logger from '../../utils/logger/loggerModule.js';
|
|
7
|
+
import {
|
|
8
|
+
findStickerClassificationByAssetId,
|
|
9
|
+
updateStickerClassificationSemanticCluster,
|
|
10
|
+
} from './stickerAssetClassificationRepository.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Interprets an environment-style value as a boolean.
 *
 * @param {*} value - Raw value; it is stringified, trimmed and lowercased before matching.
 * @param {*} fallback - Returned when the value is missing, empty or not a recognized token.
 * @returns {*} `true` for 1/true/yes/y/on, `false` for 0/false/no/n/off, otherwise `fallback`.
 */
const parseEnvBool = (value, fallback) => {
  if (value == null || value === '') return fallback;

  switch (String(value).trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'y':
    case 'on':
      return true;
    case '0':
    case 'false':
    case 'no':
    case 'n':
    case 'off':
      return false;
    default:
      return fallback;
  }
};
|
|
19
|
+
|
|
20
|
+
// Reads a model name from the environment, using the fallback when the value is unset or blank.
const readModelName = (rawName, fallbackName) => String(rawName || fallbackName).trim() || fallbackName;

// Master switch for the whole feature; off unless explicitly enabled.
const ENABLE_SEMANTIC_CLUSTERING = parseEnvBool(process.env.ENABLE_SEMANTIC_CLUSTERING, false);

// OpenAI request timeout in milliseconds: 10s by default, never below 1s.
const OPENAI_TIMEOUT_MS = Math.max(
  1_000,
  Number(process.env.SEMANTIC_CLUSTER_OPENAI_TIMEOUT_MS) || 10_000,
);

// Model used to embed suggestion text.
const EMBEDDING_MODEL = readModelName(process.env.SEMANTIC_CLUSTER_EMBEDDING_MODEL, 'text-embedding-3-small');

// Chat model used to derive a canonical slug for a new cluster.
const SLUG_MODEL = readModelName(process.env.SEMANTIC_CLUSTER_SLUG_MODEL, 'gpt-4o-mini');
|
|
25
|
+
/**
 * Parses the cosine-similarity threshold from its raw environment value.
 *
 * @param {*} rawValue - Value of SEMANTIC_CLUSTER_SIMILARITY_THRESHOLD (string or undefined).
 * @returns {number} 0.87 when the value is missing, blank or not numeric; otherwise the
 *   parsed number clamped to the range [0.5, 0.99].
 */
const parseSimilarityThreshold = (rawValue) => {
  const text = rawValue === undefined || rawValue === null ? '' : String(rawValue).trim();
  // Number('') is 0, which is finite, so a blank variable must be rejected before the
  // numeric check; otherwise it would be clamped to 0.5 instead of using the default.
  if (text === '') return 0.87;

  const parsed = Number(text);
  if (!Number.isFinite(parsed)) return 0.87;
  return Math.max(0.5, Math.min(0.99, parsed));
};

// Minimum cosine similarity for a suggestion to be attached to an existing cluster.
const SIMILARITY_THRESHOLD = parseSimilarityThreshold(process.env.SEMANTIC_CLUSTER_SIMILARITY_THRESHOLD);
|
|
28
|
+
// Reads a numeric setting, substituting the fallback for unset/zero/non-numeric input,
// then clamps the result to [minimum, maximum].
const clampEnvNumber = (rawValue, fallbackValue, minimum, maximum) =>
  Math.max(minimum, Math.min(maximum, Number(rawValue) || fallbackValue));

// Maximum number of clusters loaded for similarity comparison (100..20000, default 5000).
const MAX_CLUSTER_SCAN = clampEnvNumber(process.env.SEMANTIC_CLUSTER_MAX_SCAN, 5_000, 100, 20_000);

// Maximum number of distinct suggestions considered per asset (1..20, default 8).
const MAX_SUGGESTIONS_PER_ASSET = clampEnvNumber(process.env.SEMANTIC_CLUSTER_MAX_SUGGESTIONS_PER_ASSET, 8, 1, 20);

// Number of clustering tasks allowed to run at once (1..8, default 2).
const CLUSTERING_CONCURRENCY = clampEnvNumber(process.env.SEMANTIC_CLUSTER_CONCURRENCY, 2, 1, 8);

// Lifetime of in-memory cache entries in milliseconds (default 5 minutes, at least 5s, no upper bound).
const RESOLUTION_CACHE_TTL_MS = clampEnvNumber(
  process.env.SEMANTIC_CLUSTER_MEMORY_CACHE_TTL_MS,
  300_000,
  5_000,
  Number.POSITIVE_INFINITY,
);

// When true, assets that already have a cluster assigned are resolved again.
const SEMANTIC_CLUSTER_REPROCESS_EXISTING = parseEnvBool(process.env.SEMANTIC_CLUSTER_REPROCESS_EXISTING, false);
|
|
33
|
+
|
|
34
|
+
// Lazily created OpenAI client, reused once built.
let cachedClient = null;

// Normalized suggestion text -> { expiresAt, value } memo of resolved clusters.
const inMemorySuggestionCache = new Map();

// Cluster id -> cluster record, filled whenever clusters are loaded or created.
const inMemoryClusterById = new Map();

// Snapshot of the cluster list together with its expiry timestamp (ms since epoch).
let inMemoryClusterList = { expiresAt: 0, items: [] };

// In-flight cluster list query, shared by concurrent callers.
let clusterListPending = null;

// Pending clustering tasks keyed by asset id (a later enqueue replaces an earlier one).
const pendingTasksByAssetId = new Map();

// True while a drain pass is already scheduled.
let queueDrainScheduled = false;

// Number of clustering tasks currently executing.
let queueRunning = 0;
|
|
46
|
+
|
|
47
|
+
/**
 * Normalizes suggestion text into the form used as a cache key: trimmed, lowercased,
 * whitespace runs collapsed to a single space, and limited to 512 characters.
 *
 * @param {*} value - Raw suggestion; falsy values become an empty string.
 * @returns {string} Normalized text, possibly empty.
 */
const normalizeSuggestion = (value) =>
  String(value || '')
    .trim()
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .slice(0, 512)
    // Truncation can cut right after a space; trimming again keeps the function idempotent,
    // so normalizing an already-normalized key never produces a different key.
    .trimEnd();
|
|
53
|
+
|
|
54
|
+
/**
 * Builds a slug of at most two lowercase ASCII words joined by an underscore.
 *
 * @param {*} value - Raw text; falsy values yield an empty string.
 * @returns {string} The slug, or '' when no letters or digits remain.
 */
const normalizeSlug = (value) => {
  // Decompose accented characters and drop the combining marks so "ação" becomes "acao".
  const asciiLower = String(value || '')
    .trim()
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '');

  // Every run of characters outside a-z/0-9 acts as a word separator.
  const words = asciiLower.split(/[^a-z0-9]+/).filter((word) => word.length > 0);
  return words.slice(0, 2).join('_');
};
|
|
66
|
+
|
|
67
|
+
/**
 * Derives a slug directly from the suggestion text, without calling any model.
 *
 * @param {*} suggestionText - Raw suggestion text.
 * @returns {string} The derived slug, or 'misc_theme' when nothing usable remains.
 */
const fallbackSlugFromSuggestion = (suggestionText) => {
  const cleaned = normalizeSuggestion(suggestionText);
  const derived = cleaned ? normalizeSlug(cleaned) : '';
  return derived || 'misc_theme';
};
|
|
73
|
+
|
|
74
|
+
/**
 * Computes the SHA-256 hex digest of a normalized suggestion.
 *
 * @param {*} normalizedSuggestion - Text to hash; falsy values are hashed as ''.
 * @returns {string} 64-character lowercase hex digest.
 */
const hashSuggestion = (normalizedSuggestion) => {
  const digestInput = String(normalizedSuggestion || '');
  const hasher = createHash('sha256');
  hasher.update(digestInput, 'utf8');
  return hasher.digest('hex');
};
|
|
76
|
+
|
|
77
|
+
/**
 * Packs an embedding into a Buffer of little-endian 32-bit floats.
 * Entries that do not convert to a finite number are dropped.
 *
 * @param {Array} [embedding=[]] - Vector values; a non-array is treated as empty.
 * @returns {{dim: number, buffer: Buffer}} Count of stored values and the packed bytes.
 */
const serializeEmbedding = (embedding = []) => {
  const finiteValues = [];
  if (Array.isArray(embedding)) {
    for (const entry of embedding) {
      const numeric = Number(entry);
      if (Number.isFinite(numeric)) finiteValues.push(numeric);
    }
  }

  const dim = finiteValues.length;
  if (dim === 0) return { dim: 0, buffer: Buffer.alloc(0) };

  // Every byte is overwritten below, so the uninitialized allocation is safe.
  const buffer = Buffer.allocUnsafe(dim * 4);
  finiteValues.forEach((numeric, position) => {
    buffer.writeFloatLE(numeric, position * 4);
  });
  return { dim, buffer };
};
|
|
89
|
+
|
|
90
|
+
/**
 * Decodes a Buffer of little-endian 32-bit floats back into a number array.
 *
 * @param {*} raw - Stored bytes; anything that is not a Buffer of at least 4 bytes yields [].
 * @param {number} [dim=0] - Expected number of values; when positive it caps the result length.
 * @returns {number[]} Decoded values.
 */
const parseEmbedding = (raw, dim = 0) => {
  if (!Buffer.isBuffer(raw) || raw.length < 4) return [];

  // Trailing bytes that do not form a whole float are ignored.
  const available = Math.floor(raw.length / 4);
  const count = dim > 0 ? Math.min(dim, available) : available;
  if (count <= 0) return [];

  const decoded = new Array(count);
  let slot = 0;
  while (slot < count) {
    decoded[slot] = raw.readFloatLE(slot * 4);
    slot += 1;
  }
  return decoded;
};
|
|
101
|
+
|
|
102
|
+
/**
 * Cosine similarity of two vectors, compared over their shared leading length.
 *
 * @param {Array} left - First vector.
 * @param {Array} right - Second vector.
 * @returns {number} Value clamped to [-1, 1]; 0 when either input is not a non-empty array
 *   or either vector has zero magnitude over the compared range.
 */
const cosineSimilarity = (left, right) => {
  const usable = Array.isArray(left) && Array.isArray(right) && left.length > 0 && right.length > 0;
  if (!usable) return 0;

  const size = Math.min(left.length, right.length);
  if (size <= 0) return 0;

  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  let position = 0;
  while (position < size) {
    // Missing or falsy entries count as 0.
    const a = Number(left[position] || 0);
    const b = Number(right[position] || 0);
    dot += a * b;
    leftNorm += a * a;
    rightNorm += b * b;
    position += 1;
  }

  if (leftNorm <= 0 || rightNorm <= 0) return 0;

  // Clamp to absorb floating-point drift slightly outside [-1, 1].
  const cosine = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  return Math.max(-1, Math.min(1, cosine));
};
|
|
121
|
+
|
|
122
|
+
/**
 * Returns the shared OpenAI client, creating it on first use.
 *
 * @returns {OpenAI|null} The client, or null while OPENAI_API_KEY is unset or blank.
 */
const resolveOpenAIClient = () => {
  if (!cachedClient) {
    const apiKey = String(process.env.OPENAI_API_KEY || '').trim();
    if (apiKey) {
      // SDK retries are disabled; each request gets a single attempt bounded by the timeout.
      cachedClient = new OpenAI({ apiKey, timeout: OPENAI_TIMEOUT_MS, maxRetries: 0 });
    }
  }
  return cachedClient;
};
|
|
133
|
+
|
|
134
|
+
/**
 * Tells whether clustering can run: the feature flag is on and an OpenAI client is available.
 *
 * @returns {boolean}
 */
const shouldRunSemanticClustering = () => {
  if (!ENABLE_SEMANTIC_CLUSTERING) return ENABLE_SEMANTIC_CLUSTERING;
  return Boolean(resolveOpenAIClient());
};
|
|
135
|
+
|
|
136
|
+
/**
 * Loads the persisted cache row for a suggestion, matched by its normalized text.
 *
 * @param {*} normalizedSuggestion - Suggestion text; it is normalized again before the lookup.
 * @returns {Promise<object|null>} The row with its embedding decoded, or null when the text
 *   is empty or no row exists.
 */
const getSuggestionCacheRow = async (normalizedSuggestion) => {
  const lookupText = normalizeSuggestion(normalizedSuggestion);
  if (!lookupText) return null;

  const selectSql = `SELECT suggestion_hash, normalized_text, semantic_cluster_id, canonical_slug, embedding_dim, embedding, last_similarity
FROM ${TABLES.SEMANTIC_THEME_SUGGESTION_CACHE}
WHERE normalized_text = ?
LIMIT 1`;
  const resultRows = await executeQuery(selectSql, [lookupText]);

  const cached = resultRows?.[0] || null;
  if (!cached) return null;

  const similarity = Number(cached.last_similarity);
  return {
    suggestion_hash: cached.suggestion_hash,
    normalized_text: cached.normalized_text,
    // A missing or zero id is reported as null.
    semantic_cluster_id: Number(cached.semantic_cluster_id || 0) || null,
    canonical_slug: cached.canonical_slug || null,
    embedding: parseEmbedding(cached.embedding, Number(cached.embedding_dim || 0)),
    last_similarity: Number.isFinite(similarity) ? similarity : null,
  };
};
|
|
159
|
+
|
|
160
|
+
/**
 * Inserts or refreshes the persisted cache row that links a suggestion to its cluster.
 *
 * @param {object} params
 * @param {string} params.suggestionText - Suggestion as received (stored truncated to 512 chars).
 * @param {string} params.normalizedText - Normalized form; falls back to `suggestionText`.
 * @param {number} params.semanticClusterId - Cluster the suggestion resolved to.
 * @param {string|null} params.canonicalSlug - Slug of that cluster.
 * @param {number[]} [params.embedding=[]] - Embedding of the suggestion.
 * @param {number|null} [params.similarity=null] - Similarity to the cluster, stored with 6 decimals.
 * @returns {Promise<boolean>} false when the text, cluster id or embedding is unusable; true after the write.
 */
const upsertSuggestionCacheRow = async ({
  suggestionText,
  normalizedText,
  semanticClusterId,
  canonicalSlug,
  embedding = [],
  similarity = null,
}) => {
  const cacheText = normalizeSuggestion(normalizedText || suggestionText);
  if (!(cacheText && semanticClusterId)) return false;

  const suggestionHash = hashSuggestion(cacheText);
  const serialized = serializeEmbedding(embedding);
  if (serialized.dim <= 0 || !serialized.buffer.length) return false;

  let storedSimilarity = null;
  if (similarity !== null && Number.isFinite(Number(similarity))) {
    storedSimilarity = Number(Number(similarity).toFixed(6));
  }

  const upsertSql = `INSERT INTO ${TABLES.SEMANTIC_THEME_SUGGESTION_CACHE}
(suggestion_hash, suggestion_text, normalized_text, semantic_cluster_id, canonical_slug, embedding_dim, embedding, last_similarity)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
suggestion_text = VALUES(suggestion_text),
normalized_text = VALUES(normalized_text),
semantic_cluster_id = VALUES(semantic_cluster_id),
canonical_slug = VALUES(canonical_slug),
embedding_dim = VALUES(embedding_dim),
embedding = VALUES(embedding),
last_similarity = VALUES(last_similarity),
updated_at = CURRENT_TIMESTAMP`;
  const parameters = [
    suggestionHash,
    String(suggestionText || cacheText).slice(0, 512),
    cacheText,
    semanticClusterId,
    canonicalSlug || null,
    serialized.dim,
    serialized.buffer,
    storedSimilarity,
  ];

  await executeQuery(upsertSql, parameters);
  return true;
};
|
|
200
|
+
|
|
201
|
+
/**
 * Returns the clusters used for similarity matching, newest first.
 * A snapshot is kept in memory until its TTL expires, and concurrent callers
 * share the same in-flight query.
 *
 * @returns {Promise<Array<{id: number, canonical_slug: (string|null), embedding: number[]}>>}
 */
const listSemanticClusters = async () => {
  const snapshotIsFresh = inMemoryClusterList.expiresAt > Date.now() && Array.isArray(inMemoryClusterList.items);
  if (snapshotIsFresh) return inMemoryClusterList.items;
  if (clusterListPending) return clusterListPending;

  // Decodes the rows, drops clusters without a usable id or embedding, and stores the snapshot.
  const storeSnapshot = (rows) => {
    const usable = [];
    for (const row of Array.isArray(rows) ? rows : []) {
      const cluster = {
        id: Number(row.id || 0),
        canonical_slug: row.canonical_slug || null,
        embedding: parseEmbedding(row.embedding, Number(row.embedding_dim || 0)),
      };
      if (cluster.id > 0 && Array.isArray(cluster.embedding) && cluster.embedding.length > 0) {
        usable.push(cluster);
      }
    }

    inMemoryClusterList = { expiresAt: Date.now() + RESOLUTION_CACHE_TTL_MS, items: usable };
    usable.forEach((cluster) => {
      inMemoryClusterById.set(cluster.id, cluster);
    });
    return usable;
  };

  const selectSql = `SELECT id, canonical_slug, embedding_dim, embedding
FROM ${TABLES.SEMANTIC_THEME_CLUSTER}
ORDER BY id DESC
LIMIT ${Math.max(1, MAX_CLUSTER_SCAN)}`;

  clusterListPending = executeQuery(selectSql, [])
    .then(storeSnapshot)
    .finally(() => {
      // Cleared on success and on failure so the next caller can issue a new query.
      clusterListPending = null;
    });

  return clusterListPending;
};
|
|
238
|
+
|
|
239
|
+
/**
 * Persists a new cluster and registers it in the in-memory index.
 *
 * @param {object} params
 * @param {string} params.canonicalSlug - Proposed slug; normalized, 'misc_theme' when empty.
 * @param {number[]} params.embedding - Embedding that represents the cluster.
 * @returns {Promise<{id: number, canonical_slug: string, embedding: number[]}>} The created cluster.
 * @throws {Error} 'embedding_invalid_for_cluster' when the embedding has no finite values;
 *   'cluster_insert_failed' when the insert reports no id.
 */
const createSemanticCluster = async ({ canonicalSlug, embedding }) => {
  const slug = normalizeSlug(canonicalSlug) || 'misc_theme';

  const serialized = serializeEmbedding(embedding);
  if (!serialized.buffer.length || serialized.dim <= 0) {
    throw new Error('embedding_invalid_for_cluster');
  }

  const insertSql = `INSERT INTO ${TABLES.SEMANTIC_THEME_CLUSTER}
(canonical_slug, embedding_dim, embedding)
VALUES (?, ?, ?)`;
  const insertResult = await executeQuery(insertSql, [slug, serialized.dim, serialized.buffer]);

  const clusterId = Number(insertResult?.insertId || 0);
  if (!clusterId) {
    throw new Error('cluster_insert_failed');
  }

  const created = {
    id: clusterId,
    canonical_slug: slug,
    embedding: Array.isArray(embedding) ? embedding : [],
  };
  inMemoryClusterById.set(clusterId, created);
  // Invalidate the list snapshot so the next lookup reloads it and sees the new cluster.
  inMemoryClusterList = { expiresAt: 0, items: [] };
  return created;
};
|
|
269
|
+
|
|
270
|
+
/**
 * Requests an embedding for the text from the configured embedding model.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The finite vector values, or null when no client is
 *   available or the response contains no usable vector.
 */
const generateEmbedding = async (text) => {
  const client = resolveOpenAIClient();
  if (!client) return null;

  const response = await client.embeddings.create({ model: EMBEDDING_MODEL, input: text });

  const rawVector = response?.data?.[0]?.embedding;
  if (!Array.isArray(rawVector) || rawVector.length === 0) return null;

  const finiteValues = [];
  for (const entry of rawVector) {
    const numeric = Number(entry);
    if (Number.isFinite(numeric)) finiteValues.push(numeric);
  }
  return finiteValues.length > 0 ? finiteValues : null;
};
|
|
285
|
+
|
|
286
|
+
/**
 * Asks the slug model for a canonical 1-2 word slug for the suggestion.
 * This is best effort: any failure (request error, empty or malformed JSON reply)
 * yields the slug derived locally from the suggestion text.
 *
 * @param {string} suggestionText - Suggestion to summarize.
 * @returns {Promise<string>} A normalized slug; never empty.
 */
const generateCanonicalSlug = async (suggestionText) => {
  const fallback = fallbackSlugFromSuggestion(suggestionText);
  const client = resolveOpenAIClient();
  if (!client) return fallback;

  try {
    const completion = await client.chat.completions.create({
      model: SLUG_MODEL,
      temperature: 0,
      max_tokens: 32,
      response_format: { type: 'json_object' },
      messages: [
        {
          role: 'system',
          content: 'Normalize short theme phrases into a canonical 1-2 word lowercase slug with underscores. Return JSON: {"slug":"..."}',
        },
        {
          role: 'user',
          content: String(suggestionText || ''),
        },
      ],
    });
    const content = String(completion?.choices?.[0]?.message?.content || '').trim();
    if (!content) return fallback;
    const parsed = JSON.parse(content);
    const slug = normalizeSlug(parsed?.slug);
    return slug || fallback;
  } catch (error) {
    // Still best effort, but the failure is logged so repeated fallbacks can be diagnosed.
    logger.warn('Falha ao gerar slug canônico via LLM; usando slug de fallback.', {
      action: 'semantic_theme_cluster_slug_fallback',
      fallback_slug: fallback,
      error: error?.message,
    });
    return fallback;
  }
};
|
|
317
|
+
|
|
318
|
+
/**
 * Finds the existing cluster whose embedding is most similar to the given one.
 *
 * @param {number[]} embedding - Embedding to match.
 * @param {number} [threshold=SIMILARITY_THRESHOLD] - Minimum similarity to qualify.
 * @returns {Promise<{id: number, canonical_slug: (string|null), similarity: number}|null>}
 *   The best qualifying cluster, or null when none reaches the threshold.
 */
const resolveClusterBySimilarity = async (embedding, threshold = SIMILARITY_THRESHOLD) => {
  const candidates = await listSemanticClusters();
  if (!candidates.length) return null;

  return candidates.reduce((winner, candidate) => {
    const score = cosineSimilarity(embedding, candidate.embedding);
    if (score < threshold) return winner;
    // Only a strictly higher score replaces the winner, so the first of equal scores is kept.
    if (winner && !(score > winner.similarity)) return winner;
    return {
      id: candidate.id,
      canonical_slug: candidate.canonical_slug,
      similarity: score,
    };
  }, null);
};
|
|
336
|
+
|
|
337
|
+
// Upper bound for memoized suggestion resolutions kept in process memory.
const SUGGESTION_MEMORY_CACHE_MAX_ENTRIES = 5_000;

/**
 * Memoizes a resolved payload for the TTL, keeping the cache bounded.
 * When the cache is full, expired entries are swept first; if it is still full,
 * the oldest insertions are evicted.
 *
 * @param {string} normalizedSuggestion - Cache key.
 * @param {object} payload - Resolution result to remember.
 * @returns {object} The same payload, for chaining.
 */
const rememberSuggestionResolution = (normalizedSuggestion, payload) => {
  const now = Date.now();
  if (inMemorySuggestionCache.size >= SUGGESTION_MEMORY_CACHE_MAX_ENTRIES) {
    for (const [key, entry] of inMemorySuggestionCache) {
      if (entry.expiresAt <= now) inMemorySuggestionCache.delete(key);
    }
    // Map iteration follows insertion order, so this evicts the oldest entries.
    for (const key of inMemorySuggestionCache.keys()) {
      if (inMemorySuggestionCache.size < SUGGESTION_MEMORY_CACHE_MAX_ENTRIES) break;
      inMemorySuggestionCache.delete(key);
    }
  }
  inMemorySuggestionCache.set(normalizedSuggestion, {
    expiresAt: now + RESOLUTION_CACHE_TTL_MS,
    value: payload,
  });
  return payload;
};

/**
 * Writes a fresh resolution to the persistent cache and memoizes it.
 *
 * @param {object} params
 * @param {string} params.suggestionText - Suggestion as received by the resolver.
 * @param {number[]} params.embedding - Embedding computed for the suggestion.
 * @param {object} params.payload - Resolution payload (its `suggestion` is the normalized text).
 * @returns {Promise<object>} The payload.
 */
const persistSuggestionResolution = async ({ suggestionText, embedding, payload }) => {
  await upsertSuggestionCacheRow({
    suggestionText,
    normalizedText: payload.suggestion,
    semanticClusterId: payload.semantic_cluster_id,
    canonicalSlug: payload.semantic_cluster_slug,
    embedding,
    similarity: payload.similarity,
  });
  return rememberSuggestionResolution(payload.suggestion, payload);
};

/**
 * Resolves one suggestion to a semantic cluster.
 * Lookup order: in-memory memo, persisted cache row, nearest existing cluster at or
 * above the similarity threshold, and finally a newly created cluster.
 *
 * @param {string} suggestionText - Suggestion text.
 * @returns {Promise<object|null>} Payload with `semantic_cluster_id`, `semantic_cluster_slug`,
 *   `similarity`, `created`, `source` ('cache' | 'similarity' | 'new_cluster') and `suggestion`;
 *   null when the text is empty, clustering cannot run, or no embedding could be produced.
 */
const resolveSemanticCluster = async (suggestionText) => {
  const normalizedSuggestion = normalizeSuggestion(suggestionText);
  if (!normalizedSuggestion) return null;
  if (!shouldRunSemanticClustering()) return null;

  const memoryCached = inMemorySuggestionCache.get(normalizedSuggestion);
  if (memoryCached) {
    if (memoryCached.expiresAt > Date.now()) return memoryCached.value;
    // Drop the stale entry right away instead of leaving it in the map.
    inMemorySuggestionCache.delete(normalizedSuggestion);
  }

  const dbCached = await getSuggestionCacheRow(normalizedSuggestion);
  if (dbCached?.semantic_cluster_id) {
    return rememberSuggestionResolution(normalizedSuggestion, {
      semantic_cluster_id: dbCached.semantic_cluster_id,
      // A missing slug is null here too, matching the other resolution paths.
      semantic_cluster_slug: normalizeSlug(dbCached.canonical_slug) || null,
      similarity: dbCached.last_similarity,
      created: false,
      source: 'cache',
      suggestion: normalizedSuggestion,
    });
  }

  const embedding = await generateEmbedding(normalizedSuggestion);
  if (!embedding?.length) return null;

  const matched = await resolveClusterBySimilarity(embedding, SIMILARITY_THRESHOLD);
  if (matched?.id) {
    return persistSuggestionResolution({
      suggestionText,
      embedding,
      payload: {
        semantic_cluster_id: matched.id,
        semantic_cluster_slug: normalizeSlug(matched.canonical_slug) || null,
        similarity: Number(matched.similarity.toFixed(6)),
        created: false,
        source: 'similarity',
        suggestion: normalizedSuggestion,
      },
    });
  }

  // Nothing close enough exists: this suggestion becomes the seed of a new cluster.
  const canonicalSlug = await generateCanonicalSlug(normalizedSuggestion);
  const createdCluster = await createSemanticCluster({
    canonicalSlug,
    embedding,
  });
  return persistSuggestionResolution({
    suggestionText,
    embedding,
    payload: {
      semantic_cluster_id: createdCluster.id,
      semantic_cluster_slug: normalizeSlug(createdCluster.canonical_slug) || canonicalSlug,
      similarity: 1,
      created: true,
      source: 'new_cluster',
      suggestion: normalizedSuggestion,
    },
  });
};
|
|
420
|
+
|
|
421
|
+
/**
 * Chooses the dominant cluster among per-suggestion matches.
 * Ranking: most matches first, then highest similarity, then lowest cluster id.
 *
 * @param {Array<object>} matches - Resolution payloads carrying `semantic_cluster_id`.
 * @returns {{semantic_cluster_id: number, semantic_cluster_slug: (string|null), count: number, best_similarity: number}|null}
 *   The winning cluster summary, or null when no match has a usable id.
 */
const pickPrimaryCluster = (matches) => {
  if (!Array.isArray(matches) || matches.length === 0) return null;

  const statsByClusterId = new Map();
  matches.forEach((match) => {
    const clusterId = Number(match?.semantic_cluster_id || 0);
    if (!clusterId) return;

    let stats = statsByClusterId.get(clusterId);
    if (!stats) {
      stats = {
        semantic_cluster_id: clusterId,
        semantic_cluster_slug: normalizeSlug(match?.semantic_cluster_slug || '') || null,
        count: 0,
        best_similarity: -1,
      };
      statsByClusterId.set(clusterId, stats);
    }

    stats.count += 1;
    stats.best_similarity = Math.max(stats.best_similarity, Number(match?.similarity || 0));
    // Adopt a slug from a later match when the first one carried none.
    if (!stats.semantic_cluster_slug && match?.semantic_cluster_slug) {
      stats.semantic_cluster_slug = normalizeSlug(match.semantic_cluster_slug);
    }
  });

  const byDominance = (left, right) => {
    if (right.count !== left.count) return right.count - left.count;
    if (right.best_similarity !== left.best_similarity) return right.best_similarity - left.best_similarity;
    return left.semantic_cluster_id - right.semantic_cluster_id;
  };
  const [winner] = [...statsByClusterId.values()].sort(byDominance);
  return winner || null;
};
|
|
448
|
+
|
|
449
|
+
/**
 * Normalizes and de-duplicates suggestions, keeping first-seen order and
 * stopping at MAX_SUGGESTIONS_PER_ASSET entries.
 *
 * @param {Array} [values=[]] - Raw suggestions; a non-array yields an empty list.
 * @returns {string[]} Unique, non-empty normalized suggestions.
 */
const sanitizeSuggestions = (values = []) => {
  if (!Array.isArray(values)) return [];

  // A Set iterates in insertion order, so it serves as both the seen-check and the output.
  const unique = new Set();
  for (const candidate of values) {
    if (unique.size >= MAX_SUGGESTIONS_PER_ASSET) break;
    const normalized = normalizeSuggestion(candidate);
    if (normalized) unique.add(normalized);
  }
  return [...unique];
};
|
|
461
|
+
|
|
462
|
+
/**
 * Resolves every suggestion of an asset and selects the dominant cluster.
 *
 * @param {object} [params]
 * @param {Array} [params.suggestions=[]] - Candidate suggestions.
 * @param {string} [params.fallbackText=''] - Used only when no suggestion survives sanitizing.
 * @returns {Promise<{semantic_cluster_id: number, semantic_cluster_slug: string, matches: object[]}|null>}
 *   The primary cluster plus all individual matches, or null when nothing resolved.
 */
const resolveSuggestionsToPrimaryCluster = async ({ suggestions = [], fallbackText = '' } = {}) => {
  const candidates = sanitizeSuggestions(suggestions);
  if (candidates.length === 0 && fallbackText) {
    candidates.push(normalizeSuggestion(fallbackText));
  }
  if (candidates.length === 0) return null;

  // Resolved one at a time, in order; each call may create a cluster a later one can match.
  const matches = [];
  for (let position = 0; position < candidates.length; position += 1) {
    const resolved = await resolveSemanticCluster(candidates[position]);
    if (resolved?.semantic_cluster_id) matches.push(resolved);
  }
  if (matches.length === 0) return null;

  const primary = pickPrimaryCluster(matches);
  if (!primary?.semantic_cluster_id) return null;

  const slug = primary.semantic_cluster_slug || fallbackSlugFromSuggestion(candidates[0] || '');
  return {
    semantic_cluster_id: primary.semantic_cluster_id,
    semantic_cluster_slug: slug,
    matches,
  };
};
|
|
487
|
+
|
|
488
|
+
/**
 * Schedules one drain pass of the task queue on the next setImmediate tick.
 * Calls made while a pass is already scheduled do nothing.
 */
const scheduleQueueDrain = () => {
  if (queueDrainScheduled) return;

  const runDrain = () => {
    // Reset before draining so work enqueued during the drain can schedule another pass.
    queueDrainScheduled = false;
    void drainSemanticClusterQueue();
  };

  queueDrainScheduled = true;
  setImmediate(runDrain);
};
|
|
496
|
+
|
|
497
|
+
/**
 * Runs one clustering task: resolves the asset's suggestions to a cluster and
 * stores the result on its classification row. Failures are logged, never thrown.
 *
 * @param {{assetId: string, suggestions?: string[], fallbackText?: string, force?: boolean}} task
 * @returns {Promise<void>}
 */
const processSemanticClusterTask = async (task) => {
  const assetId = String(task?.assetId || '').trim();
  if (!assetId || !shouldRunSemanticClustering()) return;

  try {
    const current = await findStickerClassificationByAssetId(assetId);
    if (!current) return;
    // Assets that already have a cluster are skipped unless reprocessing is enabled or forced.
    if (current.semantic_cluster_id && !SEMANTIC_CLUSTER_REPROCESS_EXISTING && !task?.force) {
      return;
    }

    // An empty array is truthy, so `task.suggestions || stored` would never reach the stored
    // suggestions for tasks enqueued without any; check for actual content instead.
    const queuedSuggestions = Array.isArray(task?.suggestions) && task.suggestions.length > 0
      ? task.suggestions
      : null;

    const result = await resolveSuggestionsToPrimaryCluster({
      suggestions: queuedSuggestions || current.llm_pack_suggestions || [],
      fallbackText: task?.fallbackText || current.category || '',
    });
    if (!result?.semantic_cluster_id) return;

    await updateStickerClassificationSemanticCluster(assetId, {
      semanticClusterId: result.semantic_cluster_id,
      semanticClusterSlug: result.semantic_cluster_slug,
    });
  } catch (error) {
    logger.warn('Falha ao processar clusterização semântica de sugestão LLM.', {
      action: 'semantic_theme_cluster_task_failed',
      asset_id: assetId,
      error: error?.message,
    });
  }
};
|
|
526
|
+
|
|
527
|
+
/**
 * Starts pending tasks, oldest first, until the concurrency limit is reached
 * or the queue is empty. Each finished task frees its slot and schedules another pass.
 *
 * @returns {Promise<void>}
 */
const drainSemanticClusterQueue = async () => {
  const releaseSlot = () => {
    queueRunning = Math.max(0, queueRunning - 1);
    scheduleQueueDrain();
  };

  // Deleting the entry being visited is safe while iterating a Map.
  for (const [assetId, task] of pendingTasksByAssetId) {
    if (queueRunning >= CLUSTERING_CONCURRENCY) break;
    pendingTasksByAssetId.delete(assetId);
    queueRunning += 1;
    void processSemanticClusterTask(task)
      .catch(() => {})
      .finally(releaseSlot);
  }
};
|
|
542
|
+
|
|
543
|
+
/**
 * Queues background cluster resolution for an asset.
 * A task already pending for the same asset is replaced by the new one.
 *
 * @param {object} [params]
 * @param {string} params.assetId - Asset whose classification should receive a cluster.
 * @param {Array} [params.suggestions=[]] - Suggestions to resolve.
 * @param {string} [params.fallbackText=''] - Text used when there are no suggestions (max 255 chars).
 * @param {boolean} [params.force=false] - Resolve even if the asset already has a cluster.
 * @returns {boolean} true when queued; false when the asset id is empty or the feature is disabled.
 */
export const enqueueSemanticClusterResolution = ({
  assetId,
  suggestions = [],
  fallbackText = '',
  force = false,
} = {}) => {
  const queueKey = String(assetId || '').trim();
  if (!(queueKey && ENABLE_SEMANTIC_CLUSTERING)) return false;

  const task = {
    assetId: queueKey,
    suggestions: sanitizeSuggestions(suggestions),
    fallbackText: String(fallbackText || '').trim().slice(0, 255),
    force: Boolean(force),
  };
  pendingTasksByAssetId.set(queueKey, task);
  scheduleQueueDrain();
  return true;
};
|
|
561
|
+
|
|
562
|
+
/**
 * Effective semantic clustering settings, as resolved when the module was loaded.
 */
export const semanticClusterConfig = {
  // Feature flag.
  enabled: ENABLE_SEMANTIC_CLUSTERING,
  // Matching.
  similarity_threshold: SIMILARITY_THRESHOLD,
  // Models.
  embedding_model: EMBEDDING_MODEL,
  slug_model: SLUG_MODEL,
  // Limits.
  max_cluster_scan: MAX_CLUSTER_SCAN,
  queue_concurrency: CLUSTERING_CONCURRENCY,
};
|
|
570
|
+
|
|
571
|
+
/**
 * Reports the feature flag only; it does not check whether an OpenAI client is available.
 *
 * @returns {boolean}
 */
export const isSemanticClusteringEnabled = () => {
  return ENABLE_SEMANTIC_CLUSTERING;
};
|