thumbgate 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.claude-plugin/README.md +45 -34
  2. package/.claude-plugin/marketplace.json +3 -3
  3. package/.claude-plugin/plugin.json +3 -3
  4. package/.well-known/llms.txt +1 -1
  5. package/.well-known/mcp/server-card.json +1 -1
  6. package/README.md +26 -2
  7. package/adapters/README.md +4 -1
  8. package/adapters/chatgpt/INSTALL.md +39 -19
  9. package/adapters/claude/.mcp.json +2 -2
  10. package/adapters/codex/config.toml +2 -2
  11. package/adapters/mcp/server-stdio.js +10 -4
  12. package/adapters/opencode/opencode.json +1 -1
  13. package/adapters/perplexity/.mcp.json +36 -0
  14. package/adapters/perplexity/config.toml +16 -0
  15. package/adapters/perplexity/opencode.json +29 -0
  16. package/bin/cli.js +246 -90
  17. package/config/mcp-allowlists.json +11 -3
  18. package/package.json +28 -13
  19. package/plugins/claude-codex-bridge/.claude-plugin/plugin.json +1 -1
  20. package/plugins/claude-codex-bridge/.mcp.json +1 -1
  21. package/plugins/codex-profile/.codex-plugin/plugin.json +1 -1
  22. package/plugins/codex-profile/.mcp.json +1 -1
  23. package/plugins/codex-profile/INSTALL.md +1 -1
  24. package/plugins/codex-profile/README.md +1 -1
  25. package/plugins/cursor-marketplace/.cursor-plugin/plugin.json +1 -1
  26. package/plugins/opencode-profile/INSTALL.md +1 -1
  27. package/public/index.html +121 -24
  28. package/public/llm-context.md +17 -1
  29. package/scripts/ai-search-visibility.js +10 -36
  30. package/scripts/audit-trail.js +25 -15
  31. package/scripts/auto-wire-hooks.js +127 -0
  32. package/scripts/cli-demo.js +102 -0
  33. package/scripts/cli-schema.js +285 -0
  34. package/scripts/cli-status.js +166 -0
  35. package/scripts/cross-encoder-reranker.js +235 -0
  36. package/scripts/explore-subcommands.js +277 -0
  37. package/scripts/explore.js +569 -0
  38. package/scripts/feedback-loop.js +20 -6
  39. package/scripts/lesson-inference.js +27 -2
  40. package/scripts/lesson-reranker.js +263 -0
  41. package/scripts/lesson-retrieval.js +34 -17
  42. package/scripts/lesson-search.js +69 -0
  43. package/scripts/perplexity-client.js +210 -0
  44. package/scripts/perplexity-command-center.js +644 -0
  45. package/scripts/perplexity-marketing.js +17 -29
  46. package/scripts/prove-packaged-runtime.js +5 -4
  47. package/scripts/ralph-mode-ci.js +122 -19
  48. package/scripts/reflector-agent.js +2 -2
  49. package/scripts/session-analyzer.js +533 -0
  50. package/scripts/social-analytics/db/marketing-db.js +179 -0
  51. package/scripts/social-analytics/db/schema.sql +23 -0
  52. package/scripts/social-analytics/generate-instagram-card.js +31 -5
  53. package/scripts/social-analytics/generate-slides.js +268 -0
  54. package/scripts/social-analytics/post-video.js +316 -0
  55. package/scripts/social-analytics/publishers/zernio.js +52 -23
  56. package/scripts/statusline-local-stats.js +3 -1
  57. package/scripts/statusline.sh +15 -10
  58. package/scripts/thumbgate-bench.js +494 -0
  59. package/src/api/server.js +65 -1
  60. package/scripts/social-analytics/db/analytics.sqlite +0 -0
@@ -0,0 +1,533 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * Session Analyzer — reads Claude Code JSONL transcripts and extracts
6
+ * actionable intelligence: token usage, waste (duplicate reads), confusion
7
+ * signals, and auto-generated lessons for ThumbGate enforcement.
8
+ *
9
+ * Gives ThumbGate parity with Leo Godin's session analyzer plus enforcement
10
+ * integration via lesson-inference.js.
11
+ */
12
+
13
+ const fs = require('node:fs');
14
+ const path = require('node:path');
15
+ const os = require('node:os');
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // 1. JSONL Parsing
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Parse a Claude Code session JSONL file into an array of event objects.
 *
 * Blank lines, lines that are not valid JSON, and lines whose JSON value is
 * not a plain object (`null`, numbers, strings, arrays) are skipped, so every
 * returned element can safely be dereferenced (`event.type`, `event.message`).
 *
 * @param {string} sessionPath - path to the .jsonl file
 * @returns {Array<Object>} parsed events, in file order
 * @throws {Error} whatever fs.readFileSync throws when the file is unreadable
 */
function parseSessionJSONL(sessionPath) {
  const raw = fs.readFileSync(sessionPath, 'utf-8');
  const events = [];

  for (const line of raw.split('\n')) {
    if (line.trim().length === 0) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Best effort: a truncated or corrupt line must not abort the analysis.
      continue;
    }

    // `null`, `42` or `"text"` are valid JSON but not events; letting them
    // through makes downstream `event.type` lookups throw or misbehave.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      events.push(parsed);
    }
  }

  return events;
}
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // 2. Token Usage Tracking
43
+ // ---------------------------------------------------------------------------
44
+
45
/**
 * Collect token usage reported on assistant events.
 *
 * Only events with `type === 'assistant'` and a `message.usage` object are
 * counted. Missing counters are treated as 0. `totals.total` is the sum of
 * input and output tokens only (cache counters are reported separately).
 *
 * @param {Array<Object>} events
 * @returns {{ turns: Array<Object>, totals: Object }} one entry per counted
 *   turn (with running input/output sums) plus the session-wide totals
 */
function analyzeTokenUsage(events) {
  const totals = { input: 0, output: 0, cacheRead: 0, cacheCreation: 0 };
  const turns = [];

  for (const event of events) {
    const usage = event.type === 'assistant' ? event.message?.usage : null;
    if (!usage) continue;

    const turn = {
      timestamp: event.timestamp || null,
      input: usage.input_tokens || 0,
      output: usage.output_tokens || 0,
      cacheRead: usage.cache_read_input_tokens || 0,
      cacheCreation: usage.cache_creation_input_tokens || 0,
    };

    totals.input += turn.input;
    totals.output += turn.output;
    totals.cacheRead += turn.cacheRead;
    totals.cacheCreation += turn.cacheCreation;

    turns.push({
      ...turn,
      cumulativeInput: totals.input,
      cumulativeOutput: totals.output,
    });
  }

  return {
    turns,
    totals: { ...totals, total: totals.input + totals.output },
  };
}
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // 3. Waste Detection — Duplicate File Reads
97
+ // ---------------------------------------------------------------------------
98
+
99
/**
 * Find files read more than once in the session.
 *
 * Counts `Read` tool_use blocks on assistant events, keyed by
 * `input.file_path`. Every read of a file after the first is "wasted";
 * `wasteScore` is the rounded percentage of wasted reads among all reads.
 *
 * @param {Array<Object>} events
 * @returns {{
 *   duplicateReads: Object<string, number>,
 *   wasteScore: number,
 *   totalReads: number,
 *   wastedReads: number
 * }} `duplicateReads` maps each file read 2+ times to its read count
 */
function detectDuplicateReads(events) {
  // A Map (not a plain object) so paths such as "constructor" or "__proto__"
  // cannot collide with properties inherited from Object.prototype.
  const readCounts = new Map();

  for (const event of events) {
    if (event.type !== 'assistant') continue;
    const content = event.message?.content;
    if (!Array.isArray(content)) continue;

    for (const block of content) {
      // `block?.` tolerates null/undefined entries in the content array.
      if (block?.type !== 'tool_use' || block.name !== 'Read') continue;
      const filePath = block.input?.file_path;
      if (!filePath) continue;

      const key = String(filePath);
      readCounts.set(key, (readCounts.get(key) ?? 0) + 1);
    }
  }

  // Object.fromEntries defines own data properties, so unusual keys survive.
  const duplicateReads = Object.fromEntries(
    [...readCounts].filter(([, count]) => count >= 2),
  );

  let totalReads = 0;
  let wastedReads = 0;
  for (const count of readCounts.values()) {
    totalReads += count;
    wastedReads += count - 1; // contributes 0 for files read exactly once
  }

  const wasteScore = totalReads > 0 ? Math.round((wastedReads / totalReads) * 100) : 0;

  return { duplicateReads, wasteScore, totalReads, wastedReads };
}
133
+
134
+ // ---------------------------------------------------------------------------
135
+ // 4. Confusion Signal Detection
136
+ // ---------------------------------------------------------------------------
137
+
138
// Lower-case phrases searched for in assistant text, grouped by the kind of
// confusion they are taken to indicate. Entries may be stems
// (e.g. "restructur"), so matching is by substring rather than whole word.
const CONFUSION_KEYWORDS = {
  // The assistant revising something it said or did earlier.
  backtracking: [
    'actually',
    'wait',
    'wrong',
    'mistake',
    'let me reconsider',
    'should have',
  ],
  // An attempt that had to be redone.
  rework: [
    'revert',
    'undo',
    'let me try',
    "didn't work",
    'failed',
  ],
  // Working around a problem instead of resolving it.
  workarounds: [
    'circular',
    'workaround',
    'hack',
  ],
  // Work growing beyond the original request.
  scopeCreep: [
    'refactor',
    'restructur',
    'redesign',
  ],
};
144
+
145
/**
 * Scan every assistant text block in the session for confusion keywords.
 *
 * Each hit is stamped with the timestamp of the event it was found in
 * (or null when the event has none).
 *
 * @param {Array<Object>} events
 * @returns {Array<{ category: string, keyword: string, context: string, timestamp: string|null }>}
 */
function detectConfusionSignals(events) {
  const found = [];

  for (const event of events) {
    const when = event.timestamp || null;
    for (const { text } of assistantTextBlocks(event)) {
      for (const signal of detectConfusionInText(text, when)) {
        found.push(signal);
      }
    }
  }

  return found;
}
161
+
162
/**
 * Return the non-empty `text` content blocks of an assistant event.
 * Non-assistant events, and events whose `message.content` is not an array,
 * yield an empty array.
 *
 * @param {Object} event
 * @returns {Array<Object>}
 */
function assistantTextBlocks(event) {
  const content = event.type === 'assistant' ? event.message?.content : null;
  if (!Array.isArray(content)) {
    return [];
  }
  const isNonEmptyText = (block) => block.type === 'text' && Boolean(block.text);
  return content.filter(isNonEmptyText);
}
168
+
169
/**
 * Find every confusion keyword occurrence in one piece of text.
 *
 * Matching is done on a lower-cased copy; the context snippet is cut from the
 * original text. Results are ordered by category, then keyword, then position.
 *
 * @param {string} text
 * @param {string|null} timestamp - copied onto every returned signal
 * @returns {Array<{ category: string, keyword: string, context: string, timestamp: string|null }>}
 */
function detectConfusionInText(text, timestamp) {
  const haystack = text.toLowerCase();

  return Object.entries(CONFUSION_KEYWORDS).flatMap(([category, keywords]) =>
    keywords.flatMap((keyword) =>
      keywordIndexes(haystack, keyword).map((idx) => ({
        category,
        keyword,
        context: confusionContext(text, idx, keyword),
        timestamp,
      })),
    ),
  );
}
188
+
189
/**
 * Return the start index of every occurrence of `keyword` in `text`.
 *
 * A match only counts when it starts a word, i.e. the preceding character is
 * not a letter, digit or underscore. This keeps "wait" from firing inside
 * "await" while still letting stems such as "restructur" match
 * "restructuring". Accepted matches do not overlap.
 *
 * @param {string} text - text to search (callers pass it lower-cased)
 * @param {string} keyword - phrase or stem to look for
 * @returns {number[]} ascending match positions; empty for an empty keyword
 */
function keywordIndexes(text, keyword) {
  const indexes = [];
  // indexOf('') always matches at the cursor, which would never advance.
  if (!keyword) return indexes;

  const wordChar = /[\p{L}\p{N}_]/u;
  let idx = text.indexOf(keyword);

  while (idx !== -1) {
    const startsWord = idx === 0 || !wordChar.test(text[idx - 1]);
    if (startsWord) {
      indexes.push(idx);
      idx = text.indexOf(keyword, idx + keyword.length);
    } else {
      // Rejected mid-word hit: resume right after it so nothing is skipped.
      idx = text.indexOf(keyword, idx + 1);
    }
  }

  return indexes;
}
198
+
199
/**
 * Cut a one-line snippet around a keyword hit: up to 40 characters on each
 * side of the match, newlines turned into spaces, surrounding whitespace
 * trimmed.
 *
 * @param {string} text - original (not lower-cased) text
 * @param {number} idx - start index of the match
 * @param {string} keyword - the matched keyword (only its length is used)
 * @returns {string}
 */
function confusionContext(text, idx, keyword) {
  const PADDING = 40;
  const from = Math.max(idx - PADDING, 0);
  const to = Math.min(idx + keyword.length + PADDING, text.length);
  const snippet = text.slice(from, to);
  return snippet.split('\n').join(' ').trim();
}
204
+
205
+ // ---------------------------------------------------------------------------
206
+ // 5. Session Summary
207
+ // ---------------------------------------------------------------------------
208
+
209
/**
 * Count tool calls made by the assistant and collect the files they touched.
 *
 * Every `tool_use` block on an assistant event increments the counter for its
 * tool name. Paths are collected from the `input.file_path` of `Read`,
 * `Write` and `Edit` calls, de-duplicated, in first-seen order.
 *
 * @param {Array<Object>} events
 * @returns {{ toolCounts: Object<string, number>, filesTouched: string[] }}
 */
function extractToolUsage(events) {
  const FILE_TOOLS = ['Read', 'Write', 'Edit'];
  const toolCounts = {};
  const touched = new Set();

  for (const event of events) {
    const content = event.type === 'assistant' ? event.message?.content : null;
    if (!Array.isArray(content)) continue;

    for (const block of content) {
      if (block.type !== 'tool_use') continue;

      const toolName = block.name;
      toolCounts[toolName] = (toolCounts[toolName] || 0) + 1;

      const filePath = block.input?.file_path;
      if (FILE_TOOLS.includes(toolName) && filePath) {
        touched.add(filePath);
      }
    }
  }

  return { toolCounts, filesTouched: [...touched] };
}
235
+
236
/**
 * Build the full summary for one session transcript: event count, duration,
 * token totals, tool usage, confusion signals and duplicate-read waste.
 *
 * Duration is measured between the earliest and latest parseable event
 * timestamps. With fewer than two such timestamps it is reported as 0 ms and
 * `startTime` / `endTime` stay null.
 *
 * @param {string} sessionPath - path to the .jsonl transcript
 * @returns {Object} summary with `sessionPath`, `eventCount`, `duration`,
 *   `tokens`, `tokenTurns`, `toolCounts`, `filesTouched`, `confusionSignals`,
 *   `confusionDetails` and `waste`
 */
function sessionSummary(sessionPath) {
  const events = parseSessionJSONL(sessionPath);
  const tokens = analyzeTokenUsage(events);
  const waste = detectDuplicateReads(events);
  const confusion = detectConfusionSignals(events);
  const { toolCounts, filesTouched } = extractToolUsage(events);

  // Single-pass min/max. Spreading every timestamp into Math.min/Math.max
  // passes one argument per event and throws RangeError on very long
  // transcripts.
  let earliest = Infinity;
  let latest = -Infinity;
  let timestampCount = 0;
  for (const event of events) {
    if (!event.timestamp) continue;
    const ms = new Date(event.timestamp).getTime();
    if (!Number.isFinite(ms)) continue; // unparseable timestamp
    timestampCount += 1;
    if (ms < earliest) earliest = ms;
    if (ms > latest) latest = ms;
  }

  let durationMs = 0;
  let startTime = null;
  let endTime = null;
  if (timestampCount >= 2) {
    startTime = new Date(earliest).toISOString();
    endTime = new Date(latest).toISOString();
    durationMs = latest - earliest;
  }

  return {
    sessionPath,
    eventCount: events.length,
    duration: {
      ms: durationMs,
      human: formatDuration(durationMs),
      startTime,
      endTime,
    },
    tokens: tokens.totals,
    tokenTurns: tokens.turns.length,
    toolCounts,
    filesTouched,
    confusionSignals: confusion.length,
    confusionDetails: confusion,
    waste: {
      duplicateReads: waste.duplicateReads,
      wasteScore: waste.wasteScore,
      totalReads: waste.totalReads,
      wastedReads: waste.wastedReads,
    },
  };
}
287
+
288
/**
 * Render a millisecond duration as a short human-readable string:
 * "<n>ms" below one second, then "<s>s", "<m>m <s>s" or "<h>h <m>m".
 * Sub-unit remainders are dropped (floored), not rounded.
 *
 * @param {number} ms
 * @returns {string}
 */
function formatDuration(ms) {
  if (ms < 1000) {
    return `${ms}ms`;
  }

  const totalSeconds = Math.floor(ms / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);

  if (totalHours > 0) {
    const minutesPart = totalMinutes % 60;
    return `${totalHours}h ${minutesPart}m`;
  }
  if (totalMinutes > 0) {
    const secondsPart = totalSeconds % 60;
    return `${totalMinutes}m ${secondsPart}s`;
  }
  return `${totalSeconds}s`;
}
297
+
298
+ // ---------------------------------------------------------------------------
299
+ // 6. Integration with ThumbGate Lessons
300
+ // ---------------------------------------------------------------------------
301
+
302
/**
 * Summarise a session and turn its findings into ThumbGate lessons.
 *
 * Two kinds of lesson are created through `createLesson` from
 * ./lesson-inference:
 *   - one per (category, keyword) confusion signal seen at least twice;
 *   - one for duplicate reads when the waste score is above 20%.
 *
 * @param {string} sessionPath - path to the .jsonl transcript
 * @returns {{ summary: Object, lessonsCreated: Array }} the session summary
 *   and whatever `createLesson` returned for each lesson
 */
function analyzeAndCreateLessons(sessionPath) {
  const summary = sessionSummary(sessionPath);

  // Bucket signals per category/keyword pair, keeping at most three example
  // contexts for each.
  const buckets = new Map();
  for (const { category, keyword, context } of summary.confusionDetails) {
    const bucketKey = `${category}:${keyword}`;
    let bucket = buckets.get(bucketKey);
    if (!bucket) {
      bucket = { category, keyword, count: 0, contexts: [] };
      buckets.set(bucketKey, bucket);
    }
    bucket.count += 1;
    if (bucket.contexts.length < 3) {
      bucket.contexts.push(context);
    }
  }

  // Loaded lazily, only once a session has actually been summarised.
  const { createLesson } = require('./lesson-inference');
  const provenance = `Auto-detected from session transcript at ${sessionPath}`;

  // A single occurrence is treated as noise; repeats become lessons.
  const lessonsCreated = [...buckets.values()]
    .filter(({ count }) => count >= 2)
    .map(({ category, keyword, count, contexts }) => createLesson({
      signal: 'negative',
      inferredLesson: `AVOID: ${category} pattern detected — "${keyword}" appeared ${count} times. Example: "${contexts[0]}"`,
      triggerMessage: `Session analysis: confusion signal "${keyword}" (${category})`,
      priorSummary: provenance,
      confidence: Math.min(90, 50 + count * 10),
      tags: ['session-analysis', category, 'auto-learned'],
      metadata: {
        source: 'session-analyzer',
        keyword,
        occurrences: count,
        sessionPath,
      },
    }));

  const { wasteScore, duplicateReads } = summary.waste;
  const duplicateEntries = Object.entries(duplicateReads);

  if (wasteScore > 20 && duplicateEntries.length > 0) {
    // Name the three most re-read files in the lesson text.
    const topDuplicates = duplicateEntries
      .sort(([, countA], [, countB]) => countB - countA)
      .slice(0, 3)
      .map(([fp, count]) => `${path.basename(fp)} (${count}x)`)
      .join(', ');

    lessonsCreated.push(createLesson({
      signal: 'negative',
      inferredLesson: `AVOID: duplicate file reads detected (waste score ${wasteScore}%). Top offenders: ${topDuplicates}`,
      triggerMessage: 'Session analysis: duplicate Read tool calls',
      priorSummary: provenance,
      confidence: Math.min(85, 40 + wasteScore),
      tags: ['session-analysis', 'waste', 'duplicate-reads', 'auto-learned'],
      metadata: {
        source: 'session-analyzer',
        wasteScore,
        duplicateReads,
        sessionPath,
      },
    }));
  }

  return { summary, lessonsCreated };
}
378
+
379
+ // ---------------------------------------------------------------------------
380
+ // 7. Session Discovery
381
+ // ---------------------------------------------------------------------------
382
+
383
/**
 * List recent Claude Code sessions found under ~/.claude/projects/.
 *
 * Every `*.jsonl` file directly inside a project directory is a session.
 * Unreadable directories and files are skipped rather than reported, so the
 * result is best-effort. Sessions are sorted newest first by mtime.
 *
 * @param {Object} [opts]
 * @param {number} [opts.recent=10] - maximum number of sessions to return
 * @returns {Array<{ path: string, project: string, sessionId: string, modified: Date, size: number }>}
 */
function listSessions({ recent = 10 } = {}) {
  const SESSION_EXT = '.jsonl';
  const projectsDir = path.join(os.homedir(), '.claude', 'projects');
  if (!fs.existsSync(projectsDir)) return [];

  let projectDirs = [];
  try {
    projectDirs = fs.readdirSync(projectsDir, { withFileTypes: true });
  } catch {
    // projects dir not readable — fall through with nothing to scan
  }

  const sessions = [];

  for (const pd of projectDirs) {
    if (!pd.isDirectory()) continue;
    const projectPath = path.join(projectsDir, pd.name);

    let files;
    try {
      files = fs.readdirSync(projectPath);
    } catch {
      continue; // skip inaccessible directories
    }

    for (const file of files) {
      if (!file.endsWith(SESSION_EXT)) continue;
      const fullPath = path.join(projectPath, file);
      try {
        const stat = fs.statSync(fullPath);
        sessions.push({
          path: fullPath,
          project: pd.name,
          // Strip only the trailing extension; String.replace would remove
          // the first ".jsonl" anywhere in the name.
          sessionId: file.slice(0, -SESSION_EXT.length),
          modified: stat.mtime,
          size: stat.size,
        });
      } catch {
        // skip inaccessible files
      }
    }
  }

  sessions.sort((a, b) => b.modified - a.modified);
  return sessions.slice(0, recent);
}
429
+
430
+ // ---------------------------------------------------------------------------
431
+ // 8. CLI
432
+ // ---------------------------------------------------------------------------
433
+
434
/**
 * Command-line entry point.
 *
 * Reads the command and its operand from process.argv, prints the result as
 * pretty-printed JSON on stdout, and exits with status 1 when the command is
 * missing or unknown, or when a required session path is absent.
 *
 * Commands: summary | tokens | waste | confusion | auto-learn <session-path>,
 * and list [--recent N].
 */
function runCLI() {
  const args = process.argv.slice(2);
  const [command] = args;

  if (!command) {
    console.log(`Usage:
  node scripts/session-analyzer.js summary <session-path>
  node scripts/session-analyzer.js tokens <session-path>
  node scripts/session-analyzer.js waste <session-path>
  node scripts/session-analyzer.js confusion <session-path>
  node scripts/session-analyzer.js auto-learn <session-path>
  node scripts/session-analyzer.js list [--recent N]`);
    process.exit(1);
  }

  // Every command except `list` takes the transcript path as its operand.
  const requireSessionPath = () => {
    const sessionPath = args[1];
    if (!sessionPath) {
      console.error('Error: session path required');
      process.exit(1);
    }
    return sessionPath;
  };
  const printJSON = (value) => console.log(JSON.stringify(value, null, 2));

  switch (command) {
    case 'summary':
      printJSON(sessionSummary(requireSessionPath()));
      break;
    case 'tokens':
      printJSON(analyzeTokenUsage(parseSessionJSONL(requireSessionPath())));
      break;
    case 'waste':
      printJSON(detectDuplicateReads(parseSessionJSONL(requireSessionPath())));
      break;
    case 'confusion':
      printJSON(detectConfusionSignals(parseSessionJSONL(requireSessionPath())));
      break;
    case 'auto-learn': {
      const result = analyzeAndCreateLessons(requireSessionPath());
      printJSON({
        confusionSignals: result.summary.confusionSignals,
        wasteScore: result.summary.waste.wasteScore,
        lessonsCreated: result.lessonsCreated.length,
        lessons: result.lessonsCreated.map((l) => ({ id: l.id, lesson: l.lesson })),
      });
      break;
    }
    case 'list': {
      const recentIdx = args.indexOf('--recent');
      const parsed = recentIdx === -1
        ? Number.NaN
        : Number.parseInt(args[recentIdx + 1], 10);
      // Only a positive count is meaningful. A negative value would reach
      // Array.prototype.slice as an end offset and drop the oldest sessions
      // instead of limiting the list.
      const recent = Number.isInteger(parsed) && parsed > 0 ? parsed : 10;
      printJSON(listSessions({ recent }));
      break;
    }
    default:
      console.error(`Unknown command: ${command}`);
      process.exit(1);
  }
}
507
+
508
/**
 * Tell whether this file is the script Node was started with, by comparing
 * the resolved process.argv[1] with this module's __filename.
 *
 * @returns {boolean}
 */
function isCliEntryPoint() {
  const entryScript = process.argv[1];
  if (!entryScript) {
    return false;
  }
  return path.resolve(entryScript) === __filename;
}
511
+
512
+ // ---------------------------------------------------------------------------
513
+ // Exports
514
+ // ---------------------------------------------------------------------------
515
+
516
+ module.exports = {
517
+ parseSessionJSONL,
518
+ analyzeTokenUsage,
519
+ detectDuplicateReads,
520
+ detectConfusionSignals,
521
+ extractToolUsage,
522
+ sessionSummary,
523
+ analyzeAndCreateLessons,
524
+ listSessions,
525
+ formatDuration,
526
+ runCLI,
527
+ isCliEntryPoint,
528
+ CONFUSION_KEYWORDS,
529
+ };
530
+
531
+ if (isCliEntryPoint()) {
532
+ runCLI();
533
+ }
@@ -0,0 +1,179 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * marketing-db.js
5
+ * Unified marketing activity database.
6
+ *
7
+ * Tracks every post, video, article, and reply published to any platform
8
+ * so we never double-post and can measure marketing effort over time.
9
+ *
10
+ * DB file: .thumbgate/marketing-analytics.sqlite by default. The schema is
11
+ * tracked in this directory, but runtime SQLite files stay local/ignored.
12
+ *
13
+ * Usage:
14
+ * const db = require('./marketing-db');
15
+ * if (db.isDuplicate('twitter', contentHash)) return;
16
+ * const result = await publish(...);
17
+ * db.record({ type: 'post', platform: 'twitter', postUrl: result.url, contentHash, campaign: 'v1.4.1' });
18
+ */
19
+
20
+ const path = require('node:path');
21
+ const fs = require('node:fs');
22
+ const crypto = require('node:crypto');
23
+
24
// Three directory levels above this file.
const REPO_ROOT = path.resolve(__dirname, '../../..');

// THUMBGATE_ANALYTICS_DB, when set, overrides the default database location.
const dbPathOverride = process.env.THUMBGATE_ANALYTICS_DB;
const DB_PATH = dbPathOverride
  ? path.resolve(dbPathOverride)
  : path.join(REPO_ROOT, '.thumbgate', 'marketing-analytics.sqlite');

// SQL schema file that sits next to this module.
const SCHEMA_PATH = path.join(__dirname, 'schema.sql');

// Cached connection, opened by the first getDb() call.
let _db = null;
33
+
34
/**
 * Return the shared better-sqlite3 connection, opening it on first use.
 *
 * Opening creates the parent directory if needed, switches the journal to
 * WAL, enables foreign keys and executes schema.sql. The connection is cached
 * only after all of that succeeded; on failure the handle is closed and the
 * error rethrown, so a later call retries instead of handing out a
 * half-initialised database.
 *
 * @returns {object} better-sqlite3 Database instance
 * @throws {Error} if better-sqlite3 cannot be loaded, the database cannot be
 *   opened, or the schema cannot be read or applied
 */
function getDb() {
  if (_db) return _db;

  const Database = require('better-sqlite3');
  fs.mkdirSync(path.dirname(DB_PATH), { recursive: true });
  const isNew = !fs.existsSync(DB_PATH);

  const db = new Database(DB_PATH);
  try {
    db.pragma('journal_mode = WAL');
    db.pragma('foreign_keys = ON');
    // Applied on every open; the original author notes schema.sql is
    // idempotent (CREATE IF NOT EXISTS).
    db.exec(fs.readFileSync(SCHEMA_PATH, 'utf8'));
  } catch (err) {
    db.close();
    throw err;
  }

  _db = db;
  if (isNew) console.log('[marketing-db] Created new analytics DB at', DB_PATH);
  return _db;
}
48
+
49
/**
 * Produce a deterministic dedup key for a piece of content.
 *
 * The value is stringified, trimmed, and every whitespace run collapsed to a
 * single space before hashing, so texts differing only in whitespace share a
 * key. The key is the first 32 hex characters of the SHA-256 digest.
 *
 * @param {*} content - anything String() can convert
 * @returns {string} 32-character lower-case hex string
 */
function hashContent(content) {
  const collapsed = String(content).trim().replace(/\s+/g, ' ');
  const digest = crypto.createHash('sha256');
  digest.update(collapsed);
  return digest.digest('hex').slice(0, 32);
}
57
+
58
/**
 * Look for an already-published post with the same content on a platform.
 *
 * Only rows with status 'published' count. With a positive `windowDays` the
 * search is limited to rows published within that many days before now; any
 * other value searches all rows.
 *
 * @param {string} platform        e.g. 'twitter', 'linkedin', 'youtube'
 * @param {string} contentHash     from hashContent() or a stable identifier
 * @param {number} [windowDays=7]  look-back window in days (0 = all-time)
 * @returns {object|null} matching row (id, post_url, published_at, status),
 *   or null when there is none
 */
function isDuplicate(platform, contentHash, windowDays = 7) {
  const db = getDb();

  if (!(windowDays > 0)) {
    // All-time lookup.
    const match = db.prepare(`
      SELECT id, post_url, published_at, status FROM marketing_posts
      WHERE platform = ? AND content_hash = ? AND status = 'published'
      LIMIT 1
    `).get(platform, contentHash);
    return match || null;
  }

  const windowMs = windowDays * 86_400_000;
  const cutoff = new Date(Date.now() - windowMs).toISOString();
  const match = db.prepare(`
      SELECT id, post_url, published_at, status FROM marketing_posts
      WHERE platform = ? AND content_hash = ? AND published_at >= ? AND status = 'published'
      LIMIT 1
    `).get(platform, contentHash, cutoff);
  return match || null;
}
85
+
86
/**
 * Record a marketing activity.
 *
 * Inserts a row stamped with the current time. If a row with the same
 * (platform, content_hash) already exists, its post_url, post_id, status,
 * published_at and extra_json are overwritten instead; the other columns keep
 * their stored values.
 *
 * @param {object} opts
 * @param {'post'|'video'|'article'|'reply'|'thread'} opts.type
 * @param {string} opts.platform       e.g. 'twitter', 'youtube'
 * @param {string} opts.contentHash    from hashContent()
 * @param {string} [opts.postUrl]
 * @param {string} [opts.postId]
 * @param {string} [opts.accountId]
 * @param {string} [opts.title]
 * @param {'published'|'failed'|'skipped'|'draft'} [opts.status='published']
 * @param {string[]} [opts.tags]       stored as a JSON string
 * @param {string} [opts.campaign]
 * @param {object} [opts.extra]        stored as a JSON string when present
 * @returns {number} id of the inserted or updated row
 */
function record(opts) {
  const db = getDb();
  const {
    type, platform, contentHash,
    postUrl = null, postId = null, accountId = null, title = null,
    status = 'published', tags = [], campaign = null, extra = null,
  } = opts;

  const result = db.prepare(`
    INSERT INTO marketing_posts
      (type, platform, account_id, post_id, post_url, title,
       content_hash, published_at, status, tags, campaign, extra_json)
    VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
    ON CONFLICT(platform, content_hash) DO UPDATE SET
      post_url     = excluded.post_url,
      post_id      = excluded.post_id,
      status       = excluded.status,
      published_at = excluded.published_at,
      extra_json   = excluded.extra_json
  `).run(
    type, platform, accountId, postId, postUrl, title,
    contentHash,
    new Date().toISOString(),
    status,
    JSON.stringify(tags),
    campaign,
    extra ? JSON.stringify(extra) : null,
  );

  // When the upsert takes the DO UPDATE path no row is inserted, so
  // lastInsertRowid does not identify the affected row. Resolve the id through
  // the conflict key instead.
  const row = db.prepare(`
    SELECT id FROM marketing_posts
    WHERE platform = ? AND content_hash = ?
  `).get(platform, contentHash);

  return row ? row.id : result.lastInsertRowid;
}
135
+
136
/**
 * List marketing posts, newest first.
 *
 * Each truthy filter narrows the result. A positive `days` limits it to rows
 * published within that many days before now; any other value disables the
 * time filter.
 *
 * @param {{ platform?: string, type?: string, campaign?: string, limit?: number, days?: number }} opts
 *   `limit` defaults to 50 and `days` to 30
 * @returns {object[]} matching marketing_posts rows
 */
function list({ platform, type, campaign, limit = 50, days = 30 } = {}) {
  const db = getDb();

  // Equality filters, kept only for values that were actually supplied.
  const active = [
    ['platform = ?', platform],
    ['type = ?', type],
    ['campaign = ?', campaign],
  ].filter(([, value]) => value);

  const conditions = active.map(([clause]) => clause);
  const params = active.map(([, value]) => value);

  if (days > 0) {
    const cutoff = new Date(Date.now() - days * 86_400_000).toISOString();
    conditions.push('published_at >= ?');
    params.push(cutoff);
  }

  const where = conditions.length ? `WHERE ${conditions.join(' AND ')}` : '';
  const statement = db.prepare(`
    SELECT * FROM marketing_posts
    ${where}
    ORDER BY published_at DESC
    LIMIT ?
  `);

  return statement.all(...params, limit);
}
165
+
166
/**
 * Count recorded posts per (platform, type, status) combination, largest
 * group first.
 *
 * @returns {object[]} rows with `platform`, `type`, `status` and `count`
 */
function summary() {
  const statement = getDb().prepare(`
    SELECT platform, type, status, COUNT(*) as count
    FROM marketing_posts
    GROUP BY platform, type, status
    ORDER BY count DESC
  `);
  const rows = statement.all();
  return rows;
}
178
+
179
+ module.exports = { hashContent, isDuplicate, record, list, summary, getDb };