superbrain-server 1.0.2-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/bin/superbrain.js +196 -0
  2. package/package.json +23 -0
  3. package/payload/.dockerignore +45 -0
  4. package/payload/.env.example +58 -0
  5. package/payload/Dockerfile +73 -0
  6. package/payload/analyzers/__init__.py +0 -0
  7. package/payload/analyzers/audio_transcribe.py +225 -0
  8. package/payload/analyzers/caption.py +244 -0
  9. package/payload/analyzers/music_identifier.py +346 -0
  10. package/payload/analyzers/text_analyzer.py +117 -0
  11. package/payload/analyzers/visual_analyze.py +218 -0
  12. package/payload/analyzers/webpage_analyzer.py +789 -0
  13. package/payload/analyzers/youtube_analyzer.py +320 -0
  14. package/payload/api.py +1676 -0
  15. package/payload/config/.api_keys.example +22 -0
  16. package/payload/config/model_rankings.json +492 -0
  17. package/payload/config/openrouter_free_models.json +1364 -0
  18. package/payload/config/whisper_model.txt +1 -0
  19. package/payload/config_settings.py +185 -0
  20. package/payload/core/__init__.py +0 -0
  21. package/payload/core/category_manager.py +219 -0
  22. package/payload/core/database.py +811 -0
  23. package/payload/core/link_checker.py +300 -0
  24. package/payload/core/model_router.py +1253 -0
  25. package/payload/docker-compose.yml +120 -0
  26. package/payload/instagram/__init__.py +0 -0
  27. package/payload/instagram/instagram_downloader.py +253 -0
  28. package/payload/instagram/instagram_login.py +190 -0
  29. package/payload/main.py +912 -0
  30. package/payload/requirements.txt +39 -0
  31. package/payload/reset.py +311 -0
  32. package/payload/start-docker-prod.sh +125 -0
  33. package/payload/start-docker.sh +56 -0
  34. package/payload/start.py +1302 -0
  35. package/payload/static/favicon.ico +0 -0
  36. package/payload/stop-docker.sh +16 -0
  37. package/payload/utils/__init__.py +0 -0
  38. package/payload/utils/db_stats.py +108 -0
  39. package/payload/utils/manage_token.py +91 -0
@@ -0,0 +1,811 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SQLite Database Manager for SuperBrain
4
+ Handles caching and retrieval of Instagram analysis results
5
+ Self-hosted, zero-config, file-based database
6
+ """
7
+
8
+ import sqlite3
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ from datetime import datetime
13
+
14
# Database file path; the DATABASE_PATH env var overrides the default location
# (useful for Docker deployments where the DB lives on a mounted volume).
# Default: <repo-root>/superbrain.db, two levels above this module.
DB_PATH = Path(os.getenv("DATABASE_PATH", str(Path(__file__).resolve().parent.parent / 'superbrain.db')))
16
+
17
+
18
+ class Database:
19
+ """SQLite database manager with caching functionality"""
20
+
21
    def __init__(self):
        # Resolve the DB location once at construction time.  _connect() may
        # leave _conn as None on failure, so every public method checks
        # is_connected() before touching the database (degraded mode).
        self.db_path = DB_PATH
        self._conn = None
        self._connect()
25
+
26
    def _connect(self):
        """Open the SQLite connection, enable WAL + foreign keys, create tables.

        On any failure the connection is reset to None so the rest of the
        class degrades gracefully instead of raising.
        """
        try:
            # check_same_thread=False: the connection is shared across the
            # API server's worker threads.
            self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
            self._conn.row_factory = sqlite3.Row
            # WAL mode for better concurrent read performance
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA foreign_keys=ON")
            self._create_tables()
            print(f"[OK] Connected to SQLite database: {self.db_path}")
        except Exception as e:
            print(f"[WARNING] SQLite connection failed: {e}")
            self._conn = None
38
+
39
    def _create_tables(self):
        """Create the schema if missing and apply in-place column migrations.

        SQLite has no "ADD COLUMN IF NOT EXISTS", so every ALTER TABLE is
        wrapped in try/except sqlite3.OperationalError — the error is the
        normal signal that the column already exists on an up-to-date DB.
        """
        self._conn.executescript("""
            CREATE TABLE IF NOT EXISTS analyses (
                shortcode TEXT PRIMARY KEY,
                url TEXT,
                username TEXT,
                content_type TEXT DEFAULT 'instagram',
                analyzed_at TEXT,
                updated_at TEXT,
                post_date TEXT,
                likes INTEGER DEFAULT 0,
                thumbnail TEXT DEFAULT '',
                title TEXT,
                summary TEXT,
                tags TEXT,
                music TEXT,
                category TEXT,
                visual_analysis TEXT,
                audio_transcription TEXT,
                text_analysis TEXT
            );

            CREATE TABLE IF NOT EXISTS processing_queue (
                shortcode TEXT PRIMARY KEY,
                url TEXT,
                status TEXT DEFAULT 'queued',
                position INTEGER,
                added_at TEXT,
                started_at TEXT,
                updated_at TEXT
            );

            CREATE INDEX IF NOT EXISTS idx_analyses_category ON analyses (category);
            CREATE INDEX IF NOT EXISTS idx_analyses_analyzed_at ON analyses (analyzed_at DESC);
            CREATE INDEX IF NOT EXISTS idx_queue_status ON processing_queue (status);
            CREATE INDEX IF NOT EXISTS idx_queue_position ON processing_queue (position);
        """)
        self._conn.commit()

        # Migration: add content_type to databases that predate this column
        try:
            self._conn.execute("ALTER TABLE analyses ADD COLUMN content_type TEXT DEFAULT 'instagram'")
            self._conn.commit()
        except sqlite3.OperationalError:
            pass  # Column already exists – expected on most runs

        # Migration: add thumbnail column
        try:
            self._conn.execute("ALTER TABLE analyses ADD COLUMN thumbnail TEXT DEFAULT ''")
            self._conn.commit()
        except sqlite3.OperationalError:
            pass

        # Migration: add retry columns to processing_queue
        for _col, _dflt in [
            ("retry_after", "TEXT"),
            ("attempts", "INTEGER DEFAULT 0"),
            ("reason", "TEXT"),
            ("content_type", "TEXT"),
        ]:
            try:
                self._conn.execute(
                    f"ALTER TABLE processing_queue ADD COLUMN {_col} {_dflt}"
                )
                self._conn.commit()
            except sqlite3.OperationalError:
                pass  # already exists

        try:
            self._conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_queue_retry ON processing_queue (status, retry_after)"
            )
            self._conn.commit()
        except sqlite3.OperationalError:
            pass

        # Create content_type index only after the column is guaranteed to exist
        try:
            self._conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_analyses_content_type ON analyses (content_type)"
            )
            self._conn.commit()
        except sqlite3.OperationalError:
            pass

        # Migration: add is_hidden for soft-delete support
        try:
            self._conn.execute("ALTER TABLE analyses ADD COLUMN is_hidden INTEGER DEFAULT 0")
            self._conn.commit()
        except sqlite3.OperationalError:
            pass

        # Collections table
        self._conn.executescript("""
            CREATE TABLE IF NOT EXISTS collections (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                icon TEXT DEFAULT '📁',
                post_ids TEXT DEFAULT '[]',
                created_at TEXT,
                updated_at TEXT
            );
        """)
        self._conn.commit()
        # Seed default Watch Later if missing
        cur = self._conn.cursor()
        cur.execute("SELECT id FROM collections WHERE id = 'default_watch_later'")
        if cur.fetchone() is None:
            now = datetime.utcnow().isoformat()
            self._conn.execute(
                "INSERT INTO collections (id, name, icon, post_ids, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)",
                ('default_watch_later', 'Watch Later', 'time', '[]', now, now)
            )
            self._conn.commit()

        # Migration: normalize Watch Later icon to Ionicons-safe name
        try:
            self._conn.execute(
                "UPDATE collections SET icon = 'time' WHERE id = 'default_watch_later' AND (icon IS NULL OR icon = '' OR icon = '⏰' OR icon = 'clock')"
            )
            self._conn.commit()
        except sqlite3.OperationalError:
            pass
162
+
163
+ # ------------------------------------------------------------------
164
+ # Helpers
165
+ # ------------------------------------------------------------------
166
+
167
+ def _row_to_dict(self, row):
168
+ if row is None:
169
+ return None
170
+ d = dict(row)
171
+ if d.get('tags'):
172
+ try:
173
+ d['tags'] = json.loads(d['tags'])
174
+ except Exception:
175
+ d['tags'] = []
176
+ else:
177
+ d['tags'] = []
178
+ return d
179
+
180
+ # ------------------------------------------------------------------
181
+ # Connection
182
+ # ------------------------------------------------------------------
183
+
184
+ def is_connected(self):
185
+ return self._conn is not None
186
+
187
+ # ------------------------------------------------------------------
188
+ # Cache / Analyses
189
+ # ------------------------------------------------------------------
190
+
191
+ def check_cache(self, shortcode):
192
+ """Return cached analysis dict or None."""
193
+ if not self.is_connected():
194
+ return None
195
+ try:
196
+ cur = self._conn.cursor()
197
+ cur.execute("SELECT * FROM analyses WHERE shortcode = ?", (shortcode,))
198
+ return self._row_to_dict(cur.fetchone())
199
+ except Exception as e:
200
+ print(f"[WARNING] Cache lookup error: {e}")
201
+ return None
202
+
203
    def save_analysis(self, shortcode, url, username, title, summary, tags, music, category,
                      visual_analysis="", audio_transcription="", text_analysis="",
                      likes=0, post_date=None, content_type="instagram", thumbnail=""):
        """Insert or update (upsert) an analysis record. Returns True on success.

        `tags` may be a list (stored as JSON) or a whitespace-separated string
        (split first).  analyzed_at is written only on the initial insert;
        the ON CONFLICT clause refreshes every other column on re-analysis.
        Errors are printed and reported as False rather than raised.
        """
        if not self.is_connected():
            print("[WARNING] Database not connected. Analysis not saved.")
            return False
        try:
            print(f"📝 Saving to database with shortcode: {shortcode}")
            now = datetime.utcnow().isoformat()
            # Normalize tags to a JSON array string for storage.
            tags_json = json.dumps(tags if isinstance(tags, list) else tags.split())

            self._conn.execute("""
                INSERT INTO analyses
                    (shortcode, url, username, content_type, analyzed_at, updated_at, post_date, likes,
                     thumbnail, title, summary, tags, music, category,
                     visual_analysis, audio_transcription, text_analysis)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(shortcode) DO UPDATE SET
                    url = excluded.url,
                    username = excluded.username,
                    content_type = excluded.content_type,
                    updated_at = excluded.updated_at,
                    post_date = excluded.post_date,
                    likes = excluded.likes,
                    thumbnail = excluded.thumbnail,
                    title = excluded.title,
                    summary = excluded.summary,
                    tags = excluded.tags,
                    music = excluded.music,
                    category = excluded.category,
                    visual_analysis = excluded.visual_analysis,
                    audio_transcription = excluded.audio_transcription,
                    text_analysis = excluded.text_analysis
            """, (shortcode, url, username, content_type, now, now, post_date, likes,
                  thumbnail, title, summary, tags_json, music, category,
                  visual_analysis, audio_transcription, text_analysis))
            self._conn.commit()
            print(f"[OK] Analysis saved to database ({shortcode})")
            return True
        except Exception as e:
            print(f"[WARNING] Error saving to database: {e}")
            import traceback
            traceback.print_exc()
            return False
248
+
249
+ def get_recent(self, limit=10):
250
+ """Return the most recently analysed posts (excludes soft-deleted)."""
251
+ if not self.is_connected():
252
+ return []
253
+ try:
254
+ cur = self._conn.cursor()
255
+ cur.execute(
256
+ "SELECT * FROM analyses WHERE (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ?", (limit,)
257
+ )
258
+ return [self._row_to_dict(r) for r in cur.fetchall()]
259
+ except Exception as e:
260
+ print(f"[WARNING] Error retrieving recent: {e}")
261
+ return []
262
+
263
+ def get_by_category(self, category, limit=20):
264
+ """Return all analyses for a given category (excludes soft-deleted)."""
265
+ if not self.is_connected():
266
+ return []
267
+ try:
268
+ cur = self._conn.cursor()
269
+ cur.execute(
270
+ "SELECT * FROM analyses WHERE category = ? AND (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ?",
271
+ (category, limit)
272
+ )
273
+ return [self._row_to_dict(r) for r in cur.fetchall()]
274
+ except Exception as e:
275
+ print(f"[WARNING] Error retrieving by category: {e}")
276
+ return []
277
+
278
+ def search_tags(self, tags, limit=20):
279
+ """
280
+ Search analyses by one or more tags (case-insensitive substring match
281
+ against the JSON-encoded tags column).
282
+
283
+ Args:
284
+ tags: str or list[str]
285
+ limit: int
286
+ """
287
+ if not self.is_connected():
288
+ return []
289
+ try:
290
+ if isinstance(tags, str):
291
+ tags = [tags]
292
+ cur = self._conn.cursor()
293
+ conditions = " OR ".join(["LOWER(tags) LIKE ?" for _ in tags])
294
+ params = [f"%{t.lower()}%" for t in tags] + [limit]
295
+ cur.execute(
296
+ f"SELECT * FROM analyses WHERE ({conditions}) AND (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ?",
297
+ params
298
+ )
299
+ return [self._row_to_dict(r) for r in cur.fetchall()]
300
+ except Exception as e:
301
+ print(f"[WARNING] Error searching tags: {e}")
302
+ return []
303
+
304
    def get_stats(self):
        """Return basic statistics about the database.

        NOTE(review): unlike get_recent/get_by_category, the totals and
        category buckets here do NOT filter is_hidden, so soft-deleted rows
        are counted — confirm this is intended for the stats endpoint.
        The same zeroed dict is returned both when disconnected and on error.
        """
        if not self.is_connected():
            return {
                "document_count": 0,
                "total_posts": 0,
                "total_collections": 0,
                "storage_mb": 0,
                "categories": {},
                "capacity_used": "N/A",
            }
        try:
            cur = self._conn.cursor()

            cur.execute("SELECT COUNT(*) FROM analyses")
            total = cur.fetchone()[0]

            cur.execute("SELECT COUNT(*) FROM collections")
            total_collections = cur.fetchone()[0]

            # NULL categories are reported under 'Uncategorized'.
            cur.execute(
                "SELECT COALESCE(category,'Uncategorized') as cat, COUNT(*) as cnt "
                "FROM analyses GROUP BY cat"
            )
            category_counts = {r["cat"]: r["cnt"] for r in cur.fetchall()}

            # On-disk size of the main DB file (WAL side files not included).
            storage_bytes = self.db_path.stat().st_size if self.db_path.exists() else 0
            storage_mb = round(storage_bytes / (1024 * 1024), 2)

            return {
                "document_count": total,
                "total_posts": total,
                "total_collections": total_collections,
                "storage_mb": storage_mb,
                "categories": category_counts,
                "capacity_used": "N/A (local SQLite)"
            }
        except Exception as e:
            print(f"[WARNING] Error getting stats: {e}")
            return {
                "document_count": 0,
                "total_posts": 0,
                "total_collections": 0,
                "storage_mb": 0,
                "categories": {},
                "capacity_used": "N/A",
            }
351
+
352
+ def get_all_posts(self, limit: int = 50000, offset: int = 0) -> list:
353
+ """Return all posts for export (excludes soft-deleted)."""
354
+ if not self.is_connected():
355
+ return []
356
+ try:
357
+ cur = self._conn.cursor()
358
+ cur.execute(
359
+ "SELECT * FROM analyses WHERE (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ? OFFSET ?",
360
+ (limit, offset)
361
+ )
362
+ return [self._row_to_dict(r) for r in cur.fetchall()]
363
+ except Exception as e:
364
+ print(f"[WARNING] Error getting all posts for export: {e}")
365
+ return []
366
+
367
+ def get_all_collections(self) -> list:
368
+ """Return all collections for export."""
369
+ if not self.is_connected():
370
+ return []
371
+ try:
372
+ cur = self._conn.cursor()
373
+ cur.execute("SELECT * FROM collections ORDER BY created_at DESC")
374
+ return [self._row_to_dict(r) for r in cur.fetchall()]
375
+ except Exception as e:
376
+ print(f"[WARNING] Error getting collections for export: {e}")
377
+ return []
378
+
379
+ def close(self):
380
+ if self._conn:
381
+ self._conn.close()
382
+ self._conn = None
383
+
384
+ # ==================== RETRY QUEUE ====================
385
+
386
+ def queue_for_retry(self, shortcode: str, url: str, content_type: str,
387
+ reason: str, retry_hours: float = 24.0) -> bool:
388
+ """
389
+ Schedule an item to be retried after `retry_hours` from now.
390
+ Sets status='retry' and populates retry_after, reason, content_type.
391
+ Returns True on success.
392
+ """
393
+ if not self.is_connected():
394
+ return False
395
+ try:
396
+ from datetime import timezone, timedelta
397
+ now = datetime.utcnow()
398
+ retry_at = (now + timedelta(hours=retry_hours)).isoformat()
399
+ now_str = now.isoformat()
400
+
401
+ # Get current attempts count
402
+ cur = self._conn.cursor()
403
+ cur.execute(
404
+ "SELECT attempts FROM processing_queue WHERE shortcode = ?", (shortcode,)
405
+ )
406
+ row = cur.fetchone()
407
+ attempts = (row["attempts"] or 0) + 1 if row else 1
408
+
409
+ self._conn.execute("""
410
+ INSERT INTO processing_queue
411
+ (shortcode, url, content_type, status, position,
412
+ added_at, updated_at, retry_after, attempts, reason)
413
+ VALUES (?, ?, ?, 'retry', 0, ?, ?, ?, ?, ?)
414
+ ON CONFLICT(shortcode) DO UPDATE SET
415
+ url = excluded.url,
416
+ content_type = excluded.content_type,
417
+ status = 'retry',
418
+ updated_at = excluded.updated_at,
419
+ retry_after = excluded.retry_after,
420
+ attempts = excluded.attempts,
421
+ reason = excluded.reason
422
+ """, (shortcode, url, content_type, now_str, now_str,
423
+ retry_at, attempts, reason))
424
+ self._conn.commit()
425
+ print(f"⏰ Queued for retry in {retry_hours:.0f}h: {shortcode} ({reason})")
426
+ return True
427
+ except Exception as e:
428
+ print(f"[WARNING] Error queuing for retry: {e}")
429
+ return False
430
+
431
+ def get_retry_ready(self):
432
+ """Return retry items whose retry_after time has passed."""
433
+ if not self.is_connected():
434
+ return []
435
+ try:
436
+ now = datetime.utcnow().isoformat()
437
+ cur = self._conn.cursor()
438
+ cur.execute("""
439
+ SELECT shortcode, url, content_type, reason, attempts, retry_after
440
+ FROM processing_queue
441
+ WHERE status = 'retry' AND retry_after <= ?
442
+ ORDER BY retry_after
443
+ """, (now,))
444
+ return [
445
+ {
446
+ "shortcode": r["shortcode"],
447
+ "url": r["url"],
448
+ "content_type": r["content_type"],
449
+ "reason": r["reason"],
450
+ "attempts": r["attempts"],
451
+ "retry_after": r["retry_after"],
452
+ }
453
+ for r in cur.fetchall()
454
+ ]
455
+ except Exception as e:
456
+ print(f"[WARNING] Error getting retry-ready items: {e}")
457
+ return []
458
+
459
+ def get_retry_queue(self):
460
+ """Return all items currently awaiting retry (status='retry')."""
461
+ if not self.is_connected():
462
+ return []
463
+ try:
464
+ cur = self._conn.cursor()
465
+ cur.execute("""
466
+ SELECT shortcode, url, content_type, reason, attempts,
467
+ retry_after, added_at
468
+ FROM processing_queue
469
+ WHERE status = 'retry'
470
+ ORDER BY retry_after
471
+ """)
472
+ return [dict(r) for r in cur.fetchall()]
473
+ except Exception as e:
474
+ print(f"[WARNING] Error getting retry queue: {e}")
475
+ return []
476
+
477
+ # ==================== QUEUE MANAGEMENT ====================
478
+
479
    def add_to_queue(self, shortcode, url):
        """Add item to processing queue. Returns queue position (1-based), or -1 on error.

        Idempotent: an item already 'queued' returns its existing position,
        and an item currently 'processing' returns 0 (sentinel for "in
        flight").  Otherwise the item is (re)inserted at the tail.

        NOTE(review): SELECT MAX(position) followed by INSERT is not atomic;
        two concurrent callers could claim the same position.  Fine for a
        single-writer worker — confirm before parallelizing.
        """
        if not self.is_connected():
            return -1
        try:
            cur = self._conn.cursor()
            cur.execute(
                "SELECT status, position FROM processing_queue WHERE shortcode = ?", (shortcode,)
            )
            existing = cur.fetchone()
            if existing:
                if existing["status"] == "queued":
                    return existing["position"]
                if existing["status"] == "processing":
                    return 0

            # Next free tail position (1 when the queue is empty).
            cur.execute(
                "SELECT MAX(position) FROM processing_queue WHERE status = 'queued'"
            )
            row = cur.fetchone()
            position = (row[0] + 1) if row[0] is not None else 1

            now = datetime.utcnow().isoformat()
            self._conn.execute("""
                INSERT INTO processing_queue (shortcode, url, status, position, added_at, updated_at)
                VALUES (?, ?, 'queued', ?, ?, ?)
                ON CONFLICT(shortcode) DO UPDATE SET
                    url = excluded.url,
                    status = 'queued',
                    position = excluded.position,
                    updated_at = excluded.updated_at
            """, (shortcode, url, position, now, now))
            self._conn.commit()
            return position
        except Exception as e:
            print(f"[WARNING] Error adding to queue: {e}")
            return -1
516
+
517
+ def get_queue(self):
518
+ """Return list of queued items ordered by position."""
519
+ if not self.is_connected():
520
+ return []
521
+ try:
522
+ cur = self._conn.cursor()
523
+ cur.execute(
524
+ "SELECT shortcode, url, position FROM processing_queue "
525
+ "WHERE status = 'queued' ORDER BY position"
526
+ )
527
+ return [
528
+ {"shortcode": r["shortcode"], "url": r["url"], "position": r["position"]}
529
+ for r in cur.fetchall()
530
+ ]
531
+ except Exception as e:
532
+ print(f"[WARNING] Error getting queue: {e}")
533
+ return []
534
+
535
+ def get_processing(self):
536
+ """Return list of shortcodes currently being processed."""
537
+ if not self.is_connected():
538
+ return []
539
+ try:
540
+ cur = self._conn.cursor()
541
+ cur.execute(
542
+ "SELECT shortcode FROM processing_queue WHERE status = 'processing'"
543
+ )
544
+ return [r["shortcode"] for r in cur.fetchall()]
545
+ except Exception as e:
546
+ print(f"[WARNING] Error getting processing items: {e}")
547
+ return []
548
+
549
+ def mark_processing(self, shortcode):
550
+ """Mark a queued item as currently processing."""
551
+ if not self.is_connected():
552
+ return False
553
+ try:
554
+ now = datetime.utcnow().isoformat()
555
+ self._conn.execute("""
556
+ UPDATE processing_queue
557
+ SET status = 'processing', started_at = ?, updated_at = ?
558
+ WHERE shortcode = ?
559
+ """, (now, now, shortcode))
560
+ self._conn.commit()
561
+ return True
562
+ except Exception as e:
563
+ print(f"[WARNING] Error marking as processing: {e}")
564
+ return False
565
+
566
+ def remove_from_queue(self, shortcode):
567
+ """Remove an item from the queue and compact positions."""
568
+ if not self.is_connected():
569
+ return False
570
+ try:
571
+ self._conn.execute(
572
+ "DELETE FROM processing_queue WHERE shortcode = ?", (shortcode,)
573
+ )
574
+ self._conn.commit()
575
+
576
+ cur = self._conn.cursor()
577
+ cur.execute(
578
+ "SELECT shortcode FROM processing_queue "
579
+ "WHERE status = 'queued' ORDER BY position"
580
+ )
581
+ for idx, item in enumerate(cur.fetchall(), 1):
582
+ self._conn.execute(
583
+ "UPDATE processing_queue SET position = ? WHERE shortcode = ?",
584
+ (idx, item["shortcode"])
585
+ )
586
+ self._conn.commit()
587
+ return True
588
+ except Exception as e:
589
+ print(f"[WARNING] Error removing from queue: {e}")
590
+ return False
591
+
592
    def recover_interrupted_items(self):
        """
        Move items stuck in 'processing' back to 'queued' (e.g. after a crash).
        Returns the number of items recovered.

        After the status flip, ALL queued items are renumbered 1..N by their
        original added_at order, so recovered items re-enter the queue in
        arrival order rather than at a stale position.
        """
        if not self.is_connected():
            return 0
        try:
            now = datetime.utcnow().isoformat()
            cur = self._conn.cursor()
            cur.execute("""
                UPDATE processing_queue
                SET status = 'queued', updated_at = ?
                WHERE status = 'processing'
            """, (now,))
            count = cur.rowcount
            self._conn.commit()

            cur.execute(
                "SELECT shortcode FROM processing_queue "
                "WHERE status = 'queued' ORDER BY added_at"
            )
            for idx, item in enumerate(cur.fetchall(), 1):
                self._conn.execute(
                    "UPDATE processing_queue SET position = ? WHERE shortcode = ?",
                    (idx, item["shortcode"])
                )
            self._conn.commit()

            if count > 0:
                print(f"[RECOVERED] Recovered {count} interrupted items")
            return count
        except Exception as e:
            print(f"[WARNING] Error recovering items: {e}")
            return 0
627
+
628
+ # ------------------------------------------------------------------
629
+ # Post management
630
+ # ------------------------------------------------------------------
631
+
632
+ def delete_post(self, shortcode):
633
+ """Soft-delete a post (is_hidden=1). Data kept for re-add reuse. Returns True if updated."""
634
+ if not self.is_connected():
635
+ return False
636
+ try:
637
+ cur = self._conn.execute(
638
+ "UPDATE analyses SET is_hidden = 1, updated_at = ? WHERE shortcode = ?",
639
+ (datetime.utcnow().isoformat(), shortcode)
640
+ )
641
+ self._conn.commit()
642
+ return cur.rowcount > 0
643
+ except Exception as e:
644
+ print(f"[WARNING] Error soft-deleting post: {e}")
645
+ return False
646
+
647
+ def hard_delete_post(self, shortcode):
648
+ """Permanently remove a post row — used for force re-analysis. Returns True if deleted."""
649
+ if not self.is_connected():
650
+ return False
651
+ try:
652
+ cur = self._conn.execute(
653
+ "DELETE FROM analyses WHERE shortcode = ?",
654
+ (shortcode,)
655
+ )
656
+ self._conn.commit()
657
+ return cur.rowcount > 0
658
+ except Exception as e:
659
+ print(f"[WARNING] Error hard-deleting post: {e}")
660
+ return False
661
+
662
+ def restore_post(self, shortcode):
663
+ """Restore a soft-deleted post (is_hidden=0). Returns True if updated."""
664
+ if not self.is_connected():
665
+ return False
666
+ try:
667
+ cur = self._conn.execute(
668
+ "UPDATE analyses SET is_hidden = 0, updated_at = ? WHERE shortcode = ?",
669
+ (datetime.utcnow().isoformat(), shortcode)
670
+ )
671
+ self._conn.commit()
672
+ return cur.rowcount > 0
673
+ except Exception as e:
674
+ print(f"[WARNING] Error restoring post: {e}")
675
+ return False
676
+
677
+ def update_post(self, shortcode, updates):
678
+ """
679
+ Update specific fields of a post.
680
+
681
+ Args:
682
+ shortcode: Instagram post shortcode
683
+ updates: dict of allowed fields (category, title, summary)
684
+
685
+ Returns:
686
+ bool: True if updated
687
+ """
688
+ if not self.is_connected():
689
+ return False
690
+ try:
691
+ updates["updated_at"] = datetime.utcnow().isoformat()
692
+ set_clause = ", ".join(f"{k} = ?" for k in updates)
693
+ values = list(updates.values()) + [shortcode]
694
+ cur = self._conn.execute(
695
+ f"UPDATE analyses SET {set_clause} WHERE shortcode = ?", values
696
+ )
697
+ self._conn.commit()
698
+ if cur.rowcount == 0:
699
+ print(f"[WARNING] Post not found: {shortcode}")
700
+ return False
701
+ print(f"[OK] Updated post: {shortcode}")
702
+ return True
703
+ except Exception as e:
704
+ print(f"[WARNING] Error updating post: {e}")
705
+ return False
706
+
707
+ # ------------------------------------------------------------------
708
+ # Collections
709
+ # ------------------------------------------------------------------
710
+
711
+ def _collection_row_to_dict(self, row):
712
+ if row is None:
713
+ return None
714
+ d = dict(row)
715
+ try:
716
+ d['post_ids'] = json.loads(d.get('post_ids') or '[]')
717
+ except Exception:
718
+ d['post_ids'] = []
719
+ return d
720
+
721
+ def get_collections(self):
722
+ """Return all collections ordered by created_at."""
723
+ if not self.is_connected():
724
+ return []
725
+ try:
726
+ cur = self._conn.cursor()
727
+ cur.execute("SELECT * FROM collections ORDER BY created_at ASC")
728
+ return [self._collection_row_to_dict(r) for r in cur.fetchall()]
729
+ except Exception as e:
730
+ print(f"[WARNING] Error getting collections: {e}")
731
+ return []
732
+
733
+ def get_collection(self, collection_id):
734
+ """Return a single collection by id."""
735
+ if not self.is_connected():
736
+ return None
737
+ try:
738
+ cur = self._conn.cursor()
739
+ cur.execute("SELECT * FROM collections WHERE id = ?", (collection_id,))
740
+ return self._collection_row_to_dict(cur.fetchone())
741
+ except Exception as e:
742
+ print(f"[WARNING] Error getting collection: {e}")
743
+ return None
744
+
745
    def upsert_collection(self, collection_id, name, icon, post_ids, created_at=None, updated_at=None):
        """Insert or fully replace a collection. Returns the saved dict.

        Non-list post_ids values are coerced to an empty list before being
        stored as JSON.  On conflict the update path preserves the original
        created_at (only name/icon/post_ids/updated_at are replaced).
        Returns None when disconnected or on error.
        """
        if not self.is_connected():
            return None
        try:
            now = datetime.utcnow().isoformat()
            self._conn.execute("""
                INSERT INTO collections (id, name, icon, post_ids, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    name = excluded.name,
                    icon = excluded.icon,
                    post_ids = excluded.post_ids,
                    updated_at = excluded.updated_at
            """, (
                collection_id, name, icon,
                json.dumps(post_ids if isinstance(post_ids, list) else []),
                created_at or now, updated_at or now
            ))
            self._conn.commit()
            # Re-read so the caller gets the decoded, persisted state.
            return self.get_collection(collection_id)
        except Exception as e:
            print(f"[WARNING] Error upserting collection: {e}")
            return None
769
+
770
+ def update_collection_posts(self, collection_id, post_ids):
771
+ """Replace the post_ids list for a collection."""
772
+ if not self.is_connected():
773
+ return False
774
+ try:
775
+ now = datetime.utcnow().isoformat()
776
+ cur = self._conn.execute(
777
+ "UPDATE collections SET post_ids = ?, updated_at = ? WHERE id = ?",
778
+ (json.dumps(post_ids), now, collection_id)
779
+ )
780
+ self._conn.commit()
781
+ return cur.rowcount > 0
782
+ except Exception as e:
783
+ print(f"[WARNING] Error updating collection posts: {e}")
784
+ return False
785
+
786
+ def delete_collection(self, collection_id):
787
+ """Delete a collection. Returns True if deleted."""
788
+ if not self.is_connected():
789
+ return False
790
+ try:
791
+ cur = self._conn.execute("DELETE FROM collections WHERE id = ?", (collection_id,))
792
+ self._conn.commit()
793
+ return cur.rowcount > 0
794
+ except Exception as e:
795
+ print(f"[WARNING] Error deleting collection: {e}")
796
+ return False
797
+
798
+
799
+ # ------------------------------------------------------------------
800
+ # Singleton accessor
801
+ # ------------------------------------------------------------------
802
+
803
_db_instance = None


def get_db():
    """Return the process-wide shared Database singleton.

    The instance is created lazily on first call and reused afterwards.
    """
    global _db_instance
    if _db_instance is None:
        _db_instance = Database()
    return _db_instance