superbrain-server 1.0.2-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/superbrain.js +196 -0
- package/package.json +23 -0
- package/payload/.dockerignore +45 -0
- package/payload/.env.example +58 -0
- package/payload/Dockerfile +73 -0
- package/payload/analyzers/__init__.py +0 -0
- package/payload/analyzers/audio_transcribe.py +225 -0
- package/payload/analyzers/caption.py +244 -0
- package/payload/analyzers/music_identifier.py +346 -0
- package/payload/analyzers/text_analyzer.py +117 -0
- package/payload/analyzers/visual_analyze.py +218 -0
- package/payload/analyzers/webpage_analyzer.py +789 -0
- package/payload/analyzers/youtube_analyzer.py +320 -0
- package/payload/api.py +1676 -0
- package/payload/config/.api_keys.example +22 -0
- package/payload/config/model_rankings.json +492 -0
- package/payload/config/openrouter_free_models.json +1364 -0
- package/payload/config/whisper_model.txt +1 -0
- package/payload/config_settings.py +185 -0
- package/payload/core/__init__.py +0 -0
- package/payload/core/category_manager.py +219 -0
- package/payload/core/database.py +811 -0
- package/payload/core/link_checker.py +300 -0
- package/payload/core/model_router.py +1253 -0
- package/payload/docker-compose.yml +120 -0
- package/payload/instagram/__init__.py +0 -0
- package/payload/instagram/instagram_downloader.py +253 -0
- package/payload/instagram/instagram_login.py +190 -0
- package/payload/main.py +912 -0
- package/payload/requirements.txt +39 -0
- package/payload/reset.py +311 -0
- package/payload/start-docker-prod.sh +125 -0
- package/payload/start-docker.sh +56 -0
- package/payload/start.py +1302 -0
- package/payload/static/favicon.ico +0 -0
- package/payload/stop-docker.sh +16 -0
- package/payload/utils/__init__.py +0 -0
- package/payload/utils/db_stats.py +108 -0
- package/payload/utils/manage_token.py +91 -0
|
@@ -0,0 +1,811 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SQLite Database Manager for SuperBrain
|
|
4
|
+
Handles caching and retrieval of Instagram analysis results
|
|
5
|
+
Self-hosted, zero-config, file-based database
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
# Database file path. Defaults to superbrain.db two levels above this module
# (the payload root); can be overridden via DATABASE_PATH for Docker deployments.
DB_PATH = Path(os.getenv("DATABASE_PATH", str(Path(__file__).resolve().parent.parent / 'superbrain.db')))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Database:
|
|
19
|
+
"""SQLite database manager with caching functionality"""
|
|
20
|
+
|
|
21
|
+
    def __init__(self):
        """Open (or create) the SQLite database file and ensure the schema exists."""
        self.db_path = DB_PATH   # Path of the backing database file
        self._conn = None        # sqlite3.Connection, or None when the connect failed
        self._connect()
|
|
25
|
+
|
|
26
|
+
    def _connect(self):
        """Connect to SQLite and create/migrate tables.

        On any failure the error is printed and ``self._conn`` is left as
        ``None``; all public methods then degrade gracefully via
        ``is_connected()`` instead of raising.
        """
        try:
            # check_same_thread=False: the single connection is shared across
            # threads — callers are expected to serialize writes themselves.
            self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
            self._conn.row_factory = sqlite3.Row
            # WAL mode for better concurrent read performance
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA foreign_keys=ON")
            self._create_tables()
            print(f"[OK] Connected to SQLite database: {self.db_path}")
        except Exception as e:
            print(f"[WARNING] SQLite connection failed: {e}")
            self._conn = None
|
|
38
|
+
|
|
39
|
+
def _create_tables(self):
|
|
40
|
+
self._conn.executescript("""
|
|
41
|
+
CREATE TABLE IF NOT EXISTS analyses (
|
|
42
|
+
shortcode TEXT PRIMARY KEY,
|
|
43
|
+
url TEXT,
|
|
44
|
+
username TEXT,
|
|
45
|
+
content_type TEXT DEFAULT 'instagram',
|
|
46
|
+
analyzed_at TEXT,
|
|
47
|
+
updated_at TEXT,
|
|
48
|
+
post_date TEXT,
|
|
49
|
+
likes INTEGER DEFAULT 0,
|
|
50
|
+
thumbnail TEXT DEFAULT '',
|
|
51
|
+
title TEXT,
|
|
52
|
+
summary TEXT,
|
|
53
|
+
tags TEXT,
|
|
54
|
+
music TEXT,
|
|
55
|
+
category TEXT,
|
|
56
|
+
visual_analysis TEXT,
|
|
57
|
+
audio_transcription TEXT,
|
|
58
|
+
text_analysis TEXT
|
|
59
|
+
);
|
|
60
|
+
|
|
61
|
+
CREATE TABLE IF NOT EXISTS processing_queue (
|
|
62
|
+
shortcode TEXT PRIMARY KEY,
|
|
63
|
+
url TEXT,
|
|
64
|
+
status TEXT DEFAULT 'queued',
|
|
65
|
+
position INTEGER,
|
|
66
|
+
added_at TEXT,
|
|
67
|
+
started_at TEXT,
|
|
68
|
+
updated_at TEXT
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
CREATE INDEX IF NOT EXISTS idx_analyses_category ON analyses (category);
|
|
72
|
+
CREATE INDEX IF NOT EXISTS idx_analyses_analyzed_at ON analyses (analyzed_at DESC);
|
|
73
|
+
CREATE INDEX IF NOT EXISTS idx_queue_status ON processing_queue (status);
|
|
74
|
+
CREATE INDEX IF NOT EXISTS idx_queue_position ON processing_queue (position);
|
|
75
|
+
""")
|
|
76
|
+
self._conn.commit()
|
|
77
|
+
|
|
78
|
+
# Migration: add content_type to databases that predate this column
|
|
79
|
+
try:
|
|
80
|
+
self._conn.execute("ALTER TABLE analyses ADD COLUMN content_type TEXT DEFAULT 'instagram'")
|
|
81
|
+
self._conn.commit()
|
|
82
|
+
except sqlite3.OperationalError:
|
|
83
|
+
pass # Column already exists – expected on most runs
|
|
84
|
+
|
|
85
|
+
# Migration: add thumbnail column
|
|
86
|
+
try:
|
|
87
|
+
self._conn.execute("ALTER TABLE analyses ADD COLUMN thumbnail TEXT DEFAULT ''")
|
|
88
|
+
self._conn.commit()
|
|
89
|
+
except sqlite3.OperationalError:
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
# Migration: add retry columns to processing_queue
|
|
93
|
+
for _col, _dflt in [
|
|
94
|
+
("retry_after", "TEXT"),
|
|
95
|
+
("attempts", "INTEGER DEFAULT 0"),
|
|
96
|
+
("reason", "TEXT"),
|
|
97
|
+
("content_type", "TEXT"),
|
|
98
|
+
]:
|
|
99
|
+
try:
|
|
100
|
+
self._conn.execute(
|
|
101
|
+
f"ALTER TABLE processing_queue ADD COLUMN {_col} {_dflt}"
|
|
102
|
+
)
|
|
103
|
+
self._conn.commit()
|
|
104
|
+
except sqlite3.OperationalError:
|
|
105
|
+
pass # already exists
|
|
106
|
+
|
|
107
|
+
try:
|
|
108
|
+
self._conn.execute(
|
|
109
|
+
"CREATE INDEX IF NOT EXISTS idx_queue_retry ON processing_queue (status, retry_after)"
|
|
110
|
+
)
|
|
111
|
+
self._conn.commit()
|
|
112
|
+
except sqlite3.OperationalError:
|
|
113
|
+
pass
|
|
114
|
+
|
|
115
|
+
# Create content_type index only after the column is guaranteed to exist
|
|
116
|
+
try:
|
|
117
|
+
self._conn.execute(
|
|
118
|
+
"CREATE INDEX IF NOT EXISTS idx_analyses_content_type ON analyses (content_type)"
|
|
119
|
+
)
|
|
120
|
+
self._conn.commit()
|
|
121
|
+
except sqlite3.OperationalError:
|
|
122
|
+
pass
|
|
123
|
+
|
|
124
|
+
# Migration: add is_hidden for soft-delete support
|
|
125
|
+
try:
|
|
126
|
+
self._conn.execute("ALTER TABLE analyses ADD COLUMN is_hidden INTEGER DEFAULT 0")
|
|
127
|
+
self._conn.commit()
|
|
128
|
+
except sqlite3.OperationalError:
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
# Collections table
|
|
132
|
+
self._conn.executescript("""
|
|
133
|
+
CREATE TABLE IF NOT EXISTS collections (
|
|
134
|
+
id TEXT PRIMARY KEY,
|
|
135
|
+
name TEXT NOT NULL,
|
|
136
|
+
icon TEXT DEFAULT '📁',
|
|
137
|
+
post_ids TEXT DEFAULT '[]',
|
|
138
|
+
created_at TEXT,
|
|
139
|
+
updated_at TEXT
|
|
140
|
+
);
|
|
141
|
+
""")
|
|
142
|
+
self._conn.commit()
|
|
143
|
+
# Seed default Watch Later if missing
|
|
144
|
+
cur = self._conn.cursor()
|
|
145
|
+
cur.execute("SELECT id FROM collections WHERE id = 'default_watch_later'")
|
|
146
|
+
if cur.fetchone() is None:
|
|
147
|
+
now = datetime.utcnow().isoformat()
|
|
148
|
+
self._conn.execute(
|
|
149
|
+
"INSERT INTO collections (id, name, icon, post_ids, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)",
|
|
150
|
+
('default_watch_later', 'Watch Later', 'time', '[]', now, now)
|
|
151
|
+
)
|
|
152
|
+
self._conn.commit()
|
|
153
|
+
|
|
154
|
+
# Migration: normalize Watch Later icon to Ionicons-safe name
|
|
155
|
+
try:
|
|
156
|
+
self._conn.execute(
|
|
157
|
+
"UPDATE collections SET icon = 'time' WHERE id = 'default_watch_later' AND (icon IS NULL OR icon = '' OR icon = '⏰' OR icon = 'clock')"
|
|
158
|
+
)
|
|
159
|
+
self._conn.commit()
|
|
160
|
+
except sqlite3.OperationalError:
|
|
161
|
+
pass
|
|
162
|
+
|
|
163
|
+
# ------------------------------------------------------------------
|
|
164
|
+
# Helpers
|
|
165
|
+
# ------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
def _row_to_dict(self, row):
|
|
168
|
+
if row is None:
|
|
169
|
+
return None
|
|
170
|
+
d = dict(row)
|
|
171
|
+
if d.get('tags'):
|
|
172
|
+
try:
|
|
173
|
+
d['tags'] = json.loads(d['tags'])
|
|
174
|
+
except Exception:
|
|
175
|
+
d['tags'] = []
|
|
176
|
+
else:
|
|
177
|
+
d['tags'] = []
|
|
178
|
+
return d
|
|
179
|
+
|
|
180
|
+
# ------------------------------------------------------------------
|
|
181
|
+
# Connection
|
|
182
|
+
# ------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
    def is_connected(self):
        """Return True when the SQLite connection is open (guards every query)."""
        return self._conn is not None
|
|
186
|
+
|
|
187
|
+
# ------------------------------------------------------------------
|
|
188
|
+
# Cache / Analyses
|
|
189
|
+
# ------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
def check_cache(self, shortcode):
|
|
192
|
+
"""Return cached analysis dict or None."""
|
|
193
|
+
if not self.is_connected():
|
|
194
|
+
return None
|
|
195
|
+
try:
|
|
196
|
+
cur = self._conn.cursor()
|
|
197
|
+
cur.execute("SELECT * FROM analyses WHERE shortcode = ?", (shortcode,))
|
|
198
|
+
return self._row_to_dict(cur.fetchone())
|
|
199
|
+
except Exception as e:
|
|
200
|
+
print(f"[WARNING] Cache lookup error: {e}")
|
|
201
|
+
return None
|
|
202
|
+
|
|
203
|
+
    def save_analysis(self, shortcode, url, username, title, summary, tags, music, category,
                      visual_analysis="", audio_transcription="", text_analysis="",
                      likes=0, post_date=None, content_type="instagram", thumbnail=""):
        """Insert or update an analysis record. Returns True on success.

        Upserts on the shortcode primary key: an existing row keeps its
        original analyzed_at but has every other column overwritten.
        `tags` may be a list (stored as JSON) or a string.
        """
        if not self.is_connected():
            print("[WARNING] Database not connected. Analysis not saved.")
            return False
        try:
            print(f"📝 Saving to database with shortcode: {shortcode}")
            now = datetime.utcnow().isoformat()
            # NOTE(review): a string `tags` is split on whitespace here —
            # comma-separated input would keep the commas inside each tag.
            # Confirm callers only pass lists or space-separated strings.
            tags_json = json.dumps(tags if isinstance(tags, list) else tags.split())

            self._conn.execute("""
                INSERT INTO analyses
                    (shortcode, url, username, content_type, analyzed_at, updated_at, post_date, likes,
                     thumbnail, title, summary, tags, music, category,
                     visual_analysis, audio_transcription, text_analysis)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(shortcode) DO UPDATE SET
                    url = excluded.url,
                    username = excluded.username,
                    content_type = excluded.content_type,
                    updated_at = excluded.updated_at,
                    post_date = excluded.post_date,
                    likes = excluded.likes,
                    thumbnail = excluded.thumbnail,
                    title = excluded.title,
                    summary = excluded.summary,
                    tags = excluded.tags,
                    music = excluded.music,
                    category = excluded.category,
                    visual_analysis = excluded.visual_analysis,
                    audio_transcription = excluded.audio_transcription,
                    text_analysis = excluded.text_analysis
            """, (shortcode, url, username, content_type, now, now, post_date, likes,
                  thumbnail, title, summary, tags_json, music, category,
                  visual_analysis, audio_transcription, text_analysis))
            self._conn.commit()
            print(f"[OK] Analysis saved to database ({shortcode})")
            return True
        except Exception as e:
            print(f"[WARNING] Error saving to database: {e}")
            import traceback
            traceback.print_exc()
            return False
|
|
248
|
+
|
|
249
|
+
def get_recent(self, limit=10):
|
|
250
|
+
"""Return the most recently analysed posts (excludes soft-deleted)."""
|
|
251
|
+
if not self.is_connected():
|
|
252
|
+
return []
|
|
253
|
+
try:
|
|
254
|
+
cur = self._conn.cursor()
|
|
255
|
+
cur.execute(
|
|
256
|
+
"SELECT * FROM analyses WHERE (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ?", (limit,)
|
|
257
|
+
)
|
|
258
|
+
return [self._row_to_dict(r) for r in cur.fetchall()]
|
|
259
|
+
except Exception as e:
|
|
260
|
+
print(f"[WARNING] Error retrieving recent: {e}")
|
|
261
|
+
return []
|
|
262
|
+
|
|
263
|
+
def get_by_category(self, category, limit=20):
|
|
264
|
+
"""Return all analyses for a given category (excludes soft-deleted)."""
|
|
265
|
+
if not self.is_connected():
|
|
266
|
+
return []
|
|
267
|
+
try:
|
|
268
|
+
cur = self._conn.cursor()
|
|
269
|
+
cur.execute(
|
|
270
|
+
"SELECT * FROM analyses WHERE category = ? AND (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ?",
|
|
271
|
+
(category, limit)
|
|
272
|
+
)
|
|
273
|
+
return [self._row_to_dict(r) for r in cur.fetchall()]
|
|
274
|
+
except Exception as e:
|
|
275
|
+
print(f"[WARNING] Error retrieving by category: {e}")
|
|
276
|
+
return []
|
|
277
|
+
|
|
278
|
+
def search_tags(self, tags, limit=20):
|
|
279
|
+
"""
|
|
280
|
+
Search analyses by one or more tags (case-insensitive substring match
|
|
281
|
+
against the JSON-encoded tags column).
|
|
282
|
+
|
|
283
|
+
Args:
|
|
284
|
+
tags: str or list[str]
|
|
285
|
+
limit: int
|
|
286
|
+
"""
|
|
287
|
+
if not self.is_connected():
|
|
288
|
+
return []
|
|
289
|
+
try:
|
|
290
|
+
if isinstance(tags, str):
|
|
291
|
+
tags = [tags]
|
|
292
|
+
cur = self._conn.cursor()
|
|
293
|
+
conditions = " OR ".join(["LOWER(tags) LIKE ?" for _ in tags])
|
|
294
|
+
params = [f"%{t.lower()}%" for t in tags] + [limit]
|
|
295
|
+
cur.execute(
|
|
296
|
+
f"SELECT * FROM analyses WHERE ({conditions}) AND (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ?",
|
|
297
|
+
params
|
|
298
|
+
)
|
|
299
|
+
return [self._row_to_dict(r) for r in cur.fetchall()]
|
|
300
|
+
except Exception as e:
|
|
301
|
+
print(f"[WARNING] Error searching tags: {e}")
|
|
302
|
+
return []
|
|
303
|
+
|
|
304
|
+
def get_stats(self):
|
|
305
|
+
"""Return basic statistics about the database."""
|
|
306
|
+
if not self.is_connected():
|
|
307
|
+
return {
|
|
308
|
+
"document_count": 0,
|
|
309
|
+
"total_posts": 0,
|
|
310
|
+
"total_collections": 0,
|
|
311
|
+
"storage_mb": 0,
|
|
312
|
+
"categories": {},
|
|
313
|
+
"capacity_used": "N/A",
|
|
314
|
+
}
|
|
315
|
+
try:
|
|
316
|
+
cur = self._conn.cursor()
|
|
317
|
+
|
|
318
|
+
cur.execute("SELECT COUNT(*) FROM analyses")
|
|
319
|
+
total = cur.fetchone()[0]
|
|
320
|
+
|
|
321
|
+
cur.execute("SELECT COUNT(*) FROM collections")
|
|
322
|
+
total_collections = cur.fetchone()[0]
|
|
323
|
+
|
|
324
|
+
cur.execute(
|
|
325
|
+
"SELECT COALESCE(category,'Uncategorized') as cat, COUNT(*) as cnt "
|
|
326
|
+
"FROM analyses GROUP BY cat"
|
|
327
|
+
)
|
|
328
|
+
category_counts = {r["cat"]: r["cnt"] for r in cur.fetchall()}
|
|
329
|
+
|
|
330
|
+
storage_bytes = self.db_path.stat().st_size if self.db_path.exists() else 0
|
|
331
|
+
storage_mb = round(storage_bytes / (1024 * 1024), 2)
|
|
332
|
+
|
|
333
|
+
return {
|
|
334
|
+
"document_count": total,
|
|
335
|
+
"total_posts": total,
|
|
336
|
+
"total_collections": total_collections,
|
|
337
|
+
"storage_mb": storage_mb,
|
|
338
|
+
"categories": category_counts,
|
|
339
|
+
"capacity_used": "N/A (local SQLite)"
|
|
340
|
+
}
|
|
341
|
+
except Exception as e:
|
|
342
|
+
print(f"[WARNING] Error getting stats: {e}")
|
|
343
|
+
return {
|
|
344
|
+
"document_count": 0,
|
|
345
|
+
"total_posts": 0,
|
|
346
|
+
"total_collections": 0,
|
|
347
|
+
"storage_mb": 0,
|
|
348
|
+
"categories": {},
|
|
349
|
+
"capacity_used": "N/A",
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
def get_all_posts(self, limit: int = 50000, offset: int = 0) -> list:
|
|
353
|
+
"""Return all posts for export (excludes soft-deleted)."""
|
|
354
|
+
if not self.is_connected():
|
|
355
|
+
return []
|
|
356
|
+
try:
|
|
357
|
+
cur = self._conn.cursor()
|
|
358
|
+
cur.execute(
|
|
359
|
+
"SELECT * FROM analyses WHERE (is_hidden IS NULL OR is_hidden = 0) ORDER BY analyzed_at DESC LIMIT ? OFFSET ?",
|
|
360
|
+
(limit, offset)
|
|
361
|
+
)
|
|
362
|
+
return [self._row_to_dict(r) for r in cur.fetchall()]
|
|
363
|
+
except Exception as e:
|
|
364
|
+
print(f"[WARNING] Error getting all posts for export: {e}")
|
|
365
|
+
return []
|
|
366
|
+
|
|
367
|
+
def get_all_collections(self) -> list:
|
|
368
|
+
"""Return all collections for export."""
|
|
369
|
+
if not self.is_connected():
|
|
370
|
+
return []
|
|
371
|
+
try:
|
|
372
|
+
cur = self._conn.cursor()
|
|
373
|
+
cur.execute("SELECT * FROM collections ORDER BY created_at DESC")
|
|
374
|
+
return [self._row_to_dict(r) for r in cur.fetchall()]
|
|
375
|
+
except Exception as e:
|
|
376
|
+
print(f"[WARNING] Error getting collections for export: {e}")
|
|
377
|
+
return []
|
|
378
|
+
|
|
379
|
+
    def close(self):
        """Close the SQLite connection; safe to call repeatedly (no-op when closed)."""
        if self._conn:
            self._conn.close()
            self._conn = None
|
|
383
|
+
|
|
384
|
+
# ==================== RETRY QUEUE ====================
|
|
385
|
+
|
|
386
|
+
def queue_for_retry(self, shortcode: str, url: str, content_type: str,
|
|
387
|
+
reason: str, retry_hours: float = 24.0) -> bool:
|
|
388
|
+
"""
|
|
389
|
+
Schedule an item to be retried after `retry_hours` from now.
|
|
390
|
+
Sets status='retry' and populates retry_after, reason, content_type.
|
|
391
|
+
Returns True on success.
|
|
392
|
+
"""
|
|
393
|
+
if not self.is_connected():
|
|
394
|
+
return False
|
|
395
|
+
try:
|
|
396
|
+
from datetime import timezone, timedelta
|
|
397
|
+
now = datetime.utcnow()
|
|
398
|
+
retry_at = (now + timedelta(hours=retry_hours)).isoformat()
|
|
399
|
+
now_str = now.isoformat()
|
|
400
|
+
|
|
401
|
+
# Get current attempts count
|
|
402
|
+
cur = self._conn.cursor()
|
|
403
|
+
cur.execute(
|
|
404
|
+
"SELECT attempts FROM processing_queue WHERE shortcode = ?", (shortcode,)
|
|
405
|
+
)
|
|
406
|
+
row = cur.fetchone()
|
|
407
|
+
attempts = (row["attempts"] or 0) + 1 if row else 1
|
|
408
|
+
|
|
409
|
+
self._conn.execute("""
|
|
410
|
+
INSERT INTO processing_queue
|
|
411
|
+
(shortcode, url, content_type, status, position,
|
|
412
|
+
added_at, updated_at, retry_after, attempts, reason)
|
|
413
|
+
VALUES (?, ?, ?, 'retry', 0, ?, ?, ?, ?, ?)
|
|
414
|
+
ON CONFLICT(shortcode) DO UPDATE SET
|
|
415
|
+
url = excluded.url,
|
|
416
|
+
content_type = excluded.content_type,
|
|
417
|
+
status = 'retry',
|
|
418
|
+
updated_at = excluded.updated_at,
|
|
419
|
+
retry_after = excluded.retry_after,
|
|
420
|
+
attempts = excluded.attempts,
|
|
421
|
+
reason = excluded.reason
|
|
422
|
+
""", (shortcode, url, content_type, now_str, now_str,
|
|
423
|
+
retry_at, attempts, reason))
|
|
424
|
+
self._conn.commit()
|
|
425
|
+
print(f"⏰ Queued for retry in {retry_hours:.0f}h: {shortcode} ({reason})")
|
|
426
|
+
return True
|
|
427
|
+
except Exception as e:
|
|
428
|
+
print(f"[WARNING] Error queuing for retry: {e}")
|
|
429
|
+
return False
|
|
430
|
+
|
|
431
|
+
def get_retry_ready(self):
|
|
432
|
+
"""Return retry items whose retry_after time has passed."""
|
|
433
|
+
if not self.is_connected():
|
|
434
|
+
return []
|
|
435
|
+
try:
|
|
436
|
+
now = datetime.utcnow().isoformat()
|
|
437
|
+
cur = self._conn.cursor()
|
|
438
|
+
cur.execute("""
|
|
439
|
+
SELECT shortcode, url, content_type, reason, attempts, retry_after
|
|
440
|
+
FROM processing_queue
|
|
441
|
+
WHERE status = 'retry' AND retry_after <= ?
|
|
442
|
+
ORDER BY retry_after
|
|
443
|
+
""", (now,))
|
|
444
|
+
return [
|
|
445
|
+
{
|
|
446
|
+
"shortcode": r["shortcode"],
|
|
447
|
+
"url": r["url"],
|
|
448
|
+
"content_type": r["content_type"],
|
|
449
|
+
"reason": r["reason"],
|
|
450
|
+
"attempts": r["attempts"],
|
|
451
|
+
"retry_after": r["retry_after"],
|
|
452
|
+
}
|
|
453
|
+
for r in cur.fetchall()
|
|
454
|
+
]
|
|
455
|
+
except Exception as e:
|
|
456
|
+
print(f"[WARNING] Error getting retry-ready items: {e}")
|
|
457
|
+
return []
|
|
458
|
+
|
|
459
|
+
def get_retry_queue(self):
|
|
460
|
+
"""Return all items currently awaiting retry (status='retry')."""
|
|
461
|
+
if not self.is_connected():
|
|
462
|
+
return []
|
|
463
|
+
try:
|
|
464
|
+
cur = self._conn.cursor()
|
|
465
|
+
cur.execute("""
|
|
466
|
+
SELECT shortcode, url, content_type, reason, attempts,
|
|
467
|
+
retry_after, added_at
|
|
468
|
+
FROM processing_queue
|
|
469
|
+
WHERE status = 'retry'
|
|
470
|
+
ORDER BY retry_after
|
|
471
|
+
""")
|
|
472
|
+
return [dict(r) for r in cur.fetchall()]
|
|
473
|
+
except Exception as e:
|
|
474
|
+
print(f"[WARNING] Error getting retry queue: {e}")
|
|
475
|
+
return []
|
|
476
|
+
|
|
477
|
+
# ==================== QUEUE MANAGEMENT ====================
|
|
478
|
+
|
|
479
|
+
    def add_to_queue(self, shortcode, url):
        """Add item to processing queue. Returns queue position (1-based), or -1 on error.

        Deduplicates: an item already 'queued' returns its existing position,
        and one already 'processing' returns 0. Any other status (e.g.
        'retry') is re-queued at the back.
        """
        if not self.is_connected():
            return -1
        try:
            cur = self._conn.cursor()
            cur.execute(
                "SELECT status, position FROM processing_queue WHERE shortcode = ?", (shortcode,)
            )
            existing = cur.fetchone()
            if existing:
                if existing["status"] == "queued":
                    return existing["position"]
                if existing["status"] == "processing":
                    return 0

            # Next free slot = max queued position + 1 (1 for an empty queue).
            # NOTE(review): SELECT MAX + INSERT is not atomic — concurrent
            # adders could race for the same position; confirm writes are
            # serialized by the caller.
            cur.execute(
                "SELECT MAX(position) FROM processing_queue WHERE status = 'queued'"
            )
            row = cur.fetchone()
            position = (row[0] + 1) if row[0] is not None else 1

            now = datetime.utcnow().isoformat()
            self._conn.execute("""
                INSERT INTO processing_queue (shortcode, url, status, position, added_at, updated_at)
                VALUES (?, ?, 'queued', ?, ?, ?)
                ON CONFLICT(shortcode) DO UPDATE SET
                    url = excluded.url,
                    status = 'queued',
                    position = excluded.position,
                    updated_at = excluded.updated_at
            """, (shortcode, url, position, now, now))
            self._conn.commit()
            return position
        except Exception as e:
            print(f"[WARNING] Error adding to queue: {e}")
            return -1
|
|
516
|
+
|
|
517
|
+
def get_queue(self):
|
|
518
|
+
"""Return list of queued items ordered by position."""
|
|
519
|
+
if not self.is_connected():
|
|
520
|
+
return []
|
|
521
|
+
try:
|
|
522
|
+
cur = self._conn.cursor()
|
|
523
|
+
cur.execute(
|
|
524
|
+
"SELECT shortcode, url, position FROM processing_queue "
|
|
525
|
+
"WHERE status = 'queued' ORDER BY position"
|
|
526
|
+
)
|
|
527
|
+
return [
|
|
528
|
+
{"shortcode": r["shortcode"], "url": r["url"], "position": r["position"]}
|
|
529
|
+
for r in cur.fetchall()
|
|
530
|
+
]
|
|
531
|
+
except Exception as e:
|
|
532
|
+
print(f"[WARNING] Error getting queue: {e}")
|
|
533
|
+
return []
|
|
534
|
+
|
|
535
|
+
def get_processing(self):
|
|
536
|
+
"""Return list of shortcodes currently being processed."""
|
|
537
|
+
if not self.is_connected():
|
|
538
|
+
return []
|
|
539
|
+
try:
|
|
540
|
+
cur = self._conn.cursor()
|
|
541
|
+
cur.execute(
|
|
542
|
+
"SELECT shortcode FROM processing_queue WHERE status = 'processing'"
|
|
543
|
+
)
|
|
544
|
+
return [r["shortcode"] for r in cur.fetchall()]
|
|
545
|
+
except Exception as e:
|
|
546
|
+
print(f"[WARNING] Error getting processing items: {e}")
|
|
547
|
+
return []
|
|
548
|
+
|
|
549
|
+
def mark_processing(self, shortcode):
|
|
550
|
+
"""Mark a queued item as currently processing."""
|
|
551
|
+
if not self.is_connected():
|
|
552
|
+
return False
|
|
553
|
+
try:
|
|
554
|
+
now = datetime.utcnow().isoformat()
|
|
555
|
+
self._conn.execute("""
|
|
556
|
+
UPDATE processing_queue
|
|
557
|
+
SET status = 'processing', started_at = ?, updated_at = ?
|
|
558
|
+
WHERE shortcode = ?
|
|
559
|
+
""", (now, now, shortcode))
|
|
560
|
+
self._conn.commit()
|
|
561
|
+
return True
|
|
562
|
+
except Exception as e:
|
|
563
|
+
print(f"[WARNING] Error marking as processing: {e}")
|
|
564
|
+
return False
|
|
565
|
+
|
|
566
|
+
    def remove_from_queue(self, shortcode):
        """Remove an item from the queue and compact positions.

        After deleting the row, the remaining 'queued' items are renumbered
        1..N in their current position order so positions stay gap-free.
        Returns True on success (also when the shortcode was not present).
        """
        if not self.is_connected():
            return False
        try:
            self._conn.execute(
                "DELETE FROM processing_queue WHERE shortcode = ?", (shortcode,)
            )
            self._conn.commit()

            # Renumber the survivors to close the gap left by the delete.
            cur = self._conn.cursor()
            cur.execute(
                "SELECT shortcode FROM processing_queue "
                "WHERE status = 'queued' ORDER BY position"
            )
            for idx, item in enumerate(cur.fetchall(), 1):
                self._conn.execute(
                    "UPDATE processing_queue SET position = ? WHERE shortcode = ?",
                    (idx, item["shortcode"])
                )
            self._conn.commit()
            return True
        except Exception as e:
            print(f"[WARNING] Error removing from queue: {e}")
            return False
|
|
591
|
+
|
|
592
|
+
    def recover_interrupted_items(self):
        """
        Move items stuck in 'processing' back to 'queued' (e.g. after a crash).
        Returns the number of items recovered.

        All queued items are then renumbered 1..N by added_at so the queue
        order and positions are consistent again.
        """
        if not self.is_connected():
            return 0
        try:
            now = datetime.utcnow().isoformat()
            cur = self._conn.cursor()
            cur.execute("""
                UPDATE processing_queue
                SET status = 'queued', updated_at = ?
                WHERE status = 'processing'
            """, (now,))
            count = cur.rowcount  # number of rows flipped back to 'queued'
            self._conn.commit()

            # Rebuild positions from scratch, oldest additions first.
            cur.execute(
                "SELECT shortcode FROM processing_queue "
                "WHERE status = 'queued' ORDER BY added_at"
            )
            for idx, item in enumerate(cur.fetchall(), 1):
                self._conn.execute(
                    "UPDATE processing_queue SET position = ? WHERE shortcode = ?",
                    (idx, item["shortcode"])
                )
            self._conn.commit()

            if count > 0:
                print(f"[RECOVERED] Recovered {count} interrupted items")
            return count
        except Exception as e:
            print(f"[WARNING] Error recovering items: {e}")
            return 0
|
|
627
|
+
|
|
628
|
+
# ------------------------------------------------------------------
|
|
629
|
+
# Post management
|
|
630
|
+
# ------------------------------------------------------------------
|
|
631
|
+
|
|
632
|
+
def delete_post(self, shortcode):
|
|
633
|
+
"""Soft-delete a post (is_hidden=1). Data kept for re-add reuse. Returns True if updated."""
|
|
634
|
+
if not self.is_connected():
|
|
635
|
+
return False
|
|
636
|
+
try:
|
|
637
|
+
cur = self._conn.execute(
|
|
638
|
+
"UPDATE analyses SET is_hidden = 1, updated_at = ? WHERE shortcode = ?",
|
|
639
|
+
(datetime.utcnow().isoformat(), shortcode)
|
|
640
|
+
)
|
|
641
|
+
self._conn.commit()
|
|
642
|
+
return cur.rowcount > 0
|
|
643
|
+
except Exception as e:
|
|
644
|
+
print(f"[WARNING] Error soft-deleting post: {e}")
|
|
645
|
+
return False
|
|
646
|
+
|
|
647
|
+
def hard_delete_post(self, shortcode):
|
|
648
|
+
"""Permanently remove a post row — used for force re-analysis. Returns True if deleted."""
|
|
649
|
+
if not self.is_connected():
|
|
650
|
+
return False
|
|
651
|
+
try:
|
|
652
|
+
cur = self._conn.execute(
|
|
653
|
+
"DELETE FROM analyses WHERE shortcode = ?",
|
|
654
|
+
(shortcode,)
|
|
655
|
+
)
|
|
656
|
+
self._conn.commit()
|
|
657
|
+
return cur.rowcount > 0
|
|
658
|
+
except Exception as e:
|
|
659
|
+
print(f"[WARNING] Error hard-deleting post: {e}")
|
|
660
|
+
return False
|
|
661
|
+
|
|
662
|
+
def restore_post(self, shortcode):
|
|
663
|
+
"""Restore a soft-deleted post (is_hidden=0). Returns True if updated."""
|
|
664
|
+
if not self.is_connected():
|
|
665
|
+
return False
|
|
666
|
+
try:
|
|
667
|
+
cur = self._conn.execute(
|
|
668
|
+
"UPDATE analyses SET is_hidden = 0, updated_at = ? WHERE shortcode = ?",
|
|
669
|
+
(datetime.utcnow().isoformat(), shortcode)
|
|
670
|
+
)
|
|
671
|
+
self._conn.commit()
|
|
672
|
+
return cur.rowcount > 0
|
|
673
|
+
except Exception as e:
|
|
674
|
+
print(f"[WARNING] Error restoring post: {e}")
|
|
675
|
+
return False
|
|
676
|
+
|
|
677
|
+
def update_post(self, shortcode, updates):
|
|
678
|
+
"""
|
|
679
|
+
Update specific fields of a post.
|
|
680
|
+
|
|
681
|
+
Args:
|
|
682
|
+
shortcode: Instagram post shortcode
|
|
683
|
+
updates: dict of allowed fields (category, title, summary)
|
|
684
|
+
|
|
685
|
+
Returns:
|
|
686
|
+
bool: True if updated
|
|
687
|
+
"""
|
|
688
|
+
if not self.is_connected():
|
|
689
|
+
return False
|
|
690
|
+
try:
|
|
691
|
+
updates["updated_at"] = datetime.utcnow().isoformat()
|
|
692
|
+
set_clause = ", ".join(f"{k} = ?" for k in updates)
|
|
693
|
+
values = list(updates.values()) + [shortcode]
|
|
694
|
+
cur = self._conn.execute(
|
|
695
|
+
f"UPDATE analyses SET {set_clause} WHERE shortcode = ?", values
|
|
696
|
+
)
|
|
697
|
+
self._conn.commit()
|
|
698
|
+
if cur.rowcount == 0:
|
|
699
|
+
print(f"[WARNING] Post not found: {shortcode}")
|
|
700
|
+
return False
|
|
701
|
+
print(f"[OK] Updated post: {shortcode}")
|
|
702
|
+
return True
|
|
703
|
+
except Exception as e:
|
|
704
|
+
print(f"[WARNING] Error updating post: {e}")
|
|
705
|
+
return False
|
|
706
|
+
|
|
707
|
+
# ------------------------------------------------------------------
|
|
708
|
+
# Collections
|
|
709
|
+
# ------------------------------------------------------------------
|
|
710
|
+
|
|
711
|
+
def _collection_row_to_dict(self, row):
|
|
712
|
+
if row is None:
|
|
713
|
+
return None
|
|
714
|
+
d = dict(row)
|
|
715
|
+
try:
|
|
716
|
+
d['post_ids'] = json.loads(d.get('post_ids') or '[]')
|
|
717
|
+
except Exception:
|
|
718
|
+
d['post_ids'] = []
|
|
719
|
+
return d
|
|
720
|
+
|
|
721
|
+
def get_collections(self):
|
|
722
|
+
"""Return all collections ordered by created_at."""
|
|
723
|
+
if not self.is_connected():
|
|
724
|
+
return []
|
|
725
|
+
try:
|
|
726
|
+
cur = self._conn.cursor()
|
|
727
|
+
cur.execute("SELECT * FROM collections ORDER BY created_at ASC")
|
|
728
|
+
return [self._collection_row_to_dict(r) for r in cur.fetchall()]
|
|
729
|
+
except Exception as e:
|
|
730
|
+
print(f"[WARNING] Error getting collections: {e}")
|
|
731
|
+
return []
|
|
732
|
+
|
|
733
|
+
def get_collection(self, collection_id):
|
|
734
|
+
"""Return a single collection by id."""
|
|
735
|
+
if not self.is_connected():
|
|
736
|
+
return None
|
|
737
|
+
try:
|
|
738
|
+
cur = self._conn.cursor()
|
|
739
|
+
cur.execute("SELECT * FROM collections WHERE id = ?", (collection_id,))
|
|
740
|
+
return self._collection_row_to_dict(cur.fetchone())
|
|
741
|
+
except Exception as e:
|
|
742
|
+
print(f"[WARNING] Error getting collection: {e}")
|
|
743
|
+
return None
|
|
744
|
+
|
|
745
|
+
    def upsert_collection(self, collection_id, name, icon, post_ids, created_at=None, updated_at=None):
        """Insert or fully replace a collection. Returns the saved dict.

        On conflict the existing row keeps its created_at but name, icon,
        post_ids and updated_at are overwritten. A non-list `post_ids` is
        stored as '[]'. Returns None when disconnected or on error.
        """
        if not self.is_connected():
            return None
        try:
            now = datetime.utcnow().isoformat()
            self._conn.execute("""
                INSERT INTO collections (id, name, icon, post_ids, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    name = excluded.name,
                    icon = excluded.icon,
                    post_ids = excluded.post_ids,
                    updated_at = excluded.updated_at
            """, (
                collection_id, name, icon,
                json.dumps(post_ids if isinstance(post_ids, list) else []),
                created_at or now, updated_at or now
            ))
            self._conn.commit()
            # Re-read so the caller gets the decoded, persisted state.
            return self.get_collection(collection_id)
        except Exception as e:
            print(f"[WARNING] Error upserting collection: {e}")
            return None
|
|
769
|
+
|
|
770
|
+
def update_collection_posts(self, collection_id, post_ids):
|
|
771
|
+
"""Replace the post_ids list for a collection."""
|
|
772
|
+
if not self.is_connected():
|
|
773
|
+
return False
|
|
774
|
+
try:
|
|
775
|
+
now = datetime.utcnow().isoformat()
|
|
776
|
+
cur = self._conn.execute(
|
|
777
|
+
"UPDATE collections SET post_ids = ?, updated_at = ? WHERE id = ?",
|
|
778
|
+
(json.dumps(post_ids), now, collection_id)
|
|
779
|
+
)
|
|
780
|
+
self._conn.commit()
|
|
781
|
+
return cur.rowcount > 0
|
|
782
|
+
except Exception as e:
|
|
783
|
+
print(f"[WARNING] Error updating collection posts: {e}")
|
|
784
|
+
return False
|
|
785
|
+
|
|
786
|
+
def delete_collection(self, collection_id):
|
|
787
|
+
"""Delete a collection. Returns True if deleted."""
|
|
788
|
+
if not self.is_connected():
|
|
789
|
+
return False
|
|
790
|
+
try:
|
|
791
|
+
cur = self._conn.execute("DELETE FROM collections WHERE id = ?", (collection_id,))
|
|
792
|
+
self._conn.commit()
|
|
793
|
+
return cur.rowcount > 0
|
|
794
|
+
except Exception as e:
|
|
795
|
+
print(f"[WARNING] Error deleting collection: {e}")
|
|
796
|
+
return False
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
# ------------------------------------------------------------------
|
|
800
|
+
# Singleton accessor
|
|
801
|
+
# ------------------------------------------------------------------
|
|
802
|
+
|
|
803
|
+
# Process-wide shared Database handle, created lazily by get_db().
_db_instance = None
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
def get_db():
    """Get or create the shared Database instance.

    NOTE(review): the lazy init is not guarded by a lock — two threads
    calling this concurrently could each construct a Database. Confirm the
    first call happens from a single thread (e.g. app startup).
    """
    global _db_instance
    if _db_instance is None:
        _db_instance = Database()
    return _db_instance
|