superbrain-server 1.0.2-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/bin/superbrain.js +196 -0
  2. package/package.json +23 -0
  3. package/payload/.dockerignore +45 -0
  4. package/payload/.env.example +58 -0
  5. package/payload/Dockerfile +73 -0
  6. package/payload/analyzers/__init__.py +0 -0
  7. package/payload/analyzers/audio_transcribe.py +225 -0
  8. package/payload/analyzers/caption.py +244 -0
  9. package/payload/analyzers/music_identifier.py +346 -0
  10. package/payload/analyzers/text_analyzer.py +117 -0
  11. package/payload/analyzers/visual_analyze.py +218 -0
  12. package/payload/analyzers/webpage_analyzer.py +789 -0
  13. package/payload/analyzers/youtube_analyzer.py +320 -0
  14. package/payload/api.py +1676 -0
  15. package/payload/config/.api_keys.example +22 -0
  16. package/payload/config/model_rankings.json +492 -0
  17. package/payload/config/openrouter_free_models.json +1364 -0
  18. package/payload/config/whisper_model.txt +1 -0
  19. package/payload/config_settings.py +185 -0
  20. package/payload/core/__init__.py +0 -0
  21. package/payload/core/category_manager.py +219 -0
  22. package/payload/core/database.py +811 -0
  23. package/payload/core/link_checker.py +300 -0
  24. package/payload/core/model_router.py +1253 -0
  25. package/payload/docker-compose.yml +120 -0
  26. package/payload/instagram/__init__.py +0 -0
  27. package/payload/instagram/instagram_downloader.py +253 -0
  28. package/payload/instagram/instagram_login.py +190 -0
  29. package/payload/main.py +912 -0
  30. package/payload/requirements.txt +39 -0
  31. package/payload/reset.py +311 -0
  32. package/payload/start-docker-prod.sh +125 -0
  33. package/payload/start-docker.sh +56 -0
  34. package/payload/start.py +1302 -0
  35. package/payload/static/favicon.ico +0 -0
  36. package/payload/stop-docker.sh +16 -0
  37. package/payload/utils/__init__.py +0 -0
  38. package/payload/utils/db_stats.py +108 -0
  39. package/payload/utils/manage_token.py +91 -0
package/payload/api.py ADDED
@@ -0,0 +1,1676 @@
1
+ """
2
+ SuperBrain Instagram Content Analysis API
3
+ Version: 1.02
4
+ FastAPI REST endpoints for analyzing Instagram content with MongoDB caching
5
+ With request queuing, live progress logging, and API key authentication
6
+ """
7
+
8
+ from fastapi import FastAPI, HTTPException, Query, Header, Depends, Request, UploadFile, File, Body
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.responses import FileResponse, Response, StreamingResponse
11
+ from pydantic import BaseModel, HttpUrl
12
+ from typing import Optional, List, Dict, Any
13
+ import subprocess
14
+ import asyncio
15
+ import sys
16
+ import os
17
+ import json
18
+ import zipfile
19
+ import io
20
+ from datetime import datetime
21
+ import logging
22
+ import secrets
23
+ import string
24
+ import threading
25
+ import time
26
+ from pathlib import Path
27
+
28
+ # Import database module
29
+ from core.database import get_db
30
+ from core.link_checker import validate_link
31
+
32
# Configure logging: timestamped INFO-level output shared by every endpoint
# and the background queue worker.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S'
)
# Module-level logger for this API module.
logger = logging.getLogger(__name__)
39
+
40
def generate_api_token(length=8):
    """Generate an 8-character alphanumeric Access Token."""
    # Uppercase letters + digits keeps the token easy to read aloud/type.
    charset = string.ascii_uppercase + string.digits
    picks = [secrets.choice(charset) for _ in range(length)]
    return ''.join(picks)
44
+
45
+
46
def is_valid_api_token_format(token: str) -> bool:
    """Validate token format: exactly 8 alphanumeric chars."""
    if len(token) != 8:
        return False
    return token.isalnum()
49
+
50
# Token persisted next to this module so it survives restarts.
TOKEN_FILE = Path(__file__).parent / "token.txt"

def load_or_create_api_token():
    """Load existing API token or create one if missing."""
    if TOKEN_FILE.exists():
        existing = TOKEN_FILE.read_text(encoding="utf-8", errors="ignore").strip()
        if existing and is_valid_api_token_format(existing):
            # Valid token on disk — reuse it and announce it in the log.
            logger.info("=" * 80)
            logger.info(f"🔐 API Token: {existing}")
            logger.info("=" * 80)
            return existing
        if existing:
            # File present but token malformed (e.g. from an older version).
            logger.warning("Existing token format is legacy/invalid. Regenerating 8-character Access Token.")

    fresh = generate_api_token()
    TOKEN_FILE.write_text(fresh)

    logger.info("=" * 80)
    logger.info(f"🔐 API Token (NEW): {fresh}")
    logger.info("=" * 80)
    return fresh
72
+
73
# Loaded once at import; every authenticated endpoint compares against this.
API_TOKEN = load_or_create_api_token()

async def verify_token(request: Request, x_api_key: str = Header(..., description="Access Token for authentication")):
    """
    Verify authentication using Access Token.
    """
    if x_api_key == API_TOKEN:
        return x_api_key
    # Log the source address of the failed attempt before rejecting.
    client_ip = request.client.host if hasattr(request, 'client') else 'unknown'
    logger.warning("Invalid Access Token attempt from IP: %s", client_ip)
    raise HTTPException(
        status_code=401,
        detail="Invalid Access Token. Use the token from backend/token.txt."
    )
86
+
87
# Initialize FastAPI app
app = FastAPI(
    title="SuperBrain",
    description="AI-powered Instagram content analysis with caching",
    version="1.02"
)

# CORS configuration
# (fix: removed the duplicate `import os` that shadowed the top-of-file import)

# Get allowed origins from environment variable or allow all for development
allowed_origins_env = os.getenv('ALLOWED_ORIGINS', '')
if allowed_origins_env:
    allowed_origins = [origin.strip() for origin in allowed_origins_env.split(',')]
else:
    # Development: allow all origins so phones on same WiFi can connect
    allowed_origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    # Wildcard origins may not be combined with credentials per the CORS spec.
    allow_credentials=False if allowed_origins == ["*"] else True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["Content-Type", "Authorization", "X-API-Key"],
)
112
+
113
# Security headers middleware
@app.middleware("http")
async def add_security_headers(request: Request, call_next):
    """Attach standard browser hardening headers to every response."""
    resp = await call_next(request)
    hardening = {
        "X-Content-Type-Options": "nosniff",
        "X-Frame-Options": "DENY",
        "X-XSS-Protection": "1; mode=block",
        "Referrer-Policy": "strict-origin-when-cross-origin",
        # CSP - adjust based on your needs
        "Content-Security-Policy": "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'",
    }
    for header_name, header_value in hardening.items():
        resp.headers[header_name] = header_value
    return resp
124
+
125
# Request queue management (persistent)
max_concurrent = 1  # Process one post at a time - queue others sequentially

# Track active analysis subprocesses so they can be killed on delete
_active_processes: dict = {}  # shortcode -> subprocess.Popen
_active_processes_lock = threading.Lock()

_STATIC_DIR = Path(__file__).parent / "static"

@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Serve the bundled favicon, or an empty 204 when the file is absent."""
    ico = _STATIC_DIR / "favicon.ico"
    if ico.exists():
        return FileResponse(str(ico), media_type="image/x-icon")
    # fix: `Response` is already imported at module top level; the redundant
    # local `from fastapi.responses import Response` was removed.
    return Response(status_code=204)
141
+
142
# Shared Instaloader instance for caption fetching (reuse session to avoid rate limits)
caption_loader = None
caption_loader_lock = threading.Lock()

def get_caption_loader():
    """Get or create shared Instaloader instance for caption fetching"""
    global caption_loader
    with caption_loader_lock:
        if caption_loader is not None:
            return caption_loader
        # Lazily import so the module loads even if instaloader is unused.
        import instaloader
        caption_loader = instaloader.Instaloader(
            download_pictures=False,
            download_videos=False,
            download_video_thumbnails=False,
            download_geotags=False,
            download_comments=False,
            save_metadata=False,
            compress_json=False,
            max_connection_attempts=1  # Fail fast
        )
        return caption_loader
163
+
164
# Initialize database and recover interrupted items on startup.
db = get_db()
if db.is_connected():
    # Items left mid-flight by a previous crash are re-queued so they
    # are not silently lost.
    recovered = db.recover_interrupted_items()
    if recovered > 0:
        logger.info(f"🔄 Recovered {recovered} interrupted items from previous session")
170
+
171
# Background worker to process queue
def queue_worker():
    """Background thread that processes queued items automatically"""
    logger.info("🔧 Queue worker thread started")
    # Counts 5-second poll cycles; every 30th cycle (~2.5 min) the retry
    # queue is drained back into the main queue.
    _retry_check_counter = 0

    while True:
        try:
            # ── Periodic retry-queue drain (every ~2.5 min) ─────────────────
            _retry_check_counter += 1
            if _retry_check_counter >= 30:
                _retry_check_counter = 0
                ready = db.get_retry_ready()
                if ready:
                    logger.info(f"🔄 Promoting {len(ready)} retry-ready item(s) back to queue")
                    for r_item in ready:
                        logger.info(
                            f" ↩ {r_item['shortcode']} "
                            f"(reason={r_item['reason']}, attempts={r_item['attempts']})"
                        )
                        db.add_to_queue(r_item['shortcode'], r_item['url'])

            # Check if we have capacity
            processing = db.get_processing()
            if len(processing) < max_concurrent:
                # Get next item from queue
                queue = db.get_queue()
                if queue:
                    item = queue[0]  # Get first item
                    shortcode = item['shortcode']
                    url = item['url']

                    logger.info(f"📋 Queue alert: Processing next in queue")
                    logger.info(f"📊 Queue status: {len(queue) - 1} remaining after this | Starting: {shortcode}")
                    logger.info(f"📤 [{shortcode}] Starting analysis from queue...")

                    # Mark as processing
                    db.mark_processing(shortcode)

                    # Run analysis
                    try:
                        # main.py does the full download/analyze/save pipeline.
                        process = subprocess.Popen(
                            [sys.executable, "main.py", url],
                            cwd=Path(__file__).parent,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            text=True,
                            bufsize=1
                        )

                        # Register so delete_post can kill it
                        with _active_processes_lock:
                            _active_processes[shortcode] = process

                        # Wait for completion
                        process.wait()

                        with _active_processes_lock:
                            _active_processes.pop(shortcode, None)

                        # If the post was deleted while processing, skip queue cleanup
                        # (delete_post already called remove_from_queue)
                        if process.returncode == -9 or process.returncode == -15:
                            # SIGKILL / SIGTERM — deliberate external kill.
                            logger.info(f"🛑 [{shortcode}] Analysis killed (post was deleted)")
                        elif process.returncode == 0:
                            logger.info(f"✅ Queue item completed: {shortcode}")
                            db.remove_from_queue(shortcode)
                        elif process.returncode == 2:
                            # main.py queued this item for retry — status already set in DB
                            logger.info(f"⏰ [{shortcode}] Quota exhausted — moved to retry queue")
                            db.remove_from_queue(shortcode)
                        else:
                            logger.error(f"❌ Queue item failed (rc={process.returncode}): {shortcode}")
                            db.remove_from_queue(shortcode)

                    except Exception as e:
                        # Ensure the process registry and queue entry are
                        # cleaned up even when spawning/waiting fails.
                        with _active_processes_lock:
                            _active_processes.pop(shortcode, None)
                        logger.error(f"❌ Error processing queue item {shortcode}: {e}")
                        db.remove_from_queue(shortcode)

            # Sleep before next check
            time.sleep(5)

        except Exception as e:
            # Never let the worker thread die — log and back off.
            logger.error(f"Queue worker error: {e}")
            time.sleep(10)
258
+
259
# Start worker thread (daemon so it exits together with the main process).
worker_thread = threading.Thread(target=queue_worker, daemon=True)
worker_thread.start()
logger.info("✅ Background queue worker initialized")
263
+
264
# Request/Response models

class AnalyzeRequest(BaseModel):
    """Request body for POST /analyze."""
    # Content URL to analyze (Instagram, YouTube, or generic web page).
    url: str
    # When True, any cached record is hard-deleted and the URL re-analyzed.
    force: bool = False

    class Config:
        # Example payload shown in the generated OpenAPI docs.
        json_schema_extra = {
            "example": {
                "url": "https://www.instagram.com/p/DRWKk5JiL0h/",
                "force": False
            }
        }
276
+
277
class AnalysisResponse(BaseModel):
    """Response envelope returned by POST /analyze."""
    # True when the request was handled successfully.
    success: bool
    # True when data came from the database cache rather than a fresh run.
    cached: bool
    # Filtered analysis record (url/username/title/summary/tags/music/category).
    data: Optional[dict] = None
    error: Optional[str] = None
    # Wall-clock seconds spent serving this request.
    processing_time: Optional[float] = None
283
+
284
+
285
+ # API Endpoints
286
+
287
@app.get("/")
async def root():
    """API information and health check (no authentication required)"""
    endpoint_docs = {
        "POST /analyze": "Analyze content (requires auth)",
        "GET /caption": "Get post caption quickly (requires auth)",
        "GET /cache/{shortcode}": "Check cache (requires auth)",
        "GET /recent": "Get recent analyses (requires auth)",
        "GET /stats": "Database statistics (requires auth)",
        "GET /category/{category}": "Get by category (requires auth)",
        "GET /search": "Search by tags (requires auth)"
    }
    return {
        "name": "SuperBrain Instagram Analyzer API",
        "version": "1.02",
        "status": "operational",
        "authentication": "Required - Use Access Token with X-API-Key header",
        "message": "Run start.py on the server and use the token from token.txt.",
        "endpoints": endpoint_docs
    }
306
+
307
+
308
@app.get("/caption")
async def get_caption(url: str, token: str = Depends(verify_token)):
    """
    Quick caption fetch - calls caption.py as subprocess
    Simple and works every time

    Always returns success=True with a best-effort title so the mobile app
    never blocks on caption failures; errors degrade to "Instagram Post".
    """
    try:
        logger.info(f"🔍 Quick caption fetch for: {url}")

        # Extract shortcode for logging
        validation = validate_link(url)
        shortcode = validation['shortcode']

        # fix: removed redundant local `import subprocess` / `import sys`
        # (both already imported at module top level), and switched the
        # deprecated asyncio.get_event_loop() to get_running_loop(), which
        # is the correct call inside a coroutine.
        loop = asyncio.get_running_loop()

        def run_caption_script():
            # Use the same Python interpreter as the API (with all packages)
            python_exe = sys.executable
            print(f"[API] Using Python: {python_exe}")

            result = subprocess.run(
                [python_exe, 'analyzers/caption.py', url],
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                timeout=15,
                cwd=str(Path(__file__).parent)
            )
            print(f"[API] Subprocess stdout: {repr(result.stdout[:200])}")
            print(f"[API] Subprocess stderr: {repr(result.stderr[:200])}")
            print(f"[API] Subprocess returncode: {result.returncode}")
            return result.stdout.strip() if result.stdout else ""

        try:
            # Outer 20s guard in case the 15s subprocess timeout misfires.
            caption_text = await asyncio.wait_for(
                loop.run_in_executor(None, run_caption_script),
                timeout=20.0
            )
        except asyncio.TimeoutError:
            logger.error(f"❌ [{shortcode}] Caption fetch timed out")
            return {
                "success": True,
                "shortcode": shortcode,
                "username": "",
                "title": "Instagram Post",
                "full_caption": ""
            }

        print(f"[API] Final caption_text: {repr(caption_text)}")

        # Check if it's an error message
        if caption_text.startswith('❌') or caption_text.startswith('ℹ️'):
            logger.warning(f"⚠️ [{shortcode}] {caption_text}")
            return {
                "success": True,
                "shortcode": shortcode,
                "username": "",
                "title": "Instagram Post",
                "full_caption": ""
            }

        # Limit to 100 chars for title
        title = caption_text[:100] if len(caption_text) > 100 else caption_text
        title = title if title else "Instagram Post"

        logger.info(f"✅ [{shortcode}] Caption: {title}")

        return {
            "success": True,
            "shortcode": shortcode,
            "username": "",
            "title": title,
            "full_caption": caption_text
        }

    except Exception as e:
        # Degrade gracefully — the caller still gets a usable placeholder.
        logger.error(f"❌ Caption fetch failed: {str(e)}", exc_info=True)
        return {
            "success": True,
            "shortcode": "",
            "username": "",
            "title": "Instagram Post",
            "full_caption": "",
            "error": str(e)
        }
398
+
399
+
400
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_instagram(request: AnalyzeRequest, token: str = Depends(verify_token)):
    """
    Analyze content from URL (Instagram, YouTube, or web page)

    - Checks cache first for instant retrieval
    - If not cached, adds to processing queue
    - Handles multiple concurrent requests with queuing
    - Returns comprehensive summary with title, tags, music, category
    """
    start_time = datetime.now()

    # Detect content type and extract primary key
    try:
        url_str = str(request.url)
        validation = validate_link(url_str)
        if not validation['valid']:
            raise ValueError(validation['error'])
        shortcode = validation['shortcode']
        content_type = validation['content_type']
        # Use the normalised URL (e.g. canonical YouTube URL)
        url_str = validation['url']
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid URL: {str(e)}")

    logger.info(f"📥 New request: {shortcode}")

    # Initialize database connection
    db = get_db()

    try:
        # Step 1: Check database cache first
        logger.info(f"🔍 [{shortcode}] Checking database cache...")
        cached_result = db.check_cache(shortcode)

        if cached_result:
            # Force re-analyze: hard-delete existing record and proceed with fresh analysis
            if request.force:
                logger.info(f"🔄 [{shortcode}] Force re-analyze requested — clearing cached data")
                db.hard_delete_post(shortcode)
                cached_result = None  # fall through to fresh analysis
            # Restore soft-deleted post if user is re-adding it
            elif cached_result.get('is_hidden') == 1:
                db.restore_post(shortcode)
                cached_result['is_hidden'] = 0
                logger.info(f"♻️ [{shortcode}] Restored from soft-delete. Returning cached data.")
            else:
                logger.info(f"⚡ [{shortcode}] Found in cache! Returning instantly.")

        if cached_result:
            # Filter response down to the fields the client consumes.
            filtered_data = {
                'url': cached_result.get('url', ''),
                'username': cached_result.get('username', ''),
                'content_type': cached_result.get('content_type', content_type),
                'thumbnail': cached_result.get('thumbnail', ''),
                'title': cached_result.get('title', ''),
                'summary': cached_result.get('summary', ''),
                'tags': cached_result.get('tags', []),
                'music': cached_result.get('music', ''),
                'category': cached_result.get('category', '')
            }

            processing_time = (datetime.now() - start_time).total_seconds()
            logger.info(f"✅ [{shortcode}] Response sent ({processing_time:.2f}s)")

            return AnalysisResponse(
                success=True,
                cached=True,
                data=filtered_data,
                processing_time=processing_time
            )

        # Step 2: Not in cache - check if already being processed
        logger.info(f"💾 [{shortcode}] Not in cache")

        # Check if already in queue or processing
        processing_items = db.get_processing()
        if shortcode in processing_items:
            logger.warning(f"⏳ [{shortcode}] Already being processed. Please wait...")
            raise HTTPException(
                status_code=409,
                detail="This URL is already being analyzed. Please wait and try again in a moment."
            )

        # Check if URL is already in queue - if so, remove the old queued item
        queue_items = db.get_queue()
        for item in queue_items:
            if item['shortcode'] == shortcode:
                logger.info(f"🔄 [{shortcode}] Duplicate found in queue - removing old entry and processing fresh request")
                db.remove_from_queue(shortcode)
                break

        # Step 3: Check queue size (re-fetch after potential duplicate removal)
        queue_items = db.get_queue()
        if len(processing_items) >= max_concurrent:
            # Another analysis is running — queue this one and tell the
            # client to poll; the background worker will pick it up.
            logger.warning(f"🚦 [{shortcode}] Server busy - 1 post analyzing. Adding to queue...")
            queue_position = db.add_to_queue(shortcode, request.url)
            if queue_position > 0:
                logger.info(f"📝 [{shortcode}] ✅ Added to queue at position {queue_position}")
                logger.info(f"📊 Queue status: {len(queue_items) + 1} waiting | 1 analyzing")
                raise HTTPException(
                    status_code=503,
                    detail=f"Server busy analyzing 1 post. Your request is queued (position: {queue_position}). It will be processed automatically. Check back in a few minutes."
                )
            else:
                raise HTTPException(
                    status_code=500,
                    detail="Failed to add to queue. Please try again."
                )

        # Step 4: Start processing
        if len(queue_items) > 0:
            logger.info(f"📊 Queue status: {len(queue_items)} waiting | Starting: {shortcode}")
        logger.info(f"🚀 [{shortcode}] Starting analysis...")
        db.mark_processing(shortcode)

        # Run main.py as subprocess — executed in a thread pool so the asyncio
        # event loop stays free to serve /ping and other requests during analysis.
        logger.info(f"📊 [{shortcode}] Phase 1: Downloading content...")

        def _run_subprocess() -> tuple:
            # Blocking helper (runs via asyncio.to_thread): spawns main.py,
            # streams its stdout to translate progress markers into phase
            # log lines, and returns (returncode, stdout, stderr).
            proc = subprocess.Popen(
                [sys.executable, "main.py", url_str],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding='utf-8',
                env={**os.environ, 'PYTHONIOENCODING': 'utf-8'},
                cwd=str(Path(__file__).parent),
                bufsize=1
            )
            # Register so delete_post can kill the subprocess mid-run.
            with _active_processes_lock:
                _active_processes[shortcode] = proc
            lines = []
            for line in proc.stdout:
                lines.append(line)
                lc = line.strip()
                # Map main.py progress markers to user-facing phase logs.
                if "Step 4: Visual Analysis" in lc:
                    logger.info(f"🎬 [{shortcode}] Phase 2: Visual analysis (AI processing)...")
                elif "Step 5: Audio Transcription" in lc or "Phase 2: Audio" in lc:
                    logger.info(f"🎙️ [{shortcode}] Phase 3: Audio transcription (Whisper)...")
                elif "Phase 3: Light Tasks" in lc:
                    logger.info(f"⚡ [{shortcode}] Phase 4: Music ID + Text (parallel)...")
                elif "GENERATING COMPREHENSIVE SUMMARY" in lc:
                    logger.info(f"🧠 [{shortcode}] Phase 5: Generating AI summary...")
                elif "Saving to Database" in lc:
                    logger.info(f"💾 [{shortcode}] Phase 6: Saving to database...")
                elif "Cleaned up temp folder" in lc:
                    logger.info(f"🗑️ [{shortcode}] Phase 7: Cleanup complete")
            proc.wait()
            with _active_processes_lock:
                _active_processes.pop(shortcode, None)
            return proc.returncode, ''.join(lines), proc.stderr.read()

        returncode, stdout, stderr = await asyncio.to_thread(_run_subprocess)

        if stderr.strip():
            # Log stderr from main.py to help diagnose issues
            logger.warning(f"⚠️ [{shortcode}] main.py stderr:\n{stderr[:1000]}")

        if returncode == 2:
            # main.py detected quota exhaustion and queued item for retry.
            # NOTE: Do NOT remove from queue here — main.py already called
            # queue_for_retry() which set status='retry'. Removing would lose it.
            logger.info(f"⏰ [{shortcode}] Quota exhausted — queued for automatic retry")
            raise HTTPException(
                status_code=202,
                detail="API quota exhausted. Your request has been queued for automatic retry in 24 hours."
            )

        if returncode != 0:
            # Extract last meaningful error line from stdout for the error message
            error_lines = [l.strip() for l in stdout.splitlines() if l.strip() and ('❌' in l or 'Error' in l or 'failed' in l.lower())]
            error_detail = error_lines[-1] if error_lines else (stderr.strip()[:200] or "Analysis failed")
            logger.error(f"❌ [{shortcode}] Analysis failed: {error_detail}")
            logger.debug(f"[{shortcode}] stdout tail:\n{stdout[-800:]}")
            raise HTTPException(
                status_code=400,
                detail=error_detail
            )

        logger.info(f"✅ [{shortcode}] Analysis complete! Fetching from database...")

        # Get result from database — retry up to 4 times in case the SQLite write
        # hasn't flushed yet (race condition between subprocess write and our read).
        analysis = None
        for _attempt in range(4):
            analysis = db.check_cache(shortcode)
            if analysis:
                if _attempt > 0:
                    logger.info(f"🔄 [{shortcode}] Found in database on retry {_attempt}")
                break
            if _attempt < 3:
                logger.warning(f"⏳ [{shortcode}] Not in DB yet (attempt {_attempt+1}/4), retrying in 1s…")
                await asyncio.sleep(1)

        if not analysis:
            logger.error(f"❌ [{shortcode}] Not found in database after 4 attempts!")
            raise HTTPException(
                status_code=500,
                detail="Analysis completed but result not found in database"
            )

        # Filter response down to the fields the client consumes.
        filtered_data = {
            'url': analysis.get('url', ''),
            'username': analysis.get('username', ''),
            'content_type': analysis.get('content_type', content_type),
            'thumbnail': analysis.get('thumbnail', ''),
            'title': analysis.get('title', ''),
            'summary': analysis.get('summary', ''),
            'tags': analysis.get('tags', []),
            'music': analysis.get('music', ''),
            'category': analysis.get('category', '')
        }

        processing_time = (datetime.now() - start_time).total_seconds()
        logger.info(f"✅ [{shortcode}] Response sent ({processing_time:.2f}s total)")

        # Remove from processing queue
        db.remove_from_queue(shortcode)
        logger.info(f"🔓 [{shortcode}] Released from processing queue")

        return AnalysisResponse(
            success=True,
            cached=False,
            data=filtered_data,
            processing_time=processing_time
        )

    except HTTPException as he:
        # Don't remove from queue for 202 (retry-queued) — the item was
        # intentionally kept in the retry queue by main.py.
        if he.status_code != 202:
            db.remove_from_queue(shortcode)
        raise
    except subprocess.SubprocessError as e:
        logger.error(f"❌ [{shortcode}] Subprocess error: {str(e)}")
        db.remove_from_queue(shortcode)
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
    except Exception as e:
        logger.error(f"❌ [{shortcode}] Unexpected error: {str(e)}")
        db.remove_from_queue(shortcode)
        raise HTTPException(status_code=500, detail=f"Server error: {str(e)}")
645
+
646
+
647
@app.get("/cache/{shortcode}")
async def check_cache(shortcode: str, token: str = Depends(verify_token)):
    """
    Check if Instagram post is already analyzed and cached

    - Returns cached analysis if available
    - Returns 404 if not found
    - Requires API authentication
    """
    try:
        record = get_db().check_cache(shortcode)

        if not record:
            raise HTTPException(status_code=404, detail="Not found in cache")

        # Expose only the essential fields to the client.
        filtered_data = {
            'url': record.get('url', ''),
            'username': record.get('username', ''),
            'title': record.get('title', ''),
            'summary': record.get('summary', ''),
            'tags': record.get('tags', []),
            'music': record.get('music', ''),
            'category': record.get('category', '')
        }

        return {"success": True, "cached": True, "data": filtered_data}

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
684
+
685
+
686
@app.get("/recent")
async def get_recent_analyses(limit: int = Query(default=10, ge=1, le=100), token: str = Depends(verify_token)):
    """
    Get recent analyses from database

    - Returns most recently analyzed content
    - Default limit: 10, max: 100
    - Requires API authentication
    """
    try:
        records = get_db().get_recent(limit=limit)
        return {"success": True, "count": len(records), "data": records}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
707
+
708
+
709
@app.get("/stats")
async def get_database_stats(token: str = Depends(verify_token)):
    """
    Get database statistics

    - Total documents
    - Storage usage
    - Category breakdown
    - Capacity information
    - Requires API authentication
    """
    try:
        snapshot = get_db().get_stats()
        return {"success": True, "data": snapshot}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
731
+
732
+
733
@app.get("/categories")
async def get_all_categories(token: str = Depends(verify_token)):
    """
    Get all categories with post counts
    - Requires API authentication
    """
    try:
        category_counts = get_db().get_stats().get('categories', {})

        # Build list entries sorted by descending count (stable for ties).
        ranked = sorted(category_counts.items(), key=lambda kv: kv[1], reverse=True)
        categories = [
            {'id': name.lower(), 'name': name, 'count': count}
            for name, count in ranked
        ]

        return {
            "success": True,
            "categories": categories,
            "total": sum(entry['count'] for entry in categories)
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
761
+
762
+
763
@app.get("/category/{category}")
async def get_by_category(
    category: str,
    limit: int = Query(default=20, ge=1, le=100),
    token: str = Depends(verify_token)
):
    """
    Get analyses by category

    Categories: product, places, food, fashion, fitness, education,
    entertainment, pets, other
    - Requires API authentication
    """
    try:
        matches = get_db().get_by_category(category, limit=limit)
        return {
            "success": True,
            "category": category,
            "count": len(matches),
            "data": matches
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
789
+
790
+
791
@app.get("/search")
async def search_by_tags(
    tags: str = Query(..., description="Comma-separated tags to search"),
    limit: int = Query(default=20, ge=1, le=100),
    token: str = Depends(verify_token)
):
    """
    Search analyses by tags

    - Provide comma-separated tags
    - Example: travel,sikkim,budget
    - Requires API authentication
    """
    try:
        # fix: drop empty fragments so inputs like "a,,b" or a trailing
        # comma no longer search for an empty-string tag.
        tag_list = [tag.strip() for tag in tags.split(',') if tag.strip()]

        db = get_db()
        results = db.search_tags(tag_list, limit=limit)

        return {
            "success": True,
            "tags": tag_list,
            "count": len(results),
            "data": results
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
819
+
820
+
821
@app.get("/ping")
async def ping():
    """Ultra-lightweight liveness check — no DB, no auth, instant response."""
    now = datetime.now()
    return {"status": "ok", "timestamp": now.isoformat()}
825
+
826
+
827
@app.get("/status")
async def status():
    """
    Server status check - no auth required.
    """
    payload = {
        "status": "online",
        "version": "1.02",
        "message": "Server is running. Configure app with server URL and Access Token from token.txt."
    }
    return payload
837
+
838
+
839
@app.get("/connect-info")
async def connect_info(request: Request):
    """
    Returns connection details for QR code scanning.
    No auth required — used by the mobile app to auto-fill settings.
    The URL is built from the request so it matches whatever address the client used.
    """
    # Build the base URL from the incoming request, honouring proxy headers.
    incoming = request.headers
    scheme = incoming.get('x-forwarded-proto', request.url.scheme)
    host = incoming.get('x-forwarded-host', incoming.get('host', 'localhost:5000'))

    return {
        "url": f"{scheme}://{host}",
        "token": API_TOKEN,
        "version": "1.02",
        "name": "SuperBrain"
    }
857
+
858
+
859
@app.get("/analysis-status/{shortcode}")
async def analysis_status(shortcode: str, token: str = Depends(verify_token)):
    """
    Check if a post has been analyzed yet.
    Returns status: 'complete', 'processing', 'queued', or 'not_found'.
    Used by the app to poll for completion after sharing a URL.
    """
    try:
        db = get_db()

        # Fully analyzed? Return the summary payload.
        cached = db.check_cache(shortcode)
        if cached:
            summary_data = {
                'title': cached.get('title', ''),
                'summary': cached.get('summary', ''),
                'tags': cached.get('tags', []),
                'category': cached.get('category', ''),
                'content_type': cached.get('content_type', ''),
                'thumbnail': cached.get('thumbnail', ''),
            }
            return {
                "status": "complete",
                "shortcode": shortcode,
                "title": cached.get('title', ''),
                "category": cached.get('category', ''),
                "data": summary_data
            }

        # Currently being analyzed?
        if shortcode in db.get_processing():
            return {"status": "processing", "shortcode": shortcode}

        # Waiting in the queue? Report its 1-based position.
        for position, queued in enumerate(db.get_queue(), start=1):
            if queued['shortcode'] == shortcode:
                return {"status": "queued", "shortcode": shortcode, "position": position}

        return {"status": "not_found", "shortcode": shortcode}

    except Exception as e:
        logger.error(f"Error checking analysis status for {shortcode}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
903
+
904
+
905
class ConnectRequest(BaseModel):
    """Request model for deprecated /connect endpoint."""
    # Accepted but ignored; kept so old clients' payloads still parse.
    api_key: Optional[str] = None
908
+
909
+
910
@app.post("/connect")
async def connect(request: ConnectRequest):
    """
    Legacy pairing endpoint — always answers 410 Gone.

    Kept only so outdated clients get a helpful message instead of a 404;
    current clients configure Server URL + Access Token directly.
    """
    raise HTTPException(
        status_code=410,
        detail="Deprecated. Configure Server URL and Access Token directly in app Settings."
    )
920
+
921
+
922
+ # ─────────────────────────────────────────────────────────────────
923
+ # Collections endpoints
924
+ # ──────────────────────────────────────────────────────────────────
925
+
926
class CollectionUpsertRequest(BaseModel):
    """Payload for POST /collections: create or fully replace a collection (upsert by id)."""
    id: str
    name: str
    icon: str = "📁"
    post_ids: List[str] = []
    # Optional ISO-8601 strings; server fills them in when omitted.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
933
+
934
+
935
class CollectionPostsRequest(BaseModel):
    """Payload for PUT /collections/{id}/posts: the full replacement post_ids list."""
    post_ids: List[str]
937
+
938
+
939
# Allowed fields for import validation
# Whitelist of post keys accepted by /import — any other key is dropped
# before the record reaches the database (prevents arbitrary field injection).
ALLOWED_POST_FIELDS = {
    'shortcode', 'url', 'username', 'content_type', 'post_date',
    'likes', 'thumbnail', 'title', 'summary', 'tags', 'music', 'category',
    'visual_analysis', 'audio_transcription', 'text_analysis'
}
945
+
946
class ImportData(BaseModel):
    """Top-level shape of an import payload: optional version plus posts and collections lists."""
    version: Optional[str] = None
    posts: List[dict] = []
    collections: List[dict] = []
950
+
951
+
952
@app.get("/collections")
async def get_collections(token: str = Depends(verify_token)):
    """List every collection stored on the server."""
    try:
        collections = get_db().get_collections()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"success": True, "data": collections}
960
+
961
+
962
@app.post("/collections")
async def upsert_collection(req: CollectionUpsertRequest, token: str = Depends(verify_token)):
    """Create a collection, or fully replace an existing one keyed by its id."""
    try:
        saved = get_db().upsert_collection(
            req.id, req.name, req.icon, req.post_ids,
            req.created_at, req.updated_at
        )
        if not saved:
            raise HTTPException(status_code=500, detail="Failed to save collection")
        return {"success": True, "data": saved}
    except HTTPException:
        raise  # preserve deliberate status codes
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
978
+
979
+
980
@app.put("/collections/{collection_id}/posts")
async def update_collection_posts(collection_id: str, req: CollectionPostsRequest,
                                   token: str = Depends(verify_token)):
    """Replace a collection's post_ids list wholesale; returns the updated collection."""
    try:
        db = get_db()
        if not db.update_collection_posts(collection_id, req.post_ids):
            raise HTTPException(status_code=404, detail="Collection not found")
        return {"success": True, "data": db.get_collection(collection_id)}
    except HTTPException:
        raise  # preserve deliberate status codes
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
994
+
995
+
996
@app.delete("/collections/{collection_id}")
async def delete_collection(collection_id: str, token: str = Depends(verify_token)):
    """Remove a collection by id. The built-in Watch Later collection is protected."""
    # Guard the default collection before touching the database at all.
    if collection_id == "default_watch_later":
        raise HTTPException(status_code=403, detail="Cannot delete the default Watch Later collection")
    try:
        if not get_db().delete_collection(collection_id):
            raise HTTPException(status_code=404, detail="Collection not found")
        return {"success": True, "message": "Collection deleted"}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
1011
+
1012
+
1013
@app.get("/health")
async def health_check(token: str = Depends(verify_token)):
    """Authenticated health probe that also verifies database connectivity."""
    try:
        stats = get_db().get_stats()
    except Exception as e:
        # Report the failure in-band (HTTP 200) so monitors can read the reason.
        return {
            "status": "unhealthy",
            "database": "disconnected",
            "error": str(e),
            "timestamp": datetime.now().isoformat()
        }

    return {
        "status": "healthy",
        "database": "connected",
        "documents": stats.get('document_count', 0),
        "timestamp": datetime.now().isoformat()
    }
1034
+
1035
+
1036
@app.get("/queue-status")
async def queue_status(token: str = Depends(verify_token)):
    """Snapshot of the processing pipeline: active, queued, and retry items."""
    try:
        db = get_db()
        active = db.get_processing()
        waiting = db.get_queue()
        retries = db.get_retry_queue()

        # Never report negative capacity even if active exceeds the limit.
        free_slots = max_concurrent - len(active)
        return {
            "currently_processing": active,
            "processing_count": len(active),
            "queue": waiting,
            "queue_count": len(waiting),
            "retry_queue": retries,
            "retry_count": len(retries),
            "max_concurrent": max_concurrent,
            "available_slots": free_slots if free_slots > 0 else 0
        }
    except Exception as e:
        logger.error(f"Error getting queue status: {e}")
        return {
            "error": str(e),
            "timestamp": datetime.now().isoformat()
        }
1061
+
1062
+
1063
@app.delete("/post/{shortcode}")
async def delete_post(shortcode: str, token: str = Depends(verify_token)):
    """
    Delete a post by shortcode.

    If the post is mid-analysis, its worker subprocess is terminated first
    (escalating to kill after a 5s grace period) so no orphaned analyzer
    keeps writing results for a deleted post.
    """
    try:
        db = get_db()

        # Stop any in-flight analysis for this shortcode before deleting.
        with _active_processes_lock:
            worker = _active_processes.pop(shortcode, None)
            if worker and worker.poll() is None:
                worker.terminate()
                try:
                    worker.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    worker.kill()  # did not exit gracefully — force it
                logger.info(f"🛑 Killed active analysis for: {shortcode}")

        # Drop the item from the queue ('queued' and 'processing' states alike).
        db.remove_from_queue(shortcode)

        if not db.delete_post(shortcode):
            raise HTTPException(status_code=404, detail="Post not found")

        logger.info(f"✅ Deleted post: {shortcode}")
        return {
            "success": True,
            "message": "Post deleted successfully",
            "shortcode": shortcode
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting post: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1100
+
1101
+
1102
@app.put("/post/{shortcode}")
async def update_post(shortcode: str, updates: dict, token: str = Depends(verify_token)):
    """Patch a post's editable fields; only category, title, and summary are accepted."""
    try:
        editable = {'category', 'title', 'summary'}
        # Silently drop anything outside the editable set.
        changes = {field: value for field, value in updates.items() if field in editable}

        if not changes:
            raise HTTPException(status_code=400, detail="No valid fields to update")

        if not get_db().update_post(shortcode, changes):
            raise HTTPException(status_code=404, detail="Post not found")

        logger.info(f"✅ Updated post: {shortcode} - {list(changes.keys())}")
        return {
            "success": True,
            "message": "Post updated successfully",
            "shortcode": shortcode,
            "updated_fields": list(changes.keys())
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating post: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1133
+
1134
+
1135
@app.get("/queue/retry")
async def get_retry_queue(token: str = Depends(verify_token)):
    """
    List all items currently scheduled for automatic retry.

    Returns the raw retry entries plus a count.
    """
    try:
        # BUG FIX: 'db' was referenced without ever being obtained — every
        # other endpoint fetches the handle via get_db(); do the same here.
        db = get_db()
        items = db.get_retry_queue()
        return {
            "retry_queue": items,
            "count": len(items)
        }
    except Exception as e:
        logger.error(f"Error fetching retry queue: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1147
+
1148
+
1149
@app.post("/queue/retry/flush")
async def flush_retry_queue(token: str = Depends(verify_token)):
    """
    Immediately promote all retry-ready items back onto the active queue.

    Returns how many items were flushed and their shortcodes.
    """
    try:
        # BUG FIX: obtain the DB handle via get_db() instead of relying on an
        # undefined module-level 'db' name (consistent with sibling endpoints).
        db = get_db()
        ready = db.get_retry_ready()
        for item in ready:
            db.add_to_queue(item['shortcode'], item['url'])
            logger.info(f"🔄 Flushed retry item: {item['shortcode']} ({item['reason']})")
        return {
            "flushed": len(ready),
            "items": [i['shortcode'] for i in ready]
        }
    except Exception as e:
        logger.error(f"Error flushing retry queue: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1164
+
1165
+
1166
+ # ─────────────────────────────────────────────────────────────────
1167
+ # Reset endpoints (admin only)
1168
+ # ─────────────────────────────────────────────────────────────────
1169
+
1170
@app.post("/reset/api-token")
async def reset_api_token(token: str = Depends(verify_token)):
    """
    Rotate the API token.

    Generates a fresh token, persists it to the token file, and swaps it
    into memory so the old token stops working on the very next request.
    - Requires API authentication
    - Returns the new token
    """
    global API_TOKEN
    try:
        fresh = generate_api_token()

        # Persist first so a crash between these two steps can't leave the
        # in-memory token ahead of the file.
        TOKEN_FILE.write_text(fresh)
        API_TOKEN = fresh

        logger.warning("API token was reset by a client")

        return {
            "success": True,
            "new_token": fresh,
            "message": "API token has been reset. Update this token in your mobile app settings."
        }
    except Exception as e:
        logger.error(f"Error resetting API token: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1198
+
1199
+
1200
@app.post("/reset/database")
async def reset_database(
    token: str = Depends(verify_token),
    confirm: str = Body(..., description="Must be 'DELETE_ALL' to confirm")
):
    """
    Wipe the database: every post, collection, and queue entry.

    - Requires API authentication
    - Guarded by an explicit confirm='DELETE_ALL' body value
    - Recreates the default Watch Later collection afterwards so the app
      always has at least one collection to work with
    """
    if confirm != "DELETE_ALL":
        raise HTTPException(status_code=400, detail="Confirmation required: pass confirm='DELETE_ALL'")

    try:
        db = get_db()
        conn = db._conn

        # Clear everything; keep the first cursor so we can report row count.
        posts_cursor = conn.execute("DELETE FROM analyses")
        conn.execute("DELETE FROM collections")
        conn.execute("DELETE FROM processing_queue")

        # The app assumes the default Watch Later collection always exists.
        now = datetime.utcnow().isoformat()
        conn.execute(
            "INSERT INTO collections (id, name, icon, post_ids, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)",
            ('default_watch_later', 'Watch Later', 'time', '[]', now, now)
        )
        conn.commit()

        removed = posts_cursor.rowcount

        logger.warning(f"🗑️ Database was reset by a client. Deleted {removed} posts.")

        return {
            "success": True,
            "deleted_count": removed,
            "message": f"Database cleared. {removed} posts deleted."
        }
    except Exception as e:
        logger.error(f"Error resetting database: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1242
+
1243
+
1244
+ # ─────────────────────────────────────────────────────────────────
1245
+ # Import/Export endpoints
1246
+ # ─────────────────────────────────────────────────────────────────
1247
+
1248
@app.get("/export")
async def export_data(
    token: str = Depends(verify_token),
    limit: int = Query(default=10000, ge=1, le=50000, description="Max posts to export"),
    offset: int = Query(default=0, ge=0, description="Offset for pagination"),
    format: str = Query(default="json", description="Export format: json or zip")
):
    """
    Export data as JSON or ZIP (posts, collections, stats).

    - Requires API authentication
    - Supports pagination with limit and offset
    - format=zip streams a ZIP archive containing superbrain_export.json
    """
    try:
        db = get_db()

        # Gather everything the archive needs.
        posts = db.get_all_posts(limit=limit, offset=offset)
        collections = db.get_all_collections()
        stats = db.get_stats()

        export_payload = {
            "version": "1.0",
            "exported_at": datetime.now().isoformat(),
            "posts": posts,
            "collections": collections,
            "stats": stats
        }

        if format.lower() == "zip":
            # Build the archive in memory — size is bounded by `limit`.
            zip_buffer = io.BytesIO()
            with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file:
                # default=str keeps non-JSON types (e.g. datetimes) serializable.
                zip_file.writestr("superbrain_export.json", json.dumps(export_payload, default=str))

            zip_buffer.seek(0)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"superbrain_export_{timestamp}.zip"

            return StreamingResponse(
                iter([zip_buffer.getvalue()]),
                media_type="application/zip",
                # BUG FIX: the computed filename was never interpolated into the
                # header — clients literally received "filename=(unknown)".
                headers={"Content-Disposition": f"attachment; filename={filename}"}
            )

        # Default: plain JSON payload.
        return export_payload
    except Exception as e:
        logger.error(f"Error exporting data: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1305
+
1306
+
1307
@app.post("/import")
async def import_data(
    data: dict,
    token: str = Depends(verify_token),
    mode: str = Query(default="merge", description="merge or replace")
):
    """
    Import data from a JSON body.

    - Requires API authentication
    - mode=merge: add to existing data, skipping duplicates by shortcode
    - mode=replace: clear the database first, then import everything
    """
    # Thin wrapper: validation and persistence live in _process_import_data.
    result = await _process_import_data(data, mode)
    return result
1320
+
1321
@app.post("/import/file")
async def import_data_file(
    file: UploadFile = File(...),
    token: str = Depends(verify_token),
    mode: str = Query(default="merge", description="merge or replace")
):
    """
    Import data from an uploaded ZIP or JSON file.

    - Requires API authentication
    - mode=merge or replace (same semantics as POST /import)
    - ZIP detection uses the filename, declared content type, or the
      'PK' magic bytes; the first .json member found is imported
      (preferring superbrain_export.json).
    """
    try:
        content = await file.read()

        # ROBUSTNESS FIX: UploadFile.filename / content_type may be None,
        # which crashed the .endswith()/comparison below — default to ''.
        upload_name = file.filename or ""
        upload_type = file.content_type or ""

        # Check if it's a ZIP file
        if upload_name.endswith('.zip') or upload_type == 'application/zip' or content.startswith(b'PK'):
            try:
                with zipfile.ZipFile(io.BytesIO(content)) as z:
                    # Look for superbrain_export.json or any json file
                    json_files = [name for name in z.namelist() if name.endswith('.json')]

                    if not json_files:
                        raise HTTPException(status_code=400, detail="No JSON file found in the ZIP archive")

                    # Prefer superbrain_export.json if it exists
                    target_file = "superbrain_export.json" if "superbrain_export.json" in json_files else json_files[0]

                    with z.open(target_file) as f:
                        data = json.load(f)
            except zipfile.BadZipFile:
                raise HTTPException(status_code=400, detail="Invalid ZIP file")
        else:
            # Assume it's a direct JSON file
            try:
                data = json.loads(content.decode('utf-8'))
            except json.JSONDecodeError:
                raise HTTPException(status_code=400, detail="Invalid JSON file")

        return await _process_import_data(data, mode)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing import file: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
1366
+
1367
async def _process_import_data(data: dict, mode: str):
    """
    Validate and persist an import payload (posts + collections).

    mode='merge' keeps existing posts and skips duplicates by shortcode;
    mode='replace' wipes the database first and recreates the default
    Watch Later collection.

    Raises:
        HTTPException: 400 for an invalid mode, 500 for any other failure.
    """
    try:
        # Validate input data structure
        validated = ImportData.model_validate(data)

        if mode not in {"merge", "replace"}:
            raise HTTPException(status_code=400, detail="Invalid import mode. Use 'merge' or 'replace'.")

        db = get_db()

        imported_posts = 0
        skipped_posts = 0

        # Handle mode=replace
        if mode == "replace":
            logger.warning("Import mode=replace: clearing database first")
            conn = db._conn
            conn.execute("DELETE FROM analyses")
            conn.execute("DELETE FROM collections")
            conn.execute("DELETE FROM processing_queue")
            # Ensure default Watch Later exists even if import has no collections
            now = datetime.utcnow().isoformat()
            conn.execute(
                "INSERT INTO collections (id, name, icon, post_ids, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)",
                ('default_watch_later', 'Watch Later', 'time', '[]', now, now)
            )
            conn.commit()

        # Import posts - validate and filter allowed fields
        posts = validated.posts or []
        for post in posts:
            shortcode = post.get("shortcode")
            if not shortcode:
                continue

            # Filter to only allowed fields (prevent arbitrary field injection)
            filtered_post = {k: v for k, v in post.items() if k in ALLOWED_POST_FIELDS}

            # Check if exists (for merge mode)
            existing = db.check_cache(shortcode)
            if existing and mode == "merge":
                skipped_posts += 1
                continue

            db.save_analysis(
                shortcode=shortcode,
                url=filtered_post.get("url", ""),
                username=filtered_post.get("username", ""),
                title=filtered_post.get("title", ""),
                summary=filtered_post.get("summary", ""),
                tags=filtered_post.get("tags", []),
                music=filtered_post.get("music", ""),
                category=filtered_post.get("category", "other"),
                visual_analysis=filtered_post.get("visual_analysis", ""),
                audio_transcription=filtered_post.get("audio_transcription", ""),
                text_analysis=filtered_post.get("text_analysis", ""),
                likes=filtered_post.get("likes", 0),
                post_date=filtered_post.get("post_date"),
                content_type=filtered_post.get("content_type", "instagram"),
                thumbnail=filtered_post.get("thumbnail", ""),
            )
            imported_posts += 1

        # Import collections — accepts both snake_case and camelCase keys
        collections = validated.collections or []
        imported_collections = 0
        for coll in collections:
            coll_id = coll.get("id")
            if not coll_id:
                continue

            post_ids = coll.get("post_ids")
            if post_ids is None:
                post_ids = coll.get("postIds", [])

            db.upsert_collection(
                collection_id=coll_id,
                name=coll.get("name", "Untitled"),
                icon=coll.get("icon", "folder"),
                post_ids=post_ids if isinstance(post_ids, list) else [],
                created_at=coll.get("created_at") or coll.get("createdAt"),
                updated_at=coll.get("updated_at") or coll.get("updatedAt"),
            )
            imported_collections += 1

        logger.info(f"📥 Import complete: {imported_posts} posts, {skipped_posts} skipped, {imported_collections} collections")

        return {
            "success": True,
            "imported_posts": imported_posts,
            "skipped_posts": skipped_posts,
            "imported_collections": imported_collections,
            "mode": mode
        }
    except HTTPException:
        # BUG FIX: the broad handler below used to swallow the deliberate
        # 400 raised for an invalid mode and re-raise it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error importing data: {e}")
        raise HTTPException(status_code=500, detail=str(e))
1464
+
1465
+
1466
+ # Settings endpoints for AI Providers and Instagram configuration
1467
+ # These manage the .api_keys file and dynamically update the ModelRouter
1468
+
1469
class ProviderKeyUpdate(BaseModel):
    """Payload for POST /settings/ai-providers: set one provider's API key."""
    provider: str
    api_key: str
1472
+
1473
class InstagramCredentials(BaseModel):
    """Instagram login pair. NOTE(review): stored in plaintext in config/.api_keys."""
    username: str
    password: str
1476
+
1477
def get_config_path(filename: str) -> Path:
    """Resolve *filename* inside this module's sibling ``config`` directory."""
    config_dir = Path(__file__).parent / "config"
    return config_dir / filename
1480
+
1481
@app.get("/settings/ai-providers")
async def get_ai_providers(token: str = Depends(verify_token)):
    """
    Report each supported AI provider and whether a key is configured.
    - Requires API authentication
    """
    from core.model_router import get_router
    available = get_router().get_available_providers()

    # (display name, key-prefix hint shown only when a key is present)
    catalog = {
        "groq": ("Groq", "gsk_..."),
        "gemini": ("Google Gemini", "AIza..."),
        "openrouter": ("OpenRouter", "sk-or-..."),
        "ollama": ("Ollama (Local)", None),
    }

    providers = {}
    for key, (label, hint) in catalog.items():
        providers[key] = {
            "name": label,
            "has_key": available.get(key, False),
            "key_hint": hint if available.get(key) else None,
        }

    return {"success": True, "providers": providers}
1516
+
1517
@app.post("/settings/ai-providers")
async def set_ai_provider_key(
    data: ProviderKeyUpdate,
    token: str = Depends(verify_token)
):
    """
    Store an API key for one of the supported AI providers.
    - Requires API authentication
    - provider: groq, gemini, or openrouter
    """
    from core.model_router import get_router

    valid_providers = ["groq", "gemini", "openrouter"]
    provider = data.provider.lower()
    if provider not in valid_providers:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid provider. Must be one of: {', '.join(valid_providers)}"
        )

    # Reject empty/obviously-truncated keys before touching the router.
    if not data.api_key or len(data.api_key.strip()) < 5:
        raise HTTPException(status_code=400, detail="Invalid API key")

    if not get_router().set_api_key(provider, data.api_key.strip()):
        raise HTTPException(status_code=500, detail="Failed to save API key")

    logger.info(f"🔑 API key updated for {data.provider}")
    return {"success": True, "provider": data.provider, "message": "API key updated"}
1547
+
1548
@app.delete("/settings/ai-providers/{provider}")
async def delete_ai_provider_key(
    provider: str,
    token: str = Depends(verify_token)
):
    """
    Remove a stored API key for one of the supported AI providers.
    - Requires API authentication
    """
    from core.model_router import get_router

    valid_providers = ["groq", "gemini", "openrouter"]
    if provider.lower() not in valid_providers:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid provider. Must be one of: {', '.join(valid_providers)}"
        )

    if not get_router().delete_api_key(provider.lower()):
        raise HTTPException(status_code=500, detail="Failed to delete API key")

    logger.info(f"🔑 API key deleted for {provider}")
    return {"success": True, "provider": provider, "message": "API key deleted"}
1574
+
1575
@app.get("/settings/instagram")
async def get_instagram_credentials(token: str = Depends(verify_token)):
    """
    Report Instagram login status without exposing the password.
    - Requires API authentication
    """
    api_keys_file = get_config_path(".api_keys")

    username = None
    has_password = False

    # Scan the key=value config file for the two Instagram entries.
    if api_keys_file.exists():
        with open(api_keys_file, "r") as f:
            for raw in f:
                if raw.startswith("INSTAGRAM_USERNAME="):
                    username = raw.split("=", 1)[1].strip()
                elif raw.startswith("INSTAGRAM_PASSWORD="):
                    has_password = bool(raw.split("=", 1)[1].strip())

    return {
        "success": True,
        "configured": bool(username),
        "username": username if username else None,
        "has_password": has_password
    }
1600
+
1601
@app.post("/settings/instagram")
async def set_instagram_credentials(
    data: InstagramCredentials,
    token: str = Depends(verify_token)
):
    """
    Store Instagram credentials in the config file.
    - Requires API authentication

    NOTE(review): the password is persisted in plaintext in config/.api_keys —
    worth encrypting at rest.
    """
    if not data.username or not data.password:
        raise HTTPException(status_code=400, detail="Username and password required")

    api_keys_file = get_config_path(".api_keys")

    # Rewrite the file, replacing existing credential lines in place so the
    # rest of the key=value entries are untouched.
    rewritten = []
    saw_username = False
    saw_password = False

    if api_keys_file.exists():
        with open(api_keys_file, "r") as f:
            for raw in f:
                if raw.startswith("INSTAGRAM_USERNAME="):
                    rewritten.append(f"INSTAGRAM_USERNAME={data.username}\n")
                    saw_username = True
                elif raw.startswith("INSTAGRAM_PASSWORD="):
                    rewritten.append(f"INSTAGRAM_PASSWORD={data.password}\n")
                    saw_password = True
                else:
                    rewritten.append(raw)

    # Append whichever credential line wasn't already present.
    if not saw_username:
        rewritten.append(f"INSTAGRAM_USERNAME={data.username}\n")
    if not saw_password:
        rewritten.append(f"INSTAGRAM_PASSWORD={data.password}\n")

    with open(api_keys_file, "w") as f:
        f.writelines(rewritten)

    logger.info(f"📸 Instagram credentials updated for {data.username}")

    return {
        "success": True,
        "username": data.username,
        "message": "Instagram credentials updated"
    }
1647
+
1648
@app.delete("/settings/instagram")
async def delete_instagram_credentials(token: str = Depends(verify_token)):
    """
    Remove stored Instagram credentials from the config file.
    - Requires API authentication
    """
    api_keys_file = get_config_path(".api_keys")

    if api_keys_file.exists():
        # Keep every line except the two Instagram credential entries.
        with open(api_keys_file, "r") as f:
            kept = [
                raw for raw in f
                if not raw.startswith("INSTAGRAM_USERNAME=")
                and not raw.startswith("INSTAGRAM_PASSWORD=")
            ]

        with open(api_keys_file, "w") as f:
            f.writelines(kept)

    logger.info("📸 Instagram credentials deleted")

    return {"success": True, "message": "Instagram credentials deleted"}
1669
+
1670
+
1671
if __name__ == "__main__":
    import uvicorn

    print("🚀 Starting SuperBrain API...")
    print("📖 API Docs: http://localhost:5000/docs")
    print("🔍 Interactive: http://localhost:5000/redoc")
    # Import string "api:app" lets uvicorn (re)import the app; reload stays off.
    uvicorn.run("api:app", host="0.0.0.0", port=5000, reload=False)