multivol-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
multivol/api.py ADDED
@@ -0,0 +1,1001 @@
+ import argparse
+ import glob
+ import hashlib
+ import io
+ import json
+ import os
+ import shutil
+ import sqlite3
+ import threading
+ import time
+ import uuid
+ import zipfile
+
+ import docker
+ from flask import Flask, request, jsonify, abort, send_from_directory, send_file
+ from flask_cors import CORS
+ from werkzeug.utils import secure_filename
+
+ app = Flask(__name__)
+ # Increase the max upload size to 16 GB (or an appropriate limit for memory dumps)
+ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 * 1024
+ CORS(app, resources={r"/*": {"origins": "*"}})  # Explicitly allow all origins
+
+ STORAGE_DIR = os.environ.get("STORAGE_DIR", os.path.join(os.getcwd(), "storage"))
+ os.makedirs(STORAGE_DIR, exist_ok=True)
+
+ runner_func = None
+
+ @app.route('/health', methods=['GET'])
+ def health_check():
+     return jsonify({"status": "ok", "timestamp": time.time()})
+
+ @app.before_request
+ def restrict_to_localhost():
+     # Allow bypass via environment variable
+     if os.environ.get("DISABLE_LOCALHOST_ONLY"):
+         return
+
+     # Allow 127.0.0.1 and ::1 (IPv6 localhost)
+     allowed_ips = ["127.0.0.1", "::1"]
+     if request.remote_addr not in allowed_ips:
+         abort(403, description="Access forbidden: only localhost connections are allowed; set DISABLE_LOCALHOST_ONLY=1 to disable this check.")
+
+ def init_db():
+     conn = sqlite3.connect('scans.db')
+     c = conn.cursor()
+     c.execute('''CREATE TABLE IF NOT EXISTS scans
+                  (uuid TEXT PRIMARY KEY, status TEXT, mode TEXT, os TEXT, volatility_version TEXT, dump_path TEXT, output_dir TEXT, created_at REAL, error TEXT)''')
+
+     # New table for results
+     c.execute('''CREATE TABLE IF NOT EXISTS scan_results
+                  (id INTEGER PRIMARY KEY AUTOINCREMENT, scan_id TEXT, module TEXT, content TEXT, created_at REAL,
+                  FOREIGN KEY(scan_id) REFERENCES scans(uuid))''')
+
+     # Migration: check whether the 'name' column exists
+     try:
+         c.execute("SELECT name FROM scans LIMIT 1")
+     except sqlite3.OperationalError:
+         print("[INFO] Migrating DB: adding 'name' column to scans table")
+         c.execute("ALTER TABLE scans ADD COLUMN name TEXT")
+
+     # Migration: check whether the 'image' column exists (new, for file download)
+     try:
+         c.execute("SELECT image FROM scans LIMIT 1")
+     except sqlite3.OperationalError:
+         print("[INFO] Migrating DB: adding 'image' column to scans table")
+         c.execute("ALTER TABLE scans ADD COLUMN image TEXT")
+
+     # Table for async dump tasks
+     c.execute('''CREATE TABLE IF NOT EXISTS dump_tasks
+                  (task_id TEXT PRIMARY KEY, scan_id TEXT, status TEXT, output_path TEXT, error TEXT, created_at REAL)''')
+
+     conn.commit()
+     conn.close()
+
+ init_db()
+
+ @app.route('/scans/<uuid>', methods=['PUT'])
+ def rename_scan(uuid):
+     data = request.json
+     new_name = data.get('name')
+     if not new_name:
+         return jsonify({"error": "Name is required"}), 400
+
+     conn = sqlite3.connect('scans.db')
+     c = conn.cursor()
+     c.execute("UPDATE scans SET name = ? WHERE uuid = ?", (new_name, uuid))
+     conn.commit()
+     conn.close()
+     return jsonify({"status": "updated"})
+
+ @app.route('/scans/<uuid>', methods=['DELETE'])
+ def delete_scan(uuid):
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+
+     # Get the output dir to clean up
+     c.execute("SELECT output_dir FROM scans WHERE uuid = ?", (uuid,))
+     row = c.fetchone()
+
+     if row and row['output_dir'] and os.path.exists(row['output_dir']):
+         try:
+             shutil.rmtree(row['output_dir'])
+         except Exception as e:
+             print(f"Error deleting output dir: {e}")
+
+     c.execute("DELETE FROM scan_results WHERE scan_id = ?", (uuid,))
+     c.execute("DELETE FROM scans WHERE uuid = ?", (uuid,))
+     conn.commit()
+     conn.close()
+     return jsonify({"status": "deleted"})
+
+ @app.route('/scans/<uuid>/download', methods=['GET'])
+ def download_scan_zip(uuid):
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+     c.execute("SELECT output_dir, name FROM scans WHERE uuid = ?", (uuid,))
+     row = c.fetchone()
+     conn.close()
+
+     if not row:
+         return jsonify({"error": "Scan not found"}), 404
+
+     output_dir = row['output_dir']
+     scan_name = row['name'] or f"scan_{uuid[:8]}"
+
+     if not output_dir:
+         return jsonify({"error": "No output directory for this scan"}), 404
+
+     # Ensure absolute path resolution
+     if not os.path.isabs(output_dir):
+         output_dir = os.path.join(os.getcwd(), output_dir)
+
+     if not os.path.exists(output_dir):
+         # The scan might have failed before creating the dir, or it was deleted
+         return jsonify({"error": "Output directory not found on server"}), 404
+
+     # Create the zip in memory
+     memory_file = io.BytesIO()
+     with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
+         json_files = glob.glob(os.path.join(output_dir, "*_output.json"))
+         for f in json_files:
+             # Validate the JSON; only include valid output, skipping the error objects we create ourselves
+             parsed = clean_and_parse_json(f)
+             if parsed and not (isinstance(parsed, dict) and parsed.get("error") == "Invalid JSON output"):
+                 arcname = os.path.basename(f)
+                 zf.writestr(arcname, json.dumps(parsed, indent=2))
+
+     memory_file.seek(0)
+     return send_file(
+         memory_file,
+         mimetype='application/zip',
+         as_attachment=True,
+         download_name=f"{secure_filename(scan_name)}_results.zip"
+     )
+
+ def clean_and_parse_json(filepath):
+     """Helper to parse JSON from Volatility output files, handling errors gracefully."""
+     try:
+         with open(filepath, 'r') as f:
+             content = f.read()
+
+         start_index = content.find('[')
+         if start_index == -1:
+             start_index = content.find('{')
+
+         parsed_data = None
+         if start_index != -1:
+             try:
+                 json_content = content[start_index:]
+                 parsed_data = json.loads(json_content)
+             except json.JSONDecodeError:
+                 pass  # Try the fallback below
+
+         if parsed_data is None:
+             # Fallback: drop the first line (often a banner) and retry
+             lines = content.splitlines()
+             if len(lines) > 1:
+                 try:
+                     parsed_data = json.loads('\n'.join(lines[1:]))
+                 except json.JSONDecodeError:
+                     pass
+
+         if parsed_data is not None:
+             return parsed_data
+
+         # Fallback: return the raw content as an error object if it is not valid JSON.
+         # This handles Volatility error messages stored in .json files.
+         return {"error": "Invalid JSON output", "raw_output": content}
+
+     except Exception as e:
+         return {"error": f"Error reading file: {str(e)}"}
+
+ def ingest_results_to_db(scan_id, output_dir):
+     """Reads JSON output files and stores them in the database."""
+     print(f"[DEBUG] Ingesting results for {scan_id} from {output_dir}")
+     if not os.path.exists(output_dir):
+         print(f"[ERROR] Output dir not found: {output_dir}")
+         return
+
+     conn = sqlite3.connect('scans.db')
+     c = conn.cursor()
+
+     json_files = glob.glob(os.path.join(output_dir, "*_output.json"))
+     for f in json_files:
+         try:
+             filename = os.path.basename(f)
+             if filename.endswith("_output.json"):
+                 module_name = filename[:-12]  # strip the "_output.json" suffix
+
+                 # Skip if the result already exists, to keep ingestion idempotent
+                 c.execute("SELECT id FROM scan_results WHERE scan_id = ? AND module = ?", (scan_id, module_name))
+                 if c.fetchone():
+                     continue
+
+                 # Parse the content; error objects from clean_and_parse_json are stored
+                 # as-is so failed modules remain visible
+                 parsed_data = clean_and_parse_json(f)
+                 content_str = json.dumps(parsed_data) if parsed_data else "{}"
+
+                 c.execute("INSERT INTO scan_results (scan_id, module, content, created_at) VALUES (?, ?, ?, ?)",
+                           (scan_id, module_name, content_str, time.time()))
+         except Exception as e:
+             print(f"[ERROR] Failed to ingest {f}: {e}")
+
+     conn.commit()
+     conn.close()
+     print(f"[DEBUG] Ingestion complete for {scan_id}")
+
+ @app.route('/upload', methods=['POST'])
+ def upload_file():
+     if 'file' not in request.files:
+         return jsonify({"error": "No file part"}), 400
+     file = request.files['file']
+     if file.filename == '':
+         return jsonify({"error": "No file selected"}), 400
+
+     if file:
+         filename = secure_filename(file.filename)
+         save_path = os.path.join(STORAGE_DIR, filename)
+
+         # Existing files with the same name are overwritten for simplicity.
+         try:
+             print(f"[DEBUG] Saving file to {save_path}")
+             file.save(save_path)
+
+             # Calculate and cache the hash immediately
+             print(f"[DEBUG] Calculating hash for {save_path}")
+             get_file_hash(save_path)
+
+             print("[DEBUG] File saved successfully")
+             return jsonify({"status": "success", "path": save_path, "server_path": save_path})
+         except Exception as e:
+             print(f"[ERROR] Failed to save file: {e}")
+             return jsonify({"error": str(e)}), 500
+
+ @app.route('/scan', methods=['POST'])
+ def scan():
+     data = request.json
+
+     # Default arguments matching the CLI defaults and requirements
+     default_args = {
+         "profiles_path": os.path.join(os.getcwd(), "volatility2_profiles"),
+         "symbols_path": os.path.join(os.getcwd(), "volatility3_symbols"),
+         "cache_path": os.path.join(os.getcwd(), "volatility3_cache"),
+         "plugins_dir": os.path.join(os.getcwd(), "volatility3_plugins"),
+         "format": "json",
+         "commands": None,
+         "light": False,
+         "full": False,
+         "linux": False,
+         "windows": False,
+         "mode": None,
+         "profile": None,
+         "processes": None,
+         "host_path": os.environ.get("HOST_PATH")  # Added for DooD support via env
+     }
+
+     args_dict = default_args.copy()
+     args_dict.update(data)
+
+     # Basic validation
+     if "dump" not in data or "image" not in data or "mode" not in data:
+         return jsonify({"error": "Missing required fields: dump, image, mode"}), 400
+
+     # Ensure mutual exclusion of the OS flags
+     is_linux = bool(data.get("linux"))
+     is_windows = bool(data.get("windows"))
+
+     if is_linux == is_windows:
+         return jsonify({"error": "You must specify either 'linux': true or 'windows': true, but not both or neither."}), 400
+
+     args_obj = argparse.Namespace(**args_dict)
+
+     scan_id = str(uuid.uuid4())
+     # Construct the output directory name with the UUID
+     base_name = f"volatility2_{scan_id}" if args_obj.mode == "vol2" else f"volatility3_{scan_id}"
+     # Use an absolute path for output_dir to avoid CWD ambiguity and ensure persistence
+     final_output_dir = os.path.join(os.getcwd(), "outputs", base_name)
+     args_obj.output_dir = final_output_dir
+
+     # Create the directory immediately (even if empty) to prevent "no output dir" errors on early failure
+     try:
+         os.makedirs(final_output_dir, exist_ok=True)
+     except Exception as e:
+         print(f"[ERROR] Failed to create output dir {final_output_dir}: {e}")
+         return jsonify({"error": f"Failed to create output directory: {e}"}), 500
+
+     # Determine the OS and Volatility version for the DB
+     target_os = "windows" if args_obj.windows else ("linux" if args_obj.linux else "unknown")
+     vol_version = args_obj.mode
+
+     # If the dump path is just a filename, resolve it against the storage directory;
+     # absolute paths are used as-is.
+     if not os.path.isabs(args_obj.dump):
+         args_obj.dump = os.path.join(STORAGE_DIR, args_obj.dump)
+
+     if not os.path.exists(args_obj.dump):
+         return jsonify({"error": f"Dump file not found at {args_obj.dump}"}), 400
+
+     args_obj.image = data.get("image")  # Ensure the image is passed through
+     case_name = data.get("name")  # Optional custom case name
+
+     conn = sqlite3.connect('scans.db')
+     c = conn.cursor()
+     c.execute("INSERT INTO scans (uuid, status, mode, os, volatility_version, dump_path, output_dir, created_at, image, name) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+               (scan_id, "pending", "light" if args_obj.light else "full", target_os, vol_version, args_obj.dump, final_output_dir, time.time(), args_obj.image, case_name))
+     conn.commit()
+     conn.close()
+
+     def background_scan(s_id, args):
+         conn = sqlite3.connect('scans.db')
+         c = conn.cursor()
+
+         try:
+             c.execute("UPDATE scans SET status = 'running' WHERE uuid = ?", (s_id,))
+             conn.commit()
+
+             # Execute the runner
+             if runner_func:
+                 runner_func(args)
+
+             # Ingest the results into the DB
+             ingest_results_to_db(s_id, args.output_dir)
+
+             c.execute("UPDATE scans SET status = 'completed' WHERE uuid = ?", (s_id,))
+             conn.commit()
+         except Exception as e:
+             print(f"[ERROR] Scan failed: {e}")
+             c.execute("UPDATE scans SET status = 'failed', error = ? WHERE uuid = ?", (str(e), s_id))
+             conn.commit()
+         finally:
+             conn.close()
+
+     thread = threading.Thread(target=background_scan, args=(scan_id, args_obj))
+     thread.daemon = True
+     thread.start()
+
+     return jsonify({"scan_id": scan_id, "status": "pending", "output_dir": final_output_dir})
+
+ @app.route('/status/<scan_id>', methods=['GET'])
+ def get_status(scan_id):
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+     c.execute("SELECT * FROM scans WHERE uuid = ?", (scan_id,))
+     row = c.fetchone()
+     conn.close()
+
+     if row:
+         return jsonify(dict(row))
+     return jsonify({"error": "Scan not found"}), 404
+
+ @app.route('/list_images', methods=['GET'])
+ def list_images():
+     try:
+         client = docker.from_env()
+         images = client.images.list()
+         volatility_images = []
+         for img in images:
+             if img.tags:
+                 for tag in img.tags:
+                     if "volatility" in tag:
+                         volatility_images.append(tag)
+         return jsonify({"images": list(set(volatility_images))})
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ @app.route('/results/<uuid>/modules', methods=['GET'])
+ def get_scan_modules(uuid):
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+
+     # Check whether we have results in the DB
+     c.execute("SELECT module, content FROM scan_results WHERE scan_id = ?", (uuid,))
+     rows = c.fetchall()
+
+     if rows:
+         modules = []
+         for row in rows:
+             try:
+                 # Content is stored as a JSON string; parse it to filter out our known
+                 # error structure. Parsing huge JSON just for this check may be slow,
+                 # but it is safe.
+                 data = json.loads(row['content'])
+                 if isinstance(data, dict) and data.get("error") == "Invalid JSON output":
+                     continue
+                 modules.append(row['module'])
+             except json.JSONDecodeError:
+                 continue
+         conn.close()
+         return jsonify({"modules": modules})
+
+     # Fall back to the filesystem if the DB is empty
+     c.execute("SELECT output_dir FROM scans WHERE uuid = ?", (uuid,))
+     scan = c.fetchone()
+     conn.close()
+
+     if not scan:
+         return jsonify({"error": "Scan not found"}), 404
+
+     output_dir = scan['output_dir']
+     if output_dir and os.path.exists(output_dir):
+         json_files = glob.glob(os.path.join(output_dir, "*_output.json"))
+         modules = []
+         for f in json_files:
+             filename = os.path.basename(f)
+             if filename.endswith("_output.json"):
+                 # Validate the content
+                 parsed_data = clean_and_parse_json(f)
+                 if parsed_data and isinstance(parsed_data, dict) and parsed_data.get("error") == "Invalid JSON output":
+                     continue
+
+                 module_name = filename[:-12]  # strip the "_output.json" suffix
+                 modules.append(module_name)
+         return jsonify({"modules": modules})
+
+     return jsonify({"modules": []})
+
+ @app.route('/results/<uuid>', methods=['GET'])
+ def get_scan_results(uuid):
+     module_param = request.args.get('module')
+     if not module_param:
+         return jsonify({"error": "Missing 'module' query parameter"}), 400
+
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+
+     # Try the DB first
+     c.execute("SELECT content FROM scan_results WHERE scan_id = ? AND module = ?", (uuid, module_param))
+     row = c.fetchone()
+     if row:
+         conn.close()
+         try:
+             return jsonify(json.loads(row['content']))
+         except json.JSONDecodeError:
+             return jsonify({"error": "Failed to parse stored content", "raw": row['content']}), 500
+
+     # Fall back to the filesystem
+     c.execute("SELECT output_dir FROM scans WHERE uuid = ?", (uuid,))
+     scan = c.fetchone()
+     conn.close()
+
+     if not scan:
+         return jsonify({"error": "Scan not found"}), 404
+
+     output_dir = scan['output_dir']
+     if not output_dir or not os.path.exists(output_dir):
+         return jsonify({"error": "Output directory not found"}), 404
+
+     if module_param == 'all':
+         results = {}
+         json_files = glob.glob(os.path.join(output_dir, "*_output.json"))
+         for f in json_files:
+             filename = os.path.basename(f)
+             if filename.endswith("_output.json"):
+                 module_name = filename[:-12]  # strip the "_output.json" suffix
+                 parsed_data = clean_and_parse_json(f)
+                 if parsed_data is not None:
+                     results[module_name] = parsed_data
+         return jsonify(results)
+     else:
+         target_file = os.path.join(output_dir, f"{module_param}_output.json")
+         if not os.path.exists(target_file):
+             return jsonify({"error": f"Module {module_param} output not found"}), 404
+
+         parsed_data = clean_and_parse_json(target_file)
+         if parsed_data is None:
+             return jsonify({"error": f"Failed to parse JSON for {module_param}"}), 500
+
+         return jsonify(parsed_data)
+
+ @app.route('/scans', methods=['GET'])
+ def list_scans():
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+     c.execute("SELECT * FROM scans ORDER BY created_at DESC")
+     rows = c.fetchall()
+
+     scans_list = []
+     for row in rows:
+         scan_dict = dict(row)
+         scan_uuid = scan_dict['uuid']
+
+         # Count valid modules from the DB, explicitly filtering out our known error
+         # string so failed modules are excluded
+         c.execute("SELECT COUNT(*) FROM scan_results WHERE scan_id = ? AND content NOT LIKE '%\"error\": \"Invalid JSON output\"%'", (scan_uuid,))
+         db_count = c.fetchone()[0]
+
+         scan_dict['modules'] = db_count
+
+         # Override the status to 'failed' if technically completed but with 0 valid modules
+         if scan_dict['status'] == 'completed' and db_count == 0:
+             scan_dict['status'] = 'failed'
+             scan_dict['error'] = 'No valid JSON results parsed'
+
+         # The DB is the source of truth for results: if ingestion failed, the scan
+         # is failed, so there is no filesystem fallback for the count here.
+
+         scan_dict['findings'] = 0
+         scans_list.append(scan_dict)
+
+     conn.close()
+     return jsonify(scans_list)
+
+ @app.route('/stats', methods=['GET'])
+ def get_stats():
+     conn = sqlite3.connect('scans.db')
+     c = conn.cursor()
+     c.execute("SELECT COUNT(*) FROM scans")
+     total_cases = c.fetchone()[0]
+
+     c.execute("SELECT COUNT(*) FROM scans WHERE status='running'")
+     running_cases = c.fetchone()[0]
+
+     c.execute("SELECT COUNT(DISTINCT dump_path) FROM scans")
+     total_evidences = c.fetchone()[0]
+
+     conn.close()
+
+     # Count symbols
+     symbols_path = os.path.join(os.getcwd(), "volatility3_symbols")
+     total_symbols = 0
+     if os.path.exists(symbols_path):
+         for root, dirs, files in os.walk(symbols_path):
+             total_symbols += len(files)
+
+     return jsonify({
+         "total_cases": total_cases,
+         "processing": running_cases,
+         "total_evidences": total_evidences,
+         "total_symbols": total_symbols
+     })
+
+ @app.route('/evidences', methods=['GET'])
+ def list_evidences():
+     # Helper to calculate directory size recursively
+     def get_dir_size(start_path):
+         total_size = 0
+         for dirpath, dirnames, filenames in os.walk(start_path):
+             for f in filenames:
+                 fp = os.path.join(dirpath, f)
+                 total_size += os.path.getsize(fp)
+         return total_size
+
+     try:
+         items = os.listdir(STORAGE_DIR)
+         print(f"[DEBUG] list_evidences found {len(items)} items in {STORAGE_DIR}")
+         print(f"[DEBUG] Items: {items}")
+     except FileNotFoundError:
+         print(f"[ERROR] Storage dir not found: {STORAGE_DIR}")
+         items = []
+
+     # Pre-load the case-name map from the DB
+     case_map = {}  # filename -> case name
+     try:
+         conn = sqlite3.connect('scans.db')
+         conn.row_factory = sqlite3.Row
+         c = conn.cursor()
+         c.execute("SELECT name, dump_path FROM scans")
+         rows = c.fetchall()
+         for r in rows:
+             if r['name'] and r['dump_path']:
+                 fname = os.path.basename(r['dump_path'])
+                 case_map[fname] = r['name']
+         conn.close()
+     except Exception:
+         pass
+
+     evidences = []
+
+     # First pass: identify extracted folders
+     processed_dumps = set()
+
+     for item in items:
+         path = os.path.join(STORAGE_DIR, item)
+         if os.path.isdir(path) and item.endswith("_extracted"):
+             # This is an extracted folder
+             dump_base = item[:-10]  # strip the "_extracted" suffix
+             files = []
+             try:
+                 subitems = os.listdir(path)
+                 for sub in subitems:
+                     if sub.endswith('.sha256'):
+                         continue
+
+                     subpath = os.path.join(path, sub)
+                     if os.path.isfile(subpath):
+                         files.append({
+                             "id": os.path.join(item, sub),  # relative path ID for download
+                             "name": sub,
+                             "size": os.path.getsize(subpath),
+                             "type": "Extracted File"
+                         })
+             except Exception as e:
+                 print(f"Error reading subdir {path}: {e}")
+
+             # Resolve the source dump from the DB by matching the folder name:
+             # 1. dump_base matches a case name (case-name extraction): the source is
+             #    the dump file associated with that case.
+             # 2. dump_base matches a filename (legacy extraction): the source is the
+             #    file itself.
+             source_dump = "Unknown Source"
+             matched_case_name = dump_base
+             try:
+                 conn = sqlite3.connect('scans.db')
+                 conn.row_factory = sqlite3.Row
+                 c = conn.cursor()
+                 c.execute("SELECT dump_path FROM scans WHERE name = ? ORDER BY created_at DESC LIMIT 1", (dump_base,))
+                 row = c.fetchone()
+                 if row:
+                     source_dump = os.path.basename(row['dump_path'])
+                 else:
+                     source_dump = dump_base
+                 conn.close()
+             except Exception:
+                 source_dump = dump_base
+
+             # If the source dump exists in storage, prepend it to the children list
+             if source_dump and source_dump != "Unknown Source":
+                 dump_path = os.path.join(STORAGE_DIR, source_dump)
+                 if os.path.exists(dump_path):
+                     processed_dumps.add(source_dump)
+                     files.insert(0, {
+                         "id": source_dump,  # relative path (just the filename)
+                         "name": source_dump,
+                         "size": os.path.getsize(dump_path),
+                         "type": "Memory Dump",
+                         "is_source": True
+                     })
+
+             evidences.append({
+                 "id": item,
+                 "name": matched_case_name,
+                 "type": "Evidence Group",
+                 "size": get_dir_size(path),
+                 "hash": source_dump,
+                 "source_id": source_dump if os.path.exists(os.path.join(STORAGE_DIR, source_dump)) else None,
+                 "uploaded": "Extracted group",
+                 "children": files
+             })
+
+     # Second pass: list main dumps and attach extracted files
+     for item in items:
+         path = os.path.join(STORAGE_DIR, item)
+         if os.path.isfile(path) and not item.endswith('.sha256'):
+             # It's a dump file (or other uploaded file).
+             # Skip it if it's already included in an evidence group.
+             if item in processed_dumps:
+                 continue
+
+             # Resolve the display name (case name)
+             display_name = case_map.get(item, item)
+
+             # Wrap in a group so the UI renders it folder-style
+             child_file = {
+                 "id": item,
+                 "name": item,
+                 "size": os.path.getsize(path),
+                 "type": "Memory Dump",
+                 "hash": get_file_hash(path),
+                 "uploaded": time.strftime('%Y-%m-%d', time.localtime(os.path.getmtime(path))),
+                 "is_source": True
+             }
+
+             evidences.append({
+                 "id": f"group_{item}",  # virtual ID for the group
+                 "name": display_name,
+                 "size": os.path.getsize(path),
+                 "type": "Evidence Group",
+                 "hash": item,
+                 "source_id": item,
+                 "uploaded": time.strftime('%Y-%m-%d', time.localtime(os.path.getmtime(path))),
+                 "children": [child_file]
+             })
+
+     return jsonify(evidences)
+
+ def calculate_sha256(filepath):
+     """Calculates the SHA-256 hash of a file."""
+     sha256_hash = hashlib.sha256()
+     with open(filepath, "rb") as f:
+         for byte_block in iter(lambda: f.read(4096), b""):
+             sha256_hash.update(byte_block)
+     return sha256_hash.hexdigest()
+
+ def get_file_hash(filepath):
+     """Returns the cached hash, or calculates and caches it."""
+     hash_file = filepath + ".sha256"
+     if os.path.exists(hash_file):
+         try:
+             with open(hash_file, 'r') as f:
+                 return f.read().strip()
+         except OSError:
+             pass
+
+     # Calculate and cache
+     try:
+         file_hash = calculate_sha256(filepath)
+         with open(hash_file, 'w') as f:
+             f.write(file_hash)
+         return file_hash
+     except Exception as e:
+         print(f"[ERROR] Failed to calc hash for {filepath}: {e}")
+         return "Error"
+
+ @app.route('/evidence/<filename>', methods=['DELETE'])
+ def delete_evidence(filename):
+     filename = secure_filename(filename)
+     path = os.path.join(STORAGE_DIR, filename)
+     if os.path.exists(path):
+         try:
+             if os.path.isdir(path):
+                 shutil.rmtree(path)
+             else:
+                 os.remove(path)
+
+             # Remove the sidecar hash file if it exists
+             if os.path.exists(path + ".sha256"):
+                 os.remove(path + ".sha256")
+
+             # Also remove the extracted directory (if this was a dump file),
+             # following the standard <filename>_extracted pattern
+             extracted_dir = os.path.join(STORAGE_DIR, f"{filename}_extracted")
+             if os.path.exists(extracted_dir):
+                 shutil.rmtree(extracted_dir)
+
+             return jsonify({"status": "deleted"})
+         except Exception as e:
+             print(f"[ERROR] Failed to delete {path}: {e}")
+             return jsonify({"error": str(e)}), 500
+     return jsonify({"error": "File not found"}), 404
+
+ @app.route('/evidence/<path:filename>/download', methods=['GET'])
+ def download_evidence(filename):
+     # Allow nested paths for extracted files. send_from_directory guards against
+     # path traversal (mostly); secure_filename must not be applied to the whole
+     # relative path here, since it would mangle the subdirectory separators.
+     return send_from_directory(STORAGE_DIR, filename, as_attachment=True)
+
+ def cleanup_timeouts():
+     """Marks scans that have been running for more than 1 hour as failed (timeout)."""
+     try:
+         conn = sqlite3.connect('scans.db')
+         c = conn.cursor()
+         one_hour_ago = time.time() - 3600
+
+         # Find tasks that are 'running' and older than 1 hour
+         c.execute("SELECT uuid FROM scans WHERE status='running' AND created_at < ?", (one_hour_ago,))
+         stale_scans = c.fetchall()
+
+         if stale_scans:
+             print(f"Cleaning up {len(stale_scans)} stale scans...")
+             c.execute("UPDATE scans SET status='failed', error='Timeout (>1h)' WHERE status='running' AND created_at < ?", (one_hour_ago,))
+             conn.commit()
+
+         conn.close()
+     except Exception as e:
+         print(f"Error cleaning up timeouts: {e}")
+
+ def background_dump_task(task_id, scan_id, virt_addr, docker_image):
+     """Background worker for extracting files from a memory dump."""
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+
+     try:
+         c.execute("UPDATE dump_tasks SET status = 'running' WHERE task_id = ?", (task_id,))
+         conn.commit()
+
+         # 1. Get the scan details
+         c.execute("SELECT * FROM scans WHERE uuid = ?", (scan_id,))
+         scan = c.fetchone()
+
+         if not scan:
+             raise Exception("Scan not found")
+
+         dump_path = scan['dump_path']
+         if not os.path.isabs(dump_path):
+             dump_path = os.path.join(STORAGE_DIR, dump_path)
+
+         # 2. Set up the output paths
+         scan_output_dir = scan['output_dir']
+         # Ensure the scan output dir exists
+         if not scan_output_dir or not os.path.exists(scan_output_dir):
+             scan_output_dir = os.path.join(os.getcwd(), "outputs", f"volatility3_{scan_id}")
+             os.makedirs(scan_output_dir, exist_ok=True)
+
+         target_output_dir = os.path.join(scan_output_dir, "downloads", task_id)
+         os.makedirs(target_output_dir, exist_ok=True)
+
+         # 3. Resolve paths and volumes (maps container paths back to host paths for DooD)
+         host_path = os.environ.get("HOST_PATH")
+         def resolve(p):
+             if host_path:
+                 if p.startswith(os.getcwd()):
+                     rel = os.path.relpath(p, os.getcwd())
+                     return os.path.join(host_path, rel)
+                 if p.startswith("/storage"):
+                     return os.path.join(host_path, "storage", "data", os.path.relpath(p, "/storage"))
+             return p
+
+         abs_dump_path = os.path.abspath(dump_path)
+         abs_dump_dir = os.path.dirname(abs_dump_path)
+         dump_filename = os.path.basename(abs_dump_path)
+
+         symbols_path = os.path.join(os.getcwd(), "volatility3_symbols")
+         cache_path = os.path.join(os.getcwd(), "volatility3_cache")
+         plugins_path = os.path.join(os.getcwd(), "volatility3_plugins")
+
+         volumes = {
+             resolve(abs_dump_dir): {'bind': '/dump_dir', 'mode': 'ro'},
+             resolve(target_output_dir): {'bind': '/output', 'mode': 'rw'},
+             resolve(symbols_path): {'bind': '/symbols', 'mode': 'rw'},
+             resolve(cache_path): {'bind': '/root/.cache/volatility3', 'mode': 'rw'},
+             resolve(plugins_path): {'bind': '/plugins', 'mode': 'ro'}
+         }
+
+         # 4. Run the Docker command
+         client = docker.from_env()
+         cmd = [
+             "vol", "-q",
+             "-f", f"/dump_dir/{dump_filename}",
+             "-o", "/output",
+             "windows.dumpfiles.DumpFiles",
+             "--virtaddr", str(virt_addr)
+         ]
+
+         print(f"[DEBUG] [Task {task_id}] Running: {cmd}")
+
+         client.containers.run(
+             image=docker_image,
+             command=cmd,
+             volumes=volumes,
+             remove=True,
+             stderr=True,
+             stdout=True
+         )  # This blocks until completion
+
+         # 5. Identify the result file (anything that is not a .json report)
+         files = os.listdir(target_output_dir)
+         target_file = None
+
+         for f in files:
+             if not f.endswith(".json"):
+                 target_file = f
+                 break
+
+         if not target_file:
+             raise Exception("No file extracted (DumpFiles returned no candidate)")
+
+         # Organize downloads as STORAGE_DIR/<CaseName_or_DumpName>_extracted/<target_file>,
+         # preferring the case name if one is available
+         case_name = scan['name']
+         if case_name:
+             # Sanitize the case name for use as a folder name
+             safe_case_name = secure_filename(case_name)
+             extracted_dir_name = f"{safe_case_name}_extracted"
+         else:
+             extracted_dir_name = f"{dump_filename}_extracted"
+
+         storage_extracted_dir = os.path.join(STORAGE_DIR, extracted_dir_name)
+         os.makedirs(storage_extracted_dir, exist_ok=True)
+
+         final_path = os.path.join(storage_extracted_dir, target_file)
+
+         # Move from the temporary output to final storage
+         shutil.move(os.path.join(target_output_dir, target_file), final_path)
+
+         # 6. Mark completed
+         c.execute("UPDATE dump_tasks SET status = 'completed', output_path = ? WHERE task_id = ?", (final_path, task_id))
+         conn.commit()
+         print(f"[DEBUG] [Task {task_id}] Completed. Moved to: {final_path}")
+
+     except Exception as e:
+         print(f"[ERROR] [Task {task_id}] Failed: {e}")
+         c.execute("UPDATE dump_tasks SET status = 'failed', error = ? WHERE task_id = ?", (str(e), task_id))
+         conn.commit()
+     finally:
+         conn.close()
+
+ @app.route('/scan/<scan_id>/dump-file', methods=['POST'])
+ def dump_file_from_memory(scan_id):
+     data = request.json
+     virt_addr = data.get('virt_addr')
+     docker_image = data.get('image')
+
+     if not virt_addr:
+         return jsonify({"error": "Missing 'virt_addr'"}), 400
+     if not docker_image:
+         return jsonify({"error": "Missing 'image'"}), 400
+
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+     c.execute("SELECT * FROM scans WHERE uuid = ?", (scan_id,))
+     scan = c.fetchone()
+
+     if not scan:
+         conn.close()
+         return jsonify({"error": "Scan not found"}), 404
+
+     # Create the task
+     task_id = str(uuid.uuid4())
+     c.execute("INSERT INTO dump_tasks (task_id, scan_id, status, created_at) VALUES (?, ?, ?, ?)",
+               (task_id, scan_id, "pending", time.time()))
+     conn.commit()
+     conn.close()
+
+     # Start the background thread
+     thread = threading.Thread(target=background_dump_task, args=(task_id, scan_id, virt_addr, docker_image))
+     thread.daemon = True
+     thread.start()
+
+     return jsonify({"task_id": task_id, "status": "pending"})
+
+ @app.route('/dump-task/<task_id>', methods=['GET'])
+ def get_dump_status(task_id):
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+     c.execute("SELECT * FROM dump_tasks WHERE task_id = ?", (task_id,))
+     task = c.fetchone()
+     conn.close()
+
+     if not task:
+         return jsonify({"error": "Task not found"}), 404
+
+     return jsonify(dict(task))
+
+ @app.route('/dump-task/<task_id>/download', methods=['GET'])
+ def download_dump_result(task_id):
+     conn = sqlite3.connect('scans.db')
+     conn.row_factory = sqlite3.Row
+     c = conn.cursor()
+     c.execute("SELECT * FROM dump_tasks WHERE task_id = ?", (task_id,))
+     task = c.fetchone()
+     conn.close()
+
+     if not task:
+         return jsonify({"error": "Task not found"}), 404
+
+     if task['status'] != 'completed':
+         return jsonify({"error": "Task not completed"}), 400
+
+     file_path = task['output_path']
+     if not file_path or not os.path.exists(file_path):
+         return jsonify({"error": "File not found on server"}), 404
+
+     return send_file(
+         file_path,
+         as_attachment=True,
+         download_name=os.path.basename(file_path)
+     )
+
+ def run_api(runner_cb, debug_mode=False):
+     global runner_func
+     runner_func = runner_cb
+     cleanup_timeouts()  # Clean up stale tasks on startup
+     app.run(host='0.0.0.0', port=5001, debug=debug_mode)
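
For reference, run_api() wires a runner callback into the Flask app before serving. A minimal embedding sketch follows; the my_runner callback is hypothetical, since the real runner ships elsewhere in the package:

    from multivol.api import run_api

    def my_runner(args):
        # 'args' is an argparse.Namespace assembled from the /scan request body
        # (dump, image, mode, output_dir, symbols_path, ...). A real runner would
        # launch the Volatility container and write *_output.json files into
        # args.output_dir, which ingest_results_to_db() then picks up.
        print(f"Would scan {args.dump} with {args.image} into {args.output_dir}")

    run_api(my_runner)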
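
A client-side sketch of the upload/scan/results flow against the endpoints above, assuming the API is listening locally on port 5001 and the requests package is installed (the dump filename, image tag, and case name are placeholders):

    import time
    import requests

    BASE = "http://127.0.0.1:5001"

    # 1. Upload a memory dump; the multipart field name must be 'file'.
    with open("memdump.raw", "rb") as f:
        requests.post(f"{BASE}/upload", files={"file": f})

    # 2. Start a scan. 'dump' may be a bare filename (resolved against STORAGE_DIR);
    #    exactly one of 'windows'/'linux' must be true.
    scan = requests.post(f"{BASE}/scan", json={
        "dump": "memdump.raw",
        "image": "volatility3:latest",  # a local Docker image tag containing "volatility"
        "mode": "vol3",
        "windows": True,
        "name": "example-case",
    }).json()
    scan_id = scan["scan_id"]

    # 3. Poll until the background scan thread finishes.
    while requests.get(f"{BASE}/status/{scan_id}").json()["status"] not in ("completed", "failed"):
        time.sleep(5)

    # 4. List the parsed modules, then fetch one module's results (module=all also works).
    modules = requests.get(f"{BASE}/results/{scan_id}/modules").json()["modules"]
    if modules:
        results = requests.get(f"{BASE}/results/{scan_id}", params={"module": modules[0]}).json()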
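
The dump-file endpoints follow the same asynchronous pattern. Continuing the sketch above, extracting a single file by virtual address from a completed scan might look like this (the address and image tag are placeholders):

    # Kick off windows.dumpfiles.DumpFiles in a background container task.
    task = requests.post(f"{BASE}/scan/{scan_id}/dump-file", json={
        "virt_addr": "0xfa8002e61af0",  # e.g. taken from windows.filescan output
        "image": "volatility3:latest",
    }).json()
    task_id = task["task_id"]

    # Poll the task, then download the extracted file once it completes.
    while requests.get(f"{BASE}/dump-task/{task_id}").json()["status"] not in ("completed", "failed"):
        time.sleep(2)

    extracted = requests.get(f"{BASE}/dump-task/{task_id}/download")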