mobox-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Volume Analyzer - 持久化分析器
4
+
5
+ 智能识别需要持久化的目录(数据库、上传文件等)
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Dict, List
13
+
14
+
15
# Persistence detection rules.
#
# Each rule is a dict with:
#   pattern            - file/directory names to match; a single "*" acts as a
#                        wildcard, and directory patterns may contain "/"
#   type               - "file" or "dir": which kind of entry the rule applies to
#   reason             - human-readable explanation attached to the suggestion
#   priority           - "high", "medium" or "low"
#   suggest_parent_dir - (optional) suggest the directory containing the
#                        matched file instead of the file itself
#   suggest_dir        - (optional) a specific project-relative directory
PERSISTENCE_PATTERNS = [
    # SQLite database files: persist the directory that holds them.
    {
        "pattern": ["*.db", "*.sqlite", "*.sqlite3"],
        "type": "file",
        "reason": "SQLite database file",
        "priority": "high",
        "suggest_parent_dir": True,
    },
    # Prisma schema file.
    {
        "pattern": ["schema.prisma"],
        "type": "file",
        "reason": "Prisma schema configuration",
        "priority": "high",
        "suggest_dir": "./prisma",
    },
    # Generic data directories.
    {
        "pattern": ["data", "database"],
        "type": "dir",
        "reason": "Data directory",
        "priority": "high",
    },
    {
        "pattern": ["storage"],
        "type": "dir",
        "reason": "Storage directory",
        "priority": "high",
    },
    # User upload directories.
    {
        "pattern": ["uploads", "public/uploads", "static/uploads"],
        "type": "dir",
        "reason": "User upload directory",
        "priority": "medium",
    },
    # JSON "database" files as used by lowdb.
    {
        "pattern": ["db.json", "database.json"],
        "type": "file",
        "reason": "JSON database file (lowdb)",
        "priority": "medium",
        "suggest_parent_dir": True,
    },
    # Log directories.
    {
        "pattern": ["logs"],
        "type": "dir",
        "reason": "Log directory",
        "priority": "low",
    },
    # ---------------- Python-specific rules ----------------
    {
        "pattern": ["instance"],
        "type": "dir",
        "reason": "Flask instance folder (SQLite database)",
        "priority": "high",
    },
    {
        "pattern": ["data.json", "db.json", "database.json"],
        "type": "file",
        "reason": "JSON database file",
        "priority": "high",
        "suggest_parent_dir": True,
    },
    {
        "pattern": ["*.csv", "*.xlsx"],
        "type": "file",
        "reason": "Data file (Streamlit/Data Science)",
        "priority": "medium",
        "suggest_parent_dir": True,
    },
]
85
+
86
# Entry names that are never scanned. This stays a plain list because it is
# returned verbatim in the analysis result and serialized to JSON.
EXCLUDE_DIRS = [
    # JavaScript / tooling output and editor metadata
    "node_modules",
    ".git",
    ".next",
    ".nuxt",
    "dist",
    "build",
    ".turbo",
    ".cache",
    "coverage",
    ".vscode",
    ".idea",
    # Python virtual environments and tool caches
    "__pycache__",
    ".venv",
    "venv",
    "env",
    "ENV",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".tox",
    "htmlcov",
]
112
+
113
+
114
class VolumeAnalyzer:
    """Suggest which files/directories of a project should be persisted as volumes.

    The analyzer walks the project tree, matches entries against
    ``PERSISTENCE_PATTERNS``, inspects ``package.json`` / ``requirements.txt``
    for known data-producing dependencies, and collects de-duplicated
    suggestions of the form ``{'source', 'reason', 'priority', 'size',
    'size_bytes'}``.
    """

    def __init__(self, project_path: str):
        """Store the resolved project root and initialise empty result state.

        Args:
            project_path: Path to the project directory (need not exist yet;
                existence is checked in :meth:`analyze`).
        """
        self.project_path = Path(project_path).resolve()
        self.suggestions = []
        self.suggested_paths = set()  # sources already suggested (de-duplication)

    def analyze(self) -> Dict:
        """Run the full persistence analysis.

        Returns:
            On success: ``{'success': True, 'suggested': [...], 'excluded':
            [...], 'total_size': str, 'total_size_bytes': int}``.
            On failure (path missing or not a directory):
            ``{'success': False, 'error': str}``.
        """
        if not self.project_path.exists():
            return {
                'success': False,
                'error': f'Project path does not exist: {self.project_path}'
            }

        # A regular file cannot be scanned; report it instead of crashing
        # inside iterdir() with NotADirectoryError.
        if not self.project_path.is_dir():
            return {
                'success': False,
                'error': f'Project path is not a directory: {self.project_path}'
            }

        # Walk the project tree.
        self._scan_directory(self.project_path)

        # Inspect dependency manifests for special cases.
        self._check_dependencies()

        # ---- Third line of defence: validate and filter the results ----
        # Drop any dangerous configuration that slipped through earlier checks.
        safe_suggestions = []
        filtered_count = 0

        for suggestion in self.suggestions:
            source = suggestion['source']
            if self._validate_volume_source(source):
                safe_suggestions.append(suggestion)
            else:
                filtered_count += 1
                print(
                    f"⚠️ Warning: Filtered dangerous volume configuration:\n"
                    f" Source: {source}\n"
                    f" Reason: {suggestion['reason']}\n"
                    f" This configuration would cause container startup failure.",
                    file=sys.stderr
                )

        # Keep only the validated suggestions.
        self.suggestions = safe_suggestions

        if filtered_count > 0:
            print(
                f"\n🛡️ Security: Filtered {filtered_count} dangerous volume configuration(s).\n",
                file=sys.stderr
            )

        total_size = sum(s['size_bytes'] for s in self.suggestions)

        return {
            'success': True,
            'suggested': self.suggestions,
            # Return a copy so callers cannot mutate the module-level list.
            'excluded': list(EXCLUDE_DIRS),
            'total_size': self._format_size(total_size),
            'total_size_bytes': total_size,
        }

    def _scan_directory(self, directory: Path, depth: int = 0, max_depth: int = 5):
        """Recursively scan ``directory`` and record matching files/directories.

        Args:
            directory: Directory to scan.
            depth: Current recursion depth (0 for the project root).
            max_depth: Directories deeper than this are not scanned.
        """
        if depth > max_depth:
            return

        try:
            entries = list(directory.iterdir())
        except OSError:
            return  # unreadable or vanished directory: skip it

        for item in entries:
            # Skip excluded names.
            if item.name in EXCLUDE_DIRS:
                continue

            # Skip hidden entries (leading dot), except '.env'.
            if item.name.startswith('.') and item.name != '.env':
                continue

            # Handle errors per entry so one unreadable item does not abort
            # the rest of the directory.
            try:
                if item.is_file():
                    self._check_file(item)
                elif item.is_dir():
                    self._check_directory(item)
                    self._scan_directory(item, depth + 1, max_depth)
            except OSError:
                continue

    def _check_file(self, file_path: Path):
        """Record a suggestion for ``file_path`` for every 'file' rule it matches."""
        for rule in PERSISTENCE_PATTERNS:
            if rule['type'] != 'file':
                continue

            for pattern in rule['pattern']:
                if self._match_pattern(file_path.name, pattern):
                    # Some rules persist the containing directory rather than
                    # the single file.
                    if rule.get('suggest_parent_dir', False):
                        target = file_path.parent
                    else:
                        target = file_path

                    self._add_suggestion(target, rule['reason'], rule['priority'])
                    break  # one match per rule is enough

    def _check_directory(self, dir_path: Path):
        """Record a suggestion for ``dir_path`` for every 'dir' rule it matches."""
        # POSIX form so multi-segment patterns such as "public/uploads"
        # compare the same way on every platform.
        relative_posix = dir_path.relative_to(self.project_path).as_posix()

        for rule in PERSISTENCE_PATTERNS:
            if rule['type'] != 'dir':
                continue

            for pattern in rule['pattern']:
                if '/' in pattern:
                    # Match whole path components only, so "mypublic/uploads"
                    # does not match "public/uploads".
                    matched = (
                        relative_posix == pattern
                        or relative_posix.endswith('/' + pattern)
                    )
                else:
                    matched = dir_path.name == pattern

                if matched:
                    self._add_suggestion(dir_path, rule['reason'], rule['priority'])
                    break

            # Rules may name a specific project-relative directory
            # (e.g. "./prisma"). Joining with pathlib drops the "./" prefix;
            # str.lstrip('./') would strip a character set and mangle names
            # that start with a dot. Only 'dir' rules reach this point.
            if 'suggest_dir' in rule:
                suggest_path = self.project_path / rule['suggest_dir']
                if dir_path == suggest_path:
                    self._add_suggestion(dir_path, rule['reason'], rule['priority'])

    def _is_excluded(self, path: Path) -> bool:
        """Return True if any directory component of ``path`` (relative to the
        project root) is listed in ``EXCLUDE_DIRS``."""
        directory_parts = path.relative_to(self.project_path).parts[:-1]
        return any(part in EXCLUDE_DIRS for part in directory_parts)

    def _check_dependencies(self):
        """Inspect dependency manifests (package.json, requirements.txt).

        The two manifests are checked independently, so a Python project
        without a package.json still gets its requirements.txt inspected.
        """
        self._check_node_dependencies()

        req_file = self.project_path / 'requirements.txt'
        if req_file.exists():
            self._check_python_dependencies(req_file)

    def _check_node_dependencies(self):
        """Suggest volumes implied by package.json dependencies (Prisma, lowdb).

        Best effort: an unreadable or malformed package.json is ignored.
        """
        package_json_path = self.project_path / 'package.json'
        if not package_json_path.exists():
            return

        try:
            with open(package_json_path, 'r', encoding='utf-8') as f:
                package_json = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return

        if not isinstance(package_json, dict):
            return

        # Only the dependency names matter; tolerate missing/null sections.
        dependencies = {}
        for section in ('dependencies', 'devDependencies'):
            entries = package_json.get(section)
            if isinstance(entries, dict):
                dependencies.update(entries)

        # Prisma keeps its schema (and often a SQLite file) under ./prisma.
        if 'prisma' in dependencies or '@prisma/client' in dependencies:
            prisma_dir = self.project_path / 'prisma'
            if prisma_dir.exists():
                self._add_suggestion(prisma_dir, 'Prisma database directory', 'high')

        # lowdb stores data in a JSON file; suggest the directory of the first
        # plausible candidate outside excluded directories.
        if 'lowdb' in dependencies:
            try:
                for json_file in self.project_path.rglob('*.json'):
                    if self._is_excluded(json_file):
                        continue
                    lowered = json_file.name.lower()
                    if 'db' in lowered or 'database' in lowered:
                        self._add_suggestion(
                            json_file.parent,
                            'lowdb JSON database',
                            'medium'
                        )
                        break
            except OSError:
                pass  # traversal hit an unreadable directory; best effort

    def _check_python_dependencies(self, req_file: Path):
        """Suggest volumes implied by packages named in ``req_file``.

        Detection is a case-insensitive substring search of the file content.
        Best effort: an unreadable file is ignored.
        """
        try:
            content = req_file.read_text(encoding='utf-8', errors='replace').lower()
        except OSError:
            return

        # Flask: the "instance" folder.
        if 'flask' in content:
            instance_dir = self.project_path / 'instance'
            if instance_dir.exists():
                self._add_suggestion(
                    instance_dir,
                    'Flask instance folder (SQLite database)',
                    'high'
                )

        # SQLAlchemy (this also covers "flask-sqlalchemy"): suggest the
        # directory of the first .db file outside excluded directories.
        if 'sqlalchemy' in content:
            try:
                for db_file in self.project_path.rglob('*.db'):
                    if self._is_excluded(db_file):
                        continue
                    self._add_suggestion(
                        db_file.parent,
                        'SQLAlchemy database directory',
                        'high'
                    )
                    break
            except OSError:
                pass  # traversal hit an unreadable directory; best effort

        # Streamlit: the "data" folder.
        if 'streamlit' in content:
            data_dir = self.project_path / 'data'
            if data_dir.exists():
                self._add_suggestion(data_dir, 'Streamlit data directory', 'medium')

    def _validate_volume_source(self, source_path: str) -> bool:
        """Check that a volume source path is safe to mount.

        Args:
            source_path: Relative path string (e.g. "./data", "./app.db").

        Returns:
            True if valid; False if the path is the current directory, the
            filesystem root, or contains a ".." component anywhere.
        """
        candidate = Path(source_path)
        normalized = candidate.as_posix()  # "./." and "" both normalise to "."

        if normalized in {".", "..", "/"}:
            return False

        # Reject parent traversal anywhere in the path, not only as a prefix
        # ("./data/../../etc" escapes the project just like "../etc").
        if ".." in candidate.parts:
            return False

        return True

    def _add_suggestion(self, path: Path, reason: str, priority: str):
        """Record a de-duplicated persistence suggestion for ``path``.

        Args:
            path: File or directory inside the project root.
            reason: Human-readable explanation.
            priority: 'high', 'medium' or 'low'.

        Raises:
            ValueError: If ``path`` is not located under the project root.
        """
        # ---- First line of defence: never suggest the project root ----
        if path == self.project_path:
            print(
                f"⚠️ Warning: Skipping volume suggestion for project root directory.\n"
                f" Reason: {reason}\n"
                f" Please specify a subdirectory or file instead.",
                file=sys.stderr
            )
            return

        # Always emit forward slashes, regardless of the host OS.
        relative_path = path.relative_to(self.project_path)
        source_path = f"./{relative_path.as_posix()}"

        # ---- Second line of defence: validate the path ----
        if not self._validate_volume_source(source_path):
            print(
                f"⚠️ Warning: Invalid volume source path: '{source_path}'\n"
                f" Reason: {reason}\n"
                f" This path would mount dangerous directories (project root, parent, etc.).\n"
                f" Skipping this suggestion.",
                file=sys.stderr
            )
            return

        # De-duplicate: the first suggestion for a source wins.
        if source_path in self.suggested_paths:
            return

        self.suggested_paths.add(source_path)

        size_bytes = self._get_size(path)

        self.suggestions.append({
            'source': source_path,
            'reason': reason,
            'priority': priority,
            'size': self._format_size(size_bytes),
            'size_bytes': size_bytes,
        })

    def _get_size(self, path: Path) -> int:
        """Return the size in bytes of a file, or the summed size of all files
        under a directory. Unreadable entries count as 0."""
        try:
            if path.is_file():
                return path.stat().st_size
            if not path.is_dir():
                return 0
        except OSError:
            return 0

        total_size = 0
        try:
            for item in path.rglob('*'):
                try:
                    if item.is_file():
                        total_size += item.stat().st_size
                except OSError:
                    pass  # entry vanished or is unreadable; skip it
        except OSError:
            pass  # traversal aborted; return what was counted so far
        return total_size

    def _format_size(self, size_bytes: int) -> str:
        """Format a byte count with one decimal and a B/KB/MB/GB/TB unit."""
        size = float(size_bytes)
        for unit in ['B', 'KB', 'MB', 'GB']:
            if size < 1024:
                return f"{size:.1f} {unit}"
            size /= 1024
        return f"{size:.1f} TB"

    def _match_pattern(self, name: str, pattern: str) -> bool:
        """Match ``name`` against ``pattern``.

        A pattern containing exactly one ``*`` is treated as
        ``<prefix>*<suffix>``; any other pattern must equal the name.
        """
        if '*' in pattern:
            parts = pattern.split('*')
            if len(parts) == 2:
                prefix, suffix = parts
                return name.startswith(prefix) and name.endswith(suffix)
        return name == pattern
454
+
455
+
456
def main():
    """Command-line entry point.

    Expects the project path as the first argument, runs the analysis and
    prints the result as indented JSON on stdout. Exits with status 1 when
    the argument is missing or the analysis reports failure.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        usage = {
            'success': False,
            'error': 'Usage: analyze_volumes.py <project_path>'
        }
        print(json.dumps(usage, indent=2))
        sys.exit(1)

    report = VolumeAnalyzer(cli_args[0]).analyze()
    succeeded = report.get('success')

    if succeeded:
        # Order suggestions high -> medium -> low; unknown priorities go last.
        rank = {'high': 0, 'medium': 1, 'low': 2}

        def by_priority(entry):
            return rank.get(entry['priority'], 3)

        report['suggested'].sort(key=by_priority)

    # Emit the JSON report.
    print(json.dumps(report, indent=2))

    if not succeeded:
        sys.exit(1)


if __name__ == "__main__":
    main()