ltcai 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -0
- package/knowledge_graph.py +18 -5
- package/package.json +1 -1
- package/server.py +144 -20
- package/tests/unit/__pycache__/test_security.cpython-314-pytest-9.0.3.pyc +0 -0
- package/tests/unit/test_security.py +125 -0
package/README.md
CHANGED
|
@@ -10,6 +10,24 @@ LTCAI # → http://localhost:4825
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## v0.1.9 변경사항
|
|
14
|
+
|
|
15
|
+
### Security
|
|
16
|
+
- 세션 TTL 7일 → 24시간, sliding refresh (활동 시 자동 연장)
|
|
17
|
+
- 파일 업로드 magic-number 검증 (PDF/DOCX/PNG 등 시그니처 확인)
|
|
18
|
+
- Rate limiting (per user, 토큰 버킷): `/chat` 30/min (burst 30), `/agent` 6/min (burst 10), `/upload/document` 12/min (burst 20)
|
|
19
|
+
|
|
20
|
+
### Reliability
|
|
21
|
+
- PyMuPDF 파일 핸들 누수 수정, ollama serve 좀비 방지 (detach)
|
|
22
|
+
- knowledge_graph metadata 손상 row 안전 통과
|
|
23
|
+
- 백그라운드 asyncio 태스크 예외 로깅 (`_spawn` 헬퍼)
|
|
24
|
+
- silent except → logging.warning (sessions/config 로딩)
|
|
25
|
+
|
|
26
|
+
### Tests
|
|
27
|
+
- `tests/unit/test_security.py` 16개 추가 (bcrypt, MIME, rate limit, harness risk)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
13
31
|
## v0.1.8 변경사항
|
|
14
32
|
|
|
15
33
|
### Added
|
|
@@ -25,6 +43,12 @@ LTCAI # → http://localhost:4825
|
|
|
25
43
|
- `computer_screenshot`: macOS `screencapture` → Windows/Linux `pyautogui` fallback
|
|
26
44
|
- `computer_open_app` / `computer_open_url`: `open -a` / `cmd /c start` / `xdg-open` 플랫폼 자동 분기
|
|
27
45
|
|
|
46
|
+
### 배포 현황
|
|
47
|
+
- npm ✅
|
|
48
|
+
- PyPI ✅
|
|
49
|
+
- VS Code Marketplace ✅
|
|
50
|
+
- Open VSX ✅
|
|
51
|
+
|
|
28
52
|
---
|
|
29
53
|
|
|
30
54
|
## v0.1.7 변경사항
|
package/knowledge_graph.py
CHANGED
|
@@ -8,6 +8,7 @@ the ingestion contract.
|
|
|
8
8
|
|
|
9
9
|
import hashlib
|
|
10
10
|
import json
|
|
11
|
+
import logging
|
|
11
12
|
import re
|
|
12
13
|
import shutil
|
|
13
14
|
import sqlite3
|
|
@@ -28,6 +29,18 @@ def _json(data: Optional[Dict[str, Any]]) -> str:
|
|
|
28
29
|
return json.dumps(data or {}, ensure_ascii=False, sort_keys=True)
|
|
29
30
|
|
|
30
31
|
|
|
32
|
+
def _safe_loads(raw: Optional[str]) -> Dict[str, Any]:
|
|
33
|
+
"""Tolerantly parse a metadata_json column — returns {} on corrupt rows."""
|
|
34
|
+
if not raw:
|
|
35
|
+
return {}
|
|
36
|
+
try:
|
|
37
|
+
value = json.loads(raw)
|
|
38
|
+
return value if isinstance(value, dict) else {}
|
|
39
|
+
except (json.JSONDecodeError, TypeError) as e:
|
|
40
|
+
logging.warning("knowledge_graph: corrupt metadata_json (%s) — using empty dict", e)
|
|
41
|
+
return {}
|
|
42
|
+
|
|
43
|
+
|
|
31
44
|
def _slug(text: str, max_len: int = 96) -> str:
|
|
32
45
|
value = re.sub(r"\s+", " ", str(text or "")).strip().lower()
|
|
33
46
|
value = re.sub(r"[^0-9a-zA-Z가-힣._:@/-]+", "-", value).strip("-")
|
|
@@ -573,7 +586,7 @@ class KnowledgeGraphStore:
|
|
|
573
586
|
"type": row["type"],
|
|
574
587
|
"title": row["title"],
|
|
575
588
|
"summary": row["summary"],
|
|
576
|
-
"metadata":
|
|
589
|
+
"metadata": _safe_loads(row["metadata_json"]),
|
|
577
590
|
}
|
|
578
591
|
for row in conn.execute(
|
|
579
592
|
"SELECT id, type, title, summary, metadata_json FROM nodes WHERE type != 'Chunk' ORDER BY updated_at DESC LIMIT ?",
|
|
@@ -588,7 +601,7 @@ class KnowledgeGraphStore:
|
|
|
588
601
|
"to": row["to_node"],
|
|
589
602
|
"type": row["type"],
|
|
590
603
|
"weight": row["weight"],
|
|
591
|
-
"metadata":
|
|
604
|
+
"metadata": _safe_loads(row["metadata_json"]),
|
|
592
605
|
}
|
|
593
606
|
for row in conn.execute(
|
|
594
607
|
"SELECT id, from_node, to_node, type, weight, metadata_json FROM edges ORDER BY created_at DESC LIMIT ?",
|
|
@@ -655,7 +668,7 @@ class KnowledgeGraphStore:
|
|
|
655
668
|
"type": row["type"],
|
|
656
669
|
"title": row["title"],
|
|
657
670
|
"summary": row["summary"],
|
|
658
|
-
"metadata":
|
|
671
|
+
"metadata": _safe_loads(row["metadata_json"]),
|
|
659
672
|
}
|
|
660
673
|
for row in rows
|
|
661
674
|
],
|
|
@@ -694,7 +707,7 @@ class KnowledgeGraphStore:
|
|
|
694
707
|
"type": row["type"],
|
|
695
708
|
"title": row["title"],
|
|
696
709
|
"summary": row["summary"],
|
|
697
|
-
"metadata":
|
|
710
|
+
"metadata": _safe_loads(row["metadata_json"]),
|
|
698
711
|
})
|
|
699
712
|
if len(matches) >= limit:
|
|
700
713
|
break
|
|
@@ -729,7 +742,7 @@ class KnowledgeGraphStore:
|
|
|
729
742
|
"type": row["type"],
|
|
730
743
|
"title": row["title"],
|
|
731
744
|
"summary": row["summary"],
|
|
732
|
-
"metadata":
|
|
745
|
+
"metadata": _safe_loads(row["metadata_json"]),
|
|
733
746
|
}
|
|
734
747
|
for row in conn.execute(
|
|
735
748
|
f"SELECT id, type, title, summary, metadata_json FROM nodes WHERE id IN ({placeholders})",
|
package/package.json
CHANGED
package/server.py
CHANGED
|
@@ -217,17 +217,25 @@ def verify_password(password: str, hashed: str) -> bool:
|
|
|
217
217
|
return False
|
|
218
218
|
|
|
219
219
|
def verify_and_migrate_password(email: str, plain: str, stored: str, users: Dict) -> bool:
|
|
220
|
-
"""평문 비밀번호를 투명하게 해시로 마이그레이션."""
|
|
220
|
+
"""평문 비밀번호를 투명하게 해시로 마이그레이션. 마이그레이션 발생 시 audit log 남김."""
|
|
221
221
|
if ":" in stored and len(stored) > 64:
|
|
222
222
|
return verify_password(plain, stored)
|
|
223
223
|
if plain == stored:
|
|
224
224
|
users[email]["password"] = hash_password(plain)
|
|
225
225
|
save_users(users)
|
|
226
|
+
try:
|
|
227
|
+
append_audit_event("password_migrated_from_plaintext", user_email=email)
|
|
228
|
+
except Exception as e:
|
|
229
|
+
logging.warning("audit log failed on password migration: %s", e)
|
|
230
|
+
logging.info("Migrated plaintext password to bcrypt hash for %s", email)
|
|
226
231
|
return True
|
|
227
232
|
return False
|
|
228
233
|
|
|
229
234
|
# ── Session store (file-backed, survives restarts) ────────────────────────────
|
|
230
|
-
|
|
235
|
+
# 24-hour TTL with sliding-window refresh — an authenticated request bumps
|
|
236
|
+
# created_at once it is older than _SESSION_REFRESH_THRESHOLD, so an active user stays logged in while idle sessions auto-expire.
|
|
237
|
+
_SESSION_TTL = 60 * 60 * 24 # 24 hours
|
|
238
|
+
_SESSION_REFRESH_THRESHOLD = 60 * 15 # only persist if >15 min since last bump (write amplification guard)
|
|
231
239
|
_sessions_lock = threading.Lock()
|
|
232
240
|
|
|
233
241
|
def _sessions_file() -> Path:
|
|
@@ -239,15 +247,15 @@ def _load_sessions() -> Dict[str, tuple]:
|
|
|
239
247
|
if f.exists():
|
|
240
248
|
raw = json.loads(f.read_text())
|
|
241
249
|
return {k: tuple(v) for k, v in raw.items()}
|
|
242
|
-
except Exception:
|
|
243
|
-
|
|
250
|
+
except Exception as e:
|
|
251
|
+
logging.warning("_load_sessions failed (starting empty): %s", e)
|
|
244
252
|
return {}
|
|
245
253
|
|
|
246
254
|
def _persist_sessions(sessions: Dict[str, tuple]) -> None:
|
|
247
255
|
try:
|
|
248
256
|
_sessions_file().write_text(json.dumps({k: list(v) for k, v in sessions.items()}, ensure_ascii=False))
|
|
249
|
-
except Exception:
|
|
250
|
-
|
|
257
|
+
except Exception as e:
|
|
258
|
+
logging.warning("_persist_sessions failed: %s", e)
|
|
251
259
|
|
|
252
260
|
_sessions: Dict[str, tuple] = _load_sessions()
|
|
253
261
|
|
|
@@ -259,15 +267,21 @@ def create_session(email: str) -> str:
|
|
|
259
267
|
return token
|
|
260
268
|
|
|
261
269
|
def get_session_email(token: str) -> Optional[str]:
|
|
270
|
+
"""Return email for a valid session, sliding the expiry forward on activity."""
|
|
271
|
+
now = time.time()
|
|
262
272
|
with _sessions_lock:
|
|
263
273
|
entry = _sessions.get(token)
|
|
264
274
|
if entry is None:
|
|
265
275
|
return None
|
|
266
276
|
email, created_at = entry
|
|
267
|
-
if
|
|
277
|
+
if now - created_at > _SESSION_TTL:
|
|
268
278
|
_sessions.pop(token, None)
|
|
269
279
|
_persist_sessions(_sessions)
|
|
270
280
|
return None
|
|
281
|
+
# Sliding refresh: only update if the timestamp drifted enough to be worth a disk write
|
|
282
|
+
if now - created_at > _SESSION_REFRESH_THRESHOLD:
|
|
283
|
+
_sessions[token] = (email, now)
|
|
284
|
+
_persist_sessions(_sessions)
|
|
271
285
|
return email
|
|
272
286
|
|
|
273
287
|
def invalidate_session(token: str) -> None:
|
|
@@ -628,7 +642,8 @@ def load_vpc_config() -> Dict:
|
|
|
628
642
|
with open(VPC_FILE, "r", encoding="utf-8") as f:
|
|
629
643
|
stored = json.load(f)
|
|
630
644
|
return {**DEFAULT_VPC_CONFIG, **stored}
|
|
631
|
-
except Exception:
|
|
645
|
+
except Exception as e:
|
|
646
|
+
logging.warning("load_vpc_config failed (using defaults): %s", e)
|
|
632
647
|
return DEFAULT_VPC_CONFIG.copy()
|
|
633
648
|
|
|
634
649
|
def save_vpc_config(config: Dict):
|
|
@@ -645,7 +660,8 @@ def load_mcp_installs() -> Dict:
|
|
|
645
660
|
if "installed" not in data:
|
|
646
661
|
data["installed"] = {}
|
|
647
662
|
return data
|
|
648
|
-
except Exception:
|
|
663
|
+
except Exception as e:
|
|
664
|
+
logging.warning("load_mcp_installs failed: %s", e)
|
|
649
665
|
return {"installed": {}, "updated_at": None}
|
|
650
666
|
|
|
651
667
|
def save_mcp_installs(data: Dict):
|
|
@@ -1048,6 +1064,71 @@ def require_user(request: Request) -> str:
|
|
|
1048
1064
|
raise HTTPException(status_code=401, detail="인증이 필요합니다.")
|
|
1049
1065
|
return email or ""
|
|
1050
1066
|
|
|
1067
|
+
|
|
1068
|
+
# ── Rate limiting ─────────────────────────────────────────────────────────────
|
|
1069
|
+
# Per-user token bucket. Disabled when LATTICEAI_RATE_LIMIT=0 (default: enabled).
|
|
1070
|
+
_RATE_LIMIT_ENABLED = os.getenv("LATTICEAI_RATE_LIMIT", "1") != "0"
|
|
1071
|
+
_rate_buckets: Dict[str, Dict[str, float]] = {}
|
|
1072
|
+
_rate_lock = threading.Lock()
|
|
1073
|
+
|
|
1074
|
+
# (capacity, refill_per_second) per endpoint family
|
|
1075
|
+
_RATE_LIMITS = {
|
|
1076
|
+
"chat": (30, 0.5), # 30 burst, 30/min sustained
|
|
1077
|
+
"agent": (10, 0.1), # 10 burst, 6/min sustained (agent is expensive)
|
|
1078
|
+
"upload": (20, 0.2), # 20 burst, 12/min sustained
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
def enforce_rate_limit(email: str, bucket_key: str) -> None:
|
|
1083
|
+
"""Raise HTTP 429 if user exceeds the bucket. No-op when disabled or unauth'd."""
|
|
1084
|
+
if not _RATE_LIMIT_ENABLED or not email:
|
|
1085
|
+
return
|
|
1086
|
+
cap, refill = _RATE_LIMITS.get(bucket_key, (60, 1.0))
|
|
1087
|
+
key = f"{email}:{bucket_key}"
|
|
1088
|
+
now = time.time()
|
|
1089
|
+
with _rate_lock:
|
|
1090
|
+
bucket = _rate_buckets.get(key)
|
|
1091
|
+
if bucket is None:
|
|
1092
|
+
_rate_buckets[key] = {"tokens": cap - 1, "ts": now}
|
|
1093
|
+
return
|
|
1094
|
+
elapsed = now - bucket["ts"]
|
|
1095
|
+
bucket["tokens"] = min(cap, bucket["tokens"] + elapsed * refill)
|
|
1096
|
+
bucket["ts"] = now
|
|
1097
|
+
if bucket["tokens"] < 1:
|
|
1098
|
+
retry_after = max(1, int((1 - bucket["tokens"]) / refill))
|
|
1099
|
+
raise HTTPException(
|
|
1100
|
+
status_code=429,
|
|
1101
|
+
detail=f"Rate limit exceeded for {bucket_key}. Retry after {retry_after}s.",
|
|
1102
|
+
headers={"Retry-After": str(retry_after)},
|
|
1103
|
+
)
|
|
1104
|
+
bucket["tokens"] -= 1
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
# ── File magic-number validation ──────────────────────────────────────────────
|
|
1108
|
+
# Map of extension → list of byte-prefix signatures (any-match). Files without
|
|
1109
|
+
# distinctive magic (.txt, .md, .csv) skip the check.
|
|
1110
|
+
_FILE_MAGIC: Dict[str, List[bytes]] = {
|
|
1111
|
+
".pdf": [b"%PDF-"],
|
|
1112
|
+
".docx": [b"PK\x03\x04"],
|
|
1113
|
+
".xlsx": [b"PK\x03\x04"],
|
|
1114
|
+
".pptx": [b"PK\x03\x04"],
|
|
1115
|
+
".zip": [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"],
|
|
1116
|
+
".png": [b"\x89PNG\r\n\x1a\n"],
|
|
1117
|
+
".jpg": [b"\xff\xd8\xff"],
|
|
1118
|
+
".jpeg": [b"\xff\xd8\xff"],
|
|
1119
|
+
".gif": [b"GIF87a", b"GIF89a"],
|
|
1120
|
+
}
|
|
1121
|
+
|
|
1122
|
+
|
|
1123
|
+
def _bytes_match_extension(data: bytes, ext: str) -> bool:
|
|
1124
|
+
"""Return True if the file bytes match the claimed extension (or extension has no magic)."""
|
|
1125
|
+
ext = (ext or "").lower()
|
|
1126
|
+
signatures = _FILE_MAGIC.get(ext)
|
|
1127
|
+
if not signatures:
|
|
1128
|
+
return True # text-like formats — no reliable magic
|
|
1129
|
+
head = data[:16]
|
|
1130
|
+
return any(head.startswith(sig) for sig in signatures)
|
|
1131
|
+
|
|
1051
1132
|
def require_admin(request: Request) -> tuple[str, Dict]:
|
|
1052
1133
|
users = load_users()
|
|
1053
1134
|
token = _extract_bearer_token(request)
|
|
@@ -1414,18 +1495,31 @@ async def unload_idle_models_loop() -> None:
|
|
|
1414
1495
|
except Exception as e:
|
|
1415
1496
|
logging.warning("Idle model unload failed: %s", e)
|
|
1416
1497
|
|
|
1498
|
+
def _spawn(coro, *, name: str):
|
|
1499
|
+
"""Fire-and-forget asyncio task that logs exceptions instead of swallowing them."""
|
|
1500
|
+
task = asyncio.create_task(coro, name=name)
|
|
1501
|
+
def _on_done(t: asyncio.Task) -> None:
|
|
1502
|
+
if t.cancelled():
|
|
1503
|
+
return
|
|
1504
|
+
exc = t.exception()
|
|
1505
|
+
if exc is not None:
|
|
1506
|
+
logging.warning("background task '%s' failed: %s", name, exc)
|
|
1507
|
+
task.add_done_callback(_on_done)
|
|
1508
|
+
return task
|
|
1509
|
+
|
|
1510
|
+
|
|
1417
1511
|
@asynccontextmanager
|
|
1418
1512
|
async def lifespan(app: FastAPI):
|
|
1419
1513
|
try:
|
|
1420
1514
|
print(f"🧭 Lattice AI mode: {APP_MODE}")
|
|
1421
1515
|
if ENABLE_TELEGRAM:
|
|
1422
1516
|
from telegram_bot import run_bot
|
|
1423
|
-
|
|
1517
|
+
_spawn(run_bot(), name="telegram_bot")
|
|
1424
1518
|
print("🚀 Telegram Bot Bridge activated!")
|
|
1425
1519
|
else:
|
|
1426
1520
|
print("⏭️ Telegram Bot Bridge disabled for this mode.")
|
|
1427
|
-
|
|
1428
|
-
|
|
1521
|
+
_spawn(unload_idle_models_loop(), name="unload_idle_models")
|
|
1522
|
+
_spawn(autoload_default_model(), name="autoload_default_model")
|
|
1429
1523
|
except Exception as e:
|
|
1430
1524
|
print(f"⚠️ Startup sequence failed: {e}")
|
|
1431
1525
|
try:
|
|
@@ -1491,7 +1585,7 @@ async def login(req: UserLogin):
|
|
|
1491
1585
|
"is_admin": role == "admin",
|
|
1492
1586
|
"token": token,
|
|
1493
1587
|
})
|
|
1494
|
-
response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=
|
|
1588
|
+
response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=_SESSION_TTL)
|
|
1495
1589
|
return response
|
|
1496
1590
|
|
|
1497
1591
|
@app.get("/auth/sso/config")
|
|
@@ -2349,11 +2443,28 @@ def install_engine(engine: str) -> Dict:
|
|
|
2349
2443
|
"installed": engine_installed(engine),
|
|
2350
2444
|
}
|
|
2351
2445
|
if engine == "ollama" and completed.returncode == 0 and shutil.which("ollama"):
|
|
2446
|
+
# Skip if already running to avoid orphan daemons.
|
|
2447
|
+
already_up = False
|
|
2352
2448
|
try:
|
|
2353
|
-
subprocess.
|
|
2354
|
-
|
|
2449
|
+
probe = subprocess.run(["ollama", "list"], capture_output=True, timeout=2, check=False)
|
|
2450
|
+
already_up = probe.returncode == 0
|
|
2355
2451
|
except Exception:
|
|
2356
|
-
|
|
2452
|
+
already_up = False
|
|
2453
|
+
if already_up:
|
|
2454
|
+
result["daemon_started"] = "already_running"
|
|
2455
|
+
else:
|
|
2456
|
+
try:
|
|
2457
|
+
# Detach so the daemon survives this request but doesn't become our zombie.
|
|
2458
|
+
subprocess.Popen(
|
|
2459
|
+
["ollama", "serve"],
|
|
2460
|
+
stdout=subprocess.DEVNULL,
|
|
2461
|
+
stderr=subprocess.DEVNULL,
|
|
2462
|
+
start_new_session=True,
|
|
2463
|
+
)
|
|
2464
|
+
result["daemon_started"] = True
|
|
2465
|
+
except Exception as e:
|
|
2466
|
+
logging.warning("ollama serve spawn failed: %s", e)
|
|
2467
|
+
result["daemon_started"] = False
|
|
2357
2468
|
return result
|
|
2358
2469
|
|
|
2359
2470
|
CLOUD_VERIFY_CACHE: Dict[str, Dict] = {}
|
|
@@ -2623,6 +2734,7 @@ async def unload_all_models(request: Request):
|
|
|
2623
2734
|
@app.post("/chat")
|
|
2624
2735
|
async def chat(req: ChatRequest, request: Request):
|
|
2625
2736
|
current_user = require_user(request)
|
|
2737
|
+
enforce_rate_limit(current_user, "chat")
|
|
2626
2738
|
img_len = len(req.image_data) if req.image_data else 0
|
|
2627
2739
|
print(
|
|
2628
2740
|
f"🧪 /chat request: stream={req.stream} image_data_len={img_len} "
|
|
@@ -3142,6 +3254,7 @@ def _extract_agent_action(raw: str) -> Dict:
|
|
|
3142
3254
|
async def agent(req: AgentRequest, request: Request):
|
|
3143
3255
|
"""Natural-language local agent loop for Telegram and future clients."""
|
|
3144
3256
|
current_user = require_user(request)
|
|
3257
|
+
enforce_rate_limit(current_user, "agent")
|
|
3145
3258
|
if not router.current_model_id:
|
|
3146
3259
|
raise HTTPException(status_code=400, detail="No model loaded. Call /models/load first.")
|
|
3147
3260
|
|
|
@@ -3378,21 +3491,28 @@ async def tools_pdf_pages(path: str, request: Request):
|
|
|
3378
3491
|
target = Path(path).expanduser().resolve()
|
|
3379
3492
|
if not target.exists() or not target.is_file():
|
|
3380
3493
|
raise HTTPException(status_code=404, detail="File not found")
|
|
3494
|
+
import fitz # PyMuPDF
|
|
3495
|
+
doc = None
|
|
3381
3496
|
try:
|
|
3382
|
-
import fitz # PyMuPDF
|
|
3383
3497
|
doc = fitz.open(str(target))
|
|
3498
|
+
total = len(doc)
|
|
3384
3499
|
pages = []
|
|
3385
3500
|
for i, page in enumerate(doc):
|
|
3386
3501
|
if i >= 20: # 최대 20페이지
|
|
3387
3502
|
break
|
|
3388
|
-
mat = fitz.Matrix(1.5, 1.5)
|
|
3503
|
+
mat = fitz.Matrix(1.5, 1.5)
|
|
3389
3504
|
pix = page.get_pixmap(matrix=mat)
|
|
3390
3505
|
b64 = base64.b64encode(pix.tobytes("png")).decode()
|
|
3391
3506
|
pages.append({"page": i + 1, "b64": b64})
|
|
3392
|
-
|
|
3393
|
-
return {"total": len(doc), "pages": pages}
|
|
3507
|
+
return {"total": total, "pages": pages}
|
|
3394
3508
|
except Exception as e:
|
|
3395
3509
|
raise HTTPException(status_code=500, detail=f"PDF 렌더링 실패: {e}")
|
|
3510
|
+
finally:
|
|
3511
|
+
if doc is not None:
|
|
3512
|
+
try:
|
|
3513
|
+
doc.close()
|
|
3514
|
+
except Exception as e:
|
|
3515
|
+
logging.warning("fitz doc close failed: %s", e)
|
|
3396
3516
|
|
|
3397
3517
|
|
|
3398
3518
|
@app.get("/tools/download")
|
|
@@ -3416,6 +3536,7 @@ async def tools_download(path: str, request: Request):
|
|
|
3416
3536
|
@app.post("/upload/document")
|
|
3417
3537
|
async def upload_document(request: Request, file: UploadFile = File(...)):
|
|
3418
3538
|
current_user = require_user(request)
|
|
3539
|
+
enforce_rate_limit(current_user, "upload")
|
|
3419
3540
|
"""Upload a document and extract text (PDF, DOCX, XLSX, PPTX, TXT, MD, CSV)."""
|
|
3420
3541
|
suffix = Path(file.filename or "upload").suffix.lower()
|
|
3421
3542
|
allowed = {".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".csv"}
|
|
@@ -3424,6 +3545,9 @@ async def upload_document(request: Request, file: UploadFile = File(...)):
|
|
|
3424
3545
|
contents = await file.read()
|
|
3425
3546
|
if len(contents) > 10 * 1024 * 1024:
|
|
3426
3547
|
raise HTTPException(status_code=400, detail="파일이 너무 큽니다. 최대 10MB.")
|
|
3548
|
+
# MIME sniff — verify the bytes actually match the claimed extension (cheap header check)
|
|
3549
|
+
if not _bytes_match_extension(contents, suffix):
|
|
3550
|
+
raise HTTPException(status_code=400, detail=f"파일 내용이 확장자({suffix})와 일치하지 않습니다.")
|
|
3427
3551
|
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
|
3428
3552
|
tmp.write(contents)
|
|
3429
3553
|
tmp_path = tmp.name
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Unit tests for security-sensitive helpers in server.py."""
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
import pytest
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
8
|
+
|
|
9
|
+
from server import (
|
|
10
|
+
_bytes_match_extension,
|
|
11
|
+
_rate_buckets,
|
|
12
|
+
enforce_rate_limit,
|
|
13
|
+
hash_password,
|
|
14
|
+
verify_password,
|
|
15
|
+
_agent_risk,
|
|
16
|
+
_LOCAL_WRITE_BLOCKED_PREFIXES,
|
|
17
|
+
)
|
|
18
|
+
from fastapi import HTTPException
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Password hashing
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def test_password_hash_roundtrip():
|
|
26
|
+
h = hash_password("hunter2")
|
|
27
|
+
assert verify_password("hunter2", h)
|
|
28
|
+
assert not verify_password("wrong", h)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_password_hash_not_plaintext():
|
|
32
|
+
h = hash_password("hunter2")
|
|
33
|
+
assert "hunter2" not in h
|
|
34
|
+
assert ":" in h # salt:hash format
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_password_hash_unique_per_call():
|
|
38
|
+
"""Same input must yield different hashes (salted)."""
|
|
39
|
+
h1 = hash_password("same")
|
|
40
|
+
h2 = hash_password("same")
|
|
41
|
+
assert h1 != h2
|
|
42
|
+
assert verify_password("same", h1)
|
|
43
|
+
assert verify_password("same", h2)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# MIME / magic-number sniffing
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
def test_bytes_match_pdf():
|
|
51
|
+
assert _bytes_match_extension(b"%PDF-1.7\n...", ".pdf")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_bytes_match_pdf_rejects_zip_bytes():
|
|
55
|
+
assert not _bytes_match_extension(b"PK\x03\x04...", ".pdf")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_bytes_match_docx_is_zip():
|
|
59
|
+
assert _bytes_match_extension(b"PK\x03\x04...", ".docx")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_bytes_match_png():
|
|
63
|
+
assert _bytes_match_extension(b"\x89PNG\r\n\x1a\nrest", ".png")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_bytes_match_txt_skips_check():
|
|
67
|
+
"""Text-like formats have no magic — always accepted."""
|
|
68
|
+
assert _bytes_match_extension(b"anything goes", ".txt")
|
|
69
|
+
assert _bytes_match_extension(b"anything goes", ".md")
|
|
70
|
+
assert _bytes_match_extension(b"anything goes", ".csv")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Rate limiting
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
def test_rate_limit_allows_within_capacity():
|
|
78
|
+
_rate_buckets.clear()
|
|
79
|
+
for _ in range(10):
|
|
80
|
+
enforce_rate_limit("test_user@example.com", "agent") # capacity 10
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_rate_limit_blocks_over_capacity():
|
|
84
|
+
_rate_buckets.clear()
|
|
85
|
+
for _ in range(10):
|
|
86
|
+
enforce_rate_limit("burst_user@example.com", "agent")
|
|
87
|
+
with pytest.raises(HTTPException) as exc:
|
|
88
|
+
enforce_rate_limit("burst_user@example.com", "agent")
|
|
89
|
+
assert exc.value.status_code == 429
|
|
90
|
+
assert "Retry-After" in exc.value.headers
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_rate_limit_skips_unauth():
|
|
94
|
+
"""Empty email = no rate-limit (anon health-check style)."""
|
|
95
|
+
_rate_buckets.clear()
|
|
96
|
+
for _ in range(200):
|
|
97
|
+
enforce_rate_limit("", "agent") # never raises
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# Harness risk classification
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
def test_agent_risk_read_only_is_low():
|
|
105
|
+
assert _agent_risk("local_read", {"path": "/tmp/x"}) == "low"
|
|
106
|
+
assert _agent_risk("list_dir", {}) == "low"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_agent_risk_write_is_medium():
|
|
110
|
+
assert _agent_risk("write_file", {"path": "out.txt"}) == "medium"
|
|
111
|
+
assert _agent_risk("local_write", {"path": "/tmp/safe.txt"}) == "medium"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def test_agent_risk_run_command_is_high():
|
|
115
|
+
assert _agent_risk("run_command", {"command": "ls"}) == "high"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_agent_risk_system_path_write_upgraded_to_high():
|
|
119
|
+
for prefix in _LOCAL_WRITE_BLOCKED_PREFIXES:
|
|
120
|
+
risk = _agent_risk("local_write", {"path": prefix + "evil.txt"})
|
|
121
|
+
assert risk == "high", f"prefix {prefix} should upgrade local_write to high"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_agent_risk_unknown_action_defaults_medium():
|
|
125
|
+
assert _agent_risk("nonexistent_tool_xyz", {}) == "medium"
|