ltcai 0.1.31 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ """Audit logging, sensitivity analysis, and admin reporting."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ import threading
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any, Callable, Dict, List, Optional
11
+
12
# Serializes the read-modify-write cycle on the audit file performed by
# append_audit_event.
_history_lock = threading.Lock()

# Detection rules scanned over message content by classify_sensitive_message.
# Each rule carries a machine-readable key, a Korean display label, a severity
# level ("low" / "medium" / "high") and a regex that is applied with
# re.IGNORECASE.
SENSITIVE_PATTERNS = [
    # Resident registration number: 6 digits, optional "-" or space, a digit 1-4, 6 digits.
    {"key": "rrn", "label": "주민등록번호", "severity": "high", "pattern": r"\b\d{6}[- ]?[1-4]\d{6}\b"},
    # Card number: 13 to 19 digits, each optionally followed by a space or "-".
    {"key": "card", "label": "카드번호", "severity": "high", "pattern": r"\b(?:\d[ -]?){13,19}\b"},
    # Account number: an account/bank keyword, up to 12 arbitrary characters, then a digit run.
    {"key": "account", "label": "계좌번호", "severity": "medium", "pattern": r"(?:계좌|account|bank).{0,12}\d[\d -]{8,24}"},
    # Password / credentials: a credential keyword, ":" or "=", then at least 4 non-separator characters.
    {"key": "password", "label": "비밀번호/인증정보", "severity": "high", "pattern": r"(?:password|passwd|비밀번호|암호|token|api[_ -]?key|secret)\s*[:=]\s*[^\s,;]{4,}"},
    # E-mail address.
    {"key": "email", "label": "이메일", "severity": "low", "pattern": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"},
    # Phone number: a 01x / 02 / 0xx prefix followed by 3-4 and 4 digit groups.
    {"key": "phone", "label": "전화번호", "severity": "medium", "pattern": r"\b(?:01[016789]|02|0[3-6][1-5])[- ]?\d{3,4}[- ]?\d{4}\b"},
    # Street address written in Hangul (city/district words followed by a road name).
    {"key": "address", "label": "주소", "severity": "medium", "pattern": r"(?:[가-힣]+(?:시|도)\s*)?[가-힣]+(?:시|군|구)\s+[가-힣0-9\s-]+(?:로|길)\s*\d*"},
    # Health / medical information: presence of any listed keyword.
    {"key": "health", "label": "건강/의료정보", "severity": "medium", "pattern": r"(?:진단|병명|처방|복용|수술|장애|임신|혈액형|알레르기|medical|diagnosis)"},
]
# Numeric rank of each severity level; used to pick the highest severity among matches.
SEVERITY_SCORE = {"low": 1, "medium": 2, "high": 3}
# Event types that build_admin_audit_report counts as deletions.
AUDIT_DELETE_EVENTS = {"conversation_delete", "history_delete", "user_delete"}
26
+
27
+
28
def get_audit_log(audit_file: Path) -> List[Dict]:
    """Load the audit log stored as a JSON array in *audit_file*.

    Returns an empty list when the file does not exist, cannot be read or
    parsed (a warning is logged in that case), or when the top-level JSON
    value is not a list.
    """
    if os.path.exists(audit_file):
        try:
            with open(audit_file, "r", encoding="utf-8") as handle:
                loaded = json.load(handle)
        except Exception as e:
            # Best effort: an unreadable log is reported and treated as empty.
            logging.warning("get_audit_log failed: %s", e)
            return []
        if isinstance(loaded, list):
            return loaded
    return []
38
+
39
+
40
def append_audit_event(audit_file: Path, event_type: str, **payload) -> None:
    """Append one event to the JSON audit log, keeping at most 5000 entries.

    The event is a dict holding ``event_type``, a ``timestamp`` taken from
    ``datetime.now().isoformat()`` and every keyword in *payload* (a payload
    key named ``timestamp`` replaces the generated one).  The log is rewritten
    through a ``.tmp`` sibling file that is then moved over *audit_file*.

    Never raises: any failure is logged as a warning and the event is dropped.
    """
    try:
        record = {"event_type": event_type, "timestamp": datetime.now().isoformat()}
        record.update(payload)
        # The whole read-modify-write cycle runs under the module lock.
        with _history_lock:
            log = get_audit_log(audit_file)
            log.append(record)
            if len(log) > 5000:
                # Keep only the newest 5000 events.
                log = log[-5000:]
            scratch_path = f"{audit_file!s}.tmp"
            with open(scratch_path, "w", encoding="utf-8") as out:
                json.dump(log, out, ensure_ascii=False, indent=2)
            os.replace(scratch_path, audit_file)
    except Exception as e:
        logging.warning("append_audit_event failed: %s", e)
58
+
59
+
60
def mask_sensitive_text(text: str, matches: List[Dict]) -> str:
    """Return *text* with every matched span partially replaced by asterisks.

    Args:
        text: The text to mask.
        matches: Dicts with integer ``"start"`` and ``"end"`` offsets into
            *text* (other keys are ignored).  Order does not matter.

    Returns:
        A copy of *text* where each span of up to 4 characters is fully
        starred and each longer span keeps its first and last two characters
        around at most 12 asterisks.

    Overlapping spans are merged into one span before masking.  A mask can be
    shorter than the span it replaces, so masking overlapping spans one after
    another would apply stale offsets and swallow text that follows the match.
    """
    limit = len(text)
    spans = sorted((max(0, m["start"]), min(limit, m["end"])) for m in matches)

    merged: List[List[int]] = []
    for start, end in spans:
        if start >= end:
            continue  # empty or out-of-range span: nothing to mask
        if merged and start < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    pieces = []
    cursor = 0
    for start, end in merged:
        value = text[start:end]
        if len(value) <= 4:
            replacement = "*" * len(value)
        else:
            replacement = value[:2] + "*" * min(len(value) - 4, 12) + value[-2:]
        pieces.append(text[cursor:start])
        pieces.append(replacement)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
70
+
71
+
72
def classify_sensitive_message(item: Dict, index: int) -> Dict:
    """Scan one history message for sensitive data and summarize the findings.

    Args:
        item: Message dict; ``content``, ``role``, ``user_email``,
            ``user_nickname`` and ``timestamp`` are read when present.
        index: Position of the message, echoed back in the result.

    Returns:
        A dict with the message metadata, the highest severity found
        (``"none"`` when nothing matched), the sorted set of matched labels,
        the raw match list under ``risk_fields`` and a masked preview of the
        first 240 characters of the content.
    """
    # A message stored with content=None must scan as empty text, not as the
    # literal string "None".
    raw_content = item.get("content")
    content = "" if raw_content is None else str(raw_content)

    found = []
    seen: set = set()
    for rule in SENSITIVE_PATTERNS:
        for match in re.finditer(rule["pattern"], content, flags=re.IGNORECASE):
            key = (rule["key"], match.start(), match.end())
            if key in seen:
                continue
            seen.add(key)
            found.append({
                "type": rule["key"],
                "label": rule["label"],
                "severity": rule["severity"],
                "start": match.start(),
                "end": match.end(),
            })

    severity = "none"
    if found:
        severity = max(found, key=lambda m: SEVERITY_SCORE[m["severity"]])["severity"]

    # Only matches that begin inside the preview window can be masked in it.
    preview_text = content[:240]
    preview_matches = [m for m in found if m["start"] < len(preview_text)]
    return {
        "index": index,
        "role": item.get("role", ""),
        "user_email": item.get("user_email"),
        "user_nickname": item.get("user_nickname") or item.get("user_email") or "Unknown",
        "timestamp": item.get("timestamp"),
        "sensitivity": severity,
        "labels": sorted({m["label"] for m in found}),
        "risk_fields": found,
        # The placeholder label means "no sensitive information detected".
        "compliance_fields": [] if found else ["민감정보 미검출"],
        "preview": mask_sensitive_text(preview_text, preview_matches),
    }
106
+
107
+
108
def build_sensitivity_report(history: List[Dict]) -> Dict:
    """Classify every message in *history* and aggregate the results.

    Returns a dict with a ``summary`` (message totals, risk rate in percent
    rounded to one decimal, counts per severity, per matched label and per
    user) plus the last 30 risky and the last 30 clean classified messages.
    """
    classified = []
    for position, message in enumerate(history):
        classified.append(classify_sensitive_message(message, position))

    flagged: List[Dict] = []
    clean: List[Dict] = []
    for entry in classified:
        if entry["risk_fields"]:
            flagged.append(entry)
        else:
            clean.append(entry)

    by_label: Dict[str, int] = {}
    by_user: Dict[str, int] = {}
    by_severity = {"high": 0, "medium": 0, "low": 0, "none": len(clean)}
    for entry in flagged:
        by_severity[entry["sensitivity"]] += 1
        who = entry.get("user_email") or entry.get("user_nickname") or "Unknown"
        by_user[who] = by_user.get(who, 0) + 1
        for hit in entry["risk_fields"]:
            label = hit["label"]
            by_label[label] = by_label.get(label, 0) + 1

    if classified:
        risk_rate = round((len(flagged) / len(classified)) * 100, 1)
    else:
        risk_rate = 0

    summary = {
        "total_messages": len(classified),
        "risky_messages": len(flagged),
        "compliant_messages": len(clean),
        "risk_rate": risk_rate,
        "severity_counts": by_severity,
        "field_counts": by_label,
        "user_counts": by_user,
    }
    return {
        "summary": summary,
        "risk_fields": flagged[-30:],
        "compliance_fields": clean[-30:],
    }
134
+
135
+
136
def build_admin_audit_report(
    audit_file: Path,
    users: Dict,
    *,
    get_user_role: Callable[[str, Optional[Dict]], str],
    graph_stats: Optional[Dict] = None,
) -> Dict:
    """Aggregate the audit log into an admin-facing activity report.

    Args:
        audit_file: Path of the JSON audit log to read.
        users: Mapping of e-mail to user record; ``nickname``, ``name`` and
            ``disabled`` are read from each record.
        get_user_role: Callback invoked as ``get_user_role(email, users)`` to
            resolve the role shown for a user.
        graph_stats: Optional dict; when truthy its ``total_nodes`` and
            ``total_edges`` are copied into the summary.

    Returns:
        A dict with overall ``summary`` counters, ``per_user`` buckets sorted
        by most recent activity, the last 50 events newest-first under
        ``recent_events``, and the last 30 sensitive and deletion events.
    """
    events = get_audit_log(audit_file)

    def _user_bucket(email: Optional[str], nickname: Optional[str] = None) -> Dict:
        # Fresh zeroed counters for one user, pre-filled from the user record.
        user = users.get(email or "", {})
        return {
            "email": email or "Unknown",
            "nickname": nickname or user.get("nickname") or user.get("name") or email or "Unknown",
            "role": get_user_role(email, users) if email else "unknown",
            "disabled": bool(user.get("disabled")) if user else False,
            "user_messages": 0, "assistant_messages": 0, "document_uploads": 0,
            "clear_events": 0, "delete_events": 0, "sensitive_events": 0,
            "high_sensitive_events": 0, "total_content_chars": 0, "last_activity_at": None,
        }

    per_user: Dict[str, Dict] = {}

    def ensure(email: Optional[str], nickname: Optional[str] = None) -> Dict:
        # Return the bucket for this user, creating it on first sight and
        # upgrading a placeholder nickname when a real one shows up later.
        key = email or nickname or "Unknown"
        if key not in per_user:
            per_user[key] = _user_bucket(email, nickname)
        elif nickname and per_user[key].get("nickname") in {"Unknown", email, None}:
            per_user[key]["nickname"] = nickname
        return per_user[key]

    # Seed a bucket for every known user so inactive users appear too.
    for email, user in users.items():
        ensure(email, user.get("nickname") or user.get("name"))

    summary: Dict[str, Any] = {
        "total_events": len(events), "chat_events": 0, "user_messages": 0,
        "assistant_messages": 0, "document_uploads": 0, "clear_events": 0,
        "delete_events": 0, "sensitive_events": 0, "high_sensitive_events": 0,
    }
    sensitive_events: List[Dict] = []
    deletion_events: List[Dict] = []

    for event in events:
        event_type = event.get("event_type")
        email = event.get("user_email")
        u = ensure(email, event.get("user_nickname"))
        ts = event.get("timestamp")
        # Timestamps are compared as strings.
        if ts and (not u["last_activity_at"] or ts > u["last_activity_at"]):
            u["last_activity_at"] = ts
        u["total_content_chars"] += int(event.get("content_chars") or event.get("extracted_chars") or 0)
        sensitivity = event.get("sensitivity") or "none"
        labels = event.get("sensitive_labels") or []
        is_sensitive = sensitivity != "none" or bool(labels)

        if event_type == "chat_message":
            summary["chat_events"] += 1
            if event.get("role") == "user":
                summary["user_messages"] += 1
                u["user_messages"] += 1
            elif event.get("role") == "assistant":
                summary["assistant_messages"] += 1
                u["assistant_messages"] += 1
        elif event_type == "document_upload":
            summary["document_uploads"] += 1
            u["document_uploads"] += 1
        elif event_type == "clear_command":
            summary["clear_events"] += 1
            u["clear_events"] += 1
        elif event_type in AUDIT_DELETE_EVENTS:
            summary["delete_events"] += 1
            u["delete_events"] += 1
            deletion_events.append(_public_audit_event(event))

        # Sensitivity is tracked independently of the event type.
        if is_sensitive:
            summary["sensitive_events"] += 1
            u["sensitive_events"] += 1
            if sensitivity == "high":
                summary["high_sensitive_events"] += 1
                u["high_sensitive_events"] += 1
            sensitive_events.append(_public_audit_event(event))

    recent = [_public_audit_event(e) for e in events[-50:]]

    result: Dict[str, Any] = {
        "summary": summary,
        "per_user": sorted(
            per_user.values(),
            key=lambda bucket: bucket.get("last_activity_at") or "",
            reverse=True,
        ),
        "recent_events": list(reversed(recent)),
        "sensitive_events": sensitive_events[-30:],
        "deletion_events": deletion_events[-30:],
    }
    if graph_stats:
        result["summary"]["graph_nodes"] = graph_stats.get("total_nodes", 0)
        result["summary"]["graph_edges"] = graph_stats.get("total_edges", 0)
    return result
236
+
237
+
238
def _public_audit_event(event: Dict) -> Dict:
    """Return a copy of *event* restricted to the fields safe to expose.

    Keys outside the allow-list are dropped.  The result keeps the key order
    of *event*, so serialized output is stable from run to run (iterating the
    allow-list set instead would yield a hash-dependent order).
    """
    allowed = frozenset({
        "event_type", "timestamp", "role", "user_email", "user_nickname", "source",
        "conversation_id", "command", "scope", "target_email", "filename", "mime_type",
        "ext", "bytes", "extracted_chars", "graph_node", "keep_last", "removed", "kept",
        "started_at", "sensitivity", "sensitive_labels", "content_preview", "content_chars",
    })
    return {k: v for k, v in event.items() if k in allowed}
@@ -0,0 +1,131 @@
1
+ """Password hashing, rate limiting, IP detection, file-magic validation."""
2
+
3
+ import hashlib
4
+ import ipaddress
5
+ import re
6
+ import secrets
7
+ import threading
8
+ import time
9
+ from typing import Dict, List, Optional
10
+
11
+ from fastapi import HTTPException
12
+
13
+
14
def hash_password(password: str) -> str:
    """Hash *password* with scrypt under a fresh random salt.

    Returns ``"<salt>:<key>"`` where the salt is 32 hex characters and the
    key is the hex-encoded scrypt output (n=16384, r=8, p=1).  The salt's hex
    text itself, encoded to bytes, is what is fed to scrypt.
    """
    salt_hex = secrets.token_hex(16)
    derived = hashlib.scrypt(
        password.encode(),
        salt=salt_hex.encode(),
        n=16384,
        r=8,
        p=1,
    )
    return ":".join((salt_hex, derived.hex()))
18
+
19
+
20
def verify_password(password: str, hashed: str) -> bool:
    """Check *password* against a ``"<salt>:<key>"`` string.

    The key is recomputed with scrypt (n=16384, r=8, p=1) and compared with
    ``secrets.compare_digest``.  Any error, including a malformed *hashed*
    value, yields ``False``.
    """
    try:
        salt_text, expected_hex = hashed.split(":", 1)
        candidate = hashlib.scrypt(
            password.encode(),
            salt=salt_text.encode(),
            n=16384,
            r=8,
            p=1,
        )
        return secrets.compare_digest(candidate.hex(), expected_hex)
    except Exception:
        return False
27
+
28
+
29
def host_is_loopback(host: str) -> bool:
    """Tell whether *host* names the local machine.

    ``"localhost"``, ``"127.0.0.1"`` and ``"::1"`` are accepted directly; any
    other value must parse as an IP address that ``ipaddress`` reports as
    loopback.  Values that are not IP addresses return ``False``.
    """
    if host not in {"localhost", "127.0.0.1", "::1"}:
        try:
            parsed = ipaddress.ip_address(host)
        except ValueError:
            return False
        return parsed.is_loopback
    return True
36
+
37
+
38
def client_ip(request) -> str:
    """Return the caller's IP address for *request*.

    The first comma-separated entry of the ``CF-Connecting-IP`` header wins,
    then that of ``X-Forwarded-For``; without either, ``request.client.host``
    is used, or ``"unknown"`` when the request has no client.
    """
    # NOTE(review): header values are used as-is; confirm the deployment's
    # proxy overwrites any client-supplied copies of these headers.
    headers = request.headers
    forwarded = headers.get("CF-Connecting-IP") or headers.get("X-Forwarded-For")
    if forwarded:
        first_hop, _, _ = forwarded.partition(",")
        return first_hop.strip()
    if request.client:
        return request.client.host
    return "unknown"
44
+
45
+
46
# Leading byte signatures accepted for each (lower-case, dot-prefixed) extension.
_FILE_MAGIC: Dict[str, List[bytes]] = {
    ".pdf": [b"%PDF-"],
    ".docx": [b"PK\x03\x04"],
    ".xlsx": [b"PK\x03\x04"],
    ".pptx": [b"PK\x03\x04"],
    ".zip": [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"],
    ".png": [b"\x89PNG\r\n\x1a\n"],
    ".jpg": [b"\xff\xd8\xff"],
    ".jpeg": [b"\xff\xd8\xff"],
    ".gif": [b"GIF87a", b"GIF89a"],
}


def bytes_match_extension(data: bytes, ext: str) -> bool:
    """Check that *data* starts with a signature registered for *ext*.

    *ext* is compared case-insensitively.  Extensions without a registered
    signature (including ``None`` and ``""``) are not checked and always
    return ``True``.
    """
    known_signatures = _FILE_MAGIC.get((ext or "").lower())
    if known_signatures:
        # startswith accepts a tuple and succeeds when any prefix matches.
        return data[:16].startswith(tuple(known_signatures))
    return True
66
+
67
+
68
def redact_secret_text(text: str) -> str:
    """Replace credentials found in *text* with ``[REDACTED]``.

    ``<keyword> : value`` / ``<keyword> = value`` pairs (keyword being an
    api-key, secret, token or password word and value at least 12 characters)
    become ``<keyword>=[REDACTED]``.  Bare tokens starting with ``sk-``,
    ``xai-`` or ``gsk_`` followed by 16 or more characters become
    ``[REDACTED]``.  Falsy input returns ``""``.
    """
    if not text:
        return ""

    keyed_pattern = r"(?i)(api[_ -]?key|secret|token|password|passwd)\s*[:=]\s*['\"]?([A-Za-z0-9_\-\.]{12,})['\"]?"
    bare_token_patterns = (
        r"\b(sk-[A-Za-z0-9_\-]{16,})\b",
        r"\b(xai-[A-Za-z0-9_\-]{16,})\b",
        r"\b(gsk_[A-Za-z0-9_\-]{16,})\b",
    )

    # Keyed secrets keep their keyword so the reader still sees what was hidden.
    cleaned = re.sub(keyed_pattern, lambda m: f"{m.group(1)}=[REDACTED]", str(text))
    for token_pattern in bare_token_patterns:
        cleaned = re.sub(token_pattern, "[REDACTED]", cleaned)
    return cleaned
81
+
82
+
83
+ # ── IP-based rate limiting (registration / login) ────────────────────────────
84
# Call timestamps recorded per (ip, action) pair, guarded by _ip_rate_lock.
_ip_rate_windows: dict = {}
_ip_rate_lock = threading.Lock()


def check_ip_rate_limit(ip: str, action: str, max_calls: int, window_secs: float) -> None:
    """Record one call for ``(ip, action)`` or reject it when over the limit.

    Only calls made within the last *window_secs* seconds count.

    Raises:
        HTTPException: status 429 when *max_calls* calls are already recorded
            inside the window; the rejected call is not recorded.
    """
    now = time.time()
    oldest_allowed = now - window_secs
    window_key = (ip, action)
    with _ip_rate_lock:
        recent = []
        for stamp in _ip_rate_windows.get(window_key, []):
            if stamp > oldest_allowed:
                recent.append(stamp)
        if len(recent) >= max_calls:
            # Message: "Too many requests. Please try again shortly."
            raise HTTPException(status_code=429, detail="요청이 너무 많습니다. 잠시 후 다시 시도하세요.")
        recent.append(now)
        _ip_rate_windows[window_key] = recent
98
+
99
+
100
+ # ── Per-user token-bucket rate limiting ──────────────────────────────────────
101
# Per-bucket settings: (capacity in tokens, tokens refilled per second).
_RATE_LIMITS = {
    "chat": (30, 0.5),
    "agent": (10, 0.1),
    "upload": (20, 0.2),
}
# Bucket state keyed by "<email>:<bucket_key>", guarded by _user_rate_lock.
_rate_buckets: Dict[str, Dict[str, float]] = {}
_user_rate_lock = threading.Lock()


def enforce_rate_limit(email: str, bucket_key: str, *, enabled: bool = True) -> None:
    """Consume one token from the user's bucket or reject the call.

    Args:
        email: User identity; an empty value disables the check.
        bucket_key: Name of the limit to apply; names missing from
            ``_RATE_LIMITS`` use a capacity of 60 refilled at 1 token/second.
        enabled: When false the call is a no-op.

    Raises:
        HTTPException: status 429 with a ``Retry-After`` header when the
            bucket holds less than one token.
    """
    import math  # function-scope import keeps this block self-contained

    if not enabled or not email:
        return
    cap, refill = _RATE_LIMITS.get(bucket_key, (60, 1.0))
    key = f"{email}:{bucket_key}"
    now = time.time()
    with _user_rate_lock:
        bucket = _rate_buckets.get(key)
        if bucket is None:
            # First call: start from a full bucket and spend one token.
            _rate_buckets[key] = {"tokens": cap - 1, "ts": now}
            return
        elapsed = now - bucket["ts"]
        bucket["tokens"] = min(cap, bucket["tokens"] + elapsed * refill)
        bucket["ts"] = now
        if bucket["tokens"] < 1:
            # Round up: a truncated wait would invite a retry that is
            # rejected again because the token has not fully refilled yet.
            retry_after = max(1, math.ceil((1 - bucket["tokens"]) / refill))
            raise HTTPException(
                status_code=429,
                detail=f"Rate limit exceeded for {bucket_key}. Retry after {retry_after}s.",
                headers={"Retry-After": str(retry_after)},
            )
        bucket["tokens"] -= 1
@@ -0,0 +1,72 @@
1
+ """File-backed session store with sliding-window TTL."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import secrets
7
+ import threading
8
+ import time
9
+ from pathlib import Path
10
+ from typing import Dict, Optional
11
+
12
# Seconds a session may go without a refresh before it is rejected (24 hours).
SESSION_TTL = 24 * 60 * 60
# A session's timestamp is bumped (and persisted) only when it is older than
# this many seconds (15 minutes).
SESSION_REFRESH_THRESHOLD = 15 * 60
# Guards the in-memory session map and the writes of its on-disk copy.
_lock = threading.Lock()
15
+
16
+
17
def _sessions_file(data_dir: Optional[Path] = None) -> Path:
    """Return the path of ``sessions.json``, creating its directory if needed.

    Without *data_dir* the directory comes from the ``LATTICEAI_DATA_DIR``
    environment variable, falling back to ``~/.ltcai``.
    """
    base = data_dir
    if not base:
        base = Path(os.getenv("LATTICEAI_DATA_DIR") or (Path.home() / ".ltcai"))
    base.mkdir(parents=True, exist_ok=True)
    return base / "sessions.json"
21
+
22
+
23
def load_sessions(data_dir: Optional[Path] = None) -> Dict[str, tuple]:
    """Read the persisted sessions as a ``{token: tuple}`` mapping.

    Returns an empty dict when the file is missing; any failure while reading
    or decoding it is logged as a warning and also yields an empty dict.
    """
    try:
        path = _sessions_file(data_dir)
        if not path.exists():
            return {}
        stored = json.loads(path.read_text())
        sessions: Dict[str, tuple] = {}
        for token, entry in stored.items():
            # JSON stores each entry as a list; callers expect tuples.
            sessions[token] = tuple(entry)
        return sessions
    except Exception as e:
        logging.warning("load_sessions failed (starting empty): %s", e)
    return {}
32
+
33
+
34
def persist_sessions(sessions: Dict[str, tuple], data_dir: Optional[Path] = None) -> None:
    """Write *sessions* to ``sessions.json`` as ``{token: [fields...]}``.

    The data is written to a ``.tmp`` sibling first and then moved over the
    real file with ``os.replace``, so an interrupted write cannot leave a
    truncated session file behind.

    Never raises: failures are logged as a warning.
    """
    try:
        target = _sessions_file(data_dir)
        payload = json.dumps({k: list(v) for k, v in sessions.items()}, ensure_ascii=False)
        tmp_path = Path(str(target) + ".tmp")
        # Same default text encoding as before, so existing readers are unaffected.
        tmp_path.write_text(payload)
        os.replace(tmp_path, target)
    except Exception as e:
        logging.warning("persist_sessions failed: %s", e)
39
+
40
+
41
class SessionStore:
    """Token-to-user session map kept in memory and mirrored to disk.

    Each entry is ``(email, last_refreshed)``.  A session is rejected once it
    has gone more than ``SESSION_TTL`` seconds without a refresh; a successful
    lookup refreshes the timestamp when it is older than
    ``SESSION_REFRESH_THRESHOLD``.
    """

    def __init__(self, data_dir: Optional[Path] = None):
        """Load any previously persisted sessions from *data_dir*."""
        self._data_dir = data_dir
        self._sessions: Dict[str, tuple] = load_sessions(data_dir)

    def _purge_expired(self, now: float) -> None:
        """Drop every session older than SESSION_TTL; caller must hold _lock."""
        stale = [
            token
            for token, (_, last_seen) in self._sessions.items()
            if now - last_seen > SESSION_TTL
        ]
        for token in stale:
            del self._sessions[token]

    def create(self, email: str) -> str:
        """Create, persist and return a new session token for *email*."""
        token = secrets.token_urlsafe(32)
        now = time.time()
        with _lock:
            # Expired tokens are otherwise removed only when presented again,
            # so abandoned sessions would accumulate in memory and on disk.
            self._purge_expired(now)
            self._sessions[token] = (email, now)
            persist_sessions(self._sessions, self._data_dir)
        return token

    def get_email(self, token: str) -> Optional[str]:
        """Return the e-mail bound to *token*, or ``None`` if unknown or expired."""
        now = time.time()
        with _lock:
            entry = self._sessions.get(token)
            if entry is None:
                return None
            email, last_seen = entry
            if now - last_seen > SESSION_TTL:
                self._sessions.pop(token, None)
                persist_sessions(self._sessions, self._data_dir)
                return None
            if now - last_seen > SESSION_REFRESH_THRESHOLD:
                # Sliding window: refresh, but not on every single request.
                self._sessions[token] = (email, now)
                persist_sessions(self._sessions, self._data_dir)
            return email

    def invalidate(self, token: str) -> None:
        """Remove *token* (if present) and persist the change."""
        with _lock:
            self._sessions.pop(token, None)
            persist_sessions(self._sessions, self._data_dir)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ltcai",
3
- "version": "0.1.31",
3
+ "version": "0.2.1",
4
4
  "description": "Lattice AI local MLX/cloud LLM workspace server",
5
5
  "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
6
6
  "repository": {
@@ -54,6 +54,7 @@
54
54
  "tools.py",
55
55
  "codex_telegram_bot.py",
56
56
  "mcp_registry.py",
57
+ "latticeai/",
57
58
  "skills/",
58
59
  "static/account.html",
59
60
  "static/chat.html",