ltcai 0.1.30 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +233 -184
- package/auto_setup.py +279 -55
- package/docs/CHANGELOG.md +69 -0
- package/knowledge_graph.py +1338 -3
- package/knowledge_graph_api.py +112 -0
- package/latticeai/__init__.py +1 -0
- package/latticeai/__pycache__/__init__.cpython-314.pyc +0 -0
- package/latticeai/api/__init__.py +1 -0
- package/latticeai/api/__pycache__/admin.cpython-314.pyc +0 -0
- package/latticeai/api/__pycache__/auth.cpython-314.pyc +0 -0
- package/latticeai/api/admin.py +187 -0
- package/latticeai/api/auth.py +233 -0
- package/latticeai/core/__init__.py +1 -0
- package/latticeai/core/__pycache__/__init__.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/audit.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/security.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/sessions.cpython-314.pyc +0 -0
- package/latticeai/core/audit.py +245 -0
- package/latticeai/core/security.py +131 -0
- package/latticeai/core/sessions.py +72 -0
- package/llm_router.py +13 -7
- package/local_knowledge_api.py +319 -0
- package/package.json +5 -2
- package/requirements.txt +2 -1
- package/server.py +290 -901
- package/static/graph.html +7 -2
- package/static/lattice-reference.css +220 -0
- package/static/scripts/graph.js +305 -4
package/auto_setup.py
CHANGED
|
@@ -38,6 +38,7 @@ import argparse
|
|
|
38
38
|
import json
|
|
39
39
|
import os
|
|
40
40
|
import platform
|
|
41
|
+
import re
|
|
41
42
|
import shutil
|
|
42
43
|
import subprocess
|
|
43
44
|
import sys
|
|
@@ -68,12 +69,19 @@ class SystemProfile:
|
|
|
68
69
|
arch: str = "" # x86_64 | arm64 | …
|
|
69
70
|
cpu_model: str = ""
|
|
70
71
|
cpu_cores: int = 0
|
|
72
|
+
cpu_logical_cores: int = 0
|
|
73
|
+
cpu_instructions: List[str] = field(default_factory=list)
|
|
71
74
|
ram_mb: int = 0
|
|
72
75
|
disk_free_mb: int = 0
|
|
73
76
|
gpu: GPUInfo = field(default_factory=GPUInfo)
|
|
74
77
|
package_manager: Optional[str] = None # winget | brew | apt | dnf | pacman
|
|
75
78
|
has_internet: bool = True
|
|
76
79
|
python_version: str = ""
|
|
80
|
+
is_wsl: bool = False
|
|
81
|
+
wsl_version: str = ""
|
|
82
|
+
cuda_available: bool = False
|
|
83
|
+
cuda_version: str = ""
|
|
84
|
+
tools: Dict[str, str] = field(default_factory=dict)
|
|
77
85
|
|
|
78
86
|
def score(self) -> int:
|
|
79
87
|
"""LLM 적합도 점수 (0..100). RECOMMEND 의 입력."""
|
|
@@ -105,13 +113,84 @@ def _run(cmd: List[str], timeout: float = 4.0) -> str:
|
|
|
105
113
|
return ""
|
|
106
114
|
|
|
107
115
|
|
|
116
|
+
def _windows_candidate_paths(binary: str) -> List[str]:
|
|
117
|
+
local_appdata = os.environ.get("LOCALAPPDATA", "")
|
|
118
|
+
program_files = os.environ.get("ProgramFiles", r"C:\Program Files")
|
|
119
|
+
program_files_x86 = os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)")
|
|
120
|
+
candidates = {
|
|
121
|
+
"ollama": [
|
|
122
|
+
str(Path(local_appdata) / "Programs" / "Ollama" / "ollama.exe") if local_appdata else "",
|
|
123
|
+
str(Path(program_files) / "Ollama" / "ollama.exe"),
|
|
124
|
+
],
|
|
125
|
+
"lms": [
|
|
126
|
+
str(Path(local_appdata) / "Programs" / "LM Studio" / "resources" / "app" / ".webpack" / "lms.exe") if local_appdata else "",
|
|
127
|
+
str(Path(program_files) / "LM Studio" / "resources" / "app" / ".webpack" / "lms.exe"),
|
|
128
|
+
],
|
|
129
|
+
"nvidia-smi": [
|
|
130
|
+
str(Path(program_files) / "NVIDIA Corporation" / "NVSMI" / "nvidia-smi.exe"),
|
|
131
|
+
str(Path(program_files_x86) / "NVIDIA Corporation" / "NVSMI" / "nvidia-smi.exe"),
|
|
132
|
+
],
|
|
133
|
+
}
|
|
134
|
+
return [item for item in candidates.get(binary, []) if item]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _which(binary: str) -> Optional[str]:
|
|
138
|
+
found = shutil.which(binary)
|
|
139
|
+
if found:
|
|
140
|
+
return found
|
|
141
|
+
if platform.system() == "Windows":
|
|
142
|
+
for candidate in _windows_candidate_paths(binary):
|
|
143
|
+
if Path(candidate).exists():
|
|
144
|
+
return candidate
|
|
145
|
+
return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _parse_windows_video_controllers(raw: str) -> List[Dict[str, Any]]:
|
|
149
|
+
controllers: List[Dict[str, Any]] = []
|
|
150
|
+
if not raw:
|
|
151
|
+
return controllers
|
|
152
|
+
try:
|
|
153
|
+
data = json.loads(raw)
|
|
154
|
+
if isinstance(data, dict):
|
|
155
|
+
data = [data]
|
|
156
|
+
if isinstance(data, list):
|
|
157
|
+
for item in data:
|
|
158
|
+
name = str(item.get("Name") or "").strip()
|
|
159
|
+
if not name:
|
|
160
|
+
continue
|
|
161
|
+
try:
|
|
162
|
+
ram_mb = int(item.get("AdapterRAM") or 0) // (1024 * 1024)
|
|
163
|
+
except Exception:
|
|
164
|
+
ram_mb = 0
|
|
165
|
+
controllers.append({"name": name, "vram_mb": ram_mb})
|
|
166
|
+
if controllers:
|
|
167
|
+
return controllers
|
|
168
|
+
except Exception:
|
|
169
|
+
pass
|
|
170
|
+
current: Dict[str, Any] = {}
|
|
171
|
+
for line in raw.splitlines():
|
|
172
|
+
if line.startswith("Name="):
|
|
173
|
+
if current:
|
|
174
|
+
controllers.append(current)
|
|
175
|
+
current = {"name": line.split("=", 1)[-1].strip(), "vram_mb": 0}
|
|
176
|
+
elif line.startswith("AdapterRAM=") and current:
|
|
177
|
+
try:
|
|
178
|
+
current["vram_mb"] = int(line.split("=", 1)[-1].strip()) // (1024 * 1024)
|
|
179
|
+
except ValueError:
|
|
180
|
+
current["vram_mb"] = 0
|
|
181
|
+
if current:
|
|
182
|
+
controllers.append(current)
|
|
183
|
+
return controllers
|
|
184
|
+
|
|
185
|
+
|
|
108
186
|
def _detect_gpu(prof_os: str, arch: str) -> GPUInfo:
|
|
109
187
|
"""OS별 휴리스틱으로 GPU 감지. 외부 라이브러리 없이 가능한 만큼만."""
|
|
110
188
|
gpu = GPUInfo()
|
|
111
189
|
|
|
112
190
|
# NVIDIA
|
|
113
|
-
|
|
114
|
-
|
|
191
|
+
nvidia_smi = _which("nvidia-smi")
|
|
192
|
+
if nvidia_smi:
|
|
193
|
+
info = _run([nvidia_smi, "--query-gpu=name,memory.total",
|
|
115
194
|
"--format=csv,noheader,nounits"])
|
|
116
195
|
if info.strip():
|
|
117
196
|
first = info.strip().splitlines()[0]
|
|
@@ -139,30 +218,29 @@ def _detect_gpu(prof_os: str, arch: str) -> GPUInfo:
|
|
|
139
218
|
|
|
140
219
|
# Windows
|
|
141
220
|
if prof_os == "windows" and gpu.vendor == "unknown":
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
gpu.vram_mb = ram
|
|
221
|
+
shell = _which("powershell") or _which("pwsh")
|
|
222
|
+
info = ""
|
|
223
|
+
if shell:
|
|
224
|
+
info = _run([
|
|
225
|
+
shell, "-NoProfile", "-Command",
|
|
226
|
+
"Get-CimInstance Win32_VideoController | Select-Object Name,AdapterRAM | ConvertTo-Json -Compress",
|
|
227
|
+
], timeout=8.0)
|
|
228
|
+
if not info:
|
|
229
|
+
info = _run(["wmic", "path", "win32_VideoController", "get",
|
|
230
|
+
"Name,AdapterRAM", "/format:list"])
|
|
231
|
+
controllers = _parse_windows_video_controllers(info)
|
|
232
|
+
if controllers:
|
|
233
|
+
primary = max(controllers, key=lambda item: int(item.get("vram_mb") or 0))
|
|
234
|
+
name = str(primary.get("name") or "")
|
|
235
|
+
gpu.model = name
|
|
236
|
+
gpu.vram_mb = int(primary.get("vram_mb") or 0)
|
|
237
|
+
low = name.lower()
|
|
238
|
+
if "nvidia" in low or "rtx" in low or "geforce" in low:
|
|
239
|
+
gpu.vendor = "nvidia"; gpu.sdk.append("cuda")
|
|
240
|
+
elif "amd" in low or "radeon" in low:
|
|
241
|
+
gpu.vendor = "amd"; gpu.sdk.extend(["directml", "vulkan"])
|
|
242
|
+
elif "intel" in low or "arc" in low or "iris" in low:
|
|
243
|
+
gpu.vendor = "intel"; gpu.sdk.extend(["directml", "vulkan"])
|
|
166
244
|
|
|
167
245
|
# Linux (lspci)
|
|
168
246
|
if prof_os == "linux" and gpu.vendor == "unknown":
|
|
@@ -179,16 +257,96 @@ def _detect_gpu(prof_os: str, arch: str) -> GPUInfo:
|
|
|
179
257
|
|
|
180
258
|
def _detect_package_manager(prof_os: str) -> Optional[str]:
|
|
181
259
|
if prof_os == "windows":
|
|
182
|
-
return "winget" if
|
|
260
|
+
return "winget" if _which("winget") else None
|
|
183
261
|
if prof_os == "darwin":
|
|
184
|
-
return "brew" if
|
|
262
|
+
return "brew" if _which("brew") else None
|
|
185
263
|
if prof_os == "linux":
|
|
186
264
|
for pm in ("apt", "dnf", "pacman", "zypper", "apk"):
|
|
187
|
-
if
|
|
265
|
+
if _which(pm):
|
|
188
266
|
return pm
|
|
189
267
|
return None
|
|
190
268
|
|
|
191
269
|
|
|
270
|
+
def _detect_tools() -> Dict[str, str]:
    """Map each known helper binary to its resolved path.

    Binaries that cannot be resolved are left out of the result.
    """
    wanted = (
        "ollama", "lms", "nvidia-smi", "nvcc", "winget", "brew",
        "apt", "git", "node", "python", "python3",
    )
    resolved = ((name, _which(name)) for name in wanted)
    return {name: path for name, path in resolved if path}
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _detect_wsl(prof_os: str) -> Tuple[bool, str]:
|
|
280
|
+
if prof_os != "linux":
|
|
281
|
+
return False, ""
|
|
282
|
+
raw = _read_text("/proc/version")
|
|
283
|
+
is_wsl = "microsoft" in raw.lower() or "wsl" in raw.lower()
|
|
284
|
+
version = "2" if "microsoft-standard" in raw.lower() or "wsl2" in raw.lower() else ("1" if is_wsl else "")
|
|
285
|
+
return is_wsl, version
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _detect_cuda() -> Tuple[bool, str]:
    """Detect CUDA tooling and return ``(available, version)``.

    ``available`` is true when either ``nvidia-smi`` or ``nvcc`` can be
    located; the tools are not required to run successfully.

    ``version`` is chosen in this order of preference:

    1. the toolkit release printed by ``nvcc --version``;
    2. the "CUDA Version" shown in the ``nvidia-smi`` banner;
    3. the driver version from ``nvidia-smi --query-gpu=driver_version``
       (kept as a last resort so the field is not empty -- note that this
       is a driver version, not a CUDA version);
    4. the empty string.
    """
    nvidia_smi = _which("nvidia-smi")
    nvcc = _which("nvcc")
    version = ""

    if nvidia_smi:
        # A plain `nvidia-smi` call prints a banner containing
        # "CUDA Version: X.Y"; the driver_version query alone would yield
        # something like 535.104.05, which is not a CUDA version.
        banner = _run([nvidia_smi], timeout=4.0)
        banner_match = re.search(r"CUDA Version:\s*([\d.]+)", banner)
        if banner_match:
            version = banner_match.group(1)
        else:
            lines = _run(
                [nvidia_smi, "--query-gpu=driver_version", "--format=csv,noheader"],
                timeout=4.0,
            ).splitlines()
            version = lines[0].strip() if lines else ""

    if nvcc:
        # The installed toolkit release wins over whatever the driver reports.
        release_match = re.search(r"release\s+([\d.]+)", _run([nvcc, "--version"], timeout=4.0))
        if release_match:
            version = release_match.group(1)

    return bool(nvidia_smi or nvcc), version
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _detect_cpu_details(prof_os: str) -> Tuple[str, int, int, List[str]]:
|
|
304
|
+
model = platform.processor() or ""
|
|
305
|
+
physical = os.cpu_count() or 0
|
|
306
|
+
logical = os.cpu_count() or 0
|
|
307
|
+
flags: List[str] = []
|
|
308
|
+
if prof_os == "darwin":
|
|
309
|
+
model = _run(["sysctl", "-n", "machdep.cpu.brand_string"]).strip() or model
|
|
310
|
+
try:
|
|
311
|
+
physical = int((_run(["sysctl", "-n", "hw.physicalcpu"]).strip() or physical))
|
|
312
|
+
logical = int((_run(["sysctl", "-n", "hw.logicalcpu"]).strip() or logical))
|
|
313
|
+
except ValueError:
|
|
314
|
+
pass
|
|
315
|
+
flags = [item.lower() for item in _run(["sysctl", "-n", "machdep.cpu.features"]).split()]
|
|
316
|
+
elif prof_os == "linux":
|
|
317
|
+
text = _read_text("/proc/cpuinfo")
|
|
318
|
+
for line in text.splitlines():
|
|
319
|
+
if line.lower().startswith("model name") and not model:
|
|
320
|
+
model = line.split(":", 1)[-1].strip()
|
|
321
|
+
if line.lower().startswith(("flags", "features")) and not flags:
|
|
322
|
+
flags = line.split(":", 1)[-1].strip().lower().split()
|
|
323
|
+
elif prof_os == "windows":
|
|
324
|
+
raw = _run(["wmic", "cpu", "get", "Name,NumberOfCores,NumberOfLogicalProcessors", "/format:list"])
|
|
325
|
+
for line in raw.splitlines():
|
|
326
|
+
key, _, value = line.partition("=")
|
|
327
|
+
if key == "Name" and value.strip():
|
|
328
|
+
model = value.strip()
|
|
329
|
+
elif key == "NumberOfCores" and value.strip():
|
|
330
|
+
try:
|
|
331
|
+
physical = int(value.strip())
|
|
332
|
+
except ValueError:
|
|
333
|
+
pass
|
|
334
|
+
elif key == "NumberOfLogicalProcessors" and value.strip():
|
|
335
|
+
try:
|
|
336
|
+
logical = int(value.strip())
|
|
337
|
+
except ValueError:
|
|
338
|
+
pass
|
|
339
|
+
try:
|
|
340
|
+
import ctypes
|
|
341
|
+
kernel32 = ctypes.windll.kernel32
|
|
342
|
+
feature_map = {6: "sse", 10: "sse2", 13: "sse3", 19: "neon", 28: "rdrand"}
|
|
343
|
+
flags.extend(name for code, name in feature_map.items() if kernel32.IsProcessorFeaturePresent(code))
|
|
344
|
+
except Exception:
|
|
345
|
+
pass
|
|
346
|
+
interesting = {"avx", "avx2", "avx512f", "fma", "neon", "sse4_2", "sse", "sse2", "sse3", "rdrand"}
|
|
347
|
+
return model, physical, logical, sorted({flag for flag in flags if flag in interesting})
|
|
348
|
+
|
|
349
|
+
|
|
192
350
|
def _has_module(name: str) -> bool:
|
|
193
351
|
try:
|
|
194
352
|
__import__(name)
|
|
@@ -204,9 +362,15 @@ def probe() -> SystemProfile:
|
|
|
204
362
|
"Linux": "linux"}.get(platform.system(), platform.system().lower())
|
|
205
363
|
prof.os_version = platform.release()
|
|
206
364
|
prof.arch = platform.machine().lower()
|
|
207
|
-
|
|
208
|
-
prof.
|
|
365
|
+
cpu_model, cpu_cores, cpu_logical_cores, cpu_instructions = _detect_cpu_details(prof.os)
|
|
366
|
+
prof.cpu_model = cpu_model
|
|
367
|
+
prof.cpu_cores = cpu_cores
|
|
368
|
+
prof.cpu_logical_cores = cpu_logical_cores
|
|
369
|
+
prof.cpu_instructions = cpu_instructions
|
|
209
370
|
prof.python_version = platform.python_version()
|
|
371
|
+
prof.is_wsl, prof.wsl_version = _detect_wsl(prof.os)
|
|
372
|
+
prof.cuda_available, prof.cuda_version = _detect_cuda()
|
|
373
|
+
prof.tools = _detect_tools()
|
|
210
374
|
|
|
211
375
|
# RAM
|
|
212
376
|
try:
|
|
@@ -218,7 +382,27 @@ def probe() -> SystemProfile:
|
|
|
218
382
|
elif prof.os == "darwin":
|
|
219
383
|
out = _run(["sysctl", "-n", "hw.memsize"])
|
|
220
384
|
if out.strip():
|
|
221
|
-
|
|
385
|
+
try:
|
|
386
|
+
prof.ram_mb = int(out.strip()) // (1024 * 1024)
|
|
387
|
+
except ValueError:
|
|
388
|
+
prof.ram_mb = 0
|
|
389
|
+
if not prof.ram_mb:
|
|
390
|
+
profiler = _run(["system_profiler", "SPHardwareDataType"], timeout=8.0)
|
|
391
|
+
m = re.search(r"Memory:\s+([\d.]+)\s*(TB|GB|MB)", profiler, re.IGNORECASE)
|
|
392
|
+
if m:
|
|
393
|
+
value = float(m.group(1))
|
|
394
|
+
unit = m.group(2).lower()
|
|
395
|
+
if unit == "tb":
|
|
396
|
+
prof.ram_mb = int(value * 1024 * 1024)
|
|
397
|
+
elif unit == "gb":
|
|
398
|
+
prof.ram_mb = int(value * 1024)
|
|
399
|
+
else:
|
|
400
|
+
prof.ram_mb = int(value)
|
|
401
|
+
if not prof.ram_mb:
|
|
402
|
+
hostinfo = _run(["hostinfo"])
|
|
403
|
+
m = re.search(r"Primary memory available:\s+([\d.]+)\s+gigabytes", hostinfo, re.IGNORECASE)
|
|
404
|
+
if m:
|
|
405
|
+
prof.ram_mb = int(float(m.group(1)) * 1024)
|
|
222
406
|
elif prof.os == "windows":
|
|
223
407
|
out = _run(["wmic", "ComputerSystem", "get", "TotalPhysicalMemory",
|
|
224
408
|
"/format:list"])
|
|
@@ -258,16 +442,23 @@ class Recommendation:
|
|
|
258
442
|
# 모델 카탈로그. PPT 슬라이드 16 의 "추천 모델" 열과 동기화.
|
|
259
443
|
_MODEL_CATALOG: List[Dict[str, Any]] = [
|
|
260
444
|
# (min_ram_mb, min_vram_mb, model_id, quant, runtime_preference)
|
|
261
|
-
|
|
262
|
-
|
|
445
|
+
# OS 오버헤드(~4-6 GB) + KV 캐시 여유를 감안한 보수적 RAM 임계값
|
|
446
|
+
{"ram": 64 * 1024, "vram": 32 * 1024,
|
|
447
|
+
"id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "q": "q4_K_M", "multimodal": True},
|
|
448
|
+
{"ram": 48 * 1024, "vram": 24 * 1024,
|
|
449
|
+
"id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "q": "q4_K_M", "multimodal": True},
|
|
450
|
+
{"ram": 32 * 1024, "vram": 16 * 1024,
|
|
451
|
+
"id": "Qwen/Qwen3-VL-8B-Instruct", "q": "q5_K_M", "multimodal": True},
|
|
452
|
+
{"ram": 24 * 1024, "vram": 12 * 1024,
|
|
453
|
+
"id": "Qwen/Qwen3-VL-8B-Instruct", "q": "q4_K_M", "multimodal": True},
|
|
263
454
|
{"ram": 16 * 1024, "vram": 8 * 1024,
|
|
264
|
-
"id": "Qwen/
|
|
455
|
+
"id": "Qwen/Qwen3-VL-8B-Instruct", "q": "q4_K_M", "multimodal": True},
|
|
265
456
|
{"ram": 12 * 1024, "vram": 6 * 1024,
|
|
266
|
-
"id": "
|
|
457
|
+
"id": "Qwen/Qwen3-VL-4B-Instruct", "q": "q4_K_M", "multimodal": True},
|
|
267
458
|
{"ram": 8 * 1024, "vram": 4 * 1024,
|
|
268
|
-
"id": "
|
|
459
|
+
"id": "Qwen/Qwen3-VL-4B-Instruct", "q": "q4_K_M", "multimodal": True},
|
|
269
460
|
{"ram": 4 * 1024, "vram": 0,
|
|
270
|
-
"id": "google/gemma-3-
|
|
461
|
+
"id": "google/gemma-3-1b-it", "q": "q4_K_M", "multimodal": False},
|
|
271
462
|
]
|
|
272
463
|
|
|
273
464
|
|
|
@@ -280,34 +471,41 @@ def recommend(profile: SystemProfile) -> Recommendation:
|
|
|
280
471
|
backend = "metal+mlx"
|
|
281
472
|
runtime = "mlx" if _has_module("mlx") else "llama.cpp"
|
|
282
473
|
rationale.append("Apple Silicon → Metal + MLX")
|
|
283
|
-
elif profile.gpu.vendor == "nvidia" and profile.
|
|
474
|
+
elif profile.gpu.vendor == "nvidia" and profile.cuda_available and (profile.os == "linux" or profile.is_wsl):
|
|
284
475
|
backend = "cuda"
|
|
285
|
-
runtime = "llama.cpp"
|
|
286
|
-
rationale.append(f"NVIDIA GPU {profile.gpu.vram_mb} MB VRAM
|
|
476
|
+
runtime = "vllm" if profile.gpu.vram_mb >= 12 * 1024 else "llama.cpp"
|
|
477
|
+
rationale.append(f"NVIDIA GPU {profile.gpu.vram_mb} MB VRAM + CUDA → {runtime}")
|
|
478
|
+
elif profile.gpu.vendor == "nvidia":
|
|
479
|
+
backend = "cuda" if profile.cuda_available else "vulkan"
|
|
480
|
+
runtime = "lmstudio" if profile.tools.get("lms") else ("ollama" if profile.tools.get("ollama") else "llama.cpp")
|
|
481
|
+
rationale.append("Windows NVIDIA는 LM Studio/Ollama 우선, vLLM은 WSL/Linux 권장")
|
|
287
482
|
elif profile.os == "windows" and profile.gpu.vendor in ("amd", "intel"):
|
|
288
|
-
backend = "directml"
|
|
289
|
-
runtime = "llama.cpp"
|
|
290
|
-
rationale.append("Windows + AMD/Intel GPU → DirectML")
|
|
483
|
+
backend = "directml/vulkan"
|
|
484
|
+
runtime = "lmstudio" if profile.tools.get("lms") else ("ollama" if profile.tools.get("ollama") else "llama.cpp")
|
|
485
|
+
rationale.append("Windows + AMD/Intel GPU → DirectML/Vulkan")
|
|
291
486
|
elif profile.os == "linux" and profile.gpu.vendor == "amd":
|
|
292
487
|
backend = "rocm" if "rocm" in profile.gpu.sdk else "vulkan"
|
|
293
|
-
runtime = "llama.cpp"
|
|
488
|
+
runtime = "ollama" if profile.tools.get("ollama") else "llama.cpp"
|
|
294
489
|
rationale.append("Linux + AMD GPU → ROCm/Vulkan")
|
|
295
490
|
else:
|
|
296
491
|
backend = "cpu"
|
|
297
|
-
runtime = "llama.cpp"
|
|
298
|
-
|
|
492
|
+
runtime = "ollama" if profile.tools.get("ollama") else "llama.cpp"
|
|
493
|
+
instruction_hint = ", ".join(profile.cpu_instructions) or "명령어 미감지"
|
|
494
|
+
rationale.append(f"GPU 가속이 없거나 미감지 → CPU 추론 ({profile.cpu_logical_cores or profile.cpu_cores} threads, {instruction_hint})")
|
|
299
495
|
|
|
300
496
|
# model size by RAM/VRAM
|
|
301
497
|
pick = _MODEL_CATALOG[-1] # 가장 작은 모델 기본값
|
|
302
498
|
for entry in _MODEL_CATALOG:
|
|
303
499
|
if profile.ram_mb >= entry["ram"] and (
|
|
304
|
-
backend
|
|
500
|
+
backend in {"cpu", "metal+mlx"} or profile.gpu.vram_mb >= entry["vram"]
|
|
305
501
|
):
|
|
306
502
|
pick = entry
|
|
307
503
|
break
|
|
308
504
|
rationale.append(
|
|
309
505
|
f"RAM {profile.ram_mb} MB · VRAM {profile.gpu.vram_mb} MB → {pick['id']}"
|
|
310
506
|
)
|
|
507
|
+
if pick.get("multimodal"):
|
|
508
|
+
rationale.append("최신 멀티모달 모델을 우선 선택")
|
|
311
509
|
|
|
312
510
|
# 양자화: VRAM 충분 → 더 정밀한 양자화로 업그레이드
|
|
313
511
|
quant = pick["q"]
|
|
@@ -402,7 +600,7 @@ def plan(profile: SystemProfile, rec: Recommendation) -> InstallPlan:
|
|
|
402
600
|
|
|
403
601
|
if sys.version_info < (3, 11):
|
|
404
602
|
need("python3.11+", "Lattice AI 서버는 Python 3.11 이상이 필요합니다.")
|
|
405
|
-
if not
|
|
603
|
+
if not _which("node"):
|
|
406
604
|
need("node20", "VSCode 확장 / npm CLI 부트스트랩에 필요")
|
|
407
605
|
|
|
408
606
|
# 런타임별 추가
|
|
@@ -411,17 +609,39 @@ def plan(profile: SystemProfile, rec: Recommendation) -> InstallPlan:
|
|
|
411
609
|
name="mlx-lm", why="Apple Silicon LLM 추론",
|
|
412
610
|
command=["pip3", "install", "--upgrade", "mlx-lm"],
|
|
413
611
|
))
|
|
414
|
-
if rec.runtime
|
|
612
|
+
if rec.runtime in {"llama.cpp", "ollama"} and not _which("ollama"):
|
|
415
613
|
need("ollama", "llama.cpp 가중치를 가장 쉽게 받는 경로")
|
|
614
|
+
if rec.runtime == "lmstudio" and not _which("lms"):
|
|
615
|
+
notes.append("LM Studio CLI(lms)를 찾지 못했습니다. https://lmstudio.ai/download 에서 설치하면 Windows/macOS/Linux 모델 다운로드와 GPU 백엔드를 자동 감지합니다.")
|
|
616
|
+
if rec.runtime == "vllm" and not _has_module("vllm"):
|
|
617
|
+
steps.append(InstallStep(
|
|
618
|
+
name="vllm", why="NVIDIA CUDA/WSL/Linux 서버형 추론",
|
|
619
|
+
command=["pip3", "install", "--upgrade", "vllm", "huggingface_hub"],
|
|
620
|
+
))
|
|
621
|
+
if profile.gpu.vendor == "nvidia" and not profile.cuda_available:
|
|
622
|
+
notes.append("NVIDIA GPU는 감지됐지만 CUDA/nvidia-smi를 찾지 못했습니다. Windows에서는 NVIDIA 드라이버와 CUDA Toolkit 설치 후 재검사를 권장합니다.")
|
|
623
|
+
if profile.os == "windows" and profile.gpu.vendor == "nvidia" and not profile.is_wsl:
|
|
624
|
+
notes.append("vLLM은 Windows native보다 WSL2/Linux에서 안정적입니다. Windows 데스크톱은 LM Studio 또는 Ollama GPU 경로를 먼저 권장합니다.")
|
|
416
625
|
|
|
417
|
-
if not
|
|
626
|
+
if not _which("huggingface-cli"):
|
|
418
627
|
need("huggingface-cli", "추천 모델 가중치 다운로드용")
|
|
419
628
|
|
|
420
629
|
# 모델 가중치 풀
|
|
630
|
+
model_command = ["huggingface-cli", "download", rec.model_id, "--quiet"]
|
|
631
|
+
if rec.runtime == "ollama":
|
|
632
|
+
lower = rec.model_id.lower()
|
|
633
|
+
if "qwen3-vl-8b" in lower:
|
|
634
|
+
model_command = ["ollama", "pull", "qwen3-vl:8b"]
|
|
635
|
+
elif "qwen3-vl-4b" in lower:
|
|
636
|
+
model_command = ["ollama", "pull", "qwen3-vl:4b"]
|
|
637
|
+
elif "gemma-3-1b" in lower:
|
|
638
|
+
model_command = ["ollama", "pull", "gemma3:1b"]
|
|
639
|
+
elif rec.runtime == "lmstudio":
|
|
640
|
+
model_command = ["lms", "get", rec.model_id]
|
|
421
641
|
steps.append(InstallStep(
|
|
422
642
|
name=f"weights:{rec.model_id}",
|
|
423
643
|
why="추론에 사용할 모델 가중치",
|
|
424
|
-
command=
|
|
644
|
+
command=model_command,
|
|
425
645
|
))
|
|
426
646
|
|
|
427
647
|
return InstallPlan(package_manager=pm, steps=steps, notes=notes)
|
|
@@ -463,9 +683,13 @@ def verify(profile: SystemProfile, rec: Recommendation) -> Dict[str, Any]:
|
|
|
463
683
|
|
|
464
684
|
if rec.runtime == "mlx":
|
|
465
685
|
add("mlx_lm import", _has_module("mlx_lm"), "Apple Silicon 런타임")
|
|
466
|
-
if rec.runtime
|
|
467
|
-
add("ollama binary",
|
|
468
|
-
|
|
686
|
+
if rec.runtime in {"llama.cpp", "ollama"}:
|
|
687
|
+
add("ollama binary", _which("ollama") is not None,
|
|
688
|
+
_which("ollama") or "not found")
|
|
689
|
+
if rec.runtime == "lmstudio":
|
|
690
|
+
add("LM Studio CLI", _which("lms") is not None, _which("lms") or "not found")
|
|
691
|
+
if rec.backend == "cuda":
|
|
692
|
+
add("CUDA/nvidia-smi", profile.cuda_available, profile.cuda_version or "not found")
|
|
469
693
|
|
|
470
694
|
# CPU/메모리 잠깐 측정
|
|
471
695
|
t0 = time.perf_counter()
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,74 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.0] - 2026-05-25
|
|
4
|
+
|
|
5
|
+
### server.py 모듈 분리 — latticeai/ 패키지 도입
|
|
6
|
+
|
|
7
|
+
server.py(6,815줄)에서 핵심 로직을 `latticeai/` 패키지로 분리하여 유지보수성을 개선했습니다.
|
|
8
|
+
|
|
9
|
+
**새 패키지 구조:**
|
|
10
|
+
```
|
|
11
|
+
latticeai/
|
|
12
|
+
core/
|
|
13
|
+
security.py — 비밀번호 해싱, 레이트 리밋, IP 감지, 파일 매직 검증
|
|
14
|
+
sessions.py — 파일 기반 세션 저장소 (SessionStore 클래스)
|
|
15
|
+
audit.py — 감사 로깅, 민감정보 분석, 관리자 감사 리포트
|
|
16
|
+
api/
|
|
17
|
+
auth.py — 인증/SSO/프로필 API 라우터 (register, login, logout, SSO, profile)
|
|
18
|
+
admin.py — 관리자 API 라우터 (dashboard, users, VPC, SSO, audit)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
- server.py: 6,815줄 → 6,187줄 (628줄 감소, 868줄이 5개 모듈로 분산)
|
|
22
|
+
- 기존 API 호환성 100% 유지 — 모든 엔드포인트 경로와 응답 동일
|
|
23
|
+
- `knowledge_graph_api.py` / `local_knowledge_api.py`와 동일한 팩토리 라우터 패턴 사용
|
|
24
|
+
|
|
25
|
+
### README 전면 개편 — 사용자 경험 중심
|
|
26
|
+
|
|
27
|
+
- 핵심 메시지: "내 파일과 대화를 기억하고 연결하는 로컬 AI 워크스페이스"
|
|
28
|
+
- 기능 나열형 → 3분 워크플로 + Why 섹션 + 지식 그래프 설명
|
|
29
|
+
- 고급 기능(전체 기능표, 보안, 설정, API, 트러블슈팅)은 접기(details) 섹션으로 이동
|
|
30
|
+
- 비교표에 Knowledge Graph, Local Folder Indexing 항목 추가
|
|
31
|
+
- 모델 추천표에 최소 RAM 컬럼 추가
|
|
32
|
+
- 한국어 섹션도 경험 중심으로 재작성
|
|
33
|
+
|
|
34
|
+
### 보안 강화 — 패키지 설치 관리자 전용
|
|
35
|
+
|
|
36
|
+
- `/mcp/install`: `require_user` → `require_admin` + 감사 로그
|
|
37
|
+
- `/skills/install`: `require_user` → `require_admin` + 감사 로그
|
|
38
|
+
- `/mcp/custom` POST: `require_user` → `require_admin` + 감사 로그
|
|
39
|
+
- pip/npm 패키지 설치는 관리자만 실행 가능, 모든 시도가 `audit_log.json`에 기록
|
|
40
|
+
|
|
41
|
+
### Release
|
|
42
|
+
- 배포 버전을 `0.2.0`으로 상향 (메이저 구조 변경)
|
|
43
|
+
- 대상 채널: `npm` · `PyPI` · `VS Code Marketplace` · `Open VSX`
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## [0.1.31] - 2026-05-25
|
|
48
|
+
|
|
49
|
+
### 모델 추천 보정 — 하드웨어 대비 과도한 모델 방지
|
|
50
|
+
|
|
51
|
+
- **Apple Silicon 32GB 추천 모델 하향 조정**
|
|
52
|
+
- 32GB Mac: `Qwen3-VL-30B-A3B` (18GB) → `Qwen3-VL-8B` (q5_K_M, 5GB) 로 변경
|
|
53
|
+
- 30B-A3B 모델은 48GB 이상에서만 추천 (OS 오버헤드 + KV 캐시 여유 확보)
|
|
54
|
+
- 32GB 시스템에서 메모리 압박으로 인한 성능 저하 방지
|
|
55
|
+
|
|
56
|
+
- **`auto_setup.py` `_MODEL_CATALOG` 보수적 임계값 적용**
|
|
57
|
+
- 30B-A3B: 최소 RAM 32GB → 48GB
|
|
58
|
+
- 24GB VRAM 임계값 조정 (12GB로 완화하여 중급 GPU 커버)
|
|
59
|
+
- 각 티어 간 여유분을 확보하여 실사용 시 안정적 추론 보장
|
|
60
|
+
|
|
61
|
+
- **`setup.py` 추천 로직 보정**
|
|
62
|
+
- Apple Silicon 기본 추천 30B 임계값: `ram >= 32` → `ram >= 48`
|
|
63
|
+
- MLX 모델 카탈로그 min_ram 상향: Qwen3-VL 30B (32→48), Gemma 3 27B (32→48), Gemma 4 26B (24→32), Mistral Small 24B (24→32), Qwen2.5 Coder 32B (32→36)
|
|
64
|
+
- 크로스 플랫폼(vLLM/LM Studio) 30B 모델: 전용 GPU 시스템은 min_ram=32 유지 (VRAM에 로드되므로 RAM 부담 적음)
|
|
65
|
+
|
|
66
|
+
### Release
|
|
67
|
+
- 배포 버전을 `0.1.31`로 상향
|
|
68
|
+
- 대상 채널: `npm` · `PyPI` · `VS Code Marketplace` · `Open VSX`
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
3
72
|
## [0.1.30] - 2026-05-25
|
|
4
73
|
|
|
5
74
|
### 코드 품질 및 리팩토링
|