ltcai 4.3.1 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +191 -278
  2. package/docs/CHANGELOG.md +128 -0
  3. package/docs/V4_3_2_DEADCODE_AUDIT_REPORT.md +174 -0
  4. package/docs/V4_3_2_DOCUMENTATION_CLEANUP_REPORT.md +81 -0
  5. package/docs/V4_3_2_GITHUB_VERCEL_CHECK_REPORT.md +75 -0
  6. package/docs/V4_3_2_GRAPH_UX_REPORT.md +48 -0
  7. package/docs/V4_3_2_INDEPENDENT_AUDIT_PACKAGE.md +209 -0
  8. package/docs/V4_3_2_PRODUCT_POLISH_REPORT.md +57 -0
  9. package/docs/V4_3_2_SELF_AUDIT_REPORT.md +63 -0
  10. package/docs/V4_3_2_VALIDATION_REPORT.md +97 -0
  11. package/docs/V4_3_3_VALIDATION_REPORT.md +46 -0
  12. package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
  13. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +18 -19
  14. package/frontend/openapi.json +1 -1
  15. package/frontend/src/components/primitives.tsx +92 -10
  16. package/frontend/src/pages/Act.tsx +11 -9
  17. package/frontend/src/pages/Ask.tsx +2 -2
  18. package/frontend/src/pages/Brain.tsx +607 -65
  19. package/frontend/src/pages/Capture.tsx +11 -7
  20. package/frontend/src/pages/Library.tsx +3 -3
  21. package/frontend/src/pages/System.tsx +186 -23
  22. package/lattice_brain/__init__.py +38 -23
  23. package/lattice_brain/_kg_common.py +11 -1
  24. package/lattice_brain/context.py +212 -2
  25. package/lattice_brain/conversations.py +234 -1
  26. package/lattice_brain/discovery.py +11 -1
  27. package/lattice_brain/documents.py +11 -1
  28. package/lattice_brain/graph/__init__.py +28 -0
  29. package/lattice_brain/graph/_kg_common.py +1123 -0
  30. package/lattice_brain/graph/curator.py +473 -0
  31. package/lattice_brain/graph/discovery.py +1455 -0
  32. package/lattice_brain/graph/documents.py +218 -0
  33. package/lattice_brain/graph/identity.py +175 -0
  34. package/lattice_brain/graph/ingest.py +644 -0
  35. package/lattice_brain/graph/network.py +205 -0
  36. package/lattice_brain/graph/projection.py +571 -0
  37. package/lattice_brain/graph/provenance.py +401 -0
  38. package/lattice_brain/graph/retrieval.py +1341 -0
  39. package/lattice_brain/graph/schema.py +640 -0
  40. package/lattice_brain/graph/store.py +237 -0
  41. package/lattice_brain/graph/write_master.py +225 -0
  42. package/lattice_brain/identity.py +11 -13
  43. package/lattice_brain/ingest.py +11 -1
  44. package/lattice_brain/ingestion.py +318 -0
  45. package/lattice_brain/memory.py +100 -1
  46. package/lattice_brain/network.py +11 -1
  47. package/lattice_brain/portability.py +431 -0
  48. package/lattice_brain/projection.py +11 -1
  49. package/lattice_brain/provenance.py +11 -1
  50. package/lattice_brain/retrieval.py +11 -1
  51. package/lattice_brain/runtime/__init__.py +32 -0
  52. package/lattice_brain/runtime/agent_runtime.py +569 -0
  53. package/lattice_brain/runtime/hooks.py +754 -0
  54. package/lattice_brain/runtime/multi_agent.py +795 -0
  55. package/lattice_brain/schema.py +11 -1
  56. package/lattice_brain/store.py +10 -2
  57. package/lattice_brain/workflow.py +461 -0
  58. package/lattice_brain/write_master.py +11 -1
  59. package/latticeai/__init__.py +1 -1
  60. package/latticeai/api/agents.py +2 -2
  61. package/latticeai/api/browser.py +1 -1
  62. package/latticeai/api/chat.py +1 -1
  63. package/latticeai/api/computer_use.py +1 -1
  64. package/latticeai/api/hooks.py +2 -2
  65. package/latticeai/api/mcp.py +1 -1
  66. package/latticeai/api/tools.py +1 -1
  67. package/latticeai/api/workflow_designer.py +2 -2
  68. package/latticeai/app_factory.py +4 -4
  69. package/latticeai/brain/__init__.py +24 -6
  70. package/latticeai/brain/_kg_common.py +11 -1117
  71. package/latticeai/brain/context.py +12 -208
  72. package/latticeai/brain/conversations.py +12 -231
  73. package/latticeai/brain/discovery.py +13 -1451
  74. package/latticeai/brain/documents.py +13 -214
  75. package/latticeai/brain/identity.py +11 -169
  76. package/latticeai/brain/ingest.py +13 -640
  77. package/latticeai/brain/memory.py +12 -97
  78. package/latticeai/brain/network.py +12 -200
  79. package/latticeai/brain/projection.py +13 -567
  80. package/latticeai/brain/provenance.py +13 -397
  81. package/latticeai/brain/retrieval.py +13 -1337
  82. package/latticeai/brain/schema.py +12 -635
  83. package/latticeai/brain/store.py +13 -233
  84. package/latticeai/brain/write_master.py +13 -221
  85. package/latticeai/core/agent.py +1 -1
  86. package/latticeai/core/agent_registry.py +2 -2
  87. package/latticeai/core/builtin_hooks.py +2 -2
  88. package/latticeai/core/graph_curator.py +6 -468
  89. package/latticeai/core/hooks.py +6 -749
  90. package/latticeai/core/marketplace.py +1 -1
  91. package/latticeai/core/multi_agent.py +6 -790
  92. package/latticeai/core/workflow_engine.py +6 -456
  93. package/latticeai/core/workspace_os.py +1 -1
  94. package/latticeai/services/agent_runtime.py +6 -564
  95. package/latticeai/services/ingestion.py +6 -313
  96. package/latticeai/services/kg_portability.py +6 -426
  97. package/latticeai/services/platform_runtime.py +3 -3
  98. package/latticeai/services/run_executor.py +1 -1
  99. package/latticeai/services/upload_service.py +1 -1
  100. package/p_reinforce.py +1 -1
  101. package/package.json +3 -6
  102. package/scripts/build_vercel_static.mjs +77 -0
  103. package/scripts/bump_version.py +1 -1
  104. package/scripts/check_markdown_links.mjs +75 -0
  105. package/scripts/wheel_smoke.py +7 -0
  106. package/src-tauri/Cargo.lock +1 -1
  107. package/src-tauri/Cargo.toml +1 -1
  108. package/src-tauri/src/main.rs +12 -2
  109. package/src-tauri/tauri.conf.json +1 -1
  110. package/static/app/asset-manifest.json +5 -5
  111. package/static/app/assets/index-CHHal8Zl.css +2 -0
  112. package/static/app/assets/index-pdzil9ac.js +333 -0
  113. package/static/app/assets/index-pdzil9ac.js.map +1 -0
  114. package/static/app/index.html +2 -2
  115. package/latticeai/api/deps.py +0 -15
  116. package/scripts/capture/README.md +0 -28
  117. package/scripts/capture/capture_enterprise.js +0 -8
  118. package/scripts/capture/capture_graph.js +0 -8
  119. package/scripts/capture/capture_onboarding.js +0 -8
  120. package/scripts/capture/capture_page.js +0 -43
  121. package/scripts/capture/capture_release_media.js +0 -125
  122. package/scripts/capture/capture_skills.js +0 -8
  123. package/scripts/capture/capture_v340.js +0 -88
  124. package/scripts/capture/capture_workspace.js +0 -8
  125. package/scripts/generate_diagrams.py +0 -512
  126. package/scripts/release-0.3.1.sh +0 -105
  127. package/scripts/take_screenshots.js +0 -69
  128. package/static/app/assets/index-BhPuj8rT.js +0 -333
  129. package/static/app/assets/index-BhPuj8rT.js.map +0 -1
  130. package/static/app/assets/index-yZswHE3d.css +0 -2
  131. package/static/css/tokens.3ba22e37.css +0 -260
@@ -0,0 +1,1123 @@
1
+ """
2
+ SQLite knowledge graph for Lattice AI workspace memory.
3
+
4
+ The graph keeps raw event JSON, normalized node metadata, and edges in one
5
+ portable database so it can later migrate to Neo4j/Postgres without changing
6
+ the ingestion contract.
7
+ """
8
+
9
+ # ruff: noqa: F401,F841
10
+
11
+ import asyncio
12
+ import hashlib
13
+ import json
14
+ import logging
15
+ import math
16
+ import os
17
+ import platform
18
+ import re
19
+ import shutil
20
+ import sqlite3
21
+ import time
22
+ import zipfile
23
+ from collections import Counter
24
+ from datetime import datetime
25
+ from pathlib import Path
26
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
27
+
28
+ try:
29
+ from .schema import KGStoreV2, NodeType, EdgeType, _exec_script
30
+ except Exception: # pragma: no cover - v2 schema is optional at import time
31
+ KGStoreV2 = None # type: ignore[assignment]
32
+ NodeType = None # type: ignore[assignment]
33
+ EdgeType = None # type: ignore[assignment]
34
+ _exec_script = None # type: ignore[assignment]
35
+
36
+ from ..embeddings import LocalEmbeddingModel
37
+
# Default read source for the graph queries: v2 reconstruction views.
# Override with LATTICEAI_KG_READ_V2=0 to fall back to the legacy tables.
# The environment variable is read once, at import time; any value other than
# the literal string "0" (including an unset variable) leaves the flag True.
_READ_FROM_V2_DEFAULT = os.getenv("LATTICEAI_KG_READ_V2", "1") != "0"

# Bump when the v2 projection layout changes (columns, normalization rules).
# On init, a stale projection is dropped and rebuilt from the authoritative
# legacy tables — safe because nodes_v2/edges_v2 only ever hold a derived view.
# v4: summary nullable + verbatim (byte-faithful) projection of legacy values.
_PROJECTION_VERSION = 4
# NOTE(review): presumably the database format revision and the metadata key
# it is stored under — confirm against the store/schema modules.
_KG_DB_FORMAT_VERSION = 4
_KG_DB_FORMAT_KEY = "db_format_version"
# NOTE(review): looks like the metadata key recording when v2 became the write
# master — confirm against write_master.py.
_V2_WRITE_MASTER_KEY = "v2_write_mastered_at"
50
+
51
+ _llm_router_ref = None
52
+
53
+
def set_llm_router(router_instance):
    """Register the router object used by the LLM-backed extraction helpers.

    The instance is stored in the module-level ``_llm_router_ref``; passing
    ``None`` clears it.
    """
    global _llm_router_ref
    _llm_router_ref = router_instance
57
+
58
+
GRAPH_SCHEMA_VERSION = 1

# File extensions (lower-case, with leading dot) grouped by ingest category.
LOCAL_TEXT_EXTENSIONS = {".txt", ".md"}
LOCAL_CODE_EXTENSIONS = {
    ".py", ".js", ".ts", ".tsx", ".jsx",
    ".html", ".css",
    ".json", ".yaml", ".yml", ".xml",
    ".sql", ".sh", ".zsh",
    ".toml", ".ini",
}
LOCAL_DOCUMENT_EXTENSIONS = {".pdf", ".docx"}
LOCAL_SPREADSHEET_EXTENSIONS = {".xlsx", ".csv"}
LOCAL_SLIDE_EXTENSIONS = {".pptx"}
LOCAL_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}

# Union of every per-category extension set above.
LOCAL_SUPPORTED_EXTENSIONS = set().union(
    LOCAL_TEXT_EXTENSIONS,
    LOCAL_CODE_EXTENSIONS,
    LOCAL_DOCUMENT_EXTENSIONS,
    LOCAL_SPREADSHEET_EXTENSIONS,
    LOCAL_SLIDE_EXTENSIONS,
    LOCAL_IMAGE_EXTENSIONS,
)

# Size ceilings keyed by file category name.
LOCAL_SIZE_LIMITS = {
    "text": 4_000_000,
    "code": 4_000_000,
    "pdf": 50_000_000,
    "document": 50_000_000,
    "spreadsheet": 50_000_000,
    "slide_deck": 50_000_000,
    "image": 100_000_000,
}
102
+
# Directory names (compared lower-cased) that are never scanned.
COMMON_EXCLUDED_DIRS = {
    ".git", "node_modules", ".venv", "venv", "env",
    "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache",
    ".next", ".nuxt", ".turbo",
    "dist", "build", "target", "out", "coverage",
    ".cache", ".config", ".ssh", ".gnupg",
    ".docker", ".kube", ".aws", ".azure",
    ".npm", ".pnpm-store", ".yarn", ".bun",
    ".cargo", ".rustup", ".pyenv", ".conda", ".local",
    ".claude", ".codex", ".cursor", ".copilot",
    ".antigravity", ".antigravity-ide",
}

# Exact file names (compared lower-cased) that are never ingested.
COMMON_EXCLUDED_FILE_NAMES = {
    ".env", ".env.local", ".env.production", ".env.development",
    "id_rsa", "id_ed25519", "authorized_keys", "known_hosts",
    "credentials.json", "service-account.json", "token.json", "secrets.json",
    "cookies", "login data", "history", "web data",
    ".ds_store", "thumbs.db",
}
# File suffixes (compared lower-cased) that are never ingested.
COMMON_EXCLUDED_FILE_SUFFIXES = {
    ".pem", ".key", ".p12", ".pfx", ".kdbx", ".wallet",
    ".sqlite", ".db",
    ".exe", ".dll", ".sys", ".msi", ".dmg", ".pkg", ".app",
    ".zip", ".tar", ".gz", ".7z", ".rar",
    ".mp4", ".mov", ".mp3", ".wav",
    ".tmp", ".bak", ".lock",
}
# Path tokens that mark a file as sensitive by name.
SENSITIVE_PATH_KEYWORDS = {
    "secret", "secrets", "token", "password", "passwd",
    "credential", "credentials", "private", "key", "wallet",
    "recovery", "seed", "mnemonic", "cookie", "session",
    "auth", "oauth", "certificate", "cert",
    "api_key", "apikey",
}
218
+
# Absolute POSIX path prefixes treated as system folders on macOS.
MACOS_EXCLUDED_PREFIXES = (
    "/System", "/Library", "/Applications",
    "/private", "/tmp", "/var",
)
# Lower-cased path component names treated as system folders on Windows.
WINDOWS_EXCLUDED_NAMES = {
    "windows", "program files", "program files (x86)", "programdata",
    "appdata", "$recycle.bin", "system volume information", "recovery",
    "perflogs", "intel", "amd", "nvidia",
}
# Absolute POSIX path prefixes treated as system folders on Linux.
LINUX_EXCLUDED_PREFIXES = (
    "/bin", "/boot", "/dev", "/etc",
    "/lib", "/lib64", "/proc", "/root",
    "/run", "/sbin", "/sys", "/tmp",
    "/usr", "/var", "/snap", "/lost+found",
)
259
+
260
+
261
+ def _now() -> str:
262
+ return datetime.now().isoformat()
263
+
264
+
265
+ def _parse_iso(raw: Optional[str]) -> Optional[datetime]:
266
+ if not raw:
267
+ return None
268
+ try:
269
+ return datetime.fromisoformat(str(raw))
270
+ except (TypeError, ValueError):
271
+ return None
272
+
273
+
274
+ def _recency_score(
275
+ updated_at: Optional[str],
276
+ *,
277
+ now: Optional[datetime] = None,
278
+ half_life_days: float = 14.0,
279
+ ) -> float:
280
+ stamp = _parse_iso(updated_at)
281
+ if not stamp:
282
+ return 0.0
283
+ now = now or datetime.now()
284
+ age_days = max(0.0, (now - stamp).total_seconds() / 86400.0)
285
+ decay = math.log(2) / max(0.1, half_life_days)
286
+ return math.exp(-decay * age_days)
287
+
288
+
289
+ def _json(data: Optional[Dict[str, Any]]) -> str:
290
+ return json.dumps(data or {}, ensure_ascii=False, sort_keys=True)
291
+
292
+
293
+ def _safe_loads(raw: Optional[str]) -> Dict[str, Any]:
294
+ """Tolerantly parse a metadata_json column — returns {} on corrupt rows."""
295
+ if not raw:
296
+ return {}
297
+ try:
298
+ value = json.loads(raw)
299
+ return value if isinstance(value, dict) else {}
300
+ except (json.JSONDecodeError, TypeError) as e:
301
+ logging.warning(
302
+ "knowledge_graph: corrupt metadata_json (%s) — using empty dict", e
303
+ )
304
+ return {}
305
+
306
+
307
+ def _slug(text: str, max_len: int = 96) -> str:
308
+ value = re.sub(r"\s+", " ", str(text or "")).strip().lower()
309
+ value = re.sub(r"[^0-9a-zA-Z가-힣._:@/-]+", "-", value).strip("-")
310
+ return (value or "untitled")[:max_len]
311
+
312
+
313
+ def _sha256_bytes(data: bytes) -> str:
314
+ return hashlib.sha256(data).hexdigest()
315
+
316
+
317
+ def _sha256_text(text: str) -> str:
318
+ return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()
319
+
320
+
321
+ def _safe_iso_from_stat_mtime(mtime: float) -> str:
322
+ try:
323
+ return datetime.fromtimestamp(float(mtime)).isoformat()
324
+ except (TypeError, ValueError, OSError):
325
+ return ""
326
+
327
+
def _path_fingerprint(path: Path) -> str:
    """Return a 24-hex-character fingerprint of the resolved absolute path."""
    absolute = path.expanduser().resolve()
    digest = _sha256_text(str(absolute))
    return digest[:24]
330
+
331
+
332
+ def _is_relative_to(path: Path, base: Path) -> bool:
333
+ try:
334
+ path.relative_to(base)
335
+ return True
336
+ except ValueError:
337
+ return False
338
+
339
+
340
+ def _path_parts_lower(path: Path) -> List[str]:
341
+ return [
342
+ part.lower()
343
+ for part in path.parts
344
+ if part and part not in {os.sep, path.anchor}
345
+ ]
346
+
347
+
348
+ def _current_os_type() -> str:
349
+ system = platform.system().lower()
350
+ if system.startswith("darwin"):
351
+ return "macos"
352
+ if system.startswith("windows"):
353
+ return "windows"
354
+ if system.startswith("linux"):
355
+ return "linux"
356
+ return system or "unknown"
357
+
358
+
359
+ def _drive_id_for_path(path: Path) -> str:
360
+ resolved = path.expanduser().resolve()
361
+ if resolved.drive:
362
+ return resolved.drive.upper()
363
+ parts = resolved.parts
364
+ if len(parts) >= 3 and parts[1] == "Volumes":
365
+ return f"/Volumes/{parts[2]}"
366
+ if len(parts) >= 3 and parts[1] == "media":
367
+ return f"/media/{parts[2]}"
368
+ if len(parts) >= 3 and parts[1] == "mnt":
369
+ return f"/mnt/{parts[2]}"
370
+ return resolved.anchor or "/"
371
+
372
+
def _file_category(ext: str) -> str:
    """Classify a file extension into an ingest category name.

    The extension is lower-cased and checked against the ``LOCAL_*`` extension
    sets in a fixed order (code, text, pdf, document, spreadsheet, slide deck,
    image). ``".pdf"`` gets its own ``"pdf"`` category before the general
    document check. Anything unmatched is ``"unsupported"``.
    """
    normalized = (ext or "").lower()
    ordered_checks = (
        (LOCAL_CODE_EXTENSIONS, "code"),
        (LOCAL_TEXT_EXTENSIONS, "text"),
        ((".pdf",), "pdf"),
        (LOCAL_DOCUMENT_EXTENSIONS, "document"),
        (LOCAL_SPREADSHEET_EXTENSIONS, "spreadsheet"),
        (LOCAL_SLIDE_EXTENSIONS, "slide_deck"),
        (LOCAL_IMAGE_EXTENSIONS, "image"),
    )
    for extensions, category in ordered_checks:
        if normalized in extensions:
            return category
    return "unsupported"
390
+
391
+
392
+ def _node_type_for_category(category: str) -> str:
393
+ return {
394
+ "code": "CodeFile",
395
+ "spreadsheet": "Spreadsheet",
396
+ "slide_deck": "SlideDeck",
397
+ "image": "Image",
398
+ "unsupported": "File",
399
+ }.get(category, "Document")
400
+
401
+
402
+ def _parser_type_for_category(category: str, ext: str) -> str:
403
+ if category in {"text", "code"}:
404
+ return "plain_text"
405
+ if category == "spreadsheet" and ext == ".csv":
406
+ return "csv_text"
407
+ if category == "image":
408
+ return "image_ocr"
409
+ return ext.lstrip(".") or category
410
+
411
+
def _size_limit_for_category(category: str) -> int:
    """Return the size limit for ``category``, defaulting to the document limit."""
    fallback = LOCAL_SIZE_LIMITS["document"]
    return LOCAL_SIZE_LIMITS.get(category, fallback)
414
+
415
+
416
+ def _is_hidden_path(path: Path, root: Optional[Path] = None) -> bool:
417
+ parts: Iterable[str]
418
+ if root is not None:
419
+ try:
420
+ parts = path.relative_to(root).parts
421
+ except ValueError:
422
+ parts = path.parts
423
+ else:
424
+ parts = path.parts
425
+ return any(part.startswith(".") and part not in {".", ".."} for part in parts)
426
+
427
+
def _excluded_directory_reason(
    path: Path, *, root: Optional[Path] = None, os_type: Optional[str] = None
) -> Optional[str]:
    """Return why a directory must be skipped, or ``None`` when it is allowed.

    Checks run in order and the first match wins:

    1. ``"excluded_folder"`` — the lower-cased directory name is in
       ``COMMON_EXCLUDED_DIRS``.
    2. ``"hidden_folder"`` — ``_is_hidden_path(path, root)`` is true.
    3. ``"system_folder"`` — on Windows, any lower-cased path component is in
       ``WINDOWS_EXCLUDED_NAMES``.
    4. ``"user_library"`` — on macOS, the resolved path is inside
       ``~/Library`` while ``root`` is not.
    5. ``"system_folder"`` — on macOS/Linux, the path is at or under one of
       the OS prefix tuples while ``root`` is not under that same prefix.

    Args:
        path: Directory being considered.
        root: Scan root chosen by the caller, if any. A root that itself lives
            under a blocked prefix (or under ``~/Library``) lifts that block.
        os_type: ``"macos"``, ``"windows"`` or ``"linux"``; defaults to
            ``_current_os_type()``.
    """
    os_type = os_type or _current_os_type()
    name = path.name.lower()
    if name in COMMON_EXCLUDED_DIRS:
        return "excluded_folder"
    if _is_hidden_path(path, root):
        return "hidden_folder"
    parts = _path_parts_lower(path)
    if os_type == "windows" and any(part in WINDOWS_EXCLUDED_NAMES for part in parts):
        return "system_folder"
    # Prefix checks below are purely textual, on the POSIX form of the paths
    # as given (not resolved).
    normalized = path.as_posix()
    root_normalized = root.as_posix() if root else ""

    def _prefix_blocks(prefixes: Tuple[str, ...]) -> bool:
        # True when the path sits at/under some prefix that the root does not
        # also sit at/under.
        for prefix in prefixes:
            path_under_prefix = normalized == prefix or normalized.startswith(
                f"{prefix}/"
            )
            root_under_prefix = bool(root_normalized) and (
                root_normalized == prefix or root_normalized.startswith(f"{prefix}/")
            )
            if path_under_prefix and not root_under_prefix:
                return True
        return False

    if os_type == "macos":
        home_library = Path.home() / "Library"
        try:
            root_is_library = bool(root) and _is_relative_to(
                root.expanduser().resolve(), home_library.expanduser().resolve()
            )
            if (
                _is_relative_to(
                    path.expanduser().resolve(), home_library.expanduser().resolve()
                )
                and not root_is_library
            ):
                return "user_library"
        except OSError:
            # Resolution failed; skip the ~/Library check and fall through to
            # the prefix check.
            pass
        if _prefix_blocks(MACOS_EXCLUDED_PREFIXES):
            return "system_folder"
    if os_type == "linux":
        if _prefix_blocks(LINUX_EXCLUDED_PREFIXES):
            return "system_folder"
    return None
476
+
477
+
def _sensitive_file_reason(path: Path, *, root: Optional[Path] = None) -> Optional[str]:
    """Return why a file must be skipped as sensitive, or ``None`` if allowed.

    Args:
        path: File being considered.
        root: Scan root. When ``path`` lies under it, only the part of the
            path below ``root`` is searched for sensitive keywords, so the
            root's own name cannot trigger a match.

    Returns:
        ``"sensitive_or_excluded_file"`` when the lower-cased file name or
        suffix is in the exclusion sets, ``"sensitive_name"`` when a path
        token is a sensitive keyword, otherwise ``None``.
    """
    name = path.name.lower()
    suffix = path.suffix.lower()
    if name in COMMON_EXCLUDED_FILE_NAMES or suffix in COMMON_EXCLUDED_FILE_SUFFIXES:
        return "sensitive_or_excluded_file"
    try:
        rel_text = (
            path.relative_to(root).as_posix().lower()
            if root
            else path.as_posix().lower()
        )
    except ValueError:
        rel_text = path.as_posix().lower()
    for token in re.split(r"[^0-9a-zA-Z_가-힣]+", rel_text):
        # Whole-token match keeps underscore keywords such as "api_key".
        if token in SENSITIVE_PATH_KEYWORDS:
            return "sensitive_name"
        # Underscore is as common a word separator as "-" or " ". Without
        # this, "db_password.txt" passes while "db-password.txt" is blocked.
        if "_" in token and any(
            piece in SENSITIVE_PATH_KEYWORDS for piece in token.split("_")
        ):
            return "sensitive_name"
    return None
495
+
496
+
497
+ def _root_warning(path: Path, os_type: str) -> Optional[str]:
498
+ resolved = path.expanduser().resolve()
499
+ home = Path.home().expanduser().resolve()
500
+ if os_type == "macos" and resolved == home:
501
+ return "홈 전체에는 설정/숨김 폴더가 포함될 수 있습니다. 문서, 데스크탑, 다운로드, 프로젝트 폴더부터 추가하는 것을 권장합니다."
502
+ if os_type == "linux" and resolved.as_posix() == "/":
503
+ return "루트 디렉터리에는 시스템 파일이 포함되어 있습니다. 일반 사용자 폴더나 마운트된 데이터 폴더를 권장합니다."
504
+ if os_type == "windows" and str(resolved).rstrip("\\/").upper() in {"C:", "C:\\"}:
505
+ return "C드라이브에는 Windows 시스템 파일과 앱 설정 파일이 포함되어 있습니다. 하위 폴더를 선택하는 것을 권장합니다."
506
+ return None
507
+
508
+
def _sample_file(
    path: Path, root: Path, status: str, reason: str = ""
) -> Dict[str, Any]:
    """Describe one scanned path as a plain dict for reporting.

    ``relative_path`` is the POSIX path below ``root``, or just the file name
    when ``path`` is not under ``root``. ``size_bytes`` is set only for
    regular files. If ``stat`` fails, ``size_bytes`` is ``None`` and
    ``modified_at`` is ``""``.
    """
    try:
        rel = path.relative_to(root).as_posix()
    except ValueError:
        rel = path.name

    try:
        info = path.stat()
        size = info.st_size if path.is_file() else None
        modified_at = _safe_iso_from_stat_mtime(info.st_mtime)
    except OSError:
        size, modified_at = None, ""

    record: Dict[str, Any] = {
        "path": str(path),
        "relative_path": rel,
        "name": path.name,
        "extension": path.suffix.lower(),
        "status": status,
        "reason": reason,
        "size_bytes": size,
        "modified_at": modified_at,
    }
    return record
533
+
534
+
535
+ def _clean_text(text: str) -> str:
536
+ return re.sub(r"\s+", " ", str(text or "")).strip()
537
+
538
+
539
+ def _chunks(text: str, size: int = 1200, overlap: int = 160) -> List[str]:
540
+ cleaned = str(text or "").strip()
541
+ if not cleaned:
542
+ return []
543
+ chunks: List[str] = []
544
+ start = 0
545
+ while start < len(cleaned):
546
+ end = min(len(cleaned), start + size)
547
+ chunks.append(cleaned[start:end])
548
+ if end >= len(cleaned):
549
+ break
550
+ start = max(0, end - overlap)
551
+ return chunks
552
+
553
+
# Prompt template for concept extraction. Filled with str.format using the
# placeholders {limit} and {text}.
_LLM_EXTRACT_CONCEPT_PROMPT = """Extract the key concepts from the following text.
Return ONLY a JSON array of objects, each with "concept" (string) and "importance" (float 0-1).
Extract up to {limit} concepts. Focus on named entities, technical terms, and domain-specific nouns.
Do NOT include common words, stop words, or generic terms.

Text:
{text}

JSON:"""

# Prompt template for relationship-triple extraction. Filled with str.format
# using the placeholders {limit}, {text} and {concepts}.
_LLM_EXTRACT_TRIPLE_PROMPT = """Extract relationship triples from the following text.
Return ONLY a JSON array of objects, each with:
- "subject": source concept (string)
- "relation": relationship verb (string, Korean or English)
- "object": target concept (string)
- "evidence": the sentence supporting this triple (string, max 240 chars)
- "confidence": how confident you are (float 0-1)

Extract up to {limit} triples. Focus on meaningful semantic relationships.

Text:
{text}

Concepts already identified: {concepts}

JSON:"""

# Feature switch read once at import time from LATTICEAI_LLM_EXTRACTION.
# Enabled by default; only "1", "true" or "yes" (case-insensitive) keep it on.
ENABLE_LLM_EXTRACTION = os.getenv("LATTICEAI_LLM_EXTRACTION", "true").lower() in (
    "1",
    "true",
    "yes",
)
586
+
587
+
def _llm_extraction_ready() -> bool:
    """Report whether LLM extraction is enabled and a router with a model is set."""
    if not ENABLE_LLM_EXTRACTION or not _llm_router_ref:
        return False
    return bool(_llm_router_ref.current_model_id)


def _llm_generate_sync(prompt: str, *, max_tokens: int, timeout: float = 30.0) -> str:
    """Run the router's async ``generate`` to completion from synchronous code.

    With no event loop running in this thread the coroutine is driven by
    ``asyncio.run`` directly. When a loop is already running, ``asyncio.run``
    cannot be nested, so the coroutine runs on its own loop in a worker thread
    and this call waits at most ``timeout`` seconds for it.

    Raises:
        concurrent.futures.TimeoutError: The worker did not finish in time.
        Exception: Whatever ``generate`` itself raises.
    """
    coro = _llm_router_ref.generate(prompt, max_tokens=max_tokens, temperature=0.1)
    try:
        # get_running_loop is the supported probe; get_event_loop is deprecated
        # when no loop exists and raises on newer Python versions.
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(coro)

    import concurrent.futures

    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        return pool.submit(asyncio.run, coro).result(timeout=timeout)
    finally:
        # wait=False keeps the timeout meaningful: a ``with`` block would join
        # the worker on exit and block until generation finished anyway.
        pool.shutdown(wait=False)


def _parse_llm_json_array(raw: str) -> Optional[List[Any]]:
    """Decode a model reply as a JSON array, tolerating a Markdown code fence.

    Returns ``None`` when the decoded value is not a list. Invalid JSON raises
    ``json.JSONDecodeError`` for the caller to handle.
    """
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned)
        cleaned = re.sub(r"\s*```$", "", cleaned)
    parsed = json.loads(cleaned)
    return parsed if isinstance(parsed, list) else None


def _llm_extract_concepts(text: str, limit: int = 12) -> Optional[List[str]]:
    """Ask the configured LLM for key concepts in ``text``.

    Args:
        text: Source text; only the first 3000 characters are sent.
        limit: Maximum number of reply items considered.

    Returns:
        A non-empty list of concept strings, or ``None`` when extraction is
        disabled, no model is selected, the call fails, or nothing usable came
        back. ``None`` tells the caller to fall back to rule-based extraction.
    """
    if not _llm_extraction_ready():
        return None
    prompt = _LLM_EXTRACT_CONCEPT_PROMPT.format(
        text=str(text or "")[:3000], limit=limit
    )
    try:
        parsed = _parse_llm_json_array(_llm_generate_sync(prompt, max_tokens=1024))
        if parsed is None:
            return None
        concepts: List[str] = []
        for item in parsed[:limit]:
            value = item.get("concept") if isinstance(item, dict) else item
            # Keep only real strings so the List[str] contract holds.
            if isinstance(value, str) and value.strip():
                concepts.append(value.strip())
        return concepts or None
    except Exception as e:
        # Deliberate best-effort: any failure means "use the rule-based path".
        logging.debug("LLM concept extraction failed (falling back to rules): %s", e)
    return None


def _llm_extract_triples(
    text: str, concepts: List[str], limit: int = 20
) -> Optional[List[Dict[str, Any]]]:
    """Ask the configured LLM for relationship triples in ``text``.

    Args:
        text: Source text; only the first 3000 characters are sent.
        concepts: Already identified concepts; the first 15 are listed in the
            prompt.
        limit: Maximum number of reply items considered.

    Returns:
        A non-empty list of dicts with keys ``subject``, ``relation``,
        ``object``, ``context`` (evidence, at most 240 characters) and
        ``confidence`` (float), or ``None`` when extraction is disabled, fails
        or yields nothing usable.
    """
    if not _llm_extraction_ready():
        return None
    prompt = _LLM_EXTRACT_TRIPLE_PROMPT.format(
        text=str(text or "")[:3000],
        limit=limit,
        concepts=", ".join(str(c) for c in (concepts or [])[:15]),
    )
    try:
        parsed = _parse_llm_json_array(_llm_generate_sync(prompt, max_tokens=2048))
        if parsed is None:
            return None
        triples: List[Dict[str, Any]] = []
        for item in parsed[:limit]:
            if not (isinstance(item, dict) and "subject" in item and "object" in item):
                continue
            try:
                confidence = float(item.get("confidence", 0.8))
            except (TypeError, ValueError):
                # One malformed confidence must not discard every triple.
                confidence = 0.8
            triples.append(
                {
                    "subject": str(item["subject"]),
                    "relation": str(item.get("relation", "관련됨")),
                    "object": str(item["object"]),
                    "context": str(item.get("evidence", ""))[:240],
                    "confidence": confidence,
                }
            )
        return triples or None
    except Exception as e:
        # Deliberate best-effort: any failure means "use the rule-based path".
        logging.debug("LLM triple extraction failed (falling back to rules): %s", e)
    return None
676
+
677
+
678
+ _CONCEPT_STOP: set = {
679
+ # English stop words
680
+ "the",
681
+ "and",
682
+ "for",
683
+ "with",
684
+ "this",
685
+ "that",
686
+ "from",
687
+ "into",
688
+ "which",
689
+ "are",
690
+ "was",
691
+ "were",
692
+ "has",
693
+ "have",
694
+ "had",
695
+ "can",
696
+ "will",
697
+ "would",
698
+ "could",
699
+ "should",
700
+ "may",
701
+ "might",
702
+ "must",
703
+ "shall",
704
+ "being",
705
+ "been",
706
+ "also",
707
+ "just",
708
+ "then",
709
+ "than",
710
+ "when",
711
+ "where",
712
+ "what",
713
+ "how",
714
+ "why",
715
+ "its",
716
+ "their",
717
+ "your",
718
+ "our",
719
+ "you",
720
+ "they",
721
+ "them",
722
+ "these",
723
+ "those",
724
+ "use",
725
+ "used",
726
+ "using",
727
+ "based",
728
+ "like",
729
+ "such",
730
+ "via",
731
+ "per",
732
+ "let",
733
+ "yes",
734
+ "not",
735
+ "but",
736
+ "are",
737
+ "all",
738
+ "any",
739
+ "out",
740
+ "new",
741
+ "get",
742
+ "set",
743
+ # Korean stop words
744
+ "사용자",
745
+ "내용",
746
+ "파일",
747
+ "채팅",
748
+ "답변",
749
+ "입니다",
750
+ "그리고",
751
+ "처럼",
752
+ "있어",
753
+ "없어",
754
+ "이야",
755
+ "이다",
756
+ "한다",
757
+ "하다",
758
+ "되다",
759
+ "됩니다",
760
+ "경우",
761
+ "방법",
762
+ "부분",
763
+ "상태",
764
+ "정도",
765
+ "결과",
766
+ "이후",
767
+ "이전",
768
+ "그것",
769
+ "이것",
770
+ "저것",
771
+ "여기",
772
+ "거기",
773
+ "저기",
774
+ "우리",
775
+ "저희",
776
+ "기능",
777
+ "서버",
778
+ "모델",
779
+ "설정",
780
+ "설명",
781
+ "버전",
782
+ "지원",
783
+ "사용",
784
+ "실행",
785
+ "todo",
786
+ "fixme",
787
+ "note",
788
+ "참고",
789
+ "주의",
790
+ "warning",
791
+ }
792
+
793
+
def _extract_concepts(text: str, limit: int = 12) -> List[str]:
    """Extract concepts, preferring the LLM and falling back to the rules.

    The rule-based extractor runs only when the LLM path returns nothing
    (``None`` or an empty list).
    """
    return _llm_extract_concepts(text, limit) or _extract_concepts_rules(text, limit)
800
+
801
+
def _extract_concepts_rules(text: str, limit: int = 12) -> List[str]:
    """Extract meaningful named concepts from text (rule-based).

    Candidates are collected in this order; the first spelling seen for a
    given lower-cased term wins, so earlier steps take priority:

    1. Backtick-quoted terms (explicitly technical)
    2. Double-quoted terms
    3. Multi-word proper nouns (Lattice AI, VS Code, Graph RAG)
    4. Single capitalized words (Claude, Python, FastAPI)
    5. Korean compound technical terms and particle-marked nouns
    6. Hyphenated / versioned identifiers (gpt-4o, mlx-vlm, gemma-4)

    Terms in ``_CONCEPT_STOP``, purely numeric terms and terms shorter than
    two characters are rejected. A shorter term is then dropped when the text
    never shows it without the suffix that forms a longer collected term.

    Args:
        text: Source text; ``None`` is treated as empty.
        limit: Maximum number of concepts returned.

    Returns:
        Up to ``limit`` concepts in collection order, original casing kept.
    """
    text = str(text or "")
    seen: dict = {}  # concept_lower → original form

    def _add(term: str) -> None:
        # Record a candidate unless it is a stop word, a number or too short.
        key = term.strip().lower()
        if key and key not in _CONCEPT_STOP and not key.isdigit() and len(key) >= 2:
            seen.setdefault(key, term.strip())

    # 1. Backtick-quoted code/term (highest confidence)
    for m in re.findall(r"`([^`]{2,40})`", text):
        if not re.search(r"[\(\)\[\]{}]", m):  # skip code expressions
            _add(m)

    # 2. Double-quoted terms (the pattern matches double quotes only)
    for m in re.findall(r'"([^"]{2,40})"', text):
        _add(m)

    # 3. Multi-word English proper nouns (Title Case or ALL-CAPS first word, 2–4 words).
    # Pattern A: Mixed-case first word — "Lattice AI", "Tool Use", "Graph RAG"
    for m in re.findall(
        r"([A-Z][a-z]{1,20}(?:\s+(?:[A-Z]{2,10}|[A-Z][a-z0-9]{1,20}|\d[\w.]{0,6})){1,3})",
        text,
    ):
        _add(m)
    # Pattern B: ALL-CAPS first word — "VS Code", "MCP Server"
    for m in re.findall(
        r"([A-Z]{2,6}(?:\s+(?:[A-Z]{2,10}|[A-Z][a-z0-9]{1,20})){1,2})",
        text,
    ):
        _add(m)

    # 4. Single capitalized proper noun.
    # Use ASCII-boundary lookaround instead of \b so Korean particles
    # (와, 의, 는 …) after an English word don't block the match.
    all_caps_words = re.findall(
        r"(?<![A-Za-z0-9])([A-Z][A-Za-z0-9]{2,24})(?![A-Za-z0-9])", text
    )
    freq: Dict[str, int] = {}
    for w in all_caps_words:
        freq[w] = freq.get(w, 0) + 1
    sentence_starts = set(re.findall(r"(?:^|(?<=[.!?])\s+)([A-Z][a-z]+)", text))
    for m, cnt in freq.items():
        if m.lower() in _CONCEPT_STOP:
            continue
        # A word seen only once at a sentence start is skipped: its capital
        # letter may just be sentence casing.
        if cnt >= 2 or m not in sentence_starts:
            _add(m)

    # 5. Korean technical compound nouns (2–12 Hangul chars plus a known suffix)
    for m in re.findall(
        r"[가-힣]{2,12}(?:AI|LLM|API|UI|RAG|bot|Bot|기능|모델|서버|에이전트|파이프라인|워크플로)",
        text,
    ):
        _add(m)
    # Korean standalone terms that appear directly before a particle
    # (topic/subject/object markers and similar)
    for m in re.findall(
        r"([가-힣]{2,12})(?:은|는|이|가|을|를|의|에서|으로|와|과)", text
    ):
        if m.lower() not in _CONCEPT_STOP and len(m) >= 2:
            # Only add if it's non-trivial (has 3+ chars or appears multiple times)
            cnt = text.count(m)
            if len(m) >= 3 or cnt >= 2:
                _add(m)

    # 6. Hyphenated / versioned identifiers (gpt-4o, gemma-4, mlx-vlm)
    for m in re.findall(r"\b([a-zA-Z][a-zA-Z0-9]*(?:-[a-zA-Z0-9.]+)+)\b", text):
        if len(m) >= 4:
            _add(m)

    # De-duplicate: remove shorter if ALL its occurrences in the source text
    # are followed immediately by the suffix that forms the longer concept.
    # "Lattice" → dropped when every occurrence is "Lattice AI"
    # "Claude" → kept because it appears as just "Claude" too.
    values = list(seen.values())
    values_lower = [v.lower() for v in values]
    keep = set(range(len(values)))
    for i, v in enumerate(values):
        vl = v.lower()
        for j, wl in enumerate(values_lower):
            if i == j or j not in keep:
                continue
            # Check if vl is a word-prefix of wl
            suffix = wl[len(vl) :]
            if not (wl.startswith(vl) and re.match(r"^[\s\-]", suffix)):
                continue
            # Count occurrences of v NOT followed by the suffix
            suffix_stripped = suffix.lstrip(" -")
            # Escape for regex
            # NOTE(review): pattern_with_suffix is built but never used below.
            pattern_with_suffix = re.escape(v) + r"[\s\-]+" + re.escape(suffix_stripped)
            pattern_alone = (
                re.escape(v) + r"(?![\s\-]*" + re.escape(suffix_stripped) + r")"
            )
            alone_count = len(re.findall(pattern_alone, text, re.IGNORECASE))
            if alone_count == 0:
                # Shorter term never appears alone → safe to remove
                keep.discard(i)
                break

    final = [values[i] for i in range(len(values)) if i in keep]
    return final[:limit]
911
+
912
+
913
+ # ──────────────────────────────────────────────────────────────────────────────
914
+ # Node type taxonomy (점 = 명사)
915
+ # ──────────────────────────────────────────────────────────────────────────────
916
+ # Chat — 대화 세션
917
+ # Document — 파일 (PDF·PPT·Word·Excel·이미지 등)
918
+ # Concept — 개념·아이디어·기술 용어
919
+ # Person — 사람 (사용자, 언급된 인물)
920
+ # Error — 오류·버그·예외
921
+ # Code — 코드 스니펫·함수·클래스
922
+ # Feature — 소프트웨어 기능
923
+ # Task — 할 일·액션 아이템
924
+ # Decision — 결정 사항
925
+
926
# Edge type vocabulary (edge = verb, phrased as a past-tense predicate).
#
# Maps each edge label to a regex of trigger stems.  ``_infer_edge`` tries the
# entries in insertion order against a lowercased sentence and returns the
# first label whose pattern matches, so the order of this dict is significant.
#
# English stems are guarded by ``(?<![A-Za-z])`` so they only match at the
# start of an English word: "because" must not trigger ``use``, "prefer" must
# not trigger ``refer``, "prefix" must not trigger ``fix``, "independent" must
# not trigger ``depend``.  The guard is ASCII-only on purpose, so an English
# stem directly preceded by Hangul, digits or punctuation still matches.
# Stems stay open-ended on the right so inflections ("uses", "resolved",
# "mentioned") keep matching.  Korean stems are matched as plain substrings.
EDGE_VERB = {
    "언급함": r"언급|(?<![A-Za-z])(?:mention|refer|cited)",
    "포함함": r"포함|구성|탑재|(?<![A-Za-z])(?:include|consist|contains)",
    "해결함": r"해결|수정|고쳤|(?<![A-Za-z])(?:resolv|fix|closed)",
    "의존함": r"의존|필요|(?<![A-Za-z])(?:depend|require|based on)",
    "설명함": r"설명|정의|란|이란|(?<![A-Za-z])(?:explain|describe|means)",
    "비교함": r"비교|차이|다르|(?<![A-Za-z])(?:versus|vs\.?|compare)",
    "사용함": r"사용|활용|이용|(?<![A-Za-z])(?:use|apply)",
    "연결함": r"연결|통합|연동|(?<![A-Za-z])(?:connect|integrate|link)",
    "확장함": r"확장|플러그인|(?<![A-Za-z])(?:extend|plugin|addon)",
    "생성함": r"생성|만들|(?<![A-Za-z])(?:create|generate|build|produced)",
    "대체함": r"대체|(?<![A-Za-z])(?:replace|instead|alternative)",
    "지원함": r"지원|제공|(?<![A-Za-z])(?:support|provide|offer)",
    "발생함": r"발생|(?<![A-Za-z])(?:occur|throw|raise|triggered)",
    "관련됨": r"관련|연관|(?<![A-Za-z])(?:related|associated)",
}
943
+
944
+
945
def _infer_edge(sentence: str) -> str:
    """Pick the edge label whose trigger pattern first matches *sentence*.

    The sentence is lowercased and the ``EDGE_VERB`` patterns are tried in the
    mapping's iteration order; the label of the first pattern that matches is
    returned.  When no pattern matches, the generic label "관련됨" is used.
    """
    lowered = sentence.lower()
    matching_labels = (
        label for label, pattern in EDGE_VERB.items() if re.search(pattern, lowered)
    )
    return next(matching_labels, "관련됨")
952
+
953
+
954
+ # Technical words that cannot be person names
955
+ _NOT_PERSON_WORDS: set = {
956
+ "use",
957
+ "api",
958
+ "rag",
959
+ "sdk",
960
+ "ide",
961
+ "cli",
962
+ "llm",
963
+ "mcp",
964
+ "ui",
965
+ "ux",
966
+ "new",
967
+ "old",
968
+ "get",
969
+ "set",
970
+ "run",
971
+ "add",
972
+ "fix",
973
+ "tool",
974
+ "code",
975
+ "base",
976
+ "core",
977
+ "data",
978
+ "file",
979
+ "test",
980
+ "type",
981
+ "mode",
982
+ "view",
983
+ }
984
+
985
+
986
+ def _classify_node_type(concept: str, text: str) -> str:
987
+ """Classify a concept into the node taxonomy.
988
+
989
+ Term-level signals take priority; then a tight ±60-char window is used
990
+ so distant keywords don't cause mis-classification.
991
+ """
992
+ term = concept.lower()
993
+
994
+ # ── Term-level signals (highest confidence) ───────────────────────────
995
+ if re.search(r"(?:error|exception|traceback|오류|에러|버그)$", term, re.I):
996
+ return "Error"
997
+ if re.search(r"error|exception|err\b", term, re.I) and len(concept) < 30:
998
+ return "Error"
999
+ if re.search(r"\(\)|\.py$|\.js$|\.ts$|\.go$|::\w", term):
1000
+ return "Code"
1001
+
1002
+ # Person: "First Last" pattern, neither word is a known technical term
1003
+ if re.match(r"^[A-Z][a-z]{1,15} [A-Z][a-z]{1,15}$", concept):
1004
+ words = term.split()
1005
+ if not any(w in _NOT_PERSON_WORDS for w in words):
1006
+ return "Person"
1007
+
1008
+ # ── Windowed context (±60 chars) — NOT used for Error to avoid false positives
1009
+ idx = text.lower().find(term)
1010
+ if idx >= 0:
1011
+ win = text[max(0, idx - 60) : idx + len(concept) + 60].lower()
1012
+ if re.search(r"def |class |function|함수|클래스|메서드|import", win):
1013
+ return "Code"
1014
+ # Feature: concept appears DIRECTLY adjacent to 기능/feature keyword
1015
+ if len(concept) <= 12 and re.search(
1016
+ rf"{re.escape(term)}.{{0,8}}(?:기능|feature)|(?:기능|feature).{{0,8}}{re.escape(term)}",
1017
+ win,
1018
+ ):
1019
+ return "Feature"
1020
+
1021
+ return "Concept"
1022
+
1023
+
1024
def _extract_triples(
    text: str,
    concepts: List[str],
    limit: int = 20,
) -> List[Dict[str, str]]:
    """Extract triples, preferring the LLM extractor over the rule-based one.

    ``_llm_extract_triples`` is called first; ``_extract_triples_rules`` runs
    only when the LLM extractor returns a falsy result.
    """
    return _llm_extract_triples(text, concepts, limit) or _extract_triples_rules(
        text, concepts, limit
    )
1034
+
1035
+
1036
+ def _extract_triples_rules(
1037
+ text: str,
1038
+ concepts: List[str],
1039
+ limit: int = 20,
1040
+ ) -> List[Dict[str, str]]:
1041
+ """Extract (subject, verb-edge, object, context) triples from text (rule-based).
1042
+
1043
+ For each sentence containing ≥2 concepts, infer the verb-form edge label
1044
+ from surrounding context and create a directed triple.
1045
+ """
1046
+ if len(concepts) < 2:
1047
+ return []
1048
+
1049
+ concept_lower = {c.lower(): c for c in concepts}
1050
+ triples: List[Dict[str, str]] = []
1051
+ seen_pairs: set = set()
1052
+
1053
+ # Split on sentence boundaries
1054
+ sentences = re.split(r"(?<=[.!?\n])\s+|\n{2,}", text)
1055
+ for sent in sentences:
1056
+ sent = sent.strip()
1057
+ if len(sent) < 8:
1058
+ continue
1059
+ sent_lower = sent.lower()
1060
+
1061
+ present = [concept_lower[k] for k in concept_lower if k in sent_lower]
1062
+ if len(present) < 2:
1063
+ continue
1064
+
1065
+ edge = _infer_edge(sent)
1066
+
1067
+ for i in range(len(present) - 1):
1068
+ subj, obj = present[i], present[i + 1]
1069
+ # Deduplicate by (subj, obj) regardless of direction for same edge
1070
+ pair_key = tuple(sorted([subj.lower(), obj.lower()])) + (edge,)
1071
+ if pair_key in seen_pairs:
1072
+ continue
1073
+ seen_pairs.add(pair_key)
1074
+ triples.append(
1075
+ {
1076
+ "subject": subj,
1077
+ "relation": edge, # verb form (동사)
1078
+ "object": obj,
1079
+ "context": sent[:240],
1080
+ }
1081
+ )
1082
+ if len(triples) >= limit:
1083
+ return triples
1084
+
1085
+ return triples
1086
+
1087
+
1088
def _semantic_items(text: str) -> List[Dict[str, str]]:
    """Collect explicit decision / task items from *text*, line by line.

    Each line is passed through ``_clean_text``; lines shorter than six
    characters afterwards are ignored.  A line matching the decision keywords
    yields a "Decision" item and a line matching the task keywords yields a
    "Task" item, so one line may produce both.  At most eight items are
    returned.
    """
    found: List[Dict[str, str]] = []
    for raw in str(text or "").splitlines():
        cleaned = _clean_text(raw)
        if len(cleaned) < 6:
            continue

        probe = cleaned.lower()
        title, summary = cleaned[:120], cleaned[:500]

        is_decision = re.search(r"(결정|확정|하기로|decided|decision)", probe)
        is_task = re.search(
            r"(todo|해야|하자|진행|구현|수정|확인|next|task|\[ \])", probe
        )

        if is_decision:
            found.append({"type": "Decision", "title": title, "summary": summary})
        if is_task:
            found.append({"type": "Task", "title": title, "summary": summary})
    return found[:8]
1103
+
1104
+
1105
def _topic_candidates(text: str, limit: int = 8) -> List[str]:
    """Return compact keyword candidates for fallback graph search.

    The result of ``_extract_concepts`` is used when it is non-empty.
    Otherwise the text is tokenised into ASCII identifiers and Hangul runs;
    tokens listed in ``_CONCEPT_STOP`` are dropped and the remainder is
    de-duplicated case-insensitively, keeping the first spelling seen.
    """
    primary = _extract_concepts(text, limit=limit)
    if primary:
        return primary[:limit]

    token_pattern = r"[A-Za-z][A-Za-z0-9_.:-]{2,}|[가-힣]{2,12}"
    picked: Dict[str, str] = {}
    for token in re.findall(token_pattern, str(text or "")):
        lowered = token.lower()
        if lowered in _CONCEPT_STOP or lowered.isdigit():
            continue
        if lowered not in picked:
            picked[lowered] = token
        if len(picked) >= limit:
            break
    return list(picked.values())[:limit]
1121
+
1122
+
1123
# Export every module-level name defined so far except dunder names, so that
# single-underscore helpers are also picked up by ``from <module> import *``.
__all__ = [ident for ident in globals() if ident[:2] != "__"]