ltcai 3.5.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +73 -35
  2. package/docs/CARRYOVER_AUDIT_v3.6.0.md +61 -0
  3. package/docs/CHANGELOG.md +32 -0
  4. package/docs/HANDOVER_v3.6.0.md +46 -0
  5. package/docs/RUNTIME_HOOK_COVERAGE_v3.6.0.md +49 -0
  6. package/docs/V4_BRAIN_ARCHITECTURE.md +322 -0
  7. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +509 -0
  8. package/docs/V4_IMPLEMENTATION_PLAN.md +470 -0
  9. package/docs/architecture.md +13 -12
  10. package/docs/kg-schema.md +102 -53
  11. package/docs/privacy.md +18 -2
  12. package/docs/security-model.md +17 -0
  13. package/kg_schema.py +139 -10
  14. package/knowledge_graph.py +874 -26
  15. package/knowledge_graph_api.py +11 -127
  16. package/latticeai/__init__.py +1 -1
  17. package/latticeai/api/admin.py +1 -1
  18. package/latticeai/api/agents.py +7 -1
  19. package/latticeai/api/auth.py +27 -4
  20. package/latticeai/api/browser.py +217 -0
  21. package/latticeai/api/chat.py +112 -76
  22. package/latticeai/api/health.py +1 -1
  23. package/latticeai/api/hooks.py +1 -1
  24. package/latticeai/api/knowledge_graph.py +146 -0
  25. package/latticeai/api/local_files.py +1 -1
  26. package/latticeai/api/mcp.py +23 -11
  27. package/latticeai/api/memory.py +1 -1
  28. package/latticeai/api/models.py +1 -1
  29. package/latticeai/api/network.py +81 -0
  30. package/latticeai/api/portability.py +93 -0
  31. package/latticeai/api/realtime.py +1 -1
  32. package/latticeai/api/search.py +26 -2
  33. package/latticeai/api/security_dashboard.py +2 -3
  34. package/latticeai/api/setup.py +2 -2
  35. package/latticeai/api/static_routes.py +2 -4
  36. package/latticeai/api/tools.py +3 -0
  37. package/latticeai/api/workflow_designer.py +46 -0
  38. package/latticeai/api/workspace.py +71 -49
  39. package/latticeai/app_factory.py +1710 -0
  40. package/latticeai/brain/__init__.py +18 -0
  41. package/latticeai/brain/context.py +213 -0
  42. package/latticeai/brain/conversations.py +236 -0
  43. package/latticeai/brain/identity.py +175 -0
  44. package/latticeai/brain/memory.py +102 -0
  45. package/latticeai/brain/network.py +205 -0
  46. package/latticeai/core/agent.py +31 -7
  47. package/latticeai/core/audit.py +0 -7
  48. package/latticeai/core/config.py +1 -1
  49. package/latticeai/core/context_builder.py +1 -2
  50. package/latticeai/core/enterprise.py +1 -1
  51. package/latticeai/core/graph_curator.py +2 -2
  52. package/latticeai/core/marketplace.py +1 -1
  53. package/latticeai/core/mcp_registry.py +791 -0
  54. package/latticeai/core/model_compat.py +1 -1
  55. package/latticeai/core/model_resolution.py +0 -1
  56. package/latticeai/core/multi_agent.py +238 -4
  57. package/latticeai/core/security.py +1 -1
  58. package/latticeai/core/sessions.py +37 -7
  59. package/latticeai/core/workflow_engine.py +114 -2
  60. package/latticeai/core/workspace_os.py +58 -10
  61. package/latticeai/models/__init__.py +7 -0
  62. package/latticeai/models/router.py +779 -0
  63. package/latticeai/server_app.py +29 -1504
  64. package/latticeai/services/agent_runtime.py +1 -0
  65. package/latticeai/services/app_context.py +75 -14
  66. package/latticeai/services/ingestion.py +318 -0
  67. package/latticeai/services/kg_portability.py +207 -0
  68. package/latticeai/services/memory_service.py +39 -11
  69. package/latticeai/services/model_runtime.py +2 -5
  70. package/latticeai/services/platform_runtime.py +100 -23
  71. package/latticeai/services/search_service.py +17 -8
  72. package/latticeai/services/tool_dispatch.py +12 -2
  73. package/latticeai/services/triggers.py +241 -0
  74. package/latticeai/services/upload_service.py +37 -12
  75. package/latticeai/services/workspace_service.py +31 -0
  76. package/llm_router.py +29 -772
  77. package/ltcai_cli.py +1 -2
  78. package/mcp_registry.py +25 -788
  79. package/p_reinforce.py +124 -14
  80. package/package.json +11 -8
  81. package/scripts/build_vsix.mjs +72 -0
  82. package/scripts/bump_version.py +99 -0
  83. package/scripts/generate_diagrams.py +0 -1
  84. package/scripts/lint_v3.mjs +82 -18
  85. package/scripts/validate_release_artifacts.py +0 -1
  86. package/scripts/wheel_smoke.py +142 -0
  87. package/server.py +11 -7
  88. package/setup_wizard.py +1142 -0
  89. package/static/account.html +2 -4
  90. package/static/admin.html +3 -5
  91. package/static/chat.html +3 -6
  92. package/static/graph.html +2 -4
  93. package/static/sw.js +81 -52
  94. package/static/v3/asset-manifest.json +20 -19
  95. package/static/v3/css/{lattice.base.e4cdd05d.css → lattice.base.49deefb5.css} +1 -1
  96. package/static/v3/css/lattice.base.css +1 -1
  97. package/static/v3/css/{lattice.components.9b49d614.css → lattice.components.cde18231.css} +1 -1
  98. package/static/v3/css/lattice.components.css +1 -1
  99. package/static/v3/css/{lattice.shell.8fcc9d33.css → lattice.shell.29d36d85.css} +1 -1
  100. package/static/v3/css/lattice.shell.css +1 -1
  101. package/static/v3/css/{lattice.tokens.e7018963.css → lattice.tokens.304cbc40.css} +3 -0
  102. package/static/v3/css/lattice.tokens.css +3 -0
  103. package/static/v3/css/{lattice.views.22f69117.css → lattice.views.0a18b6c5.css} +2 -2
  104. package/static/v3/css/lattice.views.css +2 -2
  105. package/static/v3/index.html +3 -4
  106. package/static/v3/js/{app.d086489d.js → app.356e6452.js} +1 -1
  107. package/static/v3/js/core/{api.12b568ad.js → api.7a308b89.js} +39 -1
  108. package/static/v3/js/core/api.js +38 -0
  109. package/static/v3/js/core/{routes.d214b399.js → routes.7222343d.js} +22 -22
  110. package/static/v3/js/core/routes.js +22 -22
  111. package/static/v3/js/core/{shell.d05266f5.js → shell.a1657f20.js} +4 -4
  112. package/static/v3/js/core/shell.js +1 -1
  113. package/static/v3/js/core/{store.34ebd5e6.js → store.204a08b2.js} +1 -1
  114. package/static/v3/js/core/store.js +1 -1
  115. package/static/v3/js/views/graph-canvas.17c15d65.js +509 -0
  116. package/static/v3/js/views/graph-canvas.js +509 -0
  117. package/static/v3/js/views/{hybrid-search.b22b97e0.js → hybrid-search.2fb63ed9.js} +1 -2
  118. package/static/v3/js/views/hybrid-search.js +1 -2
  119. package/static/v3/js/views/knowledge-graph.5e40cbeb.js +509 -0
  120. package/static/v3/js/views/knowledge-graph.js +326 -54
  121. package/static/vendor/chart.umd.min.js +20 -0
  122. package/static/vendor/fonts/inter-latin-300-normal.woff2 +0 -0
  123. package/static/vendor/fonts/inter-latin-400-normal.woff2 +0 -0
  124. package/static/vendor/fonts/inter-latin-500-normal.woff2 +0 -0
  125. package/static/vendor/fonts/inter-latin-600-normal.woff2 +0 -0
  126. package/static/vendor/fonts/inter-latin-700-normal.woff2 +0 -0
  127. package/static/vendor/fonts/inter-latin-800-normal.woff2 +0 -0
  128. package/static/vendor/fonts/inter.css +44 -0
  129. package/static/vendor/icons/tabler-icons.min.css +4 -0
  130. package/static/vendor/icons/tabler-icons.woff2 +0 -0
  131. package/static/vendor/marked.min.js +69 -0
  132. package/static/workspace.html +2 -2
  133. package/telegram_bot.py +1 -2
  134. package/tools/commands.py +4 -2
  135. package/tools/computer.py +1 -1
  136. package/tools/documents.py +1 -3
  137. package/tools/filesystem.py +0 -4
  138. package/tools/knowledge.py +1 -3
  139. package/tools/network.py +1 -3
  140. package/codex_telegram_bot.py +0 -195
  141. package/docs/assets/v3.4.0/agent-run.png +0 -0
  142. package/docs/assets/v3.4.0/agents.png +0 -0
  143. package/docs/assets/v3.4.0/before/chat-before.png +0 -0
  144. package/docs/assets/v3.4.0/before/files-before.png +0 -0
  145. package/docs/assets/v3.4.0/chat.png +0 -0
  146. package/docs/assets/v3.4.0/connect-folder.png +0 -0
  147. package/docs/assets/v3.4.0/files.png +0 -0
  148. package/docs/assets/v3.4.0/home.png +0 -0
  149. package/docs/assets/v3.4.0/hooks-dispatch.png +0 -0
  150. package/docs/assets/v3.4.0/knowledge-graph.png +0 -0
  151. package/docs/assets/v3.4.0/local-agent.png +0 -0
  152. package/docs/assets/v3.4.0/memory.png +0 -0
  153. package/docs/assets/v3.4.0/settings.png +0 -0
  154. package/docs/assets/v3.4.0/vision-input.png +0 -0
  155. package/docs/assets/v3.4.0/workflows.png +0 -0
  156. package/docs/assets/v3.4.1/e2e_runtime_log.txt +0 -42
  157. package/docs/assets/v3.4.1/hooks-dispatch.png +0 -0
  158. package/docs/assets/v3.4.1/local-agent.png +0 -0
  159. package/docs/images/admin-dashboard.png +0 -0
  160. package/docs/images/architecture.png +0 -0
  161. package/docs/images/enterprise.png +0 -0
  162. package/docs/images/graph.png +0 -0
  163. package/docs/images/hero.gif +0 -0
  164. package/docs/images/knowledge-graph.png +0 -0
  165. package/docs/images/lattice-ai-demo.gif +0 -0
  166. package/docs/images/lattice-ai-hero.png +0 -0
  167. package/docs/images/logo.svg +0 -33
  168. package/docs/images/mobile-responsive.png +0 -0
  169. package/docs/images/model-recommendation.png +0 -0
  170. package/docs/images/onboarding.png +0 -0
  171. package/docs/images/organization.png +0 -0
  172. package/docs/images/pipeline.png +0 -0
  173. package/docs/images/screenshot-admin.png +0 -0
  174. package/docs/images/screenshot-chat.png +0 -0
  175. package/docs/images/screenshot-graph.png +0 -0
  176. package/docs/images/skills.png +0 -0
  177. package/docs/images/workspace-dark.png +0 -0
  178. package/docs/images/workspace-light.png +0 -0
  179. package/docs/images/workspace.png +0 -0
  180. package/requirements.txt +0 -16
  181. package/static/v3/js/views/knowledge-graph.a14ea7e7.js +0 -237
@@ -1,130 +1,14 @@
1
- """Knowledge graph page and API routes."""
1
+ """Deprecation shim — the knowledge graph router moved in v4.
2
2
 
3
- from pathlib import Path
4
- from typing import Any, Callable, Dict, Optional
3
+ The ``/knowledge-graph/*`` data router (and legacy ``/graph`` page routes)
4
+ now live in :mod:`latticeai.api.knowledge_graph`. This root module remains
5
+ importable for the deprecation window and will be removed in a future major
6
+ release.
7
+ """
5
8
 
6
- from fastapi import APIRouter, HTTPException, Request
7
- from fastapi.responses import FileResponse
8
- from pydantic import BaseModel
9
+ from latticeai.api.knowledge_graph import ( # noqa: F401
10
+ KnowledgeGraphIngestRequest,
11
+ create_knowledge_graph_router,
12
+ )
9
13
 
10
-
11
- class KnowledgeGraphIngestRequest(BaseModel):
12
- type: str
13
- content: str = ""
14
- role: Optional[str] = None
15
- title: Optional[str] = None
16
- source: Optional[str] = None
17
- conversation_id: Optional[str] = None
18
- user_email: Optional[str] = None
19
- user_nickname: Optional[str] = None
20
- metadata: Optional[Dict[str, Any]] = None
21
-
22
-
23
- def create_knowledge_graph_router(
24
- *,
25
- get_graph: Callable[[], Any],
26
- require_graph: Callable[[], None],
27
- require_user: Callable[[Request], str],
28
- static_dir: Path,
29
- ) -> APIRouter:
30
- router = APIRouter()
31
-
32
- def graph():
33
- require_graph()
34
- return get_graph()
35
-
36
- @router.get("/graph")
37
- async def knowledge_graph_page(request: Request):
38
- """Serve the interactive knowledge graph canvas UI."""
39
- graph()
40
- require_user(request)
41
- response = FileResponse(static_dir / "graph.html")
42
- response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
43
- response.headers["Pragma"] = "no-cache"
44
- response.headers["Expires"] = "0"
45
- return response
46
-
47
- @router.get("/knowledge-graph")
48
- async def knowledge_graph_legacy_page(request: Request):
49
- """Backward-compatible route for the graph page."""
50
- graph()
51
- require_user(request)
52
- response = FileResponse(static_dir / "graph.html")
53
- response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
54
- response.headers["Pragma"] = "no-cache"
55
- response.headers["Expires"] = "0"
56
- return response
57
-
58
- @router.get("/knowledge-graph/stats")
59
- async def knowledge_graph_stats(request: Request):
60
- require_user(request)
61
- return graph().stats()
62
-
63
- @router.get("/knowledge-graph/schema")
64
- async def knowledge_graph_schema(request: Request):
65
- require_user(request)
66
- stats = graph().stats()
67
- return {
68
- "legacy_schema_version": stats.get("schema_version"),
69
- "v2_schema_available": stats.get("v2_schema_available"),
70
- "v2": stats.get("v2"),
71
- }
72
-
73
- @router.get("/knowledge-graph/graph")
74
- async def knowledge_graph_data(request: Request, limit: int = 300):
75
- require_user(request)
76
- return graph().graph(limit)
77
-
78
- @router.get("/knowledge-graph/documents")
79
- async def knowledge_graph_documents(request: Request, limit: int = 200):
80
- """Ingested documents (uploads + indexed local docs) with index state.
81
-
82
- Backs the Files view so uploaded content is visible end-to-end:
83
- upload → Files → Knowledge Graph → Hybrid Search → Chat.
84
- """
85
- require_user(request)
86
- return graph().list_documents(limit)
87
-
88
- @router.get("/knowledge-graph/search")
89
- async def knowledge_graph_search(q: str, request: Request, limit: int = 30):
90
- require_user(request)
91
- if not q or not q.strip():
92
- return {"query": q, "matches": []}
93
- return graph().search(q, limit)
94
-
95
- @router.get("/knowledge-graph/context")
96
- async def knowledge_graph_context(q: str, request: Request, limit: int = 6):
97
- require_user(request)
98
- return {"query": q, "context": graph().context_for_query(q, limit)}
99
-
100
- @router.get("/knowledge-graph/neighbors/{node_id:path}")
101
- async def knowledge_graph_neighbors(node_id: str, request: Request):
102
- require_user(request)
103
- if not node_id:
104
- raise HTTPException(status_code=400, detail="node_id required")
105
- return graph().neighbors(node_id)
106
-
107
- @router.post("/knowledge-graph/ingest")
108
- async def knowledge_graph_ingest(req: KnowledgeGraphIngestRequest, request: Request):
109
- current_user = require_user(request)
110
- kg = graph()
111
- event_type = (req.type or "").strip().lower()
112
- if event_type not in {"message", "ai_response", "note"}:
113
- raise HTTPException(status_code=400, detail="지원하는 type: message, ai_response, note")
114
- role = req.role or ("assistant" if event_type == "ai_response" else "user")
115
- return kg.ingest_message(
116
- role,
117
- req.content,
118
- user_email=req.user_email or current_user,
119
- user_nickname=req.user_nickname,
120
- source=req.source or "mcp",
121
- conversation_id=req.conversation_id,
122
- raw={
123
- "type": req.type,
124
- "title": req.title,
125
- "content": req.content,
126
- "metadata": req.metadata or {},
127
- },
128
- )
129
-
130
- return router
14
+ __all__ = ["KnowledgeGraphIngestRequest", "create_knowledge_graph_router"]
@@ -1,3 +1,3 @@
1
1
  """Lattice AI - modular server package."""
2
2
 
3
- __version__ = "3.5.0"
3
+ __version__ = "4.0.0"
@@ -2,7 +2,7 @@
2
2
 
3
3
  import logging
4
4
  from collections import defaultdict
5
- from typing import Any, Callable, Dict, List, Optional
5
+ from typing import Callable, Dict, List, Optional
6
6
 
7
7
  from fastapi import APIRouter, HTTPException, Request
8
8
  from pydantic import BaseModel
@@ -163,7 +163,13 @@ def create_agents_router(
163
163
  current_user = require_user(request)
164
164
  scope = gate_write(request)
165
165
  try:
166
- return runtime.start(
166
+ # Worker thread: an LLM-backed run blocks on model generation and
167
+ # must not stall the event loop (the sync model bridge also
168
+ # requires a loop-free thread).
169
+ import asyncio
170
+
171
+ return await asyncio.to_thread(
172
+ runtime.start,
167
173
  req.goal,
168
174
  user_email=current_user or None,
169
175
  scope=scope,
@@ -1,5 +1,7 @@
1
1
  """Authentication API router: register, login, logout, SSO, profile."""
2
2
 
3
+ import base64
4
+ import hashlib
3
5
  import logging
4
6
  import secrets
5
7
  import time
@@ -69,11 +71,22 @@ def create_auth_router(
69
71
  ) -> APIRouter:
70
72
  router = APIRouter()
71
73
 
74
+ def _enforce_password_policy(password: str) -> None:
75
+ # Real policy (v4): length >= 8 with letters AND digits. A 4-char
76
+ # minimum was not a policy.
77
+ pw = str(password or "")
78
+ if len(pw) < 8 or not any(c.isalpha() for c in pw) or not any(c.isdigit() for c in pw):
79
+ raise HTTPException(
80
+ status_code=400,
81
+ detail="비밀번호는 8자 이상이며 영문자와 숫자를 모두 포함해야 합니다.",
82
+ )
83
+
72
84
  @router.post("/register")
73
85
  async def register(req: UserRegister, request: Request):
74
86
  check_ip_rate_limit(client_ip(request), "register", max_calls=5, window_secs=3600)
75
87
  if not open_registration:
76
88
  raise HTTPException(status_code=403, detail="회원가입이 비활성화되어 있습니다. 관리자에게 문의하세요.")
89
+ _enforce_password_policy(req.password)
77
90
  users = load_users()
78
91
  if req.email in users:
79
92
  raise HTTPException(status_code=400, detail="이미 존재하는 이메일입니다.")
@@ -123,7 +136,15 @@ def create_auth_router(
123
136
  raise HTTPException(status_code=503, detail="SSO가 설정되지 않았습니다.")
124
137
  state = secrets.token_urlsafe(16)
125
138
  nonce = secrets.token_urlsafe(16)
126
- _sso_states[state] = (time.time(), nonce)
139
+ # PKCE (S256): bind the token exchange to this login, so an
140
+ # intercepted authorization code is useless without the verifier.
141
+ code_verifier = secrets.token_urlsafe(48)
142
+ code_challenge = (
143
+ base64.urlsafe_b64encode(hashlib.sha256(code_verifier.encode("ascii")).digest())
144
+ .rstrip(b"=")
145
+ .decode("ascii")
146
+ )
147
+ _sso_states[state] = (time.time(), nonce, code_verifier)
127
148
  params = urlencode({
128
149
  "client_id": settings["client_id"],
129
150
  "response_type": "code",
@@ -131,6 +152,8 @@ def create_auth_router(
131
152
  "scope": settings.get("scopes") or "openid email profile",
132
153
  "state": state,
133
154
  "nonce": nonce,
155
+ "code_challenge": code_challenge,
156
+ "code_challenge_method": "S256",
134
157
  })
135
158
  return RedirectResponse(f"{discovery['authorization_endpoint']}?{params}")
136
159
 
@@ -141,7 +164,7 @@ def create_auth_router(
141
164
  entry = _sso_states.pop(state, None)
142
165
  if entry is None or time.time() - entry[0] > 300:
143
166
  raise HTTPException(status_code=400, detail="유효하지 않은 SSO 상태입니다.")
144
- _, nonce = entry
167
+ _, nonce, code_verifier = entry
145
168
  settings = get_sso_settings()
146
169
  discovery = await get_sso_discovery()
147
170
  if not settings.get("enabled") or not discovery:
@@ -154,6 +177,7 @@ def create_auth_router(
154
177
  "redirect_uri": settings["redirect_uri"],
155
178
  "client_id": settings["client_id"],
156
179
  "client_secret": settings["client_secret"],
180
+ "code_verifier": code_verifier,
157
181
  }, headers={"Accept": "application/json"}, timeout=15)
158
182
  tokens = r.json()
159
183
  id_token = tokens.get("id_token")
@@ -214,8 +238,7 @@ def create_auth_router(
214
238
  email = require_user(request)
215
239
  if not email:
216
240
  raise HTTPException(status_code=401, detail="인증이 필요합니다.")
217
- if len(req.new_password) < 4:
218
- raise HTTPException(status_code=400, detail="새 비밀번호는 4자 이상이어야 합니다.")
241
+ _enforce_password_policy(req.new_password)
219
242
  users = load_users()
220
243
  user = users.get(email)
221
244
  if not user:
@@ -0,0 +1,217 @@
1
+ """Browser & web ingestion — Knowledge Graph inputs, not standalone features.
2
+
3
+ v3.6.0 Knowledge Graph First: a public URL or an open browser tab is just another
4
+ source that converges into the Knowledge Graph through the unified ingestion
5
+ pipeline. Everything runs on the **local runtime** — the server fetches/reads
6
+ locally, stores into local SQLite, and never uploads to a cloud service.
7
+
8
+ Two layers, both feeding ``IngestionPipeline.ingest``:
9
+
10
+ * ``POST /api/browser/read-url`` — the runtime fetches a public URL locally,
11
+ extracts readable text, stores it as ``source_type=web_url``. Fails gracefully
12
+ on blocked / login-required pages.
13
+ * ``POST /api/browser/ingest-current-tab`` — accepts a payload from the local
14
+ browser extension (url/title/text/selected_text/html), sanitizes + size-limits
15
+ it, stores it as ``source_type=browser_tab``.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from html.parser import HTMLParser
21
+ from typing import Any, Callable, Optional, Tuple
22
+ from urllib.parse import urlparse
23
+
24
+ from fastapi import APIRouter, HTTPException, Request
25
+ from pydantic import BaseModel
26
+
27
+ from latticeai.services.ingestion import IngestionItem
28
+
29
+ MAX_TAB_BYTES = 4 * 1024 * 1024 # 4 MB per captured tab payload
30
+ MAX_URL_FETCH_BYTES = 4 * 1024 * 1024 # 4 MB cap on a fetched page
31
+ URL_FETCH_TIMEOUT = 12.0 # seconds
32
+
33
+
34
class BrowserFetchError(Exception):
    """Signal that a page could not be retrieved for ingestion.

    The default fetcher raises this for network failures, HTTP error
    statuses (including login walls / blocked pages) and unsupported content
    types; the read-url route turns it into an HTTP 422 response.
    """
36
+
37
+
38
# ── readable-text extraction ─────────────────────────────────────────────────
# Elements whose text content is never reader-visible page copy.
_SKIP_TAGS = {"script", "style", "noscript", "template", "svg", "head"}
# Elements that terminate a visual line; a newline is emitted when one closes.
_BLOCK_TAGS = {
    "p", "div", "br", "li", "h1", "h2", "h3", "h4", "h5", "h6", "section", "article",
}


class _TextExtractor(HTMLParser):
    """Collect the ``<title>`` and the visible text of an HTML document.

    Text inside any ``_SKIP_TAGS`` element is ignored; block-level elements
    and ``<br>`` introduce line breaks in the extracted text.
    """

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self._skip_depth = 0  # nesting depth inside _SKIP_TAGS elements
        self._chunks: list[str] = []
        self.title: str = ""
        self._in_title = False

    def handle_starttag(self, tag, attrs):
        if tag in _SKIP_TAGS:
            self._skip_depth += 1
        if tag == "title":
            self._in_title = True
        if tag == "br":
            # <br> is a void element: the parser never reports an end tag for
            # it, so the line break has to be recorded on the start tag.
            self._chunks.append("\n")

    def handle_endtag(self, tag):
        if tag in _SKIP_TAGS and self._skip_depth > 0:
            self._skip_depth -= 1
        if tag == "title":
            self._in_title = False
        if tag in _BLOCK_TAGS:
            self._chunks.append("\n")

    def handle_data(self, data):
        if self._in_title:
            self.title += data
        if self._skip_depth == 0:
            text = data.strip()
            if text:
                self._chunks.append(text)

    def text(self) -> str:
        """Return the collected text, one non-empty line per paragraph."""
        raw = " ".join(self._chunks)
        # Trim every line and drop the empty ones, keeping paragraph breaks.
        lines = [ln.strip() for ln in raw.replace("\r", "").split("\n")]
        return "\n".join([ln for ln in lines if ln]).strip()


def extract_readable_text(html: str) -> Tuple[str, str]:
    """Return ``(title, readable_text)`` from an HTML string. Never raises.

    ``html`` may be empty or ``None``; both yield ``("", "")``.
    """
    parser = _TextExtractor()
    try:
        parser.feed(html or "")
        # Flush the parser: without close(), trailing text that contains a
        # bare "&" (e.g. "R&D") stays buffered and would be silently dropped.
        parser.close()
    except Exception:  # noqa: BLE001 — malformed HTML must still yield best-effort text
        pass
    return parser.title.strip(), parser.text()
87
+
88
+
89
def _default_fetch_url(url: str) -> Tuple[str, str]:
    """Fetch a public URL on the local runtime and extract readable text.

    The body is streamed and reading stops once ``MAX_URL_FETCH_BYTES`` bytes
    have been received, so an oversized page is truncated rather than being
    downloaded in full.

    Returns:
        ``(title, text)``; ``title`` falls back to ``url`` when the page has
        no title.

    Raises:
        BrowserFetchError: on a network failure, an HTTP status >= 400
            (401/403 are reported as login-required/blocked) or a content
            type that is neither HTML nor text, so the route can fail
            gracefully.
    """
    import httpx

    try:
        with httpx.Client(
            follow_redirects=True,
            timeout=URL_FETCH_TIMEOUT,
            headers={"User-Agent": "LatticeAI-local/3.6 (+local-first knowledge graph)"},
        ) as client:
            with client.stream("GET", url) as resp:
                status = resp.status_code
                if status in (401, 403):
                    raise BrowserFetchError(
                        "The page is login-required or blocked (HTTP %s)." % status
                    )
                if status >= 400:
                    raise BrowserFetchError(f"The page returned HTTP {status}.")
                content_type = resp.headers.get("content-type", "")
                # A missing content type is tolerated; an explicit non-text one is not.
                if content_type and "html" not in content_type and "text" not in content_type:
                    raise BrowserFetchError(f"Unsupported content type: {content_type}.")
                # Enforce the size cap while downloading instead of after the
                # whole body is already in memory.
                payload = bytearray()
                for chunk in resp.iter_bytes():
                    payload += chunk
                    if len(payload) >= MAX_URL_FETCH_BYTES:
                        del payload[MAX_URL_FETCH_BYTES:]
                        break
                charset = resp.charset_encoding or "utf-8"
    except httpx.HTTPError as exc:
        raise BrowserFetchError(f"Could not reach the page: {exc}") from exc

    try:
        body = bytes(payload).decode(charset, "replace")
    except LookupError:
        # The server declared a charset Python does not know; fall back to UTF-8.
        body = bytes(payload).decode("utf-8", "replace")
    title, text = extract_readable_text(body)
    return (title or url, text)
118
+
119
+
120
def _validate_http_url(url: str) -> str:
    """Return ``url`` stripped of surrounding whitespace if it is a usable http(s) URL.

    Raises:
        HTTPException: status 400 when the URL is empty, uses a scheme other
            than http/https, has no network location, or cannot be parsed.
    """
    url = (url or "").strip()
    if not url:
        raise HTTPException(status_code=400, detail="url is required.")
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on e.g. an unbalanced IPv6 bracket ("http://[::1");
        # report that as a client error instead of letting it become a 500.
        raise HTTPException(status_code=400, detail="Malformed URL.") from None
    if parsed.scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Only http(s) URLs are supported.")
    if not parsed.netloc:
        raise HTTPException(status_code=400, detail="Malformed URL.")
    return url
130
+
131
+
132
+ # ── request models ───────────────────────────────────────────────────────────
133
class ReadUrlRequest(BaseModel):
    # Request body of POST /api/browser/read-url.
    # url: page to fetch; the route validates it as an http(s) URL first.
    url: str
    # workspace_id: passed through unchanged to the ingested item.
    workspace_id: Optional[str] = None
136
+
137
+
138
class IngestTabRequest(BaseModel):
    # Request body of POST /api/browser/ingest-current-tab.
    # url: address of the captured tab; validated as an http(s) URL by the route.
    url: str
    # title: item title; the route falls back to the URL when it is missing.
    title: Optional[str] = None
    # text / html / selected_text: candidate content, used in that order of
    # preference (html is reduced to readable text before use).
    text: Optional[str] = None
    selected_text: Optional[str] = None
    html: Optional[str] = None
    # captured_at: passed through unchanged to the ingested item.
    captured_at: Optional[str] = None
    # workspace_id: passed through unchanged to the ingested item.
    workspace_id: Optional[str] = None
146
+
147
+
148
def create_browser_router(
    *,
    pipeline: Any,
    require_user: Callable[[Request], str],
    fetch_url: Optional[Callable[[str], Tuple[str, str]]] = None,
    max_tab_bytes: int = MAX_TAB_BYTES,
) -> APIRouter:
    """Build the router exposing the browser / web ingestion endpoints.

    Args:
        pipeline: ingestion pipeline; must provide ``available()`` and
            ``ingest(item, user_email=...)``. ``None`` disables both routes
            (they answer 503).
        require_user: resolves the calling user from the request.
        fetch_url: synchronous ``url -> (title, text)`` fetcher; defaults to
            ``_default_fetch_url``. It is expected to raise
            ``BrowserFetchError`` when a page cannot be read.
        max_tab_bytes: per-field UTF-8 size limit for captured tab payloads.

    Returns:
        An ``APIRouter`` with ``POST /api/browser/read-url`` and
        ``POST /api/browser/ingest-current-tab``.
    """
    router = APIRouter()
    _fetch = fetch_url or _default_fetch_url

    def _require_pipeline():
        if pipeline is None or not pipeline.available():
            raise HTTPException(status_code=503, detail="Knowledge Graph ingestion is disabled.")

    @router.post("/api/browser/read-url")
    async def read_url(req: ReadUrlRequest, request: Request):
        """Fetch a public URL locally and ingest it as a web_url source."""
        import asyncio

        user = require_user(request)
        _require_pipeline()
        url = _validate_http_url(req.url)
        try:
            # The fetcher is synchronous and may block for the full network
            # timeout; run it in a worker thread so the event loop stays free.
            title, text = await asyncio.to_thread(_fetch, url)
        except BrowserFetchError as exc:
            # Graceful failure — not a 5xx; the page was simply unreadable.
            raise HTTPException(status_code=422, detail=str(exc)) from exc
        if not (text or "").strip():
            return {"status": "empty", "source_type": "web_url", "url": url,
                    "detail": "No readable text was extracted from the page."}
        res = pipeline.ingest(
            IngestionItem(
                source_type="web_url", title=title, text=text, source_uri=url,
                owner=user, workspace_id=req.workspace_id,
            ),
            user_email=user,
        )
        return res.as_dict()

    @router.post("/api/browser/ingest-current-tab")
    async def ingest_current_tab(req: IngestTabRequest, request: Request):
        """Ingest a payload captured from the local browser extension."""
        user = require_user(request)
        _require_pipeline()
        url = _validate_http_url(req.url)
        # Sanitize: reject an oversized field before doing any work.
        for value in (req.text, req.html, req.selected_text):
            if value and len(value.encode("utf-8", "ignore")) > max_tab_bytes:
                raise HTTPException(status_code=413, detail="Captured payload is too large.")
        # Content preference: explicit text, then text extracted from the
        # HTML, then the user's selection.
        text = (req.text or "").strip()
        if not text and req.html:
            text = extract_readable_text(req.html)[1]
        if not text:
            text = (req.selected_text or "").strip()
        if not text:
            raise HTTPException(status_code=400, detail="No text, html, or selected_text provided.")
        res = pipeline.ingest(
            IngestionItem(
                source_type="browser_tab",
                title=req.title or url,
                text=text,
                source_uri=url,
                captured_at=req.captured_at,
                owner=user,
                workspace_id=req.workspace_id,
                metadata={"has_selection": bool(req.selected_text)},
            ),
            user_email=user,
        )
        return res.as_dict()

    return router