ltcai 3.5.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -35
- package/docs/CARRYOVER_AUDIT_v3.6.0.md +61 -0
- package/docs/CHANGELOG.md +32 -0
- package/docs/HANDOVER_v3.6.0.md +46 -0
- package/docs/RUNTIME_HOOK_COVERAGE_v3.6.0.md +49 -0
- package/docs/V4_BRAIN_ARCHITECTURE.md +322 -0
- package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +509 -0
- package/docs/V4_IMPLEMENTATION_PLAN.md +470 -0
- package/docs/architecture.md +13 -12
- package/docs/kg-schema.md +102 -53
- package/docs/privacy.md +18 -2
- package/docs/security-model.md +17 -0
- package/kg_schema.py +139 -10
- package/knowledge_graph.py +874 -26
- package/knowledge_graph_api.py +11 -127
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/admin.py +1 -1
- package/latticeai/api/agents.py +7 -1
- package/latticeai/api/auth.py +27 -4
- package/latticeai/api/browser.py +217 -0
- package/latticeai/api/chat.py +112 -76
- package/latticeai/api/health.py +1 -1
- package/latticeai/api/hooks.py +1 -1
- package/latticeai/api/knowledge_graph.py +146 -0
- package/latticeai/api/local_files.py +1 -1
- package/latticeai/api/mcp.py +23 -11
- package/latticeai/api/memory.py +1 -1
- package/latticeai/api/models.py +1 -1
- package/latticeai/api/network.py +81 -0
- package/latticeai/api/portability.py +93 -0
- package/latticeai/api/realtime.py +1 -1
- package/latticeai/api/search.py +26 -2
- package/latticeai/api/security_dashboard.py +2 -3
- package/latticeai/api/setup.py +2 -2
- package/latticeai/api/static_routes.py +2 -4
- package/latticeai/api/tools.py +3 -0
- package/latticeai/api/workflow_designer.py +46 -0
- package/latticeai/api/workspace.py +71 -49
- package/latticeai/app_factory.py +1710 -0
- package/latticeai/brain/__init__.py +18 -0
- package/latticeai/brain/context.py +213 -0
- package/latticeai/brain/conversations.py +236 -0
- package/latticeai/brain/identity.py +175 -0
- package/latticeai/brain/memory.py +102 -0
- package/latticeai/brain/network.py +205 -0
- package/latticeai/core/agent.py +31 -7
- package/latticeai/core/audit.py +0 -7
- package/latticeai/core/config.py +1 -1
- package/latticeai/core/context_builder.py +1 -2
- package/latticeai/core/enterprise.py +1 -1
- package/latticeai/core/graph_curator.py +2 -2
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/mcp_registry.py +791 -0
- package/latticeai/core/model_compat.py +1 -1
- package/latticeai/core/model_resolution.py +0 -1
- package/latticeai/core/multi_agent.py +238 -4
- package/latticeai/core/security.py +1 -1
- package/latticeai/core/sessions.py +37 -7
- package/latticeai/core/workflow_engine.py +114 -2
- package/latticeai/core/workspace_os.py +58 -10
- package/latticeai/models/__init__.py +7 -0
- package/latticeai/models/router.py +779 -0
- package/latticeai/server_app.py +29 -1504
- package/latticeai/services/agent_runtime.py +1 -0
- package/latticeai/services/app_context.py +75 -14
- package/latticeai/services/ingestion.py +318 -0
- package/latticeai/services/kg_portability.py +207 -0
- package/latticeai/services/memory_service.py +39 -11
- package/latticeai/services/model_runtime.py +2 -5
- package/latticeai/services/platform_runtime.py +100 -23
- package/latticeai/services/search_service.py +17 -8
- package/latticeai/services/tool_dispatch.py +12 -2
- package/latticeai/services/triggers.py +241 -0
- package/latticeai/services/upload_service.py +37 -12
- package/latticeai/services/workspace_service.py +31 -0
- package/llm_router.py +29 -772
- package/ltcai_cli.py +1 -2
- package/mcp_registry.py +25 -788
- package/p_reinforce.py +124 -14
- package/package.json +11 -8
- package/scripts/build_vsix.mjs +72 -0
- package/scripts/bump_version.py +99 -0
- package/scripts/generate_diagrams.py +0 -1
- package/scripts/lint_v3.mjs +82 -18
- package/scripts/validate_release_artifacts.py +0 -1
- package/scripts/wheel_smoke.py +142 -0
- package/server.py +11 -7
- package/setup_wizard.py +1142 -0
- package/static/account.html +2 -4
- package/static/admin.html +3 -5
- package/static/chat.html +3 -6
- package/static/graph.html +2 -4
- package/static/sw.js +81 -52
- package/static/v3/asset-manifest.json +20 -19
- package/static/v3/css/{lattice.base.e4cdd05d.css → lattice.base.49deefb5.css} +1 -1
- package/static/v3/css/lattice.base.css +1 -1
- package/static/v3/css/{lattice.components.9b49d614.css → lattice.components.cde18231.css} +1 -1
- package/static/v3/css/lattice.components.css +1 -1
- package/static/v3/css/{lattice.shell.8fcc9d33.css → lattice.shell.29d36d85.css} +1 -1
- package/static/v3/css/lattice.shell.css +1 -1
- package/static/v3/css/{lattice.tokens.e7018963.css → lattice.tokens.304cbc40.css} +3 -0
- package/static/v3/css/lattice.tokens.css +3 -0
- package/static/v3/css/{lattice.views.22f69117.css → lattice.views.0a18b6c5.css} +2 -2
- package/static/v3/css/lattice.views.css +2 -2
- package/static/v3/index.html +3 -4
- package/static/v3/js/{app.d086489d.js → app.356e6452.js} +1 -1
- package/static/v3/js/core/{api.12b568ad.js → api.7a308b89.js} +39 -1
- package/static/v3/js/core/api.js +38 -0
- package/static/v3/js/core/{routes.d214b399.js → routes.7222343d.js} +22 -22
- package/static/v3/js/core/routes.js +22 -22
- package/static/v3/js/core/{shell.d05266f5.js → shell.a1657f20.js} +4 -4
- package/static/v3/js/core/shell.js +1 -1
- package/static/v3/js/core/{store.34ebd5e6.js → store.204a08b2.js} +1 -1
- package/static/v3/js/core/store.js +1 -1
- package/static/v3/js/views/graph-canvas.17c15d65.js +509 -0
- package/static/v3/js/views/graph-canvas.js +509 -0
- package/static/v3/js/views/{hybrid-search.b22b97e0.js → hybrid-search.2fb63ed9.js} +1 -2
- package/static/v3/js/views/hybrid-search.js +1 -2
- package/static/v3/js/views/knowledge-graph.5e40cbeb.js +509 -0
- package/static/v3/js/views/knowledge-graph.js +326 -54
- package/static/vendor/chart.umd.min.js +20 -0
- package/static/vendor/fonts/inter-latin-300-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-400-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-500-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-600-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-700-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-800-normal.woff2 +0 -0
- package/static/vendor/fonts/inter.css +44 -0
- package/static/vendor/icons/tabler-icons.min.css +4 -0
- package/static/vendor/icons/tabler-icons.woff2 +0 -0
- package/static/vendor/marked.min.js +69 -0
- package/static/workspace.html +2 -2
- package/telegram_bot.py +1 -2
- package/tools/commands.py +4 -2
- package/tools/computer.py +1 -1
- package/tools/documents.py +1 -3
- package/tools/filesystem.py +0 -4
- package/tools/knowledge.py +1 -3
- package/tools/network.py +1 -3
- package/codex_telegram_bot.py +0 -195
- package/docs/assets/v3.4.0/agent-run.png +0 -0
- package/docs/assets/v3.4.0/agents.png +0 -0
- package/docs/assets/v3.4.0/before/chat-before.png +0 -0
- package/docs/assets/v3.4.0/before/files-before.png +0 -0
- package/docs/assets/v3.4.0/chat.png +0 -0
- package/docs/assets/v3.4.0/connect-folder.png +0 -0
- package/docs/assets/v3.4.0/files.png +0 -0
- package/docs/assets/v3.4.0/home.png +0 -0
- package/docs/assets/v3.4.0/hooks-dispatch.png +0 -0
- package/docs/assets/v3.4.0/knowledge-graph.png +0 -0
- package/docs/assets/v3.4.0/local-agent.png +0 -0
- package/docs/assets/v3.4.0/memory.png +0 -0
- package/docs/assets/v3.4.0/settings.png +0 -0
- package/docs/assets/v3.4.0/vision-input.png +0 -0
- package/docs/assets/v3.4.0/workflows.png +0 -0
- package/docs/assets/v3.4.1/e2e_runtime_log.txt +0 -42
- package/docs/assets/v3.4.1/hooks-dispatch.png +0 -0
- package/docs/assets/v3.4.1/local-agent.png +0 -0
- package/docs/images/admin-dashboard.png +0 -0
- package/docs/images/architecture.png +0 -0
- package/docs/images/enterprise.png +0 -0
- package/docs/images/graph.png +0 -0
- package/docs/images/hero.gif +0 -0
- package/docs/images/knowledge-graph.png +0 -0
- package/docs/images/lattice-ai-demo.gif +0 -0
- package/docs/images/lattice-ai-hero.png +0 -0
- package/docs/images/logo.svg +0 -33
- package/docs/images/mobile-responsive.png +0 -0
- package/docs/images/model-recommendation.png +0 -0
- package/docs/images/onboarding.png +0 -0
- package/docs/images/organization.png +0 -0
- package/docs/images/pipeline.png +0 -0
- package/docs/images/screenshot-admin.png +0 -0
- package/docs/images/screenshot-chat.png +0 -0
- package/docs/images/screenshot-graph.png +0 -0
- package/docs/images/skills.png +0 -0
- package/docs/images/workspace-dark.png +0 -0
- package/docs/images/workspace-light.png +0 -0
- package/docs/images/workspace.png +0 -0
- package/requirements.txt +0 -16
- package/static/v3/js/views/knowledge-graph.a14ea7e7.js +0 -237
package/knowledge_graph_api.py
CHANGED
|
@@ -1,130 +1,14 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Deprecation shim — the knowledge graph router moved in v4.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
The ``/knowledge-graph/*`` data router (and legacy ``/graph`` page routes)
|
|
4
|
+
now live in :mod:`latticeai.api.knowledge_graph`. This root module remains
|
|
5
|
+
importable for the deprecation window and will be removed in a future major
|
|
6
|
+
release.
|
|
7
|
+
"""
|
|
5
8
|
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
9
|
+
from latticeai.api.knowledge_graph import ( # noqa: F401
|
|
10
|
+
KnowledgeGraphIngestRequest,
|
|
11
|
+
create_knowledge_graph_router,
|
|
12
|
+
)
|
|
9
13
|
|
|
10
|
-
|
|
11
|
-
class KnowledgeGraphIngestRequest(BaseModel):
|
|
12
|
-
type: str
|
|
13
|
-
content: str = ""
|
|
14
|
-
role: Optional[str] = None
|
|
15
|
-
title: Optional[str] = None
|
|
16
|
-
source: Optional[str] = None
|
|
17
|
-
conversation_id: Optional[str] = None
|
|
18
|
-
user_email: Optional[str] = None
|
|
19
|
-
user_nickname: Optional[str] = None
|
|
20
|
-
metadata: Optional[Dict[str, Any]] = None
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def create_knowledge_graph_router(
|
|
24
|
-
*,
|
|
25
|
-
get_graph: Callable[[], Any],
|
|
26
|
-
require_graph: Callable[[], None],
|
|
27
|
-
require_user: Callable[[Request], str],
|
|
28
|
-
static_dir: Path,
|
|
29
|
-
) -> APIRouter:
|
|
30
|
-
router = APIRouter()
|
|
31
|
-
|
|
32
|
-
def graph():
|
|
33
|
-
require_graph()
|
|
34
|
-
return get_graph()
|
|
35
|
-
|
|
36
|
-
@router.get("/graph")
|
|
37
|
-
async def knowledge_graph_page(request: Request):
|
|
38
|
-
"""Serve the interactive knowledge graph canvas UI."""
|
|
39
|
-
graph()
|
|
40
|
-
require_user(request)
|
|
41
|
-
response = FileResponse(static_dir / "graph.html")
|
|
42
|
-
response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
|
|
43
|
-
response.headers["Pragma"] = "no-cache"
|
|
44
|
-
response.headers["Expires"] = "0"
|
|
45
|
-
return response
|
|
46
|
-
|
|
47
|
-
@router.get("/knowledge-graph")
|
|
48
|
-
async def knowledge_graph_legacy_page(request: Request):
|
|
49
|
-
"""Backward-compatible route for the graph page."""
|
|
50
|
-
graph()
|
|
51
|
-
require_user(request)
|
|
52
|
-
response = FileResponse(static_dir / "graph.html")
|
|
53
|
-
response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
|
|
54
|
-
response.headers["Pragma"] = "no-cache"
|
|
55
|
-
response.headers["Expires"] = "0"
|
|
56
|
-
return response
|
|
57
|
-
|
|
58
|
-
@router.get("/knowledge-graph/stats")
|
|
59
|
-
async def knowledge_graph_stats(request: Request):
|
|
60
|
-
require_user(request)
|
|
61
|
-
return graph().stats()
|
|
62
|
-
|
|
63
|
-
@router.get("/knowledge-graph/schema")
|
|
64
|
-
async def knowledge_graph_schema(request: Request):
|
|
65
|
-
require_user(request)
|
|
66
|
-
stats = graph().stats()
|
|
67
|
-
return {
|
|
68
|
-
"legacy_schema_version": stats.get("schema_version"),
|
|
69
|
-
"v2_schema_available": stats.get("v2_schema_available"),
|
|
70
|
-
"v2": stats.get("v2"),
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
@router.get("/knowledge-graph/graph")
|
|
74
|
-
async def knowledge_graph_data(request: Request, limit: int = 300):
|
|
75
|
-
require_user(request)
|
|
76
|
-
return graph().graph(limit)
|
|
77
|
-
|
|
78
|
-
@router.get("/knowledge-graph/documents")
|
|
79
|
-
async def knowledge_graph_documents(request: Request, limit: int = 200):
|
|
80
|
-
"""Ingested documents (uploads + indexed local docs) with index state.
|
|
81
|
-
|
|
82
|
-
Backs the Files view so uploaded content is visible end-to-end:
|
|
83
|
-
upload → Files → Knowledge Graph → Hybrid Search → Chat.
|
|
84
|
-
"""
|
|
85
|
-
require_user(request)
|
|
86
|
-
return graph().list_documents(limit)
|
|
87
|
-
|
|
88
|
-
@router.get("/knowledge-graph/search")
|
|
89
|
-
async def knowledge_graph_search(q: str, request: Request, limit: int = 30):
|
|
90
|
-
require_user(request)
|
|
91
|
-
if not q or not q.strip():
|
|
92
|
-
return {"query": q, "matches": []}
|
|
93
|
-
return graph().search(q, limit)
|
|
94
|
-
|
|
95
|
-
@router.get("/knowledge-graph/context")
|
|
96
|
-
async def knowledge_graph_context(q: str, request: Request, limit: int = 6):
|
|
97
|
-
require_user(request)
|
|
98
|
-
return {"query": q, "context": graph().context_for_query(q, limit)}
|
|
99
|
-
|
|
100
|
-
@router.get("/knowledge-graph/neighbors/{node_id:path}")
|
|
101
|
-
async def knowledge_graph_neighbors(node_id: str, request: Request):
|
|
102
|
-
require_user(request)
|
|
103
|
-
if not node_id:
|
|
104
|
-
raise HTTPException(status_code=400, detail="node_id required")
|
|
105
|
-
return graph().neighbors(node_id)
|
|
106
|
-
|
|
107
|
-
@router.post("/knowledge-graph/ingest")
|
|
108
|
-
async def knowledge_graph_ingest(req: KnowledgeGraphIngestRequest, request: Request):
|
|
109
|
-
current_user = require_user(request)
|
|
110
|
-
kg = graph()
|
|
111
|
-
event_type = (req.type or "").strip().lower()
|
|
112
|
-
if event_type not in {"message", "ai_response", "note"}:
|
|
113
|
-
raise HTTPException(status_code=400, detail="지원하는 type: message, ai_response, note")
|
|
114
|
-
role = req.role or ("assistant" if event_type == "ai_response" else "user")
|
|
115
|
-
return kg.ingest_message(
|
|
116
|
-
role,
|
|
117
|
-
req.content,
|
|
118
|
-
user_email=req.user_email or current_user,
|
|
119
|
-
user_nickname=req.user_nickname,
|
|
120
|
-
source=req.source or "mcp",
|
|
121
|
-
conversation_id=req.conversation_id,
|
|
122
|
-
raw={
|
|
123
|
-
"type": req.type,
|
|
124
|
-
"title": req.title,
|
|
125
|
-
"content": req.content,
|
|
126
|
-
"metadata": req.metadata or {},
|
|
127
|
-
},
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
return router
|
|
14
|
+
__all__ = ["KnowledgeGraphIngestRequest", "create_knowledge_graph_router"]
|
package/latticeai/__init__.py
CHANGED
package/latticeai/api/admin.py
CHANGED
package/latticeai/api/agents.py
CHANGED
|
@@ -163,7 +163,13 @@ def create_agents_router(
|
|
|
163
163
|
current_user = require_user(request)
|
|
164
164
|
scope = gate_write(request)
|
|
165
165
|
try:
|
|
166
|
-
|
|
166
|
+
# Worker thread: an LLM-backed run blocks on model generation and
|
|
167
|
+
# must not stall the event loop (the sync model bridge also
|
|
168
|
+
# requires a loop-free thread).
|
|
169
|
+
import asyncio
|
|
170
|
+
|
|
171
|
+
return await asyncio.to_thread(
|
|
172
|
+
runtime.start,
|
|
167
173
|
req.goal,
|
|
168
174
|
user_email=current_user or None,
|
|
169
175
|
scope=scope,
|
package/latticeai/api/auth.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Authentication API router: register, login, logout, SSO, profile."""
|
|
2
2
|
|
|
3
|
+
import base64
|
|
4
|
+
import hashlib
|
|
3
5
|
import logging
|
|
4
6
|
import secrets
|
|
5
7
|
import time
|
|
@@ -69,11 +71,22 @@ def create_auth_router(
|
|
|
69
71
|
) -> APIRouter:
|
|
70
72
|
router = APIRouter()
|
|
71
73
|
|
|
74
|
+
def _enforce_password_policy(password: str) -> None:
|
|
75
|
+
# Real policy (v4): length >= 8 with letters AND digits. A 4-char
|
|
76
|
+
# minimum was not a policy.
|
|
77
|
+
pw = str(password or "")
|
|
78
|
+
if len(pw) < 8 or not any(c.isalpha() for c in pw) or not any(c.isdigit() for c in pw):
|
|
79
|
+
raise HTTPException(
|
|
80
|
+
status_code=400,
|
|
81
|
+
detail="비밀번호는 8자 이상이며 영문자와 숫자를 모두 포함해야 합니다.",
|
|
82
|
+
)
|
|
83
|
+
|
|
72
84
|
@router.post("/register")
|
|
73
85
|
async def register(req: UserRegister, request: Request):
|
|
74
86
|
check_ip_rate_limit(client_ip(request), "register", max_calls=5, window_secs=3600)
|
|
75
87
|
if not open_registration:
|
|
76
88
|
raise HTTPException(status_code=403, detail="회원가입이 비활성화되어 있습니다. 관리자에게 문의하세요.")
|
|
89
|
+
_enforce_password_policy(req.password)
|
|
77
90
|
users = load_users()
|
|
78
91
|
if req.email in users:
|
|
79
92
|
raise HTTPException(status_code=400, detail="이미 존재하는 이메일입니다.")
|
|
@@ -123,7 +136,15 @@ def create_auth_router(
|
|
|
123
136
|
raise HTTPException(status_code=503, detail="SSO가 설정되지 않았습니다.")
|
|
124
137
|
state = secrets.token_urlsafe(16)
|
|
125
138
|
nonce = secrets.token_urlsafe(16)
|
|
126
|
-
|
|
139
|
+
# PKCE (S256): bind the token exchange to this login, so an
|
|
140
|
+
# intercepted authorization code is useless without the verifier.
|
|
141
|
+
code_verifier = secrets.token_urlsafe(48)
|
|
142
|
+
code_challenge = (
|
|
143
|
+
base64.urlsafe_b64encode(hashlib.sha256(code_verifier.encode("ascii")).digest())
|
|
144
|
+
.rstrip(b"=")
|
|
145
|
+
.decode("ascii")
|
|
146
|
+
)
|
|
147
|
+
_sso_states[state] = (time.time(), nonce, code_verifier)
|
|
127
148
|
params = urlencode({
|
|
128
149
|
"client_id": settings["client_id"],
|
|
129
150
|
"response_type": "code",
|
|
@@ -131,6 +152,8 @@ def create_auth_router(
|
|
|
131
152
|
"scope": settings.get("scopes") or "openid email profile",
|
|
132
153
|
"state": state,
|
|
133
154
|
"nonce": nonce,
|
|
155
|
+
"code_challenge": code_challenge,
|
|
156
|
+
"code_challenge_method": "S256",
|
|
134
157
|
})
|
|
135
158
|
return RedirectResponse(f"{discovery['authorization_endpoint']}?{params}")
|
|
136
159
|
|
|
@@ -141,7 +164,7 @@ def create_auth_router(
|
|
|
141
164
|
entry = _sso_states.pop(state, None)
|
|
142
165
|
if entry is None or time.time() - entry[0] > 300:
|
|
143
166
|
raise HTTPException(status_code=400, detail="유효하지 않은 SSO 상태입니다.")
|
|
144
|
-
_, nonce = entry
|
|
167
|
+
_, nonce, code_verifier = entry
|
|
145
168
|
settings = get_sso_settings()
|
|
146
169
|
discovery = await get_sso_discovery()
|
|
147
170
|
if not settings.get("enabled") or not discovery:
|
|
@@ -154,6 +177,7 @@ def create_auth_router(
|
|
|
154
177
|
"redirect_uri": settings["redirect_uri"],
|
|
155
178
|
"client_id": settings["client_id"],
|
|
156
179
|
"client_secret": settings["client_secret"],
|
|
180
|
+
"code_verifier": code_verifier,
|
|
157
181
|
}, headers={"Accept": "application/json"}, timeout=15)
|
|
158
182
|
tokens = r.json()
|
|
159
183
|
id_token = tokens.get("id_token")
|
|
@@ -214,8 +238,7 @@ def create_auth_router(
|
|
|
214
238
|
email = require_user(request)
|
|
215
239
|
if not email:
|
|
216
240
|
raise HTTPException(status_code=401, detail="인증이 필요합니다.")
|
|
217
|
-
|
|
218
|
-
raise HTTPException(status_code=400, detail="새 비밀번호는 4자 이상이어야 합니다.")
|
|
241
|
+
_enforce_password_policy(req.new_password)
|
|
219
242
|
users = load_users()
|
|
220
243
|
user = users.get(email)
|
|
221
244
|
if not user:
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""Browser & web ingestion — Knowledge Graph inputs, not standalone features.
|
|
2
|
+
|
|
3
|
+
v3.6.0 Knowledge Graph First: a public URL or an open browser tab is just another
|
|
4
|
+
source that converges into the Knowledge Graph through the unified ingestion
|
|
5
|
+
pipeline. Everything runs on the **local runtime** — the server fetches/reads
|
|
6
|
+
locally, stores into local SQLite, and never uploads to a cloud service.
|
|
7
|
+
|
|
8
|
+
Two layers, both feeding ``IngestionPipeline.ingest``:
|
|
9
|
+
|
|
10
|
+
* ``POST /api/browser/read-url`` — the runtime fetches a public URL locally,
|
|
11
|
+
extracts readable text, stores it as ``source_type=web_url``. Fails gracefully
|
|
12
|
+
on blocked / login-required pages.
|
|
13
|
+
* ``POST /api/browser/ingest-current-tab`` — accepts a payload from the local
|
|
14
|
+
browser extension (url/title/text/selected_text/html), sanitizes + size-limits
|
|
15
|
+
it, stores it as ``source_type=browser_tab``.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from html.parser import HTMLParser
|
|
21
|
+
from typing import Any, Callable, Optional, Tuple
|
|
22
|
+
from urllib.parse import urlparse
|
|
23
|
+
|
|
24
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
25
|
+
from pydantic import BaseModel
|
|
26
|
+
|
|
27
|
+
from latticeai.services.ingestion import IngestionItem
|
|
28
|
+
|
|
29
|
+
MAX_TAB_BYTES = 4 * 1024 * 1024 # 4 MB per captured tab payload
|
|
30
|
+
MAX_URL_FETCH_BYTES = 4 * 1024 * 1024 # 4 MB cap on a fetched page
|
|
31
|
+
URL_FETCH_TIMEOUT = 12.0 # seconds
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BrowserFetchError(Exception):
    """Raised when a URL cannot be fetched.

    Covers the failure modes the fetcher reports: blocked or login-required
    pages, timeouts, and oversized or unsupported responses.
    """
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ── readable-text extraction ─────────────────────────────────────────────────
|
|
39
|
+
# Elements whose text content is never treated as readable prose.
_SKIP_TAGS = {"script", "style", "noscript", "template", "svg", "head"}


class _TextExtractor(HTMLParser):
    """Collect the document title and the visible text of an HTML page.

    Text inside any ``_SKIP_TAGS`` element is ignored. A newline marker is
    emitted after block-level elements and at every ``<br>`` so that
    :meth:`text` can rebuild one line per paragraph.
    """

    # Closing one of these ends a line of output.
    _BREAK_TAGS = {"p", "div", "br", "li", "h1", "h2", "h3", "h4", "section", "article"}

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self._skip_depth = 0  # >0 while inside at least one skipped element
        self._chunks: list[str] = []
        self.title: str = ""
        self._in_title = False

    def handle_starttag(self, tag, attrs):
        if tag == "body":
            # ``</head>`` is optional in HTML; without this reset an omitted
            # end tag would leave the whole body marked as skipped.
            self._skip_depth = 0
        if tag in _SKIP_TAGS:
            self._skip_depth += 1
        if tag == "title":
            self._in_title = True
        if tag == "br":
            # ``<br>`` is a void element: it has no end tag, so the break has
            # to be recorded here rather than in handle_endtag.
            self._chunks.append("\n")

    def handle_endtag(self, tag):
        if tag in _SKIP_TAGS and self._skip_depth > 0:
            self._skip_depth -= 1
        if tag == "title":
            self._in_title = False
        if tag in self._BREAK_TAGS:
            self._chunks.append("\n")

    def handle_data(self, data):
        if self._in_title:
            self.title += data
        if self._skip_depth == 0:
            text = data.strip()
            if text:
                self._chunks.append(text)

    def text(self) -> str:
        """Return the collected text, one non-empty stripped line per break."""
        raw = " ".join(self._chunks)
        # Trim each line and drop empty ones; whitespace inside a line is kept.
        lines = [ln.strip() for ln in raw.replace("\r", "").split("\n")]
        return "\n".join(ln for ln in lines if ln).strip()


def extract_readable_text(html: str) -> Tuple[str, str]:
    """Return ``(title, readable_text)`` from an HTML string. Never raises.

    ``None`` or an empty string yields ``("", "")``.
    """
    parser = _TextExtractor()
    try:
        parser.feed(html or "")
        # close() flushes input the parser is still buffering, e.g. a trailing
        # text run containing a bare "&" that feed() holds back in case a
        # character reference is split across calls.
        parser.close()
    except Exception:  # noqa: BLE001 — malformed HTML must still yield best-effort text
        pass
    return parser.title.strip(), parser.text()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _default_fetch_url(url: str) -> Tuple[str, str]:
    """Fetch a public URL on the local runtime and extract readable text.

    The response is streamed and reading stops once ``MAX_URL_FETCH_BYTES``
    have been received, so an oversized page is truncated instead of being
    downloaded in full.

    Returns:
        ``(title, text)``; ``title`` falls back to ``url`` when the page has
        no title.

    Raises:
        BrowserFetchError: on a transport error, an HTTP status >= 400
            (401/403 reported as login-required/blocked), or a content type
            that is neither HTML nor text.
    """
    import httpx

    # NOTE(review): redirects are followed and the target host is not
    # restricted, so loopback / private-network addresses are reachable
    # through this fetch. Confirm whether that is intended for the local
    # runtime before exposing this route beyond a single trusted user.
    received: list[bytes] = []
    total = 0
    try:
        with httpx.Client(
            follow_redirects=True,
            timeout=URL_FETCH_TIMEOUT,
            headers={"User-Agent": "LatticeAI-local/3.6 (+local-first knowledge graph)"},
        ) as client:
            with client.stream("GET", url) as resp:
                status = resp.status_code
                if status in (401, 403):
                    raise BrowserFetchError(
                        f"The page is login-required or blocked (HTTP {status})."
                    )
                if status >= 400:
                    raise BrowserFetchError(f"The page returned HTTP {status}.")
                content_type = resp.headers.get("content-type", "")
                lowered = content_type.lower()  # media types are case-insensitive
                if lowered and "html" not in lowered and "text" not in lowered:
                    raise BrowserFetchError(f"Unsupported content type: {content_type}.")
                # Stop reading as soon as the cap is reached rather than
                # buffering the whole body first.
                for chunk in resp.iter_bytes():
                    received.append(chunk)
                    total += len(chunk)
                    if total >= MAX_URL_FETCH_BYTES:
                        break
                encoding = resp.encoding or "utf-8"
    except httpx.HTTPError as exc:
        raise BrowserFetchError(f"Could not reach the page: {exc}") from exc

    raw = b"".join(received)[:MAX_URL_FETCH_BYTES]
    try:
        body = raw.decode(encoding, "replace")
    except LookupError:
        # Unknown charset label from the server: fall back to UTF-8.
        body = raw.decode("utf-8", "replace")
    title, text = extract_readable_text(body)
    return (title or url, text)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _validate_http_url(url: str) -> str:
    """Return ``url`` stripped of surrounding whitespace if it is a usable http(s) URL.

    Raises:
        HTTPException: 400 when the URL is empty, cannot be parsed, uses a
            scheme other than http/https, or has no host.
    """
    url = (url or "").strip()
    if not url:
        raise HTTPException(status_code=400, detail="url is required.")
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some inputs outright (e.g. an unbalanced IPv6
        # bracket); report that as a client error instead of a 500.
        raise HTTPException(status_code=400, detail="Malformed URL.") from None
    if parsed.scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Only http(s) URLs are supported.")
    # hostname, unlike netloc, is empty for inputs such as "http://:80/"
    # that carry a port or userinfo but no host.
    if not parsed.hostname:
        raise HTTPException(status_code=400, detail="Malformed URL.")
    return url
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ── request models ───────────────────────────────────────────────────────────
|
|
133
|
+
class ReadUrlRequest(BaseModel):
    """Request body for ``POST /api/browser/read-url``."""

    # Address of the page to fetch; validated as http(s) by the route.
    url: str
    # Optional workspace the ingested item is attached to.
    workspace_id: Optional[str] = None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class IngestTabRequest(BaseModel):
    """Request body for ``POST /api/browser/ingest-current-tab``.

    The route uses the first non-empty source of text in this order:
    ``text``, text extracted from ``html``, then ``selected_text``.
    """

    # Address of the captured tab; validated as http(s) by the route.
    url: str
    # Page title; the route falls back to ``url`` when missing.
    title: Optional[str] = None
    # Plain text of the page as captured by the extension.
    text: Optional[str] = None
    # Text the user had selected in the tab.
    selected_text: Optional[str] = None
    # Raw HTML of the page.
    html: Optional[str] = None
    # Capture timestamp, passed through to the ingestion item unchanged.
    captured_at: Optional[str] = None
    # Optional workspace the ingested item is attached to.
    workspace_id: Optional[str] = None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def create_browser_router(
    *,
    pipeline: Any,
    require_user: Callable[[Request], str],
    fetch_url: Optional[Callable[[str], Tuple[str, str]]] = None,
    max_tab_bytes: int = MAX_TAB_BYTES,
) -> APIRouter:
    """Build the router exposing the browser / web ingestion endpoints.

    Args:
        pipeline: Ingestion pipeline; must provide ``available()`` and
            ``ingest(item, user_email=...)``. ``None`` disables both routes
            (they answer 503).
        require_user: Resolves the authenticated user for a request.
        fetch_url: Callable returning ``(title, text)`` for a URL and raising
            ``BrowserFetchError`` on failure. Defaults to
            ``_default_fetch_url``.
        max_tab_bytes: Per-field UTF-8 size limit applied to ``text``,
            ``html`` and ``selected_text`` of a captured tab.

    Returns:
        An ``APIRouter`` with ``/api/browser/read-url`` and
        ``/api/browser/ingest-current-tab``.
    """
    import asyncio

    router = APIRouter()
    _fetch = fetch_url or _default_fetch_url

    def _require_pipeline():
        # Both routes are meaningless without a working ingestion pipeline.
        if pipeline is None or not pipeline.available():
            raise HTTPException(status_code=503, detail="Knowledge Graph ingestion is disabled.")

    @router.post("/api/browser/read-url")
    async def read_url(req: ReadUrlRequest, request: Request):
        """Fetch a public URL locally and ingest it as a web_url source."""
        user = require_user(request)
        _require_pipeline()
        url = _validate_http_url(req.url)
        try:
            # The fetcher is synchronous and blocks on network I/O; run it in
            # a worker thread so it does not stall the event loop.
            title, text = await asyncio.to_thread(_fetch, url)
        except BrowserFetchError as exc:
            # Graceful failure — not a 5xx; the page was simply unreadable.
            raise HTTPException(status_code=422, detail=str(exc)) from exc
        if not (text or "").strip():
            return {"status": "empty", "source_type": "web_url", "url": url,
                    "detail": "No readable text was extracted from the page."}
        res = pipeline.ingest(
            IngestionItem(
                source_type="web_url", title=title, text=text, source_uri=url,
                owner=user, workspace_id=req.workspace_id,
            ),
            user_email=user,
        )
        return res.as_dict()

    @router.post("/api/browser/ingest-current-tab")
    async def ingest_current_tab(req: IngestTabRequest, request: Request):
        """Ingest a payload captured from the local browser extension."""
        user = require_user(request)
        _require_pipeline()
        url = _validate_http_url(req.url)
        # Sanitize: reject an oversized field before doing any work.
        for value in (req.text, req.html, req.selected_text):
            if value and len(value.encode("utf-8", "ignore")) > max_tab_bytes:
                raise HTTPException(status_code=413, detail="Captured payload is too large.")
        # Text preference order: explicit text, text extracted from html,
        # then the user's selection.
        text = (req.text or "").strip()
        if not text and req.html:
            _title, text = extract_readable_text(req.html)
        if not text:
            text = (req.selected_text or "").strip()
        if not text:
            raise HTTPException(status_code=400, detail="No text, html, or selected_text provided.")
        res = pipeline.ingest(
            IngestionItem(
                source_type="browser_tab",
                title=req.title or url,
                text=text,
                source_uri=url,
                captured_at=req.captured_at,
                owner=user,
                workspace_id=req.workspace_id,
                metadata={"has_selection": bool(req.selected_text)},
            ),
            user_email=user,
        )
        return res.as_dict()

    return router
|