business-stack 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.python-version +1 -0
- package/backend/.env.example +65 -0
- package/backend/alembic/env.py +63 -0
- package/backend/alembic/script.py.mako +26 -0
- package/backend/alembic/versions/2a9c8f1d0e7b_multimodal_kb_schema.py +279 -0
- package/backend/alembic/versions/3c1d2e4f5a6b_sqlite_vec_embeddings.py +58 -0
- package/backend/alembic/versions/4e8b0c2d1a3f_document_links.py +50 -0
- package/backend/alembic/versions/6a0b1c2d3e4f_link_expansion_dedupe_columns.py +49 -0
- package/backend/alembic/versions/7d8e9f0a1b2c_document_chunks.py +70 -0
- package/backend/alembic/versions/8f2a1c0d9e3b_initial_empty_revision.py +22 -0
- package/backend/alembic/versions/9f0a1b2c3d4e_entity_mentions_cooccurrence.py +123 -0
- package/backend/alembic/versions/b1c2d3e4f5a6_pipeline_dedupe_dlq.py +99 -0
- package/backend/alembic/versions/c2d3e4f5061a_chat_sessions_messages.py +59 -0
- package/backend/alembic.ini +42 -0
- package/backend/app/__init__.py +0 -0
- package/backend/app/config.py +337 -0
- package/backend/app/connectors/__init__.py +13 -0
- package/backend/app/connectors/base.py +39 -0
- package/backend/app/connectors/builtins.py +51 -0
- package/backend/app/connectors/playwright_session.py +146 -0
- package/backend/app/connectors/registry.py +68 -0
- package/backend/app/connectors/thread_expansion/__init__.py +33 -0
- package/backend/app/connectors/thread_expansion/fakes.py +154 -0
- package/backend/app/connectors/thread_expansion/models.py +113 -0
- package/backend/app/connectors/thread_expansion/reddit.py +53 -0
- package/backend/app/connectors/thread_expansion/twitter.py +49 -0
- package/backend/app/db.py +5 -0
- package/backend/app/dependencies.py +34 -0
- package/backend/app/logging_config.py +35 -0
- package/backend/app/main.py +97 -0
- package/backend/app/middleware/__init__.py +0 -0
- package/backend/app/middleware/gateway_identity.py +17 -0
- package/backend/app/middleware/openapi_gateway.py +71 -0
- package/backend/app/middleware/request_id.py +23 -0
- package/backend/app/openapi_config.py +126 -0
- package/backend/app/routers/__init__.py +0 -0
- package/backend/app/routers/admin_pipeline.py +123 -0
- package/backend/app/routers/chat.py +206 -0
- package/backend/app/routers/chunks.py +36 -0
- package/backend/app/routers/entity_extract.py +31 -0
- package/backend/app/routers/example.py +8 -0
- package/backend/app/routers/gemini_embed.py +58 -0
- package/backend/app/routers/health.py +28 -0
- package/backend/app/routers/ingestion.py +146 -0
- package/backend/app/routers/link_expansion.py +34 -0
- package/backend/app/routers/pipeline_status.py +304 -0
- package/backend/app/routers/query.py +63 -0
- package/backend/app/routers/vectors.py +63 -0
- package/backend/app/schemas/__init__.py +0 -0
- package/backend/app/schemas/canonical.py +44 -0
- package/backend/app/schemas/chat.py +50 -0
- package/backend/app/schemas/ingest.py +29 -0
- package/backend/app/schemas/query.py +153 -0
- package/backend/app/schemas/vectors.py +56 -0
- package/backend/app/services/__init__.py +0 -0
- package/backend/app/services/chat_store.py +152 -0
- package/backend/app/services/chunking/__init__.py +3 -0
- package/backend/app/services/chunking/llm_boundaries.py +63 -0
- package/backend/app/services/chunking/schemas.py +30 -0
- package/backend/app/services/chunking/semantic_chunk.py +178 -0
- package/backend/app/services/chunking/splitters.py +214 -0
- package/backend/app/services/embeddings/__init__.py +20 -0
- package/backend/app/services/embeddings/build_inputs.py +140 -0
- package/backend/app/services/embeddings/dlq.py +128 -0
- package/backend/app/services/embeddings/gemini_api.py +207 -0
- package/backend/app/services/embeddings/persist.py +74 -0
- package/backend/app/services/embeddings/types.py +32 -0
- package/backend/app/services/embeddings/worker.py +224 -0
- package/backend/app/services/entities/__init__.py +12 -0
- package/backend/app/services/entities/gliner_extract.py +63 -0
- package/backend/app/services/entities/llm_extract.py +94 -0
- package/backend/app/services/entities/pipeline.py +179 -0
- package/backend/app/services/entities/spacy_extract.py +63 -0
- package/backend/app/services/entities/types.py +15 -0
- package/backend/app/services/gemini_chat.py +113 -0
- package/backend/app/services/hooks/__init__.py +3 -0
- package/backend/app/services/hooks/post_ingest.py +186 -0
- package/backend/app/services/ingestion/__init__.py +0 -0
- package/backend/app/services/ingestion/persist.py +188 -0
- package/backend/app/services/integrations_remote.py +91 -0
- package/backend/app/services/link_expansion/__init__.py +3 -0
- package/backend/app/services/link_expansion/canonical_url.py +45 -0
- package/backend/app/services/link_expansion/domain_policy.py +26 -0
- package/backend/app/services/link_expansion/html_extract.py +72 -0
- package/backend/app/services/link_expansion/rate_limit.py +32 -0
- package/backend/app/services/link_expansion/robots.py +46 -0
- package/backend/app/services/link_expansion/schemas.py +67 -0
- package/backend/app/services/link_expansion/worker.py +458 -0
- package/backend/app/services/normalization/__init__.py +7 -0
- package/backend/app/services/normalization/normalizer.py +331 -0
- package/backend/app/services/normalization/persist_normalized.py +67 -0
- package/backend/app/services/playwright_extract/__init__.py +13 -0
- package/backend/app/services/playwright_extract/__main__.py +96 -0
- package/backend/app/services/playwright_extract/extract.py +181 -0
- package/backend/app/services/retrieval_service.py +351 -0
- package/backend/app/sqlite_ext.py +36 -0
- package/backend/app/storage/__init__.py +3 -0
- package/backend/app/storage/blobs.py +30 -0
- package/backend/app/vectorstore/__init__.py +13 -0
- package/backend/app/vectorstore/sqlite_vec_store.py +242 -0
- package/backend/backend.egg-info/PKG-INFO +18 -0
- package/backend/backend.egg-info/SOURCES.txt +93 -0
- package/backend/backend.egg-info/dependency_links.txt +1 -0
- package/backend/backend.egg-info/entry_points.txt +2 -0
- package/backend/backend.egg-info/requires.txt +15 -0
- package/backend/backend.egg-info/top_level.txt +4 -0
- package/backend/package.json +15 -0
- package/backend/pyproject.toml +52 -0
- package/backend/tests/conftest.py +40 -0
- package/backend/tests/test_chat.py +92 -0
- package/backend/tests/test_chunking.py +132 -0
- package/backend/tests/test_entities.py +170 -0
- package/backend/tests/test_gemini_embed.py +224 -0
- package/backend/tests/test_health.py +24 -0
- package/backend/tests/test_ingest_raw.py +123 -0
- package/backend/tests/test_link_expansion.py +241 -0
- package/backend/tests/test_main.py +12 -0
- package/backend/tests/test_normalizer.py +114 -0
- package/backend/tests/test_openapi_gateway.py +40 -0
- package/backend/tests/test_pipeline_hardening.py +285 -0
- package/backend/tests/test_pipeline_status.py +71 -0
- package/backend/tests/test_playwright_extract.py +80 -0
- package/backend/tests/test_post_ingest_hooks.py +162 -0
- package/backend/tests/test_query.py +165 -0
- package/backend/tests/test_thread_expansion.py +72 -0
- package/backend/tests/test_vectors.py +85 -0
- package/backend/uv.lock +1839 -0
- package/bin/business-stack.cjs +412 -0
- package/frontend/web/.env.example +23 -0
- package/frontend/web/AGENTS.md +5 -0
- package/frontend/web/CLAUDE.md +1 -0
- package/frontend/web/README.md +36 -0
- package/frontend/web/components.json +25 -0
- package/frontend/web/next-env.d.ts +6 -0
- package/frontend/web/next.config.ts +30 -0
- package/frontend/web/package.json +65 -0
- package/frontend/web/postcss.config.mjs +7 -0
- package/frontend/web/skills-lock.json +35 -0
- package/frontend/web/src/app/account/[[...path]]/page.tsx +19 -0
- package/frontend/web/src/app/auth/[[...path]]/page.tsx +14 -0
- package/frontend/web/src/app/chat/page.tsx +725 -0
- package/frontend/web/src/app/favicon.ico +0 -0
- package/frontend/web/src/app/globals.css +563 -0
- package/frontend/web/src/app/layout.tsx +50 -0
- package/frontend/web/src/app/page.tsx +96 -0
- package/frontend/web/src/app/settings/integrations/actions.ts +74 -0
- package/frontend/web/src/app/settings/integrations/integrations-settings-form.tsx +330 -0
- package/frontend/web/src/app/settings/integrations/page.tsx +41 -0
- package/frontend/web/src/app/webhooks/alpha-alerts/route.ts +84 -0
- package/frontend/web/src/components/home-auth-panel.tsx +49 -0
- package/frontend/web/src/components/providers.tsx +50 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/registry.ts +35 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/types.ts +8 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.test.ts +40 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.ts +78 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge.ts +30 -0
- package/frontend/web/src/lib/alpha-webhook/handler.ts +12 -0
- package/frontend/web/src/lib/alpha-webhook/signature.test.ts +33 -0
- package/frontend/web/src/lib/alpha-webhook/signature.ts +21 -0
- package/frontend/web/src/lib/alpha-webhook/types.ts +23 -0
- package/frontend/web/src/lib/auth-client.ts +23 -0
- package/frontend/web/src/lib/integrations-config.ts +125 -0
- package/frontend/web/src/lib/ui-utills.tsx +90 -0
- package/frontend/web/src/lib/utils.ts +6 -0
- package/frontend/web/tsconfig.json +36 -0
- package/frontend/web/tsconfig.tsbuildinfo +1 -0
- package/frontend/web/vitest.config.ts +14 -0
- package/gateway/.env.example +23 -0
- package/gateway/README.md +13 -0
- package/gateway/package.json +24 -0
- package/gateway/src/auth.ts +49 -0
- package/gateway/src/index.ts +141 -0
- package/gateway/src/integrations/admin.ts +19 -0
- package/gateway/src/integrations/crypto.ts +52 -0
- package/gateway/src/integrations/handlers.ts +124 -0
- package/gateway/src/integrations/keys.ts +12 -0
- package/gateway/src/integrations/store.ts +106 -0
- package/gateway/src/stack-secrets.ts +35 -0
- package/gateway/tsconfig.json +13 -0
- package/package.json +33 -0
- package/turbo.json +27 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Twitter / X thread expansion — **official APIs only**.
|
|
3
|
+
|
|
4
|
+
Do not use scraping or undocumented HTML/GraphQL access; those may violate the
|
|
5
|
+
platform Terms of Service. Production fetchers must call documented X API
|
|
6
|
+
endpoints under your developer product and agreement.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from app.connectors.thread_expansion.models import ThreadExpansionResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TwitterThreadExpansionFetcher(ABC):
    """
    Fetch a full conversation thread from X/Twitter using **official APIs**.

    This class is an abstract interface only: it declares one abstract
    coroutine, ``fetch_full_thread``, and contains no fetching logic itself.

    Compliance & credentials (operational checklist — not legal advice):

    - **API access**: Requires developer account credentials (e.g. bearer token /
      OAuth 2.0 user context, depending on endpoint). Keys must be kept server-side
      and rotated per your security policy.
    - **Policies**: Usage is subject to the X Developer Agreement, API rules, and
      rate limits for your access tier. Automated collection must stay within
      documented allowances.
    - **Privacy**: Respect user privacy settings and data-retention obligations
      applicable to your jurisdiction and use case.

    Implementations MUST NOT rely on scraping the public web UI.
    """

    @abstractmethod
    async def fetch_full_thread(
        self,
        conversation_id: str,
        *,
        config: dict[str, Any],
    ) -> ThreadExpansionResult:
        """
        Return every accessible post in the thread identified by ``conversation_id``
        (typically the root tweet id as returned by the API).

        ``config`` holds implementation-specific settings (e.g. timeout, optional
        endpoint flags). **Do not** log secrets from ``config``.

        Args:
            conversation_id: Identifier of the thread to expand.
            config: Keyword-only mapping of implementation-specific settings.

        Returns:
            A ``ThreadExpansionResult`` (defined in
            ``app.connectors.thread_expansion.models``).

        Raises:
            NotImplementedError: Always, in this base implementation; concrete
                subclasses must override the method.
        """
        raise NotImplementedError
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from collections.abc import AsyncGenerator
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
|
|
4
|
+
from fastapi import Depends, Header, HTTPException, Request
|
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
|
+
|
|
7
|
+
from app.config import Settings, get_settings
|
|
8
|
+
from app.vectorstore import SqliteVecStore
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def get_db(request: Request) -> AsyncGenerator[AsyncSession, None]:
|
|
12
|
+
factory = request.app.state.session_factory
|
|
13
|
+
async with factory() as session:
|
|
14
|
+
yield session
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_vector_store(request: Request) -> SqliteVecStore:
    """Return the vector store stored on ``request.app.state.vector_store``."""
    app_state = request.app.state
    return app_state.vector_store
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Annotated aliases so route signatures can declare a dependency by type:
# each pairs the injected type with the provider function that supplies it.
SettingsDep = Annotated[Settings, Depends(get_settings)]
DbSession = Annotated[AsyncSession, Depends(get_db)]
VectorStoreDep = Annotated[SqliteVecStore, Depends(get_vector_store)]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def get_gateway_user_id(
    x_user_id: Annotated[str | None, Header(alias="x-user-id")] = None,
) -> str:
    """Return the caller's user id taken from the ``x-user-id`` header.

    The value is returned with surrounding whitespace removed.

    Raises:
        HTTPException: 401 when the header is absent, empty, or only whitespace.
    """
    cleaned = str(x_user_id).strip() if x_user_id else ""
    if cleaned:
        return cleaned
    raise HTTPException(status_code=401, detail="Missing x-user-id")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Dependency alias: injects the stripped ``x-user-id`` value as a ``str``
# (resolved by ``get_gateway_user_id``, which raises 401 when it is missing).
GatewayUserDep = Annotated[str, Depends(get_gateway_user_id)]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from pythonjsonlogger.json import JsonFormatter
|
|
5
|
+
|
|
6
|
+
from app.middleware.request_id import request_id_ctx
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RequestIdFilter(logging.Filter):
    """Logging filter that guarantees every record has a ``request_id`` attribute."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Attach ``request_id`` to ``record`` when it is missing; never drops a record.

        A value already present on the record is left untouched. Otherwise the
        id is read from ``request_id_ctx``, with ``"-"`` used when none is set.
        """
        if hasattr(record, "request_id"):
            return True
        current = request_id_ctx.get()
        record.request_id = "-" if current is None else current
        return True
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def configure_logging(*, log_level: str, log_json: bool) -> None:
    """Reset the root logger to a single stdout handler.

    Args:
        log_level: Level name; upper-cased before being applied to the root logger.
        log_json: When true, records are formatted with ``JsonFormatter``;
            otherwise a plain text format with a ``[request_id=...]`` suffix is used.
    """
    root_logger = logging.getLogger()
    # Drop whatever handlers were installed before so output is not duplicated.
    root_logger.handlers.clear()
    root_logger.setLevel(log_level.upper())

    stdout_handler = logging.StreamHandler(sys.stdout)
    # The filter supplies %(request_id)s, which both format strings reference.
    stdout_handler.addFilter(RequestIdFilter())
    stdout_handler.setFormatter(
        JsonFormatter(
            "%(asctime)s %(levelname)s %(name)s %(message)s %(request_id)s",
        )
        if log_json
        else logging.Formatter(
            "%(asctime)s %(levelname)s %(name)s %(message)s [request_id=%(request_id)s]"
        )
    )
    root_logger.addHandler(stdout_handler)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from contextlib import asynccontextmanager
|
|
2
|
+
|
|
3
|
+
from fastapi import FastAPI
|
|
4
|
+
from sqlalchemy import event
|
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
|
6
|
+
|
|
7
|
+
from app.config import get_settings
|
|
8
|
+
from app.connectors.registry import init_connectors
|
|
9
|
+
from app.logging_config import configure_logging
|
|
10
|
+
from app.middleware.gateway_identity import GatewayIdentityMiddleware
|
|
11
|
+
from app.middleware.openapi_gateway import OpenApiGatewayMiddleware
|
|
12
|
+
from app.middleware.request_id import RequestIdMiddleware
|
|
13
|
+
from app.openapi_config import APP_DESCRIPTION, OPENAPI_TAGS, attach_custom_openapi
|
|
14
|
+
from app.routers import (
|
|
15
|
+
admin_pipeline,
|
|
16
|
+
chat,
|
|
17
|
+
chunks,
|
|
18
|
+
entity_extract,
|
|
19
|
+
example,
|
|
20
|
+
gemini_embed,
|
|
21
|
+
health,
|
|
22
|
+
ingestion,
|
|
23
|
+
link_expansion,
|
|
24
|
+
pipeline_status,
|
|
25
|
+
query,
|
|
26
|
+
vectors,
|
|
27
|
+
)
|
|
28
|
+
from app.sqlite_ext import load_sqlite_vec_extension, unwrap_sqlite3_connection
|
|
29
|
+
from app.vectorstore import SqliteVecStore
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: configure logging, storage dirs, connectors and the DB engine.

    On startup this creates the data and blob directories, initialises the
    connectors, builds the async engine, and publishes ``engine``,
    ``vector_store`` and ``session_factory`` on ``app.state``. The engine is
    disposed on shutdown, including when startup fails after the engine was
    created or when an exception is thrown into the generator at ``yield``.
    """
    settings = get_settings()
    configure_logging(log_level=settings.log_level, log_json=settings.log_json)
    settings.data_dir.mkdir(parents=True, exist_ok=True)
    (settings.data_dir / "blobs").mkdir(parents=True, exist_ok=True)
    init_connectors()
    engine = create_async_engine(
        settings.sqlalchemy_database_url,
        pool_pre_ping=True,
    )

    @event.listens_for(engine.sync_engine, "connect")
    def _sqlite_on_connect(dbapi_connection, _connection_record) -> None:
        """Apply per-connection PRAGMAs and load the sqlite-vec extension."""
        raw = unwrap_sqlite3_connection(dbapi_connection)
        cursor = raw.cursor()
        try:
            cursor.execute("PRAGMA foreign_keys=ON")
            cursor.execute("PRAGMA busy_timeout=10000")
        finally:
            # Close the cursor even if a PRAGMA fails.
            cursor.close()
        load_sqlite_vec_extension(dbapi_connection)

    try:
        app.state.engine = engine
        app.state.vector_store = SqliteVecStore(
            engine,
            dimension=settings.vector_embedding_dim,
        )
        app.state.session_factory = async_sessionmaker(
            engine,
            class_=AsyncSession,
            expire_on_commit=False,
        )
        yield
    finally:
        # Always release pooled connections, not only on a clean shutdown.
        await engine.dispose()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def create_app() -> FastAPI:
    """Build the FastAPI application: metadata, custom OpenAPI, middleware and routers."""
    api = FastAPI(
        title="Business Stack API",
        description=APP_DESCRIPTION,
        version="0.1.0",
        openapi_tags=OPENAPI_TAGS,
        docs_url="/docs",
        redoc_url="/redoc",
        openapi_url="/openapi.json",
        lifespan=lifespan,
    )
    attach_custom_openapi(api)

    # Registration order is preserved exactly; OpenApiGatewayMiddleware is added
    # last (its own docstring states that it runs outermost).
    for middleware_cls in (
        GatewayIdentityMiddleware,
        RequestIdMiddleware,
        OpenApiGatewayMiddleware,
    ):
        api.add_middleware(middleware_cls)

    # Routers are included in this fixed order.
    for router_module in (
        health,
        admin_pipeline,
        ingestion,
        pipeline_status,
        link_expansion,
        chunks,
        gemini_embed,
        entity_extract,
        vectors,
        query,
        chat,
        example,
    ):
        api.include_router(router_module.router)
    return api
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Module-level application instance, built once when this module is imported.
# NOTE(review): presumably the ASGI entry point the server is pointed at — confirm.
app = create_app()
|
|
File without changes
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
2
|
+
from starlette.requests import Request
|
|
3
|
+
from starlette.responses import JSONResponse, Response
|
|
4
|
+
|
|
5
|
+
# Exact request paths that GatewayIdentityMiddleware lets through without an
# ``x-user-id`` header.
_UNAUTH_PATHS = frozenset({"/healthz", "/readyz"})
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GatewayIdentityMiddleware(BaseHTTPMiddleware):
    """Reject requests without an ``x-user-id`` header, except on ``_UNAUTH_PATHS``."""

    async def dispatch(self, request: Request, call_next) -> Response:
        """Forward the request when it is exempt or identified; otherwise answer 401."""
        is_exempt = request.url.path in _UNAUTH_PATHS
        if is_exempt or request.headers.get("x-user-id"):
            return await call_next(request)
        return JSONResponse(
            status_code=401,
            content={"detail": "Unauthorized"},
        )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""ASGI middleware: OpenAPI UIs only via trusted gateway + correct URL prefix for Swagger/ReDoc."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
from starlette.types import ASGIApp, Receive, Scope, Send
|
|
8
|
+
|
|
9
|
+
from app.config import get_settings
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _is_openapi_public_path(path: str) -> bool:
|
|
13
|
+
if path == "/openapi.json" or path == "/redoc" or path == "/docs":
|
|
14
|
+
return True
|
|
15
|
+
return path.startswith("/docs/")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _decode_headers(scope: dict) -> dict[str, str]:
|
|
19
|
+
raw = scope.get("headers") or []
|
|
20
|
+
out: dict[str, str] = {}
|
|
21
|
+
for k, v in raw:
|
|
22
|
+
key = k.decode("latin-1").lower()
|
|
23
|
+
out[key] = v.decode("latin-1")
|
|
24
|
+
return out
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class OpenApiGatewayMiddleware:
    """Runs outermost: sets ASGI root_path from X-Forwarded-Prefix; gates OpenAPI routes on X-Gateway-Secret."""

    def __init__(self, app: ASGIApp):
        """Store the wrapped ASGI application."""
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Handle one ASGI connection.

        Non-HTTP scopes are passed through untouched. For HTTP requests:

        * when a gateway secret is configured and the path is an OpenAPI/docs
          path, a request whose ``X-Gateway-Secret`` header does not match is
          answered with a 403 JSON body and never reaches the wrapped app;
        * otherwise, a non-empty ``X-Forwarded-Prefix`` header (trailing ``/``
          removed) is installed as ``root_path`` on a copy of the scope.
        """
        # Function-scope import keeps this block self-contained.
        import hmac

        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        settings = get_settings()
        path = scope["path"]
        headers = _decode_headers(scope)

        secret = (settings.backend_gateway_secret or "").strip()
        if secret and _is_openapi_public_path(path):
            provided = headers.get("x-gateway-secret") or ""
            # Constant-time comparison so response timing does not reveal how
            # much of a guessed secret matched. Both sides are encoded to bytes
            # because compare_digest rejects non-ASCII ``str`` arguments.
            if not hmac.compare_digest(
                provided.encode("utf-8"),
                secret.encode("utf-8"),
            ):
                body = json.dumps(
                    {
                        "detail": (
                            "OpenAPI documentation is only available through the API gateway "
                            "with a valid X-Gateway-Secret."
                        ),
                    },
                ).encode("utf-8")
                await send(
                    {
                        "type": "http.response.start",
                        "status": 403,
                        "headers": [
                            (b"content-type", b"application/json"),
                            (b"content-length", str(len(body)).encode("ascii")),
                        ],
                    },
                )
                await send({"type": "http.response.body", "body": body})
                return

        prefix = (headers.get("x-forwarded-prefix") or "").strip().rstrip("/")
        new_scope = scope
        if prefix:
            # Copy rather than mutate the scope handed in by the server.
            new_scope = {**scope, "root_path": prefix}

        await self.app(new_scope, receive, send)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from contextvars import ContextVar
|
|
2
|
+
from uuid import uuid4
|
|
3
|
+
|
|
4
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
5
|
+
from starlette.requests import Request
|
|
6
|
+
from starlette.responses import Response
|
|
7
|
+
|
|
8
|
+
# Response header name under which the request id is echoed back.
REQUEST_ID_HEADER = "X-Request-ID"

# Holds the current request id; ``None`` when no id has been set in this context.
request_id_ctx: ContextVar[str | None] = ContextVar("request_id", default=None)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RequestIdMiddleware(BaseHTTPMiddleware):
    """Assign a request id to each request and echo it on the response."""

    async def dispatch(self, request: Request, call_next) -> Response:
        """Reuse the incoming ``x-request-id`` or generate a UUID4.

        The id is published through ``request_id_ctx`` while the request is
        handled, and the context variable is restored afterwards even if the
        downstream handler raises.
        """
        request_id = request.headers.get("x-request-id")
        if not request_id:
            request_id = str(uuid4())
        ctx_token = request_id_ctx.set(request_id)
        try:
            downstream = await call_next(request)
            downstream.headers[REQUEST_ID_HEADER] = request_id
        finally:
            request_id_ctx.reset(ctx_token)
        return downstream
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""OpenAPI (Swagger) metadata and schema customization for the FastAPI app."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from fastapi import FastAPI
|
|
9
|
+
|
|
10
|
+
from fastapi.openapi.utils import get_openapi
|
|
11
|
+
|
|
12
|
+
# Must stay in sync with ``_UNAUTH_PATHS`` in app/middleware/gateway_identity.py
# (a module-level constant there, not an attribute of GatewayIdentityMiddleware).
# Operations on these paths get an empty ``security`` list in the generated schema.
_UNAUTHENTICATED_PATHS = frozenset({"/healthz", "/readyz"})
|
|
14
|
+
|
|
15
|
+
# Tag metadata (name + description) handed to ``get_openapi(tags=...)`` by
# ``attach_custom_openapi``; each entry describes one group of routes.
OPENAPI_TAGS: list[dict[str, str]] = [
    {
        "name": "health",
        "description": "Liveness (`/healthz`) and readiness (`/readyz`) probes. No gateway identity header required.",
    },
    {
        "name": "ingestion",
        "description": "Ingest raw connector payloads, normalize to canonical documents, and persist blobs.",
    },
    {
        "name": "admin",
        "description": "Operational endpoints: retry embedding, re-run normalization, clear DLQ state.",
    },
    {
        "name": "query",
        "description": "RAG retrieval: embed the question (Gemini), search sqlite-vec, return ranked context.",
    },
    {
        "name": "chat",
        "description": (
            "Saved chat sessions (SQLite): list/create sessions, load messages, "
            "complete a turn (RAG + Gemini answer)."
        ),
    },
    {
        "name": "embeddings",
        "description": "Trigger or inspect Gemini embedding jobs for ingested documents.",
    },
    {
        "name": "chunks",
        "description": "Chunk listing and related ingest-time chunk operations.",
    },
    {
        "name": "entities",
        "description": "Entity extraction over document chunks.",
    },
    {
        "name": "link-expansion",
        "description": "Configurable URL fetching and link expansion during ingest.",
    },
    {
        "name": "vectors",
        "description": "Direct vector store utilities (debugging and advanced use).",
    },
    {
        "name": "example",
        "description": "Sample routes for development.",
    },
]
|
|
64
|
+
|
|
65
|
+
APP_DESCRIPTION = """Internal HTTP API for the Business Stack knowledge base (RAG).
|
|
66
|
+
|
|
67
|
+
**How to open these docs:** use the Hono gateway URL, e.g. `{gateway}{prefix}/docs` (same prefix as `API_GATEWAY_PREFIX`, default `/api/backend`). The gateway requires a signed-in Better Auth session and injects `X-User-Id` plus `X-Forwarded-Prefix` so Swagger loads `/openapi.json` correctly.
|
|
68
|
+
|
|
69
|
+
**Direct backend access:** if `BACKEND_GATEWAY_SECRET` is set on the backend and gateway, `/docs`, `/redoc`, and `/openapi.json` return **403** unless the gateway adds matching `X-Gateway-Secret` (clients cannot spoof docs by calling the backend with only `x-user-id`). Leave the secret unset in local dev if you want OpenAPI on `http://127.0.0.1:8000/docs` without the gateway.
|
|
70
|
+
|
|
71
|
+
**Gateway identity:** every route except `/healthz` and `/readyz` requires `x-user-id`. The gateway adds it after Better Auth session validation, so **Try it out** works with your login cookies and you do not need **Authorize** unless you call the raw backend URL.
|
|
72
|
+
|
|
73
|
+
**External services:** embeddings and query use **Gemini**. The key may be `GEMINI_API_KEY` or stored in gateway integration settings; the backend uses `INTEGRATIONS_GATEWAY_URL` and `INTEGRATIONS_INTERNAL_SECRET` when env is unset.
|
|
74
|
+
Optional OpenAI is only for LLM-assisted chunking, not `/query`.
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def attach_custom_openapi(app: FastAPI) -> None:
    """Install a schema generator on ``app`` that adds the gateway security scheme.

    ``app.openapi`` is replaced with a closure that builds the schema once,
    stores it on ``app.openapi_schema``, and returns the stored schema on
    later calls.
    """
    http_methods = ("get", "post", "put", "patch", "delete", "head", "options")

    def custom_openapi() -> dict:
        """Return the cached schema, or build, annotate and cache it."""
        cached = app.openapi_schema
        if cached:
            return cached

        schema = get_openapi(
            title=app.title,
            version=app.version,
            description=app.description,
            routes=app.routes,
            tags=OPENAPI_TAGS,
        )

        # Declare the header-based scheme that operations refer to below.
        scheme_registry = schema.setdefault("components", {}).setdefault(
            "securitySchemes", {}
        )
        scheme_registry["GatewayIdentity"] = {
            "type": "apiKey",
            "in": "header",
            "name": "x-user-id",
            "description": (
                "End-user id propagated by the API gateway. Required on all routes except "
                "`GET /healthz` and `GET /readyz`."
            ),
        }

        for route_path, path_item in schema.get("paths", {}).items():
            if not isinstance(path_item, dict):
                continue
            is_open = route_path in _UNAUTHENTICATED_PATHS
            for verb, operation in path_item.items():
                # Path items may hold non-operation keys; only real HTTP
                # operations receive a ``security`` entry.
                if verb in http_methods and isinstance(operation, dict):
                    operation["security"] = [] if is_open else [{"GatewayIdentity": []}]

        app.openapi_schema = schema
        return schema

    app.openapi = custom_openapi  # type: ignore[method-assign]
|
|
File without changes
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
from sqlalchemy import text
|
|
9
|
+
|
|
10
|
+
from app.config import get_settings
|
|
11
|
+
from app.dependencies import DbSession, VectorStoreDep
|
|
12
|
+
from app.services.embeddings.dlq import (
|
|
13
|
+
cleaned_ingest_meta_dict,
|
|
14
|
+
clear_embedding_dlq,
|
|
15
|
+
merge_ingest_meta,
|
|
16
|
+
)
|
|
17
|
+
from app.services.embeddings.worker import run_embed_document_job
|
|
18
|
+
from app.services.ingestion.persist import (
|
|
19
|
+
clear_normalization_error,
|
|
20
|
+
load_raw_envelope_for_document,
|
|
21
|
+
)
|
|
22
|
+
from app.services.integrations_remote import resolve_gemini_api_key
|
|
23
|
+
from app.services.normalization import (
|
|
24
|
+
normalize_envelope_to_canonical,
|
|
25
|
+
persist_normalized_document,
|
|
26
|
+
)
|
|
27
|
+
from app.storage.blobs import BlobStore
|
|
28
|
+
|
|
29
|
+
# Router for the operational endpoints in this module; every route registered
# on it is served under the ``/admin`` prefix and tagged ``admin``.
router = APIRouter(prefix="/admin", tags=["admin"])
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Request body for POST /admin/documents/{document_id}/retry-embedding.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — a docstring on a pydantic model is presumably surfaced in the
# generated schema, which would change the API's output; confirm before adding one.
class RetryEmbedBody(BaseModel):
    # Unknown fields in the request body are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Passed through unchanged as ``multimodal=`` to ``run_embed_document_job``.
    multimodal: bool = Field(
        default=False,
        description="Same flag as POST /ingest/documents/{id}/embed",
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@router.post("/documents/{document_id}/retry-embedding")
async def admin_retry_embedding(
    document_id: str,
    body: RetryEmbedBody,
    session: DbSession,
    request: Request,
    store: VectorStoreDep,
    background_tasks: BackgroundTasks,
) -> dict[str, str | bool]:
    """
    Clear DLQ state, reset ingest meta embedding fields, set status to ``partial``,
    and queue the same background embed job as the public embed endpoint.
    """
    # The docstring above is left verbatim: route docstrings feed the OpenAPI description.
    settings = get_settings()

    # Fail fast with 503 when no Gemini key can be resolved.
    gemini_key = await resolve_gemini_api_key(settings)
    if not gemini_key:
        raise HTTPException(
            status_code=503,
            detail=(
                "Gemini API key is not configured "
                "(GEMINI_API_KEY or gateway integration store)"
            ),
        )

    # Existence check, DLQ reset and metadata update share one transaction.
    async with session.begin():
        existence = await session.execute(
            text("SELECT 1 FROM documents WHERE id = :id LIMIT 1"),
            {"id": document_id},
        )
        if existence.first() is None:
            raise HTTPException(status_code=404, detail="Document not found")

        await clear_embedding_dlq(session, document_id=document_id)

        # Read ingest_meta only after the DLQ state has been cleared.
        meta_result = await session.execute(
            text("SELECT ingest_meta FROM documents WHERE id = :id LIMIT 1"),
            {"id": document_id},
        )
        stored_meta = meta_result.scalar_one_or_none()
        cleaned_meta = cleaned_ingest_meta_dict(stored_meta)
        updated_meta = merge_ingest_meta(
            json.dumps(cleaned_meta, default=str),
            {"embedding_admin_retry_at": datetime.now(UTC).isoformat()},
        )
        await session.execute(
            text(
                "UPDATE documents SET status = 'partial', ingest_meta = :im "
                "WHERE id = :id",
            ),
            {"im": updated_meta, "id": document_id},
        )

    # The embed job is queued only after the transaction block has exited.
    background_tasks.add_task(
        run_embed_document_job,
        document_id=document_id,
        multimodal=body.multimodal,
        session_factory=request.app.state.session_factory,
        settings=settings,
        store=store,
    )
    return {"accepted": True, "document_id": document_id}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@router.post("/documents/{document_id}/reprocess-normalization")
async def admin_reprocess_normalization(
    document_id: str,
    session: DbSession,
) -> dict[str, str | bool]:
    """Rebuild ``content_blocks`` / ``document_links`` from stored ``raw_content``."""
    # The docstring above is left verbatim: route docstrings feed the OpenAPI description.
    blobs = BlobStore(get_settings().data_dir / "blobs")

    # Lookup, re-normalisation, persistence and error reset share one transaction.
    async with session.begin():
        existence = await session.execute(
            text("SELECT 1 FROM documents WHERE id = :id LIMIT 1"),
            {"id": document_id},
        )
        if existence.first() is None:
            raise HTTPException(status_code=404, detail="Document not found")

        raw_envelope = await load_raw_envelope_for_document(
            session, document_id=document_id
        )
        rebuilt = normalize_envelope_to_canonical(
            document_id=document_id,
            envelope=raw_envelope,
            blob_store=blobs,
        )
        await persist_normalized_document(session, canonical=rebuilt)
        await clear_normalization_error(session, document_id=document_id)

    return {"ok": True, "document_id": document_id}
|