business-stack 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.python-version +1 -0
- package/backend/.env.example +65 -0
- package/backend/alembic/env.py +63 -0
- package/backend/alembic/script.py.mako +26 -0
- package/backend/alembic/versions/2a9c8f1d0e7b_multimodal_kb_schema.py +279 -0
- package/backend/alembic/versions/3c1d2e4f5a6b_sqlite_vec_embeddings.py +58 -0
- package/backend/alembic/versions/4e8b0c2d1a3f_document_links.py +50 -0
- package/backend/alembic/versions/6a0b1c2d3e4f_link_expansion_dedupe_columns.py +49 -0
- package/backend/alembic/versions/7d8e9f0a1b2c_document_chunks.py +70 -0
- package/backend/alembic/versions/8f2a1c0d9e3b_initial_empty_revision.py +22 -0
- package/backend/alembic/versions/9f0a1b2c3d4e_entity_mentions_cooccurrence.py +123 -0
- package/backend/alembic/versions/b1c2d3e4f5a6_pipeline_dedupe_dlq.py +99 -0
- package/backend/alembic/versions/c2d3e4f5061a_chat_sessions_messages.py +59 -0
- package/backend/alembic.ini +42 -0
- package/backend/app/__init__.py +0 -0
- package/backend/app/config.py +337 -0
- package/backend/app/connectors/__init__.py +13 -0
- package/backend/app/connectors/base.py +39 -0
- package/backend/app/connectors/builtins.py +51 -0
- package/backend/app/connectors/playwright_session.py +146 -0
- package/backend/app/connectors/registry.py +68 -0
- package/backend/app/connectors/thread_expansion/__init__.py +33 -0
- package/backend/app/connectors/thread_expansion/fakes.py +154 -0
- package/backend/app/connectors/thread_expansion/models.py +113 -0
- package/backend/app/connectors/thread_expansion/reddit.py +53 -0
- package/backend/app/connectors/thread_expansion/twitter.py +49 -0
- package/backend/app/db.py +5 -0
- package/backend/app/dependencies.py +34 -0
- package/backend/app/logging_config.py +35 -0
- package/backend/app/main.py +97 -0
- package/backend/app/middleware/__init__.py +0 -0
- package/backend/app/middleware/gateway_identity.py +17 -0
- package/backend/app/middleware/openapi_gateway.py +71 -0
- package/backend/app/middleware/request_id.py +23 -0
- package/backend/app/openapi_config.py +126 -0
- package/backend/app/routers/__init__.py +0 -0
- package/backend/app/routers/admin_pipeline.py +123 -0
- package/backend/app/routers/chat.py +206 -0
- package/backend/app/routers/chunks.py +36 -0
- package/backend/app/routers/entity_extract.py +31 -0
- package/backend/app/routers/example.py +8 -0
- package/backend/app/routers/gemini_embed.py +58 -0
- package/backend/app/routers/health.py +28 -0
- package/backend/app/routers/ingestion.py +146 -0
- package/backend/app/routers/link_expansion.py +34 -0
- package/backend/app/routers/pipeline_status.py +304 -0
- package/backend/app/routers/query.py +63 -0
- package/backend/app/routers/vectors.py +63 -0
- package/backend/app/schemas/__init__.py +0 -0
- package/backend/app/schemas/canonical.py +44 -0
- package/backend/app/schemas/chat.py +50 -0
- package/backend/app/schemas/ingest.py +29 -0
- package/backend/app/schemas/query.py +153 -0
- package/backend/app/schemas/vectors.py +56 -0
- package/backend/app/services/__init__.py +0 -0
- package/backend/app/services/chat_store.py +152 -0
- package/backend/app/services/chunking/__init__.py +3 -0
- package/backend/app/services/chunking/llm_boundaries.py +63 -0
- package/backend/app/services/chunking/schemas.py +30 -0
- package/backend/app/services/chunking/semantic_chunk.py +178 -0
- package/backend/app/services/chunking/splitters.py +214 -0
- package/backend/app/services/embeddings/__init__.py +20 -0
- package/backend/app/services/embeddings/build_inputs.py +140 -0
- package/backend/app/services/embeddings/dlq.py +128 -0
- package/backend/app/services/embeddings/gemini_api.py +207 -0
- package/backend/app/services/embeddings/persist.py +74 -0
- package/backend/app/services/embeddings/types.py +32 -0
- package/backend/app/services/embeddings/worker.py +224 -0
- package/backend/app/services/entities/__init__.py +12 -0
- package/backend/app/services/entities/gliner_extract.py +63 -0
- package/backend/app/services/entities/llm_extract.py +94 -0
- package/backend/app/services/entities/pipeline.py +179 -0
- package/backend/app/services/entities/spacy_extract.py +63 -0
- package/backend/app/services/entities/types.py +15 -0
- package/backend/app/services/gemini_chat.py +113 -0
- package/backend/app/services/hooks/__init__.py +3 -0
- package/backend/app/services/hooks/post_ingest.py +186 -0
- package/backend/app/services/ingestion/__init__.py +0 -0
- package/backend/app/services/ingestion/persist.py +188 -0
- package/backend/app/services/integrations_remote.py +91 -0
- package/backend/app/services/link_expansion/__init__.py +3 -0
- package/backend/app/services/link_expansion/canonical_url.py +45 -0
- package/backend/app/services/link_expansion/domain_policy.py +26 -0
- package/backend/app/services/link_expansion/html_extract.py +72 -0
- package/backend/app/services/link_expansion/rate_limit.py +32 -0
- package/backend/app/services/link_expansion/robots.py +46 -0
- package/backend/app/services/link_expansion/schemas.py +67 -0
- package/backend/app/services/link_expansion/worker.py +458 -0
- package/backend/app/services/normalization/__init__.py +7 -0
- package/backend/app/services/normalization/normalizer.py +331 -0
- package/backend/app/services/normalization/persist_normalized.py +67 -0
- package/backend/app/services/playwright_extract/__init__.py +13 -0
- package/backend/app/services/playwright_extract/__main__.py +96 -0
- package/backend/app/services/playwright_extract/extract.py +181 -0
- package/backend/app/services/retrieval_service.py +351 -0
- package/backend/app/sqlite_ext.py +36 -0
- package/backend/app/storage/__init__.py +3 -0
- package/backend/app/storage/blobs.py +30 -0
- package/backend/app/vectorstore/__init__.py +13 -0
- package/backend/app/vectorstore/sqlite_vec_store.py +242 -0
- package/backend/backend.egg-info/PKG-INFO +18 -0
- package/backend/backend.egg-info/SOURCES.txt +93 -0
- package/backend/backend.egg-info/dependency_links.txt +1 -0
- package/backend/backend.egg-info/entry_points.txt +2 -0
- package/backend/backend.egg-info/requires.txt +15 -0
- package/backend/backend.egg-info/top_level.txt +4 -0
- package/backend/package.json +15 -0
- package/backend/pyproject.toml +52 -0
- package/backend/tests/conftest.py +40 -0
- package/backend/tests/test_chat.py +92 -0
- package/backend/tests/test_chunking.py +132 -0
- package/backend/tests/test_entities.py +170 -0
- package/backend/tests/test_gemini_embed.py +224 -0
- package/backend/tests/test_health.py +24 -0
- package/backend/tests/test_ingest_raw.py +123 -0
- package/backend/tests/test_link_expansion.py +241 -0
- package/backend/tests/test_main.py +12 -0
- package/backend/tests/test_normalizer.py +114 -0
- package/backend/tests/test_openapi_gateway.py +40 -0
- package/backend/tests/test_pipeline_hardening.py +285 -0
- package/backend/tests/test_pipeline_status.py +71 -0
- package/backend/tests/test_playwright_extract.py +80 -0
- package/backend/tests/test_post_ingest_hooks.py +162 -0
- package/backend/tests/test_query.py +165 -0
- package/backend/tests/test_thread_expansion.py +72 -0
- package/backend/tests/test_vectors.py +85 -0
- package/backend/uv.lock +1839 -0
- package/bin/business-stack.cjs +412 -0
- package/frontend/web/.env.example +23 -0
- package/frontend/web/AGENTS.md +5 -0
- package/frontend/web/CLAUDE.md +1 -0
- package/frontend/web/README.md +36 -0
- package/frontend/web/components.json +25 -0
- package/frontend/web/next-env.d.ts +6 -0
- package/frontend/web/next.config.ts +30 -0
- package/frontend/web/package.json +65 -0
- package/frontend/web/postcss.config.mjs +7 -0
- package/frontend/web/skills-lock.json +35 -0
- package/frontend/web/src/app/account/[[...path]]/page.tsx +19 -0
- package/frontend/web/src/app/auth/[[...path]]/page.tsx +14 -0
- package/frontend/web/src/app/chat/page.tsx +725 -0
- package/frontend/web/src/app/favicon.ico +0 -0
- package/frontend/web/src/app/globals.css +563 -0
- package/frontend/web/src/app/layout.tsx +50 -0
- package/frontend/web/src/app/page.tsx +96 -0
- package/frontend/web/src/app/settings/integrations/actions.ts +74 -0
- package/frontend/web/src/app/settings/integrations/integrations-settings-form.tsx +330 -0
- package/frontend/web/src/app/settings/integrations/page.tsx +41 -0
- package/frontend/web/src/app/webhooks/alpha-alerts/route.ts +84 -0
- package/frontend/web/src/components/home-auth-panel.tsx +49 -0
- package/frontend/web/src/components/providers.tsx +50 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/registry.ts +35 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/types.ts +8 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.test.ts +40 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.ts +78 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge.ts +30 -0
- package/frontend/web/src/lib/alpha-webhook/handler.ts +12 -0
- package/frontend/web/src/lib/alpha-webhook/signature.test.ts +33 -0
- package/frontend/web/src/lib/alpha-webhook/signature.ts +21 -0
- package/frontend/web/src/lib/alpha-webhook/types.ts +23 -0
- package/frontend/web/src/lib/auth-client.ts +23 -0
- package/frontend/web/src/lib/integrations-config.ts +125 -0
- package/frontend/web/src/lib/ui-utills.tsx +90 -0
- package/frontend/web/src/lib/utils.ts +6 -0
- package/frontend/web/tsconfig.json +36 -0
- package/frontend/web/tsconfig.tsbuildinfo +1 -0
- package/frontend/web/vitest.config.ts +14 -0
- package/gateway/.env.example +23 -0
- package/gateway/README.md +13 -0
- package/gateway/package.json +24 -0
- package/gateway/src/auth.ts +49 -0
- package/gateway/src/index.ts +141 -0
- package/gateway/src/integrations/admin.ts +19 -0
- package/gateway/src/integrations/crypto.ts +52 -0
- package/gateway/src/integrations/handlers.ts +124 -0
- package/gateway/src/integrations/keys.ts +12 -0
- package/gateway/src/integrations/store.ts +106 -0
- package/gateway/src/stack-secrets.ts +35 -0
- package/gateway/tsconfig.json +13 -0
- package/package.json +33 -0
- package/turbo.json +27 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
app/__init__.py
|
|
3
|
+
app/config.py
|
|
4
|
+
app/db.py
|
|
5
|
+
app/dependencies.py
|
|
6
|
+
app/logging_config.py
|
|
7
|
+
app/main.py
|
|
8
|
+
app/sqlite_ext.py
|
|
9
|
+
app/connectors/__init__.py
|
|
10
|
+
app/connectors/base.py
|
|
11
|
+
app/connectors/builtins.py
|
|
12
|
+
app/connectors/playwright_session.py
|
|
13
|
+
app/connectors/registry.py
|
|
14
|
+
app/connectors/thread_expansion/__init__.py
|
|
15
|
+
app/connectors/thread_expansion/fakes.py
|
|
16
|
+
app/connectors/thread_expansion/models.py
|
|
17
|
+
app/connectors/thread_expansion/reddit.py
|
|
18
|
+
app/connectors/thread_expansion/twitter.py
|
|
19
|
+
app/middleware/__init__.py
|
|
20
|
+
app/middleware/gateway_identity.py
|
|
21
|
+
app/middleware/request_id.py
|
|
22
|
+
app/routers/__init__.py
|
|
23
|
+
app/routers/chunks.py
|
|
24
|
+
app/routers/entity_extract.py
|
|
25
|
+
app/routers/example.py
|
|
26
|
+
app/routers/gemini_embed.py
|
|
27
|
+
app/routers/health.py
|
|
28
|
+
app/routers/ingestion.py
|
|
29
|
+
app/routers/link_expansion.py
|
|
30
|
+
app/routers/query.py
|
|
31
|
+
app/routers/vectors.py
|
|
32
|
+
app/schemas/__init__.py
|
|
33
|
+
app/schemas/canonical.py
|
|
34
|
+
app/schemas/ingest.py
|
|
35
|
+
app/schemas/query.py
|
|
36
|
+
app/schemas/vectors.py
|
|
37
|
+
app/services/__init__.py
|
|
38
|
+
app/services/retrieval_service.py
|
|
39
|
+
app/services/chunking/__init__.py
|
|
40
|
+
app/services/chunking/llm_boundaries.py
|
|
41
|
+
app/services/chunking/schemas.py
|
|
42
|
+
app/services/chunking/semantic_chunk.py
|
|
43
|
+
app/services/chunking/splitters.py
|
|
44
|
+
app/services/embeddings/__init__.py
|
|
45
|
+
app/services/embeddings/build_inputs.py
|
|
46
|
+
app/services/embeddings/gemini_api.py
|
|
47
|
+
app/services/embeddings/persist.py
|
|
48
|
+
app/services/embeddings/types.py
|
|
49
|
+
app/services/embeddings/worker.py
|
|
50
|
+
app/services/entities/__init__.py
|
|
51
|
+
app/services/entities/gliner_extract.py
|
|
52
|
+
app/services/entities/llm_extract.py
|
|
53
|
+
app/services/entities/pipeline.py
|
|
54
|
+
app/services/entities/spacy_extract.py
|
|
55
|
+
app/services/entities/types.py
|
|
56
|
+
app/services/ingestion/__init__.py
|
|
57
|
+
app/services/ingestion/persist.py
|
|
58
|
+
app/services/link_expansion/__init__.py
|
|
59
|
+
app/services/link_expansion/canonical_url.py
|
|
60
|
+
app/services/link_expansion/domain_policy.py
|
|
61
|
+
app/services/link_expansion/html_extract.py
|
|
62
|
+
app/services/link_expansion/rate_limit.py
|
|
63
|
+
app/services/link_expansion/robots.py
|
|
64
|
+
app/services/link_expansion/schemas.py
|
|
65
|
+
app/services/link_expansion/worker.py
|
|
66
|
+
app/services/normalization/__init__.py
|
|
67
|
+
app/services/normalization/normalizer.py
|
|
68
|
+
app/services/normalization/persist_normalized.py
|
|
69
|
+
app/services/playwright_extract/__init__.py
|
|
70
|
+
app/services/playwright_extract/__main__.py
|
|
71
|
+
app/services/playwright_extract/extract.py
|
|
72
|
+
app/storage/__init__.py
|
|
73
|
+
app/storage/blobs.py
|
|
74
|
+
app/vectorstore/__init__.py
|
|
75
|
+
app/vectorstore/sqlite_vec_store.py
|
|
76
|
+
backend.egg-info/PKG-INFO
|
|
77
|
+
backend.egg-info/SOURCES.txt
|
|
78
|
+
backend.egg-info/dependency_links.txt
|
|
79
|
+
backend.egg-info/entry_points.txt
|
|
80
|
+
backend.egg-info/requires.txt
|
|
81
|
+
backend.egg-info/top_level.txt
|
|
82
|
+
tests/test_chunking.py
|
|
83
|
+
tests/test_entities.py
|
|
84
|
+
tests/test_gemini_embed.py
|
|
85
|
+
tests/test_health.py
|
|
86
|
+
tests/test_ingest_raw.py
|
|
87
|
+
tests/test_link_expansion.py
|
|
88
|
+
tests/test_main.py
|
|
89
|
+
tests/test_normalizer.py
|
|
90
|
+
tests/test_playwright_extract.py
|
|
91
|
+
tests/test_query.py
|
|
92
|
+
tests/test_thread_expansion.py
|
|
93
|
+
tests/test_vectors.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
fastapi[standard]==0.113.0
|
|
2
|
+
pydantic==2.8.0
|
|
3
|
+
pydantic-settings>=2.4.0
|
|
4
|
+
sqlalchemy[asyncio]>=2.0.36
|
|
5
|
+
aiosqlite>=0.20.0
|
|
6
|
+
alembic>=1.14.0
|
|
7
|
+
python-json-logger>=2.0.7
|
|
8
|
+
sqlite-vec>=0.1.9
|
|
9
|
+
httpx>=0.27.0
|
|
10
|
+
|
|
11
|
+
[entities]
|
|
12
|
+
spacy>=3.7.0
|
|
13
|
+
|
|
14
|
+
[playwright]
|
|
15
|
+
playwright>=1.49.0
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@business-stack/backend",
|
|
3
|
+
"private": true,
|
|
4
|
+
"scripts": {
|
|
5
|
+
"dev": "uv run uvicorn app.main:app --reload --host 127.0.0.1 --port 8000",
|
|
6
|
+
"start": "uv run uvicorn app.main:app --host 127.0.0.1 --port 8000",
|
|
7
|
+
"build": "bun -e \"require('fs').mkdirSync('dist',{recursive:true}); require('fs').writeFileSync('dist/.buildstamp','')\"",
|
|
8
|
+
"lint": "uv run ruff check .",
|
|
9
|
+
"lint:fix": "uv run ruff check --fix . && uv run ruff format .",
|
|
10
|
+
"typecheck": "bun -e \"process.exit(0)\"",
|
|
11
|
+
"test": "uv run pytest",
|
|
12
|
+
"db:migrate": "uv run alembic upgrade head",
|
|
13
|
+
"clean": "bun x rimraf@6 .pytest_cache"
|
|
14
|
+
}
|
|
15
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "backend"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "FastAPI service (internal API behind Hono gateway)"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"fastapi[standard]==0.113.0",
|
|
8
|
+
"pydantic==2.8.0",
|
|
9
|
+
"pydantic-settings>=2.4.0",
|
|
10
|
+
"sqlalchemy[asyncio]>=2.0.36",
|
|
11
|
+
"aiosqlite>=0.20.0",
|
|
12
|
+
"alembic>=1.14.0",
|
|
13
|
+
"python-json-logger>=2.0.7",
|
|
14
|
+
"sqlite-vec>=0.1.9",
|
|
15
|
+
"httpx>=0.27.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
entities = [
|
|
20
|
+
"spacy>=3.7.0",
|
|
21
|
+
]
|
|
22
|
+
playwright = [
|
|
23
|
+
"playwright>=1.49.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[dependency-groups]
|
|
27
|
+
dev = [
|
|
28
|
+
"httpx>=0.27.0",
|
|
29
|
+
"pytest>=8.3.0",
|
|
30
|
+
"ruff>=0.8.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.entry-points."kb.connectors"]
|
|
34
|
+
echo = "app.connectors.builtins:EchoConnector"
|
|
35
|
+
|
|
36
|
+
[build-system]
|
|
37
|
+
requires = ["setuptools>=61"]
|
|
38
|
+
build-backend = "setuptools.build_meta"
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["."]
|
|
42
|
+
exclude = ["tests*", "alembic*"]
|
|
43
|
+
|
|
44
|
+
[tool.ruff]
|
|
45
|
+
target-version = "py312"
|
|
46
|
+
line-length = 88
|
|
47
|
+
|
|
48
|
+
[tool.ruff.lint]
|
|
49
|
+
select = ["E", "F", "I", "UP"]
|
|
50
|
+
|
|
51
|
+
[tool.pytest.ini_options]
|
|
52
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
from alembic.config import Config
|
|
6
|
+
from fastapi.testclient import TestClient
|
|
7
|
+
|
|
8
|
+
from alembic import command
|
|
9
|
+
from app.config import clear_settings_cache
|
|
10
|
+
from app.main import create_app
|
|
11
|
+
|
|
12
|
+
# Directory one level above this file's folder; `alembic.ini` is looked up here
# and the migration helper below switches into it before upgrading.
BACKEND_ROOT = Path(__file__).resolve().parents[1]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _alembic_upgrade_head() -> None:
    """Run ``alembic upgrade head`` using the config in ``BACKEND_ROOT``.

    The process working directory is switched to ``BACKEND_ROOT`` while the
    upgrade runs and is restored afterwards, whether or not the upgrade
    raises.
    """
    original_cwd = os.getcwd()
    os.chdir(BACKEND_ROOT)
    try:
        # NOTE(review): presumably alembic.ini uses paths relative to the
        # backend root, hence the chdir -- confirm against alembic.ini.
        command.upgrade(Config(str(BACKEND_ROOT / "alembic.ini")), "head")
    finally:
        os.chdir(original_cwd)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture
def client(tmp_path, monkeypatch) -> TestClient:
    """Yield a ``TestClient`` for a new app whose ``DATA_DIR`` is ``tmp_path``.

    No migrations are applied here. The settings cache is cleared after the
    environment variable is set (presumably so the app picks up the new
    ``DATA_DIR`` -- confirm in ``app.config``).
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    clear_settings_cache()
    application = create_app()
    with TestClient(application) as http:
        yield http
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.fixture
def migrated_client(tmp_path, monkeypatch) -> TestClient:
    """Yield a ``TestClient`` backed by a fully migrated database in ``tmp_path``.

    ``DATA_DIR`` is pointed at ``tmp_path``, Alembic is upgraded to ``head``,
    and the settings cache is cleared both before and after the migration.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    clear_settings_cache()
    _alembic_upgrade_head()
    # Cleared a second time after migrating, before the app is built.
    clear_settings_cache()
    application = create_app()
    with TestClient(application) as http:
        yield http
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from unittest.mock import patch
|
|
2
|
+
|
|
3
|
+
from fastapi.testclient import TestClient
|
|
4
|
+
|
|
5
|
+
from app.config import clear_settings_cache
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_chat_requires_gateway_user(migrated_client: TestClient) -> None:
    """A ``GET /chat/sessions`` sent without any headers is answered with 401."""
    anonymous = migrated_client.get("/chat/sessions")
    assert anonymous.status_code == 401
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_chat_session_crud(migrated_client: TestClient) -> None:
    """Walk one chat session through create, list, read-messages and delete."""
    auth = {"x-user-id": "user-chat-crud"}

    created = migrated_client.post("/chat/sessions", json={}, headers=auth)
    assert created.status_code == 200
    session_id = created.json()["id"]
    messages_url = f"/chat/sessions/{session_id}/messages"

    # The new session must show up in the caller's session listing.
    listing = migrated_client.get("/chat/sessions", headers=auth)
    assert listing.status_code == 200
    known_ids = {row["id"] for row in listing.json()}
    assert session_id in known_ids

    # A freshly created session has no messages yet.
    initial = migrated_client.get(messages_url, headers=auth)
    assert initial.status_code == 200
    assert initial.json() == []

    removed = migrated_client.delete(f"/chat/sessions/{session_id}", headers=auth)
    assert removed.status_code == 200

    # After deletion the messages endpoint reports the session as missing.
    after_delete = migrated_client.get(messages_url, headers=auth)
    assert after_delete.status_code == 404
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_chat_complete_persists_messages(
    migrated_client: TestClient,
    monkeypatch,
) -> None:
    """Completing a chat turn returns the reply and stores both messages.

    Retrieval and the Gemini reply are replaced with async stubs, so only the
    router and persistence path are exercised.
    """
    monkeypatch.setenv("GEMINI_API_KEY", "test-key-for-chat-test")
    clear_settings_cache()
    auth = {"x-user-id": "user-chat-complete"}
    created = migrated_client.post("/chat/sessions", json={}, headers=auth)
    sid = created.json()["id"]

    async def fake_retrieval(*_args, **_kwargs):
        # A fresh dict is built on every call, as in a real retrieval result.
        stub_context = {
            "combined_text": "stub context",
            "sections": [],
            "media": [],
        }
        return {
            "candidates": [],
            "context": stub_context,
            "embedding_model": "stub-model",
            "vector_candidates_considered": 0,
        }

    async def fake_gemini(*_args, **_kwargs):
        return ("stub assistant reply", "model")

    retrieval_patch = patch(
        "app.routers.chat.run_retrieval",
        side_effect=fake_retrieval,
    )
    gemini_patch = patch(
        "app.routers.chat.gemini_chat_reply",
        side_effect=fake_gemini,
    )
    with retrieval_patch, gemini_patch:
        completion = migrated_client.post(
            f"/chat/sessions/{sid}/complete",
            json={"message": "hello from test"},
            headers=auth,
        )
    assert completion.status_code == 200
    payload = completion.json()
    assert payload["reply"] == "stub assistant reply"
    assert payload["reply_source"] == "model"

    # The user turn and the assistant turn are stored, in that order.
    history_response = migrated_client.get(
        f"/chat/sessions/{sid}/messages",
        headers=auth,
    )
    history = history_response.json()
    assert len(history) == 2
    user_row = history[0]
    assistant_row = history[1]
    assert user_row["role"] == "user"
    assert user_row["content"] == "hello from test"
    assert assistant_row["role"] == "assistant"
    assert assistant_row["content"] == "stub assistant reply"
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sqlite3
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
|
|
7
|
+
from fastapi.testclient import TestClient
|
|
8
|
+
|
|
9
|
+
from app.config import get_settings
|
|
10
|
+
from app.services.chunking.splitters import (
|
|
11
|
+
extract_thread_message_items,
|
|
12
|
+
structure_chunks_for_text_block,
|
|
13
|
+
thread_chunks_from_envelope,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_markdown_headings_produce_multiple_chunks() -> None:
    """Markdown with two headings is split into at least two section chunks.

    Every chunk must report ``start_block_ordinal == 2``, the value passed as
    the second positional argument.
    """
    markdown = "# Intro\n\nFirst para.\n\n## Details\n\nSecond para."
    sections = structure_chunks_for_text_block(
        markdown, 2, use_llm=False, llm_sections=None
    )
    assert len(sections) >= 2
    for section in sections:
        assert section.start_block_ordinal == 2
    leading_strategy = sections[0].meta.get("strategy")
    assert leading_strategy == "markdown_section"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_paragraph_split_default() -> None:
    """Three blank-line separated paragraphs yield three ``paragraph`` chunks."""
    body = "One block.\n\nTwo block.\n\nThree."
    pieces = structure_chunks_for_text_block(
        body, 0, use_llm=False, llm_sections=None
    )
    assert len(pieces) == 3
    middle_strategy = pieces[1].meta.get("strategy")
    assert middle_strategy == "paragraph"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_weak_structure_single_chunk_without_llm() -> None:
    """A 500-character run with no structure becomes one ``fallback_single`` chunk."""
    unstructured = "x" * 500
    pieces = structure_chunks_for_text_block(
        unstructured, 1, use_llm=False, llm_sections=None
    )
    assert len(pieces) == 1
    only_strategy = pieces[0].meta.get("strategy")
    assert only_strategy == "fallback_single"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_llm_sections_used_when_provided() -> None:
    """Supplied LLM sections become the chunks when ``use_llm`` is enabled."""
    supplied_sections = ["part a", "part b"]
    pieces = structure_chunks_for_text_block(
        "x" * 900, 0, use_llm=True, llm_sections=supplied_sections
    )
    # One chunk per supplied section.
    assert len(pieces) == 2
    first_strategy = pieces[0].meta.get("strategy")
    assert first_strategy == "llm_section"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_thread_envelope_metadata() -> None:
    """A list payload yields one draft per item, tagged with thread id and index."""
    envelope = {
        "payload": [{"text": "hello"}, {"text": "world"}],
        "metadata": {"thread_id": "thr-9"},
    }
    produced = thread_chunks_from_envelope(envelope)
    assert len(produced) == 2
    head_meta = produced[0].meta
    tail_meta = produced[1].meta
    # The thread id comes from the envelope metadata; the index is zero-based.
    assert head_meta.get("thread_id") == "thr-9"
    assert tail_meta.get("message_index") == 1
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_messages_key_in_payload_dict() -> None:
    """Messages nested under ``payload["messages"]`` are found and returned."""
    nested_payload = {
        "messages": [{"body": "a"}, {"body": "b"}],
        "thread_id": "abc",
    }
    envelope = {"payload": nested_payload, "metadata": {}}
    found = extract_thread_message_items(envelope)
    assert found is not None
    assert len(found) == 2
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_rebuild_chunks_endpoint(migrated_client: TestClient) -> None:
    """Ingest a markdown document, rebuild its chunks, and inspect the stored rows.

    The test posts a small text document through the ``echo`` connector, asks
    the API to (re)build chunks with the LLM weak-structure path disabled, and
    then reads ``document_chunks`` straight from the SQLite file to check that
    at least one row exists and that its ``meta`` names a known strategy.
    """
    h = {"x-user-id": "u1"}
    r = migrated_client.post(
        "/ingest/raw?connector=echo",
        headers=h,
        json={
            "source": "s",
            "timestamp": datetime.now(UTC).isoformat(),
            "content_type": "text",
            "payload": "# Title\n\nAlpha.\n\nBeta.",
            "metadata": {},
        },
    )
    assert r.status_code == 200
    doc_id = r.json()["document_id"]

    ch = migrated_client.post(
        f"/ingest/documents/{doc_id}/chunks",
        headers=h,
        json={"use_llm_weak_structure": False},
    )
    assert ch.status_code == 200
    data = ch.json()
    assert data["chunks_written"] >= 1

    settings = get_settings()
    db_path = (settings.data_dir / settings.sqlite_filename).resolve()
    conn = sqlite3.connect(str(db_path))
    try:
        rows = conn.execute(
            "SELECT ordinal, start_block_ordinal, end_block_ordinal, meta "
            "FROM document_chunks WHERE document_id = ? ORDER BY ordinal",
            (doc_id,),
        ).fetchall()
    finally:
        # Close the handle even when the query raises, so a failing test does
        # not leave an open connection on the temporary database file.
        conn.close()
    assert len(rows) >= 1
    # Column 3 is `meta`; it is decoded as JSON when present, else treated as {}.
    meta0 = json.loads(rows[0][3]) if rows[0][3] else {}
    assert meta0.get("strategy") in (
        "markdown_section",
        "paragraph",
        "fallback_single",
    )
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import sqlite3
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from unittest.mock import AsyncMock, patch
|
|
9
|
+
|
|
10
|
+
from alembic.config import Config
|
|
11
|
+
from sqlalchemy import text
|
|
12
|
+
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
|
13
|
+
|
|
14
|
+
from alembic import command
|
|
15
|
+
from app.config import Settings, clear_settings_cache, get_settings
|
|
16
|
+
from app.services.entities.pipeline import extract_and_store_entities_for_document
|
|
17
|
+
from app.services.entities.types import ExtractedMention
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_merge_mentions_keeps_highest_confidence() -> None:
    """Mentions that differ only by name case merge, keeping the top confidence.

    The same name under a different entity type must survive as its own entry.
    """
    from app.services.entities.pipeline import _merge_mentions

    raw_mentions = [
        ExtractedMention("Acme", "company", 0.5),
        ExtractedMention("acme", "company", 0.9),
        ExtractedMention("Acme", "person", 0.8),
    ]
    merged = {
        (item.name.lower(), item.type): item
        for item in _merge_mentions(raw_mentions)
    }
    assert merged[("acme", "company")].confidence == 0.9
    assert ("acme", "person") in merged
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_entity_extract_endpoint_accepts(migrated_client, monkeypatch) -> None:
    """The entities endpoint answers 200 and invokes the extraction job once.

    The job itself is swapped for an ``AsyncMock``, so no extraction runs.
    """
    monkeypatch.setenv("GEMINI_API_KEY", "x")
    clear_settings_cache()
    auth = {"x-user-id": "u1"}

    document = {
        "source": "s",
        "timestamp": datetime.now(UTC).isoformat(),
        "content_type": "text",
        "payload": "Alice at Acme in Boston",
        "metadata": {},
    }
    ingested = migrated_client.post(
        "/ingest/raw?connector=echo",
        headers=auth,
        json=document,
    )
    doc_id = ingested.json()["document_id"]

    # Build chunks first; the response of this call is not inspected.
    migrated_client.post(
        f"/ingest/documents/{doc_id}/chunks",
        headers=auth,
        json={},
    )

    job_target = "app.routers.entity_extract.run_entity_extraction_job"
    mock_job = AsyncMock()
    with patch(job_target, mock_job):
        accepted = migrated_client.post(
            f"/ingest/documents/{doc_id}/entities",
            headers=auth,
        )
    assert accepted.status_code == 200
    mock_job.assert_called_once()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_extract_persists_mentions_and_cooccurrence(tmp_path, monkeypatch) -> None:
    """Run entity extraction on a seeded two-chunk document and check stored rows.

    A migrated SQLite database is created under ``tmp_path`` and seeded with
    one source, one document and two chunks (texts ``'one'`` and ``'two'``).
    The per-chunk extractor is replaced by a stub returning two mentions per
    chunk. The pipeline must report at least 4 mentions and exactly 3
    co-occurrence rows, and the tables must contain 4 and 3 rows for the
    document respectively.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    clear_settings_cache()

    # Migrate from the backend root, restoring the previous cwd afterwards.
    backend = Path(__file__).resolve().parents[1]
    prev = os.getcwd()
    os.chdir(str(backend))
    try:
        command.upgrade(Config(str(backend / "alembic.ini")), "head")
    finally:
        os.chdir(prev)
    clear_settings_cache()

    settings = Settings()
    engine = create_async_engine(settings.sqlalchemy_database_url)
    factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    doc_id = "doc-ent-1"

    async def _seed() -> None:
        """Insert the source, the document and its two chunks, then commit."""
        async with factory() as session:
            await session.execute(
                text("INSERT INTO sources (name, connector_type) VALUES ('s','s')"),
            )
            sid = int(
                (
                    await session.execute(text("SELECT id FROM sources LIMIT 1"))
                ).scalar_one(),
            )
            await session.execute(
                text(
                    "INSERT INTO documents "
                    "(id, source_id, timestamp, content_type, raw_content, "
                    "summary, status) VALUES "
                    "(:id, :sid, :ts, 'text', '{}', 'x', 'ok')",
                ),
                {"id": doc_id, "sid": sid, "ts": datetime.now(UTC).isoformat()},
            )
            await session.execute(
                text(
                    "INSERT INTO document_chunks "
                    "(document_id, ordinal, text, start_block_ordinal, "
                    "end_block_ordinal, meta) VALUES "
                    "(:d, 0, 'one', 0, 0, NULL)",
                ),
                {"d": doc_id},
            )
            await session.execute(
                text(
                    "INSERT INTO document_chunks "
                    "(document_id, ordinal, text, start_block_ordinal, "
                    "end_block_ordinal, meta) VALUES "
                    "(:d, 1, 'two', 0, 0, NULL)",
                ),
                {"d": doc_id},
            )
            await session.commit()

    async def _fake_extract(text: str, _settings: Settings):
        """Return fixed mentions: one set for the 'one' chunk, another otherwise."""
        if "one" in text:
            return [
                ExtractedMention("Person A", "person", 0.9),
                ExtractedMention("Acme Corp", "company", 0.85),
            ]
        return [
            ExtractedMention("Person A", "person", 0.88),
            ExtractedMention("Boston", "location", 0.8),
        ]

    async def _run() -> None:
        """Run the pipeline with the stub extractor and check its return counts."""
        with patch(
            "app.services.entities.pipeline.extract_mentions_for_chunk_text",
            new=_fake_extract,
        ):
            async with factory() as session:
                nm, nc = await extract_and_store_entities_for_document(
                    session,
                    document_id=doc_id,
                    settings=get_settings(),
                )
                await session.commit()
        assert nm >= 4
        # NOTE(review): 3 presumably corresponds to the unordered pairs among
        # the three distinct entities -- confirm against the pipeline.
        assert nc == 3

    try:
        asyncio.run(_seed())
        asyncio.run(_run())

        db_path = (get_settings().data_dir / get_settings().sqlite_filename).resolve()
        conn = sqlite3.connect(str(db_path))
        try:
            mentions = conn.execute(
                "SELECT COUNT(*) FROM entity_mentions WHERE document_id = ?",
                (doc_id,),
            ).fetchone()[0]
            co = conn.execute(
                "SELECT COUNT(*) FROM entity_cooccurrence WHERE document_id = ?",
                (doc_id,),
            ).fetchone()[0]
        finally:
            # Close the raw connection even if one of the count queries fails.
            conn.close()
        assert mentions == 4
        assert co == 3
    finally:
        # Dispose the engine on every exit path so a failing assertion does not
        # leave pooled connections open on the temporary database.
        asyncio.run(engine.dispose())
|