business-stack 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/.python-version +1 -0
  2. package/backend/.env.example +65 -0
  3. package/backend/alembic/env.py +63 -0
  4. package/backend/alembic/script.py.mako +26 -0
  5. package/backend/alembic/versions/2a9c8f1d0e7b_multimodal_kb_schema.py +279 -0
  6. package/backend/alembic/versions/3c1d2e4f5a6b_sqlite_vec_embeddings.py +58 -0
  7. package/backend/alembic/versions/4e8b0c2d1a3f_document_links.py +50 -0
  8. package/backend/alembic/versions/6a0b1c2d3e4f_link_expansion_dedupe_columns.py +49 -0
  9. package/backend/alembic/versions/7d8e9f0a1b2c_document_chunks.py +70 -0
  10. package/backend/alembic/versions/8f2a1c0d9e3b_initial_empty_revision.py +22 -0
  11. package/backend/alembic/versions/9f0a1b2c3d4e_entity_mentions_cooccurrence.py +123 -0
  12. package/backend/alembic/versions/b1c2d3e4f5a6_pipeline_dedupe_dlq.py +99 -0
  13. package/backend/alembic/versions/c2d3e4f5061a_chat_sessions_messages.py +59 -0
  14. package/backend/alembic.ini +42 -0
  15. package/backend/app/__init__.py +0 -0
  16. package/backend/app/config.py +337 -0
  17. package/backend/app/connectors/__init__.py +13 -0
  18. package/backend/app/connectors/base.py +39 -0
  19. package/backend/app/connectors/builtins.py +51 -0
  20. package/backend/app/connectors/playwright_session.py +146 -0
  21. package/backend/app/connectors/registry.py +68 -0
  22. package/backend/app/connectors/thread_expansion/__init__.py +33 -0
  23. package/backend/app/connectors/thread_expansion/fakes.py +154 -0
  24. package/backend/app/connectors/thread_expansion/models.py +113 -0
  25. package/backend/app/connectors/thread_expansion/reddit.py +53 -0
  26. package/backend/app/connectors/thread_expansion/twitter.py +49 -0
  27. package/backend/app/db.py +5 -0
  28. package/backend/app/dependencies.py +34 -0
  29. package/backend/app/logging_config.py +35 -0
  30. package/backend/app/main.py +97 -0
  31. package/backend/app/middleware/__init__.py +0 -0
  32. package/backend/app/middleware/gateway_identity.py +17 -0
  33. package/backend/app/middleware/openapi_gateway.py +71 -0
  34. package/backend/app/middleware/request_id.py +23 -0
  35. package/backend/app/openapi_config.py +126 -0
  36. package/backend/app/routers/__init__.py +0 -0
  37. package/backend/app/routers/admin_pipeline.py +123 -0
  38. package/backend/app/routers/chat.py +206 -0
  39. package/backend/app/routers/chunks.py +36 -0
  40. package/backend/app/routers/entity_extract.py +31 -0
  41. package/backend/app/routers/example.py +8 -0
  42. package/backend/app/routers/gemini_embed.py +58 -0
  43. package/backend/app/routers/health.py +28 -0
  44. package/backend/app/routers/ingestion.py +146 -0
  45. package/backend/app/routers/link_expansion.py +34 -0
  46. package/backend/app/routers/pipeline_status.py +304 -0
  47. package/backend/app/routers/query.py +63 -0
  48. package/backend/app/routers/vectors.py +63 -0
  49. package/backend/app/schemas/__init__.py +0 -0
  50. package/backend/app/schemas/canonical.py +44 -0
  51. package/backend/app/schemas/chat.py +50 -0
  52. package/backend/app/schemas/ingest.py +29 -0
  53. package/backend/app/schemas/query.py +153 -0
  54. package/backend/app/schemas/vectors.py +56 -0
  55. package/backend/app/services/__init__.py +0 -0
  56. package/backend/app/services/chat_store.py +152 -0
  57. package/backend/app/services/chunking/__init__.py +3 -0
  58. package/backend/app/services/chunking/llm_boundaries.py +63 -0
  59. package/backend/app/services/chunking/schemas.py +30 -0
  60. package/backend/app/services/chunking/semantic_chunk.py +178 -0
  61. package/backend/app/services/chunking/splitters.py +214 -0
  62. package/backend/app/services/embeddings/__init__.py +20 -0
  63. package/backend/app/services/embeddings/build_inputs.py +140 -0
  64. package/backend/app/services/embeddings/dlq.py +128 -0
  65. package/backend/app/services/embeddings/gemini_api.py +207 -0
  66. package/backend/app/services/embeddings/persist.py +74 -0
  67. package/backend/app/services/embeddings/types.py +32 -0
  68. package/backend/app/services/embeddings/worker.py +224 -0
  69. package/backend/app/services/entities/__init__.py +12 -0
  70. package/backend/app/services/entities/gliner_extract.py +63 -0
  71. package/backend/app/services/entities/llm_extract.py +94 -0
  72. package/backend/app/services/entities/pipeline.py +179 -0
  73. package/backend/app/services/entities/spacy_extract.py +63 -0
  74. package/backend/app/services/entities/types.py +15 -0
  75. package/backend/app/services/gemini_chat.py +113 -0
  76. package/backend/app/services/hooks/__init__.py +3 -0
  77. package/backend/app/services/hooks/post_ingest.py +186 -0
  78. package/backend/app/services/ingestion/__init__.py +0 -0
  79. package/backend/app/services/ingestion/persist.py +188 -0
  80. package/backend/app/services/integrations_remote.py +91 -0
  81. package/backend/app/services/link_expansion/__init__.py +3 -0
  82. package/backend/app/services/link_expansion/canonical_url.py +45 -0
  83. package/backend/app/services/link_expansion/domain_policy.py +26 -0
  84. package/backend/app/services/link_expansion/html_extract.py +72 -0
  85. package/backend/app/services/link_expansion/rate_limit.py +32 -0
  86. package/backend/app/services/link_expansion/robots.py +46 -0
  87. package/backend/app/services/link_expansion/schemas.py +67 -0
  88. package/backend/app/services/link_expansion/worker.py +458 -0
  89. package/backend/app/services/normalization/__init__.py +7 -0
  90. package/backend/app/services/normalization/normalizer.py +331 -0
  91. package/backend/app/services/normalization/persist_normalized.py +67 -0
  92. package/backend/app/services/playwright_extract/__init__.py +13 -0
  93. package/backend/app/services/playwright_extract/__main__.py +96 -0
  94. package/backend/app/services/playwright_extract/extract.py +181 -0
  95. package/backend/app/services/retrieval_service.py +351 -0
  96. package/backend/app/sqlite_ext.py +36 -0
  97. package/backend/app/storage/__init__.py +3 -0
  98. package/backend/app/storage/blobs.py +30 -0
  99. package/backend/app/vectorstore/__init__.py +13 -0
  100. package/backend/app/vectorstore/sqlite_vec_store.py +242 -0
  101. package/backend/backend.egg-info/PKG-INFO +18 -0
  102. package/backend/backend.egg-info/SOURCES.txt +93 -0
  103. package/backend/backend.egg-info/dependency_links.txt +1 -0
  104. package/backend/backend.egg-info/entry_points.txt +2 -0
  105. package/backend/backend.egg-info/requires.txt +15 -0
  106. package/backend/backend.egg-info/top_level.txt +4 -0
  107. package/backend/package.json +15 -0
  108. package/backend/pyproject.toml +52 -0
  109. package/backend/tests/conftest.py +40 -0
  110. package/backend/tests/test_chat.py +92 -0
  111. package/backend/tests/test_chunking.py +132 -0
  112. package/backend/tests/test_entities.py +170 -0
  113. package/backend/tests/test_gemini_embed.py +224 -0
  114. package/backend/tests/test_health.py +24 -0
  115. package/backend/tests/test_ingest_raw.py +123 -0
  116. package/backend/tests/test_link_expansion.py +241 -0
  117. package/backend/tests/test_main.py +12 -0
  118. package/backend/tests/test_normalizer.py +114 -0
  119. package/backend/tests/test_openapi_gateway.py +40 -0
  120. package/backend/tests/test_pipeline_hardening.py +285 -0
  121. package/backend/tests/test_pipeline_status.py +71 -0
  122. package/backend/tests/test_playwright_extract.py +80 -0
  123. package/backend/tests/test_post_ingest_hooks.py +162 -0
  124. package/backend/tests/test_query.py +165 -0
  125. package/backend/tests/test_thread_expansion.py +72 -0
  126. package/backend/tests/test_vectors.py +85 -0
  127. package/backend/uv.lock +1839 -0
  128. package/bin/business-stack.cjs +412 -0
  129. package/frontend/web/.env.example +23 -0
  130. package/frontend/web/AGENTS.md +5 -0
  131. package/frontend/web/CLAUDE.md +1 -0
  132. package/frontend/web/README.md +36 -0
  133. package/frontend/web/components.json +25 -0
  134. package/frontend/web/next-env.d.ts +6 -0
  135. package/frontend/web/next.config.ts +30 -0
  136. package/frontend/web/package.json +65 -0
  137. package/frontend/web/postcss.config.mjs +7 -0
  138. package/frontend/web/skills-lock.json +35 -0
  139. package/frontend/web/src/app/account/[[...path]]/page.tsx +19 -0
  140. package/frontend/web/src/app/auth/[[...path]]/page.tsx +14 -0
  141. package/frontend/web/src/app/chat/page.tsx +725 -0
  142. package/frontend/web/src/app/favicon.ico +0 -0
  143. package/frontend/web/src/app/globals.css +563 -0
  144. package/frontend/web/src/app/layout.tsx +50 -0
  145. package/frontend/web/src/app/page.tsx +96 -0
  146. package/frontend/web/src/app/settings/integrations/actions.ts +74 -0
  147. package/frontend/web/src/app/settings/integrations/integrations-settings-form.tsx +330 -0
  148. package/frontend/web/src/app/settings/integrations/page.tsx +41 -0
  149. package/frontend/web/src/app/webhooks/alpha-alerts/route.ts +84 -0
  150. package/frontend/web/src/components/home-auth-panel.tsx +49 -0
  151. package/frontend/web/src/components/providers.tsx +50 -0
  152. package/frontend/web/src/lib/alpha-webhook/connectors/registry.ts +35 -0
  153. package/frontend/web/src/lib/alpha-webhook/connectors/types.ts +8 -0
  154. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.test.ts +40 -0
  155. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.ts +78 -0
  156. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge.ts +30 -0
  157. package/frontend/web/src/lib/alpha-webhook/handler.ts +12 -0
  158. package/frontend/web/src/lib/alpha-webhook/signature.test.ts +33 -0
  159. package/frontend/web/src/lib/alpha-webhook/signature.ts +21 -0
  160. package/frontend/web/src/lib/alpha-webhook/types.ts +23 -0
  161. package/frontend/web/src/lib/auth-client.ts +23 -0
  162. package/frontend/web/src/lib/integrations-config.ts +125 -0
  163. package/frontend/web/src/lib/ui-utills.tsx +90 -0
  164. package/frontend/web/src/lib/utils.ts +6 -0
  165. package/frontend/web/tsconfig.json +36 -0
  166. package/frontend/web/tsconfig.tsbuildinfo +1 -0
  167. package/frontend/web/vitest.config.ts +14 -0
  168. package/gateway/.env.example +23 -0
  169. package/gateway/README.md +13 -0
  170. package/gateway/package.json +24 -0
  171. package/gateway/src/auth.ts +49 -0
  172. package/gateway/src/index.ts +141 -0
  173. package/gateway/src/integrations/admin.ts +19 -0
  174. package/gateway/src/integrations/crypto.ts +52 -0
  175. package/gateway/src/integrations/handlers.ts +124 -0
  176. package/gateway/src/integrations/keys.ts +12 -0
  177. package/gateway/src/integrations/store.ts +106 -0
  178. package/gateway/src/stack-secrets.ts +35 -0
  179. package/gateway/tsconfig.json +13 -0
  180. package/package.json +33 -0
  181. package/turbo.json +27 -0
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ from datetime import UTC, datetime
7
+ from pathlib import Path
8
+ from unittest.mock import AsyncMock, MagicMock, patch
9
+
10
+ from alembic.config import Config
11
+ from sqlalchemy import text
12
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
13
+
14
+ from alembic import command
15
+ from app.config import Settings, clear_settings_cache, get_settings
16
+ from app.services.hooks.post_ingest import (
17
+ _apply_template,
18
+ _truncate,
19
+ dispatch_post_ingest_hooks,
20
+ )
21
+
22
+
23
def test_apply_template_missing_key_preserved() -> None:
    """The supplied value appears in the output and ``{missing}`` stays verbatim."""
    rendered = _apply_template("Hi {name} {missing}", {"name": "x"})
    for fragment in ("x", "{missing}"):
        assert fragment in rendered
27
+
28
+
29
def test_truncate() -> None:
    """Truncating ``"abc"`` with a limit of 2 yields ``"a"`` plus an ellipsis."""
    shortened = _truncate("abc", 2)
    assert shortened == "a…"
31
+
32
+
33
async def _seed_ok_doc(session: AsyncSession) -> str:
    """Insert one source, one ``ok`` document and one link row, then commit.

    Returns:
        The id of the inserted document (``"hook-doc-1"``).
    """
    insert_source = text(
        "INSERT INTO sources (name, connector_type) VALUES ('src1', 't')",
    )
    select_source_id = text("SELECT id FROM sources LIMIT 1")
    insert_document = text(
        "INSERT INTO documents "
        "(id, source_id, timestamp, content_type, raw_content, summary, status, "
        "canonical_url, external_id, dedupe_content_hash, normalization_error) "
        "VALUES (:id, :sid, :ts, 'text', '{}', :sum, 'ok', :cu, NULL, NULL, NULL)",
    )
    insert_link = text(
        "INSERT INTO document_links (document_id, url, ordinal) "
        "VALUES (:d, 'https://fallback.test/', 0)",
    )
    document_id = "hook-doc-1"

    await session.execute(insert_source)
    source_row = await session.execute(select_source_id)
    source_id = int(source_row.scalar_one())

    document_params = {
        "id": document_id,
        "sid": source_id,
        "ts": datetime.now(UTC).isoformat(),
        "sum": "hello " * 50,
        "cu": "https://example.com/page",
    }
    await session.execute(insert_document, document_params)
    await session.execute(insert_link, {"d": document_id})

    await session.commit()
    return document_id
65
+
66
+
67
def test_dispatch_slack_and_discord_no_secret_logs(
    tmp_path,
    monkeypatch,
    caplog,
) -> None:
    """Dispatch to Slack and Discord without leaking webhook URLs into logs.

    Migrates a fresh database under ``tmp_path``, seeds one document, runs
    ``dispatch_post_ingest_hooks`` against a mocked ``httpx.AsyncClient`` and
    checks that both webhooks were posted to, that the Slack payload carries
    the document summary and canonical URL, and that no part of either webhook
    URL shows up in the captured log output.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    monkeypatch.setenv(
        "POST_INGEST_SLACK_WEBHOOK_URL",
        "https://hooks.slack.com/services/SECRET/SHOULD/NOTLOG",
    )
    monkeypatch.setenv(
        "POST_INGEST_DISCORD_WEBHOOK_URL",
        "https://discord.com/api/webhooks/SECRET/NOTLOG",
    )
    clear_settings_cache()

    # Run the migrations from the backend directory and always restore the
    # previous working directory afterwards.
    backend = Path(__file__).resolve().parents[1]
    prev = os.getcwd()
    os.chdir(str(backend))
    try:
        command.upgrade(Config(str(backend / "alembic.ini")), "head")
    finally:
        os.chdir(prev)
    clear_settings_cache()

    settings = Settings()
    engine = create_async_engine(settings.sqlalchemy_database_url)
    factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    # Stand-in for httpx.AsyncClient usable as an async context manager.
    mock_resp = MagicMock()
    mock_resp.raise_for_status = MagicMock()
    client_inst = MagicMock()
    client_inst.post = AsyncMock(return_value=mock_resp)
    client_inst.__aenter__ = AsyncMock(return_value=client_inst)
    client_inst.__aexit__ = AsyncMock(return_value=False)

    async def _seed_and_dispatch() -> None:
        # Seeding, dispatch and disposal share one event loop so pooled
        # connections are never reused across loops, and the engine is
        # disposed even when seeding or dispatch raises.
        try:
            async with factory() as session:
                doc_id = await _seed_ok_doc(session)

            caplog.set_level(logging.INFO, logger="app.services.hooks.post_ingest")

            with patch(
                "app.services.hooks.post_ingest.httpx.AsyncClient",
                return_value=client_inst,
            ):
                await dispatch_post_ingest_hooks(
                    factory,
                    document_id=doc_id,
                    settings=get_settings(),
                )
        finally:
            await engine.dispose()

    try:
        asyncio.run(_seed_and_dispatch())
    finally:
        # Drop the cached settings so the tmp DATA_DIR and fake webhook URLs
        # are not served to code that runs after this test.
        clear_settings_cache()

    assert client_inst.post.await_count == 2

    def _call_url(c: object) -> str:
        # The URL may have been passed positionally or as ``url=``.
        args, kwargs = c.args, c.kwargs
        if args:
            return str(args[0])
        return str(kwargs.get("url", ""))

    calls = list(client_inst.post.await_args_list)
    urls = [_call_url(c) for c in calls]
    assert any("hooks.slack.com" in u for u in urls)
    assert any("discord.com" in u for u in urls)
    slack_call = next(c for c in calls if "slack" in _call_url(c))
    slack_json = slack_call.kwargs["json"]
    assert "hello" in slack_json["text"]
    assert "example.com/page" in slack_json["text"]

    log_text = caplog.text
    assert "SECRET" not in log_text
    assert "NOTLOG" not in log_text
    assert "hooks.slack.com" not in log_text.lower()
146
+
147
+
148
def test_dispatch_no_webhooks_is_noop(tmp_path, monkeypatch) -> None:
    """With neither webhook URL set, dispatch never constructs an HTTP client."""
    monkeypatch.setenv("DATA_DIR", str(tmp_path))
    for env_name in (
        "POST_INGEST_SLACK_WEBHOOK_URL",
        "POST_INGEST_DISCORD_WEBHOOK_URL",
    ):
        monkeypatch.delenv(env_name, raising=False)
    clear_settings_cache()

    with patch("app.services.hooks.post_ingest.httpx.AsyncClient") as client_cls:
        pending = dispatch_post_ingest_hooks(
            MagicMock(),
            document_id="x",
            settings=get_settings(),
        )
        asyncio.run(pending)

    client_cls.assert_not_called()
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
+ from datetime import UTC, datetime
5
+ from unittest.mock import AsyncMock, patch
6
+
7
+ from fastapi.testclient import TestClient
8
+
9
+ from app.config import clear_settings_cache, get_settings
10
+ from app.schemas.query import QueryFiltersPayload
11
+ from app.services.retrieval_service import (
12
+ blended_retrieval_score,
13
+ recency_score_from_ingested_at,
14
+ semantic_score_from_distance,
15
+ source_weight_for_connector,
16
+ )
17
+
18
+ _DIM = 1536
19
+
20
+
21
+ def _vec(first: float = 1.0) -> list[float]:
22
+ v = [0.0] * _DIM
23
+ v[0] = first
24
+ return v
25
+
26
+
27
def test_query_requires_gateway(migrated_client: TestClient) -> None:
    """A ``/query`` POST sent without any headers is answered with 401."""
    payload = {"query": "hi", "k": 5}
    response = migrated_client.post("/query", json=payload)
    assert response.status_code == 401
30
+
31
+
32
def test_semantic_score_decreases_with_distance() -> None:
    """Distance 0.0 must score strictly higher than distance 2.0."""
    near = semantic_score_from_distance(0.0)
    far = semantic_score_from_distance(2.0)
    assert near > far
34
+
35
+
36
def test_recency_newer_higher() -> None:
    """A recently ingested timestamp must score higher than a years-old one."""
    reference = datetime(2026, 6, 15, tzinfo=UTC)

    def _score(ingested_at: str):
        # Same reference time and half-life for both samples.
        return recency_score_from_ingested_at(
            ingested_at,
            now=reference,
            half_life_days=30.0,
        )

    stale = _score("2020-01-01T00:00:00Z")
    fresh = _score("2026-06-10T00:00:00Z")
    assert fresh > stale
49
+
50
+
51
def test_source_weight_default() -> None:
    """Unknown connectors get the ``default`` weight; known ones get their own."""
    weights = {"default": 0.5, "web": 0.9}
    expectations = (("missing", 0.5), ("web", 0.9))
    for connector, expected in expectations:
        assert source_weight_for_connector(connector, weights) == expected
55
+
56
+
57
def test_blended_score() -> None:
    """With all three components at 1.0 the blended score is 1.0 (within 1e-6)."""
    from app.config import Settings

    components = {"semantic": 1.0, "recency": 1.0, "source_w": 1.0}
    blended = blended_retrieval_score(**components, settings=Settings())
    assert abs(blended - 1.0) < 1e-6
68
+
69
+
70
def test_filters_newer_than_days_sets_timestamp_min() -> None:
    """``newer_than_days`` alone yields a lower bound and no upper bound."""
    reference = datetime(2026, 1, 20, 12, 0, 0, tzinfo=UTC)
    payload = QueryFiltersPayload(newer_than_days=10.0)

    vector_filters = payload.to_vector_filters(now=reference)

    assert vector_filters is not None
    assert vector_filters.timestamp_min is not None
    assert vector_filters.timestamp_max is None
77
+
78
+
79
def test_vector_filters_merge_timestamp_min() -> None:
    """An explicit ``timestamp_min`` later than the ``newer_than_days`` cutoff is kept."""
    reference = datetime(2026, 1, 20, tzinfo=UTC)
    explicit_min = "2026-01-01T00:00:00Z"
    payload = QueryFiltersPayload(timestamp_min=explicit_min, newer_than_days=30.0)

    vector_filters = payload.to_vector_filters(now=reference)

    assert vector_filters is not None
    # Later (stricter) bound should win
    assert vector_filters.timestamp_min == "2026-01-01T00:00:00Z"
89
+
90
+
91
def test_query_retrieves_chunk(migrated_client: TestClient, monkeypatch) -> None:
    """End-to-end ``/query`` lookup of a single seeded chunk.

    Seeds a source, a document and a chunk directly through ``sqlite3``,
    upserts a matching embedding via ``/vectors/upsert``, then checks that
    ``/query`` (with ``batch_embed_contents`` patched to return the same
    vector) returns exactly that chunk together with its document summary,
    attribution and a combined context mentioning the document id.
    """
    monkeypatch.setenv("GEMINI_API_KEY", "test-key")
    clear_settings_cache()

    h = {"x-user-id": "u1"}
    settings = get_settings()
    db_path = settings.data_dir / settings.sqlite_filename
    conn = sqlite3.connect(str(db_path))
    try:
        cur = conn.cursor()
        cur.execute(
            "INSERT INTO sources (name, connector_type) VALUES ('n', 'web')",
        )
        sid = int(cur.lastrowid)
        cur.execute(
            "INSERT INTO documents (id, source_id, timestamp, content_type, raw_content, "
            "summary, status) VALUES ('d1', ?, '2024-01-01T00:00:00Z', 'text', '{}', "
            "'summary one', 'ok')",
            (sid,),
        )
        cur.execute(
            "INSERT INTO document_chunks (document_id, ordinal, text, start_block_ordinal, "
            "end_block_ordinal, meta) VALUES ('d1', 0, 'alpha beta gamma', 0, 0, NULL)",
        )
        cid = int(cur.lastrowid)
        conn.commit()
    finally:
        # Close the handle even if an INSERT fails, so the database file is
        # not left open by a half-seeded test.
        conn.close()

    ts = "2026-01-10T12:00:00Z"
    up = migrated_client.post(
        "/vectors/upsert",
        headers=h,
        json={
            "embeddings": [_vec(1.0)],
            "metas": [
                {
                    "document_id": "d1",
                    "chunk_id": cid,
                    "source_id": sid,
                    "modality": "text",
                    "ingested_at": ts,
                },
            ],
        },
    )
    assert up.status_code == 200

    async def _fake_batch(*args, **kwargs):
        # Same vector as the one upserted above.
        return [_vec(1.0)]

    with patch(
        "app.services.retrieval_service.batch_embed_contents",
        new=AsyncMock(side_effect=_fake_batch),
    ):
        q = migrated_client.post(
            "/query",
            headers=h,
            json={"query": "alpha beta", "k": 5},
        )
    assert q.status_code == 200
    data = q.json()
    assert len(data["candidates"]) == 1
    c0 = data["candidates"][0]
    assert c0["chunk"]["text"] == "alpha beta gamma"
    assert c0["document"]["summary"] == "summary one"
    assert c0["attribution"]["document_id"] == "d1"
    assert "combined_text" in data["context"]
    assert "d1" in data["context"]["combined_text"]
158
+
159
+
160
def test_query_503_without_gemini_key(migrated_client: TestClient, monkeypatch) -> None:
    """With ``GEMINI_API_KEY`` unset, an authenticated ``/query`` returns 503."""
    monkeypatch.delenv("GEMINI_API_KEY", raising=False)
    clear_settings_cache()

    response = migrated_client.post(
        "/query",
        headers={"x-user-id": "u1"},
        json={"query": "x", "k": 1},
    )
    assert response.status_code == 503
@@ -0,0 +1,72 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from datetime import UTC, datetime
5
+
6
+ import pytest
7
+
8
+ from app.connectors.thread_expansion import (
9
+ FakeRedditThreadExpansionFetcher,
10
+ FakeTwitterThreadExpansionFetcher,
11
+ ThreadExpansionResult,
12
+ ThreadRelationshipEdge,
13
+ )
14
+ from app.connectors.thread_expansion.models import ThreadDocumentNode
15
+
16
+
17
def test_thread_edge_rejects_self_loop() -> None:
    """An edge whose parent and child ids are equal raises ``ValueError``."""
    same_id = "a"
    with pytest.raises(ValueError, match="must differ"):
        ThreadRelationshipEdge(
            parent_document_id=same_id,
            child_document_id=same_id,
            relation_type="reply",
        )
24
+
25
+
26
def test_expansion_result_requires_root_in_nodes() -> None:
    """A result whose ``root_document_id`` matches no node raises ``ValueError``."""
    with pytest.raises(ValueError, match="root_document_id"):
        only_node = ThreadDocumentNode(
            document_id="x",
            external_id="1",
            text="t",
            created_at=datetime.now(UTC),
        )
        ThreadExpansionResult(
            platform="twitter",
            thread_key="k",
            root_document_id="missing",
            nodes=[only_node],
            edges=[],
        )
42
+
43
+
44
def test_fake_twitter_linear_reply_chain() -> None:
    """The fake Twitter fetcher returns root -> r1 -> r2 as two reply edges."""
    fetcher = FakeTwitterThreadExpansionFetcher()
    result = asyncio.run(fetcher.fetch_full_thread("1999", config={}))

    assert result.platform == "twitter"
    assert result.root_document_id == "twitter:tw_root"

    # Count reply edges and record each child's parent in one pass.
    reply_count = 0
    parent_of = {}
    for edge in result.edges:
        if edge.relation_type != "reply":
            continue
        reply_count += 1
        parent_of[edge.child_document_id] = edge.parent_document_id

    assert reply_count == 2
    assert parent_of["twitter:tw_r1"] == "twitter:tw_root"
    assert parent_of["twitter:tw_r2"] == "twitter:tw_r1"
54
+
55
+
56
def test_fake_reddit_post_and_nested_comment() -> None:
    """The fake Reddit fetcher returns three nodes and links the post to its comment."""
    fetcher = FakeRedditThreadExpansionFetcher()
    result = asyncio.run(fetcher.fetch_post_and_comments("t3_abc123", config={}))

    assert result.platform == "reddit"
    assert len(result.nodes) == 3

    post_to_comment = ("reddit:t3_abc123", "reddit:t1_cmnt1")
    observed_pairs = [
        (edge.parent_document_id, edge.child_document_id) for edge in result.edges
    ]
    assert post_to_comment in observed_pairs
66
+
67
+
68
def test_fakes_run_sync_helper() -> None:
    """Ensure asyncio.run works for quick local scripts."""
    pending = FakeTwitterThreadExpansionFetcher().fetch_full_thread("1", config={})
    outcome = asyncio.run(pending)
    assert isinstance(outcome, ThreadExpansionResult)
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ from fastapi.testclient import TestClient
4
+
5
+ _DIM = 1536
6
+
7
+
8
+ def _vec(first: float = 1.0) -> list[float]:
9
+ v = [0.0] * _DIM
10
+ v[0] = first
11
+ return v
12
+
13
+
14
def test_vectors_require_gateway(migrated_client: TestClient) -> None:
    """A ``/vectors/search`` POST sent without any headers is answered with 401."""
    body = {"query_vector": _vec(), "k": 1}
    response = migrated_client.post("/vectors/search", json=body)
    assert response.status_code == 401
20
+
21
+
22
def test_vectors_upsert_search_delete(migrated_client: TestClient) -> None:
    """Round-trip two embeddings through upsert, search, filtered search and delete.

    Upserts vectors for ``doc-a`` and ``doc-b``, checks that an unfiltered
    search ranks ``doc-a`` first, that a search filtered on source and
    modality returns both rows, and that after deleting ``doc-a`` only
    ``doc-b`` is still returned.
    """
    h = {"x-user-id": "u1"}
    ts = "2024-06-01T12:00:00Z"

    r = migrated_client.post(
        "/vectors/upsert",
        json={
            "embeddings": [_vec(1.0), _vec(0.2)],
            "metas": [
                {
                    "document_id": "doc-a",
                    "chunk_id": 0,
                    "source_id": 10,
                    "modality": "text",
                    "ingested_at": ts,
                },
                {
                    "document_id": "doc-b",
                    "chunk_id": 1,
                    "source_id": 10,
                    "modality": "text",
                    "ingested_at": ts,
                },
            ],
        },
        headers=h,
    )
    assert r.status_code == 200

    q = migrated_client.post(
        "/vectors/search",
        json={"query_vector": _vec(1.0), "k": 2},
        headers=h,
    )
    assert q.status_code == 200
    data = q.json()["results"]
    assert len(data) == 2
    assert data[0]["document_id"] == "doc-a"
    assert data[0]["distance"] < data[1]["distance"]

    f = migrated_client.post(
        "/vectors/search",
        json={
            "query_vector": _vec(1.0),
            "k": 5,
            "filters": {"source_id": 10, "modality": "text"},
        },
        headers=h,
    )
    assert f.status_code == 200
    assert len(f.json()["results"]) == 2

    d = migrated_client.delete("/vectors/documents/doc-a", headers=h)
    assert d.status_code == 200
    assert d.json()["deleted"] >= 1

    q2 = migrated_client.post(
        "/vectors/search",
        json={"query_vector": _vec(1.0), "k": 5},
        headers=h,
    )
    # Check the status first so a failed request is reported as such rather
    # than as a KeyError on the missing "results" key.
    assert q2.status_code == 200
    ids = {row["document_id"] for row in q2.json()["results"]}
    assert "doc-a" not in ids
    assert "doc-b" in ids