business-stack 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/.python-version +1 -0
  2. package/backend/.env.example +65 -0
  3. package/backend/alembic/env.py +63 -0
  4. package/backend/alembic/script.py.mako +26 -0
  5. package/backend/alembic/versions/2a9c8f1d0e7b_multimodal_kb_schema.py +279 -0
  6. package/backend/alembic/versions/3c1d2e4f5a6b_sqlite_vec_embeddings.py +58 -0
  7. package/backend/alembic/versions/4e8b0c2d1a3f_document_links.py +50 -0
  8. package/backend/alembic/versions/6a0b1c2d3e4f_link_expansion_dedupe_columns.py +49 -0
  9. package/backend/alembic/versions/7d8e9f0a1b2c_document_chunks.py +70 -0
  10. package/backend/alembic/versions/8f2a1c0d9e3b_initial_empty_revision.py +22 -0
  11. package/backend/alembic/versions/9f0a1b2c3d4e_entity_mentions_cooccurrence.py +123 -0
  12. package/backend/alembic/versions/b1c2d3e4f5a6_pipeline_dedupe_dlq.py +99 -0
  13. package/backend/alembic/versions/c2d3e4f5061a_chat_sessions_messages.py +59 -0
  14. package/backend/alembic.ini +42 -0
  15. package/backend/app/__init__.py +0 -0
  16. package/backend/app/config.py +337 -0
  17. package/backend/app/connectors/__init__.py +13 -0
  18. package/backend/app/connectors/base.py +39 -0
  19. package/backend/app/connectors/builtins.py +51 -0
  20. package/backend/app/connectors/playwright_session.py +146 -0
  21. package/backend/app/connectors/registry.py +68 -0
  22. package/backend/app/connectors/thread_expansion/__init__.py +33 -0
  23. package/backend/app/connectors/thread_expansion/fakes.py +154 -0
  24. package/backend/app/connectors/thread_expansion/models.py +113 -0
  25. package/backend/app/connectors/thread_expansion/reddit.py +53 -0
  26. package/backend/app/connectors/thread_expansion/twitter.py +49 -0
  27. package/backend/app/db.py +5 -0
  28. package/backend/app/dependencies.py +34 -0
  29. package/backend/app/logging_config.py +35 -0
  30. package/backend/app/main.py +97 -0
  31. package/backend/app/middleware/__init__.py +0 -0
  32. package/backend/app/middleware/gateway_identity.py +17 -0
  33. package/backend/app/middleware/openapi_gateway.py +71 -0
  34. package/backend/app/middleware/request_id.py +23 -0
  35. package/backend/app/openapi_config.py +126 -0
  36. package/backend/app/routers/__init__.py +0 -0
  37. package/backend/app/routers/admin_pipeline.py +123 -0
  38. package/backend/app/routers/chat.py +206 -0
  39. package/backend/app/routers/chunks.py +36 -0
  40. package/backend/app/routers/entity_extract.py +31 -0
  41. package/backend/app/routers/example.py +8 -0
  42. package/backend/app/routers/gemini_embed.py +58 -0
  43. package/backend/app/routers/health.py +28 -0
  44. package/backend/app/routers/ingestion.py +146 -0
  45. package/backend/app/routers/link_expansion.py +34 -0
  46. package/backend/app/routers/pipeline_status.py +304 -0
  47. package/backend/app/routers/query.py +63 -0
  48. package/backend/app/routers/vectors.py +63 -0
  49. package/backend/app/schemas/__init__.py +0 -0
  50. package/backend/app/schemas/canonical.py +44 -0
  51. package/backend/app/schemas/chat.py +50 -0
  52. package/backend/app/schemas/ingest.py +29 -0
  53. package/backend/app/schemas/query.py +153 -0
  54. package/backend/app/schemas/vectors.py +56 -0
  55. package/backend/app/services/__init__.py +0 -0
  56. package/backend/app/services/chat_store.py +152 -0
  57. package/backend/app/services/chunking/__init__.py +3 -0
  58. package/backend/app/services/chunking/llm_boundaries.py +63 -0
  59. package/backend/app/services/chunking/schemas.py +30 -0
  60. package/backend/app/services/chunking/semantic_chunk.py +178 -0
  61. package/backend/app/services/chunking/splitters.py +214 -0
  62. package/backend/app/services/embeddings/__init__.py +20 -0
  63. package/backend/app/services/embeddings/build_inputs.py +140 -0
  64. package/backend/app/services/embeddings/dlq.py +128 -0
  65. package/backend/app/services/embeddings/gemini_api.py +207 -0
  66. package/backend/app/services/embeddings/persist.py +74 -0
  67. package/backend/app/services/embeddings/types.py +32 -0
  68. package/backend/app/services/embeddings/worker.py +224 -0
  69. package/backend/app/services/entities/__init__.py +12 -0
  70. package/backend/app/services/entities/gliner_extract.py +63 -0
  71. package/backend/app/services/entities/llm_extract.py +94 -0
  72. package/backend/app/services/entities/pipeline.py +179 -0
  73. package/backend/app/services/entities/spacy_extract.py +63 -0
  74. package/backend/app/services/entities/types.py +15 -0
  75. package/backend/app/services/gemini_chat.py +113 -0
  76. package/backend/app/services/hooks/__init__.py +3 -0
  77. package/backend/app/services/hooks/post_ingest.py +186 -0
  78. package/backend/app/services/ingestion/__init__.py +0 -0
  79. package/backend/app/services/ingestion/persist.py +188 -0
  80. package/backend/app/services/integrations_remote.py +91 -0
  81. package/backend/app/services/link_expansion/__init__.py +3 -0
  82. package/backend/app/services/link_expansion/canonical_url.py +45 -0
  83. package/backend/app/services/link_expansion/domain_policy.py +26 -0
  84. package/backend/app/services/link_expansion/html_extract.py +72 -0
  85. package/backend/app/services/link_expansion/rate_limit.py +32 -0
  86. package/backend/app/services/link_expansion/robots.py +46 -0
  87. package/backend/app/services/link_expansion/schemas.py +67 -0
  88. package/backend/app/services/link_expansion/worker.py +458 -0
  89. package/backend/app/services/normalization/__init__.py +7 -0
  90. package/backend/app/services/normalization/normalizer.py +331 -0
  91. package/backend/app/services/normalization/persist_normalized.py +67 -0
  92. package/backend/app/services/playwright_extract/__init__.py +13 -0
  93. package/backend/app/services/playwright_extract/__main__.py +96 -0
  94. package/backend/app/services/playwright_extract/extract.py +181 -0
  95. package/backend/app/services/retrieval_service.py +351 -0
  96. package/backend/app/sqlite_ext.py +36 -0
  97. package/backend/app/storage/__init__.py +3 -0
  98. package/backend/app/storage/blobs.py +30 -0
  99. package/backend/app/vectorstore/__init__.py +13 -0
  100. package/backend/app/vectorstore/sqlite_vec_store.py +242 -0
  101. package/backend/backend.egg-info/PKG-INFO +18 -0
  102. package/backend/backend.egg-info/SOURCES.txt +93 -0
  103. package/backend/backend.egg-info/dependency_links.txt +1 -0
  104. package/backend/backend.egg-info/entry_points.txt +2 -0
  105. package/backend/backend.egg-info/requires.txt +15 -0
  106. package/backend/backend.egg-info/top_level.txt +4 -0
  107. package/backend/package.json +15 -0
  108. package/backend/pyproject.toml +52 -0
  109. package/backend/tests/conftest.py +40 -0
  110. package/backend/tests/test_chat.py +92 -0
  111. package/backend/tests/test_chunking.py +132 -0
  112. package/backend/tests/test_entities.py +170 -0
  113. package/backend/tests/test_gemini_embed.py +224 -0
  114. package/backend/tests/test_health.py +24 -0
  115. package/backend/tests/test_ingest_raw.py +123 -0
  116. package/backend/tests/test_link_expansion.py +241 -0
  117. package/backend/tests/test_main.py +12 -0
  118. package/backend/tests/test_normalizer.py +114 -0
  119. package/backend/tests/test_openapi_gateway.py +40 -0
  120. package/backend/tests/test_pipeline_hardening.py +285 -0
  121. package/backend/tests/test_pipeline_status.py +71 -0
  122. package/backend/tests/test_playwright_extract.py +80 -0
  123. package/backend/tests/test_post_ingest_hooks.py +162 -0
  124. package/backend/tests/test_query.py +165 -0
  125. package/backend/tests/test_thread_expansion.py +72 -0
  126. package/backend/tests/test_vectors.py +85 -0
  127. package/backend/uv.lock +1839 -0
  128. package/bin/business-stack.cjs +412 -0
  129. package/frontend/web/.env.example +23 -0
  130. package/frontend/web/AGENTS.md +5 -0
  131. package/frontend/web/CLAUDE.md +1 -0
  132. package/frontend/web/README.md +36 -0
  133. package/frontend/web/components.json +25 -0
  134. package/frontend/web/next-env.d.ts +6 -0
  135. package/frontend/web/next.config.ts +30 -0
  136. package/frontend/web/package.json +65 -0
  137. package/frontend/web/postcss.config.mjs +7 -0
  138. package/frontend/web/skills-lock.json +35 -0
  139. package/frontend/web/src/app/account/[[...path]]/page.tsx +19 -0
  140. package/frontend/web/src/app/auth/[[...path]]/page.tsx +14 -0
  141. package/frontend/web/src/app/chat/page.tsx +725 -0
  142. package/frontend/web/src/app/favicon.ico +0 -0
  143. package/frontend/web/src/app/globals.css +563 -0
  144. package/frontend/web/src/app/layout.tsx +50 -0
  145. package/frontend/web/src/app/page.tsx +96 -0
  146. package/frontend/web/src/app/settings/integrations/actions.ts +74 -0
  147. package/frontend/web/src/app/settings/integrations/integrations-settings-form.tsx +330 -0
  148. package/frontend/web/src/app/settings/integrations/page.tsx +41 -0
  149. package/frontend/web/src/app/webhooks/alpha-alerts/route.ts +84 -0
  150. package/frontend/web/src/components/home-auth-panel.tsx +49 -0
  151. package/frontend/web/src/components/providers.tsx +50 -0
  152. package/frontend/web/src/lib/alpha-webhook/connectors/registry.ts +35 -0
  153. package/frontend/web/src/lib/alpha-webhook/connectors/types.ts +8 -0
  154. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.test.ts +40 -0
  155. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.ts +78 -0
  156. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge.ts +30 -0
  157. package/frontend/web/src/lib/alpha-webhook/handler.ts +12 -0
  158. package/frontend/web/src/lib/alpha-webhook/signature.test.ts +33 -0
  159. package/frontend/web/src/lib/alpha-webhook/signature.ts +21 -0
  160. package/frontend/web/src/lib/alpha-webhook/types.ts +23 -0
  161. package/frontend/web/src/lib/auth-client.ts +23 -0
  162. package/frontend/web/src/lib/integrations-config.ts +125 -0
  163. package/frontend/web/src/lib/ui-utills.tsx +90 -0
  164. package/frontend/web/src/lib/utils.ts +6 -0
  165. package/frontend/web/tsconfig.json +36 -0
  166. package/frontend/web/tsconfig.tsbuildinfo +1 -0
  167. package/frontend/web/vitest.config.ts +14 -0
  168. package/gateway/.env.example +23 -0
  169. package/gateway/README.md +13 -0
  170. package/gateway/package.json +24 -0
  171. package/gateway/src/auth.ts +49 -0
  172. package/gateway/src/index.ts +141 -0
  173. package/gateway/src/integrations/admin.ts +19 -0
  174. package/gateway/src/integrations/crypto.ts +52 -0
  175. package/gateway/src/integrations/handlers.ts +124 -0
  176. package/gateway/src/integrations/keys.ts +12 -0
  177. package/gateway/src/integrations/store.ts +106 -0
  178. package/gateway/src/stack-secrets.ts +35 -0
  179. package/gateway/tsconfig.json +13 -0
  180. package/package.json +33 -0
  181. package/turbo.json +27 -0
@@ -0,0 +1,20 @@
1
+ """
2
+ Gemini embedding generation (single- or multi-modal, batched).
3
+
4
+ Public entrypoints:
5
+
6
+ - ``batch_embed_contents`` — low-level API batch (list of interleaved part lists).
7
+
8
+ Import ``run_embed_document_job`` / ``embed_document_gemini`` from
9
+ ``app.services.embeddings.worker`` to avoid circular imports with the entity pipeline.
10
+ """
11
+
12
+ from app.services.embeddings.gemini_api import batch_embed_contents
13
+ from app.services.embeddings.types import EmbeddingPart, InlineDataPart, TextPart
14
+
15
+ __all__ = [
16
+ "EmbeddingPart",
17
+ "InlineDataPart",
18
+ "TextPart",
19
+ "batch_embed_contents",
20
+ ]
@@ -0,0 +1,140 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from dataclasses import dataclass
6
+
7
+ from sqlalchemy import text
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from app.services.embeddings.types import EmbeddingPart, InlineDataPart, TextPart
11
+ from app.services.normalization.normalizer import parse_blob_sha256
12
+ from app.storage.blobs import BlobStore
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ @dataclass(frozen=True, slots=True)
18
+ class ChunkRow:
19
+ id: int
20
+ ordinal: int
21
+ text: str
22
+ start_block_ordinal: int
23
+ end_block_ordinal: int
24
+ meta_raw: str | None
25
+
26
+
27
+ @dataclass(frozen=True, slots=True)
28
+ class BlockRow:
29
+ ordinal: int
30
+ type: str
31
+ meta: dict
32
+ storage_uri: str | None
33
+ sha256: str | None
34
+ mime: str | None
35
+
36
+
37
async def load_document_chunks(
    session: AsyncSession,
    document_id: str,
) -> list[ChunkRow]:
    """
    Load every ``document_chunks`` row for ``document_id``, ordered by ordinal.

    Numeric columns are coerced with ``int``, the text column with ``str``,
    and an empty/NULL ``meta`` column becomes ``None``.
    """
    query = text(
        "SELECT id, ordinal, text, start_block_ordinal, end_block_ordinal, meta "
        "FROM document_chunks WHERE document_id = :d ORDER BY ordinal",
    )
    result = await session.execute(query, {"d": document_id})

    chunks: list[ChunkRow] = []
    for chunk_id, ordinal, body, first_block, last_block, meta in result.fetchall():
        chunks.append(
            ChunkRow(
                id=int(chunk_id),
                ordinal=int(ordinal),
                text=str(body),
                start_block_ordinal=int(first_block),
                end_block_ordinal=int(last_block),
                meta_raw=meta or None,
            ),
        )
    return chunks
60
+
61
+
62
async def load_blocks_span(
    session: AsyncSession,
    document_id: str,
    start_ord: int,
    end_ord: int,
) -> list[BlockRow]:
    """
    Load ``content_blocks`` rows whose ordinal lies in ``[start_ord, end_ord]``.

    Rows come back ordered by ordinal. The ``meta`` column is decoded as JSON
    on a best-effort basis: anything that is empty, fails to decode, or is not
    a JSON object is represented as an empty dict.
    """
    query = text(
        "SELECT ordinal, type, meta, storage_uri, sha256, mime "
        "FROM content_blocks WHERE document_id = :d "
        "AND ordinal >= :s AND ordinal <= :e "
        "ORDER BY ordinal",
    )
    result = await session.execute(
        query,
        {"d": document_id, "s": start_ord, "e": end_ord},
    )

    blocks: list[BlockRow] = []
    for ordinal, kind, meta_json, storage_uri, sha256, mime in result.fetchall():
        decoded = None
        if meta_json:
            try:
                decoded = json.loads(meta_json)
            except json.JSONDecodeError:
                # Deliberately tolerant: a corrupt meta blob must not abort
                # loading the rest of the span.
                decoded = None
        blocks.append(
            BlockRow(
                ordinal=int(ordinal),
                type=str(kind),
                meta=decoded if isinstance(decoded, dict) else {},
                storage_uri=storage_uri or None,
                sha256=sha256 or None,
                mime=mime or None,
            ),
        )
    return blocks
98
+
99
+
100
def chunk_to_text_parts(chunk: ChunkRow) -> list[EmbeddingPart]:
    """
    Build the text-only embedding input for a stored chunk.

    A chunk whose text is empty or whitespace-only yields a single-space
    ``TextPart`` instead of its own text, so the result always contains
    exactly one part.
    """
    if chunk.text.strip():
        return [TextPart(chunk.text)]
    return [TextPart(" ")]
105
+
106
+
107
async def chunk_to_multimodal_parts(
    session: AsyncSession,
    document_id: str,
    chunk: ChunkRow,
    blob_store: BlobStore,
) -> list[EmbeddingPart]:
    """
    Build interleaved text/image parts from the blocks a chunk spans.

    Blocks are read from ``content_blocks`` between the chunk's start and end
    block ordinals. ``text`` blocks contribute their stripped ``meta["text"]``
    when it is a non-blank string. ``image`` blocks with a storage URI
    contribute the blob bytes (MIME type defaults to ``image/png``); images
    whose hash cannot be determined or whose blob cannot be read are skipped.
    Other block types are ignored. When nothing usable is found the chunk's
    stored text is used instead (see ``chunk_to_text_parts``).
    """
    span = await load_blocks_span(
        session,
        document_id,
        chunk.start_block_ordinal,
        chunk.end_block_ordinal,
    )

    parts: list[EmbeddingPart] = []
    for block in span:
        if block.type == "text":
            raw = block.meta.get("text")
            stripped = raw.strip() if isinstance(raw, str) else ""
            if stripped:
                parts.append(TextPart(stripped))
        elif block.type == "image" and block.storage_uri:
            # Prefer the stored hash; otherwise derive it from the URI.
            digest = block.sha256 or parse_blob_sha256(block.storage_uri)
            if digest:
                try:
                    payload = blob_store.read_bytes(digest)
                except OSError:
                    logger.warning(
                        "missing blob %s for document %s", digest, document_id
                    )
                else:
                    parts.append(
                        InlineDataPart(
                            mime_type=block.mime or "image/png",
                            data=payload,
                        ),
                    )

    return parts or chunk_to_text_parts(chunk)
@@ -0,0 +1,128 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import UTC, datetime, timedelta
5
+ from typing import Any
6
+
7
+ from sqlalchemy import text
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from app.config import Settings
11
+
12
+
13
+ def merge_ingest_meta(existing: str | None, patch: dict[str, Any]) -> str:
14
+ base: dict[str, Any] = {}
15
+ if existing:
16
+ try:
17
+ parsed = json.loads(existing)
18
+ if isinstance(parsed, dict):
19
+ base = parsed
20
+ except json.JSONDecodeError:
21
+ base = {"_raw_ingest_meta": existing}
22
+ base.update(patch)
23
+ return json.dumps(base, default=str)
24
+
25
+
26
+ def cleaned_ingest_meta_dict(ingest_meta: str | None) -> dict[str, Any]:
27
+ """Drop embedding failure / DLQ keys so a successful embed can overwrite status."""
28
+ base: dict[str, Any] = {}
29
+ if ingest_meta:
30
+ try:
31
+ parsed = json.loads(ingest_meta)
32
+ if isinstance(parsed, dict):
33
+ base = parsed
34
+ except json.JSONDecodeError:
35
+ base = {}
36
+ for k in ("embedding_error", "embedding_dlq_attempts", "embedding_dlq_state"):
37
+ base.pop(k, None)
38
+ return base
39
+
40
+
41
async def clear_embedding_dlq(session: AsyncSession, *, document_id: str) -> None:
    """Delete every ``embedding_dlq`` row for ``document_id``; no commit is issued here."""
    stmt = text("DELETE FROM embedding_dlq WHERE document_id = :d")
    await session.execute(stmt, {"d": document_id})
46
+
47
+
48
async def record_embedding_dlq_failure(
    session: AsyncSession,
    *,
    document_id: str,
    error_message: str,
    multimodal: bool,
    settings: Settings,
    prior_ingest_meta: str | None,
) -> None:
    """
    Record one more embedding failure for ``document_id``.

    The stored ``attempt_count`` is read and incremented (a first failure
    counts as 1). Once the new count reaches
    ``settings.embedding_dlq_max_attempts`` the DLQ row is marked ``dead`` with
    no retry time and the document status becomes ``failed``. Otherwise the row
    is ``pending_retry``, ``next_retry_at`` is now plus
    ``embedding_dlq_base_delay_s * 2 ** (attempts - 1)`` capped at
    ``embedding_dlq_max_backoff_s``, and the document status becomes
    ``partial``.

    The DLQ row is upserted (error text truncated to 8000 characters) and the
    document's ``ingest_meta`` is merged with the error (truncated to 2000
    characters), the attempt count and the DLQ state. No commit is issued here.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    lookup = await session.execute(
        text(
            "SELECT attempt_count FROM embedding_dlq WHERE document_id = :d LIMIT 1",
        ),
        {"d": document_id},
    )
    existing = lookup.first()
    attempts = 1 if existing is None else int(existing[0]) + 1

    retry_limit = settings.embedding_dlq_max_attempts
    first_delay_s = settings.embedding_dlq_base_delay_s
    exhausted = attempts >= retry_limit

    next_retry_at: str | None = None
    if not exhausted:
        backoff_s = min(
            first_delay_s * (2 ** (attempts - 1)),
            settings.embedding_dlq_max_backoff_s,
        )
        retry_moment = datetime.now(UTC) + timedelta(seconds=backoff_s)
        next_retry_at = retry_moment.strftime(timestamp_format)
    state = "dead" if exhausted else "pending_retry"
    doc_status = "failed" if exhausted else "partial"

    updated_at = datetime.now(UTC).strftime(timestamp_format)
    upsert = text(
        """
        INSERT INTO embedding_dlq (
            document_id, last_error, attempt_count, next_retry_at, state,
            multimodal, updated_at
        ) VALUES (
            :doc, :err, :ac, :nra, :st, :mm, :up
        )
        ON CONFLICT(document_id) DO UPDATE SET
            last_error = excluded.last_error,
            attempt_count = excluded.attempt_count,
            next_retry_at = excluded.next_retry_at,
            state = excluded.state,
            multimodal = excluded.multimodal,
            updated_at = excluded.updated_at
        """,
    )
    await session.execute(
        upsert,
        {
            "doc": document_id,
            "err": error_message[:8000],
            "ac": attempts,
            "nra": next_retry_at,
            "st": state,
            "mm": 1 if multimodal else 0,
            "up": updated_at,
        },
    )

    # Mirror the failure onto the document row so status readers do not need
    # to join against the DLQ table.
    merged_meta = merge_ingest_meta(
        prior_ingest_meta,
        {
            "embedding_error": error_message[:2000],
            "embedding_dlq_attempts": attempts,
            "embedding_dlq_state": state,
        },
    )
    await session.execute(
        text(
            "UPDATE documents SET status = :st, ingest_meta = :im WHERE id = :id",
        ),
        {"st": doc_status, "im": merged_meta, "id": document_id},
    )
@@ -0,0 +1,207 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import base64
5
+ import logging
6
+ from typing import Any
7
+
8
+ import httpx
9
+
10
+ from app.config import Settings
11
+ from app.services.embeddings.types import EmbeddingPart, InlineDataPart, TextPart
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _GEMINI_BASE = "https://generativelanguage.googleapis.com/v1beta"
16
+
17
+
18
def _part_to_api_dict(part: EmbeddingPart) -> dict[str, Any]:
    """
    Convert one embedding part into its JSON request representation.

    ``TextPart`` becomes ``{"text": ...}``; ``InlineDataPart`` becomes an
    ``inlineData`` object carrying the MIME type and the base64-encoded bytes.

    Raises:
        TypeError: ``part`` is neither of the two known part types.
    """
    if isinstance(part, TextPart):
        return {"text": part.text}
    if not isinstance(part, InlineDataPart):
        msg = f"unknown part type: {type(part)}"
        raise TypeError(msg)
    encoded = base64.b64encode(part.data).decode("ascii")
    return {"inlineData": {"mimeType": part.mime_type, "data": encoded}}
31
+
32
+
33
def _build_request_item(
    *,
    model: str,
    parts: list[EmbeddingPart],
    task_type: str,
    output_dimensionality: int | None,
) -> dict[str, Any]:
    """
    Build one entry of the ``requests`` array of a batch embed call.

    ``outputDimensionality`` is included only when a value is given, so the
    caller can omit the field entirely by passing ``None``.
    """
    encoded_parts = [_part_to_api_dict(p) for p in parts]
    request: dict[str, Any] = {
        "model": f"models/{model}",
        "content": {"parts": encoded_parts},
        "taskType": task_type,
    }
    if output_dimensionality is None:
        return request
    request["outputDimensionality"] = output_dimensionality
    return request
48
+
49
+
50
def _normalize_vector(values: list[float], target_dim: int) -> list[float]:
    """
    Fit an embedding to ``target_dim`` components.

    A vector of exactly ``target_dim`` entries is returned as-is (same list
    object); a longer one is truncated to its first ``target_dim`` entries.
    No rescaling is performed.

    Raises:
        ValueError: the vector has fewer than ``target_dim`` entries.
    """
    have = len(values)
    if have < target_dim:
        msg = f"embedding dim {len(values)} < target {target_dim}"
        raise ValueError(msg)
    return values if have == target_dim else values[:target_dim]
57
+
58
+
59
async def _post_batch_once(
    client: httpx.AsyncClient,
    url: str,
    headers: dict[str, str],
    payload: dict[str, Any],
    target_dim: int,
) -> tuple[list[list[float]] | None, bool]:
    """
    Send one batch embed request and parse the returned embeddings.

    Returns ``(vectors, reject_output_dimensionality)``. The flag is True (and
    ``vectors`` is None) when the API answered HTTP 400 with a body mentioning
    ``outputDimensionality``; the caller should then retry without that field.

    Raises:
        RuntimeError: HTTP 429/500/502/503, or a response body that does not
            have the ``{"embeddings": [{"values": [...]}]}`` shape.
        httpx.HTTPStatusError: any other error status (``raise_for_status``).
        ValueError: a returned vector is shorter than ``target_dim`` (raised
            by ``_normalize_vector``).
    """
    r = await client.post(url, headers=headers, json=payload)
    if r.status_code == 400 and "outputDimensionality" in r.text:
        return None, True
    if r.status_code in (429, 500, 502, 503):
        msg = f"retryable HTTP {r.status_code}"
        raise RuntimeError(msg)
    r.raise_for_status()
    data = r.json()
    # Guard the container types before calling ``.get``: a JSON array or
    # scalar would otherwise raise AttributeError, which callers in this
    # module do not treat as a handled failure.
    embs = data.get("embeddings") if isinstance(data, dict) else None
    if not isinstance(embs, list):
        msg = f"unexpected response: {data!r}"[:500]
        raise RuntimeError(msg)
    out: list[list[float]] = []
    for emb in embs:
        vals = emb.get("values") if isinstance(emb, dict) else None
        if not isinstance(vals, list):
            msg = f"bad embedding entry: {emb!r}"[:300]
            raise RuntimeError(msg)
        floats = [float(x) for x in vals]
        out.append(_normalize_vector(floats, target_dim))
    return out, False
91
+
92
+
93
async def _post_batch_with_retry(
    client: httpx.AsyncClient,
    url: str,
    headers: dict[str, str],
    payload: dict[str, Any],
    target_dim: int,
    settings: Settings,
) -> list[list[float]] | None:
    """
    Call ``_post_batch_once`` with exponential backoff between failures.

    At most ``max(1, settings.gemini_embed_max_retries)`` attempts are made,
    sleeping ``settings.gemini_embed_base_delay_s * 2 ** attempt`` seconds
    between them.

    Returns:
        The vectors on success, or ``None`` when the API rejected
        ``outputDimensionality``, when a permanent client error was returned,
        or when every attempt failed. Failures are logged, never raised.
    """
    delay = settings.gemini_embed_base_delay_s
    attempts = max(1, settings.gemini_embed_max_retries)
    for attempt in range(attempts):
        try:
            vecs, reject_od = await _post_batch_once(
                client,
                url,
                headers,
                payload,
                target_dim,
            )
        except (httpx.HTTPError, RuntimeError, ValueError, KeyError, TypeError) as e:
            logger.warning(
                "Gemini embed batch attempt %s/%s: %s",
                attempt + 1,
                attempts,
                e,
            )
            # A 4xx raised by ``raise_for_status`` (bad key, unknown model,
            # malformed request, ...) will not succeed on a retry, so do not
            # spend backoff sleeps on it. 408/425/429 are transient and stay
            # retryable.
            permanent = (
                isinstance(e, httpx.HTTPStatusError)
                and 400 <= e.response.status_code < 500
                and e.response.status_code not in (408, 425, 429)
            )
            if permanent or attempt == attempts - 1:
                logger.exception("Gemini embed batch failed")
                return None
            await asyncio.sleep(delay * (2**attempt))
        else:
            return None if reject_od else vecs
    return None
127
+
128
+
129
async def batch_embed_contents(
    *,
    api_key: str,
    model: str,
    contents: list[list[EmbeddingPart]],
    settings: Settings,
    task_type: str = "RETRIEVAL_DOCUMENT",
    client: httpx.AsyncClient | None = None,
) -> list[list[float]]:
    """
    Batch Gemini ``batchEmbedContents`` calls.

    Each inner list is one embed request: a single ``TextPart`` or an
    interleaved list of ``TextPart`` / ``InlineDataPart`` (multimodal).

    Args:
        api_key: Sent as the ``x-goog-api-key`` header.
        model: Model name used in the URL and in every request item.
        contents: One part list per embedding to compute.
        settings: Supplies ``vector_embedding_dim``, ``gemini_embed_batch_size``
            and the retry settings used by ``_post_batch_with_retry``.
        task_type: ``taskType`` value sent with every request item.
        client: Optional client to reuse. When omitted, a client with a 120 s
            timeout is created and closed before returning.

    Returns:
        One vector per entry of ``contents``, in the same order. An empty
        ``contents`` returns ``[]`` without any network call.

    Raises:
        RuntimeError: a batch failed after retries, or the API returned a
            different number of embeddings than requests in a batch.
    """
    if not contents:
        return []
    target_dim = settings.vector_embedding_dim
    url = f"{_GEMINI_BASE}/models/{model}:batchEmbedContents"
    headers = {
        "x-goog-api-key": api_key,
        "Content-Type": "application/json",
    }

    def build_payload(
        batch: list[list[EmbeddingPart]],
        output_dimensionality: int | None,
    ) -> dict[str, Any]:
        # Single place that shapes the request body for both the first try
        # and the fallback without ``outputDimensionality``.
        return {
            "requests": [
                _build_request_item(
                    model=model,
                    parts=parts,
                    task_type=task_type,
                    output_dimensionality=output_dimensionality,
                )
                for parts in batch
            ],
        }

    use_od: int | None = target_dim
    close_client = False
    if client is None:
        client = httpx.AsyncClient(timeout=120.0)
        close_client = True
    try:
        all_out: list[list[float]] = []
        batch_size = max(1, settings.gemini_embed_batch_size)
        for start in range(0, len(contents), batch_size):
            batch = contents[start : start + batch_size]
            vecs = await _post_batch_with_retry(
                client,
                url,
                headers,
                build_payload(batch, use_od),
                target_dim,
                settings,
            )
            if vecs is None and use_od is not None:
                # Fall back to requests without ``outputDimensionality`` and
                # keep it off for the remaining batches.
                # NOTE(review): ``None`` here also covers "all retries
                # failed", not only an explicit rejection of the field.
                use_od = None
                vecs = await _post_batch_with_retry(
                    client,
                    url,
                    headers,
                    build_payload(batch, None),
                    target_dim,
                    settings,
                )
            if vecs is None:
                msg = "Gemini batchEmbedContents failed after retries"
                raise RuntimeError(msg)
            if len(vecs) != len(batch):
                # Callers pair vectors with inputs by position, so a short or
                # long batch would silently shift every later vector.
                msg = (
                    f"Gemini batchEmbedContents returned {len(vecs)} embeddings "
                    f"for {len(batch)} requests"
                )
                raise RuntimeError(msg)
            all_out.extend(vecs)
        return all_out
    finally:
        if close_client:
            await client.aclose()
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime
4
+
5
+ from sqlalchemy import text
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+ from app.config import Settings
9
+ from app.services.embeddings.build_inputs import ChunkRow
10
+ from app.vectorstore import SqliteVecStore, VectorMeta
11
+
12
+
13
def vector_store_pointer(document_id: str, chunk_pk: int) -> str:
    """Return the ``sqlite-vec:kb_vec_embeddings:<document_id>:<chunk_pk>`` reference string."""
    return "sqlite-vec:kb_vec_embeddings:{}:{}".format(document_id, chunk_pk)
15
+
16
+
17
async def delete_gemini_embeddings_for_document(
    session: AsyncSession,
    *,
    document_id: str,
    model: str,
) -> None:
    """
    Delete the ``embeddings`` rows of ``document_id`` that were produced by ``model``.

    Only the ``embeddings`` table is touched by this statement; no commit is
    issued here.
    """
    stmt = text("DELETE FROM embeddings WHERE document_id = :d AND model = :m")
    await session.execute(stmt, {"d": document_id, "m": model})
27
+
28
+
29
+ async def persist_chunk_embeddings(
30
+ session: AsyncSession,
31
+ store: SqliteVecStore,
32
+ *,
33
+ document_id: str,
34
+ source_id: int,
35
+ model: str,
36
+ chunks: list[ChunkRow],
37
+ vectors: list[list[float]],
38
+ modalities: list[str],
39
+ settings: Settings,
40
+ ) -> None:
41
+ if len(chunks) != len(vectors) or len(chunks) != len(modalities):
42
+ msg = "chunks, vectors, and modalities must align"
43
+ raise ValueError(msg)
44
+ ts = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
45
+ dim = settings.vector_embedding_dim
46
+
47
+ metas = [
48
+ VectorMeta(
49
+ document_id=document_id,
50
+ chunk_id=ch.id,
51
+ source_id=source_id,
52
+ modality=mod,
53
+ ingested_at=ts,
54
+ )
55
+ for ch, mod in zip(chunks, modalities, strict=True)
56
+ ]
57
+
58
+ await store.upsert_for_session(session, vectors, metas)
59
+
60
+ for ch in chunks:
61
+ ptr = vector_store_pointer(document_id, ch.id)
62
+ await session.execute(
63
+ text(
64
+ "INSERT INTO embeddings "
65
+ "(document_id, chunk_id, model, dim, vector_store_ref) "
66
+ "VALUES (:doc, NULL, :model, :dim, :ref)",
67
+ ),
68
+ {
69
+ "doc": document_id,
70
+ "model": model,
71
+ "dim": dim,
72
+ "ref": ptr,
73
+ },
74
+ )
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
+ @dataclass(frozen=True, slots=True)
7
+ class TextPart:
8
+ text: str
9
+
10
+
11
+ @dataclass(frozen=True, slots=True)
12
+ class InlineDataPart:
13
+ """Binary payload as base64 in API (image/video/audio snippet)."""
14
+
15
+ mime_type: str
16
+ data: bytes
17
+
18
+
19
+ EmbeddingPart = TextPart | InlineDataPart
20
+
21
+
22
def parts_to_modality(parts: list[EmbeddingPart]) -> str:
    """
    Classify an embedding input as ``"text"``, ``"image"`` or ``"multimodal"``.

    An empty list and any input without binary parts is ``"text"``. Binary
    parts alone give ``"image"``; binary parts mixed with text give
    ``"multimodal"``.
    """
    if not parts or (len(parts) == 1 and isinstance(parts[0], TextPart)):
        return "text"
    names = {type(p).__name__ for p in parts}
    if "InlineDataPart" not in names:
        return "text"
    return "multimodal" if "TextPart" in names else "image"