business-stack 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/.python-version +1 -0
  2. package/backend/.env.example +65 -0
  3. package/backend/alembic/env.py +63 -0
  4. package/backend/alembic/script.py.mako +26 -0
  5. package/backend/alembic/versions/2a9c8f1d0e7b_multimodal_kb_schema.py +279 -0
  6. package/backend/alembic/versions/3c1d2e4f5a6b_sqlite_vec_embeddings.py +58 -0
  7. package/backend/alembic/versions/4e8b0c2d1a3f_document_links.py +50 -0
  8. package/backend/alembic/versions/6a0b1c2d3e4f_link_expansion_dedupe_columns.py +49 -0
  9. package/backend/alembic/versions/7d8e9f0a1b2c_document_chunks.py +70 -0
  10. package/backend/alembic/versions/8f2a1c0d9e3b_initial_empty_revision.py +22 -0
  11. package/backend/alembic/versions/9f0a1b2c3d4e_entity_mentions_cooccurrence.py +123 -0
  12. package/backend/alembic/versions/b1c2d3e4f5a6_pipeline_dedupe_dlq.py +99 -0
  13. package/backend/alembic/versions/c2d3e4f5061a_chat_sessions_messages.py +59 -0
  14. package/backend/alembic.ini +42 -0
  15. package/backend/app/__init__.py +0 -0
  16. package/backend/app/config.py +337 -0
  17. package/backend/app/connectors/__init__.py +13 -0
  18. package/backend/app/connectors/base.py +39 -0
  19. package/backend/app/connectors/builtins.py +51 -0
  20. package/backend/app/connectors/playwright_session.py +146 -0
  21. package/backend/app/connectors/registry.py +68 -0
  22. package/backend/app/connectors/thread_expansion/__init__.py +33 -0
  23. package/backend/app/connectors/thread_expansion/fakes.py +154 -0
  24. package/backend/app/connectors/thread_expansion/models.py +113 -0
  25. package/backend/app/connectors/thread_expansion/reddit.py +53 -0
  26. package/backend/app/connectors/thread_expansion/twitter.py +49 -0
  27. package/backend/app/db.py +5 -0
  28. package/backend/app/dependencies.py +34 -0
  29. package/backend/app/logging_config.py +35 -0
  30. package/backend/app/main.py +97 -0
  31. package/backend/app/middleware/__init__.py +0 -0
  32. package/backend/app/middleware/gateway_identity.py +17 -0
  33. package/backend/app/middleware/openapi_gateway.py +71 -0
  34. package/backend/app/middleware/request_id.py +23 -0
  35. package/backend/app/openapi_config.py +126 -0
  36. package/backend/app/routers/__init__.py +0 -0
  37. package/backend/app/routers/admin_pipeline.py +123 -0
  38. package/backend/app/routers/chat.py +206 -0
  39. package/backend/app/routers/chunks.py +36 -0
  40. package/backend/app/routers/entity_extract.py +31 -0
  41. package/backend/app/routers/example.py +8 -0
  42. package/backend/app/routers/gemini_embed.py +58 -0
  43. package/backend/app/routers/health.py +28 -0
  44. package/backend/app/routers/ingestion.py +146 -0
  45. package/backend/app/routers/link_expansion.py +34 -0
  46. package/backend/app/routers/pipeline_status.py +304 -0
  47. package/backend/app/routers/query.py +63 -0
  48. package/backend/app/routers/vectors.py +63 -0
  49. package/backend/app/schemas/__init__.py +0 -0
  50. package/backend/app/schemas/canonical.py +44 -0
  51. package/backend/app/schemas/chat.py +50 -0
  52. package/backend/app/schemas/ingest.py +29 -0
  53. package/backend/app/schemas/query.py +153 -0
  54. package/backend/app/schemas/vectors.py +56 -0
  55. package/backend/app/services/__init__.py +0 -0
  56. package/backend/app/services/chat_store.py +152 -0
  57. package/backend/app/services/chunking/__init__.py +3 -0
  58. package/backend/app/services/chunking/llm_boundaries.py +63 -0
  59. package/backend/app/services/chunking/schemas.py +30 -0
  60. package/backend/app/services/chunking/semantic_chunk.py +178 -0
  61. package/backend/app/services/chunking/splitters.py +214 -0
  62. package/backend/app/services/embeddings/__init__.py +20 -0
  63. package/backend/app/services/embeddings/build_inputs.py +140 -0
  64. package/backend/app/services/embeddings/dlq.py +128 -0
  65. package/backend/app/services/embeddings/gemini_api.py +207 -0
  66. package/backend/app/services/embeddings/persist.py +74 -0
  67. package/backend/app/services/embeddings/types.py +32 -0
  68. package/backend/app/services/embeddings/worker.py +224 -0
  69. package/backend/app/services/entities/__init__.py +12 -0
  70. package/backend/app/services/entities/gliner_extract.py +63 -0
  71. package/backend/app/services/entities/llm_extract.py +94 -0
  72. package/backend/app/services/entities/pipeline.py +179 -0
  73. package/backend/app/services/entities/spacy_extract.py +63 -0
  74. package/backend/app/services/entities/types.py +15 -0
  75. package/backend/app/services/gemini_chat.py +113 -0
  76. package/backend/app/services/hooks/__init__.py +3 -0
  77. package/backend/app/services/hooks/post_ingest.py +186 -0
  78. package/backend/app/services/ingestion/__init__.py +0 -0
  79. package/backend/app/services/ingestion/persist.py +188 -0
  80. package/backend/app/services/integrations_remote.py +91 -0
  81. package/backend/app/services/link_expansion/__init__.py +3 -0
  82. package/backend/app/services/link_expansion/canonical_url.py +45 -0
  83. package/backend/app/services/link_expansion/domain_policy.py +26 -0
  84. package/backend/app/services/link_expansion/html_extract.py +72 -0
  85. package/backend/app/services/link_expansion/rate_limit.py +32 -0
  86. package/backend/app/services/link_expansion/robots.py +46 -0
  87. package/backend/app/services/link_expansion/schemas.py +67 -0
  88. package/backend/app/services/link_expansion/worker.py +458 -0
  89. package/backend/app/services/normalization/__init__.py +7 -0
  90. package/backend/app/services/normalization/normalizer.py +331 -0
  91. package/backend/app/services/normalization/persist_normalized.py +67 -0
  92. package/backend/app/services/playwright_extract/__init__.py +13 -0
  93. package/backend/app/services/playwright_extract/__main__.py +96 -0
  94. package/backend/app/services/playwright_extract/extract.py +181 -0
  95. package/backend/app/services/retrieval_service.py +351 -0
  96. package/backend/app/sqlite_ext.py +36 -0
  97. package/backend/app/storage/__init__.py +3 -0
  98. package/backend/app/storage/blobs.py +30 -0
  99. package/backend/app/vectorstore/__init__.py +13 -0
  100. package/backend/app/vectorstore/sqlite_vec_store.py +242 -0
  101. package/backend/backend.egg-info/PKG-INFO +18 -0
  102. package/backend/backend.egg-info/SOURCES.txt +93 -0
  103. package/backend/backend.egg-info/dependency_links.txt +1 -0
  104. package/backend/backend.egg-info/entry_points.txt +2 -0
  105. package/backend/backend.egg-info/requires.txt +15 -0
  106. package/backend/backend.egg-info/top_level.txt +4 -0
  107. package/backend/package.json +15 -0
  108. package/backend/pyproject.toml +52 -0
  109. package/backend/tests/conftest.py +40 -0
  110. package/backend/tests/test_chat.py +92 -0
  111. package/backend/tests/test_chunking.py +132 -0
  112. package/backend/tests/test_entities.py +170 -0
  113. package/backend/tests/test_gemini_embed.py +224 -0
  114. package/backend/tests/test_health.py +24 -0
  115. package/backend/tests/test_ingest_raw.py +123 -0
  116. package/backend/tests/test_link_expansion.py +241 -0
  117. package/backend/tests/test_main.py +12 -0
  118. package/backend/tests/test_normalizer.py +114 -0
  119. package/backend/tests/test_openapi_gateway.py +40 -0
  120. package/backend/tests/test_pipeline_hardening.py +285 -0
  121. package/backend/tests/test_pipeline_status.py +71 -0
  122. package/backend/tests/test_playwright_extract.py +80 -0
  123. package/backend/tests/test_post_ingest_hooks.py +162 -0
  124. package/backend/tests/test_query.py +165 -0
  125. package/backend/tests/test_thread_expansion.py +72 -0
  126. package/backend/tests/test_vectors.py +85 -0
  127. package/backend/uv.lock +1839 -0
  128. package/bin/business-stack.cjs +412 -0
  129. package/frontend/web/.env.example +23 -0
  130. package/frontend/web/AGENTS.md +5 -0
  131. package/frontend/web/CLAUDE.md +1 -0
  132. package/frontend/web/README.md +36 -0
  133. package/frontend/web/components.json +25 -0
  134. package/frontend/web/next-env.d.ts +6 -0
  135. package/frontend/web/next.config.ts +30 -0
  136. package/frontend/web/package.json +65 -0
  137. package/frontend/web/postcss.config.mjs +7 -0
  138. package/frontend/web/skills-lock.json +35 -0
  139. package/frontend/web/src/app/account/[[...path]]/page.tsx +19 -0
  140. package/frontend/web/src/app/auth/[[...path]]/page.tsx +14 -0
  141. package/frontend/web/src/app/chat/page.tsx +725 -0
  142. package/frontend/web/src/app/favicon.ico +0 -0
  143. package/frontend/web/src/app/globals.css +563 -0
  144. package/frontend/web/src/app/layout.tsx +50 -0
  145. package/frontend/web/src/app/page.tsx +96 -0
  146. package/frontend/web/src/app/settings/integrations/actions.ts +74 -0
  147. package/frontend/web/src/app/settings/integrations/integrations-settings-form.tsx +330 -0
  148. package/frontend/web/src/app/settings/integrations/page.tsx +41 -0
  149. package/frontend/web/src/app/webhooks/alpha-alerts/route.ts +84 -0
  150. package/frontend/web/src/components/home-auth-panel.tsx +49 -0
  151. package/frontend/web/src/components/providers.tsx +50 -0
  152. package/frontend/web/src/lib/alpha-webhook/connectors/registry.ts +35 -0
  153. package/frontend/web/src/lib/alpha-webhook/connectors/types.ts +8 -0
  154. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.test.ts +40 -0
  155. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.ts +78 -0
  156. package/frontend/web/src/lib/alpha-webhook/connectors/wabridge.ts +30 -0
  157. package/frontend/web/src/lib/alpha-webhook/handler.ts +12 -0
  158. package/frontend/web/src/lib/alpha-webhook/signature.test.ts +33 -0
  159. package/frontend/web/src/lib/alpha-webhook/signature.ts +21 -0
  160. package/frontend/web/src/lib/alpha-webhook/types.ts +23 -0
  161. package/frontend/web/src/lib/auth-client.ts +23 -0
  162. package/frontend/web/src/lib/integrations-config.ts +125 -0
  163. package/frontend/web/src/lib/ui-utills.tsx +90 -0
  164. package/frontend/web/src/lib/utils.ts +6 -0
  165. package/frontend/web/tsconfig.json +36 -0
  166. package/frontend/web/tsconfig.tsbuildinfo +1 -0
  167. package/frontend/web/vitest.config.ts +14 -0
  168. package/gateway/.env.example +23 -0
  169. package/gateway/README.md +13 -0
  170. package/gateway/package.json +24 -0
  171. package/gateway/src/auth.ts +49 -0
  172. package/gateway/src/index.ts +141 -0
  173. package/gateway/src/integrations/admin.ts +19 -0
  174. package/gateway/src/integrations/crypto.ts +52 -0
  175. package/gateway/src/integrations/handlers.ts +124 -0
  176. package/gateway/src/integrations/keys.ts +12 -0
  177. package/gateway/src/integrations/store.ts +106 -0
  178. package/gateway/src/stack-secrets.ts +35 -0
  179. package/gateway/tsconfig.json +13 -0
  180. package/package.json +33 -0
  181. package/turbo.json +27 -0
@@ -0,0 +1,123 @@
1
+ """entity_mentions (document + optional chunk) + entity_cooccurrence
2
+
3
+ Revision ID: 9f0a1b2c3d4e
4
+ Revises: 7d8e9f0a1b2c
5
+ Create Date: 2026-04-05
6
+
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+
14
# Alembic revision identifiers: this revision's id and the parent it revises.
revision: str = "9f0a1b2c3d4e"
down_revision: str | None = "7d8e9f0a1b2c"
# No branch labels or cross-branch dependencies are declared for this revision.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
18
+
19
+
20
def upgrade() -> None:
    """Create ``entity_mentions`` and ``entity_cooccurrence`` plus their indexes.

    ``entity_mentions`` links an entity to a document and, optionally, to one
    of that document's chunks. ``entity_cooccurrence`` stores one weighted row
    per ordered entity pair (``entity_low_id < entity_high_id``) and document.
    All foreign keys cascade on delete.
    """
    # NOTE(review): SQLite ignores this pragma while a transaction is open --
    # confirm the migration environment issues it outside of one.
    op.execute(sa.text("PRAGMA foreign_keys=ON"))
    op.create_table(
        "entity_mentions",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("document_id", sa.Text(), nullable=False),
        sa.Column("entity_id", sa.Integer(), nullable=False),
        # Nullable: the migration header describes the chunk reference as
        # optional, so a mention may be stored without a chunk.
        sa.Column("document_chunk_id", sa.Integer(), nullable=True),
        sa.Column("confidence", sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["documents.id"],
            name="fk_entity_mentions_document_id_documents",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["entity_id"],
            ["entities.id"],
            name="fk_entity_mentions_entity_id_entities",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["document_chunk_id"],
            ["document_chunks.id"],
            name="fk_entity_mentions_document_chunk_id_document_chunks",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name="pk_entity_mentions"),
        # Confidence is optional, but when present must lie in [0, 1].
        sa.CheckConstraint(
            "confidence IS NULL OR (confidence >= 0 AND confidence <= 1)",
            name="ck_entity_mentions_confidence",
        ),
    )
    op.create_index(
        "ix_entity_mentions_document",
        "entity_mentions",
        ["document_id"],
    )
    op.create_index(
        "ix_entity_mentions_entity",
        "entity_mentions",
        ["entity_id"],
    )
    # SQLite treats NULLs as distinct in unique indexes, so this index only
    # de-duplicates rows that carry a chunk id.
    op.create_index(
        "uq_entity_mentions_doc_entity_chunk",
        "entity_mentions",
        ["document_id", "entity_id", "document_chunk_id"],
        unique=True,
    )

    op.create_table(
        "entity_cooccurrence",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("entity_low_id", sa.Integer(), nullable=False),
        sa.Column("entity_high_id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.Text(), nullable=False),
        sa.Column("weight", sa.Float(), nullable=False, server_default="1.0"),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["documents.id"],
            name="fk_entity_cooccurrence_document_id_documents",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["entity_low_id"],
            ["entities.id"],
            name="fk_entity_cooccurrence_entity_low_id_entities",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["entity_high_id"],
            ["entities.id"],
            name="fk_entity_cooccurrence_entity_high_id_entities",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name="pk_entity_cooccurrence"),
        # Store each pair once, in a fixed (low, high) order; this also
        # rules out self-pairs.
        sa.CheckConstraint(
            "entity_low_id < entity_high_id",
            name="ck_entity_cooccurrence_ordered_pair",
        ),
        sa.UniqueConstraint(
            "entity_low_id",
            "entity_high_id",
            "document_id",
            name="uq_entity_cooccurrence_triple",
        ),
    )
    op.create_index(
        "ix_entity_cooccurrence_document",
        "entity_cooccurrence",
        ["document_id"],
    )
112
+
113
+
114
def downgrade() -> None:
    """Drop the co-occurrence and mention tables, removing their indexes first."""
    op.drop_index("ix_entity_cooccurrence_document", table_name="entity_cooccurrence")
    op.drop_table("entity_cooccurrence")

    # Indexes on entity_mentions are removed in reverse creation order.
    mention_indexes = (
        "uq_entity_mentions_doc_entity_chunk",
        "ix_entity_mentions_entity",
        "ix_entity_mentions_document",
    )
    for index_name in mention_indexes:
        op.drop_index(index_name, table_name="entity_mentions")
    op.drop_table("entity_mentions")
@@ -0,0 +1,99 @@
1
+ """documents dedupe + normalization_error; embedding_dlq
2
+
3
+ Revision ID: b1c2d3e4f5a6
4
+ Revises: 9f0a1b2c3d4e
5
+ Create Date: 2026-04-05
6
+
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+
14
# Alembic revision identifiers: this revision's id and the parent it revises.
revision: str = "b1c2d3e4f5a6"
down_revision: str | None = "9f0a1b2c3d4e"
# No branch labels or cross-branch dependencies are declared for this revision.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# SQL expression used as the server default for the text timestamp columns
# below: SQLite strftime() of 'now', formatted with fractional seconds and a
# trailing 'Z'.
_TS_DEFAULT = sa.text("(strftime('%Y-%m-%dT%H:%M:%fZ','now'))")
20
+
21
+
22
def upgrade() -> None:
    """Add dedupe/error columns to ``documents`` and create ``embedding_dlq``.

    ``documents`` gains three nullable text columns, a plain and a partial
    unique index on ``(source_id, external_id)``, and the same pair on
    ``(source_id, dedupe_content_hash)``. The partial unique indexes only
    cover rows whose key column is non-NULL and non-blank.
    ``embedding_dlq`` is keyed by ``document_id`` and cascades with its document.
    """
    op.execute(sa.text("PRAGMA foreign_keys=ON"))

    with op.batch_alter_table("documents") as documents:
        for column_name in ("external_id", "dedupe_content_hash", "normalization_error"):
            documents.add_column(sa.Column(column_name, sa.Text(), nullable=True))

    # Partial unique indexes are issued as raw SQL because of their WHERE clause.
    external_id_unique_sql = (
        "CREATE UNIQUE INDEX uq_documents_source_external_id "
        "ON documents (source_id, external_id) "
        "WHERE external_id IS NOT NULL AND trim(external_id) != ''"
    )
    dedupe_hash_unique_sql = (
        "CREATE UNIQUE INDEX uq_documents_source_dedupe_hash "
        "ON documents (source_id, dedupe_content_hash) "
        "WHERE dedupe_content_hash IS NOT NULL AND trim(dedupe_content_hash) != ''"
    )

    op.create_index("ix_documents_source_external", "documents", ["source_id", "external_id"])
    op.execute(sa.text(external_id_unique_sql))
    op.create_index(
        "ix_documents_source_dedupe_hash",
        "documents",
        ["source_id", "dedupe_content_hash"],
    )
    op.execute(sa.text(dedupe_hash_unique_sql))

    dlq_columns = [
        sa.Column("document_id", sa.Text(), nullable=False),
        sa.Column("last_error", sa.Text(), nullable=False),
        sa.Column("attempt_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("next_retry_at", sa.Text(), nullable=True),
        sa.Column("state", sa.Text(), nullable=False, server_default="pending_retry"),
        sa.Column("multimodal", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("created_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
        sa.Column("updated_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
    ]
    dlq_constraints = [
        # Only two queue states are permitted.
        sa.CheckConstraint(
            "state IN ('pending_retry','dead')",
            name="ck_embedding_dlq_state",
        ),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["documents.id"],
            name="fk_embedding_dlq_document_id_documents",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("document_id", name="pk_embedding_dlq"),
    ]
    op.create_table("embedding_dlq", *dlq_columns, *dlq_constraints)
    op.create_index(
        "ix_embedding_dlq_next_retry",
        "embedding_dlq",
        ["next_retry_at", "state"],
    )
87
+
88
+
89
def downgrade() -> None:
    """Remove ``embedding_dlq`` and the dedupe indexes/columns from ``documents``."""
    op.drop_index("ix_embedding_dlq_next_retry", table_name="embedding_dlq")
    op.drop_table("embedding_dlq")

    # Each pair is (raw DROP for the partial unique index, plain index name),
    # processed in reverse creation order.
    document_index_drops = (
        (
            "DROP INDEX IF EXISTS uq_documents_source_dedupe_hash",
            "ix_documents_source_dedupe_hash",
        ),
        (
            "DROP INDEX IF EXISTS uq_documents_source_external_id",
            "ix_documents_source_external",
        ),
    )
    for drop_unique_sql, plain_index_name in document_index_drops:
        op.execute(sa.text(drop_unique_sql))
        op.drop_index(plain_index_name, table_name="documents")

    with op.batch_alter_table("documents") as documents:
        for column_name in ("normalization_error", "dedupe_content_hash", "external_id"):
            documents.drop_column(column_name)
@@ -0,0 +1,59 @@
1
+ """chat_sessions and chat_messages for UI chat history
2
+
3
+ Revision ID: c2d3e4f5061a
4
+ Revises: b1c2d3e4f5a6
5
+ Create Date: 2026-04-05
6
+
7
+ """
8
+
9
+ from collections.abc import Sequence
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+
14
# Alembic revision identifiers: this revision's id and the parent it revises.
revision: str = "c2d3e4f5061a"
down_revision: str | None = "b1c2d3e4f5a6"
# No branch labels or cross-branch dependencies are declared for this revision.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# SQL expression used as the server default for the text timestamp columns
# below: SQLite strftime() of 'now', formatted with fractional seconds and a
# trailing 'Z'.
_TS_DEFAULT = sa.text("(strftime('%Y-%m-%dT%H:%M:%fZ','now'))")
20
+
21
+
22
def upgrade() -> None:
    """Create ``chat_sessions`` and ``chat_messages`` with their lookup indexes.

    Messages reference their session through a cascading foreign key, so
    deleting a session removes its messages.
    """
    op.execute(sa.text("PRAGMA foreign_keys=ON"))

    session_columns = [
        sa.Column("id", sa.Text(), primary_key=True, nullable=False),
        sa.Column("user_id", sa.Text(), nullable=False),
        sa.Column("title", sa.Text(), nullable=True),
        sa.Column("created_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
        sa.Column("updated_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
    ]
    op.create_table("chat_sessions", *session_columns)
    op.create_index("ix_chat_sessions_user_id", "chat_sessions", ["user_id"])

    message_columns = [
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("session_id", sa.Text(), nullable=False),
        sa.Column("role", sa.Text(), nullable=False),
        sa.Column("content", sa.Text(), nullable=False),
        sa.Column("meta_json", sa.Text(), nullable=True),
        sa.Column("created_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
    ]
    session_fk = sa.ForeignKeyConstraint(
        ["session_id"],
        ["chat_sessions.id"],
        ondelete="CASCADE",
    )
    op.create_table("chat_messages", *message_columns, session_fk)
    # Composite index over (session_id, id) for per-session message lookups.
    op.create_index(
        "ix_chat_messages_session_created",
        "chat_messages",
        ["session_id", "id"],
    )
53
+
54
+
55
def downgrade() -> None:
    """Drop the chat tables; messages go first because they reference sessions."""
    teardown_order = (
        ("ix_chat_messages_session_created", "chat_messages"),
        ("ix_chat_sessions_user_id", "chat_sessions"),
    )
    for index_name, table_name in teardown_order:
        op.drop_index(index_name, table_name=table_name)
        op.drop_table(table_name)
@@ -0,0 +1,42 @@
1
+ [alembic]
2
+ script_location = alembic
3
+ prepend_sys_path = .
4
+ version_path_separator = os
5
+
6
+ sqlalchemy.url = sqlite+aiosqlite:///./placeholder.sqlite
7
+
8
+ [post_write_hooks]
9
+
10
+ [loggers]
11
+ keys = root,sqlalchemy,alembic
12
+
13
+ [handlers]
14
+ keys = console
15
+
16
+ [formatters]
17
+ keys = generic
18
+
19
+ [logger_root]
20
+ level = WARN
21
+ handlers = console
22
+ qualname =
23
+
24
+ [logger_sqlalchemy]
25
+ level = WARN
26
+ handlers =
27
+ qualname = sqlalchemy.engine
28
+
29
+ [logger_alembic]
30
+ level = INFO
31
+ handlers =
32
+ qualname = alembic
33
+
34
+ [handler_console]
35
+ class = StreamHandler
36
+ args = (sys.stderr,)
37
+ level = NOTSET
38
+ formatter = generic
39
+
40
+ [formatter_generic]
41
+ format = %(levelname)-5.5s [%(name)s] %(message)s
42
+ datefmt = %H:%M:%S
File without changes
@@ -0,0 +1,337 @@
1
+ from functools import lru_cache
2
+ from pathlib import Path
3
+
4
+ from pydantic import Field, computed_field, field_validator
5
+ from pydantic_settings import BaseSettings, SettingsConfigDict
6
+
7
# Default message body for the Slack post-ingest notification. Placeholders in
# braces ({summary_short}, {source_link}, {source_name}, {document_id}) are
# filled in by whoever formats the template.
_POST_INGEST_SLACK_TEMPLATE_DEFAULT = (
    ":white_check_mark: *KB document ready*\n"
    "*Summary:* {summary_short}\n"
    "*Link:* {source_link}\n"
    "*Source:* {source_name}\n"
    "*Id:* `{document_id}`"
)
# Default message body for the Discord post-ingest notification; same
# placeholders as the Slack template, with ** for bold instead of *.
_POST_INGEST_DISCORD_TEMPLATE_DEFAULT = (
    "**KB document ready**\n"
    "**Summary:** {summary_short}\n"
    "**Link:** {source_link}\n"
    "**Source:** {source_name}\n"
    "**Id:** `{document_id}`"
)
21
+
22
+
23
class Settings(BaseSettings):
    """Backend configuration model.

    Values are read from a UTF-8 ``.env`` file (see ``model_config``); keys
    that match no field are ignored. Each field declares its external name
    through ``validation_alias`` (for example ``DATA_DIR``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Storage -----------------------------------------------------------
    data_dir: Path = Field(default=Path("data"), validation_alias="DATA_DIR")
    sqlite_filename: str = Field(
        default="rag.sqlite",
        validation_alias="SQLITE_FILENAME",
    )
    # When set, takes precedence over data_dir/sqlite_filename
    # (see sqlalchemy_database_url).
    database_url: str | None = Field(default=None, validation_alias="DATABASE_URL")

    # --- Embedding provider credentials and model --------------------------
    openai_api_key: str | None = Field(default=None, validation_alias="OPENAI_API_KEY")
    embedding_api_key: str | None = Field(
        default=None,
        validation_alias="EMBEDDING_API_KEY",
    )
    embedding_model: str = Field(
        default="text-embedding-3-small",
        validation_alias="EMBEDDING_MODEL",
    )
    vector_embedding_dim: int = Field(
        default=1536,
        validation_alias="VECTOR_EMBEDDING_DIM",
    )

    # --- Logging -----------------------------------------------------------
    log_level: str = Field(default="INFO", validation_alias="LOG_LEVEL")
    log_json: bool = Field(default=False, validation_alias="LOG_JSON")

    # --- Gateway protection for API docs -----------------------------------
    backend_gateway_secret: str | None = Field(
        default=None,
        validation_alias="BACKEND_GATEWAY_SECRET",
        description=(
            "When set, /docs, /redoc, /openapi.json, and /docs/* require X-Gateway-Secret to match. "
            "Configure the same value on the Hono gateway so docs are not usable by calling the "
            "backend directly."
        ),
    )

    # --- Link expansion (crawler) ------------------------------------------
    link_expand_max_depth: int = Field(
        default=2,
        ge=1,
        le=32,
        validation_alias="LINK_EXPAND_MAX_DEPTH",
    )
    link_expand_domain_allowlist: str = Field(
        default="",
        validation_alias="LINK_EXPAND_DOMAIN_ALLOWLIST",
        description="Comma-separated hostnames; empty = all (except denylist)",
    )
    link_expand_domain_denylist: str = Field(
        default="",
        validation_alias="LINK_EXPAND_DOMAIN_DENYLIST",
        description="Comma-separated hostnames to block",
    )
    link_expand_politeness_delay_ms: int = Field(
        default=750,
        ge=0,
        validation_alias="LINK_EXPAND_POLITENESS_DELAY_MS",
    )
    link_expand_per_domain_interval_ms: int = Field(
        default=1500,
        ge=0,
        validation_alias="LINK_EXPAND_PER_DOMAIN_INTERVAL_MS",
    )
    link_expand_respect_robots: bool = Field(
        default=True,
        validation_alias="LINK_EXPAND_RESPECT_ROBOTS",
    )
    link_expand_user_agent: str = Field(
        default="BusinessStackLinkBot/0.1 (+https://example.invalid/bot)",
        validation_alias="LINK_EXPAND_USER_AGENT",
    )
    link_expand_max_response_bytes: int = Field(
        default=5_242_880,
        ge=4096,
        validation_alias="LINK_EXPAND_MAX_RESPONSE_BYTES",
    )
    link_expand_fetch_timeout_s: float = Field(
        default=30.0,
        ge=1.0,
        validation_alias="LINK_EXPAND_FETCH_TIMEOUT_S",
    )

    # --- Chunking ----------------------------------------------------------
    chunk_llm_model: str = Field(
        default="gpt-4o-mini",
        validation_alias="CHUNK_LLM_MODEL",
    )

    # --- Gemini credentials and embedding ----------------------------------
    gemini_api_key: str | None = Field(
        default=None,
        validation_alias="GEMINI_API_KEY",
        description="Overrides integration store when set.",
    )
    integrations_gateway_url: str | None = Field(
        default=None,
        validation_alias="INTEGRATIONS_GATEWAY_URL",
        description=(
            "Hono gateway origin for GET /internal/integrations "
            "(e.g. http://127.0.0.1:3001). Used when GEMINI_API_KEY is unset."
        ),
    )
    integrations_internal_secret: str | None = Field(
        default=None,
        validation_alias="INTEGRATIONS_INTERNAL_SECRET",
        description="Bearer shared with gateway INTEGRATIONS_INTERNAL_SECRET.",
    )
    gemini_embedding_model: str = Field(
        default="gemini-embedding-001",
        validation_alias="GEMINI_EMBEDDING_MODEL",
        description=(
            "Gemini API embedding model id (Gemini Embedding / console 'Gemini Embedding 1': "
            "gemini-embedding-001 per https://ai.google.dev/gemini-api/docs/models/gemini-embedding-001 )"
        ),
    )
    gemini_embed_batch_size: int = Field(
        default=32,
        ge=1,
        le=100,
        validation_alias="GEMINI_EMBED_BATCH_SIZE",
    )
    gemini_embed_max_retries: int = Field(
        default=4,
        ge=1,
        le=12,
        validation_alias="GEMINI_EMBED_MAX_RETRIES",
    )
    gemini_embed_base_delay_s: float = Field(
        default=1.0,
        ge=0.1,
        validation_alias="GEMINI_EMBED_BASE_DELAY_S",
    )
    gemini_embed_task_type: str = Field(
        default="RETRIEVAL_DOCUMENT",
        validation_alias="GEMINI_EMBED_TASK_TYPE",
    )

    # --- Embedding dead-letter queue ---------------------------------------
    embedding_dlq_max_attempts: int = Field(
        default=5,
        ge=1,
        le=100,
        validation_alias="EMBEDDING_DLQ_MAX_ATTEMPTS",
        description="After this many failed embed jobs, row moves to dead state",
    )
    embedding_dlq_base_delay_s: float = Field(
        default=60.0,
        ge=1.0,
        validation_alias="EMBEDDING_DLQ_BASE_DELAY_S",
        description="Initial backoff; doubles each attempt (capped)",
    )
    embedding_dlq_max_backoff_s: float = Field(
        default=3600.0,
        ge=60.0,
        validation_alias="EMBEDDING_DLQ_MAX_BACKOFF_S",
    )

    # --- Post-ingest notifications -----------------------------------------
    post_ingest_slack_webhook_url: str | None = Field(
        default=None,
        validation_alias="POST_INGEST_SLACK_WEBHOOK_URL",
        description="Slack incoming webhook URL (never logged)",
    )
    post_ingest_discord_webhook_url: str | None = Field(
        default=None,
        validation_alias="POST_INGEST_DISCORD_WEBHOOK_URL",
        description="Discord webhook URL (never logged)",
    )
    post_ingest_slack_template: str = Field(
        default=_POST_INGEST_SLACK_TEMPLATE_DEFAULT,
        validation_alias="POST_INGEST_SLACK_TEMPLATE",
        description="str.format template; placeholders: document_id, summary, ...",
    )
    post_ingest_discord_template: str = Field(
        default=_POST_INGEST_DISCORD_TEMPLATE_DEFAULT,
        validation_alias="POST_INGEST_DISCORD_TEMPLATE",
    )
    post_ingest_summary_max_chars: int = Field(
        default=400,
        ge=20,
        le=8000,
        validation_alias="POST_INGEST_SUMMARY_MAX_CHARS",
    )

    # --- Entity extraction -------------------------------------------------
    entity_use_spacy: bool = Field(default=True, validation_alias="ENTITY_USE_SPACY")
    spacy_model: str = Field(
        default="en_core_web_sm",
        validation_alias="SPACY_MODEL",
    )
    entity_use_gliner: bool = Field(
        default=False,
        validation_alias="ENTITY_USE_GLINER",
    )
    gliner_model_id: str = Field(
        default="urchade/gliner_medium-v2.1",
        validation_alias="GLINER_MODEL_ID",
    )
    entity_llm_enabled: bool = Field(
        default=False,
        validation_alias="ENTITY_LLM_ENABLED",
    )
    entity_llm_min_mentions: int = Field(
        default=2,
        ge=0,
        le=50,
        validation_alias="ENTITY_LLM_MIN_MENTIONS",
    )
    entity_llm_on_all_chunks: bool = Field(
        default=False,
        validation_alias="ENTITY_LLM_ON_ALL_CHUNKS",
    )
    ollama_base_url: str = Field(
        default="http://127.0.0.1:11434",
        validation_alias="OLLAMA_BASE_URL",
    )
    ollama_entity_model: str = Field(
        default="llama3.2",
        validation_alias="OLLAMA_ENTITY_MODEL",
    )
    entity_extract_strict: bool = Field(
        default=False,
        validation_alias="ENTITY_EXTRACT_STRICT",
        description="If true, entity extraction failure fails the embed job",
    )

    # --- Query and chat models ---------------------------------------------
    gemini_query_task_type: str = Field(
        default="RETRIEVAL_QUERY",
        validation_alias="GEMINI_QUERY_TASK_TYPE",
        description="Gemini taskType for query embeddings",
    )
    gemini_chat_model: str = Field(
        default="gemini-3-flash-preview",
        validation_alias="GEMINI_CHAT_MODEL",
        description=(
            "Gemini API model id for POST /chat/.../complete (generateContent). "
            "Older ids (e.g. gemini-1.5-flash) may return 404 on v1beta; see "
            "https://ai.google.dev/gemini-api/docs/models and ListModels."
        ),
    )

    @field_validator("gemini_embedding_model", "gemini_chat_model", mode="after")
    @classmethod
    def _normalize_gemini_model_env(cls, v: str) -> str:
        """ListModels returns ``name`` like ``models/gemini-2.5-flash``; URLs need the suffix only."""
        # Trim whitespace first so a padded "models/..." value is still recognised.
        s = v.strip()
        return s.removeprefix("models/")

    # --- Retrieval scoring -------------------------------------------------
    # The three default weights (0.7 + 0.2 + 0.1) sum to 1.0; each is
    # individually bounded to [0, 1] but their sum is not validated here.
    retrieval_score_semantic_weight: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        validation_alias="RETRIEVAL_SCORE_SEMANTIC_WEIGHT",
    )
    retrieval_score_recency_weight: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        validation_alias="RETRIEVAL_SCORE_RECENCY_WEIGHT",
    )
    retrieval_score_source_weight: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        validation_alias="RETRIEVAL_SCORE_SOURCE_WEIGHT",
    )
    retrieval_recency_half_life_days: float = Field(
        default=30.0,
        gt=0.0,
        validation_alias="RETRIEVAL_RECENCY_HALF_LIFE_DAYS",
        description="Recency score halves every this many days since ingested_at",
    )
    # Kept as a raw JSON string; it is not parsed or validated in this class.
    retrieval_source_weights_json: str = Field(
        default='{"default": 1.0}',
        validation_alias="RETRIEVAL_SOURCE_WEIGHTS_JSON",
        description=(
            'JSON map connector_type -> weight in [0,1] (e.g. {"default":1,"web":0.85})'
        ),
    )
    retrieval_vec_candidate_multiplier: int = Field(
        default=5,
        ge=1,
        le=50,
        validation_alias="RETRIEVAL_VEC_CANDIDATE_MULTIPLIER",
        description=(
            "Vector search retrieves k * multiplier rows before score re-ranking"
        ),
    )

    @field_validator(
        "link_expand_domain_allowlist",
        "link_expand_domain_denylist",
        mode="before",
    )
    @classmethod
    def _strip_csv(cls, v: object) -> str:
        """Turn ``None`` into ``""`` and strip whitespace from the raw value."""
        if v is None:
            return ""
        return str(v).strip()

    @computed_field  # type: ignore[prop-decorator]
    @property
    def sqlalchemy_database_url(self) -> str:
        """Return ``database_url`` when set, else an aiosqlite URL for the local file.

        The fallback points at the resolved (absolute) ``data_dir / sqlite_filename``.
        """
        if self.database_url:
            return self.database_url
        path = (self.data_dir / self.sqlite_filename).resolve()
        return f"sqlite+aiosqlite:///{path.as_posix()}"
329
+
330
+
331
@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` object once and return the cached instance afterwards."""
    settings = Settings()
    return settings
334
+
335
+
336
def clear_settings_cache() -> None:
    """Discard the cached ``Settings`` so the next ``get_settings()`` call rebuilds it."""
    get_settings.cache_clear()
@@ -0,0 +1,13 @@
1
+ from app.connectors.registry import (
2
+ get_connector_class,
3
+ init_connectors,
4
+ list_connector_keys,
5
+ register_connector,
6
+ )
7
+
8
# Public names of this package; all are imported from app.connectors.registry.
__all__ = [
    "get_connector_class",
    "init_connectors",
    "list_connector_keys",
    "register_connector",
]