business-stack 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.python-version +1 -0
- package/backend/.env.example +65 -0
- package/backend/alembic/env.py +63 -0
- package/backend/alembic/script.py.mako +26 -0
- package/backend/alembic/versions/2a9c8f1d0e7b_multimodal_kb_schema.py +279 -0
- package/backend/alembic/versions/3c1d2e4f5a6b_sqlite_vec_embeddings.py +58 -0
- package/backend/alembic/versions/4e8b0c2d1a3f_document_links.py +50 -0
- package/backend/alembic/versions/6a0b1c2d3e4f_link_expansion_dedupe_columns.py +49 -0
- package/backend/alembic/versions/7d8e9f0a1b2c_document_chunks.py +70 -0
- package/backend/alembic/versions/8f2a1c0d9e3b_initial_empty_revision.py +22 -0
- package/backend/alembic/versions/9f0a1b2c3d4e_entity_mentions_cooccurrence.py +123 -0
- package/backend/alembic/versions/b1c2d3e4f5a6_pipeline_dedupe_dlq.py +99 -0
- package/backend/alembic/versions/c2d3e4f5061a_chat_sessions_messages.py +59 -0
- package/backend/alembic.ini +42 -0
- package/backend/app/__init__.py +0 -0
- package/backend/app/config.py +337 -0
- package/backend/app/connectors/__init__.py +13 -0
- package/backend/app/connectors/base.py +39 -0
- package/backend/app/connectors/builtins.py +51 -0
- package/backend/app/connectors/playwright_session.py +146 -0
- package/backend/app/connectors/registry.py +68 -0
- package/backend/app/connectors/thread_expansion/__init__.py +33 -0
- package/backend/app/connectors/thread_expansion/fakes.py +154 -0
- package/backend/app/connectors/thread_expansion/models.py +113 -0
- package/backend/app/connectors/thread_expansion/reddit.py +53 -0
- package/backend/app/connectors/thread_expansion/twitter.py +49 -0
- package/backend/app/db.py +5 -0
- package/backend/app/dependencies.py +34 -0
- package/backend/app/logging_config.py +35 -0
- package/backend/app/main.py +97 -0
- package/backend/app/middleware/__init__.py +0 -0
- package/backend/app/middleware/gateway_identity.py +17 -0
- package/backend/app/middleware/openapi_gateway.py +71 -0
- package/backend/app/middleware/request_id.py +23 -0
- package/backend/app/openapi_config.py +126 -0
- package/backend/app/routers/__init__.py +0 -0
- package/backend/app/routers/admin_pipeline.py +123 -0
- package/backend/app/routers/chat.py +206 -0
- package/backend/app/routers/chunks.py +36 -0
- package/backend/app/routers/entity_extract.py +31 -0
- package/backend/app/routers/example.py +8 -0
- package/backend/app/routers/gemini_embed.py +58 -0
- package/backend/app/routers/health.py +28 -0
- package/backend/app/routers/ingestion.py +146 -0
- package/backend/app/routers/link_expansion.py +34 -0
- package/backend/app/routers/pipeline_status.py +304 -0
- package/backend/app/routers/query.py +63 -0
- package/backend/app/routers/vectors.py +63 -0
- package/backend/app/schemas/__init__.py +0 -0
- package/backend/app/schemas/canonical.py +44 -0
- package/backend/app/schemas/chat.py +50 -0
- package/backend/app/schemas/ingest.py +29 -0
- package/backend/app/schemas/query.py +153 -0
- package/backend/app/schemas/vectors.py +56 -0
- package/backend/app/services/__init__.py +0 -0
- package/backend/app/services/chat_store.py +152 -0
- package/backend/app/services/chunking/__init__.py +3 -0
- package/backend/app/services/chunking/llm_boundaries.py +63 -0
- package/backend/app/services/chunking/schemas.py +30 -0
- package/backend/app/services/chunking/semantic_chunk.py +178 -0
- package/backend/app/services/chunking/splitters.py +214 -0
- package/backend/app/services/embeddings/__init__.py +20 -0
- package/backend/app/services/embeddings/build_inputs.py +140 -0
- package/backend/app/services/embeddings/dlq.py +128 -0
- package/backend/app/services/embeddings/gemini_api.py +207 -0
- package/backend/app/services/embeddings/persist.py +74 -0
- package/backend/app/services/embeddings/types.py +32 -0
- package/backend/app/services/embeddings/worker.py +224 -0
- package/backend/app/services/entities/__init__.py +12 -0
- package/backend/app/services/entities/gliner_extract.py +63 -0
- package/backend/app/services/entities/llm_extract.py +94 -0
- package/backend/app/services/entities/pipeline.py +179 -0
- package/backend/app/services/entities/spacy_extract.py +63 -0
- package/backend/app/services/entities/types.py +15 -0
- package/backend/app/services/gemini_chat.py +113 -0
- package/backend/app/services/hooks/__init__.py +3 -0
- package/backend/app/services/hooks/post_ingest.py +186 -0
- package/backend/app/services/ingestion/__init__.py +0 -0
- package/backend/app/services/ingestion/persist.py +188 -0
- package/backend/app/services/integrations_remote.py +91 -0
- package/backend/app/services/link_expansion/__init__.py +3 -0
- package/backend/app/services/link_expansion/canonical_url.py +45 -0
- package/backend/app/services/link_expansion/domain_policy.py +26 -0
- package/backend/app/services/link_expansion/html_extract.py +72 -0
- package/backend/app/services/link_expansion/rate_limit.py +32 -0
- package/backend/app/services/link_expansion/robots.py +46 -0
- package/backend/app/services/link_expansion/schemas.py +67 -0
- package/backend/app/services/link_expansion/worker.py +458 -0
- package/backend/app/services/normalization/__init__.py +7 -0
- package/backend/app/services/normalization/normalizer.py +331 -0
- package/backend/app/services/normalization/persist_normalized.py +67 -0
- package/backend/app/services/playwright_extract/__init__.py +13 -0
- package/backend/app/services/playwright_extract/__main__.py +96 -0
- package/backend/app/services/playwright_extract/extract.py +181 -0
- package/backend/app/services/retrieval_service.py +351 -0
- package/backend/app/sqlite_ext.py +36 -0
- package/backend/app/storage/__init__.py +3 -0
- package/backend/app/storage/blobs.py +30 -0
- package/backend/app/vectorstore/__init__.py +13 -0
- package/backend/app/vectorstore/sqlite_vec_store.py +242 -0
- package/backend/backend.egg-info/PKG-INFO +18 -0
- package/backend/backend.egg-info/SOURCES.txt +93 -0
- package/backend/backend.egg-info/dependency_links.txt +1 -0
- package/backend/backend.egg-info/entry_points.txt +2 -0
- package/backend/backend.egg-info/requires.txt +15 -0
- package/backend/backend.egg-info/top_level.txt +4 -0
- package/backend/package.json +15 -0
- package/backend/pyproject.toml +52 -0
- package/backend/tests/conftest.py +40 -0
- package/backend/tests/test_chat.py +92 -0
- package/backend/tests/test_chunking.py +132 -0
- package/backend/tests/test_entities.py +170 -0
- package/backend/tests/test_gemini_embed.py +224 -0
- package/backend/tests/test_health.py +24 -0
- package/backend/tests/test_ingest_raw.py +123 -0
- package/backend/tests/test_link_expansion.py +241 -0
- package/backend/tests/test_main.py +12 -0
- package/backend/tests/test_normalizer.py +114 -0
- package/backend/tests/test_openapi_gateway.py +40 -0
- package/backend/tests/test_pipeline_hardening.py +285 -0
- package/backend/tests/test_pipeline_status.py +71 -0
- package/backend/tests/test_playwright_extract.py +80 -0
- package/backend/tests/test_post_ingest_hooks.py +162 -0
- package/backend/tests/test_query.py +165 -0
- package/backend/tests/test_thread_expansion.py +72 -0
- package/backend/tests/test_vectors.py +85 -0
- package/backend/uv.lock +1839 -0
- package/bin/business-stack.cjs +412 -0
- package/frontend/web/.env.example +23 -0
- package/frontend/web/AGENTS.md +5 -0
- package/frontend/web/CLAUDE.md +1 -0
- package/frontend/web/README.md +36 -0
- package/frontend/web/components.json +25 -0
- package/frontend/web/next-env.d.ts +6 -0
- package/frontend/web/next.config.ts +30 -0
- package/frontend/web/package.json +65 -0
- package/frontend/web/postcss.config.mjs +7 -0
- package/frontend/web/skills-lock.json +35 -0
- package/frontend/web/src/app/account/[[...path]]/page.tsx +19 -0
- package/frontend/web/src/app/auth/[[...path]]/page.tsx +14 -0
- package/frontend/web/src/app/chat/page.tsx +725 -0
- package/frontend/web/src/app/favicon.ico +0 -0
- package/frontend/web/src/app/globals.css +563 -0
- package/frontend/web/src/app/layout.tsx +50 -0
- package/frontend/web/src/app/page.tsx +96 -0
- package/frontend/web/src/app/settings/integrations/actions.ts +74 -0
- package/frontend/web/src/app/settings/integrations/integrations-settings-form.tsx +330 -0
- package/frontend/web/src/app/settings/integrations/page.tsx +41 -0
- package/frontend/web/src/app/webhooks/alpha-alerts/route.ts +84 -0
- package/frontend/web/src/components/home-auth-panel.tsx +49 -0
- package/frontend/web/src/components/providers.tsx +50 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/registry.ts +35 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/types.ts +8 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.test.ts +40 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge-delivery.ts +78 -0
- package/frontend/web/src/lib/alpha-webhook/connectors/wabridge.ts +30 -0
- package/frontend/web/src/lib/alpha-webhook/handler.ts +12 -0
- package/frontend/web/src/lib/alpha-webhook/signature.test.ts +33 -0
- package/frontend/web/src/lib/alpha-webhook/signature.ts +21 -0
- package/frontend/web/src/lib/alpha-webhook/types.ts +23 -0
- package/frontend/web/src/lib/auth-client.ts +23 -0
- package/frontend/web/src/lib/integrations-config.ts +125 -0
- package/frontend/web/src/lib/ui-utills.tsx +90 -0
- package/frontend/web/src/lib/utils.ts +6 -0
- package/frontend/web/tsconfig.json +36 -0
- package/frontend/web/tsconfig.tsbuildinfo +1 -0
- package/frontend/web/vitest.config.ts +14 -0
- package/gateway/.env.example +23 -0
- package/gateway/README.md +13 -0
- package/gateway/package.json +24 -0
- package/gateway/src/auth.ts +49 -0
- package/gateway/src/index.ts +141 -0
- package/gateway/src/integrations/admin.ts +19 -0
- package/gateway/src/integrations/crypto.ts +52 -0
- package/gateway/src/integrations/handlers.ts +124 -0
- package/gateway/src/integrations/keys.ts +12 -0
- package/gateway/src/integrations/store.ts +106 -0
- package/gateway/src/stack-secrets.ts +35 -0
- package/gateway/tsconfig.json +13 -0
- package/package.json +33 -0
- package/turbo.json +27 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""entity_mentions (document + optional chunk) + entity_cooccurrence
|
|
2
|
+
|
|
3
|
+
Revision ID: 9f0a1b2c3d4e
|
|
4
|
+
Revises: 7d8e9f0a1b2c
|
|
5
|
+
Create Date: 2026-04-05
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from alembic import op
|
|
13
|
+
|
|
14
|
+
revision: str = "9f0a1b2c3d4e"
|
|
15
|
+
down_revision: str | None = "7d8e9f0a1b2c"
|
|
16
|
+
branch_labels: str | Sequence[str] | None = None
|
|
17
|
+
depends_on: str | Sequence[str] | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def upgrade() -> None:
    """Create the ``entity_mentions`` and ``entity_cooccurrence`` tables.

    ``entity_mentions`` links an entity to a document and, optionally, to a
    single chunk of that document. ``entity_cooccurrence`` stores one weighted
    row per ordered entity pair (``entity_low_id < entity_high_id``) and
    document. Every foreign key is declared with ``ON DELETE CASCADE``.
    """
    # SQLite pragma: ask for foreign-key enforcement on this connection.
    op.execute(sa.text("PRAGMA foreign_keys=ON"))

    op.create_table(
        "entity_mentions",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("document_id", sa.Text(), nullable=False),
        sa.Column("entity_id", sa.Integer(), nullable=False),
        # The chunk reference is optional (see the module docstring:
        # "document + optional chunk"); a NULL value represents a
        # document-level mention, so the column must be nullable.
        sa.Column("document_chunk_id", sa.Integer(), nullable=True),
        sa.Column("confidence", sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["documents.id"],
            name="fk_entity_mentions_document_id_documents",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["entity_id"],
            ["entities.id"],
            name="fk_entity_mentions_entity_id_entities",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["document_chunk_id"],
            ["document_chunks.id"],
            name="fk_entity_mentions_document_chunk_id_document_chunks",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name="pk_entity_mentions"),
        # Confidence is either unknown (NULL) or a value in [0, 1].
        sa.CheckConstraint(
            "confidence IS NULL OR (confidence >= 0 AND confidence <= 1)",
            name="ck_entity_mentions_confidence",
        ),
    )
    op.create_index(
        "ix_entity_mentions_document",
        "entity_mentions",
        ["document_id"],
    )
    op.create_index(
        "ix_entity_mentions_entity",
        "entity_mentions",
        ["entity_id"],
    )
    # NOTE(review): SQLite treats NULLs as distinct in unique indexes, so this
    # index only de-duplicates rows that carry a chunk id; confirm the writer
    # de-duplicates document-level (NULL chunk) mentions itself.
    op.create_index(
        "uq_entity_mentions_doc_entity_chunk",
        "entity_mentions",
        ["document_id", "entity_id", "document_chunk_id"],
        unique=True,
    )

    op.create_table(
        "entity_cooccurrence",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("entity_low_id", sa.Integer(), nullable=False),
        sa.Column("entity_high_id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.Text(), nullable=False),
        sa.Column("weight", sa.Float(), nullable=False, server_default="1.0"),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["documents.id"],
            name="fk_entity_cooccurrence_document_id_documents",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["entity_low_id"],
            ["entities.id"],
            name="fk_entity_cooccurrence_entity_low_id_entities",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["entity_high_id"],
            ["entities.id"],
            name="fk_entity_cooccurrence_entity_high_id_entities",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name="pk_entity_cooccurrence"),
        # Storing the pair in a fixed order rules out (a, b) / (b, a)
        # duplicates as well as self-pairs.
        sa.CheckConstraint(
            "entity_low_id < entity_high_id",
            name="ck_entity_cooccurrence_ordered_pair",
        ),
        sa.UniqueConstraint(
            "entity_low_id",
            "entity_high_id",
            "document_id",
            name="uq_entity_cooccurrence_triple",
        ),
    )
    op.create_index(
        "ix_entity_cooccurrence_document",
        "entity_cooccurrence",
        ["document_id"],
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def downgrade() -> None:
    """Drop ``entity_cooccurrence`` and ``entity_mentions``, indexes first."""
    op.drop_index("ix_entity_cooccurrence_document", table_name="entity_cooccurrence")
    op.drop_table("entity_cooccurrence")

    # Indexes on entity_mentions are removed in reverse creation order.
    mention_indexes = (
        "uq_entity_mentions_doc_entity_chunk",
        "ix_entity_mentions_entity",
        "ix_entity_mentions_document",
    )
    for index_name in mention_indexes:
        op.drop_index(index_name, table_name="entity_mentions")
    op.drop_table("entity_mentions")
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""documents dedupe + normalization_error; embedding_dlq
|
|
2
|
+
|
|
3
|
+
Revision ID: b1c2d3e4f5a6
|
|
4
|
+
Revises: 9f0a1b2c3d4e
|
|
5
|
+
Create Date: 2026-04-05
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from alembic import op
|
|
13
|
+
|
|
14
|
+
revision: str = "b1c2d3e4f5a6"
|
|
15
|
+
down_revision: str | None = "9f0a1b2c3d4e"
|
|
16
|
+
branch_labels: str | Sequence[str] | None = None
|
|
17
|
+
depends_on: str | Sequence[str] | None = None
|
|
18
|
+
|
|
19
|
+
_TS_DEFAULT = sa.text("(strftime('%Y-%m-%dT%H:%M:%fZ','now'))")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def upgrade() -> None:
    """Add dedupe/normalization columns to ``documents`` and create ``embedding_dlq``."""
    op.execute(sa.text("PRAGMA foreign_keys=ON"))

    # Three nullable text columns, added in one batch operation.
    added_columns = ("external_id", "dedupe_content_hash", "normalization_error")
    with op.batch_alter_table("documents") as batch:
        for column_name in added_columns:
            batch.add_column(sa.Column(column_name, sa.Text(), nullable=True))

    # Lookup index plus a partial unique index that ignores NULL/blank values.
    op.create_index(
        "ix_documents_source_external",
        "documents",
        ["source_id", "external_id"],
    )
    unique_external_sql = (
        "CREATE UNIQUE INDEX uq_documents_source_external_id "
        "ON documents (source_id, external_id) "
        "WHERE external_id IS NOT NULL AND trim(external_id) != ''"
    )
    op.execute(sa.text(unique_external_sql))

    # Same pair of indexes for the content hash.
    op.create_index(
        "ix_documents_source_dedupe_hash",
        "documents",
        ["source_id", "dedupe_content_hash"],
    )
    unique_hash_sql = (
        "CREATE UNIQUE INDEX uq_documents_source_dedupe_hash "
        "ON documents (source_id, dedupe_content_hash) "
        "WHERE dedupe_content_hash IS NOT NULL AND trim(dedupe_content_hash) != ''"
    )
    op.execute(sa.text(unique_hash_sql))

    # One row per document (document_id is the primary key).
    dlq_columns = [
        sa.Column("document_id", sa.Text(), nullable=False),
        sa.Column("last_error", sa.Text(), nullable=False),
        sa.Column("attempt_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("next_retry_at", sa.Text(), nullable=True),
        sa.Column("state", sa.Text(), nullable=False, server_default="pending_retry"),
        sa.Column("multimodal", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("created_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
        sa.Column("updated_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
    ]
    dlq_constraints = [
        sa.CheckConstraint(
            "state IN ('pending_retry','dead')",
            name="ck_embedding_dlq_state",
        ),
        sa.ForeignKeyConstraint(
            ["document_id"],
            ["documents.id"],
            name="fk_embedding_dlq_document_id_documents",
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("document_id", name="pk_embedding_dlq"),
    ]
    op.create_table("embedding_dlq", *dlq_columns, *dlq_constraints)
    op.create_index(
        "ix_embedding_dlq_next_retry",
        "embedding_dlq",
        ["next_retry_at", "state"],
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def downgrade() -> None:
    """Remove ``embedding_dlq`` and the dedupe columns/indexes on ``documents``."""
    op.drop_index("ix_embedding_dlq_next_retry", table_name="embedding_dlq")
    op.drop_table("embedding_dlq")

    # (partial unique index, plain index) pairs, newest first. The unique
    # indexes were created with raw SQL, so they are dropped the same way.
    document_index_pairs = (
        ("uq_documents_source_dedupe_hash", "ix_documents_source_dedupe_hash"),
        ("uq_documents_source_external_id", "ix_documents_source_external"),
    )
    for unique_name, plain_name in document_index_pairs:
        op.execute(sa.text(f"DROP INDEX IF EXISTS {unique_name}"))
        op.drop_index(plain_name, table_name="documents")

    # Columns go last, in reverse order of their creation.
    with op.batch_alter_table("documents") as batch:
        for column_name in ("normalization_error", "dedupe_content_hash", "external_id"):
            batch.drop_column(column_name)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""chat_sessions and chat_messages for UI chat history
|
|
2
|
+
|
|
3
|
+
Revision ID: c2d3e4f5061a
|
|
4
|
+
Revises: b1c2d3e4f5a6
|
|
5
|
+
Create Date: 2026-04-05
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from alembic import op
|
|
13
|
+
|
|
14
|
+
revision: str = "c2d3e4f5061a"
|
|
15
|
+
down_revision: str | None = "b1c2d3e4f5a6"
|
|
16
|
+
branch_labels: str | Sequence[str] | None = None
|
|
17
|
+
depends_on: str | Sequence[str] | None = None
|
|
18
|
+
|
|
19
|
+
_TS_DEFAULT = sa.text("(strftime('%Y-%m-%dT%H:%M:%fZ','now'))")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def upgrade() -> None:
    """Create ``chat_sessions`` and ``chat_messages`` plus their indexes."""
    op.execute(sa.text("PRAGMA foreign_keys=ON"))

    session_columns = [
        sa.Column("id", sa.Text(), primary_key=True, nullable=False),
        sa.Column("user_id", sa.Text(), nullable=False),
        sa.Column("title", sa.Text(), nullable=True),
        sa.Column("created_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
        sa.Column("updated_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
    ]
    op.create_table("chat_sessions", *session_columns)
    op.create_index("ix_chat_sessions_user_id", "chat_sessions", ["user_id"])

    message_columns = [
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("session_id", sa.Text(), nullable=False),
        sa.Column("role", sa.Text(), nullable=False),
        sa.Column("content", sa.Text(), nullable=False),
        sa.Column("meta_json", sa.Text(), nullable=True),
        sa.Column("created_at", sa.Text(), nullable=False, server_default=_TS_DEFAULT),
    ]
    # Messages are declared to be removed together with their session.
    session_fk = sa.ForeignKeyConstraint(
        ["session_id"],
        ["chat_sessions.id"],
        ondelete="CASCADE",
    )
    op.create_table("chat_messages", *message_columns, session_fk)
    op.create_index(
        "ix_chat_messages_session_created",
        "chat_messages",
        ["session_id", "id"],
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def downgrade() -> None:
    """Drop the chat tables; the referencing table and its index go first."""
    teardown_steps = (
        ("ix_chat_messages_session_created", "chat_messages"),
        ("ix_chat_sessions_user_id", "chat_sessions"),
    )
    for index_name, table in teardown_steps:
        op.drop_index(index_name, table_name=table)
        op.drop_table(table)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[alembic]
|
|
2
|
+
script_location = alembic
|
|
3
|
+
prepend_sys_path = .
|
|
4
|
+
version_path_separator = os
|
|
5
|
+
|
|
6
|
+
sqlalchemy.url = sqlite+aiosqlite:///./placeholder.sqlite
|
|
7
|
+
|
|
8
|
+
[post_write_hooks]
|
|
9
|
+
|
|
10
|
+
[loggers]
|
|
11
|
+
keys = root,sqlalchemy,alembic
|
|
12
|
+
|
|
13
|
+
[handlers]
|
|
14
|
+
keys = console
|
|
15
|
+
|
|
16
|
+
[formatters]
|
|
17
|
+
keys = generic
|
|
18
|
+
|
|
19
|
+
[logger_root]
|
|
20
|
+
level = WARN
|
|
21
|
+
handlers = console
|
|
22
|
+
qualname =
|
|
23
|
+
|
|
24
|
+
[logger_sqlalchemy]
|
|
25
|
+
level = WARN
|
|
26
|
+
handlers =
|
|
27
|
+
qualname = sqlalchemy.engine
|
|
28
|
+
|
|
29
|
+
[logger_alembic]
|
|
30
|
+
level = INFO
|
|
31
|
+
handlers =
|
|
32
|
+
qualname = alembic
|
|
33
|
+
|
|
34
|
+
[handler_console]
|
|
35
|
+
class = StreamHandler
|
|
36
|
+
args = (sys.stderr,)
|
|
37
|
+
level = NOTSET
|
|
38
|
+
formatter = generic
|
|
39
|
+
|
|
40
|
+
[formatter_generic]
|
|
41
|
+
format = %(levelname)-5.5s [%(name)s] %(message)s
|
|
42
|
+
datefmt = %H:%M:%S
|
|
File without changes
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from pydantic import Field, computed_field, field_validator
|
|
5
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
6
|
+
|
|
7
|
+
# Default post-ingest notification bodies. Both are ``str.format`` templates
# using the placeholders summary_short, source_link, source_name, document_id.
_POST_INGEST_SLACK_TEMPLATE_DEFAULT = "\n".join(
    [
        ":white_check_mark: *KB document ready*",
        "*Summary:* {summary_short}",
        "*Link:* {source_link}",
        "*Source:* {source_name}",
        "*Id:* `{document_id}`",
    ]
)
_POST_INGEST_DISCORD_TEMPLATE_DEFAULT = "\n".join(
    [
        "**KB document ready**",
        "**Summary:** {summary_short}",
        "**Link:** {source_link}",
        "**Source:** {source_name}",
        "**Id:** `{document_id}`",
    ]
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Settings(BaseSettings):
    """Backend configuration read from the environment and an optional ``.env`` file.

    Each field is populated from the environment variable named by its
    ``validation_alias``; variables that match no field are ignored.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # --- Storage -----------------------------------------------------------
    data_dir: Path = Field(Path("data"), validation_alias="DATA_DIR")
    sqlite_filename: str = Field("rag.sqlite", validation_alias="SQLITE_FILENAME")
    database_url: str | None = Field(None, validation_alias="DATABASE_URL")

    # --- Generic embedding provider ----------------------------------------
    openai_api_key: str | None = Field(None, validation_alias="OPENAI_API_KEY")
    embedding_api_key: str | None = Field(None, validation_alias="EMBEDDING_API_KEY")
    embedding_model: str = Field("text-embedding-3-small", validation_alias="EMBEDDING_MODEL")
    vector_embedding_dim: int = Field(1536, validation_alias="VECTOR_EMBEDDING_DIM")

    # --- Logging -----------------------------------------------------------
    log_level: str = Field("INFO", validation_alias="LOG_LEVEL")
    log_json: bool = Field(False, validation_alias="LOG_JSON")

    # --- Gateway -----------------------------------------------------------
    backend_gateway_secret: str | None = Field(
        None,
        validation_alias="BACKEND_GATEWAY_SECRET",
        description=(
            "When set, /docs, /redoc, /openapi.json, and /docs/* require X-Gateway-Secret to match. "
            "Configure the same value on the Hono gateway so docs are not usable by calling the "
            "backend directly."
        ),
    )

    # --- Link expansion ----------------------------------------------------
    link_expand_max_depth: int = Field(
        2, validation_alias="LINK_EXPAND_MAX_DEPTH", ge=1, le=32
    )
    link_expand_domain_allowlist: str = Field(
        "",
        validation_alias="LINK_EXPAND_DOMAIN_ALLOWLIST",
        description="Comma-separated hostnames; empty = all (except denylist)",
    )
    link_expand_domain_denylist: str = Field(
        "",
        validation_alias="LINK_EXPAND_DOMAIN_DENYLIST",
        description="Comma-separated hostnames to block",
    )
    link_expand_politeness_delay_ms: int = Field(
        750, validation_alias="LINK_EXPAND_POLITENESS_DELAY_MS", ge=0
    )
    link_expand_per_domain_interval_ms: int = Field(
        1500, validation_alias="LINK_EXPAND_PER_DOMAIN_INTERVAL_MS", ge=0
    )
    link_expand_respect_robots: bool = Field(
        True, validation_alias="LINK_EXPAND_RESPECT_ROBOTS"
    )
    link_expand_user_agent: str = Field(
        "BusinessStackLinkBot/0.1 (+https://example.invalid/bot)",
        validation_alias="LINK_EXPAND_USER_AGENT",
    )
    link_expand_max_response_bytes: int = Field(
        5_242_880, validation_alias="LINK_EXPAND_MAX_RESPONSE_BYTES", ge=4096
    )
    link_expand_fetch_timeout_s: float = Field(
        30.0, validation_alias="LINK_EXPAND_FETCH_TIMEOUT_S", ge=1.0
    )

    # --- Chunking ----------------------------------------------------------
    chunk_llm_model: str = Field("gpt-4o-mini", validation_alias="CHUNK_LLM_MODEL")

    # --- Gemini credentials and embeddings ---------------------------------
    gemini_api_key: str | None = Field(
        None,
        validation_alias="GEMINI_API_KEY",
        description="Overrides integration store when set.",
    )
    integrations_gateway_url: str | None = Field(
        None,
        validation_alias="INTEGRATIONS_GATEWAY_URL",
        description=(
            "Hono gateway origin for GET /internal/integrations "
            "(e.g. http://127.0.0.1:3001). Used when GEMINI_API_KEY is unset."
        ),
    )
    integrations_internal_secret: str | None = Field(
        None,
        validation_alias="INTEGRATIONS_INTERNAL_SECRET",
        description="Bearer shared with gateway INTEGRATIONS_INTERNAL_SECRET.",
    )
    gemini_embedding_model: str = Field(
        "gemini-embedding-001",
        validation_alias="GEMINI_EMBEDDING_MODEL",
        description=(
            "Gemini API embedding model id (Gemini Embedding / console 'Gemini Embedding 1': "
            "gemini-embedding-001 per https://ai.google.dev/gemini-api/docs/models/gemini-embedding-001 )"
        ),
    )
    gemini_embed_batch_size: int = Field(
        32, validation_alias="GEMINI_EMBED_BATCH_SIZE", ge=1, le=100
    )
    gemini_embed_max_retries: int = Field(
        4, validation_alias="GEMINI_EMBED_MAX_RETRIES", ge=1, le=12
    )
    gemini_embed_base_delay_s: float = Field(
        1.0, validation_alias="GEMINI_EMBED_BASE_DELAY_S", ge=0.1
    )
    gemini_embed_task_type: str = Field(
        "RETRIEVAL_DOCUMENT", validation_alias="GEMINI_EMBED_TASK_TYPE"
    )

    # --- Embedding dead-letter queue ---------------------------------------
    embedding_dlq_max_attempts: int = Field(
        5,
        validation_alias="EMBEDDING_DLQ_MAX_ATTEMPTS",
        ge=1,
        le=100,
        description="After this many failed embed jobs, row moves to dead state",
    )
    embedding_dlq_base_delay_s: float = Field(
        60.0,
        validation_alias="EMBEDDING_DLQ_BASE_DELAY_S",
        ge=1.0,
        description="Initial backoff; doubles each attempt (capped)",
    )
    embedding_dlq_max_backoff_s: float = Field(
        3600.0, validation_alias="EMBEDDING_DLQ_MAX_BACKOFF_S", ge=60.0
    )

    # --- Post-ingest notifications -----------------------------------------
    post_ingest_slack_webhook_url: str | None = Field(
        None,
        validation_alias="POST_INGEST_SLACK_WEBHOOK_URL",
        description="Slack incoming webhook URL (never logged)",
    )
    post_ingest_discord_webhook_url: str | None = Field(
        None,
        validation_alias="POST_INGEST_DISCORD_WEBHOOK_URL",
        description="Discord webhook URL (never logged)",
    )
    post_ingest_slack_template: str = Field(
        _POST_INGEST_SLACK_TEMPLATE_DEFAULT,
        validation_alias="POST_INGEST_SLACK_TEMPLATE",
        description="str.format template; placeholders: document_id, summary, ...",
    )
    post_ingest_discord_template: str = Field(
        _POST_INGEST_DISCORD_TEMPLATE_DEFAULT,
        validation_alias="POST_INGEST_DISCORD_TEMPLATE",
    )
    post_ingest_summary_max_chars: int = Field(
        400, validation_alias="POST_INGEST_SUMMARY_MAX_CHARS", ge=20, le=8000
    )

    # --- Entity extraction -------------------------------------------------
    entity_use_spacy: bool = Field(True, validation_alias="ENTITY_USE_SPACY")
    spacy_model: str = Field("en_core_web_sm", validation_alias="SPACY_MODEL")
    entity_use_gliner: bool = Field(False, validation_alias="ENTITY_USE_GLINER")
    gliner_model_id: str = Field(
        "urchade/gliner_medium-v2.1", validation_alias="GLINER_MODEL_ID"
    )
    entity_llm_enabled: bool = Field(False, validation_alias="ENTITY_LLM_ENABLED")
    entity_llm_min_mentions: int = Field(
        2, validation_alias="ENTITY_LLM_MIN_MENTIONS", ge=0, le=50
    )
    entity_llm_on_all_chunks: bool = Field(
        False, validation_alias="ENTITY_LLM_ON_ALL_CHUNKS"
    )
    ollama_base_url: str = Field(
        "http://127.0.0.1:11434", validation_alias="OLLAMA_BASE_URL"
    )
    ollama_entity_model: str = Field("llama3.2", validation_alias="OLLAMA_ENTITY_MODEL")
    entity_extract_strict: bool = Field(
        False,
        validation_alias="ENTITY_EXTRACT_STRICT",
        description="If true, entity extraction failure fails the embed job",
    )

    # --- Gemini query / chat -----------------------------------------------
    gemini_query_task_type: str = Field(
        "RETRIEVAL_QUERY",
        validation_alias="GEMINI_QUERY_TASK_TYPE",
        description="Gemini taskType for query embeddings",
    )
    gemini_chat_model: str = Field(
        "gemini-3-flash-preview",
        validation_alias="GEMINI_CHAT_MODEL",
        description=(
            "Gemini API model id for POST /chat/.../complete (generateContent). "
            "Older ids (e.g. gemini-1.5-flash) may return 404 on v1beta; see "
            "https://ai.google.dev/gemini-api/docs/models and ListModels."
        ),
    )

    @field_validator("gemini_embedding_model", "gemini_chat_model", mode="after")
    @classmethod
    def _normalize_gemini_model_env(cls, v: str) -> str:
        """Trim whitespace and drop a leading ``models/`` from a Gemini model id.

        Lets a full resource name such as ``models/gemini-2.5-flash`` be
        configured while only the bare id is kept.
        """
        return v.strip().removeprefix("models/")

    # --- Retrieval scoring -------------------------------------------------
    retrieval_score_semantic_weight: float = Field(
        0.7, validation_alias="RETRIEVAL_SCORE_SEMANTIC_WEIGHT", ge=0.0, le=1.0
    )
    retrieval_score_recency_weight: float = Field(
        0.2, validation_alias="RETRIEVAL_SCORE_RECENCY_WEIGHT", ge=0.0, le=1.0
    )
    retrieval_score_source_weight: float = Field(
        0.1, validation_alias="RETRIEVAL_SCORE_SOURCE_WEIGHT", ge=0.0, le=1.0
    )
    retrieval_recency_half_life_days: float = Field(
        30.0,
        validation_alias="RETRIEVAL_RECENCY_HALF_LIFE_DAYS",
        gt=0.0,
        description="Recency score halves every this many days since ingested_at",
    )
    retrieval_source_weights_json: str = Field(
        '{"default": 1.0}',
        validation_alias="RETRIEVAL_SOURCE_WEIGHTS_JSON",
        description='JSON map connector_type -> weight in [0,1] (e.g. {"default":1,"web":0.85})',
    )
    retrieval_vec_candidate_multiplier: int = Field(
        5,
        validation_alias="RETRIEVAL_VEC_CANDIDATE_MULTIPLIER",
        ge=1,
        le=50,
        description="Vector search retrieves k * multiplier rows before score re-ranking",
    )

    @field_validator(
        "link_expand_domain_allowlist",
        "link_expand_domain_denylist",
        mode="before",
    )
    @classmethod
    def _strip_csv(cls, v: object) -> str:
        """Coerce the raw allow/deny list value to a stripped string (``None`` -> ``""``)."""
        return "" if v is None else str(v).strip()

    @computed_field  # type: ignore[prop-decorator]
    @property
    def sqlalchemy_database_url(self) -> str:
        """Return ``database_url`` when set, else an aiosqlite URL under ``data_dir``."""
        if not self.database_url:
            # No explicit URL: point at the resolved SQLite file in data_dir.
            sqlite_path = (self.data_dir / self.sqlite_filename).resolve()
            return f"sqlite+aiosqlite:///{sqlite_path.as_posix()}"
        return self.database_url
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` object once and reuse it on later calls.

    The result is memoized by ``lru_cache``; call ``get_settings.cache_clear()``
    to force the next call to construct a fresh instance.
    """
    settings = Settings()
    return settings
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def clear_settings_cache() -> None:
    """Discard the memoized settings so the next ``get_settings()`` call rebuilds them."""
    reset_cache = get_settings.cache_clear
    reset_cache()
|