datahub-analytics-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datahub_analytics_agent-0.1.0/.env.example +58 -0
- datahub_analytics_agent-0.1.0/.github/workflows/ci.yml +120 -0
- datahub_analytics_agent-0.1.0/.github/workflows/docker.yml +57 -0
- datahub_analytics_agent-0.1.0/.github/workflows/publish.yml +67 -0
- datahub_analytics_agent-0.1.0/.gitignore +36 -0
- datahub_analytics_agent-0.1.0/.pre-commit-config.yaml +9 -0
- datahub_analytics_agent-0.1.0/AGENTS.md +290 -0
- datahub_analytics_agent-0.1.0/CLAUDE.md +66 -0
- datahub_analytics_agent-0.1.0/PKG-INFO +328 -0
- datahub_analytics_agent-0.1.0/README.md +270 -0
- datahub_analytics_agent-0.1.0/alembic.ini +38 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/analysis.py +149 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/chart_generator.py +70 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/chart_tool.py +103 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/compaction.py +57 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/compactor_registry.py +22 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/graph.py +121 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/history.py +159 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/llm.py +87 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/mock_llm.py +111 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/state.py +13 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/streaming.py +304 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/__init__.py +135 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/chat.py +439 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/conversations.py +244 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/oauth.py +741 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/settings.py +1947 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/config.py +236 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/base.py +26 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/datahub.py +242 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/mcp_platform.py +123 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/native_datahub.py +58 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/registry.py +84 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/env.py +49 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/script.py.mako +25 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/001_init.py +47 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/002_settings_table.py +30 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/003_integrations.py +52 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/004_conversation_quality.py +28 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/005_context_platforms.py +36 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/base.py +33 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/models.py +137 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/repository.py +294 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/types.py +69 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/base.py +30 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/factory.py +95 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/mcp/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/mcp/engine.py +78 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/resolver.py +84 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/snowflake/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/snowflake/engine.py +304 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/sqlalchemy/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/sqlalchemy/engine.py +163 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/main.py +536 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/chart.py +101 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/system.py +33 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/system_prompt.md +184 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/datahub_skills.py +409 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/improve-context/SKILL.md +73 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/loader.py +162 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/publish-analysis/SKILL.md +99 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/save-correction/SKILL.md +161 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/search-business-context/SKILL.md +109 -0
- datahub_analytics_agent-0.1.0/backend/src/analytics_agent/tracing.py +88 -0
- datahub_analytics_agent-0.1.0/config.demo.yaml +17 -0
- datahub_analytics_agent-0.1.0/config.yaml.example +41 -0
- datahub_analytics_agent-0.1.0/docker/Dockerfile +32 -0
- datahub_analytics_agent-0.1.0/docker-compose.yml +34 -0
- datahub_analytics_agent-0.1.0/frontend/eslint.config.js +31 -0
- datahub_analytics_agent-0.1.0/frontend/index.html +13 -0
- datahub_analytics_agent-0.1.0/frontend/package.json +51 -0
- datahub_analytics_agent-0.1.0/frontend/pnpm-lock.yaml +4768 -0
- datahub_analytics_agent-0.1.0/frontend/postcss.config.js +6 -0
- datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-favicon.svg +12 -0
- datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-color.svg +17 -0
- datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-dark-bg.svg +12 -0
- datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-mono.svg +12 -0
- datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-white.svg +12 -0
- datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-wordmark.svg +17 -0
- datahub_analytics_agent-0.1.0/frontend/src/App.tsx +111 -0
- datahub_analytics_agent-0.1.0/frontend/src/api/conversations.ts +61 -0
- datahub_analytics_agent-0.1.0/frontend/src/api/oauth.ts +118 -0
- datahub_analytics_agent-0.1.0/frontend/src/api/settings.ts +322 -0
- datahub_analytics_agent-0.1.0/frontend/src/api/stream.ts +74 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Brand/AppLogo.tsx +34 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Brand/DataHubBadge.tsx +40 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Brand/ThemeSwitcher.tsx +42 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/ChatView.tsx +423 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/ContextStatusBar.tsx +158 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/EngineSelector.tsx +31 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/MessageInput.tsx +112 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/MessageList.tsx +72 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/WelcomeView.tsx +204 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/AgentWorkBlock.tsx +210 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ChartMessage.tsx +79 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ErrorMessage.tsx +14 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/SqlMessage.tsx +81 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/TextMessage.tsx +106 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ThinkingMessage.tsx +179 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/TokenBadge.tsx +104 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ToolCallMessage.tsx +66 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Onboarding/OnboardingWizard.tsx +624 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/ModelSection.tsx +454 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/SettingsModal.tsx +1371 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/SnowflakeAuthSection.tsx +760 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/AddConnectionFlow.tsx +61 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/GenericMcpForm.tsx +194 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/SimpleFormShell.tsx +198 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/TypePicker.tsx +129 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/fields/ArrayField.tsx +59 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/fields/KeyValueField.tsx +70 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/helpers.tsx +200 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/index.ts +29 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/custom-mcp.tsx +22 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/datahub-mcp.tsx +72 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/datahub.tsx +30 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/mysql.tsx +28 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/postgresql.tsx +28 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/snowflake-mcp.tsx +12 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/snowflake.tsx +88 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/sqlite.tsx +25 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/types.ts +52 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Sidebar/ConversationItem.tsx +38 -0
- datahub_analytics_agent-0.1.0/frontend/src/components/Sidebar/Sidebar.tsx +77 -0
- datahub_analytics_agent-0.1.0/frontend/src/index.css +282 -0
- datahub_analytics_agent-0.1.0/frontend/src/lib/__tests__/buildUiMessages.test.ts +193 -0
- datahub_analytics_agent-0.1.0/frontend/src/lib/__tests__/groupMessages.test.ts +138 -0
- datahub_analytics_agent-0.1.0/frontend/src/lib/__tests__/stress.test.ts +345 -0
- datahub_analytics_agent-0.1.0/frontend/src/lib/buildUiMessages.ts +124 -0
- datahub_analytics_agent-0.1.0/frontend/src/lib/groupMessages.ts +64 -0
- datahub_analytics_agent-0.1.0/frontend/src/lib/starterPrompts.ts +222 -0
- datahub_analytics_agent-0.1.0/frontend/src/main.tsx +10 -0
- datahub_analytics_agent-0.1.0/frontend/src/store/connectionSettings.ts +100 -0
- datahub_analytics_agent-0.1.0/frontend/src/store/conversations.ts +159 -0
- datahub_analytics_agent-0.1.0/frontend/src/store/display.ts +25 -0
- datahub_analytics_agent-0.1.0/frontend/src/store/theme.ts +32 -0
- datahub_analytics_agent-0.1.0/frontend/src/types/index.ts +104 -0
- datahub_analytics_agent-0.1.0/frontend/tailwind.config.ts +33 -0
- datahub_analytics_agent-0.1.0/frontend/tsconfig.json +25 -0
- datahub_analytics_agent-0.1.0/frontend/tsconfig.node.json +11 -0
- datahub_analytics_agent-0.1.0/frontend/vite.config.ts +30 -0
- datahub_analytics_agent-0.1.0/justfile +143 -0
- datahub_analytics_agent-0.1.0/pyproject.toml +119 -0
- datahub_analytics_agent-0.1.0/quickstart.sh +503 -0
- datahub_analytics_agent-0.1.0/scripts/datahub_status.py +24 -0
- datahub_analytics_agent-0.1.0/scripts/ingest_metadata.py +376 -0
- datahub_analytics_agent-0.1.0/scripts/load_sample_data.py +225 -0
- datahub_analytics_agent-0.1.0/tests/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/tests/e2e/fixtures/index.ts +46 -0
- datahub_analytics_agent-0.1.0/tests/e2e/mcp-connection.spec.ts +71 -0
- datahub_analytics_agent-0.1.0/tests/e2e/playwright.config.ts +45 -0
- datahub_analytics_agent-0.1.0/tests/e2e/stream-conversation-switch.spec.ts +103 -0
- datahub_analytics_agent-0.1.0/tests/e2e/stream-switchback.spec.ts +95 -0
- datahub_analytics_agent-0.1.0/tests/e2e/token-counting.spec.ts +59 -0
- datahub_analytics_agent-0.1.0/tests/integration/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/tests/integration/test_datahub_tools.py +158 -0
- datahub_analytics_agent-0.1.0/tests/unit/__init__.py +0 -0
- datahub_analytics_agent-0.1.0/tests/unit/test_compaction.py +58 -0
- datahub_analytics_agent-0.1.0/tests/unit/test_config.py +151 -0
- datahub_analytics_agent-0.1.0/tests/unit/test_context_platform_repo.py +115 -0
- datahub_analytics_agent-0.1.0/tests/unit/test_llm_provider.py +223 -0
- datahub_analytics_agent-0.1.0/tests/unit/test_settings_wire_format.py +334 -0
- datahub_analytics_agent-0.1.0/uv.lock +4603 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# DataHub — referenced via ${VAR} substitution in config.yaml context_platforms
|
|
2
|
+
DATAHUB_GMS_URL=http://localhost:8080
|
|
3
|
+
DATAHUB_GMS_TOKEN=
|
|
4
|
+
|
|
5
|
+
# LLM — set LLM_PROVIDER to "openai", "anthropic", "google", or "bedrock"
|
|
6
|
+
LLM_PROVIDER=openai
|
|
7
|
+
|
|
8
|
+
# OpenAI (used when LLM_PROVIDER=openai)
|
|
9
|
+
OPENAI_API_KEY=
|
|
10
|
+
# LLM_MODEL=gpt-4o # optional override; default: gpt-4o
|
|
11
|
+
# CHART_LLM_MODEL=gpt-4o-mini
|
|
12
|
+
|
|
13
|
+
# Anthropic (used when LLM_PROVIDER=anthropic)
|
|
14
|
+
ANTHROPIC_API_KEY=
|
|
15
|
+
# LLM_MODEL=claude-opus-4-7 # optional override; default: claude-opus-4-7
|
|
16
|
+
# CHART_LLM_MODEL=claude-haiku-4-5
|
|
17
|
+
|
|
18
|
+
# Bedrock (used when LLM_PROVIDER=bedrock) — Anthropic models on AWS Bedrock.
|
|
19
|
+
# Auth uses the standard AWS credential chain (env vars, ~/.aws/credentials,
|
|
20
|
+
# IAM role) by default. Set AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY below
|
|
21
|
+
# to override with explicit keys. AWS_SESSION_TOKEN is optional (STS).
|
|
22
|
+
AWS_REGION=us-west-2
|
|
23
|
+
# AWS_ACCESS_KEY_ID=
|
|
24
|
+
# AWS_SECRET_ACCESS_KEY=
|
|
25
|
+
# AWS_SESSION_TOKEN=
|
|
26
|
+
# Model IDs on Bedrock differ from native Anthropic IDs — use the full
|
|
27
|
+
# inference-profile ID (e.g. "us.anthropic.claude-sonnet-4-5-20250929-v1:0").
|
|
28
|
+
# LLM_MODEL=us.anthropic.claude-sonnet-4-5-20250929-v1:0
|
|
29
|
+
# CHART_LLM_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
|
|
30
|
+
|
|
31
|
+
# Database (pluggable)
|
|
32
|
+
# PostgreSQL (production):
|
|
33
|
+
# DATABASE_URL=postgresql+asyncpg://user:pass@localhost:5432/talk_to_data
|
|
34
|
+
# SQLite (local dev):
|
|
35
|
+
DATABASE_URL=sqlite+aiosqlite:///./data/dev.db
|
|
36
|
+
|
|
37
|
+
# Engine config
|
|
38
|
+
ENGINES_CONFIG=./config.yaml
|
|
39
|
+
SQL_ROW_LIMIT=500
|
|
40
|
+
|
|
41
|
+
# App
|
|
42
|
+
LOG_LEVEL=INFO
|
|
43
|
+
SSE_KEEPALIVE_INTERVAL=15
|
|
44
|
+
|
|
45
|
+
# Observability — OTEL tracing (no-op if endpoint is blank)
|
|
46
|
+
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # Jaeger / Grafana Tempo / any OTLP collector
|
|
47
|
+
# OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io
|
|
48
|
+
# OTEL_EXPORTER_OTLP_HEADERS=x-honeycomb-team=YOUR_API_KEY
|
|
49
|
+
# OTEL_SERVICE_NAME=talkster
|
|
50
|
+
|
|
51
|
+
# Snowflake (if using snowflake engine)
|
|
52
|
+
SNOWFLAKE_ACCOUNT=
|
|
53
|
+
SNOWFLAKE_WAREHOUSE=
|
|
54
|
+
SNOWFLAKE_DATABASE=
|
|
55
|
+
SNOWFLAKE_SCHEMA=
|
|
56
|
+
SNOWFLAKE_USER=
|
|
57
|
+
SNOWFLAKE_PASSWORD=
|
|
58
|
+
# SNOWFLAKE_PRIVATE_KEY= # base64-encoded PEM: base64 -i key.p8 | tr -d '\n'
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
# ── Python backend ──────────────────────────────────────────────────────────
|
|
11
|
+
backend-lint:
|
|
12
|
+
name: Backend — lint & typecheck
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v5
|
|
19
|
+
with:
|
|
20
|
+
enable-cache: true
|
|
21
|
+
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: uv sync --extra dev
|
|
24
|
+
|
|
25
|
+
- name: Ruff lint
|
|
26
|
+
run: uv run ruff check backend/src tests
|
|
27
|
+
|
|
28
|
+
- name: Ruff format check
|
|
29
|
+
run: uv run ruff format --check backend/src tests
|
|
30
|
+
|
|
31
|
+
- name: Mypy typecheck
|
|
32
|
+
run: uv run mypy backend/src/analytics_agent
|
|
33
|
+
|
|
34
|
+
backend-test:
|
|
35
|
+
name: Backend — unit tests
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
steps:
|
|
38
|
+
- uses: actions/checkout@v4
|
|
39
|
+
|
|
40
|
+
- name: Install uv
|
|
41
|
+
uses: astral-sh/setup-uv@v5
|
|
42
|
+
with:
|
|
43
|
+
enable-cache: true
|
|
44
|
+
|
|
45
|
+
- name: Install dependencies
|
|
46
|
+
run: uv sync --extra dev
|
|
47
|
+
|
|
48
|
+
- name: Run unit tests
|
|
49
|
+
run: uv run pytest tests/unit -v
|
|
50
|
+
|
|
51
|
+
# ── Playwright e2e ──────────────────────────────────────────────────────────
|
|
52
|
+
e2e:
|
|
53
|
+
name: E2E — Playwright
|
|
54
|
+
runs-on: ubuntu-latest
|
|
55
|
+
steps:
|
|
56
|
+
- uses: actions/checkout@v4
|
|
57
|
+
|
|
58
|
+
- name: Install uv
|
|
59
|
+
uses: astral-sh/setup-uv@v5
|
|
60
|
+
with:
|
|
61
|
+
enable-cache: true
|
|
62
|
+
|
|
63
|
+
- name: Install Python dependencies
|
|
64
|
+
run: uv sync
|
|
65
|
+
|
|
66
|
+
- name: Setup pnpm
|
|
67
|
+
uses: pnpm/action-setup@v4
|
|
68
|
+
with:
|
|
69
|
+
version: latest
|
|
70
|
+
|
|
71
|
+
- name: Setup Node
|
|
72
|
+
uses: actions/setup-node@v4
|
|
73
|
+
with:
|
|
74
|
+
node-version: 20
|
|
75
|
+
cache: pnpm
|
|
76
|
+
cache-dependency-path: frontend/pnpm-lock.yaml
|
|
77
|
+
|
|
78
|
+
- name: Install frontend dependencies
|
|
79
|
+
run: cd frontend && pnpm install --frozen-lockfile
|
|
80
|
+
|
|
81
|
+
- name: Build frontend
|
|
82
|
+
run: cd frontend && pnpm build
|
|
83
|
+
|
|
84
|
+
- name: Install Playwright browsers
|
|
85
|
+
run: cd frontend && npx playwright install chromium --with-deps
|
|
86
|
+
|
|
87
|
+
- name: Run e2e tests
|
|
88
|
+
run: npx --prefix frontend playwright test --config tests/e2e/playwright.config.ts
|
|
89
|
+
|
|
90
|
+
# ── Frontend ────────────────────────────────────────────────────────────────
|
|
91
|
+
frontend-lint:
|
|
92
|
+
name: Frontend — typecheck & lint
|
|
93
|
+
runs-on: ubuntu-latest
|
|
94
|
+
defaults:
|
|
95
|
+
run:
|
|
96
|
+
working-directory: frontend
|
|
97
|
+
|
|
98
|
+
steps:
|
|
99
|
+
- uses: actions/checkout@v4
|
|
100
|
+
|
|
101
|
+
- name: Setup pnpm
|
|
102
|
+
uses: pnpm/action-setup@v4
|
|
103
|
+
with:
|
|
104
|
+
version: latest
|
|
105
|
+
|
|
106
|
+
- name: Setup Node
|
|
107
|
+
uses: actions/setup-node@v4
|
|
108
|
+
with:
|
|
109
|
+
node-version: 20
|
|
110
|
+
cache: pnpm
|
|
111
|
+
cache-dependency-path: frontend/pnpm-lock.yaml
|
|
112
|
+
|
|
113
|
+
- name: Install dependencies
|
|
114
|
+
run: pnpm install --frozen-lockfile
|
|
115
|
+
|
|
116
|
+
- name: TypeScript typecheck + build
|
|
117
|
+
run: pnpm build
|
|
118
|
+
|
|
119
|
+
- name: ESLint
|
|
120
|
+
run: pnpm lint
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: Build & push Docker image
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
tags: ["v*.*.*"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [main]
|
|
9
|
+
|
|
10
|
+
env:
|
|
11
|
+
REGISTRY: ghcr.io
|
|
12
|
+
IMAGE_NAME: ${{ github.repository }}
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
build:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
permissions:
|
|
18
|
+
contents: read
|
|
19
|
+
packages: write
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- name: Checkout
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Set up Docker Buildx
|
|
26
|
+
uses: docker/setup-buildx-action@v3
|
|
27
|
+
|
|
28
|
+
- name: Log in to GitHub Container Registry
|
|
29
|
+
if: github.event_name != 'pull_request'
|
|
30
|
+
uses: docker/login-action@v3
|
|
31
|
+
with:
|
|
32
|
+
registry: ${{ env.REGISTRY }}
|
|
33
|
+
username: ${{ github.actor }}
|
|
34
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
35
|
+
|
|
36
|
+
- name: Extract Docker metadata
|
|
37
|
+
id: meta
|
|
38
|
+
uses: docker/metadata-action@v5
|
|
39
|
+
with:
|
|
40
|
+
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
41
|
+
tags: |
|
|
42
|
+
type=ref,event=branch
|
|
43
|
+
type=ref,event=pr
|
|
44
|
+
type=semver,pattern={{version}}
|
|
45
|
+
type=semver,pattern={{major}}.{{minor}}
|
|
46
|
+
type=sha,prefix=sha-,format=short
|
|
47
|
+
|
|
48
|
+
- name: Build and push
|
|
49
|
+
uses: docker/build-push-action@v6
|
|
50
|
+
with:
|
|
51
|
+
context: .
|
|
52
|
+
file: docker/Dockerfile
|
|
53
|
+
push: ${{ github.event_name != 'pull_request' }}
|
|
54
|
+
tags: ${{ steps.meta.outputs.tags }}
|
|
55
|
+
labels: ${{ steps.meta.outputs.labels }}
|
|
56
|
+
cache-from: type=gha
|
|
57
|
+
cache-to: type=gha,mode=max
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
name: Build distribution
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Install uv
|
|
16
|
+
uses: astral-sh/setup-uv@v5
|
|
17
|
+
with:
|
|
18
|
+
enable-cache: true
|
|
19
|
+
|
|
20
|
+
- name: Build package
|
|
21
|
+
run: uv build
|
|
22
|
+
|
|
23
|
+
- name: Upload dist artifacts
|
|
24
|
+
uses: actions/upload-artifact@v4
|
|
25
|
+
with:
|
|
26
|
+
name: dist
|
|
27
|
+
path: dist/
|
|
28
|
+
|
|
29
|
+
publish-pypi:
|
|
30
|
+
name: Publish to PyPI
|
|
31
|
+
needs: build
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
environment:
|
|
34
|
+
name: pypi
|
|
35
|
+
url: https://pypi.org/p/datahub-analytics-agent
|
|
36
|
+
permissions:
|
|
37
|
+
id-token: write
|
|
38
|
+
steps:
|
|
39
|
+
- name: Download dist artifacts
|
|
40
|
+
uses: actions/download-artifact@v4
|
|
41
|
+
with:
|
|
42
|
+
name: dist
|
|
43
|
+
path: dist/
|
|
44
|
+
|
|
45
|
+
- name: Publish to PyPI
|
|
46
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
47
|
+
|
|
48
|
+
github-release:
|
|
49
|
+
name: Create GitHub Release
|
|
50
|
+
needs: build
|
|
51
|
+
runs-on: ubuntu-latest
|
|
52
|
+
permissions:
|
|
53
|
+
contents: write
|
|
54
|
+
steps:
|
|
55
|
+
- uses: actions/checkout@v4
|
|
56
|
+
|
|
57
|
+
- name: Download dist artifacts
|
|
58
|
+
uses: actions/download-artifact@v4
|
|
59
|
+
with:
|
|
60
|
+
name: dist
|
|
61
|
+
path: dist/
|
|
62
|
+
|
|
63
|
+
- name: Create release
|
|
64
|
+
uses: softprops/action-gh-release@v2
|
|
65
|
+
with:
|
|
66
|
+
files: dist/*
|
|
67
|
+
generate_release_notes: true
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
*.env
|
|
2
|
+
!.env.example
|
|
3
|
+
.env.quickstart
|
|
4
|
+
config.yaml
|
|
5
|
+
!config.yaml.example
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.pyc
|
|
8
|
+
*.pyo
|
|
9
|
+
.venv/
|
|
10
|
+
dist/
|
|
11
|
+
build/
|
|
12
|
+
*.egg-info/
|
|
13
|
+
.mypy_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
htmlcov/
|
|
17
|
+
data/
|
|
18
|
+
*.db
|
|
19
|
+
|
|
20
|
+
# Frontend
|
|
21
|
+
node_modules/
|
|
22
|
+
frontend/dist/
|
|
23
|
+
frontend/.vite/
|
|
24
|
+
|
|
25
|
+
# IDE
|
|
26
|
+
.idea/
|
|
27
|
+
.vscode/
|
|
28
|
+
*.iml
|
|
29
|
+
|
|
30
|
+
# Logs
|
|
31
|
+
*.log
|
|
32
|
+
nohup.out
|
|
33
|
+
|
|
34
|
+
# Playwright MCP test artifacts
|
|
35
|
+
.playwright-mcp/
|
|
36
|
+
*.png
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# AGENTS.md — Analytics Agent Codebase Guide
|
|
2
|
+
|
|
3
|
+
This file is written for AI coding agents (Claude Code, Cursor, Copilot, etc.) working on the Analytics Agent codebase. Read it before making changes.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Project in one sentence
|
|
8
|
+
|
|
9
|
+
Analytics Agent is a LangGraph-based chat agent that uses **DataHub** tools for metadata context and pluggable **SQL engines** (Snowflake first) to answer natural-language data questions, with Vega-Lite charts rendered inline in a React + Vite UI served by the same FastAPI process.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Running the stack
|
|
14
|
+
|
|
15
|
+
A `justfile` at the repo root covers all common tasks. Install `just` once (`brew install just`), then:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
just install # uv sync + pnpm install
|
|
19
|
+
just start # build frontend if stale, start backend at :8100
|
|
20
|
+
just port=8102 start # same on a custom port
|
|
21
|
+
just stop # kill the backend
|
|
22
|
+
just nuke # wipe the DB (start from scratch / re-trigger wizard)
|
|
23
|
+
just start-remote # start + print DataHub connection status
|
|
24
|
+
just logs # tail /tmp/analytics_agent.log
|
|
25
|
+
just test # unit tests
|
|
26
|
+
just build # force frontend rebuild
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
`just start` automatically detects whether `frontend/src` is newer than `frontend/dist` and rebuilds only when needed.
|
|
30
|
+
|
|
31
|
+
### Without just (manual)
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
uv sync
|
|
35
|
+
cd frontend && pnpm install && pnpm build && cd ..
|
|
36
|
+
uv run uvicorn analytics_agent.main:app --reload --port 8101
|
|
37
|
+
# → http://localhost:8101
|
|
38
|
+
# The setup wizard handles LLM key + connections on first run.
|
|
39
|
+
# Optional: cp .env.example .env to pre-configure credentials.
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Two-process mode (frontend hot reload)
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# Terminal 1 — backend (dev)
|
|
46
|
+
uv run uvicorn analytics_agent.main:app --reload --port 8101
|
|
47
|
+
|
|
48
|
+
# Terminal 2 — Vite dev server with HMR
|
|
49
|
+
cd frontend && pnpm dev
|
|
50
|
+
# → http://localhost:5173 (proxies /api/* to :8101)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**DataHub credentials**: run `datahub init --sso --host https://your-instance.acryl.io/gms` once; the app then reads `~/.datahubenv` automatically. Alternatively, set `DATAHUB_GMS_URL` + `DATAHUB_GMS_TOKEN` in `config.yaml` / `.env`.
|
|
54
|
+
|
|
55
|
+
**Database**: SQLite at `./data/dev.db` by default. Alembic runs automatically on startup. For Postgres set `DATABASE_URL=postgresql+asyncpg://...`.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Key file map
|
|
60
|
+
|
|
61
|
+
| Path | What it does |
|
|
62
|
+
|------|-------------|
|
|
63
|
+
| `backend/src/analytics_agent/main.py` | FastAPI app factory + lifespan (runs Alembic, seeds integrations, mounts SPA) |
|
|
64
|
+
| `backend/src/analytics_agent/agent/graph.py` | LangGraph `StateGraph`: ReAct agent → conditional chart node |
|
|
65
|
+
| `backend/src/analytics_agent/agent/streaming.py` | `astream_events` → SSE event dicts; handles `on_tool_error` |
|
|
66
|
+
| `backend/src/analytics_agent/agent/history.py` | Reconstructs LangChain message history from DB rows; pads orphaned tool calls |
|
|
67
|
+
| `backend/src/analytics_agent/agent/chart_tool.py` | `create_chart` LangChain tool; stores spec in `_pending_charts` side-channel |
|
|
68
|
+
| `backend/src/analytics_agent/agent/chart_generator.py` | `chart_node`: runs after SQL results; calls chart LLM → updates `pending_chart` state |
|
|
69
|
+
| `backend/src/analytics_agent/api/chat.py` | `POST /api/conversations/{id}/messages` → `StreamingResponse` (SSE) |
|
|
70
|
+
| `backend/src/analytics_agent/api/settings.py` | Connection CRUD + test + tool toggles + prompt + display settings |
|
|
71
|
+
| `backend/src/analytics_agent/api/oauth.py` | SSO browser flow, PAT storage, OAuth popup flow, credential encryption |
|
|
72
|
+
| `backend/src/analytics_agent/context/datahub.py` | Builds DataHub LangChain tools via `datahub_agent_context.build_langchain_tools()` |
|
|
73
|
+
| `backend/src/analytics_agent/engines/resolver.py` | **Single credential resolution point** — loads Integration + credential from DB |
|
|
74
|
+
| `backend/src/analytics_agent/engines/snowflake/engine.py` | Snowflake `QueryEngine`: execute_sql, list_tables, get_schema, preview_table; SSO/key-pair/PAT auth |
|
|
75
|
+
| `backend/src/analytics_agent/engines/factory.py` | Engine registry; `register_engine` / `unregister_engine` for dynamic connections |
|
|
76
|
+
| `backend/src/analytics_agent/db/models.py` | SQLAlchemy models: Conversation, Message, Integration, IntegrationCredential, Setting |
|
|
77
|
+
| `backend/src/analytics_agent/db/repository.py` | Repos: ConversationRepo, MessageRepo, SettingsRepo, IntegrationRepo, CredentialRepo |
|
|
78
|
+
| `backend/src/analytics_agent/prompts/system_prompt.md` | Agent system prompt (edit here — loaded at runtime) |
|
|
79
|
+
| `frontend/src/components/Chat/ChatView.tsx` | Chat shell; handles welcome-screen → new conversation flow |
|
|
80
|
+
| `frontend/src/components/Chat/WelcomeView.tsx` | Landing screen with LLM greeting, suggestion chips, engine selector |
|
|
81
|
+
| `frontend/src/components/Settings/SnowflakeAuthSection.tsx` | Segmented auth selector: Password / Private Key / SSO / PAT / OAuth |
|
|
82
|
+
| `frontend/src/store/conversations.ts` | Zustand: conversations, messages, engines, streaming state |
|
|
83
|
+
| `frontend/src/store/display.ts` | Zustand: app name, logo, cached LLM greeting |
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Integrations + credential architecture
|
|
88
|
+
|
|
89
|
+
Connections are stored in two DB tables:
|
|
90
|
+
|
|
91
|
+
- **`integrations`** — connection topology (account, warehouse, database, user). `source="yaml"` for `config.yaml` connections, `source="ui"` for UI-created ones.
|
|
92
|
+
- **`integration_credentials`** — encrypted auth per connection: `auth_type` ∈ `{sso_externalbrowser, private_key, pat, oauth, password}`.
|
|
93
|
+
|
|
94
|
+
**Credential resolution** happens in `engines/resolver.py::resolve_engine(engine_name, session)`:
|
|
95
|
+
1. Looks up `integration_credentials` for the engine
|
|
96
|
+
2. Decrypts the credential and returns a configured engine clone
|
|
97
|
+
3. Falls back to env vars for `source="yaml"` connections (backwards compat)
|
|
98
|
+
|
|
99
|
+
**Never thread individual credential fields** (`oauth_token`, `sso_user`, etc.) through `graph.py` or agent code. Pass the engine object returned by `resolve_engine`.
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
# chat.py — the only place credentials are resolved
|
|
103
|
+
engine = await resolve_engine(engine_name, session)
|
|
104
|
+
graph = build_graph(engine=engine, engine_name=engine_name, ...)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Connection write wire format — `{config, secrets}`
|
|
108
|
+
|
|
109
|
+
`PUT /api/settings/connections/{name}` and `POST /api/settings/connections`
|
|
110
|
+
accept a single wire shape:
|
|
111
|
+
|
|
112
|
+
```jsonc
|
|
113
|
+
{
|
|
114
|
+
"config": { "account": "...", "warehouse": "...", "database": "...", "user": "..." },
|
|
115
|
+
"secrets": { "password": "..." }
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
- `config` values are merged directly into `integrations.config` (DB) for engines,
|
|
120
|
+
or into the context-platform's config JSON for DataHub.
|
|
121
|
+
- `secrets` keys are validated against each engine's own
|
|
122
|
+
`QueryEngine.secret_env_vars` allow-list (e.g.
|
|
123
|
+
`SnowflakeQueryEngine.secret_env_vars = {"password": "SNOWFLAKE_PASSWORD", ...}`)
|
|
124
|
+
and translated to env-var names before being written to `.env` and
|
|
125
|
+
`os.environ`. Unknown secret keys are rejected with **HTTP 400**. The API
|
|
126
|
+
layer stays ignorant of any particular engine's credential fields.
|
|
127
|
+
- `_upsert_env_vars` always double-quotes every value so PEM blocks and passwords
|
|
128
|
+
with special characters (`#`, `$`, `\`, spaces) round-trip correctly.
|
|
129
|
+
|
|
130
|
+
**How the frontend splits values** — each `ConnectionField` returned by
|
|
131
|
+
`GET /connections` has an optional `secret_key` attribute. If present, the field's
|
|
132
|
+
value is routed to `body.secrets[secret_key]`; otherwise it goes to
|
|
133
|
+
`body.config[key]`. See `splitConnectionValues` in `frontend/src/api/settings.ts`.
|
|
134
|
+
|
|
135
|
+
**Staged follow-up steps (tracked):**
|
|
136
|
+
|
|
137
|
+
- **1 - route `body.secrets` to `integration_credentials`** (encrypted, per-connection).
|
|
138
|
+
Adds a `password` auth_type and a `password` branch in `resolver.py`, plus
|
|
139
|
+
`with_password` on `SnowflakeQueryEngine`. After 1 lands, `.env` stops accumulating
|
|
140
|
+
per-connection secrets and two Snowflake connections with different passwords can
|
|
141
|
+
coexist without collision.
|
|
142
|
+
- **2 - `GET /api/settings/connections/schemas/{type}`** + frontend renders forms
|
|
143
|
+
generically from the schema. Promotes `QueryEngine.secret_env_vars` into a full
|
|
144
|
+
typed schema (fields, labels, placeholders, required flags) shared with the
|
|
145
|
+
frontend; handler becomes validate -> dispatch.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## SSE event flow
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
POST /api/conversations/{id}/messages
|
|
153
|
+
└─ chat.py: _event_stream()
|
|
154
|
+
├─ resolve_engine(engine_name, session) → configured engine
|
|
155
|
+
├─ load conversation history → build_history() → LangChain messages
|
|
156
|
+
├─ build_graph(engine=engine, ...) → LangGraph compiled graph
|
|
157
|
+
└─ stream_graph_events(graph, ...)
|
|
158
|
+
├─ on_chat_model_stream → TEXT event
|
|
159
|
+
├─ on_tool_start → TOOL_CALL event (skipped for create_chart)
|
|
160
|
+
├─ on_tool_end → SQL / TOOL_RESULT / CHART
|
|
161
|
+
├─ on_tool_error → TOOL_RESULT (is_error=True)
|
|
162
|
+
├─ on_chain_end → captures final_state for chart_node charts
|
|
163
|
+
└─ end of stream → CHART (fallback) + COMPLETE
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Frontend consumes SSE via `stream.ts` (fetch + ReadableStream, **not** EventSource — needs POST).
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## LangGraph agent design
|
|
171
|
+
|
|
172
|
+
```
|
|
173
|
+
START → agent (create_agent) → conditional → chart → END
|
|
174
|
+
↓ (no SQL rows)
|
|
175
|
+
END
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
- Use `create_agent` from `langchain.agents` (**not** `create_react_agent` from `langgraph.prebuilt`)
|
|
179
|
+
- System prompt loaded from `prompts/system_prompt.md` at runtime (editable without restart)
|
|
180
|
+
- Tools: DataHub tools (search_documents, search, get_entities, …) + engine tools + `create_chart`
|
|
181
|
+
- `chart_node` fires when `get_last_sql_result(state)` finds an `execute_sql` ToolMessage with rows
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Chart generation — two paths
|
|
186
|
+
|
|
187
|
+
| Path | Trigger | How spec reaches frontend |
|
|
188
|
+
|------|---------|--------------------------|
|
|
189
|
+
| `create_chart` tool | Agent calls tool | `_pending_charts[chart_id]` → `on_tool_end` → CHART event |
|
|
190
|
+
| `chart_node` | SQL returned rows | `state.pending_chart` → `on_chain_end` → CHART event |
|
|
191
|
+
| Text fallback | Model writes spec in a fenced `json` code block | `_extract_chart_from_text` regex → CHART event |
|
|
192
|
+
|
|
193
|
+
`chart_emitted` flag prevents duplicates across all three rows above (the two primary paths plus the text fallback).
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## Dynamic connections (UI-created)
|
|
198
|
+
|
|
199
|
+
Users can add connections via **Settings → Add Connection** without editing `config.yaml`:
|
|
200
|
+
|
|
201
|
+
1. `POST /api/settings/connections` → creates `Integration` in DB + calls `register_engine()`
|
|
202
|
+
2. `DELETE /api/settings/connections/{name}` → removes from DB + calls `unregister_engine()`
|
|
203
|
+
3. On server restart, `_seed_integrations()` in `main.py` reloads all integrations from DB
|
|
204
|
+
|
|
205
|
+
The Snowflake engine supports `with_sso_user()`, `with_private_key()`, `with_pat_token()`, `with_oauth_token()` clone methods — these are called by `resolver.py`, never from agent code.
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Multi-turn conversation history
|
|
210
|
+
|
|
211
|
+
`build_history()` in `agent/history.py` converts DB rows to LangChain messages:
|
|
212
|
+
|
|
213
|
+
- **User TEXT** → `HumanMessage`
|
|
214
|
+
- **TOOL_CALL + TOOL_RESULT pairs** → `AIMessage(tool_calls=[...])` + `ToolMessage`
|
|
215
|
+
- Tool calls always use `tc["id"]` for `tool_call_id` (not the result's stored ID) — avoids Anthropic "unexpected tool_use_id" rejections from orphaned DB records
|
|
216
|
+
- Turns with no useful content → **skipped** (avoids consecutive HumanMessages)
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Serving the frontend
|
|
221
|
+
|
|
222
|
+
`main.py` mounts the built React SPA after registering all API routes:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
_dist = Path(os.getenv("FRONTEND_DIST", "")) or Path(__file__).parents[3] / "frontend" / "dist"
|
|
226
|
+
|
|
227
|
+
if _dist.exists():
|
|
228
|
+
app.mount("/assets", StaticFiles(directory=_dist / "assets"), name="spa-assets")
|
|
229
|
+
|
|
230
|
+
@app.get("/{full_path:path}", include_in_schema=False)
|
|
231
|
+
async def _spa_fallback(full_path: str) -> FileResponse:
|
|
232
|
+
return FileResponse(_dist / "index.html", media_type="text/html")
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
- If `dist/` is absent (dev mode), the server runs API-only and Vite handles the frontend
|
|
236
|
+
- `FRONTEND_DIST` env var overrides the default path (useful in Docker)
|
|
237
|
+
- The catch-all **must be the last route** — FastAPI matches in registration order
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## Adding a new query engine
|
|
242
|
+
|
|
243
|
+
1. Create `engines/<name>/engine.py` implementing `QueryEngine`:
|
|
244
|
+
- Expose four tools: `execute_sql`, `list_tables`, `get_schema`, `preview_table`
|
|
245
|
+
- All tools must catch exceptions and return `{"error": str(e)}` — never raise
|
|
246
|
+
2. Register in `engines/factory.py` → `_engine_cls()` dict
|
|
247
|
+
3. Add connection config to `config.yaml` OR let users add via the Settings UI
|
|
248
|
+
4. Add tool list to `api/settings.py` → `_KNOWN_TOOLS`
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Changing the system prompt
|
|
253
|
+
|
|
254
|
+
Edit `prompts/system_prompt.md`. The prompt is loaded at runtime — no restart needed for changes made via the Settings UI (stored in DB). The `{engine_name}` placeholder is substituted at graph build time.
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Docker
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
# Build (multistage: Node builds frontend, Python 3.12 serves everything)
|
|
262
|
+
docker build -f docker/Dockerfile -t analytics-agent .
|
|
263
|
+
|
|
264
|
+
# Run
|
|
265
|
+
docker run -p 8100:8100 --env-file .env analytics-agent
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
GitHub Actions (`.github/workflows/docker.yml`) builds and pushes to GHCR on every push to `main` and version tags.
|
|
269
|
+
|
|
270
|
+
---
|
|
271
|
+
|
|
272
|
+
## Common pitfalls
|
|
273
|
+
|
|
274
|
+
**Do not** use `create_react_agent` from `langgraph.prebuilt` — deprecated in LangGraph v1. Use `create_agent` from `langchain.agents` with `system_prompt=` (string).
|
|
275
|
+
|
|
276
|
+
**Do not** pass `temperature=0` to `ChatAnthropic` with `claude-opus-4-7` — sampling parameters are removed on this model.
|
|
277
|
+
|
|
278
|
+
**Do not** use `EventSource` in the frontend — the chat endpoint is a POST. Use `fetch()` + `ReadableStream` (`frontend/src/api/stream.ts`).
|
|
279
|
+
|
|
280
|
+
**Do not** thread credential fields through `graph.py` — use `resolver.py` to get a pre-configured engine and pass the engine object.
|
|
281
|
+
|
|
282
|
+
**Do not** store chart Vega-Lite specs as the tool return value — use the `_pending_charts` side-channel.
|
|
283
|
+
|
|
284
|
+
**Do not** start the backend without loading `.env`. `main.py` calls `load_dotenv()` automatically, so no manual step is needed, but the required variables must be defined in `.env`.
|
|
285
|
+
|
|
286
|
+
**The DB engine is lazy**: `db/base.py` creates the SQLAlchemy async engine on first use. This prevents the sync Alembic migration from deadlocking with the async engine at startup.
|
|
287
|
+
|
|
288
|
+
**`chat.py` uses its own session**: `_event_stream` opens a fresh `AsyncSession` independent of the `Depends(get_session)` session — FastAPI closes `Depends` sessions before `StreamingResponse` iterates the generator.
|
|
289
|
+
|
|
290
|
+
**Snowflake Decimal/date types**: `_run_query` in `snowflake/engine.py` coerces `Decimal` → `int`/`float` and `datetime` → ISO string before serialisation. Do not remove this — `orjson` rejects `Decimal`.
|