datahub-analytics-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. datahub_analytics_agent-0.1.0/.env.example +58 -0
  2. datahub_analytics_agent-0.1.0/.github/workflows/ci.yml +120 -0
  3. datahub_analytics_agent-0.1.0/.github/workflows/docker.yml +57 -0
  4. datahub_analytics_agent-0.1.0/.github/workflows/publish.yml +67 -0
  5. datahub_analytics_agent-0.1.0/.gitignore +36 -0
  6. datahub_analytics_agent-0.1.0/.pre-commit-config.yaml +9 -0
  7. datahub_analytics_agent-0.1.0/AGENTS.md +290 -0
  8. datahub_analytics_agent-0.1.0/CLAUDE.md +66 -0
  9. datahub_analytics_agent-0.1.0/PKG-INFO +328 -0
  10. datahub_analytics_agent-0.1.0/README.md +270 -0
  11. datahub_analytics_agent-0.1.0/alembic.ini +38 -0
  12. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/__init__.py +0 -0
  13. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/__init__.py +0 -0
  14. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/analysis.py +149 -0
  15. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/chart_generator.py +70 -0
  16. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/chart_tool.py +103 -0
  17. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/compaction.py +57 -0
  18. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/compactor_registry.py +22 -0
  19. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/graph.py +121 -0
  20. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/history.py +159 -0
  21. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/llm.py +87 -0
  22. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/mock_llm.py +111 -0
  23. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/state.py +13 -0
  24. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/agent/streaming.py +304 -0
  25. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/__init__.py +135 -0
  26. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/chat.py +439 -0
  27. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/conversations.py +244 -0
  28. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/oauth.py +741 -0
  29. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/api/settings.py +1947 -0
  30. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/config.py +236 -0
  31. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/__init__.py +0 -0
  32. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/base.py +26 -0
  33. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/datahub.py +242 -0
  34. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/mcp_platform.py +123 -0
  35. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/native_datahub.py +58 -0
  36. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/context/registry.py +84 -0
  37. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/__init__.py +0 -0
  38. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/env.py +49 -0
  39. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/script.py.mako +25 -0
  40. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/001_init.py +47 -0
  41. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/002_settings_table.py +30 -0
  42. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/003_integrations.py +52 -0
  43. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/004_conversation_quality.py +28 -0
  44. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/alembic/versions/005_context_platforms.py +36 -0
  45. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/base.py +33 -0
  46. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/models.py +137 -0
  47. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/repository.py +294 -0
  48. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/db/types.py +69 -0
  49. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/__init__.py +0 -0
  50. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/base.py +30 -0
  51. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/factory.py +95 -0
  52. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/mcp/__init__.py +0 -0
  53. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/mcp/engine.py +78 -0
  54. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/resolver.py +84 -0
  55. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/snowflake/__init__.py +0 -0
  56. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/snowflake/engine.py +304 -0
  57. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/sqlalchemy/__init__.py +0 -0
  58. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/engines/sqlalchemy/engine.py +163 -0
  59. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/main.py +536 -0
  60. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/__init__.py +0 -0
  61. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/chart.py +101 -0
  62. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/system.py +33 -0
  63. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/prompts/system_prompt.md +184 -0
  64. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/__init__.py +0 -0
  65. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/datahub_skills.py +409 -0
  66. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/improve-context/SKILL.md +73 -0
  67. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/loader.py +162 -0
  68. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/publish-analysis/SKILL.md +99 -0
  69. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/save-correction/SKILL.md +161 -0
  70. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/skills/search-business-context/SKILL.md +109 -0
  71. datahub_analytics_agent-0.1.0/backend/src/analytics_agent/tracing.py +88 -0
  72. datahub_analytics_agent-0.1.0/config.demo.yaml +17 -0
  73. datahub_analytics_agent-0.1.0/config.yaml.example +41 -0
  74. datahub_analytics_agent-0.1.0/docker/Dockerfile +32 -0
  75. datahub_analytics_agent-0.1.0/docker-compose.yml +34 -0
  76. datahub_analytics_agent-0.1.0/frontend/eslint.config.js +31 -0
  77. datahub_analytics_agent-0.1.0/frontend/index.html +13 -0
  78. datahub_analytics_agent-0.1.0/frontend/package.json +51 -0
  79. datahub_analytics_agent-0.1.0/frontend/pnpm-lock.yaml +4768 -0
  80. datahub_analytics_agent-0.1.0/frontend/postcss.config.js +6 -0
  81. datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-favicon.svg +12 -0
  82. datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-color.svg +17 -0
  83. datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-dark-bg.svg +12 -0
  84. datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-mono.svg +12 -0
  85. datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-white.svg +12 -0
  86. datahub_analytics_agent-0.1.0/frontend/public/analytics-agent-logo-wordmark.svg +17 -0
  87. datahub_analytics_agent-0.1.0/frontend/src/App.tsx +111 -0
  88. datahub_analytics_agent-0.1.0/frontend/src/api/conversations.ts +61 -0
  89. datahub_analytics_agent-0.1.0/frontend/src/api/oauth.ts +118 -0
  90. datahub_analytics_agent-0.1.0/frontend/src/api/settings.ts +322 -0
  91. datahub_analytics_agent-0.1.0/frontend/src/api/stream.ts +74 -0
  92. datahub_analytics_agent-0.1.0/frontend/src/components/Brand/AppLogo.tsx +34 -0
  93. datahub_analytics_agent-0.1.0/frontend/src/components/Brand/DataHubBadge.tsx +40 -0
  94. datahub_analytics_agent-0.1.0/frontend/src/components/Brand/ThemeSwitcher.tsx +42 -0
  95. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/ChatView.tsx +423 -0
  96. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/ContextStatusBar.tsx +158 -0
  97. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/EngineSelector.tsx +31 -0
  98. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/MessageInput.tsx +112 -0
  99. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/MessageList.tsx +72 -0
  100. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/WelcomeView.tsx +204 -0
  101. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/AgentWorkBlock.tsx +210 -0
  102. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ChartMessage.tsx +79 -0
  103. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ErrorMessage.tsx +14 -0
  104. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/SqlMessage.tsx +81 -0
  105. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/TextMessage.tsx +106 -0
  106. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ThinkingMessage.tsx +179 -0
  107. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/TokenBadge.tsx +104 -0
  108. datahub_analytics_agent-0.1.0/frontend/src/components/Chat/messages/ToolCallMessage.tsx +66 -0
  109. datahub_analytics_agent-0.1.0/frontend/src/components/Onboarding/OnboardingWizard.tsx +624 -0
  110. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/ModelSection.tsx +454 -0
  111. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/SettingsModal.tsx +1371 -0
  112. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/SnowflakeAuthSection.tsx +760 -0
  113. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/AddConnectionFlow.tsx +61 -0
  114. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/GenericMcpForm.tsx +194 -0
  115. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/SimpleFormShell.tsx +198 -0
  116. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/TypePicker.tsx +129 -0
  117. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/fields/ArrayField.tsx +59 -0
  118. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/fields/KeyValueField.tsx +70 -0
  119. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/helpers.tsx +200 -0
  120. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/index.ts +29 -0
  121. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/custom-mcp.tsx +22 -0
  122. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/datahub-mcp.tsx +72 -0
  123. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/datahub.tsx +30 -0
  124. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/mysql.tsx +28 -0
  125. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/postgresql.tsx +28 -0
  126. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/snowflake-mcp.tsx +12 -0
  127. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/snowflake.tsx +88 -0
  128. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/plugins/sqlite.tsx +25 -0
  129. datahub_analytics_agent-0.1.0/frontend/src/components/Settings/connections/types.ts +52 -0
  130. datahub_analytics_agent-0.1.0/frontend/src/components/Sidebar/ConversationItem.tsx +38 -0
  131. datahub_analytics_agent-0.1.0/frontend/src/components/Sidebar/Sidebar.tsx +77 -0
  132. datahub_analytics_agent-0.1.0/frontend/src/index.css +282 -0
  133. datahub_analytics_agent-0.1.0/frontend/src/lib/__tests__/buildUiMessages.test.ts +193 -0
  134. datahub_analytics_agent-0.1.0/frontend/src/lib/__tests__/groupMessages.test.ts +138 -0
  135. datahub_analytics_agent-0.1.0/frontend/src/lib/__tests__/stress.test.ts +345 -0
  136. datahub_analytics_agent-0.1.0/frontend/src/lib/buildUiMessages.ts +124 -0
  137. datahub_analytics_agent-0.1.0/frontend/src/lib/groupMessages.ts +64 -0
  138. datahub_analytics_agent-0.1.0/frontend/src/lib/starterPrompts.ts +222 -0
  139. datahub_analytics_agent-0.1.0/frontend/src/main.tsx +10 -0
  140. datahub_analytics_agent-0.1.0/frontend/src/store/connectionSettings.ts +100 -0
  141. datahub_analytics_agent-0.1.0/frontend/src/store/conversations.ts +159 -0
  142. datahub_analytics_agent-0.1.0/frontend/src/store/display.ts +25 -0
  143. datahub_analytics_agent-0.1.0/frontend/src/store/theme.ts +32 -0
  144. datahub_analytics_agent-0.1.0/frontend/src/types/index.ts +104 -0
  145. datahub_analytics_agent-0.1.0/frontend/tailwind.config.ts +33 -0
  146. datahub_analytics_agent-0.1.0/frontend/tsconfig.json +25 -0
  147. datahub_analytics_agent-0.1.0/frontend/tsconfig.node.json +11 -0
  148. datahub_analytics_agent-0.1.0/frontend/vite.config.ts +30 -0
  149. datahub_analytics_agent-0.1.0/justfile +143 -0
  150. datahub_analytics_agent-0.1.0/pyproject.toml +119 -0
  151. datahub_analytics_agent-0.1.0/quickstart.sh +503 -0
  152. datahub_analytics_agent-0.1.0/scripts/datahub_status.py +24 -0
  153. datahub_analytics_agent-0.1.0/scripts/ingest_metadata.py +376 -0
  154. datahub_analytics_agent-0.1.0/scripts/load_sample_data.py +225 -0
  155. datahub_analytics_agent-0.1.0/tests/__init__.py +0 -0
  156. datahub_analytics_agent-0.1.0/tests/e2e/fixtures/index.ts +46 -0
  157. datahub_analytics_agent-0.1.0/tests/e2e/mcp-connection.spec.ts +71 -0
  158. datahub_analytics_agent-0.1.0/tests/e2e/playwright.config.ts +45 -0
  159. datahub_analytics_agent-0.1.0/tests/e2e/stream-conversation-switch.spec.ts +103 -0
  160. datahub_analytics_agent-0.1.0/tests/e2e/stream-switchback.spec.ts +95 -0
  161. datahub_analytics_agent-0.1.0/tests/e2e/token-counting.spec.ts +59 -0
  162. datahub_analytics_agent-0.1.0/tests/integration/__init__.py +0 -0
  163. datahub_analytics_agent-0.1.0/tests/integration/test_datahub_tools.py +158 -0
  164. datahub_analytics_agent-0.1.0/tests/unit/__init__.py +0 -0
  165. datahub_analytics_agent-0.1.0/tests/unit/test_compaction.py +58 -0
  166. datahub_analytics_agent-0.1.0/tests/unit/test_config.py +151 -0
  167. datahub_analytics_agent-0.1.0/tests/unit/test_context_platform_repo.py +115 -0
  168. datahub_analytics_agent-0.1.0/tests/unit/test_llm_provider.py +223 -0
  169. datahub_analytics_agent-0.1.0/tests/unit/test_settings_wire_format.py +334 -0
  170. datahub_analytics_agent-0.1.0/uv.lock +4603 -0
@@ -0,0 +1,58 @@
1
+ # DataHub — referenced via ${VAR} substitution in config.yaml context_platforms
2
+ DATAHUB_GMS_URL=http://localhost:8080
3
+ DATAHUB_GMS_TOKEN=
4
+
5
+ # LLM — set LLM_PROVIDER to "openai", "anthropic", "google", or "bedrock"
6
+ LLM_PROVIDER=openai
7
+
8
+ # OpenAI (used when LLM_PROVIDER=openai)
9
+ OPENAI_API_KEY=
10
+ # LLM_MODEL=gpt-4o # optional override; default: gpt-4o
11
+ # CHART_LLM_MODEL=gpt-4o-mini
12
+
13
+ # Anthropic (used when LLM_PROVIDER=anthropic)
14
+ ANTHROPIC_API_KEY=
15
+ # LLM_MODEL=claude-opus-4-7 # optional override; default: claude-opus-4-7
16
+ # CHART_LLM_MODEL=claude-haiku-4-5
17
+
18
+ # Bedrock (used when LLM_PROVIDER=bedrock) — Anthropic models on AWS Bedrock.
19
+ # Auth uses the standard AWS credential chain (env vars, ~/.aws/credentials,
20
+ # IAM role) by default. Set AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY below
21
+ # to override with explicit keys. AWS_SESSION_TOKEN is optional (STS).
22
+ AWS_REGION=us-west-2
23
+ # AWS_ACCESS_KEY_ID=
24
+ # AWS_SECRET_ACCESS_KEY=
25
+ # AWS_SESSION_TOKEN=
26
+ # Model IDs on Bedrock differ from native Anthropic IDs — use the full
27
+ # inference-profile ID (e.g. "us.anthropic.claude-sonnet-4-5-20250929-v1:0").
28
+ # LLM_MODEL=us.anthropic.claude-sonnet-4-5-20250929-v1:0
29
+ # CHART_LLM_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
30
+
31
+ # Database (pluggable)
32
+ # PostgreSQL (production):
33
+ # DATABASE_URL=postgresql+asyncpg://user:pass@localhost:5432/talk_to_data
34
+ # SQLite (local dev):
35
+ DATABASE_URL=sqlite+aiosqlite:///./data/dev.db
36
+
37
+ # Engine config
38
+ ENGINES_CONFIG=./config.yaml
39
+ SQL_ROW_LIMIT=500
40
+
41
+ # App
42
+ LOG_LEVEL=INFO
43
+ SSE_KEEPALIVE_INTERVAL=15
44
+
45
+ # Observability — OTEL tracing (no-op if endpoint is blank)
46
+ # OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # Jaeger / Grafana Tempo / any OTLP collector
47
+ # OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io
48
+ # OTEL_EXPORTER_OTLP_HEADERS=x-honeycomb-team=YOUR_API_KEY
49
+ # OTEL_SERVICE_NAME=talkster
50
+
51
+ # Snowflake (if using snowflake engine)
52
+ SNOWFLAKE_ACCOUNT=
53
+ SNOWFLAKE_WAREHOUSE=
54
+ SNOWFLAKE_DATABASE=
55
+ SNOWFLAKE_SCHEMA=
56
+ SNOWFLAKE_USER=
57
+ SNOWFLAKE_PASSWORD=
58
+ # SNOWFLAKE_PRIVATE_KEY= # base64-encoded PEM: base64 -i key.p8 | tr -d '\n'
@@ -0,0 +1,120 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ # ── Python backend ──────────────────────────────────────────────────────────
11
+ backend-lint:
12
+ name: Backend — lint & typecheck
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v5
19
+ with:
20
+ enable-cache: true
21
+
22
+ - name: Install dependencies
23
+ run: uv sync --extra dev
24
+
25
+ - name: Ruff lint
26
+ run: uv run ruff check backend/src tests
27
+
28
+ - name: Ruff format check
29
+ run: uv run ruff format --check backend/src tests
30
+
31
+ - name: Mypy typecheck
32
+ run: uv run mypy backend/src/analytics_agent
33
+
34
+ backend-test:
35
+ name: Backend — unit tests
36
+ runs-on: ubuntu-latest
37
+ steps:
38
+ - uses: actions/checkout@v4
39
+
40
+ - name: Install uv
41
+ uses: astral-sh/setup-uv@v5
42
+ with:
43
+ enable-cache: true
44
+
45
+ - name: Install dependencies
46
+ run: uv sync --extra dev
47
+
48
+ - name: Run unit tests
49
+ run: uv run pytest tests/unit -v
50
+
51
+ # ── Playwright e2e ──────────────────────────────────────────────────────────
52
+ e2e:
53
+ name: E2E — Playwright
54
+ runs-on: ubuntu-latest
55
+ steps:
56
+ - uses: actions/checkout@v4
57
+
58
+ - name: Install uv
59
+ uses: astral-sh/setup-uv@v5
60
+ with:
61
+ enable-cache: true
62
+
63
+ - name: Install Python dependencies
64
+ run: uv sync
65
+
66
+ - name: Setup pnpm
67
+ uses: pnpm/action-setup@v4
68
+ with:
69
+ version: latest
70
+
71
+ - name: Setup Node
72
+ uses: actions/setup-node@v4
73
+ with:
74
+ node-version: 20
75
+ cache: pnpm
76
+ cache-dependency-path: frontend/pnpm-lock.yaml
77
+
78
+ - name: Install frontend dependencies
79
+ run: cd frontend && pnpm install --frozen-lockfile
80
+
81
+ - name: Build frontend
82
+ run: cd frontend && pnpm build
83
+
84
+ - name: Install Playwright browsers
85
+ run: cd frontend && npx playwright install chromium --with-deps
86
+
87
+ - name: Run e2e tests
88
+ run: npx --prefix frontend playwright test --config tests/e2e/playwright.config.ts
89
+
90
+ # ── Frontend ────────────────────────────────────────────────────────────────
91
+ frontend-lint:
92
+ name: Frontend — typecheck & lint
93
+ runs-on: ubuntu-latest
94
+ defaults:
95
+ run:
96
+ working-directory: frontend
97
+
98
+ steps:
99
+ - uses: actions/checkout@v4
100
+
101
+ - name: Setup pnpm
102
+ uses: pnpm/action-setup@v4
103
+ with:
104
+ version: latest
105
+
106
+ - name: Setup Node
107
+ uses: actions/setup-node@v4
108
+ with:
109
+ node-version: 20
110
+ cache: pnpm
111
+ cache-dependency-path: frontend/pnpm-lock.yaml
112
+
113
+ - name: Install dependencies
114
+ run: pnpm install --frozen-lockfile
115
+
116
+ - name: TypeScript typecheck + build
117
+ run: pnpm build
118
+
119
+ - name: ESLint
120
+ run: pnpm lint
@@ -0,0 +1,57 @@
1
+ name: Build & push Docker image
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags: ["v*.*.*"]
7
+ pull_request:
8
+ branches: [main]
9
+
10
+ env:
11
+ REGISTRY: ghcr.io
12
+ IMAGE_NAME: ${{ github.repository }}
13
+
14
+ jobs:
15
+ build:
16
+ runs-on: ubuntu-latest
17
+ permissions:
18
+ contents: read
19
+ packages: write
20
+
21
+ steps:
22
+ - name: Checkout
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Set up Docker Buildx
26
+ uses: docker/setup-buildx-action@v3
27
+
28
+ - name: Log in to GitHub Container Registry
29
+ if: github.event_name != 'pull_request'
30
+ uses: docker/login-action@v3
31
+ with:
32
+ registry: ${{ env.REGISTRY }}
33
+ username: ${{ github.actor }}
34
+ password: ${{ secrets.GITHUB_TOKEN }}
35
+
36
+ - name: Extract Docker metadata
37
+ id: meta
38
+ uses: docker/metadata-action@v5
39
+ with:
40
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
41
+ tags: |
42
+ type=ref,event=branch
43
+ type=ref,event=pr
44
+ type=semver,pattern={{version}}
45
+ type=semver,pattern={{major}}.{{minor}}
46
+ type=sha,prefix=sha-,format=short
47
+
48
+ - name: Build and push
49
+ uses: docker/build-push-action@v6
50
+ with:
51
+ context: .
52
+ file: docker/Dockerfile
53
+ push: ${{ github.event_name != 'pull_request' }}
54
+ tags: ${{ steps.meta.outputs.tags }}
55
+ labels: ${{ steps.meta.outputs.labels }}
56
+ cache-from: type=gha
57
+ cache-to: type=gha,mode=max
@@ -0,0 +1,67 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build:
10
+ name: Build distribution
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Install uv
16
+ uses: astral-sh/setup-uv@v5
17
+ with:
18
+ enable-cache: true
19
+
20
+ - name: Build package
21
+ run: uv build
22
+
23
+ - name: Upload dist artifacts
24
+ uses: actions/upload-artifact@v4
25
+ with:
26
+ name: dist
27
+ path: dist/
28
+
29
+ publish-pypi:
30
+ name: Publish to PyPI
31
+ needs: build
32
+ runs-on: ubuntu-latest
33
+ environment:
34
+ name: pypi
35
+ url: https://pypi.org/p/datahub-analytics-agent
36
+ permissions:
37
+ id-token: write
38
+ steps:
39
+ - name: Download dist artifacts
40
+ uses: actions/download-artifact@v4
41
+ with:
42
+ name: dist
43
+ path: dist/
44
+
45
+ - name: Publish to PyPI
46
+ uses: pypa/gh-action-pypi-publish@release/v1
47
+
48
+ github-release:
49
+ name: Create GitHub Release
50
+ needs: build
51
+ runs-on: ubuntu-latest
52
+ permissions:
53
+ contents: write
54
+ steps:
55
+ - uses: actions/checkout@v4
56
+
57
+ - name: Download dist artifacts
58
+ uses: actions/download-artifact@v4
59
+ with:
60
+ name: dist
61
+ path: dist/
62
+
63
+ - name: Create release
64
+ uses: softprops/action-gh-release@v2
65
+ with:
66
+ files: dist/*
67
+ generate_release_notes: true
@@ -0,0 +1,36 @@
1
+ *.env
2
+ !.env.example
3
+ .env.quickstart
4
+ config.yaml
5
+ !config.yaml.example
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ .venv/
10
+ dist/
11
+ build/
12
+ *.egg-info/
13
+ .mypy_cache/
14
+ .ruff_cache/
15
+ .pytest_cache/
16
+ htmlcov/
17
+ data/
18
+ *.db
19
+
20
+ # Frontend
21
+ node_modules/
22
+ frontend/dist/
23
+ frontend/.vite/
24
+
25
+ # IDE
26
+ .idea/
27
+ .vscode/
28
+ *.iml
29
+
30
+ # Logs
31
+ *.log
32
+ nohup.out
33
+
34
+ # Playwright MCP test artifacts
35
+ .playwright-mcp/
36
+ *.png
@@ -0,0 +1,9 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.11.6
4
+ hooks:
5
+ - id: ruff # lint + auto-fix
6
+ args: [--fix]
7
+ files: ^(backend/src|tests)/
8
+ - id: ruff-format # format
9
+ files: ^(backend/src|tests)/
@@ -0,0 +1,290 @@
1
+ # AGENTS.md — Analytics Agent Codebase Guide
2
+
3
+ This file is written for AI coding agents (Claude Code, Cursor, Copilot, etc.) working on the Analytics Agent codebase. Read it before making changes.
4
+
5
+ ---
6
+
7
+ ## Project in one sentence
8
+
9
+ Analytics Agent is a LangGraph-based chat agent that uses **DataHub** tools for metadata context and pluggable **SQL engines** (Snowflake first) to answer natural-language data questions, with Vega-Lite charts rendered inline in a React + Vite UI served by the same FastAPI process.
10
+
11
+ ---
12
+
13
+ ## Running the stack
14
+
15
+ A `justfile` at the repo root covers all common tasks. Install `just` once (`brew install just`), then:
16
+
17
+ ```bash
18
+ just install # uv sync + pnpm install
19
+ just start # build frontend if stale, start backend at :8100
20
+ just port=8102 start # same on a custom port
21
+ just stop # kill the backend
22
+ just nuke # wipe the DB (start from scratch / re-trigger wizard)
23
+ just start-remote # start + print DataHub connection status
24
+ just logs # tail /tmp/analytics_agent.log
25
+ just test # unit tests
26
+ just build # force frontend rebuild
27
+ ```
28
+
29
+ `just start` automatically detects whether `frontend/src` is newer than `frontend/dist` and rebuilds only when needed.
30
+
31
+ ### Without just (manual)
32
+
33
+ ```bash
34
+ uv sync
35
+ cd frontend && pnpm install && pnpm build && cd ..
36
+ uv run uvicorn analytics_agent.main:app --reload --port 8101
37
+ # → http://localhost:8101
38
+ # The setup wizard handles LLM key + connections on first run.
39
+ # Optional: cp .env.example .env to pre-configure credentials.
40
+ ```
41
+
42
+ ### Two-process mode (frontend hot reload)
43
+
44
+ ```bash
45
+ # Terminal 1 — backend (dev)
46
+ uv run uvicorn analytics_agent.main:app --reload --port 8101
47
+
48
+ # Terminal 2 — Vite dev server with HMR
49
+ cd frontend && pnpm dev
50
+ # → http://localhost:5173 (proxies /api/* to :8101)
51
+ ```
52
+
53
+ **DataHub credentials**: run `datahub init --sso --host https://your-instance.acryl.io/gms` once; the app then reads `~/.datahubenv` automatically. Alternatively, set `DATAHUB_GMS_URL` + `DATAHUB_GMS_TOKEN` in `config.yaml` / `.env`.
54
+
55
+ **Database**: SQLite at `./data/dev.db` by default. Alembic runs automatically on startup. For Postgres set `DATABASE_URL=postgresql+asyncpg://...`.
56
+
57
+ ---
58
+
59
+ ## Key file map
60
+
61
+ | Path | What it does |
62
+ |------|-------------|
63
+ | `backend/src/analytics_agent/main.py` | FastAPI app factory + lifespan (runs Alembic, seeds integrations, mounts SPA) |
64
+ | `backend/src/analytics_agent/agent/graph.py` | LangGraph `StateGraph`: ReAct agent → conditional chart node |
65
+ | `backend/src/analytics_agent/agent/streaming.py` | `astream_events` → SSE event dicts; handles `on_tool_error` |
66
+ | `backend/src/analytics_agent/agent/history.py` | Reconstructs LangChain message history from DB rows; pads orphaned tool calls |
67
+ | `backend/src/analytics_agent/agent/chart_tool.py` | `create_chart` LangChain tool; stores spec in `_pending_charts` side-channel |
68
+ | `backend/src/analytics_agent/agent/chart_generator.py` | `chart_node`: runs after SQL results; calls chart LLM → updates `pending_chart` state |
69
+ | `backend/src/analytics_agent/api/chat.py` | `POST /api/conversations/{id}/messages` → `StreamingResponse` (SSE) |
70
+ | `backend/src/analytics_agent/api/settings.py` | Connection CRUD + test + tool toggles + prompt + display settings |
71
+ | `backend/src/analytics_agent/api/oauth.py` | SSO browser flow, PAT storage, OAuth popup flow, credential encryption |
72
+ | `backend/src/analytics_agent/context/datahub.py` | Builds DataHub LangChain tools via `datahub_agent_context.build_langchain_tools()` |
73
+ | `backend/src/analytics_agent/engines/resolver.py` | **Single credential resolution point** — loads Integration + credential from DB |
74
+ | `backend/src/analytics_agent/engines/snowflake/engine.py` | Snowflake `QueryEngine`: execute_sql, list_tables, get_schema, preview_table; SSO/key-pair/PAT auth |
75
+ | `backend/src/analytics_agent/engines/factory.py` | Engine registry; `register_engine` / `unregister_engine` for dynamic connections |
76
+ | `backend/src/analytics_agent/db/models.py` | SQLAlchemy models: Conversation, Message, Integration, IntegrationCredential, Setting |
77
+ | `backend/src/analytics_agent/db/repository.py` | Repos: ConversationRepo, MessageRepo, SettingsRepo, IntegrationRepo, CredentialRepo |
78
+ | `backend/src/analytics_agent/prompts/system_prompt.md` | Agent system prompt (edit here — loaded at runtime) |
79
+ | `frontend/src/components/Chat/ChatView.tsx` | Chat shell; handles welcome-screen → new conversation flow |
80
+ | `frontend/src/components/Chat/WelcomeView.tsx` | Landing screen with LLM greeting, suggestion chips, engine selector |
81
+ | `frontend/src/components/Settings/SnowflakeAuthSection.tsx` | Segmented auth selector: Password / Private Key / SSO / PAT / OAuth |
82
+ | `frontend/src/store/conversations.ts` | Zustand: conversations, messages, engines, streaming state |
83
+ | `frontend/src/store/display.ts` | Zustand: app name, logo, cached LLM greeting |
84
+
85
+ ---
86
+
87
+ ## Integrations + credential architecture
88
+
89
+ Connections are stored in two DB tables:
90
+
91
+ - **`integrations`** — connection topology (account, warehouse, database, user). `source="yaml"` for `config.yaml` connections, `source="ui"` for UI-created ones.
92
+ - **`integration_credentials`** — encrypted auth per connection: `auth_type` ∈ `{sso_externalbrowser, private_key, pat, oauth, password}`.
93
+
94
+ **Credential resolution** happens in `engines/resolver.py::resolve_engine(engine_name, session)`:
95
+ 1. Looks up `integration_credentials` for the engine
96
+ 2. Decrypts the credential and returns a configured engine clone
97
+ 3. Falls back to env vars for `source="yaml"` connections (backwards compat)
98
+
99
+ **Never thread individual credential fields** (`oauth_token`, `sso_user`, etc.) through `graph.py` or agent code. Pass the engine object returned by `resolve_engine`.
100
+
101
+ ```python
102
+ # chat.py — the only place credentials are resolved
103
+ engine = await resolve_engine(engine_name, session)
104
+ graph = build_graph(engine=engine, engine_name=engine_name, ...)
105
+ ```
106
+
107
+ ### Connection write wire format — `{config, secrets}`
108
+
109
+ `PUT /api/settings/connections/{name}` and `POST /api/settings/connections`
110
+ accept a single wire shape:
111
+
112
+ ```jsonc
113
+ {
114
+ "config": { "account": "...", "warehouse": "...", "database": "...", "user": "..." },
115
+ "secrets": { "password": "..." }
116
+ }
117
+ ```
118
+
119
+ - `config` values are merged directly into `integrations.config` (DB) for engines,
120
+ or into the context-platform's config JSON for DataHub.
121
+ - `secrets` keys are validated against each engine's own
122
+ `QueryEngine.secret_env_vars` allow-list (e.g.
123
+ `SnowflakeQueryEngine.secret_env_vars = {"password": "SNOWFLAKE_PASSWORD", ...}`)
124
+ and translated to env-var names before being written to `.env` and
125
+ `os.environ`. Unknown secret keys are rejected with **HTTP 400**. The API
126
+ layer stays ignorant of any particular engine's credential fields.
127
+ - `_upsert_env_vars` always double-quotes every value so PEM blocks and passwords
128
+ with special characters (`#`, `$`, `\`, spaces) round-trip correctly.
129
+
130
+ **How the frontend splits values** — each `ConnectionField` returned by
131
+ `GET /connections` has an optional `secret_key` attribute. If present, the field's
132
+ value is routed to `body.secrets[secret_key]`; otherwise it goes to
133
+ `body.config[key]`. See `splitConnectionValues` in `frontend/src/api/settings.ts`.
134
+
135
+ **Staged follow-up steps (tracked):**
136
+
137
+ - **1 - route `body.secrets` to `integration_credentials`** (encrypted, per-connection).
138
+ Adds a `password` auth_type and a `password` branch in `resolver.py`, plus
139
+ `with_password` on `SnowflakeQueryEngine`. After step 1 lands, `.env` stops accumulating
140
+ per-connection secrets and two Snowflake connections with different passwords can
141
+ coexist without collision.
142
+ - **2 - `GET /api/settings/connections/schemas/{type}`** + frontend renders forms
143
+ generically from the schema. Promotes `QueryEngine.secret_env_vars` into a full
144
+ typed schema (fields, labels, placeholders, required flags) shared with the
145
+ frontend; handler becomes validate -> dispatch.
146
+
147
+ ---
148
+
149
+ ## SSE event flow
150
+
151
+ ```
152
+ POST /api/conversations/{id}/messages
153
+ └─ chat.py: _event_stream()
154
+ ├─ resolve_engine(engine_name, session) → configured engine
155
+ ├─ load conversation history → build_history() → LangChain messages
156
+ ├─ build_graph(engine=engine, ...) → LangGraph compiled graph
157
+ └─ stream_graph_events(graph, ...)
158
+ ├─ on_chat_model_stream → TEXT event
159
+ ├─ on_tool_start → TOOL_CALL event (skipped for create_chart)
160
+ ├─ on_tool_end → SQL / TOOL_RESULT / CHART
161
+ ├─ on_tool_error → TOOL_RESULT (is_error=True)
162
+ ├─ on_chain_end → captures final_state for chart_node charts
163
+ └─ end of stream → CHART (fallback) + COMPLETE
164
+ ```
165
+
166
+ Frontend consumes SSE via `stream.ts` (fetch + ReadableStream, **not** EventSource — needs POST).
167
+
168
+ ---
169
+
170
+ ## LangGraph agent design
171
+
172
+ ```
173
+ START → agent (create_agent) → conditional → chart → END
174
+ ↓ (no SQL rows)
175
+ END
176
+ ```
177
+
178
+ - Use `create_agent` from `langchain.agents` (**not** `create_react_agent` from `langgraph.prebuilt`)
179
+ - System prompt loaded from `prompts/system_prompt.md` at runtime (editable without restart)
180
+ - Tools: DataHub tools (search_documents, search, get_entities, …) + engine tools + `create_chart`
181
+ - `chart_node` fires when `get_last_sql_result(state)` finds an `execute_sql` ToolMessage with rows
182
+
183
+ ---
184
+
185
+ ## Chart generation — three paths
186
+
187
+ | Path | Trigger | How spec reaches frontend |
188
+ |------|---------|--------------------------|
189
+ | `create_chart` tool | Agent calls tool | `_pending_charts[chart_id]` → `on_tool_end` → CHART event |
190
+ | `chart_node` | SQL returned rows | `state.pending_chart` → `on_chain_end` → CHART event |
191
+ | Text fallback | Model writes the spec in a fenced `json` code block in its text reply | `_extract_chart_from_text` regex → CHART event |
192
+
193
+ `chart_emitted` flag prevents duplicates across all three paths.
194
+
195
+ ---
196
+
197
+ ## Dynamic connections (UI-created)
198
+
199
+ Users can add connections via **Settings → Add Connection** without editing `config.yaml`:
200
+
201
+ 1. `POST /api/settings/connections` → creates `Integration` in DB + calls `register_engine()`
202
+ 2. `DELETE /api/settings/connections/{name}` → removes from DB + calls `unregister_engine()`
203
+ 3. On server restart, `_seed_integrations()` in `main.py` reloads all integrations from DB
204
+
205
+ The Snowflake engine supports `with_sso_user()`, `with_private_key()`, `with_pat_token()`, `with_oauth_token()` clone methods — these are called by `resolver.py`, never from agent code.
206
+
207
+ ---
208
+
209
+ ## Multi-turn conversation history
210
+
211
+ `build_history()` in `agent/history.py` converts DB rows to LangChain messages:
212
+
213
+ - **User TEXT** → `HumanMessage`
214
+ - **TOOL_CALL + TOOL_RESULT pairs** → `AIMessage(tool_calls=[...])` + `ToolMessage`
215
+ - Tool calls always use `tc["id"]` for `tool_call_id` (not the result's stored ID) — avoids Anthropic "unexpected tool_use_id" rejections from orphaned DB records
216
+ - Turns with no useful content → **skipped** (avoids consecutive HumanMessages)
217
+
218
+ ---
219
+
220
+ ## Serving the frontend
221
+
222
+ `main.py` mounts the built React SPA after registering all API routes:
223
+
224
+ ```python
225
+ _dist = Path(os.getenv("FRONTEND_DIST", "")) or Path(__file__).parents[3] / "frontend" / "dist"
226
+
227
+ if _dist.exists():
228
+ app.mount("/assets", StaticFiles(directory=_dist / "assets"), name="spa-assets")
229
+
230
+ @app.get("/{full_path:path}", include_in_schema=False)
231
+ async def _spa_fallback(full_path: str) -> FileResponse:
232
+ return FileResponse(_dist / "index.html", media_type="text/html")
233
+ ```
234
+
235
+ - If `dist/` is absent (dev mode), the server runs API-only and Vite handles the frontend
236
+ - `FRONTEND_DIST` env var overrides the default path (useful in Docker)
237
+ - The catch-all **must be the last route** — FastAPI matches in registration order
238
+
239
+ ---
240
+
241
+ ## Adding a new query engine
242
+
243
+ 1. Create `engines/<name>/engine.py` implementing `QueryEngine`:
244
+ - Expose four tools: `execute_sql`, `list_tables`, `get_schema`, `preview_table`
245
+ - All tools must catch exceptions and return `{"error": str(e)}` — never raise
246
+ 2. Register in `engines/factory.py` → `_engine_cls()` dict
247
+ 3. Add connection config to `config.yaml` OR let users add via the Settings UI
248
+ 4. Add tool list to `api/settings.py` → `_KNOWN_TOOLS`
249
+
250
+ ---
251
+
252
+ ## Changing the system prompt
253
+
254
+ Edit `prompts/system_prompt.md`. The prompt is loaded at runtime — no restart needed for changes made via the Settings UI (stored in DB). The `{engine_name}` placeholder is substituted at graph build time.
255
+
256
+ ---
257
+
258
+ ## Docker
259
+
260
+ ```bash
261
+ # Build (multistage: Node builds frontend, Python 3.12 serves everything)
262
+ docker build -f docker/Dockerfile -t analytics-agent .
263
+
264
+ # Run
265
+ docker run -p 8100:8100 --env-file .env analytics-agent
266
+ ```
267
+
268
+ GitHub Actions (`.github/workflows/docker.yml`) builds and pushes to GHCR on every push to `main` and version tags.
269
+
270
+ ---
271
+
272
+ ## Common pitfalls
273
+
274
+ **Do not** use `create_react_agent` from `langgraph.prebuilt` — deprecated in LangGraph v1. Use `create_agent` from `langchain.agents` with `system_prompt=` (string).
275
+
276
+ **Do not** pass `temperature=0` to `ChatAnthropic` with `claude-opus-4-7` — sampling parameters are removed on this model.
277
+
278
+ **Do not** use `EventSource` in the frontend — the chat endpoint is a POST. Use `fetch()` + `ReadableStream` (`frontend/src/api/stream.ts`).
279
+
280
+ **Do not** thread credential fields through `graph.py` — use `resolver.py` to get a pre-configured engine and pass the engine object.
281
+
282
+ **Do not** store chart Vega-Lite specs as the tool return value — use the `_pending_charts` side-channel.
283
+
284
+ **Do not** start the backend without loading `.env` — `main.py` calls `load_dotenv()` automatically so this is handled, but env vars must be in `.env`.
285
+
286
+ **The DB engine is lazy**: `db/base.py` creates the SQLAlchemy async engine on first use. This prevents the sync Alembic migration from deadlocking with the async engine at startup.
287
+
288
+ **`chat.py` uses its own session**: `_event_stream` opens a fresh `AsyncSession` independent of the `Depends(get_session)` session — FastAPI closes `Depends` sessions before `StreamingResponse` iterates the generator.
289
+
290
+ **Snowflake Decimal/date types**: `_run_query` in `snowflake/engine.py` coerces `Decimal` → `int`/`float` and `datetime` → ISO string before serialisation. Do not remove this — `orjson` rejects `Decimal`.