bridgekit 0.2.2__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bridgekit-0.2.2 → bridgekit-0.3.1}/PKG-INFO +127 -68
- bridgekit-0.3.1/README.md +260 -0
- bridgekit-0.3.1/bridgekit/__init__.py +6 -0
- bridgekit-0.3.1/bridgekit/config.py +1 -0
- bridgekit-0.3.1/bridgekit/planner.py +75 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit/reviewer.py +11 -14
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit/search.py +2 -1
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit.egg-info/PKG-INFO +127 -68
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit.egg-info/SOURCES.txt +3 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/pyproject.toml +1 -1
- bridgekit-0.3.1/tests/test_planner.py +180 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/tests/test_reviewer.py +3 -5
- bridgekit-0.2.2/README.md +0 -201
- bridgekit-0.2.2/bridgekit/__init__.py +0 -5
- {bridgekit-0.2.2 → bridgekit-0.3.1}/LICENSE +0 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit.egg-info/dependency_links.txt +0 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit.egg-info/requires.txt +0 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/bridgekit.egg-info/top_level.txt +0 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/setup.cfg +0 -0
- {bridgekit-0.2.2 → bridgekit-0.3.1}/tests/test_search.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bridgekit
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: AI tools that make you a better data scientist, not a redundant one.
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/getbridgekit/bridgekit
|
|
@@ -32,54 +32,7 @@ Dynamic: license-file
|
|
|
32
32
|
|
|
33
33
|
Data scientists are not being replaced — they're being asked to do more with less context, less time, and more pressure to be right. Bridgekit is a growing suite of small, focused tools that bring AI into your existing workflow to sharpen your thinking, catch your blind spots, and level up your craft.
|
|
34
34
|
|
|
35
|
-
No new interface to learn.
|
|
36
|
-
|
|
37
|
-
---
|
|
38
|
-
|
|
39
|
-
## Tool #1: Analysis Reviewer
|
|
40
|
-
|
|
41
|
-
Write your findings the way you normally would. Bridgekit reads them and gives you the feedback a senior data scientist would — before you walk into the meeting.
|
|
42
|
-
|
|
43
|
-
```python
|
|
44
|
-
from bridgekit import evaluate
|
|
45
|
-
|
|
46
|
-
text = """
|
|
47
|
-
I analyzed 90 days of user behavior data to understand what drives subscription
|
|
48
|
-
upgrades. Users who engaged with the reporting feature within their first week
|
|
49
|
-
were 3x more likely to upgrade within 30 days. I recommend we prioritize
|
|
50
|
-
onboarding users to reporting as a growth lever.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
evaluate(text)
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
**Output:**
|
|
57
|
-
|
|
58
|
-
```
|
|
59
|
-
BRIDGEKIT FEEDBACK
|
|
60
|
-
─────────────────────────────────────────
|
|
61
|
-
|
|
62
|
-
✅ LOGIC
|
|
63
|
-
Your conclusion follows from the data. The 3x lift is a meaningful signal
|
|
64
|
-
worth acting on.
|
|
65
|
-
|
|
66
|
-
⚠️ WHAT'S MISSING
|
|
67
|
-
- Did you control for user intent? Users who explore reporting features may
|
|
68
|
-
already be power users likely to upgrade regardless.
|
|
69
|
-
- What's the sample size behind the 3x figure?
|
|
70
|
-
- Is this correlation or did you establish any causal direction?
|
|
71
|
-
|
|
72
|
-
🎯 WEAKEST POINT
|
|
73
|
-
"I recommend we prioritize onboarding to reporting" is a big leap from an
|
|
74
|
-
observational finding. A senior DS would push back on this in the meeting.
|
|
75
|
-
|
|
76
|
-
💡 LEVEL UP
|
|
77
|
-
Look into selection bias and how to address it — this analysis would be
|
|
78
|
-
significantly stronger with a matched cohort or an experiment to validate
|
|
79
|
-
the finding.
|
|
80
|
-
|
|
81
|
-
─────────────────────────────────────────
|
|
82
|
-
```
|
|
35
|
+
No new interface to learn. Just better work.
|
|
83
36
|
|
|
84
37
|
---
|
|
85
38
|
|
|
@@ -112,34 +65,87 @@ export ANTHROPIC_API_KEY=your_key_here
|
|
|
112
65
|
|
|
113
66
|
## Getting Started
|
|
114
67
|
|
|
115
|
-
|
|
68
|
+
Set your API key before launching Jupyter:
|
|
116
69
|
|
|
117
70
|
```bash
|
|
118
|
-
|
|
71
|
+
export ANTHROPIC_API_KEY=your_key_here
|
|
72
|
+
jupyter notebook
|
|
119
73
|
```
|
|
120
74
|
|
|
121
|
-
|
|
75
|
+
Then import whichever tool you need:
|
|
122
76
|
|
|
123
|
-
|
|
77
|
+
```python
|
|
78
|
+
from bridgekit import evaluate, plan, ask
|
|
79
|
+
```
|
|
124
80
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
81
|
+
**Review a writeup:**
|
|
82
|
+
```python
|
|
83
|
+
print(evaluate("I analyzed 90 days of user behavior data. Users who engaged with the reporting feature were 3x more likely to upgrade."))
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Plan your analytical approach:**
|
|
87
|
+
```python
|
|
88
|
+
print(plan("Did our onboarding flow reduce churn?"))
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Search past reports:**
|
|
92
|
+
```python
|
|
93
|
+
print(ask("What drove churn in Q3?", source="reports/"))
|
|
128
94
|
```
|
|
129
95
|
|
|
130
|
-
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Tool #1: Analysis Reviewer
|
|
99
|
+
|
|
100
|
+
Write your findings the way you normally would. Bridgekit reads them and gives you the feedback a senior data scientist would — before you walk into the meeting.
|
|
131
101
|
|
|
132
102
|
```python
|
|
133
103
|
from bridgekit import evaluate
|
|
134
104
|
|
|
135
105
|
text = """
|
|
136
|
-
|
|
106
|
+
I analyzed 90 days of user behavior data to understand what drives subscription
|
|
107
|
+
upgrades. Users who engaged with the reporting feature within their first week
|
|
108
|
+
were 3x more likely to upgrade within 30 days. I recommend we prioritize
|
|
109
|
+
onboarding users to reporting as a growth lever.
|
|
137
110
|
"""
|
|
138
111
|
|
|
139
112
|
print(evaluate(text))
|
|
140
113
|
```
|
|
141
114
|
|
|
142
|
-
|
|
115
|
+
**Output:**
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
BRIDGEKIT ANALYSIS REVIEW
|
|
119
|
+
─────────────────────────────────────────
|
|
120
|
+
|
|
121
|
+
1. CLARITY
|
|
122
|
+
✅ STRONG — Clean, concise, and jargon-free. Any stakeholder could read
|
|
123
|
+
this and immediately understand the claim and the recommendation.
|
|
124
|
+
|
|
125
|
+
2. STATISTICAL RIGOR
|
|
126
|
+
⚠️ NEEDS WORK — "3x more likely" is a compelling number, but critical
|
|
127
|
+
context is missing. How many users are in each group? What's the base
|
|
128
|
+
upgrade rate? There's no confidence interval or p-value, so we can't
|
|
129
|
+
assess whether this difference is statistically significant or noise.
|
|
130
|
+
|
|
131
|
+
3. METHODOLOGY
|
|
132
|
+
❌ MISSING — This reads as a pure correlation finding, but the
|
|
133
|
+
recommendation implies causation. Users who explore reporting in week one
|
|
134
|
+
may simply be more motivated or already closer to upgrading. Without
|
|
135
|
+
addressing the self-selection problem, this recommendation is not
|
|
136
|
+
defensible.
|
|
137
|
+
|
|
138
|
+
4. BUSINESS IMPACT
|
|
139
|
+
⚠️ NEEDS WORK — "Growth lever" is directional, not quantified. Translate
|
|
140
|
+
the 3x lift into projected revenue or upgrade volume so leadership can
|
|
141
|
+
prioritize this against competing initiatives.
|
|
142
|
+
|
|
143
|
+
─────────────────────────────────────────
|
|
144
|
+
BOTTOM LINE
|
|
145
|
+
You must address the correlation-vs-causation gap before presenting —
|
|
146
|
+
otherwise you risk recommending an onboarding investment that targets a
|
|
147
|
+
symptom of upgrade intent rather than a cause of it.
|
|
148
|
+
```
|
|
143
149
|
|
|
144
150
|
---
|
|
145
151
|
|
|
@@ -186,33 +192,86 @@ churn rate of 4.5%:
|
|
|
186
192
|
|
|
187
193
|
---
|
|
188
194
|
|
|
189
|
-
##
|
|
195
|
+
## Tool #3: Analysis Planner
|
|
190
196
|
|
|
191
|
-
|
|
197
|
+
Describe your analytical problem and get a structured plan for the right approach — before you start the analysis.
|
|
192
198
|
|
|
193
|
-
|
|
199
|
+
Covers the recommended method, why it fits your problem, key assumptions, common pitfalls, and alternatives.
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from bridgekit import plan
|
|
203
|
+
|
|
204
|
+
print(plan(
|
|
205
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
206
|
+
data_description="We are running an A/B test with ~1,000 users split between old and new onboarding. Key variables will include upgrade status, time to upgrade, acquisition channel, and plan tier.",
|
|
207
|
+
goal="causal inference"
|
|
208
|
+
))
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
212
|
+
|
|
213
|
+
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
214
|
+
|
|
215
|
+
**Output:**
|
|
216
|
+
```
|
|
217
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
218
|
+
─────────────────────────────────────────
|
|
219
|
+
|
|
220
|
+
RECOMMENDED APPROACH
|
|
221
|
+
Two-sample proportion test (z-test or Fisher's exact) for the primary
|
|
222
|
+
analysis, since you have a randomized experiment with a binary outcome
|
|
223
|
+
and want to estimate the causal effect of the new onboarding flow on
|
|
224
|
+
upgrade rates.
|
|
225
|
+
|
|
226
|
+
WHY THIS APPROACH
|
|
227
|
+
Randomization handles confounding, so you don't need regression
|
|
228
|
+
adjustment to get an unbiased causal estimate. With 500 per group,
|
|
229
|
+
you have reasonable power for detecting meaningful differences (~80%
|
|
230
|
+
power for a 7-8 percentage point lift from a 20% baseline).
|
|
231
|
+
|
|
232
|
+
KEY ASSUMPTIONS
|
|
233
|
+
- Randomization was correctly implemented (no selection bias)
|
|
234
|
+
- No interference between users
|
|
235
|
+
- SUTVA: each user has a single well-defined treatment version
|
|
236
|
+
- Outcome measurement is complete (watch for differential dropout)
|
|
237
|
+
- Users in both arms had equal opportunity to upgrade
|
|
238
|
+
|
|
239
|
+
WATCH OUT FOR
|
|
240
|
+
Peeking and early stopping — if you're checking results repeatedly
|
|
241
|
+
before the experiment concludes, your p-values are invalid. Decide
|
|
242
|
+
your sample size and analysis time upfront.
|
|
243
|
+
|
|
244
|
+
ALTERNATIVES
|
|
245
|
+
- Logistic regression with covariates (channel, plan tier): use if you
|
|
246
|
+
discover post-hoc imbalance or want to tighten confidence intervals
|
|
247
|
+
- Survival analysis (Cox model): use if time-to-upgrade matters as
|
|
248
|
+
much as whether users upgrade
|
|
249
|
+
─────────────────────────────────────────
|
|
250
|
+
```
|
|
194
251
|
|
|
195
252
|
---
|
|
196
253
|
|
|
197
|
-
## Why
|
|
254
|
+
## Why not just use Claude?
|
|
198
255
|
|
|
199
|
-
|
|
256
|
+
You could. But you'd need to know what to ask, how to frame it, and what a good answer looks like. Bridgekit has that baked in — it knows you're a data scientist presenting findings, so it asks the right questions automatically. No prompt engineering required. Just paste your work and run it.
|
|
257
|
+
|
|
258
|
+
It also lives in your Jupyter notebook, so there's no context switching. You stay in your workflow.
|
|
200
259
|
|
|
201
260
|
---
|
|
202
261
|
|
|
203
|
-
##
|
|
262
|
+
## Why a library and not a chatbot?
|
|
204
263
|
|
|
205
|
-
|
|
264
|
+
Because your analysis already lives in a notebook. Bridgekit meets you there. A chatbot asks you to re-explain your work from scratch every time. Bridgekit is one function call — consistent, reproducible, and fast.
|
|
206
265
|
|
|
207
266
|
---
|
|
208
267
|
|
|
209
268
|
## What's next?
|
|
210
269
|
|
|
211
|
-
Bridgekit is a suite, not a one-off.
|
|
270
|
+
Bridgekit is a suite, not a one-off. Three tools are live — more are coming:
|
|
212
271
|
|
|
213
|
-
- **Statistical approach suggester** — describe your problem in plain English, get the right test and why
|
|
214
272
|
- **Stakeholder translator** — turn your technical findings into a narrative a non-technical audience will actually follow
|
|
215
273
|
- **Assumption checker** — state your analytical assumptions, get the ones you missed
|
|
274
|
+
- **Multi-model support** — use any LLM provider (OpenAI, Gemini, open source models via OpenRouter) instead of being tied to Anthropic
|
|
216
275
|
|
|
217
276
|
Each tool is small, focused, and built for the way data scientists actually work.
|
|
218
277
|
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# Bridgekit
|
|
2
|
+
|
|
3
|
+
**AI tools that make you a better data scientist, not a redundant one.**
|
|
4
|
+
|
|
5
|
+
Data scientists are not being replaced — they're being asked to do more with less context, less time, and more pressure to be right. Bridgekit is a growing suite of small, focused tools that bring AI into your existing workflow to sharpen your thinking, catch your blind spots, and level up your craft.
|
|
6
|
+
|
|
7
|
+
No new interface to learn. Just better work.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
**Standard install:**
|
|
14
|
+
```bash
|
|
15
|
+
pip install bridgekit
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**In a virtual environment (recommended for clean setups):**
|
|
19
|
+
```bash
|
|
20
|
+
python -m venv .venv
|
|
21
|
+
source .venv/bin/activate
|
|
22
|
+
pip install bridgekit
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**In a Jupyter notebook:**
|
|
26
|
+
```python
|
|
27
|
+
!pip install bridgekit
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Requires an Anthropic API key:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
export ANTHROPIC_API_KEY=your_key_here
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Getting Started
|
|
39
|
+
|
|
40
|
+
Set your API key before launching Jupyter:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
export ANTHROPIC_API_KEY=your_key_here
|
|
44
|
+
jupyter notebook
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Then import whichever tool you need:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from bridgekit import evaluate, plan, ask
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Review a writeup:**
|
|
54
|
+
```python
|
|
55
|
+
print(evaluate("I analyzed 90 days of user behavior data. Users who engaged with the reporting feature were 3x more likely to upgrade."))
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Plan your analytical approach:**
|
|
59
|
+
```python
|
|
60
|
+
print(plan("Did our onboarding flow reduce churn?"))
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Search past reports:**
|
|
64
|
+
```python
|
|
65
|
+
print(ask("What drove churn in Q3?", source="reports/"))
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Tool #1: Analysis Reviewer
|
|
71
|
+
|
|
72
|
+
Write your findings the way you normally would. Bridgekit reads them and gives you the feedback a senior data scientist would — before you walk into the meeting.
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from bridgekit import evaluate
|
|
76
|
+
|
|
77
|
+
text = """
|
|
78
|
+
I analyzed 90 days of user behavior data to understand what drives subscription
|
|
79
|
+
upgrades. Users who engaged with the reporting feature within their first week
|
|
80
|
+
were 3x more likely to upgrade within 30 days. I recommend we prioritize
|
|
81
|
+
onboarding users to reporting as a growth lever.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
print(evaluate(text))
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Output:**
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
BRIDGEKIT ANALYSIS REVIEW
|
|
91
|
+
─────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
1. CLARITY
|
|
94
|
+
✅ STRONG — Clean, concise, and jargon-free. Any stakeholder could read
|
|
95
|
+
this and immediately understand the claim and the recommendation.
|
|
96
|
+
|
|
97
|
+
2. STATISTICAL RIGOR
|
|
98
|
+
⚠️ NEEDS WORK — "3x more likely" is a compelling number, but critical
|
|
99
|
+
context is missing. How many users are in each group? What's the base
|
|
100
|
+
upgrade rate? There's no confidence interval or p-value, so we can't
|
|
101
|
+
assess whether this difference is statistically significant or noise.
|
|
102
|
+
|
|
103
|
+
3. METHODOLOGY
|
|
104
|
+
❌ MISSING — This reads as a pure correlation finding, but the
|
|
105
|
+
recommendation implies causation. Users who explore reporting in week one
|
|
106
|
+
may simply be more motivated or already closer to upgrading. Without
|
|
107
|
+
addressing the self-selection problem, this recommendation is not
|
|
108
|
+
defensible.
|
|
109
|
+
|
|
110
|
+
4. BUSINESS IMPACT
|
|
111
|
+
⚠️ NEEDS WORK — "Growth lever" is directional, not quantified. Translate
|
|
112
|
+
the 3x lift into projected revenue or upgrade volume so leadership can
|
|
113
|
+
prioritize this against competing initiatives.
|
|
114
|
+
|
|
115
|
+
─────────────────────────────────────────
|
|
116
|
+
BOTTOM LINE
|
|
117
|
+
You must address the correlation-vs-causation gap before presenting —
|
|
118
|
+
otherwise you risk recommending an onboarding investment that targets a
|
|
119
|
+
symptom of upgrade intent rather than a cause of it.
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Tool #2: Analysis Search
|
|
125
|
+
|
|
126
|
+
Ask questions across a collection of your past analysis documents. Point it at a folder and get answers grounded in your actual work — no digging through files manually.
|
|
127
|
+
|
|
128
|
+
Uses a vector database and semantic similarity to find relevant context across your documents — not keyword matching.
|
|
129
|
+
|
|
130
|
+
Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
|
|
131
|
+
|
|
132
|
+
> **Note:** The first run will download the MiniLM embedding model (~90MB). This is a one-time download — it gets cached locally for all subsequent calls.
|
|
133
|
+
|
|
134
|
+
**From a folder:**
|
|
135
|
+
```python
|
|
136
|
+
from bridgekit import ask
|
|
137
|
+
|
|
138
|
+
print(ask("what drove churn in Q3?", source="reports/"))
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**From raw text:**
|
|
142
|
+
```python
|
|
143
|
+
from bridgekit import ask
|
|
144
|
+
|
|
145
|
+
text = """
|
|
146
|
+
Q3 churn rose to 4.5%, driven by a product outage in August and a pricing
|
|
147
|
+
change in July that increased SMB costs by 12%.
|
|
148
|
+
"""
|
|
149
|
+
|
|
150
|
+
print(ask("what caused the Q3 churn spike?", text=text))
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Output** *(based on sample data included in the repo)*:
|
|
154
|
+
```
|
|
155
|
+
Based on the Q3 2024 Churn Analysis, two primary factors drove the elevated
|
|
156
|
+
churn rate of 4.5%:
|
|
157
|
+
|
|
158
|
+
1. August Product Outage — A 14-hour outage affected 3,800 accounts. Impacted
|
|
159
|
+
accounts churned at 8.1% vs 3.2% for unaffected accounts.
|
|
160
|
+
|
|
161
|
+
2. July Pricing Change — SMB costs increased by an average of 12%, causing SMB
|
|
162
|
+
churn to spike to 7.2% — the highest single-month figure in the dataset.
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Tool #3: Analysis Planner
|
|
168
|
+
|
|
169
|
+
Describe your analytical problem and get a structured plan for the right approach — before you start the analysis.
|
|
170
|
+
|
|
171
|
+
Covers the recommended method, why it fits your problem, key assumptions, common pitfalls, and alternatives.
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
from bridgekit import plan
|
|
175
|
+
|
|
176
|
+
print(plan(
|
|
177
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
178
|
+
data_description="We are running an A/B test with ~1,000 users split between old and new onboarding. Key variables will include upgrade status, time to upgrade, acquisition channel, and plan tier.",
|
|
179
|
+
goal="causal inference"
|
|
180
|
+
))
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
184
|
+
|
|
185
|
+
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
186
|
+
|
|
187
|
+
**Output:**
|
|
188
|
+
```
|
|
189
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
190
|
+
─────────────────────────────────────────
|
|
191
|
+
|
|
192
|
+
RECOMMENDED APPROACH
|
|
193
|
+
Two-sample proportion test (z-test or Fisher's exact) for the primary
|
|
194
|
+
analysis, since you have a randomized experiment with a binary outcome
|
|
195
|
+
and want to estimate the causal effect of the new onboarding flow on
|
|
196
|
+
upgrade rates.
|
|
197
|
+
|
|
198
|
+
WHY THIS APPROACH
|
|
199
|
+
Randomization handles confounding, so you don't need regression
|
|
200
|
+
adjustment to get an unbiased causal estimate. With 500 per group,
|
|
201
|
+
you have reasonable power for detecting meaningful differences (~80%
|
|
202
|
+
power for a 7-8 percentage point lift from a 20% baseline).
|
|
203
|
+
|
|
204
|
+
KEY ASSUMPTIONS
|
|
205
|
+
- Randomization was correctly implemented (no selection bias)
|
|
206
|
+
- No interference between users
|
|
207
|
+
- SUTVA: each user has a single well-defined treatment version
|
|
208
|
+
- Outcome measurement is complete (watch for differential dropout)
|
|
209
|
+
- Users in both arms had equal opportunity to upgrade
|
|
210
|
+
|
|
211
|
+
WATCH OUT FOR
|
|
212
|
+
Peeking and early stopping — if you're checking results repeatedly
|
|
213
|
+
before the experiment concludes, your p-values are invalid. Decide
|
|
214
|
+
your sample size and analysis time upfront.
|
|
215
|
+
|
|
216
|
+
ALTERNATIVES
|
|
217
|
+
- Logistic regression with covariates (channel, plan tier): use if you
|
|
218
|
+
discover post-hoc imbalance or want to tighten confidence intervals
|
|
219
|
+
- Survival analysis (Cox model): use if time-to-upgrade matters as
|
|
220
|
+
much as whether users upgrade
|
|
221
|
+
─────────────────────────────────────────
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Why not just use Claude?
|
|
227
|
+
|
|
228
|
+
You could. But you'd need to know what to ask, how to frame it, and what a good answer looks like. Bridgekit has that baked in — it knows you're a data scientist presenting findings, so it asks the right questions automatically. No prompt engineering required. Just paste your work and run it.
|
|
229
|
+
|
|
230
|
+
It also lives in your Jupyter notebook, so there's no context switching. You stay in your workflow.
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Why a library and not a chatbot?
|
|
235
|
+
|
|
236
|
+
Because your analysis already lives in a notebook. Bridgekit meets you there. A chatbot asks you to re-explain your work from scratch every time. Bridgekit is one function call — consistent, reproducible, and fast.
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## What's next?
|
|
241
|
+
|
|
242
|
+
Bridgekit is a suite, not a one-off. Three tools are live — more are coming:
|
|
243
|
+
|
|
244
|
+
- **Stakeholder translator** — turn your technical findings into a narrative a non-technical audience will actually follow
|
|
245
|
+
- **Assumption checker** — state your analytical assumptions, get the ones you missed
|
|
246
|
+
- **Multi-model support** — use any LLM provider (OpenAI, Gemini, open source models via OpenRouter) instead of being tied to Anthropic
|
|
247
|
+
|
|
248
|
+
Each tool is small, focused, and built for the way data scientists actually work.
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Contributing
|
|
253
|
+
|
|
254
|
+
Bridgekit is open source and early. If you're a data scientist and something here would genuinely save you time or make you sharper — open an issue, submit a PR, or just tell me what's missing.
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## License
|
|
259
|
+
|
|
260
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
DEFAULT_MODEL = "claude-opus-4-6"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import anthropic
|
|
3
|
+
from .config import DEFAULT_MODEL
|
|
4
|
+
|
|
5
|
+
SYSTEM_PROMPT = """You are a senior statistician and data scientist advising a colleague on the right analytical approach for their problem.
|
|
6
|
+
|
|
7
|
+
Given a question, a description of the available data, and the goal of the analysis, recommend the best analytical approach. Be direct and specific — not a textbook, not a list of every possible method.
|
|
8
|
+
|
|
9
|
+
Structure your response exactly like this:
|
|
10
|
+
|
|
11
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
12
|
+
─────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
RECOMMENDED APPROACH
|
|
15
|
+
[Name of the method and one sentence on why it fits this problem]
|
|
16
|
+
|
|
17
|
+
WHY THIS APPROACH
|
|
18
|
+
[2-3 sentences on why this is the right fit given the question, data, and goal]
|
|
19
|
+
|
|
20
|
+
KEY ASSUMPTIONS
|
|
21
|
+
[Bullet list of assumptions this approach requires — flag any that may be violated]
|
|
22
|
+
|
|
23
|
+
WATCH OUT FOR
|
|
24
|
+
[The most common mistake DS make on this type of problem]
|
|
25
|
+
|
|
26
|
+
ALTERNATIVES
|
|
27
|
+
[1-2 alternative approaches and when you'd use them instead]
|
|
28
|
+
|
|
29
|
+
─────────────────────────────────────────
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def plan(question: str, data_description: str = None, goal: str = None) -> str:
|
|
34
|
+
"""
|
|
35
|
+
Recommend the right analytical approach for your problem.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
question: The analytical question you are trying to answer.
|
|
39
|
+
data_description: Optional. A plain text description of your available data.
|
|
40
|
+
goal: Optional. The goal of your analysis (e.g. "causal inference",
|
|
41
|
+
"prediction", "segmentation", "hypothesis testing", "exploration").
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
A structured analytical plan covering the recommended approach, assumptions,
|
|
45
|
+
common pitfalls, and alternatives.
|
|
46
|
+
"""
|
|
47
|
+
if not question or not question.strip():
|
|
48
|
+
raise ValueError("Question cannot be empty.")
|
|
49
|
+
|
|
50
|
+
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
|
51
|
+
if not api_key:
|
|
52
|
+
raise EnvironmentError(
|
|
53
|
+
"ANTHROPIC_API_KEY not found. Set it with: export ANTHROPIC_API_KEY=your_key_here"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
user_message = f"Question: {question}"
|
|
57
|
+
if data_description:
|
|
58
|
+
user_message += f"\n\nData: {data_description}"
|
|
59
|
+
if goal:
|
|
60
|
+
user_message += f"\n\nGoal: {goal}"
|
|
61
|
+
|
|
62
|
+
client = anthropic.Anthropic(api_key=api_key)
|
|
63
|
+
message = client.messages.create(
|
|
64
|
+
model=DEFAULT_MODEL,
|
|
65
|
+
max_tokens=1024,
|
|
66
|
+
system=SYSTEM_PROMPT,
|
|
67
|
+
messages=[
|
|
68
|
+
{
|
|
69
|
+
"role": "user",
|
|
70
|
+
"content": user_message
|
|
71
|
+
}
|
|
72
|
+
]
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
return message.content[0].text
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import anthropic
|
|
3
|
+
from .config import DEFAULT_MODEL
|
|
3
4
|
|
|
4
5
|
SYSTEM_PROMPT = """You are a senior data scientist reviewing a colleague's analysis writeup.
|
|
5
6
|
You are direct, constructive, and specific. You do not flatter — you help people improve.
|
|
6
7
|
|
|
7
|
-
Evaluate the writeup across exactly these five dimensions:
|
|
8
|
+
Evaluate the writeup across exactly these four dimensions:
|
|
8
9
|
|
|
9
|
-
1. CLARITY — Is it free of jargon? Could someone outside data science read this without googling anything?
|
|
10
|
-
2.
|
|
11
|
-
3.
|
|
12
|
-
4.
|
|
13
|
-
5. BUSINESS IMPACT — Are outcomes quantified in % or $ terms? Directional statements like "improved performance" are not enough.
|
|
10
|
+
1. CLARITY — Is it free of jargon? Could someone outside data science read this without googling anything? Is it written for the right reader?
|
|
11
|
+
2. STATISTICAL RIGOR — Is there enough data to support the claim? Are sample sizes mentioned? Are confidence levels or uncertainty acknowledged?
|
|
12
|
+
3. METHODOLOGY — Is it clear why this analytical approach was chosen? Are alternatives considered or ruled out?
|
|
13
|
+
4. BUSINESS IMPACT — Are outcomes quantified in % or $ terms? Directional statements like "improved performance" are not enough.
|
|
14
14
|
|
|
15
15
|
For each dimension, give one of three ratings:
|
|
16
16
|
✅ STRONG — this dimension is handled well
|
|
@@ -29,16 +29,13 @@ BRIDGEKIT ANALYSIS REVIEW
|
|
|
29
29
|
1. CLARITY
|
|
30
30
|
[rating] [feedback]
|
|
31
31
|
|
|
32
|
-
2.
|
|
32
|
+
2. STATISTICAL RIGOR
|
|
33
33
|
[rating] [feedback]
|
|
34
34
|
|
|
35
|
-
3.
|
|
35
|
+
3. METHODOLOGY
|
|
36
36
|
[rating] [feedback]
|
|
37
37
|
|
|
38
|
-
4.
|
|
39
|
-
[rating] [feedback]
|
|
40
|
-
|
|
41
|
-
5. BUSINESS IMPACT
|
|
38
|
+
4. BUSINESS IMPACT
|
|
42
39
|
[rating] [feedback]
|
|
43
40
|
|
|
44
41
|
─────────────────────────────────────────
|
|
@@ -54,7 +51,7 @@ def evaluate(text: str) -> str:
|
|
|
54
51
|
text: Your analysis writeup as a plain string.
|
|
55
52
|
|
|
56
53
|
Returns:
|
|
57
|
-
Structured feedback across five dimensions.
|
|
54
|
+
Structured feedback across four dimensions.
|
|
58
55
|
"""
|
|
59
56
|
if not text or not text.strip():
|
|
60
57
|
raise ValueError("Text cannot be empty.")
|
|
@@ -68,7 +65,7 @@ def evaluate(text: str) -> str:
|
|
|
68
65
|
client = anthropic.Anthropic(api_key=api_key)
|
|
69
66
|
|
|
70
67
|
message = client.messages.create(
|
|
71
|
-
model=
|
|
68
|
+
model=DEFAULT_MODEL,
|
|
72
69
|
max_tokens=1024,
|
|
73
70
|
system=SYSTEM_PROMPT,
|
|
74
71
|
messages=[
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
import anthropic
|
|
4
|
+
from .config import DEFAULT_MODEL
|
|
4
5
|
import chromadb
|
|
5
6
|
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
|
6
7
|
|
|
@@ -106,7 +107,7 @@ def ask(question: str, source: str = None, text: str = None) -> str:
|
|
|
106
107
|
# Generate answer with Claude
|
|
107
108
|
anthropic_client = anthropic.Anthropic(api_key=api_key)
|
|
108
109
|
message = anthropic_client.messages.create(
|
|
109
|
-
model=
|
|
110
|
+
model=DEFAULT_MODEL,
|
|
110
111
|
max_tokens=1024,
|
|
111
112
|
system=(
|
|
112
113
|
"You are a senior data scientist answering questions based on analysis reports. "
|