bridgekit 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bridgekit-0.2.1 → bridgekit-0.3.0}/PKG-INFO +61 -4
- {bridgekit-0.2.1 → bridgekit-0.3.0}/README.md +60 -3
- bridgekit-0.3.0/bridgekit/__init__.py +6 -0
- bridgekit-0.3.0/bridgekit/planner.py +74 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit.egg-info/PKG-INFO +61 -4
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit.egg-info/SOURCES.txt +2 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/pyproject.toml +1 -1
- bridgekit-0.3.0/tests/test_planner.py +180 -0
- bridgekit-0.2.1/bridgekit/__init__.py +0 -5
- {bridgekit-0.2.1 → bridgekit-0.3.0}/LICENSE +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit/reviewer.py +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit/search.py +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit.egg-info/dependency_links.txt +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit.egg-info/requires.txt +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/bridgekit.egg-info/top_level.txt +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/setup.cfg +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/tests/test_reviewer.py +0 -0
- {bridgekit-0.2.1 → bridgekit-0.3.0}/tests/test_search.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bridgekit
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: AI tools that make you a better data scientist, not a redundant one.
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/getbridgekit/bridgekit
|
|
@@ -26,8 +26,6 @@ Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
|
26
26
|
Requires-Dist: pytest-mock>=3.0.0; extra == "dev"
|
|
27
27
|
Dynamic: license-file
|
|
28
28
|
|
|
29
|
-
<img src="assets/logo.png" width="150"/>
|
|
30
|
-
|
|
31
29
|
# Bridgekit
|
|
32
30
|
|
|
33
31
|
**AI tools that make you a better data scientist, not a redundant one.**
|
|
@@ -188,6 +186,65 @@ churn rate of 4.5%:
|
|
|
188
186
|
|
|
189
187
|
---
|
|
190
188
|
|
|
189
|
+
## Tool #3: Analysis Planner
|
|
190
|
+
|
|
191
|
+
Describe your analytical problem and get a structured plan for the right approach — before you start the analysis.
|
|
192
|
+
|
|
193
|
+
Covers the recommended method, why it fits your problem, key assumptions, common pitfalls, and alternatives.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from bridgekit import plan
|
|
197
|
+
|
|
198
|
+
print(plan(
|
|
199
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
200
|
+
data_description="1,000 users randomly split 50/50 between old and new onboarding. Variables: upgrade status (binary), time to upgrade (days), acquisition channel, plan tier.",
|
|
201
|
+
goal="causal inference"
|
|
202
|
+
))
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
206
|
+
|
|
207
|
+
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
208
|
+
|
|
209
|
+
**Output:**
|
|
210
|
+
```
|
|
211
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
212
|
+
─────────────────────────────────────────
|
|
213
|
+
|
|
214
|
+
RECOMMENDED APPROACH
|
|
215
|
+
Two-sample proportion test (z-test or Fisher's exact) for the primary
|
|
216
|
+
analysis, since you have a randomized experiment with a binary outcome
|
|
217
|
+
and want to estimate the causal effect of the new onboarding flow on
|
|
218
|
+
upgrade rates.
|
|
219
|
+
|
|
220
|
+
WHY THIS APPROACH
|
|
221
|
+
Randomization handles confounding, so you don't need regression
|
|
222
|
+
adjustment to get an unbiased causal estimate. With 500 per group,
|
|
223
|
+
you have reasonable power for detecting meaningful differences (~80%
|
|
224
|
+
power for a 7-8 percentage point lift from a 20% baseline).
|
|
225
|
+
|
|
226
|
+
KEY ASSUMPTIONS
|
|
227
|
+
- Randomization was correctly implemented (no selection bias)
|
|
228
|
+
- No interference between users
|
|
229
|
+
- SUTVA: each user has a single well-defined treatment version
|
|
230
|
+
- Outcome measurement is complete (watch for differential dropout)
|
|
231
|
+
- Users in both arms had equal opportunity to upgrade
|
|
232
|
+
|
|
233
|
+
WATCH OUT FOR
|
|
234
|
+
Peeking and early stopping — if you're checking results repeatedly
|
|
235
|
+
before the experiment concludes, your p-values are invalid. Decide
|
|
236
|
+
your sample size and analysis time upfront.
|
|
237
|
+
|
|
238
|
+
ALTERNATIVES
|
|
239
|
+
- Logistic regression with covariates (channel, plan tier): use if you
|
|
240
|
+
discover post-hoc imbalance or want to tighten confidence intervals
|
|
241
|
+
- Survival analysis (Cox model): use if time-to-upgrade matters as
|
|
242
|
+
much as whether users upgrade
|
|
243
|
+
─────────────────────────────────────────
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
191
248
|
## Why not just use Claude?
|
|
192
249
|
|
|
193
250
|
You could. But you'd need to know what to ask, how to frame it, and what a good answer looks like. Bridgekit has that baked in — it knows you're a data scientist presenting findings, so it asks the right questions automatically. No prompt engineering required. Just paste your work and run it.
|
|
@@ -210,7 +267,7 @@ Bridgekit only ever sees text you write yourself — your narrative, your conclu
|
|
|
210
267
|
|
|
211
268
|
## What's next?
|
|
212
269
|
|
|
213
|
-
Bridgekit is a suite, not a one-off.
|
|
270
|
+
Bridgekit is a suite, not a one-off. Three tools are live — more are coming:
|
|
214
271
|
|
|
215
272
|
- **Statistical approach suggester** — describe your problem in plain English, get the right test and why
|
|
216
273
|
- **Stakeholder translator** — turn your technical findings into a narrative a non-technical audience will actually follow
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
<img src="assets/logo.png" width="150"/>
|
|
2
|
-
|
|
3
1
|
# Bridgekit
|
|
4
2
|
|
|
5
3
|
**AI tools that make you a better data scientist, not a redundant one.**
|
|
@@ -160,6 +158,65 @@ churn rate of 4.5%:
|
|
|
160
158
|
|
|
161
159
|
---
|
|
162
160
|
|
|
161
|
+
## Tool #3: Analysis Planner
|
|
162
|
+
|
|
163
|
+
Describe your analytical problem and get a structured plan for the right approach — before you start the analysis.
|
|
164
|
+
|
|
165
|
+
Covers the recommended method, why it fits your problem, key assumptions, common pitfalls, and alternatives.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from bridgekit import plan
|
|
169
|
+
|
|
170
|
+
print(plan(
|
|
171
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
172
|
+
data_description="1,000 users randomly split 50/50 between old and new onboarding. Variables: upgrade status (binary), time to upgrade (days), acquisition channel, plan tier.",
|
|
173
|
+
goal="causal inference"
|
|
174
|
+
))
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
178
|
+
|
|
179
|
+
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
180
|
+
|
|
181
|
+
**Output:**
|
|
182
|
+
```
|
|
183
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
184
|
+
─────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
RECOMMENDED APPROACH
|
|
187
|
+
Two-sample proportion test (z-test or Fisher's exact) for the primary
|
|
188
|
+
analysis, since you have a randomized experiment with a binary outcome
|
|
189
|
+
and want to estimate the causal effect of the new onboarding flow on
|
|
190
|
+
upgrade rates.
|
|
191
|
+
|
|
192
|
+
WHY THIS APPROACH
|
|
193
|
+
Randomization handles confounding, so you don't need regression
|
|
194
|
+
adjustment to get an unbiased causal estimate. With 500 per group,
|
|
195
|
+
you have reasonable power for detecting meaningful differences (~80%
|
|
196
|
+
power for a 7-8 percentage point lift from a 20% baseline).
|
|
197
|
+
|
|
198
|
+
KEY ASSUMPTIONS
|
|
199
|
+
- Randomization was correctly implemented (no selection bias)
|
|
200
|
+
- No interference between users
|
|
201
|
+
- SUTVA: each user has a single well-defined treatment version
|
|
202
|
+
- Outcome measurement is complete (watch for differential dropout)
|
|
203
|
+
- Users in both arms had equal opportunity to upgrade
|
|
204
|
+
|
|
205
|
+
WATCH OUT FOR
|
|
206
|
+
Peeking and early stopping — if you're checking results repeatedly
|
|
207
|
+
before the experiment concludes, your p-values are invalid. Decide
|
|
208
|
+
your sample size and analysis time upfront.
|
|
209
|
+
|
|
210
|
+
ALTERNATIVES
|
|
211
|
+
- Logistic regression with covariates (channel, plan tier): use if you
|
|
212
|
+
discover post-hoc imbalance or want to tighten confidence intervals
|
|
213
|
+
- Survival analysis (Cox model): use if time-to-upgrade matters as
|
|
214
|
+
much as whether users upgrade
|
|
215
|
+
─────────────────────────────────────────
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
163
220
|
## Why not just use Claude?
|
|
164
221
|
|
|
165
222
|
You could. But you'd need to know what to ask, how to frame it, and what a good answer looks like. Bridgekit has that baked in — it knows you're a data scientist presenting findings, so it asks the right questions automatically. No prompt engineering required. Just paste your work and run it.
|
|
@@ -182,7 +239,7 @@ Bridgekit only ever sees text you write yourself — your narrative, your conclu
|
|
|
182
239
|
|
|
183
240
|
## What's next?
|
|
184
241
|
|
|
185
|
-
Bridgekit is a suite, not a one-off.
|
|
242
|
+
Bridgekit is a suite, not a one-off. Three tools are live — more are coming:
|
|
186
243
|
|
|
187
244
|
- **Statistical approach suggester** — describe your problem in plain English, get the right test and why
|
|
188
245
|
- **Stakeholder translator** — turn your technical findings into a narrative a non-technical audience will actually follow
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import anthropic
|
|
3
|
+
|
|
4
|
+
SYSTEM_PROMPT = """You are a senior statistician and data scientist advising a colleague on the right analytical approach for their problem.
|
|
5
|
+
|
|
6
|
+
Given a question, a description of the available data, and the goal of the analysis, recommend the best analytical approach. Be direct and specific — not a textbook, not a list of every possible method.
|
|
7
|
+
|
|
8
|
+
Structure your response exactly like this:
|
|
9
|
+
|
|
10
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
11
|
+
─────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
RECOMMENDED APPROACH
|
|
14
|
+
[Name of the method and one sentence on why it fits this problem]
|
|
15
|
+
|
|
16
|
+
WHY THIS APPROACH
|
|
17
|
+
[2-3 sentences on why this is the right fit given the question, data, and goal]
|
|
18
|
+
|
|
19
|
+
KEY ASSUMPTIONS
|
|
20
|
+
[Bullet list of assumptions this approach requires — flag any that may be violated]
|
|
21
|
+
|
|
22
|
+
WATCH OUT FOR
|
|
23
|
+
[The most common mistake DS make on this type of problem]
|
|
24
|
+
|
|
25
|
+
ALTERNATIVES
|
|
26
|
+
[1-2 alternative approaches and when you'd use them instead]
|
|
27
|
+
|
|
28
|
+
─────────────────────────────────────────
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def plan(question: str, data_description: str = None, goal: str = None) -> str:
|
|
33
|
+
"""
|
|
34
|
+
Recommend the right analytical approach for your problem.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
question: The analytical question you are trying to answer.
|
|
38
|
+
data_description: Optional. A plain text description of your available data.
|
|
39
|
+
goal: Optional. The goal of your analysis (e.g. "causal inference",
|
|
40
|
+
"prediction", "segmentation", "hypothesis testing", "exploration").
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
A structured analytical plan covering the recommended approach, assumptions,
|
|
44
|
+
common pitfalls, and alternatives.
|
|
45
|
+
"""
|
|
46
|
+
if not question or not question.strip():
|
|
47
|
+
raise ValueError("Question cannot be empty.")
|
|
48
|
+
|
|
49
|
+
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
|
50
|
+
if not api_key:
|
|
51
|
+
raise EnvironmentError(
|
|
52
|
+
"ANTHROPIC_API_KEY not found. Set it with: export ANTHROPIC_API_KEY=your_key_here"
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
user_message = f"Question: {question}"
|
|
56
|
+
if data_description:
|
|
57
|
+
user_message += f"\n\nData: {data_description}"
|
|
58
|
+
if goal:
|
|
59
|
+
user_message += f"\n\nGoal: {goal}"
|
|
60
|
+
|
|
61
|
+
client = anthropic.Anthropic(api_key=api_key)
|
|
62
|
+
message = client.messages.create(
|
|
63
|
+
model="claude-opus-4-5",
|
|
64
|
+
max_tokens=1024,
|
|
65
|
+
system=SYSTEM_PROMPT,
|
|
66
|
+
messages=[
|
|
67
|
+
{
|
|
68
|
+
"role": "user",
|
|
69
|
+
"content": user_message
|
|
70
|
+
}
|
|
71
|
+
]
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
return message.content[0].text
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bridgekit
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: AI tools that make you a better data scientist, not a redundant one.
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/getbridgekit/bridgekit
|
|
@@ -26,8 +26,6 @@ Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
|
26
26
|
Requires-Dist: pytest-mock>=3.0.0; extra == "dev"
|
|
27
27
|
Dynamic: license-file
|
|
28
28
|
|
|
29
|
-
<img src="assets/logo.png" width="150"/>
|
|
30
|
-
|
|
31
29
|
# Bridgekit
|
|
32
30
|
|
|
33
31
|
**AI tools that make you a better data scientist, not a redundant one.**
|
|
@@ -188,6 +186,65 @@ churn rate of 4.5%:
|
|
|
188
186
|
|
|
189
187
|
---
|
|
190
188
|
|
|
189
|
+
## Tool #3: Analysis Planner
|
|
190
|
+
|
|
191
|
+
Describe your analytical problem and get a structured plan for the right approach — before you start the analysis.
|
|
192
|
+
|
|
193
|
+
Covers the recommended method, why it fits your problem, key assumptions, common pitfalls, and alternatives.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from bridgekit import plan
|
|
197
|
+
|
|
198
|
+
print(plan(
|
|
199
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
200
|
+
data_description="1,000 users randomly split 50/50 between old and new onboarding. Variables: upgrade status (binary), time to upgrade (days), acquisition channel, plan tier.",
|
|
201
|
+
goal="causal inference"
|
|
202
|
+
))
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
206
|
+
|
|
207
|
+
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
208
|
+
|
|
209
|
+
**Output:**
|
|
210
|
+
```
|
|
211
|
+
BRIDGEKIT ANALYSIS PLAN
|
|
212
|
+
─────────────────────────────────────────
|
|
213
|
+
|
|
214
|
+
RECOMMENDED APPROACH
|
|
215
|
+
Two-sample proportion test (z-test or Fisher's exact) for the primary
|
|
216
|
+
analysis, since you have a randomized experiment with a binary outcome
|
|
217
|
+
and want to estimate the causal effect of the new onboarding flow on
|
|
218
|
+
upgrade rates.
|
|
219
|
+
|
|
220
|
+
WHY THIS APPROACH
|
|
221
|
+
Randomization handles confounding, so you don't need regression
|
|
222
|
+
adjustment to get an unbiased causal estimate. With 500 per group,
|
|
223
|
+
you have reasonable power for detecting meaningful differences (~80%
|
|
224
|
+
power for a 7-8 percentage point lift from a 20% baseline).
|
|
225
|
+
|
|
226
|
+
KEY ASSUMPTIONS
|
|
227
|
+
- Randomization was correctly implemented (no selection bias)
|
|
228
|
+
- No interference between users
|
|
229
|
+
- SUTVA: each user has a single well-defined treatment version
|
|
230
|
+
- Outcome measurement is complete (watch for differential dropout)
|
|
231
|
+
- Users in both arms had equal opportunity to upgrade
|
|
232
|
+
|
|
233
|
+
WATCH OUT FOR
|
|
234
|
+
Peeking and early stopping — if you're checking results repeatedly
|
|
235
|
+
before the experiment concludes, your p-values are invalid. Decide
|
|
236
|
+
your sample size and analysis time upfront.
|
|
237
|
+
|
|
238
|
+
ALTERNATIVES
|
|
239
|
+
- Logistic regression with covariates (channel, plan tier): use if you
|
|
240
|
+
discover post-hoc imbalance or want to tighten confidence intervals
|
|
241
|
+
- Survival analysis (Cox model): use if time-to-upgrade matters as
|
|
242
|
+
much as whether users upgrade
|
|
243
|
+
─────────────────────────────────────────
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
191
248
|
## Why not just use Claude?
|
|
192
249
|
|
|
193
250
|
You could. But you'd need to know what to ask, how to frame it, and what a good answer looks like. Bridgekit has that baked in — it knows you're a data scientist presenting findings, so it asks the right questions automatically. No prompt engineering required. Just paste your work and run it.
|
|
@@ -210,7 +267,7 @@ Bridgekit only ever sees text you write yourself — your narrative, your conclu
|
|
|
210
267
|
|
|
211
268
|
## What's next?
|
|
212
269
|
|
|
213
|
-
Bridgekit is a suite, not a one-off.
|
|
270
|
+
Bridgekit is a suite, not a one-off. Three tools are live — more are coming:
|
|
214
271
|
|
|
215
272
|
- **Statistical approach suggester** — describe your problem in plain English, get the right test and why
|
|
216
273
|
- **Stakeholder translator** — turn your technical findings into a narrative a non-technical audience will actually follow
|
|
@@ -2,6 +2,7 @@ LICENSE
|
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
4
|
bridgekit/__init__.py
|
|
5
|
+
bridgekit/planner.py
|
|
5
6
|
bridgekit/reviewer.py
|
|
6
7
|
bridgekit/search.py
|
|
7
8
|
bridgekit.egg-info/PKG-INFO
|
|
@@ -9,5 +10,6 @@ bridgekit.egg-info/SOURCES.txt
|
|
|
9
10
|
bridgekit.egg-info/dependency_links.txt
|
|
10
11
|
bridgekit.egg-info/requires.txt
|
|
11
12
|
bridgekit.egg-info/top_level.txt
|
|
13
|
+
tests/test_planner.py
|
|
12
14
|
tests/test_reviewer.py
|
|
13
15
|
tests/test_search.py
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pytest
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# ---------------------------------------------------------------------------
|
|
7
|
+
# Helpers
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
def _make_mock_message(text: str):
|
|
11
|
+
content_block = MagicMock()
|
|
12
|
+
content_block.text = text
|
|
13
|
+
message = MagicMock()
|
|
14
|
+
message.content = [content_block]
|
|
15
|
+
return message
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
FAKE_RESPONSE = (
|
|
19
|
+
"BRIDGEKIT ANALYSIS PLAN\n"
|
|
20
|
+
"─────────────────────────────────────────\n\n"
|
|
21
|
+
"RECOMMENDED APPROACH\n"
|
|
22
|
+
"A/B test with a two-proportion z-test.\n\n"
|
|
23
|
+
"WHY THIS APPROACH\n"
|
|
24
|
+
"Random assignment handles confounding.\n\n"
|
|
25
|
+
"KEY ASSUMPTIONS\n"
|
|
26
|
+
"- Users were randomly assigned\n"
|
|
27
|
+
"- Independence between users\n\n"
|
|
28
|
+
"WATCH OUT FOR\n"
|
|
29
|
+
"Peeking at results before the test reaches planned sample size.\n\n"
|
|
30
|
+
"ALTERNATIVES\n"
|
|
31
|
+
"Logistic regression if you need to control for covariates.\n"
|
|
32
|
+
"─────────────────────────────────────────\n"
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Tests
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
class TestPlanReturnsString:
|
|
41
|
+
"""plan() should return a non-empty string."""
|
|
42
|
+
|
|
43
|
+
def test_returns_string(self):
|
|
44
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
45
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
46
|
+
mock_client = MagicMock()
|
|
47
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
48
|
+
MockAnthropic.return_value = mock_client
|
|
49
|
+
|
|
50
|
+
from bridgekit.planner import plan
|
|
51
|
+
result = plan("Does our new onboarding flow increase upgrade rates?")
|
|
52
|
+
|
|
53
|
+
assert isinstance(result, str)
|
|
54
|
+
assert len(result) > 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TestPlanOutputStructure:
|
|
58
|
+
"""plan() output should contain required section headers."""
|
|
59
|
+
|
|
60
|
+
def test_output_contains_recommended_approach(self):
|
|
61
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
62
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
63
|
+
mock_client = MagicMock()
|
|
64
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
65
|
+
MockAnthropic.return_value = mock_client
|
|
66
|
+
|
|
67
|
+
from bridgekit.planner import plan
|
|
68
|
+
result = plan("Does our new onboarding flow increase upgrade rates?")
|
|
69
|
+
|
|
70
|
+
assert "RECOMMENDED APPROACH" in result
|
|
71
|
+
|
|
72
|
+
def test_output_contains_watch_out_for(self):
|
|
73
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
74
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
75
|
+
mock_client = MagicMock()
|
|
76
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
77
|
+
MockAnthropic.return_value = mock_client
|
|
78
|
+
|
|
79
|
+
from bridgekit.planner import plan
|
|
80
|
+
result = plan("Does our new onboarding flow increase upgrade rates?")
|
|
81
|
+
|
|
82
|
+
assert "WATCH OUT FOR" in result
|
|
83
|
+
|
|
84
|
+
def test_output_contains_alternatives(self):
|
|
85
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
86
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
87
|
+
mock_client = MagicMock()
|
|
88
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
89
|
+
MockAnthropic.return_value = mock_client
|
|
90
|
+
|
|
91
|
+
from bridgekit.planner import plan
|
|
92
|
+
result = plan("Does our new onboarding flow increase upgrade rates?")
|
|
93
|
+
|
|
94
|
+
assert "ALTERNATIVES" in result
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class TestPlanMissingApiKey:
|
|
98
|
+
"""plan() should raise EnvironmentError when the API key is absent."""
|
|
99
|
+
|
|
100
|
+
def test_raises_environment_error_when_key_missing(self):
|
|
101
|
+
env = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}
|
|
102
|
+
with patch.dict(os.environ, env, clear=True):
|
|
103
|
+
from bridgekit.planner import plan
|
|
104
|
+
with pytest.raises(EnvironmentError):
|
|
105
|
+
plan("Does our new onboarding flow increase upgrade rates?")
|
|
106
|
+
|
|
107
|
+
def test_error_message_mentions_key(self):
|
|
108
|
+
env = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}
|
|
109
|
+
with patch.dict(os.environ, env, clear=True):
|
|
110
|
+
from bridgekit.planner import plan
|
|
111
|
+
with pytest.raises(EnvironmentError, match="ANTHROPIC_API_KEY"):
|
|
112
|
+
plan("Does our new onboarding flow increase upgrade rates?")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class TestPlanEmptyInput:
|
|
116
|
+
"""plan() should raise ValueError for empty or whitespace-only questions."""
|
|
117
|
+
|
|
118
|
+
def test_empty_string_raises_value_error(self):
|
|
119
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
120
|
+
from bridgekit.planner import plan
|
|
121
|
+
with pytest.raises(ValueError, match="empty"):
|
|
122
|
+
plan("")
|
|
123
|
+
|
|
124
|
+
def test_whitespace_only_raises_value_error(self):
|
|
125
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
126
|
+
from bridgekit.planner import plan
|
|
127
|
+
with pytest.raises(ValueError, match="empty"):
|
|
128
|
+
plan(" ")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class TestPlanOptionalParameters:
|
|
132
|
+
"""plan() should work with and without optional parameters."""
|
|
133
|
+
|
|
134
|
+
def test_question_only(self):
|
|
135
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
136
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
137
|
+
mock_client = MagicMock()
|
|
138
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
139
|
+
MockAnthropic.return_value = mock_client
|
|
140
|
+
|
|
141
|
+
from bridgekit.planner import plan
|
|
142
|
+
result = plan("Does our new onboarding flow increase upgrade rates?")
|
|
143
|
+
|
|
144
|
+
assert isinstance(result, str)
|
|
145
|
+
|
|
146
|
+
def test_with_all_parameters(self):
|
|
147
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
148
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
149
|
+
mock_client = MagicMock()
|
|
150
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
151
|
+
MockAnthropic.return_value = mock_client
|
|
152
|
+
|
|
153
|
+
from bridgekit.planner import plan
|
|
154
|
+
result = plan(
|
|
155
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
156
|
+
data_description="5,000 users split 50/50.",
|
|
157
|
+
goal="causal inference"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
assert isinstance(result, str)
|
|
161
|
+
|
|
162
|
+
def test_all_parameters_included_in_api_call(self):
|
|
163
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
164
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
165
|
+
mock_client = MagicMock()
|
|
166
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
167
|
+
MockAnthropic.return_value = mock_client
|
|
168
|
+
|
|
169
|
+
from bridgekit.planner import plan
|
|
170
|
+
plan(
|
|
171
|
+
question="Does our new onboarding flow increase upgrade rates?",
|
|
172
|
+
data_description="5,000 users split 50/50.",
|
|
173
|
+
goal="causal inference"
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
177
|
+
messages_arg = call_kwargs.kwargs.get("messages") or call_kwargs.args[0]
|
|
178
|
+
content = str(messages_arg)
|
|
179
|
+
assert "5,000 users split 50/50." in content
|
|
180
|
+
assert "causal inference" in content
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|