bridgekit 0.3.6__tar.gz → 0.3.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bridgekit-0.3.6 → bridgekit-0.3.8}/PKG-INFO +40 -7
- {bridgekit-0.3.6 → bridgekit-0.3.8}/README.md +39 -6
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/__init__.py +1 -1
- bridgekit-0.3.8/bridgekit/cli.py +105 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/config.py +1 -1
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/planner.py +5 -3
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/redteam.py +20 -17
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/reviewer.py +6 -4
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/search.py +19 -14
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/PKG-INFO +40 -7
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/SOURCES.txt +4 -0
- bridgekit-0.3.8/bridgekit.egg-info/entry_points.txt +2 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/pyproject.toml +4 -1
- bridgekit-0.3.8/tests/test_cli.py +175 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_planner.py +44 -0
- bridgekit-0.3.8/tests/test_redteam.py +185 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_reviewer.py +48 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_search.py +63 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/LICENSE +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/providers.py +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/dependency_links.txt +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/requires.txt +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/top_level.txt +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/setup.cfg +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_config.py +0 -0
- {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_providers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bridgekit
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.8
|
|
4
4
|
Summary: AI tools that make you a better data scientist, not a redundant one.
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://usebridgekit.com
|
|
@@ -136,6 +136,9 @@ onboarding users to reporting as a growth lever.
|
|
|
136
136
|
"""
|
|
137
137
|
|
|
138
138
|
print(evaluate(text))
|
|
139
|
+
|
|
140
|
+
# Override for longer analyses
|
|
141
|
+
print(evaluate(text, max_tokens=2048))
|
|
139
142
|
```
|
|
140
143
|
|
|
141
144
|
**Output:**
|
|
@@ -190,6 +193,9 @@ Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
|
|
|
190
193
|
from bridgekit import ask
|
|
191
194
|
|
|
192
195
|
print(ask("what drove churn in Q3?", source="reports/"))
|
|
196
|
+
|
|
197
|
+
# Override for longer responses
|
|
198
|
+
print(ask("what drove churn in Q3?", source="reports/", max_tokens=2048))
|
|
193
199
|
```
|
|
194
200
|
|
|
195
201
|
**From raw text:**
|
|
@@ -234,7 +240,7 @@ print(plan(
|
|
|
234
240
|
))
|
|
235
241
|
```
|
|
236
242
|
|
|
237
|
-
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
243
|
+
`data_description`, `goal`, and `max_tokens` are optional — the more context you provide, the more tailored the recommendation.
|
|
238
244
|
|
|
239
245
|
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
240
246
|
|
|
@@ -297,6 +303,9 @@ print(redteam(text))
|
|
|
297
303
|
# Or specify a stakeholder
|
|
298
304
|
print(redteam(text, stakeholder="VP of Engineering"))
|
|
299
305
|
print(redteam(text, stakeholder="VP of Marketing"))
|
|
306
|
+
|
|
307
|
+
# Override for longer responses
|
|
308
|
+
print(redteam(text, max_tokens=2048))
|
|
300
309
|
```
|
|
301
310
|
|
|
302
311
|
Same writeup, different attack angles:
|
|
@@ -414,15 +423,39 @@ Bridgekit automatically detects the provider from model names:
|
|
|
414
423
|
- Models starting with "gemini" → Google Gemini
|
|
415
424
|
|
|
416
425
|
**Default models by provider:**
|
|
417
|
-
- Anthropic: `claude-
|
|
426
|
+
- Anthropic: `claude-opus-4-8`
|
|
418
427
|
- OpenAI: `gpt-4o`
|
|
419
428
|
- Gemini: `gemini-1.5-pro`
|
|
420
429
|
|
|
421
430
|
All tools support the same `provider` and `model` parameters:
|
|
422
|
-
- `evaluate(text, provider=None, model=None)`
|
|
423
|
-
- `plan(question, provider=None, model=None,
|
|
424
|
-
- `ask(question, provider=None, model=None,
|
|
425
|
-
- `redteam(text, provider=None, model=None,
|
|
431
|
+
- `evaluate(text, provider=None, model=None, system_prompt=None)`
|
|
432
|
+
- `plan(question, provider=None, model=None, ..., system_prompt=None)`
|
|
433
|
+
- `ask(question, provider=None, model=None, ..., system_prompt=None)`
|
|
434
|
+
- `redteam(text, provider=None, model=None, ..., system_prompt=None)`
|
|
435
|
+
|
|
436
|
+
---
|
|
437
|
+
|
|
438
|
+
## Custom System Prompts
|
|
439
|
+
|
|
440
|
+
Every tool accepts an optional `system_prompt` parameter to override the default persona. Use this to adapt the tone or focus to a specific domain without changing anything else.
|
|
441
|
+
|
|
442
|
+
```python
|
|
443
|
+
from bridgekit import evaluate, plan, ask, redteam
|
|
444
|
+
|
|
445
|
+
# Narrow the reviewer to a specific domain
|
|
446
|
+
print(evaluate("my analysis", system_prompt="You are a skeptical PhD statistician focused only on methodology"))
|
|
447
|
+
|
|
448
|
+
# Tailor the planner to a specific industry
|
|
449
|
+
print(plan("my question", system_prompt="You are a data scientist specializing in healthcare analytics"))
|
|
450
|
+
|
|
451
|
+
# Replace the red team persona entirely
|
|
452
|
+
print(redteam("my analysis", system_prompt="You are a hostile regulator looking for compliance violations"))
|
|
453
|
+
|
|
454
|
+
# Change the answering style for ask
|
|
455
|
+
print(ask("my question", text="...", system_prompt="You are a financial analyst. Answer only in terms of revenue impact."))
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
When `system_prompt` is not provided, each tool uses its built-in default — existing behavior is unchanged.
|
|
426
459
|
|
|
427
460
|
---
|
|
428
461
|
|
|
@@ -104,6 +104,9 @@ onboarding users to reporting as a growth lever.
|
|
|
104
104
|
"""
|
|
105
105
|
|
|
106
106
|
print(evaluate(text))
|
|
107
|
+
|
|
108
|
+
# Override for longer analyses
|
|
109
|
+
print(evaluate(text, max_tokens=2048))
|
|
107
110
|
```
|
|
108
111
|
|
|
109
112
|
**Output:**
|
|
@@ -158,6 +161,9 @@ Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
|
|
|
158
161
|
from bridgekit import ask
|
|
159
162
|
|
|
160
163
|
print(ask("what drove churn in Q3?", source="reports/"))
|
|
164
|
+
|
|
165
|
+
# Override for longer responses
|
|
166
|
+
print(ask("what drove churn in Q3?", source="reports/", max_tokens=2048))
|
|
161
167
|
```
|
|
162
168
|
|
|
163
169
|
**From raw text:**
|
|
@@ -202,7 +208,7 @@ print(plan(
|
|
|
202
208
|
))
|
|
203
209
|
```
|
|
204
210
|
|
|
205
|
-
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
211
|
+
`data_description`, `goal`, and `max_tokens` are optional — the more context you provide, the more tailored the recommendation.
|
|
206
212
|
|
|
207
213
|
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
208
214
|
|
|
@@ -265,6 +271,9 @@ print(redteam(text))
|
|
|
265
271
|
# Or specify a stakeholder
|
|
266
272
|
print(redteam(text, stakeholder="VP of Engineering"))
|
|
267
273
|
print(redteam(text, stakeholder="VP of Marketing"))
|
|
274
|
+
|
|
275
|
+
# Override for longer responses
|
|
276
|
+
print(redteam(text, max_tokens=2048))
|
|
268
277
|
```
|
|
269
278
|
|
|
270
279
|
Same writeup, different attack angles:
|
|
@@ -382,15 +391,39 @@ Bridgekit automatically detects the provider from model names:
|
|
|
382
391
|
- Models starting with "gemini" → Google Gemini
|
|
383
392
|
|
|
384
393
|
**Default models by provider:**
|
|
385
|
-
- Anthropic: `claude-
|
|
394
|
+
- Anthropic: `claude-opus-4-8`
|
|
386
395
|
- OpenAI: `gpt-4o`
|
|
387
396
|
- Gemini: `gemini-1.5-pro`
|
|
388
397
|
|
|
389
398
|
All tools support the same `provider` and `model` parameters:
|
|
390
|
-
- `evaluate(text, provider=None, model=None)`
|
|
391
|
-
- `plan(question, provider=None, model=None,
|
|
392
|
-
- `ask(question, provider=None, model=None,
|
|
393
|
-
- `redteam(text, provider=None, model=None,
|
|
399
|
+
- `evaluate(text, provider=None, model=None, system_prompt=None)`
|
|
400
|
+
- `plan(question, provider=None, model=None, ..., system_prompt=None)`
|
|
401
|
+
- `ask(question, provider=None, model=None, ..., system_prompt=None)`
|
|
402
|
+
- `redteam(text, provider=None, model=None, ..., system_prompt=None)`
|
|
403
|
+
|
|
404
|
+
---
|
|
405
|
+
|
|
406
|
+
## Custom System Prompts
|
|
407
|
+
|
|
408
|
+
Every tool accepts an optional `system_prompt` parameter to override the default persona. Use this to adapt the tone or focus to a specific domain without changing anything else.
|
|
409
|
+
|
|
410
|
+
```python
|
|
411
|
+
from bridgekit import evaluate, plan, ask, redteam
|
|
412
|
+
|
|
413
|
+
# Narrow the reviewer to a specific domain
|
|
414
|
+
print(evaluate("my analysis", system_prompt="You are a skeptical PhD statistician focused only on methodology"))
|
|
415
|
+
|
|
416
|
+
# Tailor the planner to a specific industry
|
|
417
|
+
print(plan("my question", system_prompt="You are a data scientist specializing in healthcare analytics"))
|
|
418
|
+
|
|
419
|
+
# Replace the red team persona entirely
|
|
420
|
+
print(redteam("my analysis", system_prompt="You are a hostile regulator looking for compliance violations"))
|
|
421
|
+
|
|
422
|
+
# Change the answering style for ask
|
|
423
|
+
print(ask("my question", text="...", system_prompt="You are a financial analyst. Answer only in terms of revenue impact."))
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
When `system_prompt` is not provided, each tool uses its built-in default — existing behavior is unchanged.
|
|
394
427
|
|
|
395
428
|
---
|
|
396
429
|
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from .planner import plan
|
|
5
|
+
from .reviewer import evaluate
|
|
6
|
+
from .redteam import redteam
|
|
7
|
+
from .search import ask
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _add_provider_args(parser: argparse.ArgumentParser) -> None:
    """Register the shared ``--provider`` and ``--model`` options on *parser*.

    Every subcommand accepts the same two options, so they are added in one
    place. Both are optional; argparse stores ``None`` for an option that is
    not given on the command line.

    Args:
        parser: The (sub)parser to extend in place.
    """
    parser.add_argument("--provider", help='AI provider: "anthropic", "openai", or "gemini"')
    parser.add_argument("--model", help="Specific model to use (e.g. claude-opus-4-8, gpt-4o)")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _cmd_plan(args: argparse.Namespace) -> None:
    """Handle ``bridgekit plan``: call ``plan`` and print its result to stdout.

    Args:
        args: Parsed arguments; reads ``question``, ``data``, ``goal``,
            ``provider`` and ``model``. The ``--data`` option is forwarded
            as the ``data_description`` keyword.
    """
    result = plan(
        question=args.question,
        data_description=args.data,
        goal=args.goal,
        provider=args.provider,
        model=args.model,
    )
    print(result)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _cmd_review(args: argparse.Namespace) -> None:
    """Handle ``bridgekit review``: call ``evaluate`` and print its result to stdout.

    Args:
        args: Parsed arguments; reads ``text``, ``provider`` and ``model``.
    """
    result = evaluate(
        text=args.text,
        provider=args.provider,
        model=args.model,
    )
    print(result)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _cmd_redteam(args: argparse.Namespace) -> None:
    """Handle ``bridgekit redteam``: call ``redteam`` and print its result to stdout.

    Args:
        args: Parsed arguments; reads ``text``, ``stakeholder``, ``provider``
            and ``model``.
    """
    result = redteam(
        text=args.text,
        stakeholder=args.stakeholder,
        provider=args.provider,
        model=args.model,
    )
    print(result)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _cmd_search(args: argparse.Namespace) -> None:
    """Handle ``bridgekit search``: call ``ask`` and print its result to stdout.

    At least one of ``--source`` or ``--text`` must be supplied. When both are
    missing or empty, an error is written to stderr and the process exits
    with status 1 before ``ask`` is called.

    Args:
        args: Parsed arguments; reads ``question``, ``source``, ``text``,
            ``provider`` and ``model``.
    """
    # Guard first so the user gets a short CLI-style message instead of
    # whatever the library would do with no input at all.
    if not args.source and not args.text:
        print("error: provide --source or --text", file=sys.stderr)
        sys.exit(1)
    result = ask(
        question=args.question,
        source=args.source,
        text=args.text,
        provider=args.provider,
        model=args.model,
    )
    print(result)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def main(argv=None) -> None:
    """Entry point for the ``bridgekit`` command-line interface.

    Builds a parser with four subcommands (``plan``, ``review``, ``redteam``
    and ``search``), parses the arguments, and dispatches to the handler
    registered for the chosen subcommand via ``set_defaults(func=...)``.

    Args:
        argv: Optional list of argument strings, without the program name.
            When ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            calling ``main()`` with no arguments behaves as before.

    A ``ValueError`` or ``EnvironmentError`` raised by a handler is reported
    as ``error: <message>`` on stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        prog="bridgekit",
        description="AI tools for data scientists",
    )
    # A subcommand is mandatory; argparse reports a usage error without one.
    sub = parser.add_subparsers(dest="command", metavar="COMMAND", required=True)

    # plan
    p_plan = sub.add_parser("plan", help="Recommend the right analytical approach")
    p_plan.add_argument("question", help="The analytical question you want to answer")
    p_plan.add_argument("--data", metavar="DESCRIPTION", help="Description of your available data")
    p_plan.add_argument("--goal", help='Goal of the analysis (e.g. "prediction", "hypothesis testing")')
    _add_provider_args(p_plan)
    p_plan.set_defaults(func=_cmd_plan)

    # review
    p_review = sub.add_parser("review", help="Evaluate a data science analysis writeup")
    p_review.add_argument("text", help="The analysis text to review")
    _add_provider_args(p_review)
    p_review.set_defaults(func=_cmd_review)

    # redteam
    p_redteam = sub.add_parser("redteam", help="Red-team an analysis from a skeptical stakeholder")
    p_redteam.add_argument("text", help="The analysis text to red-team")
    p_redteam.add_argument("--stakeholder", help='Stakeholder role (e.g. "VP of Finance")')
    _add_provider_args(p_redteam)
    p_redteam.set_defaults(func=_cmd_redteam)

    # search
    p_search = sub.add_parser("search", help="Ask a question across documents or text")
    p_search.add_argument("question", help="The question to answer")
    p_search.add_argument("--source", metavar="PATH", help="Folder of documents to search")
    p_search.add_argument("--text", help="Raw text to search instead of a folder")
    _add_provider_args(p_search)
    p_search.set_defaults(func=_cmd_search)

    args = parser.parse_args(argv)
    try:
        args.func(args)
    except (ValueError, EnvironmentError) as e:
        # Expected user-facing failures: show a one-line message, no traceback.
        print(f"error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -29,7 +29,7 @@ ALTERNATIVES
|
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def plan(question: str, data_description: str = None, goal: str = None, provider: str = None, model: str = None) -> str:
|
|
32
|
+
def plan(question: str, data_description: str = None, goal: str = None, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
|
|
33
33
|
"""
|
|
34
34
|
Recommend the right analytical approach for your problem.
|
|
35
35
|
|
|
@@ -41,6 +41,8 @@ def plan(question: str, data_description: str = None, goal: str = None, provider
|
|
|
41
41
|
provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
|
|
42
42
|
If not specified, defaults to "anthropic" or infers from model.
|
|
43
43
|
model: Optional. The specific model to use. If not specified, uses the provider's default.
|
|
44
|
+
system_prompt: Optional. A custom system prompt to override the default planner persona.
|
|
45
|
+
max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
|
|
44
46
|
|
|
45
47
|
Returns:
|
|
46
48
|
A structured analytical plan covering the recommended approach, assumptions,
|
|
@@ -62,8 +64,8 @@ def plan(question: str, data_description: str = None, goal: str = None, provider
|
|
|
62
64
|
|
|
63
65
|
return create_message(
|
|
64
66
|
provider=provider_enum,
|
|
65
|
-
system_prompt=SYSTEM_PROMPT,
|
|
67
|
+
system_prompt=system_prompt or SYSTEM_PROMPT,
|
|
66
68
|
user_message=user_message,
|
|
67
69
|
model=model,
|
|
68
|
-
max_tokens=
|
|
70
|
+
max_tokens=max_tokens
|
|
69
71
|
)
|
|
@@ -39,18 +39,21 @@ HARDEST QUESTION TO ANSWER
|
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
def redteam(text: str, stakeholder: str = None, provider: str = None, model: str = None) -> str:
|
|
42
|
+
def redteam(text: str, stakeholder: str = None, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
|
|
43
43
|
"""
|
|
44
44
|
Red-team a data science analysis writeup from the perspective of a skeptical stakeholder.
|
|
45
45
|
|
|
46
46
|
Args:
|
|
47
|
-
text:
|
|
48
|
-
stakeholder:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
provider:
|
|
52
|
-
|
|
53
|
-
model:
|
|
47
|
+
text: Your analysis writeup as a plain string.
|
|
48
|
+
stakeholder: Optional. The skeptical stakeholder role (e.g. "VP of Finance",
|
|
49
|
+
"skeptical board member", "Chief Revenue Officer").
|
|
50
|
+
Defaults to a generic skeptical senior executive.
|
|
51
|
+
provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
|
|
52
|
+
If not specified, defaults to "anthropic" or infers from model.
|
|
53
|
+
model: Optional. The specific model to use. If not specified, uses the provider's default.
|
|
54
|
+
system_prompt: Optional. A custom system prompt to fully override the default red team persona.
|
|
55
|
+
When provided, the stakeholder parameter is ignored.
|
|
56
|
+
max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
|
|
54
57
|
|
|
55
58
|
Returns:
|
|
56
59
|
The 3-5 hardest critiques the stakeholder would make, plus the single
|
|
@@ -64,20 +67,20 @@ def redteam(text: str, stakeholder: str = None, provider: str = None, model: str
|
|
|
64
67
|
if model is None:
|
|
65
68
|
model = get_default_model(provider_enum)
|
|
66
69
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
70
|
+
if system_prompt is None:
|
|
71
|
+
stakeholder_label = stakeholder if stakeholder else "Skeptical Senior Executive"
|
|
72
|
+
stakeholder_desc = stakeholder if stakeholder else DEFAULT_STAKEHOLDER
|
|
73
|
+
system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
|
|
74
|
+
stakeholder=stakeholder_desc,
|
|
75
|
+
stakeholder_label=stakeholder_label
|
|
76
|
+
)
|
|
74
77
|
|
|
75
78
|
user_message = f"Red-team this analysis writeup:\n\n{text}"
|
|
76
|
-
|
|
79
|
+
|
|
77
80
|
return create_message(
|
|
78
81
|
provider=provider_enum,
|
|
79
82
|
system_prompt=system_prompt,
|
|
80
83
|
user_message=user_message,
|
|
81
84
|
model=model,
|
|
82
|
-
max_tokens=
|
|
85
|
+
max_tokens=max_tokens
|
|
83
86
|
)
|
|
@@ -42,7 +42,7 @@ BOTTOM LINE
|
|
|
42
42
|
[one sentence]
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
def evaluate(text: str, provider: str = None, model: str = None) -> str:
|
|
45
|
+
def evaluate(text: str, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
|
|
46
46
|
"""
|
|
47
47
|
Evaluate a data science analysis writeup and return structured feedback.
|
|
48
48
|
|
|
@@ -51,6 +51,8 @@ def evaluate(text: str, provider: str = None, model: str = None) -> str:
|
|
|
51
51
|
provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
|
|
52
52
|
If not specified, defaults to "anthropic" or infers from model.
|
|
53
53
|
model: Optional. The specific model to use. If not specified, uses the provider's default.
|
|
54
|
+
system_prompt: Optional. A custom system prompt to override the default reviewer persona.
|
|
55
|
+
max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
|
|
54
56
|
|
|
55
57
|
Returns:
|
|
56
58
|
Structured feedback across four dimensions.
|
|
@@ -64,11 +66,11 @@ def evaluate(text: str, provider: str = None, model: str = None) -> str:
|
|
|
64
66
|
model = get_default_model(provider_enum)
|
|
65
67
|
|
|
66
68
|
user_message = f"Please review this analysis writeup:\n\n{text}"
|
|
67
|
-
|
|
69
|
+
|
|
68
70
|
return create_message(
|
|
69
71
|
provider=provider_enum,
|
|
70
|
-
system_prompt=SYSTEM_PROMPT,
|
|
72
|
+
system_prompt=system_prompt or SYSTEM_PROMPT,
|
|
71
73
|
user_message=user_message,
|
|
72
74
|
model=model,
|
|
73
|
-
max_tokens=
|
|
75
|
+
max_tokens=max_tokens
|
|
74
76
|
)
|
|
@@ -49,17 +49,26 @@ def _chunk(text: str) -> list[str]:
|
|
|
49
49
|
return [c for c in chunks if c.strip()]
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
# Built-in persona for ask(); used when the caller passes no system_prompt.
DEFAULT_SYSTEM_PROMPT = (
    "You are a senior data scientist answering questions based on analysis reports. "
    "Answer only from the provided context. Be specific and cite findings where relevant. "
    "If the context does not contain enough information to answer, say so clearly."
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def ask(question: str, source: str = None, text: str = None, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
|
|
53
60
|
"""
|
|
54
61
|
Ask a question across a collection of analysis documents or raw text.
|
|
55
62
|
|
|
56
63
|
Args:
|
|
57
|
-
question:
|
|
58
|
-
source:
|
|
59
|
-
text:
|
|
60
|
-
provider:
|
|
61
|
-
|
|
62
|
-
model:
|
|
64
|
+
question: The question to answer.
|
|
65
|
+
source: Path to a folder containing .txt, .md, .pdf, .docx, .pptx, or .ipynb files.
|
|
66
|
+
text: A raw text string to search instead of a folder.
|
|
67
|
+
provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
|
|
68
|
+
If not specified, defaults to "anthropic" or infers from model.
|
|
69
|
+
model: Optional. The specific model to use. If not specified, uses the provider's default.
|
|
70
|
+
system_prompt: Optional. A custom system prompt to override the default answering persona.
|
|
71
|
+
max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
|
|
63
72
|
|
|
64
73
|
Returns:
|
|
65
74
|
An answer grounded in the provided documents.
|
|
@@ -107,15 +116,11 @@ def ask(question: str, source: str = None, text: str = None, provider: str = Non
|
|
|
107
116
|
|
|
108
117
|
# Generate answer with specified provider
|
|
109
118
|
user_message = f"Context from analysis reports:\n\n{context}\n\nQuestion: {question}"
|
|
110
|
-
|
|
119
|
+
|
|
111
120
|
return create_message(
|
|
112
121
|
provider=provider_enum,
|
|
113
|
-
system_prompt=
|
|
114
|
-
"You are a senior data scientist answering questions based on analysis reports. "
|
|
115
|
-
"Answer only from the provided context. Be specific and cite findings where relevant. "
|
|
116
|
-
"If the context does not contain enough information to answer, say so clearly."
|
|
117
|
-
),
|
|
122
|
+
system_prompt=system_prompt or DEFAULT_SYSTEM_PROMPT,
|
|
118
123
|
user_message=user_message,
|
|
119
124
|
model=model,
|
|
120
|
-
max_tokens=
|
|
125
|
+
max_tokens=max_tokens
|
|
121
126
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bridgekit
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.8
|
|
4
4
|
Summary: AI tools that make you a better data scientist, not a redundant one.
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://usebridgekit.com
|
|
@@ -136,6 +136,9 @@ onboarding users to reporting as a growth lever.
|
|
|
136
136
|
"""
|
|
137
137
|
|
|
138
138
|
print(evaluate(text))
|
|
139
|
+
|
|
140
|
+
# Override for longer analyses
|
|
141
|
+
print(evaluate(text, max_tokens=2048))
|
|
139
142
|
```
|
|
140
143
|
|
|
141
144
|
**Output:**
|
|
@@ -190,6 +193,9 @@ Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
|
|
|
190
193
|
from bridgekit import ask
|
|
191
194
|
|
|
192
195
|
print(ask("what drove churn in Q3?", source="reports/"))
|
|
196
|
+
|
|
197
|
+
# Override for longer responses
|
|
198
|
+
print(ask("what drove churn in Q3?", source="reports/", max_tokens=2048))
|
|
193
199
|
```
|
|
194
200
|
|
|
195
201
|
**From raw text:**
|
|
@@ -234,7 +240,7 @@ print(plan(
|
|
|
234
240
|
))
|
|
235
241
|
```
|
|
236
242
|
|
|
237
|
-
`data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
|
|
243
|
+
`data_description`, `goal`, and `max_tokens` are optional — the more context you provide, the more tailored the recommendation.
|
|
238
244
|
|
|
239
245
|
**`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
|
|
240
246
|
|
|
@@ -297,6 +303,9 @@ print(redteam(text))
|
|
|
297
303
|
# Or specify a stakeholder
|
|
298
304
|
print(redteam(text, stakeholder="VP of Engineering"))
|
|
299
305
|
print(redteam(text, stakeholder="VP of Marketing"))
|
|
306
|
+
|
|
307
|
+
# Override for longer responses
|
|
308
|
+
print(redteam(text, max_tokens=2048))
|
|
300
309
|
```
|
|
301
310
|
|
|
302
311
|
Same writeup, different attack angles:
|
|
@@ -414,15 +423,39 @@ Bridgekit automatically detects the provider from model names:
|
|
|
414
423
|
- Models starting with "gemini" → Google Gemini
|
|
415
424
|
|
|
416
425
|
**Default models by provider:**
|
|
417
|
-
- Anthropic: `claude-
|
|
426
|
+
- Anthropic: `claude-opus-4-8`
|
|
418
427
|
- OpenAI: `gpt-4o`
|
|
419
428
|
- Gemini: `gemini-1.5-pro`
|
|
420
429
|
|
|
421
430
|
All tools support the same `provider` and `model` parameters:
|
|
422
|
-
- `evaluate(text, provider=None, model=None)`
|
|
423
|
-
- `plan(question, provider=None, model=None,
|
|
424
|
-
- `ask(question, provider=None, model=None,
|
|
425
|
-
- `redteam(text, provider=None, model=None,
|
|
431
|
+
- `evaluate(text, provider=None, model=None, system_prompt=None)`
|
|
432
|
+
- `plan(question, provider=None, model=None, ..., system_prompt=None)`
|
|
433
|
+
- `ask(question, provider=None, model=None, ..., system_prompt=None)`
|
|
434
|
+
- `redteam(text, provider=None, model=None, ..., system_prompt=None)`
|
|
435
|
+
|
|
436
|
+
---
|
|
437
|
+
|
|
438
|
+
## Custom System Prompts
|
|
439
|
+
|
|
440
|
+
Every tool accepts an optional `system_prompt` parameter to override the default persona. Use this to adapt the tone or focus to a specific domain without changing anything else.
|
|
441
|
+
|
|
442
|
+
```python
|
|
443
|
+
from bridgekit import evaluate, plan, ask, redteam
|
|
444
|
+
|
|
445
|
+
# Narrow the reviewer to a specific domain
|
|
446
|
+
print(evaluate("my analysis", system_prompt="You are a skeptical PhD statistician focused only on methodology"))
|
|
447
|
+
|
|
448
|
+
# Tailor the planner to a specific industry
|
|
449
|
+
print(plan("my question", system_prompt="You are a data scientist specializing in healthcare analytics"))
|
|
450
|
+
|
|
451
|
+
# Replace the red team persona entirely
|
|
452
|
+
print(redteam("my analysis", system_prompt="You are a hostile regulator looking for compliance violations"))
|
|
453
|
+
|
|
454
|
+
# Change the answering style for ask
|
|
455
|
+
print(ask("my question", text="...", system_prompt="You are a financial analyst. Answer only in terms of revenue impact."))
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
When `system_prompt` is not provided, each tool uses its built-in default — existing behavior is unchanged.
|
|
426
459
|
|
|
427
460
|
---
|
|
428
461
|
|
|
@@ -2,6 +2,7 @@ LICENSE
|
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
4
|
bridgekit/__init__.py
|
|
5
|
+
bridgekit/cli.py
|
|
5
6
|
bridgekit/config.py
|
|
6
7
|
bridgekit/planner.py
|
|
7
8
|
bridgekit/providers.py
|
|
@@ -11,10 +12,13 @@ bridgekit/search.py
|
|
|
11
12
|
bridgekit.egg-info/PKG-INFO
|
|
12
13
|
bridgekit.egg-info/SOURCES.txt
|
|
13
14
|
bridgekit.egg-info/dependency_links.txt
|
|
15
|
+
bridgekit.egg-info/entry_points.txt
|
|
14
16
|
bridgekit.egg-info/requires.txt
|
|
15
17
|
bridgekit.egg-info/top_level.txt
|
|
18
|
+
tests/test_cli.py
|
|
16
19
|
tests/test_config.py
|
|
17
20
|
tests/test_planner.py
|
|
18
21
|
tests/test_providers.py
|
|
22
|
+
tests/test_redteam.py
|
|
19
23
|
tests/test_reviewer.py
|
|
20
24
|
tests/test_search.py
|
|
@@ -7,7 +7,7 @@ include = ["bridgekit*"]
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "bridgekit"
|
|
10
|
-
version = "0.3.6"
|
|
10
|
+
version = "0.3.8"
|
|
11
11
|
description = "AI tools that make you a better data scientist, not a redundant one."
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
requires-python = ">=3.9"
|
|
@@ -38,6 +38,9 @@ dev = [
|
|
|
38
38
|
"pytest-mock>=3.0.0",
|
|
39
39
|
]
|
|
40
40
|
|
|
41
|
+
[project.scripts]
|
|
42
|
+
bridgekit = "bridgekit.cli:main"
|
|
43
|
+
|
|
41
44
|
[project.urls]
|
|
42
45
|
Homepage = "https://usebridgekit.com"
|
|
43
46
|
Issues = "https://github.com/getbridgekit/bridgekit/issues"
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import pytest
|
|
3
|
+
from unittest.mock import patch
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Canned tool outputs returned by the patched bridgekit functions in the CLI tests.
FAKE_PLAN = "BRIDGEKIT ANALYSIS PLAN\n─────\nRECOMMENDED APPROACH\nUse a t-test."
FAKE_REVIEW = "BRIDGEKIT ANALYSIS REVIEW\n─────\n1. CLARITY\n✅ STRONG Clear writing."
FAKE_REDTEAM = "BRIDGEKIT RED TEAM\n─────\nCRITIQUE 1: Sample Size\nHARDEST QUESTION TO ANSWER\nWhat is n?"
FAKE_SEARCH = "Based on the documents, the answer is 42."
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestPlanCommand:
    """CLI tests for ``bridgekit plan`` with ``bridgekit.cli.plan`` patched out."""

    def test_basic_question(self, capsys):
        """A bare question forwards ``None`` for every option and prints the result."""
        with patch("bridgekit.cli.plan", return_value=FAKE_PLAN) as mock_plan:
            with patch("sys.argv", ["bridgekit", "plan", "should I use a t-test?"]):
                from bridgekit.cli import main
                main()
        mock_plan.assert_called_once_with(
            question="should I use a t-test?",
            data_description=None,
            goal=None,
            provider=None,
            model=None,
        )
        assert FAKE_PLAN in capsys.readouterr().out

    def test_with_data_and_goal(self, capsys):
        """``--data`` maps to ``data_description`` and ``--goal`` to ``goal``."""
        with patch("bridgekit.cli.plan", return_value=FAKE_PLAN) as mock_plan:
            with patch("sys.argv", ["bridgekit", "plan", "my question",
                                    "--data", "50 rows", "--goal", "compare means"]):
                from bridgekit.cli import main
                main()
        mock_plan.assert_called_once_with(
            question="my question",
            data_description="50 rows",
            goal="compare means",
            provider=None,
            model=None,
        )

    def test_with_provider_and_model(self):
        """``--provider`` and ``--model`` are passed through unchanged."""
        with patch("bridgekit.cli.plan", return_value=FAKE_PLAN) as mock_plan:
            with patch("sys.argv", ["bridgekit", "plan", "my question",
                                    "--provider", "openai", "--model", "gpt-4o"]):
                from bridgekit.cli import main
                main()
        mock_plan.assert_called_once_with(
            question="my question",
            data_description=None,
            goal=None,
            provider="openai",
            model="gpt-4o",
        )

    def test_missing_question_exits(self):
        """Omitting the positional question makes argparse exit."""
        with patch("sys.argv", ["bridgekit", "plan"]):
            from bridgekit.cli import main
            with pytest.raises(SystemExit):
                main()

    def test_environment_error_exits(self, capsys):
        """An ``EnvironmentError`` from ``plan`` exits with status 1 and reports on stderr."""
        with patch("bridgekit.cli.plan", side_effect=EnvironmentError("ANTHROPIC_API_KEY not found")):
            with patch("sys.argv", ["bridgekit", "plan", "my question"]):
                from bridgekit.cli import main
                with pytest.raises(SystemExit) as exc:
                    main()
        assert exc.value.code == 1
        assert "ANTHROPIC_API_KEY" in capsys.readouterr().err
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TestReviewCommand:
    """CLI tests for ``bridgekit review`` with ``bridgekit.cli.evaluate`` patched out."""

    def test_basic_text(self, capsys):
        """The positional text is forwarded and the result is printed to stdout."""
        with patch("bridgekit.cli.evaluate", return_value=FAKE_REVIEW) as mock_evaluate:
            with patch("sys.argv", ["bridgekit", "review", "my analysis text"]):
                from bridgekit.cli import main
                main()
        mock_evaluate.assert_called_once_with(
            text="my analysis text",
            provider=None,
            model=None,
        )
        assert FAKE_REVIEW in capsys.readouterr().out

    def test_missing_text_exits(self):
        """Omitting the positional text makes argparse exit."""
        with patch("sys.argv", ["bridgekit", "review"]):
            from bridgekit.cli import main
            with pytest.raises(SystemExit):
                main()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestRedteamCommand:
    """CLI tests for ``bridgekit redteam`` with ``bridgekit.cli.redteam`` patched out."""

    def test_basic_text(self, capsys):
        """Without ``--stakeholder`` the handler forwards ``stakeholder=None``."""
        with patch("bridgekit.cli.redteam", return_value=FAKE_REDTEAM) as mock_redteam:
            with patch("sys.argv", ["bridgekit", "redteam", "my analysis text"]):
                from bridgekit.cli import main
                main()
        mock_redteam.assert_called_once_with(
            text="my analysis text",
            stakeholder=None,
            provider=None,
            model=None,
        )
        assert FAKE_REDTEAM in capsys.readouterr().out

    def test_with_stakeholder(self):
        """``--stakeholder`` is passed through unchanged."""
        with patch("bridgekit.cli.redteam", return_value=FAKE_REDTEAM) as mock_redteam:
            with patch("sys.argv", ["bridgekit", "redteam", "my analysis text",
                                    "--stakeholder", "VP of Finance"]):
                from bridgekit.cli import main
                main()
        mock_redteam.assert_called_once_with(
            text="my analysis text",
            stakeholder="VP of Finance",
            provider=None,
            model=None,
        )

    def test_missing_text_exits(self):
        """Omitting the positional text makes argparse exit."""
        with patch("sys.argv", ["bridgekit", "redteam"]):
            from bridgekit.cli import main
            with pytest.raises(SystemExit):
                main()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class TestSearchCommand:
|
|
126
|
+
def test_with_source(self, capsys):
|
|
127
|
+
with patch("bridgekit.cli.ask", return_value=FAKE_SEARCH) as mock_ask:
|
|
128
|
+
with patch("sys.argv", ["bridgekit", "search", "my question",
|
|
129
|
+
"--source", "./my_docs"]):
|
|
130
|
+
from bridgekit.cli import main
|
|
131
|
+
main()
|
|
132
|
+
mock_ask.assert_called_once_with(
|
|
133
|
+
question="my question",
|
|
134
|
+
source="./my_docs",
|
|
135
|
+
text=None,
|
|
136
|
+
provider=None,
|
|
137
|
+
model=None,
|
|
138
|
+
)
|
|
139
|
+
assert FAKE_SEARCH in capsys.readouterr().out
|
|
140
|
+
|
|
141
|
+
def test_with_text(self):
|
|
142
|
+
with patch("bridgekit.cli.ask", return_value=FAKE_SEARCH) as mock_ask:
|
|
143
|
+
with patch("sys.argv", ["bridgekit", "search", "my question",
|
|
144
|
+
"--text", "some raw text"]):
|
|
145
|
+
from bridgekit.cli import main
|
|
146
|
+
main()
|
|
147
|
+
mock_ask.assert_called_once_with(
|
|
148
|
+
question="my question",
|
|
149
|
+
source=None,
|
|
150
|
+
text="some raw text",
|
|
151
|
+
provider=None,
|
|
152
|
+
model=None,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
def test_missing_source_and_text_exits(self, capsys):
|
|
156
|
+
with patch("sys.argv", ["bridgekit", "search", "my question"]):
|
|
157
|
+
from bridgekit.cli import main
|
|
158
|
+
with pytest.raises(SystemExit) as exc:
|
|
159
|
+
main()
|
|
160
|
+
assert exc.value.code == 1
|
|
161
|
+
assert "error" in capsys.readouterr().err
|
|
162
|
+
|
|
163
|
+
def test_missing_question_exits(self):
|
|
164
|
+
with patch("sys.argv", ["bridgekit", "search"]):
|
|
165
|
+
from bridgekit.cli import main
|
|
166
|
+
with pytest.raises(SystemExit):
|
|
167
|
+
main()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class TestNoCommand:
|
|
171
|
+
def test_no_subcommand_exits(self):
|
|
172
|
+
with patch("sys.argv", ["bridgekit"]):
|
|
173
|
+
from bridgekit.cli import main
|
|
174
|
+
with pytest.raises(SystemExit):
|
|
175
|
+
main()
|
|
@@ -159,6 +159,20 @@ class TestPlanOptionalParameters:
|
|
|
159
159
|
|
|
160
160
|
assert isinstance(result, str)
|
|
161
161
|
|
|
162
|
+
def test_custom_system_prompt_reaches_api(self):
|
|
163
|
+
custom_prompt = "You are a data scientist specializing in healthcare analytics."
|
|
164
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
165
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
166
|
+
mock_client = MagicMock()
|
|
167
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
168
|
+
MockAnthropic.return_value = mock_client
|
|
169
|
+
|
|
170
|
+
from bridgekit.planner import plan
|
|
171
|
+
plan("Should I use a t-test or ANOVA?", system_prompt=custom_prompt)
|
|
172
|
+
|
|
173
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
174
|
+
assert call_kwargs.kwargs.get("system") == custom_prompt
|
|
175
|
+
|
|
162
176
|
def test_all_parameters_included_in_api_call(self):
|
|
163
177
|
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
164
178
|
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
@@ -178,3 +192,33 @@ class TestPlanOptionalParameters:
|
|
|
178
192
|
content = str(messages_arg)
|
|
179
193
|
assert "5,000 users split 50/50." in content
|
|
180
194
|
assert "causal inference" in content
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class TestPlanMaxTokens:
|
|
198
|
+
"""plan() should pass max_tokens through to the API."""
|
|
199
|
+
|
|
200
|
+
def test_default_max_tokens_is_1024(self):
|
|
201
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
202
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
203
|
+
mock_client = MagicMock()
|
|
204
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
205
|
+
MockAnthropic.return_value = mock_client
|
|
206
|
+
|
|
207
|
+
from bridgekit.planner import plan
|
|
208
|
+
plan("Does our new onboarding flow increase upgrade rates?")
|
|
209
|
+
|
|
210
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
211
|
+
assert call_kwargs.kwargs.get("max_tokens") == 1024
|
|
212
|
+
|
|
213
|
+
def test_custom_max_tokens_reaches_api(self):
|
|
214
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
215
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
216
|
+
mock_client = MagicMock()
|
|
217
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
218
|
+
MockAnthropic.return_value = mock_client
|
|
219
|
+
|
|
220
|
+
from bridgekit.planner import plan
|
|
221
|
+
plan("Does our new onboarding flow increase upgrade rates?", max_tokens=2048)
|
|
222
|
+
|
|
223
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
224
|
+
assert call_kwargs.kwargs.get("max_tokens") == 2048
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pytest
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# ---------------------------------------------------------------------------
|
|
7
|
+
# Helpers
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
def _make_mock_message(text: str):
|
|
11
|
+
content_block = MagicMock()
|
|
12
|
+
content_block.text = text
|
|
13
|
+
message = MagicMock()
|
|
14
|
+
message.content = [content_block]
|
|
15
|
+
return message
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
FAKE_RESPONSE = (
|
|
19
|
+
"BRIDGEKIT RED TEAM\n"
|
|
20
|
+
"─────────────────────────────────────────\n"
|
|
21
|
+
"STAKEHOLDER: Skeptical Senior Executive\n\n"
|
|
22
|
+
"CRITIQUE 1: Sample Size\n"
|
|
23
|
+
'❯ "How many users was this actually tested on?"\n'
|
|
24
|
+
"WHY IT LANDS: No sample size is mentioned anywhere.\n"
|
|
25
|
+
"TO ADDRESS: Report n for each group with a power calculation.\n\n"
|
|
26
|
+
"CRITIQUE 2: Causation vs Correlation\n"
|
|
27
|
+
'❯ "You\'re assuming the feature caused this lift — prove it."\n'
|
|
28
|
+
"WHY IT LANDS: No control group is described.\n"
|
|
29
|
+
"TO ADDRESS: Show the experimental design with random assignment.\n\n"
|
|
30
|
+
"CRITIQUE 3: Business Impact\n"
|
|
31
|
+
'❯ "What does a 5% lift actually mean in dollars?"\n'
|
|
32
|
+
"WHY IT LANDS: Directional claims are not quantified.\n"
|
|
33
|
+
"TO ADDRESS: Translate the metric into revenue or cost terms.\n\n"
|
|
34
|
+
"─────────────────────────────────────────\n"
|
|
35
|
+
"HARDEST QUESTION TO ANSWER\n"
|
|
36
|
+
"What is the p-value and did you correct for multiple comparisons?"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Tests
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
class TestRedteamReturnsString:
|
|
45
|
+
"""redteam() should return a non-empty string."""
|
|
46
|
+
|
|
47
|
+
def test_returns_string(self):
|
|
48
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
49
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
50
|
+
mock_client = MagicMock()
|
|
51
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
52
|
+
MockAnthropic.return_value = mock_client
|
|
53
|
+
|
|
54
|
+
from bridgekit.redteam import redteam
|
|
55
|
+
result = redteam("We ran an A/B test and saw a 5% lift in conversions.")
|
|
56
|
+
|
|
57
|
+
assert isinstance(result, str)
|
|
58
|
+
assert len(result) > 0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class TestRedteamOutputStructure:
|
|
62
|
+
"""redteam() output should contain the required section headers."""
|
|
63
|
+
|
|
64
|
+
def test_output_contains_critique(self):
|
|
65
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
66
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
67
|
+
mock_client = MagicMock()
|
|
68
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
69
|
+
MockAnthropic.return_value = mock_client
|
|
70
|
+
|
|
71
|
+
from bridgekit.redteam import redteam
|
|
72
|
+
result = redteam("We ran an A/B test and saw a 5% lift in conversions.")
|
|
73
|
+
|
|
74
|
+
assert "CRITIQUE" in result
|
|
75
|
+
|
|
76
|
+
def test_output_contains_hardest_question(self):
|
|
77
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
78
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
79
|
+
mock_client = MagicMock()
|
|
80
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
81
|
+
MockAnthropic.return_value = mock_client
|
|
82
|
+
|
|
83
|
+
from bridgekit.redteam import redteam
|
|
84
|
+
result = redteam("We ran an A/B test and saw a 5% lift in conversions.")
|
|
85
|
+
|
|
86
|
+
assert "HARDEST QUESTION" in result
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class TestRedteamMissingApiKey:
|
|
90
|
+
"""redteam() should raise EnvironmentError when ANTHROPIC_API_KEY is absent."""
|
|
91
|
+
|
|
92
|
+
def test_raises_environment_error_when_key_missing(self):
|
|
93
|
+
env = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}
|
|
94
|
+
with patch.dict(os.environ, env, clear=True):
|
|
95
|
+
from bridgekit.redteam import redteam
|
|
96
|
+
with pytest.raises(EnvironmentError):
|
|
97
|
+
redteam("Some analysis text.")
|
|
98
|
+
|
|
99
|
+
def test_error_message_mentions_key(self):
|
|
100
|
+
env = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}
|
|
101
|
+
with patch.dict(os.environ, env, clear=True):
|
|
102
|
+
from bridgekit.redteam import redteam
|
|
103
|
+
with pytest.raises(EnvironmentError, match="ANTHROPIC_API_KEY"):
|
|
104
|
+
redteam("Some analysis text.")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class TestRedteamEmptyInput:
|
|
108
|
+
"""redteam() should raise ValueError for empty or whitespace-only input."""
|
|
109
|
+
|
|
110
|
+
def test_empty_string_raises_value_error(self):
|
|
111
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
112
|
+
from bridgekit.redteam import redteam
|
|
113
|
+
with pytest.raises(ValueError, match="empty"):
|
|
114
|
+
redteam("")
|
|
115
|
+
|
|
116
|
+
def test_whitespace_only_raises_value_error(self):
|
|
117
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
118
|
+
from bridgekit.redteam import redteam
|
|
119
|
+
with pytest.raises(ValueError, match="empty"):
|
|
120
|
+
redteam(" ")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class TestRedteamStakeholder:
|
|
124
|
+
"""redteam() should include a custom stakeholder in the system prompt."""
|
|
125
|
+
|
|
126
|
+
def test_custom_stakeholder_reaches_system_prompt(self):
|
|
127
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
128
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
129
|
+
mock_client = MagicMock()
|
|
130
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
131
|
+
MockAnthropic.return_value = mock_client
|
|
132
|
+
|
|
133
|
+
from bridgekit.redteam import redteam
|
|
134
|
+
redteam("Some analysis text.", stakeholder="VP of Finance")
|
|
135
|
+
|
|
136
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
137
|
+
assert "VP of Finance" in call_kwargs.kwargs.get("system", "")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class TestRedteamCustomSystemPrompt:
|
|
141
|
+
"""redteam() should forward a custom system_prompt to the API, ignoring stakeholder."""
|
|
142
|
+
|
|
143
|
+
def test_custom_system_prompt_reaches_api(self):
|
|
144
|
+
custom_prompt = "You are a hostile regulator looking for compliance violations."
|
|
145
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
146
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
147
|
+
mock_client = MagicMock()
|
|
148
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
149
|
+
MockAnthropic.return_value = mock_client
|
|
150
|
+
|
|
151
|
+
from bridgekit.redteam import redteam
|
|
152
|
+
redteam("Some analysis text.", system_prompt=custom_prompt)
|
|
153
|
+
|
|
154
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
155
|
+
assert call_kwargs.kwargs.get("system") == custom_prompt
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class TestRedteamMaxTokens:
|
|
159
|
+
"""redteam() should pass max_tokens through to the API."""
|
|
160
|
+
|
|
161
|
+
def test_default_max_tokens_is_1024(self):
|
|
162
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
163
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
164
|
+
mock_client = MagicMock()
|
|
165
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
166
|
+
MockAnthropic.return_value = mock_client
|
|
167
|
+
|
|
168
|
+
from bridgekit.redteam import redteam
|
|
169
|
+
redteam("Some analysis text.")
|
|
170
|
+
|
|
171
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
172
|
+
assert call_kwargs.kwargs.get("max_tokens") == 1024
|
|
173
|
+
|
|
174
|
+
def test_custom_max_tokens_reaches_api(self):
|
|
175
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
176
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
177
|
+
mock_client = MagicMock()
|
|
178
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
179
|
+
MockAnthropic.return_value = mock_client
|
|
180
|
+
|
|
181
|
+
from bridgekit.redteam import redteam
|
|
182
|
+
redteam("Some analysis text.", max_tokens=2048)
|
|
183
|
+
|
|
184
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
185
|
+
assert call_kwargs.kwargs.get("max_tokens") == 2048
|
|
@@ -158,3 +158,51 @@ class TestEvaluateApiCallShape:
|
|
|
158
158
|
messages_arg = call_kwargs.kwargs.get("messages") or call_kwargs.args[0]
|
|
159
159
|
content = str(messages_arg)
|
|
160
160
|
assert user_text in content
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class TestEvaluateCustomSystemPrompt:
|
|
164
|
+
"""evaluate() should forward a custom system_prompt to the API."""
|
|
165
|
+
|
|
166
|
+
def test_custom_system_prompt_reaches_api(self):
|
|
167
|
+
custom_prompt = "You are a skeptical PhD statistician focused only on methodology."
|
|
168
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
169
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
170
|
+
mock_client = MagicMock()
|
|
171
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
172
|
+
MockAnthropic.return_value = mock_client
|
|
173
|
+
|
|
174
|
+
from bridgekit.reviewer import evaluate
|
|
175
|
+
evaluate("Some analysis text.", system_prompt=custom_prompt)
|
|
176
|
+
|
|
177
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
178
|
+
assert call_kwargs.kwargs.get("system") == custom_prompt
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class TestEvaluateMaxTokens:
|
|
182
|
+
"""evaluate() should pass max_tokens through to the API."""
|
|
183
|
+
|
|
184
|
+
def test_default_max_tokens_is_1024(self):
|
|
185
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
186
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
187
|
+
mock_client = MagicMock()
|
|
188
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
189
|
+
MockAnthropic.return_value = mock_client
|
|
190
|
+
|
|
191
|
+
from bridgekit.reviewer import evaluate
|
|
192
|
+
evaluate("Some analysis text.")
|
|
193
|
+
|
|
194
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
195
|
+
assert call_kwargs.kwargs.get("max_tokens") == 1024
|
|
196
|
+
|
|
197
|
+
def test_custom_max_tokens_reaches_api(self):
|
|
198
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
199
|
+
with patch("anthropic.Anthropic") as MockAnthropic:
|
|
200
|
+
mock_client = MagicMock()
|
|
201
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
|
|
202
|
+
MockAnthropic.return_value = mock_client
|
|
203
|
+
|
|
204
|
+
from bridgekit.reviewer import evaluate
|
|
205
|
+
evaluate("Some analysis text.", max_tokens=2048)
|
|
206
|
+
|
|
207
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
208
|
+
assert call_kwargs.kwargs.get("max_tokens") == 2048
|
|
@@ -222,6 +222,27 @@ class TestAskWithSourceFolder:
|
|
|
222
222
|
|
|
223
223
|
assert mock_client.messages.create.call_count == 1
|
|
224
224
|
|
|
225
|
+
def test_custom_system_prompt_reaches_api(self):
|
|
226
|
+
custom_prompt = "You are a financial analyst. Answer only in terms of revenue impact."
|
|
227
|
+
mock_chromadb, mock_ef = _make_mock_chromadb()
|
|
228
|
+
|
|
229
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
230
|
+
with patch("anthropic.Anthropic") as MockAnthropic, \
|
|
231
|
+
patch("chromadb.Client", mock_chromadb.Client), \
|
|
232
|
+
patch(
|
|
233
|
+
"chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
|
|
234
|
+
mock_ef,
|
|
235
|
+
):
|
|
236
|
+
mock_client = MagicMock()
|
|
237
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_ANSWER)
|
|
238
|
+
MockAnthropic.return_value = mock_client
|
|
239
|
+
|
|
240
|
+
from bridgekit.search import ask
|
|
241
|
+
ask("What was revenue?", text="Revenue was $5M.", system_prompt=custom_prompt)
|
|
242
|
+
|
|
243
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
244
|
+
assert call_kwargs.kwargs.get("system") == custom_prompt
|
|
245
|
+
|
|
225
246
|
def test_source_folder_empty_raises_value_error(self):
|
|
226
247
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
227
248
|
# Folder exists but has no supported files
|
|
@@ -236,3 +257,45 @@ class TestAskWithSourceFolder:
|
|
|
236
257
|
from bridgekit.search import ask
|
|
237
258
|
with pytest.raises(ValueError, match="No content found"):
|
|
238
259
|
ask("What happened?", source=tmpdir)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class TestAskMaxTokens:
|
|
263
|
+
"""ask() should pass max_tokens through to the API."""
|
|
264
|
+
|
|
265
|
+
def test_default_max_tokens_is_1024(self):
|
|
266
|
+
mock_chromadb, mock_ef = _make_mock_chromadb()
|
|
267
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
268
|
+
with patch("anthropic.Anthropic") as MockAnthropic, \
|
|
269
|
+
patch("chromadb.Client", mock_chromadb.Client), \
|
|
270
|
+
patch(
|
|
271
|
+
"chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
|
|
272
|
+
mock_ef,
|
|
273
|
+
):
|
|
274
|
+
mock_client = MagicMock()
|
|
275
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_ANSWER)
|
|
276
|
+
MockAnthropic.return_value = mock_client
|
|
277
|
+
|
|
278
|
+
from bridgekit.search import ask
|
|
279
|
+
ask("What was the conversion rate?", text="The conversion rate increased by 12%.")
|
|
280
|
+
|
|
281
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
282
|
+
assert call_kwargs.kwargs.get("max_tokens") == 1024
|
|
283
|
+
|
|
284
|
+
def test_custom_max_tokens_reaches_api(self):
|
|
285
|
+
mock_chromadb, mock_ef = _make_mock_chromadb()
|
|
286
|
+
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
287
|
+
with patch("anthropic.Anthropic") as MockAnthropic, \
|
|
288
|
+
patch("chromadb.Client", mock_chromadb.Client), \
|
|
289
|
+
patch(
|
|
290
|
+
"chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
|
|
291
|
+
mock_ef,
|
|
292
|
+
):
|
|
293
|
+
mock_client = MagicMock()
|
|
294
|
+
mock_client.messages.create.return_value = _make_mock_message(FAKE_ANSWER)
|
|
295
|
+
MockAnthropic.return_value = mock_client
|
|
296
|
+
|
|
297
|
+
from bridgekit.search import ask
|
|
298
|
+
ask("What was the conversion rate?", text="The conversion rate increased by 12%.", max_tokens=2048)
|
|
299
|
+
|
|
300
|
+
call_kwargs = mock_client.messages.create.call_args
|
|
301
|
+
assert call_kwargs.kwargs.get("max_tokens") == 2048
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|