bridgekit 0.3.6__tar.gz → 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {bridgekit-0.3.6 → bridgekit-0.3.8}/PKG-INFO +40 -7
  2. {bridgekit-0.3.6 → bridgekit-0.3.8}/README.md +39 -6
  3. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/__init__.py +1 -1
  4. bridgekit-0.3.8/bridgekit/cli.py +105 -0
  5. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/config.py +1 -1
  6. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/planner.py +5 -3
  7. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/redteam.py +20 -17
  8. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/reviewer.py +6 -4
  9. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/search.py +19 -14
  10. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/PKG-INFO +40 -7
  11. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/SOURCES.txt +4 -0
  12. bridgekit-0.3.8/bridgekit.egg-info/entry_points.txt +2 -0
  13. {bridgekit-0.3.6 → bridgekit-0.3.8}/pyproject.toml +4 -1
  14. bridgekit-0.3.8/tests/test_cli.py +175 -0
  15. {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_planner.py +44 -0
  16. bridgekit-0.3.8/tests/test_redteam.py +185 -0
  17. {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_reviewer.py +48 -0
  18. {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_search.py +63 -0
  19. {bridgekit-0.3.6 → bridgekit-0.3.8}/LICENSE +0 -0
  20. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit/providers.py +0 -0
  21. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/dependency_links.txt +0 -0
  22. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/requires.txt +0 -0
  23. {bridgekit-0.3.6 → bridgekit-0.3.8}/bridgekit.egg-info/top_level.txt +0 -0
  24. {bridgekit-0.3.6 → bridgekit-0.3.8}/setup.cfg +0 -0
  25. {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_config.py +0 -0
  26. {bridgekit-0.3.6 → bridgekit-0.3.8}/tests/test_providers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bridgekit
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: AI tools that make you a better data scientist, not a redundant one.
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://usebridgekit.com
@@ -136,6 +136,9 @@ onboarding users to reporting as a growth lever.
136
136
  """
137
137
 
138
138
  print(evaluate(text))
139
+
140
+ # Override for longer analyses
141
+ print(evaluate(text, max_tokens=2048))
139
142
  ```
140
143
 
141
144
  **Output:**
@@ -190,6 +193,9 @@ Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
190
193
  from bridgekit import ask
191
194
 
192
195
  print(ask("what drove churn in Q3?", source="reports/"))
196
+
197
+ # Override for longer responses
198
+ print(ask("what drove churn in Q3?", source="reports/", max_tokens=2048))
193
199
  ```
194
200
 
195
201
  **From raw text:**
@@ -234,7 +240,7 @@ print(plan(
234
240
  ))
235
241
  ```
236
242
 
237
- `data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
243
+ `data_description`, `goal`, and `max_tokens` are optional — the more context you provide, the more tailored the recommendation.
238
244
 
239
245
  **`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
240
246
 
@@ -297,6 +303,9 @@ print(redteam(text))
297
303
  # Or specify a stakeholder
298
304
  print(redteam(text, stakeholder="VP of Engineering"))
299
305
  print(redteam(text, stakeholder="VP of Marketing"))
306
+
307
+ # Override for longer responses
308
+ print(redteam(text, max_tokens=2048))
300
309
  ```
301
310
 
302
311
  Same writeup, different attack angles:
@@ -414,15 +423,39 @@ Bridgekit automatically detects the provider from model names:
414
423
  - Models starting with "gemini" → Google Gemini
415
424
 
416
425
  **Default models by provider:**
417
- - Anthropic: `claude-3-5-sonnet-20241022`
426
+ - Anthropic: `claude-opus-4-8`
418
427
  - OpenAI: `gpt-4o`
419
428
  - Gemini: `gemini-1.5-pro`
420
429
 
421
430
  All tools support the same `provider` and `model` parameters:
422
- - `evaluate(text, provider=None, model=None)`
423
- - `plan(question, provider=None, model=None, ...)`
424
- - `ask(question, provider=None, model=None, ...)`
425
- - `redteam(text, provider=None, model=None, ...)`
431
+ - `evaluate(text, provider=None, model=None, system_prompt=None)`
432
+ - `plan(question, provider=None, model=None, ..., system_prompt=None)`
433
+ - `ask(question, provider=None, model=None, ..., system_prompt=None)`
434
+ - `redteam(text, provider=None, model=None, ..., system_prompt=None)`
435
+
436
+ ---
437
+
438
+ ## Custom System Prompts
439
+
440
+ Every tool accepts an optional `system_prompt` parameter to override the default persona. Use this to adapt the tone or focus to a specific domain without changing anything else.
441
+
442
+ ```python
443
+ from bridgekit import evaluate, plan, ask, redteam
444
+
445
+ # Narrow the reviewer to a specific domain
446
+ print(evaluate("my analysis", system_prompt="You are a skeptical PhD statistician focused only on methodology"))
447
+
448
+ # Tailor the planner to a specific industry
449
+ print(plan("my question", system_prompt="You are a data scientist specializing in healthcare analytics"))
450
+
451
+ # Replace the red team persona entirely
452
+ print(redteam("my analysis", system_prompt="You are a hostile regulator looking for compliance violations"))
453
+
454
+ # Change the answering style for ask
455
+ print(ask("my question", text="...", system_prompt="You are a financial analyst. Answer only in terms of revenue impact."))
456
+ ```
457
+
458
+ When `system_prompt` is not provided, each tool uses its built-in default — existing behavior is unchanged.
426
459
 
427
460
  ---
428
461
 
@@ -104,6 +104,9 @@ onboarding users to reporting as a growth lever.
104
104
  """
105
105
 
106
106
  print(evaluate(text))
107
+
108
+ # Override for longer analyses
109
+ print(evaluate(text, max_tokens=2048))
107
110
  ```
108
111
 
109
112
  **Output:**
@@ -158,6 +161,9 @@ Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
158
161
  from bridgekit import ask
159
162
 
160
163
  print(ask("what drove churn in Q3?", source="reports/"))
164
+
165
+ # Override for longer responses
166
+ print(ask("what drove churn in Q3?", source="reports/", max_tokens=2048))
161
167
  ```
162
168
 
163
169
  **From raw text:**
@@ -202,7 +208,7 @@ print(plan(
202
208
  ))
203
209
  ```
204
210
 
205
- `data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
211
+ `data_description`, `goal`, and `max_tokens` are optional — the more context you provide, the more tailored the recommendation.
206
212
 
207
213
  **`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
208
214
 
@@ -265,6 +271,9 @@ print(redteam(text))
265
271
  # Or specify a stakeholder
266
272
  print(redteam(text, stakeholder="VP of Engineering"))
267
273
  print(redteam(text, stakeholder="VP of Marketing"))
274
+
275
+ # Override for longer responses
276
+ print(redteam(text, max_tokens=2048))
268
277
  ```
269
278
 
270
279
  Same writeup, different attack angles:
@@ -382,15 +391,39 @@ Bridgekit automatically detects the provider from model names:
382
391
  - Models starting with "gemini" → Google Gemini
383
392
 
384
393
  **Default models by provider:**
385
- - Anthropic: `claude-3-5-sonnet-20241022`
394
+ - Anthropic: `claude-opus-4-8`
386
395
  - OpenAI: `gpt-4o`
387
396
  - Gemini: `gemini-1.5-pro`
388
397
 
389
398
  All tools support the same `provider` and `model` parameters:
390
- - `evaluate(text, provider=None, model=None)`
391
- - `plan(question, provider=None, model=None, ...)`
392
- - `ask(question, provider=None, model=None, ...)`
393
- - `redteam(text, provider=None, model=None, ...)`
399
+ - `evaluate(text, provider=None, model=None, system_prompt=None)`
400
+ - `plan(question, provider=None, model=None, ..., system_prompt=None)`
401
+ - `ask(question, provider=None, model=None, ..., system_prompt=None)`
402
+ - `redteam(text, provider=None, model=None, ..., system_prompt=None)`
403
+
404
+ ---
405
+
406
+ ## Custom System Prompts
407
+
408
+ Every tool accepts an optional `system_prompt` parameter to override the default persona. Use this to adapt the tone or focus to a specific domain without changing anything else.
409
+
410
+ ```python
411
+ from bridgekit import evaluate, plan, ask, redteam
412
+
413
+ # Narrow the reviewer to a specific domain
414
+ print(evaluate("my analysis", system_prompt="You are a skeptical PhD statistician focused only on methodology"))
415
+
416
+ # Tailor the planner to a specific industry
417
+ print(plan("my question", system_prompt="You are a data scientist specializing in healthcare analytics"))
418
+
419
+ # Replace the red team persona entirely
420
+ print(redteam("my analysis", system_prompt="You are a hostile regulator looking for compliance violations"))
421
+
422
+ # Change the answering style for ask
423
+ print(ask("my question", text="...", system_prompt="You are a financial analyst. Answer only in terms of revenue impact."))
424
+ ```
425
+
426
+ When `system_prompt` is not provided, each tool uses its built-in default — existing behavior is unchanged.
394
427
 
395
428
  ---
396
429
 
@@ -3,5 +3,5 @@ from .search import ask
3
3
  from .planner import plan
4
4
  from .redteam import redteam
5
5
 
6
- __version__ = "0.3.4"
6
+ __version__ = "0.3.8"
7
7
  __all__ = ["evaluate", "ask", "plan", "redteam"]
@@ -0,0 +1,105 @@
1
+ import argparse
2
+ import sys
3
+
4
+ from .planner import plan
5
+ from .reviewer import evaluate
6
+ from .redteam import redteam
7
+ from .search import ask
8
+
9
+
10
+ def _add_provider_args(parser: argparse.ArgumentParser) -> None:
11
+ parser.add_argument("--provider", help='AI provider: "anthropic", "openai", or "gemini"')
12
+ parser.add_argument("--model", help="Specific model to use (e.g. claude-opus-4-8, gpt-4o)")
13
+
14
+
15
+ def _cmd_plan(args: argparse.Namespace) -> None:
16
+ result = plan(
17
+ question=args.question,
18
+ data_description=args.data,
19
+ goal=args.goal,
20
+ provider=args.provider,
21
+ model=args.model,
22
+ )
23
+ print(result)
24
+
25
+
26
+ def _cmd_review(args: argparse.Namespace) -> None:
27
+ result = evaluate(
28
+ text=args.text,
29
+ provider=args.provider,
30
+ model=args.model,
31
+ )
32
+ print(result)
33
+
34
+
35
+ def _cmd_redteam(args: argparse.Namespace) -> None:
36
+ result = redteam(
37
+ text=args.text,
38
+ stakeholder=args.stakeholder,
39
+ provider=args.provider,
40
+ model=args.model,
41
+ )
42
+ print(result)
43
+
44
+
45
+ def _cmd_search(args: argparse.Namespace) -> None:
46
+ if not args.source and not args.text:
47
+ print("error: provide --source or --text", file=sys.stderr)
48
+ sys.exit(1)
49
+ result = ask(
50
+ question=args.question,
51
+ source=args.source,
52
+ text=args.text,
53
+ provider=args.provider,
54
+ model=args.model,
55
+ )
56
+ print(result)
57
+
58
+
59
+ def main() -> None:
60
+ parser = argparse.ArgumentParser(
61
+ prog="bridgekit",
62
+ description="AI tools for data scientists",
63
+ )
64
+ sub = parser.add_subparsers(dest="command", metavar="COMMAND")
65
+ sub.required = True
66
+
67
+ # plan
68
+ p_plan = sub.add_parser("plan", help="Recommend the right analytical approach")
69
+ p_plan.add_argument("question", help="The analytical question you want to answer")
70
+ p_plan.add_argument("--data", metavar="DESCRIPTION", help="Description of your available data")
71
+ p_plan.add_argument("--goal", help='Goal of the analysis (e.g. "prediction", "hypothesis testing")')
72
+ _add_provider_args(p_plan)
73
+ p_plan.set_defaults(func=_cmd_plan)
74
+
75
+ # review
76
+ p_review = sub.add_parser("review", help="Evaluate a data science analysis writeup")
77
+ p_review.add_argument("text", help="The analysis text to review")
78
+ _add_provider_args(p_review)
79
+ p_review.set_defaults(func=_cmd_review)
80
+
81
+ # redteam
82
+ p_redteam = sub.add_parser("redteam", help="Red-team an analysis from a skeptical stakeholder")
83
+ p_redteam.add_argument("text", help="The analysis text to red-team")
84
+ p_redteam.add_argument("--stakeholder", help='Stakeholder role (e.g. "VP of Finance")')
85
+ _add_provider_args(p_redteam)
86
+ p_redteam.set_defaults(func=_cmd_redteam)
87
+
88
+ # search
89
+ p_search = sub.add_parser("search", help="Ask a question across documents or text")
90
+ p_search.add_argument("question", help="The question to answer")
91
+ p_search.add_argument("--source", metavar="PATH", help="Folder of documents to search")
92
+ p_search.add_argument("--text", help="Raw text to search instead of a folder")
93
+ _add_provider_args(p_search)
94
+ p_search.set_defaults(func=_cmd_search)
95
+
96
+ args = parser.parse_args()
97
+ try:
98
+ args.func(args)
99
+ except (ValueError, EnvironmentError) as e:
100
+ print(f"error: {e}", file=sys.stderr)
101
+ sys.exit(1)
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
@@ -11,7 +11,7 @@ class Provider(Enum):
11
11
 
12
12
  # Default models for each provider
13
13
  DEFAULT_MODELS = {
14
- Provider.ANTHROPIC: "claude-opus-4-6",
14
+ Provider.ANTHROPIC: "claude-opus-4-8",
15
15
  Provider.OPENAI: "gpt-4o",
16
16
  Provider.GEMINI: "gemini-1.5-pro"
17
17
  }
@@ -29,7 +29,7 @@ ALTERNATIVES
29
29
  """
30
30
 
31
31
 
32
- def plan(question: str, data_description: str = None, goal: str = None, provider: str = None, model: str = None) -> str:
32
+ def plan(question: str, data_description: str = None, goal: str = None, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
33
33
  """
34
34
  Recommend the right analytical approach for your problem.
35
35
 
@@ -41,6 +41,8 @@ def plan(question: str, data_description: str = None, goal: str = None, provider
41
41
  provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
42
42
  If not specified, defaults to "anthropic" or infers from model.
43
43
  model: Optional. The specific model to use. If not specified, uses the provider's default.
44
+ system_prompt: Optional. A custom system prompt to override the default planner persona.
45
+ max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
44
46
 
45
47
  Returns:
46
48
  A structured analytical plan covering the recommended approach, assumptions,
@@ -62,8 +64,8 @@ def plan(question: str, data_description: str = None, goal: str = None, provider
62
64
 
63
65
  return create_message(
64
66
  provider=provider_enum,
65
- system_prompt=SYSTEM_PROMPT,
67
+ system_prompt=system_prompt or SYSTEM_PROMPT,
66
68
  user_message=user_message,
67
69
  model=model,
68
- max_tokens=1024
70
+ max_tokens=max_tokens
69
71
  )
@@ -39,18 +39,21 @@ HARDEST QUESTION TO ANSWER
39
39
  """
40
40
 
41
41
 
42
- def redteam(text: str, stakeholder: str = None, provider: str = None, model: str = None) -> str:
42
+ def redteam(text: str, stakeholder: str = None, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
43
43
  """
44
44
  Red-team a data science analysis writeup from the perspective of a skeptical stakeholder.
45
45
 
46
46
  Args:
47
- text: Your analysis writeup as a plain string.
48
- stakeholder: Optional. The skeptical stakeholder role (e.g. "VP of Finance",
49
- "skeptical board member", "Chief Revenue Officer").
50
- Defaults to a generic skeptical senior executive.
51
- provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
52
- If not specified, defaults to "anthropic" or infers from model.
53
- model: Optional. The specific model to use. If not specified, uses the provider's default.
47
+ text: Your analysis writeup as a plain string.
48
+ stakeholder: Optional. The skeptical stakeholder role (e.g. "VP of Finance",
49
+ "skeptical board member", "Chief Revenue Officer").
50
+ Defaults to a generic skeptical senior executive.
51
+ provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
52
+ If not specified, defaults to "anthropic" or infers from model.
53
+ model: Optional. The specific model to use. If not specified, uses the provider's default.
54
+ system_prompt: Optional. A custom system prompt to fully override the default red team persona.
55
+ When provided, the stakeholder parameter is ignored.
56
+ max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
54
57
 
55
58
  Returns:
56
59
  The 3-5 hardest critiques the stakeholder would make, plus the single
@@ -64,20 +67,20 @@ def redteam(text: str, stakeholder: str = None, provider: str = None, model: str
64
67
  if model is None:
65
68
  model = get_default_model(provider_enum)
66
69
 
67
- stakeholder_label = stakeholder if stakeholder else "Skeptical Senior Executive"
68
- stakeholder_desc = stakeholder if stakeholder else DEFAULT_STAKEHOLDER
69
-
70
- system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
71
- stakeholder=stakeholder_desc,
72
- stakeholder_label=stakeholder_label
73
- )
70
+ if system_prompt is None:
71
+ stakeholder_label = stakeholder if stakeholder else "Skeptical Senior Executive"
72
+ stakeholder_desc = stakeholder if stakeholder else DEFAULT_STAKEHOLDER
73
+ system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
74
+ stakeholder=stakeholder_desc,
75
+ stakeholder_label=stakeholder_label
76
+ )
74
77
 
75
78
  user_message = f"Red-team this analysis writeup:\n\n{text}"
76
-
79
+
77
80
  return create_message(
78
81
  provider=provider_enum,
79
82
  system_prompt=system_prompt,
80
83
  user_message=user_message,
81
84
  model=model,
82
- max_tokens=1024
85
+ max_tokens=max_tokens
83
86
  )
@@ -42,7 +42,7 @@ BOTTOM LINE
42
42
  [one sentence]
43
43
  """
44
44
 
45
- def evaluate(text: str, provider: str = None, model: str = None) -> str:
45
+ def evaluate(text: str, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
46
46
  """
47
47
  Evaluate a data science analysis writeup and return structured feedback.
48
48
 
@@ -51,6 +51,8 @@ def evaluate(text: str, provider: str = None, model: str = None) -> str:
51
51
  provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
52
52
  If not specified, defaults to "anthropic" or infers from model.
53
53
  model: Optional. The specific model to use. If not specified, uses the provider's default.
54
+ system_prompt: Optional. A custom system prompt to override the default reviewer persona.
55
+ max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
54
56
 
55
57
  Returns:
56
58
  Structured feedback across four dimensions.
@@ -64,11 +66,11 @@ def evaluate(text: str, provider: str = None, model: str = None) -> str:
64
66
  model = get_default_model(provider_enum)
65
67
 
66
68
  user_message = f"Please review this analysis writeup:\n\n{text}"
67
-
69
+
68
70
  return create_message(
69
71
  provider=provider_enum,
70
- system_prompt=SYSTEM_PROMPT,
72
+ system_prompt=system_prompt or SYSTEM_PROMPT,
71
73
  user_message=user_message,
72
74
  model=model,
73
- max_tokens=1024
75
+ max_tokens=max_tokens
74
76
  )
@@ -49,17 +49,26 @@ def _chunk(text: str) -> list[str]:
49
49
  return [c for c in chunks if c.strip()]
50
50
 
51
51
 
52
- def ask(question: str, source: str = None, text: str = None, provider: str = None, model: str = None) -> str:
52
+ DEFAULT_SYSTEM_PROMPT = (
53
+ "You are a senior data scientist answering questions based on analysis reports. "
54
+ "Answer only from the provided context. Be specific and cite findings where relevant. "
55
+ "If the context does not contain enough information to answer, say so clearly."
56
+ )
57
+
58
+
59
+ def ask(question: str, source: str = None, text: str = None, provider: str = None, model: str = None, system_prompt: str = None, max_tokens: int = 1024) -> str:
53
60
  """
54
61
  Ask a question across a collection of analysis documents or raw text.
55
62
 
56
63
  Args:
57
- question: The question to answer.
58
- source: Path to a folder containing .txt, .md, .pdf, .docx, .pptx, or .ipynb files.
59
- text: A raw text string to search instead of a folder.
60
- provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
61
- If not specified, defaults to "anthropic" or infers from model.
62
- model: Optional. The specific model to use. If not specified, uses the provider's default.
64
+ question: The question to answer.
65
+ source: Path to a folder containing .txt, .md, .pdf, .docx, .pptx, or .ipynb files.
66
+ text: A raw text string to search instead of a folder.
67
+ provider: Optional. The AI provider to use ("anthropic", "openai", "gemini").
68
+ If not specified, defaults to "anthropic" or infers from model.
69
+ model: Optional. The specific model to use. If not specified, uses the provider's default.
70
+ system_prompt: Optional. A custom system prompt to override the default answering persona.
71
+ max_tokens: Optional. Maximum tokens in the response. Defaults to 1024.
63
72
 
64
73
  Returns:
65
74
  An answer grounded in the provided documents.
@@ -107,15 +116,11 @@ def ask(question: str, source: str = None, text: str = None, provider: str = Non
107
116
 
108
117
  # Generate answer with specified provider
109
118
  user_message = f"Context from analysis reports:\n\n{context}\n\nQuestion: {question}"
110
-
119
+
111
120
  return create_message(
112
121
  provider=provider_enum,
113
- system_prompt=(
114
- "You are a senior data scientist answering questions based on analysis reports. "
115
- "Answer only from the provided context. Be specific and cite findings where relevant. "
116
- "If the context does not contain enough information to answer, say so clearly."
117
- ),
122
+ system_prompt=system_prompt or DEFAULT_SYSTEM_PROMPT,
118
123
  user_message=user_message,
119
124
  model=model,
120
- max_tokens=1024
125
+ max_tokens=max_tokens
121
126
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bridgekit
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: AI tools that make you a better data scientist, not a redundant one.
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://usebridgekit.com
@@ -136,6 +136,9 @@ onboarding users to reporting as a growth lever.
136
136
  """
137
137
 
138
138
  print(evaluate(text))
139
+
140
+ # Override for longer analyses
141
+ print(evaluate(text, max_tokens=2048))
139
142
  ```
140
143
 
141
144
  **Output:**
@@ -190,6 +193,9 @@ Supports `.txt`, `.md`, `.pdf`, `.docx`, `.pptx`, and `.ipynb` files.
190
193
  from bridgekit import ask
191
194
 
192
195
  print(ask("what drove churn in Q3?", source="reports/"))
196
+
197
+ # Override for longer responses
198
+ print(ask("what drove churn in Q3?", source="reports/", max_tokens=2048))
193
199
  ```
194
200
 
195
201
  **From raw text:**
@@ -234,7 +240,7 @@ print(plan(
234
240
  ))
235
241
  ```
236
242
 
237
- `data_description` and `goal` are optional — the more context you provide, the more tailored the recommendation.
243
+ `data_description`, `goal`, and `max_tokens` are optional — the more context you provide, the more tailored the recommendation.
238
244
 
239
245
  **`goal` examples:** `"causal inference"`, `"prediction"`, `"segmentation"`, `"hypothesis testing"`, `"exploration"`
240
246
 
@@ -297,6 +303,9 @@ print(redteam(text))
297
303
  # Or specify a stakeholder
298
304
  print(redteam(text, stakeholder="VP of Engineering"))
299
305
  print(redteam(text, stakeholder="VP of Marketing"))
306
+
307
+ # Override for longer responses
308
+ print(redteam(text, max_tokens=2048))
300
309
  ```
301
310
 
302
311
  Same writeup, different attack angles:
@@ -414,15 +423,39 @@ Bridgekit automatically detects the provider from model names:
414
423
  - Models starting with "gemini" → Google Gemini
415
424
 
416
425
  **Default models by provider:**
417
- - Anthropic: `claude-3-5-sonnet-20241022`
426
+ - Anthropic: `claude-opus-4-8`
418
427
  - OpenAI: `gpt-4o`
419
428
  - Gemini: `gemini-1.5-pro`
420
429
 
421
430
  All tools support the same `provider` and `model` parameters:
422
- - `evaluate(text, provider=None, model=None)`
423
- - `plan(question, provider=None, model=None, ...)`
424
- - `ask(question, provider=None, model=None, ...)`
425
- - `redteam(text, provider=None, model=None, ...)`
431
+ - `evaluate(text, provider=None, model=None, system_prompt=None)`
432
+ - `plan(question, provider=None, model=None, ..., system_prompt=None)`
433
+ - `ask(question, provider=None, model=None, ..., system_prompt=None)`
434
+ - `redteam(text, provider=None, model=None, ..., system_prompt=None)`
435
+
436
+ ---
437
+
438
+ ## Custom System Prompts
439
+
440
+ Every tool accepts an optional `system_prompt` parameter to override the default persona. Use this to adapt the tone or focus to a specific domain without changing anything else.
441
+
442
+ ```python
443
+ from bridgekit import evaluate, plan, ask, redteam
444
+
445
+ # Narrow the reviewer to a specific domain
446
+ print(evaluate("my analysis", system_prompt="You are a skeptical PhD statistician focused only on methodology"))
447
+
448
+ # Tailor the planner to a specific industry
449
+ print(plan("my question", system_prompt="You are a data scientist specializing in healthcare analytics"))
450
+
451
+ # Replace the red team persona entirely
452
+ print(redteam("my analysis", system_prompt="You are a hostile regulator looking for compliance violations"))
453
+
454
+ # Change the answering style for ask
455
+ print(ask("my question", text="...", system_prompt="You are a financial analyst. Answer only in terms of revenue impact."))
456
+ ```
457
+
458
+ When `system_prompt` is not provided, each tool uses its built-in default — existing behavior is unchanged.
426
459
 
427
460
  ---
428
461
 
@@ -2,6 +2,7 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  bridgekit/__init__.py
5
+ bridgekit/cli.py
5
6
  bridgekit/config.py
6
7
  bridgekit/planner.py
7
8
  bridgekit/providers.py
@@ -11,10 +12,13 @@ bridgekit/search.py
11
12
  bridgekit.egg-info/PKG-INFO
12
13
  bridgekit.egg-info/SOURCES.txt
13
14
  bridgekit.egg-info/dependency_links.txt
15
+ bridgekit.egg-info/entry_points.txt
14
16
  bridgekit.egg-info/requires.txt
15
17
  bridgekit.egg-info/top_level.txt
18
+ tests/test_cli.py
16
19
  tests/test_config.py
17
20
  tests/test_planner.py
18
21
  tests/test_providers.py
22
+ tests/test_redteam.py
19
23
  tests/test_reviewer.py
20
24
  tests/test_search.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ bridgekit = bridgekit.cli:main
@@ -7,7 +7,7 @@ include = ["bridgekit*"]
7
7
 
8
8
  [project]
9
9
  name = "bridgekit"
10
- version = "0.3.6"
10
+ version = "0.3.8"
11
11
  description = "AI tools that make you a better data scientist, not a redundant one."
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.9"
@@ -38,6 +38,9 @@ dev = [
38
38
  "pytest-mock>=3.0.0",
39
39
  ]
40
40
 
41
+ [project.scripts]
42
+ bridgekit = "bridgekit.cli:main"
43
+
41
44
  [project.urls]
42
45
  Homepage = "https://usebridgekit.com"
43
46
  Issues = "https://github.com/getbridgekit/bridgekit/issues"
@@ -0,0 +1,175 @@
1
+ import sys
2
+ import pytest
3
+ from unittest.mock import patch
4
+
5
+
6
+ FAKE_PLAN = "BRIDGEKIT ANALYSIS PLAN\n─────\nRECOMMENDED APPROACH\nUse a t-test."
7
+ FAKE_REVIEW = "BRIDGEKIT ANALYSIS REVIEW\n─────\n1. CLARITY\n✅ STRONG Clear writing."
8
+ FAKE_REDTEAM = "BRIDGEKIT RED TEAM\n─────\nCRITIQUE 1: Sample Size\nHARDEST QUESTION TO ANSWER\nWhat is n?"
9
+ FAKE_SEARCH = "Based on the documents, the answer is 42."
10
+
11
+
12
+ class TestPlanCommand:
13
+ def test_basic_question(self, capsys):
14
+ with patch("bridgekit.cli.plan", return_value=FAKE_PLAN) as mock_plan:
15
+ with patch("sys.argv", ["bridgekit", "plan", "should I use a t-test?"]):
16
+ from bridgekit.cli import main
17
+ main()
18
+ mock_plan.assert_called_once_with(
19
+ question="should I use a t-test?",
20
+ data_description=None,
21
+ goal=None,
22
+ provider=None,
23
+ model=None,
24
+ )
25
+ assert FAKE_PLAN in capsys.readouterr().out
26
+
27
+ def test_with_data_and_goal(self, capsys):
28
+ with patch("bridgekit.cli.plan", return_value=FAKE_PLAN) as mock_plan:
29
+ with patch("sys.argv", ["bridgekit", "plan", "my question",
30
+ "--data", "50 rows", "--goal", "compare means"]):
31
+ from bridgekit.cli import main
32
+ main()
33
+ mock_plan.assert_called_once_with(
34
+ question="my question",
35
+ data_description="50 rows",
36
+ goal="compare means",
37
+ provider=None,
38
+ model=None,
39
+ )
40
+
41
+ def test_with_provider_and_model(self):
42
+ with patch("bridgekit.cli.plan", return_value=FAKE_PLAN) as mock_plan:
43
+ with patch("sys.argv", ["bridgekit", "plan", "my question",
44
+ "--provider", "openai", "--model", "gpt-4o"]):
45
+ from bridgekit.cli import main
46
+ main()
47
+ mock_plan.assert_called_once_with(
48
+ question="my question",
49
+ data_description=None,
50
+ goal=None,
51
+ provider="openai",
52
+ model="gpt-4o",
53
+ )
54
+
55
+ def test_missing_question_exits(self):
56
+ with patch("sys.argv", ["bridgekit", "plan"]):
57
+ from bridgekit.cli import main
58
+ with pytest.raises(SystemExit):
59
+ main()
60
+
61
+ def test_environment_error_exits(self, capsys):
62
+ with patch("bridgekit.cli.plan", side_effect=EnvironmentError("ANTHROPIC_API_KEY not found")):
63
+ with patch("sys.argv", ["bridgekit", "plan", "my question"]):
64
+ from bridgekit.cli import main
65
+ with pytest.raises(SystemExit) as exc:
66
+ main()
67
+ assert exc.value.code == 1
68
+ assert "ANTHROPIC_API_KEY" in capsys.readouterr().err
69
+
70
+
71
+ class TestReviewCommand:
72
+ def test_basic_text(self, capsys):
73
+ with patch("bridgekit.cli.evaluate", return_value=FAKE_REVIEW) as mock_evaluate:
74
+ with patch("sys.argv", ["bridgekit", "review", "my analysis text"]):
75
+ from bridgekit.cli import main
76
+ main()
77
+ mock_evaluate.assert_called_once_with(
78
+ text="my analysis text",
79
+ provider=None,
80
+ model=None,
81
+ )
82
+ assert FAKE_REVIEW in capsys.readouterr().out
83
+
84
+ def test_missing_text_exits(self):
85
+ with patch("sys.argv", ["bridgekit", "review"]):
86
+ from bridgekit.cli import main
87
+ with pytest.raises(SystemExit):
88
+ main()
89
+
90
+
91
+ class TestRedteamCommand:
92
+ def test_basic_text(self, capsys):
93
+ with patch("bridgekit.cli.redteam", return_value=FAKE_REDTEAM) as mock_redteam:
94
+ with patch("sys.argv", ["bridgekit", "redteam", "my analysis text"]):
95
+ from bridgekit.cli import main
96
+ main()
97
+ mock_redteam.assert_called_once_with(
98
+ text="my analysis text",
99
+ stakeholder=None,
100
+ provider=None,
101
+ model=None,
102
+ )
103
+ assert FAKE_REDTEAM in capsys.readouterr().out
104
+
105
+ def test_with_stakeholder(self):
106
+ with patch("bridgekit.cli.redteam", return_value=FAKE_REDTEAM) as mock_redteam:
107
+ with patch("sys.argv", ["bridgekit", "redteam", "my analysis text",
108
+ "--stakeholder", "VP of Finance"]):
109
+ from bridgekit.cli import main
110
+ main()
111
+ mock_redteam.assert_called_once_with(
112
+ text="my analysis text",
113
+ stakeholder="VP of Finance",
114
+ provider=None,
115
+ model=None,
116
+ )
117
+
118
+ def test_missing_text_exits(self):
119
+ with patch("sys.argv", ["bridgekit", "redteam"]):
120
+ from bridgekit.cli import main
121
+ with pytest.raises(SystemExit):
122
+ main()
123
+
124
+
125
+ class TestSearchCommand:
126
+ def test_with_source(self, capsys):
127
+ with patch("bridgekit.cli.ask", return_value=FAKE_SEARCH) as mock_ask:
128
+ with patch("sys.argv", ["bridgekit", "search", "my question",
129
+ "--source", "./my_docs"]):
130
+ from bridgekit.cli import main
131
+ main()
132
+ mock_ask.assert_called_once_with(
133
+ question="my question",
134
+ source="./my_docs",
135
+ text=None,
136
+ provider=None,
137
+ model=None,
138
+ )
139
+ assert FAKE_SEARCH in capsys.readouterr().out
140
+
141
+ def test_with_text(self):
142
+ with patch("bridgekit.cli.ask", return_value=FAKE_SEARCH) as mock_ask:
143
+ with patch("sys.argv", ["bridgekit", "search", "my question",
144
+ "--text", "some raw text"]):
145
+ from bridgekit.cli import main
146
+ main()
147
+ mock_ask.assert_called_once_with(
148
+ question="my question",
149
+ source=None,
150
+ text="some raw text",
151
+ provider=None,
152
+ model=None,
153
+ )
154
+
155
+ def test_missing_source_and_text_exits(self, capsys):
156
+ with patch("sys.argv", ["bridgekit", "search", "my question"]):
157
+ from bridgekit.cli import main
158
+ with pytest.raises(SystemExit) as exc:
159
+ main()
160
+ assert exc.value.code == 1
161
+ assert "error" in capsys.readouterr().err
162
+
163
+ def test_missing_question_exits(self):
164
+ with patch("sys.argv", ["bridgekit", "search"]):
165
+ from bridgekit.cli import main
166
+ with pytest.raises(SystemExit):
167
+ main()
168
+
169
+
170
+ class TestNoCommand:
171
+ def test_no_subcommand_exits(self):
172
+ with patch("sys.argv", ["bridgekit"]):
173
+ from bridgekit.cli import main
174
+ with pytest.raises(SystemExit):
175
+ main()
@@ -159,6 +159,20 @@ class TestPlanOptionalParameters:
159
159
 
160
160
  assert isinstance(result, str)
161
161
 
162
+ def test_custom_system_prompt_reaches_api(self):
163
+ custom_prompt = "You are a data scientist specializing in healthcare analytics."
164
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
165
+ with patch("anthropic.Anthropic") as MockAnthropic:
166
+ mock_client = MagicMock()
167
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
168
+ MockAnthropic.return_value = mock_client
169
+
170
+ from bridgekit.planner import plan
171
+ plan("Should I use a t-test or ANOVA?", system_prompt=custom_prompt)
172
+
173
+ call_kwargs = mock_client.messages.create.call_args
174
+ assert call_kwargs.kwargs.get("system") == custom_prompt
175
+
162
176
  def test_all_parameters_included_in_api_call(self):
163
177
  with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
164
178
  with patch("anthropic.Anthropic") as MockAnthropic:
@@ -178,3 +192,33 @@ class TestPlanOptionalParameters:
178
192
  content = str(messages_arg)
179
193
  assert "5,000 users split 50/50." in content
180
194
  assert "causal inference" in content
195
+
196
+
197
+ class TestPlanMaxTokens:
198
+ """plan() should pass max_tokens through to the API."""
199
+
200
+ def test_default_max_tokens_is_1024(self):
201
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
202
+ with patch("anthropic.Anthropic") as MockAnthropic:
203
+ mock_client = MagicMock()
204
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
205
+ MockAnthropic.return_value = mock_client
206
+
207
+ from bridgekit.planner import plan
208
+ plan("Does our new onboarding flow increase upgrade rates?")
209
+
210
+ call_kwargs = mock_client.messages.create.call_args
211
+ assert call_kwargs.kwargs.get("max_tokens") == 1024
212
+
213
+ def test_custom_max_tokens_reaches_api(self):
214
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
215
+ with patch("anthropic.Anthropic") as MockAnthropic:
216
+ mock_client = MagicMock()
217
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
218
+ MockAnthropic.return_value = mock_client
219
+
220
+ from bridgekit.planner import plan
221
+ plan("Does our new onboarding flow increase upgrade rates?", max_tokens=2048)
222
+
223
+ call_kwargs = mock_client.messages.create.call_args
224
+ assert call_kwargs.kwargs.get("max_tokens") == 2048
@@ -0,0 +1,185 @@
1
+ import os
2
+ import pytest
3
+ from unittest.mock import MagicMock, patch
4
+
5
+
6
+ # ---------------------------------------------------------------------------
7
+ # Helpers
8
+ # ---------------------------------------------------------------------------
9
+
10
+ def _make_mock_message(text: str):
11
+ content_block = MagicMock()
12
+ content_block.text = text
13
+ message = MagicMock()
14
+ message.content = [content_block]
15
+ return message
16
+
17
+
18
+ FAKE_RESPONSE = (
19
+ "BRIDGEKIT RED TEAM\n"
20
+ "─────────────────────────────────────────\n"
21
+ "STAKEHOLDER: Skeptical Senior Executive\n\n"
22
+ "CRITIQUE 1: Sample Size\n"
23
+ '❯ "How many users was this actually tested on?"\n'
24
+ "WHY IT LANDS: No sample size is mentioned anywhere.\n"
25
+ "TO ADDRESS: Report n for each group with a power calculation.\n\n"
26
+ "CRITIQUE 2: Causation vs Correlation\n"
27
+ '❯ "You\'re assuming the feature caused this lift — prove it."\n'
28
+ "WHY IT LANDS: No control group is described.\n"
29
+ "TO ADDRESS: Show the experimental design with random assignment.\n\n"
30
+ "CRITIQUE 3: Business Impact\n"
31
+ '❯ "What does a 5% lift actually mean in dollars?"\n'
32
+ "WHY IT LANDS: Directional claims are not quantified.\n"
33
+ "TO ADDRESS: Translate the metric into revenue or cost terms.\n\n"
34
+ "─────────────────────────────────────────\n"
35
+ "HARDEST QUESTION TO ANSWER\n"
36
+ "What is the p-value and did you correct for multiple comparisons?"
37
+ )
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Tests
42
+ # ---------------------------------------------------------------------------
43
+
44
+ class TestRedteamReturnsString:
45
+ """redteam() should return a non-empty string."""
46
+
47
+ def test_returns_string(self):
48
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
49
+ with patch("anthropic.Anthropic") as MockAnthropic:
50
+ mock_client = MagicMock()
51
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
52
+ MockAnthropic.return_value = mock_client
53
+
54
+ from bridgekit.redteam import redteam
55
+ result = redteam("We ran an A/B test and saw a 5% lift in conversions.")
56
+
57
+ assert isinstance(result, str)
58
+ assert len(result) > 0
59
+
60
+
61
+ class TestRedteamOutputStructure:
62
+ """redteam() output should contain the required section headers."""
63
+
64
+ def test_output_contains_critique(self):
65
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
66
+ with patch("anthropic.Anthropic") as MockAnthropic:
67
+ mock_client = MagicMock()
68
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
69
+ MockAnthropic.return_value = mock_client
70
+
71
+ from bridgekit.redteam import redteam
72
+ result = redteam("We ran an A/B test and saw a 5% lift in conversions.")
73
+
74
+ assert "CRITIQUE" in result
75
+
76
+ def test_output_contains_hardest_question(self):
77
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
78
+ with patch("anthropic.Anthropic") as MockAnthropic:
79
+ mock_client = MagicMock()
80
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
81
+ MockAnthropic.return_value = mock_client
82
+
83
+ from bridgekit.redteam import redteam
84
+ result = redteam("We ran an A/B test and saw a 5% lift in conversions.")
85
+
86
+ assert "HARDEST QUESTION" in result
87
+
88
+
89
+ class TestRedteamMissingApiKey:
90
+ """redteam() should raise EnvironmentError when ANTHROPIC_API_KEY is absent."""
91
+
92
+ def test_raises_environment_error_when_key_missing(self):
93
+ env = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}
94
+ with patch.dict(os.environ, env, clear=True):
95
+ from bridgekit.redteam import redteam
96
+ with pytest.raises(EnvironmentError):
97
+ redteam("Some analysis text.")
98
+
99
+ def test_error_message_mentions_key(self):
100
+ env = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}
101
+ with patch.dict(os.environ, env, clear=True):
102
+ from bridgekit.redteam import redteam
103
+ with pytest.raises(EnvironmentError, match="ANTHROPIC_API_KEY"):
104
+ redteam("Some analysis text.")
105
+
106
+
107
+ class TestRedteamEmptyInput:
108
+ """redteam() should raise ValueError for empty or whitespace-only input."""
109
+
110
+ def test_empty_string_raises_value_error(self):
111
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
112
+ from bridgekit.redteam import redteam
113
+ with pytest.raises(ValueError, match="empty"):
114
+ redteam("")
115
+
116
+ def test_whitespace_only_raises_value_error(self):
117
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
118
+ from bridgekit.redteam import redteam
119
+ with pytest.raises(ValueError, match="empty"):
120
+ redteam(" ")
121
+
122
+
123
+ class TestRedteamStakeholder:
124
+ """redteam() should include a custom stakeholder in the system prompt."""
125
+
126
+ def test_custom_stakeholder_reaches_system_prompt(self):
127
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
128
+ with patch("anthropic.Anthropic") as MockAnthropic:
129
+ mock_client = MagicMock()
130
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
131
+ MockAnthropic.return_value = mock_client
132
+
133
+ from bridgekit.redteam import redteam
134
+ redteam("Some analysis text.", stakeholder="VP of Finance")
135
+
136
+ call_kwargs = mock_client.messages.create.call_args
137
+ assert "VP of Finance" in call_kwargs.kwargs.get("system", "")
138
+
139
+
140
+ class TestRedteamCustomSystemPrompt:
141
+ """redteam() should forward a custom system_prompt to the API, ignoring stakeholder."""
142
+
143
+ def test_custom_system_prompt_reaches_api(self):
144
+ custom_prompt = "You are a hostile regulator looking for compliance violations."
145
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
146
+ with patch("anthropic.Anthropic") as MockAnthropic:
147
+ mock_client = MagicMock()
148
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
149
+ MockAnthropic.return_value = mock_client
150
+
151
+ from bridgekit.redteam import redteam
152
+ redteam("Some analysis text.", system_prompt=custom_prompt)
153
+
154
+ call_kwargs = mock_client.messages.create.call_args
155
+ assert call_kwargs.kwargs.get("system") == custom_prompt
156
+
157
+
158
+ class TestRedteamMaxTokens:
159
+ """redteam() should pass max_tokens through to the API."""
160
+
161
+ def test_default_max_tokens_is_1024(self):
162
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
163
+ with patch("anthropic.Anthropic") as MockAnthropic:
164
+ mock_client = MagicMock()
165
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
166
+ MockAnthropic.return_value = mock_client
167
+
168
+ from bridgekit.redteam import redteam
169
+ redteam("Some analysis text.")
170
+
171
+ call_kwargs = mock_client.messages.create.call_args
172
+ assert call_kwargs.kwargs.get("max_tokens") == 1024
173
+
174
+ def test_custom_max_tokens_reaches_api(self):
175
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
176
+ with patch("anthropic.Anthropic") as MockAnthropic:
177
+ mock_client = MagicMock()
178
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
179
+ MockAnthropic.return_value = mock_client
180
+
181
+ from bridgekit.redteam import redteam
182
+ redteam("Some analysis text.", max_tokens=2048)
183
+
184
+ call_kwargs = mock_client.messages.create.call_args
185
+ assert call_kwargs.kwargs.get("max_tokens") == 2048
@@ -158,3 +158,51 @@ class TestEvaluateApiCallShape:
158
158
  messages_arg = call_kwargs.kwargs.get("messages") or call_kwargs.args[0]
159
159
  content = str(messages_arg)
160
160
  assert user_text in content
161
+
162
+
163
+ class TestEvaluateCustomSystemPrompt:
164
+ """evaluate() should forward a custom system_prompt to the API."""
165
+
166
+ def test_custom_system_prompt_reaches_api(self):
167
+ custom_prompt = "You are a skeptical PhD statistician focused only on methodology."
168
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
169
+ with patch("anthropic.Anthropic") as MockAnthropic:
170
+ mock_client = MagicMock()
171
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
172
+ MockAnthropic.return_value = mock_client
173
+
174
+ from bridgekit.reviewer import evaluate
175
+ evaluate("Some analysis text.", system_prompt=custom_prompt)
176
+
177
+ call_kwargs = mock_client.messages.create.call_args
178
+ assert call_kwargs.kwargs.get("system") == custom_prompt
179
+
180
+
181
+ class TestEvaluateMaxTokens:
182
+ """evaluate() should pass max_tokens through to the API."""
183
+
184
+ def test_default_max_tokens_is_1024(self):
185
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
186
+ with patch("anthropic.Anthropic") as MockAnthropic:
187
+ mock_client = MagicMock()
188
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
189
+ MockAnthropic.return_value = mock_client
190
+
191
+ from bridgekit.reviewer import evaluate
192
+ evaluate("Some analysis text.")
193
+
194
+ call_kwargs = mock_client.messages.create.call_args
195
+ assert call_kwargs.kwargs.get("max_tokens") == 1024
196
+
197
+ def test_custom_max_tokens_reaches_api(self):
198
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
199
+ with patch("anthropic.Anthropic") as MockAnthropic:
200
+ mock_client = MagicMock()
201
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_RESPONSE)
202
+ MockAnthropic.return_value = mock_client
203
+
204
+ from bridgekit.reviewer import evaluate
205
+ evaluate("Some analysis text.", max_tokens=2048)
206
+
207
+ call_kwargs = mock_client.messages.create.call_args
208
+ assert call_kwargs.kwargs.get("max_tokens") == 2048
@@ -222,6 +222,27 @@ class TestAskWithSourceFolder:
222
222
 
223
223
  assert mock_client.messages.create.call_count == 1
224
224
 
225
+ def test_custom_system_prompt_reaches_api(self):
226
+ custom_prompt = "You are a financial analyst. Answer only in terms of revenue impact."
227
+ mock_chromadb, mock_ef = _make_mock_chromadb()
228
+
229
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
230
+ with patch("anthropic.Anthropic") as MockAnthropic, \
231
+ patch("chromadb.Client", mock_chromadb.Client), \
232
+ patch(
233
+ "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
234
+ mock_ef,
235
+ ):
236
+ mock_client = MagicMock()
237
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_ANSWER)
238
+ MockAnthropic.return_value = mock_client
239
+
240
+ from bridgekit.search import ask
241
+ ask("What was revenue?", text="Revenue was $5M.", system_prompt=custom_prompt)
242
+
243
+ call_kwargs = mock_client.messages.create.call_args
244
+ assert call_kwargs.kwargs.get("system") == custom_prompt
245
+
225
246
  def test_source_folder_empty_raises_value_error(self):
226
247
  with tempfile.TemporaryDirectory() as tmpdir:
227
248
  # Folder exists but has no supported files
@@ -236,3 +257,45 @@ class TestAskWithSourceFolder:
236
257
  from bridgekit.search import ask
237
258
  with pytest.raises(ValueError, match="No content found"):
238
259
  ask("What happened?", source=tmpdir)
260
+
261
+
262
+ class TestAskMaxTokens:
263
+ """ask() should pass max_tokens through to the API."""
264
+
265
+ def test_default_max_tokens_is_1024(self):
266
+ mock_chromadb, mock_ef = _make_mock_chromadb()
267
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
268
+ with patch("anthropic.Anthropic") as MockAnthropic, \
269
+ patch("chromadb.Client", mock_chromadb.Client), \
270
+ patch(
271
+ "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
272
+ mock_ef,
273
+ ):
274
+ mock_client = MagicMock()
275
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_ANSWER)
276
+ MockAnthropic.return_value = mock_client
277
+
278
+ from bridgekit.search import ask
279
+ ask("What was the conversion rate?", text="The conversion rate increased by 12%.")
280
+
281
+ call_kwargs = mock_client.messages.create.call_args
282
+ assert call_kwargs.kwargs.get("max_tokens") == 1024
283
+
284
+ def test_custom_max_tokens_reaches_api(self):
285
+ mock_chromadb, mock_ef = _make_mock_chromadb()
286
+ with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
287
+ with patch("anthropic.Anthropic") as MockAnthropic, \
288
+ patch("chromadb.Client", mock_chromadb.Client), \
289
+ patch(
290
+ "chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction",
291
+ mock_ef,
292
+ ):
293
+ mock_client = MagicMock()
294
+ mock_client.messages.create.return_value = _make_mock_message(FAKE_ANSWER)
295
+ MockAnthropic.return_value = mock_client
296
+
297
+ from bridgekit.search import ask
298
+ ask("What was the conversion rate?", text="The conversion rate increased by 12%.", max_tokens=2048)
299
+
300
+ call_kwargs = mock_client.messages.create.call_args
301
+ assert call_kwargs.kwargs.get("max_tokens") == 2048
File without changes
File without changes