tech-hub-skills 1.2.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/.claude/README.md +291 -0
  2. package/.claude/bin/cli.js +266 -0
  3. package/.claude/package.json +46 -0
  4. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -0
  5. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -0
  6. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -0
  7. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -0
  8. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -0
  9. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -0
  10. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -0
  11. package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -0
  12. package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -0
  13. package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -0
  14. package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -0
  15. package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -0
  16. package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -0
  17. package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -0
  18. package/.claude/skills/README.md +336 -0
  19. package/.claude/skills/ai-engineer.md +104 -0
  20. package/.claude/skills/aws.md +143 -0
  21. package/.claude/skills/azure.md +149 -0
  22. package/.claude/skills/backend-developer.md +108 -0
  23. package/.claude/skills/code-review.md +399 -0
  24. package/.claude/skills/compliance-automation.md +747 -0
  25. package/.claude/skills/compliance-officer.md +108 -0
  26. package/.claude/skills/data-engineer.md +113 -0
  27. package/.claude/skills/data-governance.md +102 -0
  28. package/.claude/skills/data-scientist.md +123 -0
  29. package/.claude/skills/database-admin.md +109 -0
  30. package/.claude/skills/devops.md +160 -0
  31. package/.claude/skills/docker.md +160 -0
  32. package/.claude/skills/enterprise-dashboard.md +613 -0
  33. package/.claude/skills/finops.md +184 -0
  34. package/.claude/skills/frontend-developer.md +108 -0
  35. package/.claude/skills/gcp.md +143 -0
  36. package/.claude/skills/ml-engineer.md +115 -0
  37. package/.claude/skills/mlops.md +187 -0
  38. package/.claude/skills/network-engineer.md +109 -0
  39. package/.claude/skills/optimization-advisor.md +329 -0
  40. package/.claude/skills/orchestrator.md +623 -0
  41. package/.claude/skills/platform-engineer.md +102 -0
  42. package/.claude/skills/process-automation.md +226 -0
  43. package/.claude/skills/process-changelog.md +184 -0
  44. package/.claude/skills/process-documentation.md +484 -0
  45. package/.claude/skills/process-kanban.md +324 -0
  46. package/.claude/skills/process-versioning.md +214 -0
  47. package/.claude/skills/product-designer.md +104 -0
  48. package/.claude/skills/project-starter.md +443 -0
  49. package/.claude/skills/qa-engineer.md +109 -0
  50. package/.claude/skills/security-architect.md +135 -0
  51. package/.claude/skills/sre.md +109 -0
  52. package/.claude/skills/system-design.md +126 -0
  53. package/.claude/skills/technical-writer.md +101 -0
  54. package/.gitattributes +2 -0
  55. package/GITHUB_COPILOT.md +106 -0
  56. package/README.md +117 -224
  57. package/package.json +4 -42
  58. package/bin/cli.js +0 -241
  59. /package/{LICENSE → .claude/LICENSE} +0 -0
  60. /package/{bin → .claude/bin}/copilot.js +0 -0
  61. /package/{bin → .claude/bin}/postinstall.js +0 -0
  62. /package/{tech_hub_skills/skills → .claude/commands}/README.md +0 -0
  63. /package/{tech_hub_skills/skills → .claude/commands}/ai-engineer.md +0 -0
  64. /package/{tech_hub_skills/skills → .claude/commands}/aws.md +0 -0
  65. /package/{tech_hub_skills/skills → .claude/commands}/azure.md +0 -0
  66. /package/{tech_hub_skills/skills → .claude/commands}/backend-developer.md +0 -0
  67. /package/{tech_hub_skills/skills → .claude/commands}/code-review.md +0 -0
  68. /package/{tech_hub_skills/skills → .claude/commands}/compliance-automation.md +0 -0
  69. /package/{tech_hub_skills/skills → .claude/commands}/compliance-officer.md +0 -0
  70. /package/{tech_hub_skills/skills → .claude/commands}/data-engineer.md +0 -0
  71. /package/{tech_hub_skills/skills → .claude/commands}/data-governance.md +0 -0
  72. /package/{tech_hub_skills/skills → .claude/commands}/data-scientist.md +0 -0
  73. /package/{tech_hub_skills/skills → .claude/commands}/database-admin.md +0 -0
  74. /package/{tech_hub_skills/skills → .claude/commands}/devops.md +0 -0
  75. /package/{tech_hub_skills/skills → .claude/commands}/docker.md +0 -0
  76. /package/{tech_hub_skills/skills → .claude/commands}/enterprise-dashboard.md +0 -0
  77. /package/{tech_hub_skills/skills → .claude/commands}/finops.md +0 -0
  78. /package/{tech_hub_skills/skills → .claude/commands}/frontend-developer.md +0 -0
  79. /package/{tech_hub_skills/skills → .claude/commands}/gcp.md +0 -0
  80. /package/{tech_hub_skills/skills → .claude/commands}/ml-engineer.md +0 -0
  81. /package/{tech_hub_skills/skills → .claude/commands}/mlops.md +0 -0
  82. /package/{tech_hub_skills/skills → .claude/commands}/network-engineer.md +0 -0
  83. /package/{tech_hub_skills/skills → .claude/commands}/optimization-advisor.md +0 -0
  84. /package/{tech_hub_skills/skills → .claude/commands}/orchestrator.md +0 -0
  85. /package/{tech_hub_skills/skills → .claude/commands}/platform-engineer.md +0 -0
  86. /package/{tech_hub_skills/skills → .claude/commands}/process-automation.md +0 -0
  87. /package/{tech_hub_skills/skills → .claude/commands}/process-changelog.md +0 -0
  88. /package/{tech_hub_skills/skills → .claude/commands}/process-documentation.md +0 -0
  89. /package/{tech_hub_skills/skills → .claude/commands}/process-kanban.md +0 -0
  90. /package/{tech_hub_skills/skills → .claude/commands}/process-versioning.md +0 -0
  91. /package/{tech_hub_skills/skills → .claude/commands}/product-designer.md +0 -0
  92. /package/{tech_hub_skills/skills → .claude/commands}/project-starter.md +0 -0
  93. /package/{tech_hub_skills/skills → .claude/commands}/qa-engineer.md +0 -0
  94. /package/{tech_hub_skills/skills → .claude/commands}/security-architect.md +0 -0
  95. /package/{tech_hub_skills/skills → .claude/commands}/sre.md +0 -0
  96. /package/{tech_hub_skills/skills → .claude/commands}/system-design.md +0 -0
  97. /package/{tech_hub_skills/skills → .claude/commands}/technical-writer.md +0 -0
  98. /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/01-prompt-engineering/README.md +0 -0
  99. /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/02-rag-pipeline/README.md +0 -0
  100. /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/03-agent-orchestration/README.md +0 -0
  101. /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/04-llm-guardrails/README.md +0 -0
  102. /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/05-vector-embeddings/README.md +0 -0
  103. /package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/06-llm-evaluation/README.md +0 -0
  104. /package/{tech_hub_skills → .claude}/roles/azure/skills/01-infrastructure-fundamentals/README.md +0 -0
  105. /package/{tech_hub_skills → .claude}/roles/azure/skills/02-data-factory/README.md +0 -0
  106. /package/{tech_hub_skills → .claude}/roles/azure/skills/03-synapse-analytics/README.md +0 -0
  107. /package/{tech_hub_skills → .claude}/roles/azure/skills/04-databricks/README.md +0 -0
  108. /package/{tech_hub_skills → .claude}/roles/azure/skills/05-functions/README.md +0 -0
  109. /package/{tech_hub_skills → .claude}/roles/azure/skills/06-kubernetes-service/README.md +0 -0
  110. /package/{tech_hub_skills → .claude}/roles/azure/skills/07-openai-service/README.md +0 -0
  111. /package/{tech_hub_skills → .claude}/roles/azure/skills/08-machine-learning/README.md +0 -0
  112. /package/{tech_hub_skills → .claude}/roles/azure/skills/09-storage-adls/README.md +0 -0
  113. /package/{tech_hub_skills → .claude}/roles/azure/skills/10-networking/README.md +0 -0
  114. /package/{tech_hub_skills → .claude}/roles/azure/skills/11-sql-cosmos/README.md +0 -0
  115. /package/{tech_hub_skills → .claude}/roles/azure/skills/12-event-hubs/README.md +0 -0
  116. /package/{tech_hub_skills → .claude}/roles/code-review/skills/01-automated-code-review/README.md +0 -0
  117. /package/{tech_hub_skills → .claude}/roles/code-review/skills/02-pr-review-workflow/README.md +0 -0
  118. /package/{tech_hub_skills → .claude}/roles/code-review/skills/03-code-quality-gates/README.md +0 -0
  119. /package/{tech_hub_skills → .claude}/roles/code-review/skills/04-reviewer-assignment/README.md +0 -0
  120. /package/{tech_hub_skills → .claude}/roles/code-review/skills/05-review-analytics/README.md +0 -0
  121. /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/01-lakehouse-architecture/README.md +0 -0
  122. /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/02-etl-pipeline/README.md +0 -0
  123. /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/03-data-quality/README.md +0 -0
  124. /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/04-streaming-pipelines/README.md +0 -0
  125. /package/{tech_hub_skills → .claude}/roles/data-engineer/skills/05-performance-optimization/README.md +0 -0
  126. /package/{tech_hub_skills → .claude}/roles/data-governance/skills/01-data-catalog/README.md +0 -0
  127. /package/{tech_hub_skills → .claude}/roles/data-governance/skills/02-data-lineage/README.md +0 -0
  128. /package/{tech_hub_skills → .claude}/roles/data-governance/skills/03-data-quality-framework/README.md +0 -0
  129. /package/{tech_hub_skills → .claude}/roles/data-governance/skills/04-access-control/README.md +0 -0
  130. /package/{tech_hub_skills → .claude}/roles/data-governance/skills/05-master-data-management/README.md +0 -0
  131. /package/{tech_hub_skills → .claude}/roles/data-governance/skills/06-compliance-privacy/README.md +0 -0
  132. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/01-eda-automation/README.md +0 -0
  133. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/02-statistical-modeling/README.md +0 -0
  134. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/03-feature-engineering/README.md +0 -0
  135. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/04-predictive-modeling/README.md +0 -0
  136. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/05-customer-analytics/README.md +0 -0
  137. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/06-campaign-analysis/README.md +0 -0
  138. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/07-experimentation/README.md +0 -0
  139. /package/{tech_hub_skills → .claude}/roles/data-scientist/skills/08-data-visualization/README.md +0 -0
  140. /package/{tech_hub_skills → .claude}/roles/devops/skills/01-cicd-pipeline/README.md +0 -0
  141. /package/{tech_hub_skills → .claude}/roles/devops/skills/02-container-orchestration/README.md +0 -0
  142. /package/{tech_hub_skills → .claude}/roles/devops/skills/03-infrastructure-as-code/README.md +0 -0
  143. /package/{tech_hub_skills → .claude}/roles/devops/skills/04-gitops/README.md +0 -0
  144. /package/{tech_hub_skills → .claude}/roles/devops/skills/05-environment-management/README.md +0 -0
  145. /package/{tech_hub_skills → .claude}/roles/devops/skills/06-automated-testing/README.md +0 -0
  146. /package/{tech_hub_skills → .claude}/roles/devops/skills/07-release-management/README.md +0 -0
  147. /package/{tech_hub_skills → .claude}/roles/devops/skills/08-monitoring-alerting/README.md +0 -0
  148. /package/{tech_hub_skills → .claude}/roles/devops/skills/09-devsecops/README.md +0 -0
  149. /package/{tech_hub_skills → .claude}/roles/finops/skills/01-cost-visibility/README.md +0 -0
  150. /package/{tech_hub_skills → .claude}/roles/finops/skills/02-resource-tagging/README.md +0 -0
  151. /package/{tech_hub_skills → .claude}/roles/finops/skills/03-budget-management/README.md +0 -0
  152. /package/{tech_hub_skills → .claude}/roles/finops/skills/04-reserved-instances/README.md +0 -0
  153. /package/{tech_hub_skills → .claude}/roles/finops/skills/05-spot-optimization/README.md +0 -0
  154. /package/{tech_hub_skills → .claude}/roles/finops/skills/06-storage-tiering/README.md +0 -0
  155. /package/{tech_hub_skills → .claude}/roles/finops/skills/07-compute-rightsizing/README.md +0 -0
  156. /package/{tech_hub_skills → .claude}/roles/finops/skills/08-chargeback/README.md +0 -0
  157. /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/01-mlops-pipeline/README.md +0 -0
  158. /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/02-feature-engineering/README.md +0 -0
  159. /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/03-model-training/README.md +0 -0
  160. /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/04-model-serving/README.md +0 -0
  161. /package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/05-model-monitoring/README.md +0 -0
  162. /package/{tech_hub_skills → .claude}/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +0 -0
  163. /package/{tech_hub_skills → .claude}/roles/mlops/skills/02-experiment-tracking/README.md +0 -0
  164. /package/{tech_hub_skills → .claude}/roles/mlops/skills/03-model-registry/README.md +0 -0
  165. /package/{tech_hub_skills → .claude}/roles/mlops/skills/04-feature-store/README.md +0 -0
  166. /package/{tech_hub_skills → .claude}/roles/mlops/skills/05-model-deployment/README.md +0 -0
  167. /package/{tech_hub_skills → .claude}/roles/mlops/skills/06-model-observability/README.md +0 -0
  168. /package/{tech_hub_skills → .claude}/roles/mlops/skills/07-data-versioning/README.md +0 -0
  169. /package/{tech_hub_skills → .claude}/roles/mlops/skills/08-ab-testing/README.md +0 -0
  170. /package/{tech_hub_skills → .claude}/roles/mlops/skills/09-automated-retraining/README.md +0 -0
  171. /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/01-internal-developer-platform/README.md +0 -0
  172. /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +0 -0
  173. /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/03-slo-sli-management/README.md +0 -0
  174. /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/04-developer-experience/README.md +0 -0
  175. /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/05-incident-management/README.md +0 -0
  176. /package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/06-capacity-management/README.md +0 -0
  177. /package/{tech_hub_skills → .claude}/roles/product-designer/skills/01-requirements-discovery/README.md +0 -0
  178. /package/{tech_hub_skills → .claude}/roles/product-designer/skills/02-user-research/README.md +0 -0
  179. /package/{tech_hub_skills → .claude}/roles/product-designer/skills/03-brainstorming-ideation/README.md +0 -0
  180. /package/{tech_hub_skills → .claude}/roles/product-designer/skills/04-ux-design/README.md +0 -0
  181. /package/{tech_hub_skills → .claude}/roles/product-designer/skills/05-product-market-fit/README.md +0 -0
  182. /package/{tech_hub_skills → .claude}/roles/product-designer/skills/06-stakeholder-management/README.md +0 -0
  183. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/01-pii-detection/README.md +0 -0
  184. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/02-threat-modeling/README.md +0 -0
  185. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/03-infrastructure-security/README.md +0 -0
  186. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/04-iam/README.md +0 -0
  187. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/05-application-security/README.md +0 -0
  188. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/06-secrets-management/README.md +0 -0
  189. /package/{tech_hub_skills → .claude}/roles/security-architect/skills/07-security-monitoring/README.md +0 -0
  190. /package/{tech_hub_skills → .claude}/roles/system-design/skills/01-architecture-patterns/README.md +0 -0
  191. /package/{tech_hub_skills → .claude}/roles/system-design/skills/02-requirements-engineering/README.md +0 -0
  192. /package/{tech_hub_skills → .claude}/roles/system-design/skills/03-scalability/README.md +0 -0
  193. /package/{tech_hub_skills → .claude}/roles/system-design/skills/04-high-availability/README.md +0 -0
  194. /package/{tech_hub_skills → .claude}/roles/system-design/skills/05-cost-optimization-design/README.md +0 -0
  195. /package/{tech_hub_skills → .claude}/roles/system-design/skills/06-api-design/README.md +0 -0
  196. /package/{tech_hub_skills → .claude}/roles/system-design/skills/07-observability-architecture/README.md +0 -0
  197. /package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +0 -0
  198. /package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/README.md +0 -0
@@ -0,0 +1,356 @@
1
+ """
2
+ Prompt A/B Testing Framework
3
+ Compare prompt variations with statistical significance testing.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ from datetime import datetime
9
+ from typing import List, Dict, Any, Optional, Callable
10
+ from dataclasses import dataclass, asdict
11
+ from scipy import stats
12
+ import numpy as np
13
+ import pandas as pd
14
+
15
+
16
@dataclass
class PromptVariant:
    """A single prompt variant taking part in an A/B test.

    Attributes:
        id: Identifier used to associate test results with this variant.
        name: Human-readable label for the variant.
        template: Prompt template text.
        metadata: Free-form extra information. When omitted or passed as
            ``None`` it is replaced with a fresh empty dict, so instances
            never share one mutable default.
    """

    id: str
    name: str
    template: str
    # ``None`` is only a sentinel; ``__post_init__`` swaps it for ``{}``.
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        """Replace a missing ``metadata`` value with a new empty dict."""
        if self.metadata is None:
            self.metadata = {}
27
+
28
+
29
@dataclass
class TestResult:
    """Result from testing a single variant once.

    Attributes:
        variant_id: ID of the variant that produced this result.
        response: Response text returned for the prompt.
        latency_ms: Call latency in milliseconds.
        tokens_used: Number of tokens consumed by the call.
        cost: Cost of the call.
        quality_score: Score assigned by an evaluation function, or ``None``
            if the result has not been scored.
        timestamp: ISO-8601 creation time. When omitted it is filled with
            ``datetime.now().isoformat()`` (naive local time).
    """

    variant_id: str
    response: str
    latency_ms: float
    tokens_used: int
    cost: float
    quality_score: Optional[float] = None
    # ``None`` is only a sentinel; ``__post_init__`` fills in the current time.
    timestamp: Optional[str] = None

    def __post_init__(self):
        """Stamp the result with the current time when none was supplied."""
        if self.timestamp is None:
            self.timestamp = datetime.now().isoformat()
43
+
44
+
45
class ABTest:
    """A/B test experiment for prompt variants.

    Collects per-variant results, aggregates them into summary metrics,
    compares two variants with an independent two-sample t-test, and renders
    or exports the outcome.
    """

    def __init__(
        self,
        name: str,
        variants: List["PromptVariant"],
        evaluation_fn: Optional[Callable] = None
    ):
        """Create an experiment.

        Args:
            name: Experiment name used in reports and exports.
            variants: Variants under test; they are indexed by their ``id``.
            evaluation_fn: Callable that maps a response string to a quality
                score. Defaults to the built-in length-based heuristic.
        """
        self.name = name
        self.variants = {v.id: v for v in variants}
        self.evaluation_fn = evaluation_fn or self._default_evaluation
        self.results: List["TestResult"] = []

    def add_result(self, result: "TestResult") -> None:
        """Add a test result."""
        self.results.append(result)

    def get_variant_results(self, variant_id: str) -> List["TestResult"]:
        """Get all results for a specific variant."""
        return [r for r in self.results if r.variant_id == variant_id]

    def calculate_metrics(self, variant_id: str) -> Dict[str, float]:
        """Calculate aggregate metrics for a variant.

        Args:
            variant_id: ID of the variant to summarise.

        Returns:
            An empty dict when the variant has no results. Otherwise a dict
            with ``sample_size``, the averages of latency, cost, tokens and
            quality score, and the 50th/95th/99th latency percentiles.
            ``avg_quality_score`` is NaN when no result carries a score.
        """
        results = self.get_variant_results(variant_id)

        if not results:
            return {}

        # Build each series once instead of once per metric.
        latencies = [r.latency_ms for r in results]
        scores = [r.quality_score for r in results if r.quality_score is not None]
        p50, p95, p99 = np.percentile(latencies, [50, 95, 99])

        return {
            "sample_size": len(results),
            "avg_latency_ms": float(np.mean(latencies)),
            "avg_cost": float(np.mean([r.cost for r in results])),
            "avg_tokens": float(np.mean([r.tokens_used for r in results])),
            # np.mean([]) would emit a RuntimeWarning; produce the same NaN
            # explicitly when nothing has been scored.
            "avg_quality_score": float(np.mean(scores)) if scores else float("nan"),
            "p50_latency": float(p50),
            "p95_latency": float(p95),
            "p99_latency": float(p99),
        }

    def compare_variants(
        self,
        variant_a_id: str,
        variant_b_id: str,
        metric: str = "quality_score"
    ) -> Dict[str, Any]:
        """
        Compare two variants with statistical significance testing.

        Args:
            variant_a_id: First variant ID
            variant_b_id: Second variant ID
            metric: Metric to compare ('quality_score', 'latency_ms', 'cost')

        Returns:
            ``{"error": ...}`` when either variant has no results or no
            non-``None`` values for ``metric``. Otherwise a dict with
            per-variant mean/std/sample size, the t statistic, p-value,
            Cohen's d, a ``significant`` flag (p < 0.05), the ``winner`` and
            the percentage ``improvement`` of B over A. With a zero baseline
            mean, ``improvement`` is 0.0 when B is also zero and signed
            infinity otherwise.
        """
        results_a = self.get_variant_results(variant_a_id)
        results_b = self.get_variant_results(variant_b_id)

        if not results_a or not results_b:
            return {"error": "Insufficient data for comparison"}

        # Extract metric values, dropping results that lack the metric.
        values_a = [v for v in (getattr(r, metric) for r in results_a) if v is not None]
        values_b = [v for v in (getattr(r, metric) for r in results_b) if v is not None]

        if not values_a or not values_b:
            return {"error": f"No valid {metric} data"}

        # Perform t-test
        t_stat, p_value = stats.ttest_ind(values_a, values_b)

        mean_a = float(np.mean(values_a))
        mean_b = float(np.mean(values_b))
        # np.std defaults to the population standard deviation (ddof=0).
        std_a = float(np.std(values_a))
        std_b = float(np.std(values_b))

        # Calculate effect size (Cohen's d) from the averaged variances.
        pooled_std = np.sqrt((std_a**2 + std_b**2) / 2)
        cohens_d = (mean_a - mean_b) / pooled_std if pooled_std > 0 else 0

        # Determine winner; a NaN p-value compares False and so lands in the
        # "no significant difference" branch.
        if p_value < 0.05:
            if metric in ["latency_ms", "cost"]:
                # Lower is better
                winner = variant_a_id if mean_a < mean_b else variant_b_id
            else:
                # Higher is better
                winner = variant_a_id if mean_a > mean_b else variant_b_id
            significant = True
        else:
            winner = "No significant difference"
            significant = False

        # Relative change of B against A; guard the zero baseline instead of
        # dividing by zero.
        if mean_a != 0:
            improvement = (mean_b - mean_a) / mean_a * 100
        elif mean_b == 0:
            improvement = 0.0
        else:
            improvement = float("inf") if mean_b > 0 else float("-inf")

        return {
            "variant_a": {
                "id": variant_a_id,
                "mean": mean_a,
                "std": std_a,
                "sample_size": len(values_a)
            },
            "variant_b": {
                "id": variant_b_id,
                "mean": mean_b,
                "std": std_b,
                "sample_size": len(values_b)
            },
            "t_statistic": float(t_stat),
            "p_value": float(p_value),
            "cohens_d": float(cohens_d),
            "significant": significant,
            "winner": winner,
            "metric": metric,
            "improvement": float(improvement)
        }

    def generate_report(self) -> str:
        """Generate a comprehensive A/B test report.

        Returns:
            A multi-line string with per-variant metrics and, when exactly
            two variants are registered, their statistical comparison (or
            the reason the comparison could not be made).
        """
        lines = [
            f"📊 A/B Test Report: {self.name}",
            "=" * 80,
            f"Total Results: {len(self.results)}",
            f"Variants Tested: {len(self.variants)}",
            "\n"
        ]

        # Metrics for each variant
        lines.append("📈 Variant Performance:")
        for variant_id, variant in self.variants.items():
            metrics = self.calculate_metrics(variant_id)
            if metrics:
                quality = metrics['avg_quality_score']
                quality_text = "N/A" if np.isnan(quality) else f"{quality:.2f}"
                lines.append(f"\n {variant.name} (ID: {variant_id})")
                lines.append(f" Sample Size: {metrics['sample_size']}")
                lines.append(f" Avg Quality Score: {quality_text}")
                lines.append(f" Avg Latency: {metrics['avg_latency_ms']:.0f}ms (p95: {metrics['p95_latency']:.0f}ms)")
                lines.append(f" Avg Cost: ${metrics['avg_cost']:.4f}")
                lines.append(f" Avg Tokens: {metrics['avg_tokens']:.0f}")

        # Statistical comparisons
        if len(self.variants) == 2:
            variant_ids = list(self.variants.keys())
            comparison = self.compare_variants(variant_ids[0], variant_ids[1])

            lines.append("\n\n🔬 Statistical Comparison:")
            if "error" in comparison:
                # Show why there is no comparison rather than printing
                # placeholder statistics such as a p-value of 0.0000.
                lines.append(f" {comparison['error']}")
            else:
                lines.append(f" Metric: {comparison.get('metric', 'N/A')}")
                lines.append(f" P-Value: {comparison.get('p_value', 0):.4f}")
                lines.append(f" Cohen's d: {comparison.get('cohens_d', 0):.3f}")
                lines.append(f" Significant: {'Yes' if comparison.get('significant') else 'No'}")
                lines.append(f" Winner: {comparison.get('winner', 'N/A')}")
                lines.append(f" Improvement: {comparison.get('improvement', 0):.1f}%")

        return "\n".join(lines)

    def _default_evaluation(self, response: str) -> float:
        """Default quality evaluation (length-based).

        Returns 50.0 for responses shorter than 50 characters, 70.0 for
        responses longer than 2000 characters, and 85.0 otherwise.
        """
        # Simple heuristic: penalize very short or very long responses
        length = len(response)
        if length < 50:
            return 50.0
        elif length > 2000:
            return 70.0
        else:
            return 85.0

    def export_results(self, filepath: str) -> None:
        """Export results to JSON file.

        Args:
            filepath: Destination path; the file is overwritten.
        """
        data = {
            "name": self.name,
            "variants": {k: asdict(v) for k, v in self.variants.items()},
            "results": [asdict(r) for r in self.results],
            "summary": {
                variant_id: self.calculate_metrics(variant_id)
                for variant_id in self.variants.keys()
            }
        }

        with open(filepath, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2)

        print(f"✅ Results exported to {filepath}")
221
+
222
+
223
class ABTestRunner:
    """Runner for executing A/B tests with actual LLM calls.

    As written, calls are simulated by ``_simulate_llm_call``; the stored
    ``llm_client`` is not used by any method of this class.
    """

    def __init__(self, llm_client: Optional[Any] = None):
        """Store the optional LLM client for a real implementation to use."""
        self.llm_client = llm_client

    async def run_test(
        self,
        test: "ABTest",
        test_cases: List[Dict[str, Any]],
        samples_per_variant: int = 30
    ) -> "ABTest":
        """
        Run an A/B test with multiple test cases.

        Every variant is run against the same leading slice of
        ``test_cases``; each result is scored with ``test.evaluation_fn``
        and added to ``test``.

        Args:
            test: The ABTest instance
            test_cases: List of test case dictionaries with template variables
            samples_per_variant: Maximum number of test cases to run per
                variant; fewer are run when ``test_cases`` is shorter.

        Returns:
            The test instance with results
        """
        # Slice once: the selection is the same for every variant, and its
        # real length is what the progress output should report.
        selected_cases = test_cases[:samples_per_variant]
        total = len(selected_cases)

        for variant_id, variant in test.variants.items():
            print(f"Testing variant: {variant.name} ({total} samples)")

            for i, test_case in enumerate(selected_cases):
                # Render prompt with test case
                # In production, use actual template rendering
                prompt = variant.template.format(**test_case)

                # Simulate LLM call (replace with actual API call)
                result = await self._simulate_llm_call(prompt, variant_id)

                # Evaluate quality
                result.quality_score = test.evaluation_fn(result.response)

                test.add_result(result)

                if (i + 1) % 10 == 0:
                    print(f" Progress: {i + 1}/{total}")

        return test

    async def _simulate_llm_call(
        self,
        prompt: str,
        variant_id: str
    ) -> "TestResult":
        """Simulate an LLM API call (replace with actual implementation).

        Sleeps for 0.1 s, then returns a ``TestResult`` with a canned
        response and randomly drawn latency, token count and cost. The
        ``prompt`` argument is not used by the simulation.
        """
        # In production, call actual LLM API here
        await asyncio.sleep(0.1)  # Simulate API latency

        # Generate mock response
        response = f"This is a simulated response for variant {variant_id}. " + "Sample content. " * 20

        return TestResult(
            variant_id=variant_id,
            response=response,
            latency_ms=np.random.uniform(200, 800),
            tokens_used=np.random.randint(100, 500),
            cost=np.random.uniform(0.001, 0.01)
        )
287
+
288
+
289
# Example usage
if __name__ == "__main__":
    # Two competing prompts: a terse one and a detailed, structured one.
    variant_a = PromptVariant(
        id="v1_concise",
        name="Concise Prompt",
        template=(
            "Analyze this marketing campaign: {campaign_details}\n"
            "\n"
            "Provide brief recommendations."
        ),
        metadata={"author": "Team A", "hypothesis": "Shorter prompts = faster responses"},
    )

    variant_b = PromptVariant(
        id="v2_detailed",
        name="Detailed Prompt",
        template=(
            "You are a marketing analytics expert. Analyze the following campaign in detail.\n"
            "\n"
            "Campaign Details: {campaign_details}\n"
            "\n"
            "Please provide:\n"
            "1. Performance assessment\n"
            "2. Key insights\n"
            "3. Specific recommendations\n"
            "4. Action items\n"
            "\n"
            "Be thorough and data-driven."
        ),
        metadata={"author": "Team B", "hypothesis": "Detailed prompts = better quality"},
    )

    # The experiment that holds both variants.
    test = ABTest(
        name="Marketing Prompt Optimization Q1 2025",
        variants=[variant_a, variant_b],
    )

    # Fifty synthetic campaigns that differ only in their index.
    test_cases = []
    for i in range(50):
        test_cases.append(
            {"campaign_details": f"Campaign {i}: Budget $10K, CTR 2.5%, Conv 1.2%"}
        )

    runner = ABTestRunner()

    async def run():
        """Run the experiment, print the report and comparison, then export."""
        finished = await runner.run_test(
            test=test,
            test_cases=test_cases,
            samples_per_variant=25,
        )

        # Full report first.
        print("\n" + finished.generate_report())

        # Then the head-to-head comparison on quality score.
        print("\n" + "=" * 80)
        comparison = finished.compare_variants("v1_concise", "v2_detailed", "quality_score")
        significance = 'Yes ✓' if comparison['significant'] else 'No ✗'
        print(f"\n🏆 Winner: {comparison['winner']}")
        print(f"📊 Statistical Significance: {significance}")
        print(f"📈 Improvement: {comparison['improvement']:.1f}%")

        # Persist everything to disk.
        finished.export_results("ab_test_results.json")

    # Drive the coroutine to completion.
    asyncio.run(run())
@@ -0,0 +1,274 @@
1
+ """
2
+ Prompt Template Manager with Version Control
3
+ Manages prompt templates with variable injection, inheritance, and versioning.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Dict, Any, Optional, List
11
+ from jinja2 import Environment, FileSystemLoader, Template
12
+ import yaml
13
+
14
+
15
class PromptTemplate:
    """Version-controlled prompt template with variable injection.

    The template text is compiled with Jinja2, so variables must be written
    as ``{{ variable }}`` placeholders; single-brace ``{variable}`` text is
    emitted verbatim by ``render``.
    """

    def __init__(
        self,
        name: str,
        template: str,
        version: str = "1.0.0",
        metadata: Optional[Dict[str, Any]] = None,
        parent: Optional[str] = None
    ):
        """Store the template fields and compile the Jinja2 template.

        Args:
            name: Template identifier; also used as the file-name prefix.
            template: Jinja2 template source text.
            version: Version string, used in the file name on save.
            metadata: Free-form metadata; defaults to an empty dict.
            parent: Optional name of a parent template (stored only).
        """
        self.name = name
        self.template = template
        self.version = version
        self.metadata = metadata or {}
        self.parent = parent
        self.created_at = datetime.now().isoformat()
        self._jinja_template = Template(template)

    def render(self, **kwargs) -> str:
        """Render the template with provided variables."""
        return self._jinja_template.render(**kwargs)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "name": self.name,
            "template": self.template,
            "version": self.version,
            "metadata": self.metadata,
            "parent": self.parent,
            "created_at": self.created_at
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PromptTemplate":
        """Create from dictionary.

        ``name`` and ``template`` are required keys (``KeyError`` otherwise).
        A stored ``created_at`` is restored so that a save/load round trip
        does not reset the creation timestamp to "now".
        """
        instance = cls(
            name=data["name"],
            template=data["template"],
            version=data.get("version", "1.0.0"),
            metadata=data.get("metadata", {}),
            parent=data.get("parent")
        )
        stored_created_at = data.get("created_at")
        if stored_created_at is not None:
            instance.created_at = stored_created_at
        return instance

    def save(self, directory: str = "./prompts") -> None:
        """Save template to disk as ``<name>_v<version>.json`` in ``directory``."""
        target_dir = Path(directory)
        target_dir.mkdir(parents=True, exist_ok=True)
        filepath = target_dir / f"{self.name}_v{self.version}.json"

        with open(filepath, 'w', encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=2)

        print(f"✅ Saved: {filepath}")

    @staticmethod
    def _version_key(version: str) -> tuple:
        """Return a sort key that orders dotted versions numerically.

        Numeric components compare as integers (so ``1.10.0`` > ``1.9.0``);
        non-numeric components compare as strings and sort after numeric
        ones in the same position.
        """
        return tuple(
            (0, int(part)) if part.isdecimal() else (1, part)
            for part in str(version).split(".")
        )

    @classmethod
    def load(cls, name: str, version: Optional[str] = None, directory: str = "./prompts") -> "PromptTemplate":
        """Load template from disk.

        Args:
            name: Template name (file-name prefix).
            version: Exact version to load; when omitted, the highest
                version found in ``directory`` is loaded.
            directory: Directory containing the saved templates.

        Raises:
            FileNotFoundError: If no matching template file exists.
        """
        if version:
            filepath = Path(directory) / f"{name}_v{version}.json"
        else:
            # Pick the highest version by numeric comparison; a plain
            # lexicographic sort of file names would rank 1.9.0 above 1.10.0.
            prefix = f"{name}_v"
            candidates = list(Path(directory).glob(f"{prefix}*.json"))
            if not candidates:
                raise FileNotFoundError(f"No template found for {name}")
            filepath = max(
                candidates,
                key=lambda path: cls._version_key(path.stem[len(prefix):]),
            )

        with open(filepath, 'r', encoding="utf-8") as f:
            data = json.load(f)

        return cls.from_dict(data)
87
+
88
+
89
+ class PromptLibrary:
90
+ """Centralized library for managing multiple prompt templates."""
91
+
92
+ def __init__(self, library_path: str = "./prompt_library"):
93
+ self.library_path = Path(library_path)
94
+ self.library_path.mkdir(parents=True, exist_ok=True)
95
+ self.templates: Dict[str, PromptTemplate] = {}
96
+ self._load_all()
97
+
98
+ def _load_all(self) -> None:
99
+ """Load all templates from library."""
100
+ for filepath in self.library_path.glob("*.json"):
101
+ with open(filepath, 'r') as f:
102
+ data = json.load(f)
103
+ template = PromptTemplate.from_dict(data)
104
+ self.templates[template.name] = template
105
+
106
+ def add(self, template: PromptTemplate) -> None:
107
+ """Add a template to the library."""
108
+ self.templates[template.name] = template
109
+ template.save(str(self.library_path))
110
+
111
+ def get(self, name: str, version: Optional[str] = None) -> PromptTemplate:
112
+ """Get a template by name."""
113
+ if version:
114
+ return PromptTemplate.load(name, version, str(self.library_path))
115
+ return self.templates.get(name) or PromptTemplate.load(name, directory=str(self.library_path))
116
+
117
+ def list(self) -> List[Dict[str, Any]]:
118
+ """List all templates."""
119
+ return [
120
+ {
121
+ "name": t.name,
122
+ "version": t.version,
123
+ "created_at": t.created_at,
124
+ "parent": t.parent
125
+ }
126
+ for t in self.templates.values()
127
+ ]
128
+
129
+ def create_from_yaml(self, yaml_path: str) -> PromptTemplate:
130
+ """Create template from YAML configuration."""
131
+ with open(yaml_path, 'r') as f:
132
+ config = yaml.safe_load(f)
133
+
134
+ template = PromptTemplate(
135
+ name=config["name"],
136
+ template=config["template"],
137
+ version=config.get("version", "1.0.0"),
138
+ metadata=config.get("metadata", {}),
139
+ parent=config.get("parent")
140
+ )
141
+
142
+ self.add(template)
143
+ return template
144
+
145
+
146
+ # Example templates
147
# Placeholders use Jinja2 ``{{ name }}`` syntax because PromptTemplate compiles
# its text with jinja2.Template; single-brace ``{name}`` would be emitted
# verbatim instead of being substituted.
EXAMPLE_TEMPLATES = {
    "marketing_email": """You are a marketing copywriter for {{ company }}.

Write a compelling email for {{ product }} targeting {{ audience }}.

Requirements:
- Tone: {{ tone }}
- Length: {{ length }} words
- Include a clear call-to-action
- Use the brand voice: {{ brand_voice }}

Product Details:
{{ product_details }}

Email:""",

    "seo_optimizer": """Analyze and optimize the following content for SEO.

Target Keywords: {{ keywords }}
Target Audience: {{ audience }}

Original Content:
{{ content }}

Provide:
1. Keyword density analysis
2. Readability score
3. Recommended improvements
4. Optimized version

Analysis:""",

    "lead_scorer": """You are a lead scoring expert for B2B SaaS.

Evaluate this lead based on the following criteria:

Lead Information:
- Company: {{ company }}
- Industry: {{ industry }}
- Company Size: {{ company_size }}
- Job Title: {{ job_title }}
- Engagement Level: {{ engagement_level }}

Scoring Criteria (0-100):
1. Fit Score (company size, industry match)
2. Interest Score (engagement, intent signals)
3. Urgency Score (buying timeline indicators)

Provide:
- Overall Score (0-100)
- Category: Hot/Warm/Cold
- Recommended Action
- Reasoning

Analysis:"""
}
203
+
204
+
205
def create_default_library() -> PromptLibrary:
    """Build a PromptLibrary at its default path and add every example template.

    Each entry of EXAMPLE_TEMPLATES becomes a version "1.0.0" PromptTemplate
    with the same author/category/tags metadata and is added via
    ``PromptLibrary.add``.
    """
    default_library = PromptLibrary()

    for template_name in EXAMPLE_TEMPLATES:
        # A fresh metadata dict per template so the entries never share state.
        shared_metadata = dict(
            author="Tech Innovation Hub",
            category="marketing",
            tags=["production", "tested"],
        )
        default_library.add(
            PromptTemplate(
                name=template_name,
                template=EXAMPLE_TEMPLATES[template_name],
                version="1.0.0",
                metadata=shared_metadata,
            )
        )

    return default_library
223
+
224
+
225
+ # Example usage
226
if __name__ == "__main__":
    # Create library with examples
    library = create_default_library()

    # List all templates
    print("📚 Available Templates:")
    for t in library.list():
        print(f"  - {t['name']} (v{t['version']})")

    print("\n" + "="*60 + "\n")

    # Use marketing email template
    template = library.get("marketing_email")

    email = template.render(
        company="Tech Innovation Hub",
        product="AI Engineering Masterclass",
        audience="Data Scientists and ML Engineers",
        tone="professional yet approachable",
        length="150",
        brand_voice="innovative, data-driven, practical",
        product_details="A comprehensive course covering LLMs, RAG, and Multi-Agent Systems"
    )

    print("📧 Generated Email:")
    print(email)

    print("\n" + "="*60 + "\n")

    # Create a custom template.
    # Placeholders use Jinja2 ``{{ name }}`` syntax: PromptTemplate compiles the
    # text with jinja2.Template, which leaves single-brace ``{name}`` untouched.
    custom_template = PromptTemplate(
        name="campaign_analyzer",
        template="""Analyze this marketing campaign performance:

Campaign: {{ campaign_name }}
Duration: {{ duration }}
Metrics:
- Impressions: {{ impressions }}
- Clicks: {{ clicks }}
- Conversions: {{ conversions }}
- Revenue: ${{ revenue }}

Provide insights and recommendations.""",
        version="1.0.0",
        metadata={"author": "Marketing Team", "category": "analytics"}
    )

    # Adding also saves the template into the library directory.
    library.add(custom_template)
    print(f"✅ Added custom template: {custom_template.name}")