mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,226 @@
1
+ # Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
2
+ # https://github.com/gepa-ai/gepa
3
+
4
+ import re
5
+ from typing import Any
6
+
7
+
8
class RAGEvaluationMetrics:
    """
    Evaluation metrics for RAG systems.

    Provides both retrieval and generation quality metrics
    for comprehensive RAG system evaluation.

    Every metric here is purely lexical (set or n-gram overlap on normalized
    text); no model calls or embeddings are involved. All scores are floats
    in the range [0, 1].
    """

    # Matches any character that is neither a word character nor whitespace.
    # Compiled once at class level because _normalize_text is called many
    # times per evaluation.
    _PUNCTUATION_RE = re.compile(r"[^\w\s]")

    def evaluate_retrieval(self, retrieved_docs: list[dict[str, Any]], relevant_doc_ids: list[str]) -> dict[str, float]:
        """
        Evaluate retrieval quality metrics.

        Document IDs are read from ``doc["metadata"]["doc_id"]``, falling back
        to ``doc["metadata"]["id"]``. Documents without either are ignored.
        Both retrieved and relevant IDs are compared as strings, so ``1`` and
        ``"1"`` are treated as the same document.

        Args:
            retrieved_docs: List of retrieved documents with metadata
            relevant_doc_ids: List of ground truth relevant document IDs

        Returns:
            Dictionary with retrieval metrics (precision, recall, f1, mrr).
            All four are 0.0 when either input is empty.
        """
        if not retrieved_docs or not relevant_doc_ids:
            return {"retrieval_precision": 0.0, "retrieval_recall": 0.0, "retrieval_f1": 0.0, "retrieval_mrr": 0.0}

        # Collect IDs in retrieval order (order matters for MRR).
        retrieved_ids: list[str] = []
        for doc in retrieved_docs:
            # "metadata" may be present but None; treat that like a missing dict.
            metadata = doc.get("metadata") or {}
            doc_id = metadata.get("doc_id")
            if doc_id is None or doc_id == "":
                doc_id = metadata.get("id")
            # Explicit None/"" checks (rather than truthiness) keep an ID of 0 valid.
            if doc_id is None or doc_id == "":
                continue
            retrieved_ids.append(str(doc_id))

        # Normalize ground-truth IDs to str so they compare equal to retrieved IDs.
        relevant_set = {str(doc_id) for doc_id in relevant_doc_ids}
        retrieved_set = set(retrieved_ids)
        hits = len(relevant_set & retrieved_set)

        # Precision and recall over unique IDs.
        precision = hits / len(retrieved_set) if retrieved_set else 0.0
        recall = hits / len(relevant_set) if relevant_set else 0.0

        # Harmonic mean; guarded against the 0/0 case.
        if precision + recall == 0:
            f1 = 0.0
        else:
            f1 = 2 * (precision * recall) / (precision + recall)

        # Reciprocal rank of the first relevant document (0.0 if none is retrieved).
        mrr = next(
            (1.0 / rank for rank, retrieved_id in enumerate(retrieved_ids, start=1) if retrieved_id in relevant_set),
            0.0,
        )

        return {"retrieval_precision": precision, "retrieval_recall": recall, "retrieval_f1": f1, "retrieval_mrr": mrr}

    def evaluate_generation(self, generated_answer: str, ground_truth: str, context: str) -> dict[str, float]:
        """
        Evaluate generation quality metrics.

        Args:
            generated_answer: Generated answer text
            ground_truth: Ground truth answer
            context: Retrieved context used for generation

        Returns:
            Dictionary with generation metrics: ``exact_match``, ``token_f1``,
            ``bleu_score``, ``answer_relevance``, ``faithfulness`` and
            ``answer_confidence`` (the mean of token F1, relevance and
            faithfulness).
        """
        # Overlap with the reference answer.
        exact_match = self._exact_match(generated_answer, ground_truth)
        f1_score = self._token_f1(generated_answer, ground_truth)
        bleu_score = self._simple_bleu(generated_answer, ground_truth)

        # Overlap with the retrieved context.
        relevance_score = self._answer_relevance(generated_answer, context)
        faithfulness_score = self._faithfulness_score(generated_answer, context)

        return {
            "exact_match": float(exact_match),
            "token_f1": f1_score,
            "bleu_score": bleu_score,
            "answer_relevance": relevance_score,
            "faithfulness": faithfulness_score,
            "answer_confidence": (f1_score + relevance_score + faithfulness_score) / 3.0,
        }

    def combined_rag_score(
        self,
        retrieval_metrics: dict[str, float],
        generation_metrics: dict[str, float],
        retrieval_weight: float = 0.3,
        generation_weight: float = 0.7,
    ) -> float:
        """
        Combine retrieval and generation metrics into a single score.

        Args:
            retrieval_metrics: Output from evaluate_retrieval
            generation_metrics: Output from evaluate_generation
            retrieval_weight: Weight for retrieval score
            generation_weight: Weight for generation score

        Returns:
            Weighted sum of the retrieval and generation scores. The result
            lies between 0 and 1 when the two weights are non-negative and
            sum to 1 (as the defaults do); the weights are not normalized.
        """
        # Primary retrieval metric: F1 score (missing keys count as 0.0).
        retrieval_score = retrieval_metrics.get("retrieval_f1", 0.0)

        # Primary generation metric: fixed 0.4 / 0.3 / 0.3 blend.
        generation_score = (
            generation_metrics.get("token_f1", 0.0) * 0.4
            + generation_metrics.get("answer_relevance", 0.0) * 0.3
            + generation_metrics.get("faithfulness", 0.0) * 0.3
        )

        return retrieval_weight * retrieval_score + generation_weight * generation_score

    def _exact_match(self, prediction: str, ground_truth: str) -> bool:
        """Check if prediction exactly matches ground truth (case-insensitive, outer whitespace ignored)."""
        return prediction.strip().lower() == ground_truth.strip().lower()

    def _token_f1(self, prediction: str, ground_truth: str) -> float:
        """
        Calculate F1 score based on overlap of unique normalized tokens.

        Returns 1.0 when both texts have no tokens, and 0.0 when exactly one
        of them has none.
        """
        pred_tokens = set(self._normalize_text(prediction).split())
        truth_tokens = set(self._normalize_text(ground_truth).split())

        if not pred_tokens and not truth_tokens:
            return 1.0
        if not pred_tokens or not truth_tokens:
            return 0.0

        shared = len(pred_tokens & truth_tokens)
        precision = shared / len(pred_tokens)
        recall = shared / len(truth_tokens)

        if precision + recall == 0:
            return 0.0

        return 2 * (precision * recall) / (precision + recall)

    def _simple_bleu(self, prediction: str, ground_truth: str, n: int = 2) -> float:
        """
        Simple BLEU-like score: fraction of the prediction's unique n-grams
        that also occur in the ground truth.

        Falls back to token F1 when either text has fewer than ``n`` words.
        """
        pred_words = self._normalize_text(prediction).split()
        truth_words = self._normalize_text(ground_truth).split()

        if len(pred_words) < n or len(truth_words) < n:
            return self._token_f1(prediction, ground_truth)

        pred_ngrams = {tuple(pred_words[i : i + n]) for i in range(len(pred_words) - n + 1)}
        truth_ngrams = {tuple(truth_words[i : i + n]) for i in range(len(truth_words) - n + 1)}

        if not pred_ngrams or not truth_ngrams:
            return 0.0

        return len(pred_ngrams & truth_ngrams) / len(pred_ngrams)

    def _answer_relevance(self, answer: str, context: str) -> float:
        """
        Measure how well the answer relates to the provided context.

        Computed as the fraction of unique answer words that also appear in
        the context; 0.0 for an answer with no words.
        """
        answer_words = set(self._normalize_text(answer).split())
        if not answer_words:
            return 0.0

        context_words = set(self._normalize_text(context).split())
        return len(answer_words & context_words) / len(answer_words)

    def _faithfulness_score(self, answer: str, context: str) -> float:
        """
        Measure how well the answer is supported by the context.

        Simple implementation: the fraction of the answer's key phrases
        (see ``_extract_phrases``) that also occur in the context.
        """
        answer_phrases = self._extract_phrases(answer)
        if not answer_phrases:
            return 1.0  # Empty answer is technically faithful

        context_phrases = self._extract_phrases(context)
        return len(answer_phrases & context_phrases) / len(answer_phrases)

    def _extract_phrases(self, text: str, min_length: int = 2) -> set[str]:
        """
        Extract meaningful phrases from text.

        The result contains single words longer than 3 characters plus every
        n-gram of ``min_length`` up to 3 words whose joined text is longer
        than 5 characters.
        """
        words = self._normalize_text(text).split()

        # Individual significant words (length > 3).
        phrases = {word for word in words if len(word) > 3}

        # Bi-grams and tri-grams (capped by the number of words available).
        for n in range(min_length, min(4, len(words) + 1)):
            for start in range(len(words) - n + 1):
                phrase = " ".join(words[start : start + n])
                if len(phrase) > 5:  # Only meaningful phrases
                    phrases.add(phrase)

        return phrases

    def _normalize_text(self, text: str) -> str:
        """
        Normalize text for comparison.

        Lowercases, replaces punctuation/special characters with spaces and
        collapses all whitespace runs to single spaces with no leading or
        trailing whitespace.
        """
        without_punctuation = self._PUNCTUATION_RE.sub(" ", text.lower())
        # split()/join collapses whitespace AND trims the ends; trimming must
        # happen after punctuation removal, since that step can leave spaces
        # at the edges (e.g. "hello!" -> "hello ").
        return " ".join(without_punctuation.split())