mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,22 @@
1
+ <poml>
2
+ <p>Revise the given prompt template using the critique as constraints and improvement guide.</p>
3
+ <cp caption="Revision Rules">
4
+ <list listStyle="decimal">
5
+ <item>Rewrite or restructure the prompt if critique implies it.</item>
6
+ <item>Explicitly include any requested output format, structure, or word limit, if requested by the critique.</item>
7
+ <item>Prioritize mechanism-first phrasing: define what to do, then how to do it.</item>
8
+ <item>Preserve placeholder variables inside curly brackets.</item>
9
+ </list>
10
+ </cp>
11
+ <output-format>
12
+ Return only the improved prompt template with placeholders intact. Do not include explanations of how you did it, headers, or introductory text.
13
+ </output-format>
14
+ <human-msg>
15
+ <cp caption="Prompt Template">
16
+ <text whiteSpace="pre">{{ prompt_template }}</text>
17
+ </cp>
18
+ <cp caption="Critique">
19
+ <text whiteSpace="pre">{{ critique }}</text>
20
+ </cp>
21
+ </human-msg>
22
+ </poml>
@@ -0,0 +1,18 @@
1
+ <!-- Conservative Edit Prompt -->
2
+
3
+ <poml>
4
+ <p>Revise the prompt to address ONE critique point clearly and effectively. Preserve all variable names in curly-brackets.</p>
5
+ <p>Do not address more than one critique point. Focus on the single most critical issue.</p>
6
+ <p>Keep the new prompt close in tone, length, and structure to the original.</p>
7
+ <output-format>
8
+ Return only the revised full prompt. Do not include explanations, comparisons, or other text.
9
+ </output-format>
10
+ <human-msg>
11
+ <cp caption="PROMPT" level="3">
12
+ <text whiteSpace="pre">{{ prompt_template }}</text>
13
+ </cp>
14
+ <cp caption="CRITIQUE" level="3">
15
+ <text whiteSpace="pre">{{ critique }}</text>
16
+ </cp>
17
+ </human-msg>
18
+ </poml>
@@ -0,0 +1,18 @@
1
+ <poml>
2
+ <p>You optimize a prompt template.</p>
3
+ <cp caption="Original Prompt Template">
4
+ <text whiteSpace="pre">{{ prompt_template }}</text>
5
+ </cp>
6
+ <cp caption="Experiments with Original Prompt Template">
7
+ <cp for="experiment in experiments" caption="Experiment {{ loop.index + 1 }}">
8
+ <p>This experiment has {{ experiment.status }}. It gets a final reward: {{ experiment.final_reward }}</p>
9
+ <cp caption="Rollout Traces (Chat Messages, Grader Requests included)">
10
+ <object data="{{ experiment.messages }}" />
11
+ </cp>
12
+ </cp>
13
+ </cp>
14
+ <cp caption="Your Task">
15
+ Produce a brief critique listing specific causes for the error or ways to raise reward next time.
16
+ Return a bullet list with concrete, testable changes (format, constraints, ordering, definitions).
17
+ </cp>
18
+ </poml>
@@ -0,0 +1,16 @@
1
+ <poml>
2
+ <role>You are a prompt engineer.</role>
3
+ <task>Analyze where the current prompt failed to elicit the right mechanism.</task>
4
+ <cp caption="Current Prompt Template">
5
+ <text whiteSpace="pre">{{ prompt_template }}</text>
6
+ </cp>
7
+ <cp caption="Sample Runs with Current Prompt Template">
8
+ <p>The following are the OpenTelemetry spans collected from the sample runs with the current prompt template. They should contain the prompts, responses, and rewards.</p>
9
+ <cp for="experiment in experiments" caption="Sample Run #{{ loop.index + 1 }} Diagnostics">
10
+ <object for="span in experiment.spans" data="{{ span }}" />
11
+ </cp>
12
+ </cp>
13
+ <output-format>
14
+ Write 3-5 short bullets titled 'Critique:' focusing on missing constraints, ordering, or formatting.
15
+ </output-format>
16
+ </poml>
@@ -0,0 +1,107 @@
1
+ <poml>
2
+
3
+ <role>You are an expert prompt engineer.</role>
4
+
5
+ <task>Your task is to analyze the prompt and provide a critique of the prompt. Follow the steps below to create the critique.
6
+
7
+ <cp caption="1. Structural Issues">
8
+ <p>These flaws block clarity and logic. Always check them first.</p>
9
+
10
+ <list>
11
+ <item><b>Missing goal</b>: The prompt never defines what success looks like. Ask: <i>Can I summarize its output goal in one line?</i></item>
12
+ <item><b>Contradictions</b>: Two or more instructions conflict. Search for words like *never*, *always*, *except*, *but also*.</item>
13
+ <item><b>Circular dependencies</b>: The model is told to do A before B and B before A.</item>
14
+ <item><b>No stop condition</b>: The prompt doesn’t say when the task is done. Flag any open-ended verbs: <i>explore,</i> <i>analyze further,</i> <i>continue indefinitely.</i></item>
15
+ </list>
16
+ </cp>
17
+
18
+ <cp caption="2. Instruction Quality">
19
+ <p>Examine how the instructions are stated and ordered to ensure clarity and enforceability.</p>
20
+ <list>
21
+ <item><b>Vague verbs</b>: Avoid terms like <i>optimize,</i> <i>improve,</i> and <i>ensure.</i> Use precise, measurable instructions.</item>
22
+ <item><b>Lack of hierarchy</b>: All rules appear equally important, making conflict resolution impossible. Clarify rule precedence.</item>
23
+ <item><b>Mixed abstraction</b>: High-level policies are interleaved with implementation details. Keep principles separate from step-by-step actions.</item>
24
+ <item><b>Overlapping scope</b>: Similar instructions appear in several sections with minor changes. Identify and consolidate duplicates.</item>
25
+ </list>
26
+ </cp>
27
+
28
+ <cp caption="3. Control and Behavior">
29
+ <p>Review boundaries on model autonomy, tool use, and communication style.</p>
30
+ <list>
31
+ <item><b>No tool limits</b>: Limits on tool calls, retries, or time not specified. Define boundaries for operations.</item>
32
+ <item><b>Unclear uncertainty handling</b>: Conflicting instructions regarding clarifying uncertainties vs. never asking users. Select one behavior.</item>
33
+ <item><b>Verbosity confusion</b>: Some parts demand detailed answers, others specify brevity. Highlight and resolve inconsistency.</item>
34
+ <item><b>Feedback omission</b>: No plan for progress reporting or preamble during multi-step operations.</item>
35
+ </list>
36
+ </cp>
37
+
38
+ <cp caption="4. Input and Output Specification">
39
+ <p>Assess if required data and expected output formats are clearly defined.</p>
40
+ <list>
41
+ <item><b>No input defaults</b>: What should happen if a needed value is absent or invalid isn’t explained.</item>
42
+ <item><b>Output schema missing</b>: Expected response format or sections are not spelled out.</item>
43
+ <item><b>Format inconsistency</b>: Output style (Markdown, JSON, XML, etc.) shifts mid-prompt. Ensure format requirements are stable.</item>
44
+ <item><b>No validation</b>: Lacks steps like <i>verify results before submitting</i> or <i>summarize at end.</i></item>
45
+ </list>
46
+ </cp>
47
+
48
+ <cp caption="5. Scope and Safety">
49
+ <p>Ensure prompt actions remain within safe, authorized boundaries.</p>
50
+ <list>
51
+ <item><b>Scope creep</b>: Open-ended statements such as <i>feel free to enhance</i> can justify unrelated changes.</item>
52
+ <item><b>Unsafe actions</b>: Allows deletions or modifications without explicit user approval.</item>
53
+ <item><b>No error handling</b>: What happens if a tool call fails or data is missing is not addressed.</item>
54
+ <item><b>User authority ambiguity</b>: Model may act for multiple users or perform irreversible actions without checks.</item>
55
+ </list>
56
+ </cp>
57
+
58
+ <cp caption="6. Efficiency and Maintainability">
59
+ <p>Consider the prompt’s length, redundancy, and future comprehensibility.</p>
60
+ <list>
61
+ <item><b>Overexplained</b>: Verbose explanations where concise, numbered steps suffice.</item>
62
+ <item><b>Redundancy</b>: Similar rules scattered across multiple places; centralize and summarize them.</item>
63
+ <item><b>Hidden assumptions</b>: Implicit defaults (like timezone, language) are not stated.</item>
64
+ <item><b>Poor auditability</b>: Lacks section markers (e.g., <code>&lt;policy&gt;</code>, <code>&lt;procedure&gt;</code>). Structure prompt for easy review.</item>
65
+ </list>
66
+ </cp>
67
+
68
+ <cp caption="7. Testing Method">
69
+ <p>Methodical approach for reviewing a prompt:</p>
70
+ <list>
71
+ <item>Read the prompt fully; highlight all unclear or contradictory instructions.</item>
72
+ <item>For each main area, answer:
73
+ <list listStyle="decimal">
74
+ <item>What is the intended outcome?</item>
75
+ <item>What is the stop or completion condition?</item>
76
+ <item>How are conflicts between rules resolved?</item>
77
+ <item>What are the explicit limits (tools, run time, tokens)?</item>
78
+ <item>What should the output format be?</item>
79
+ </list>
80
+ </item>
81
+ <item>Rate each section: <i>clear</i>, <i>incomplete</i>, <i>contradictory</i>, or <i>redundant</i>.</item>
82
+ <item>Summarize findings under categories: structure, control, scope, format, safety.</item>
83
+ </list>
84
+ <p>This method surfaces issues such as ambiguity, contradiction, missing boundaries, and output uncertainty—core failure modes in prompting identified by the GPT-5 prompting guide.</p>
85
+ </cp>
86
+ </task>
87
+
88
+ <output-format>
89
+ Respond with a complete analysis and critique of the prompt. Be concise and direct. Less than 350 words.
90
+ </output-format>
91
+
92
+ <human-msg>
93
+ <cp caption="Prompt">
94
+ <text whiteSpace="pre">{{ prompt_template }}</text>
95
+ </cp>
96
+ <cp caption="Sample Runs of the Prompts (Historical Messages and Rewards)">
97
+ <cp for="experiment in experiments" caption="Sample Run #{{ loop.index + 1 }}">
98
+ <cp caption="Overall Status">
99
+ This run has {{ experiment.status }}. The final score is {{ experiment.final_reward }}.
100
+ </cp>
101
+ <cp caption="Messages">
102
+ <object data="{{ experiment.messages }}" />
103
+ </cp>
104
+ </cp>
105
+ </cp>
106
+ </human-msg>
107
+ </poml>
@@ -0,0 +1,162 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ from __future__ import annotations
4
+
5
+ import inspect
6
+ import weakref
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Awaitable,
11
+ Optional,
12
+ Union,
13
+ )
14
+
15
+ from mantisdk.adapter import TraceAdapter
16
+ from mantisdk.client import MantisdkClient
17
+ from mantisdk.store.base import LightningStore
18
+ from mantisdk.types import Dataset, NamedResources
19
+
20
+ if TYPE_CHECKING:
21
+ from mantisdk.llm_proxy import LLMProxy
22
+ from mantisdk.trainer import Trainer
23
+
24
+
25
class Algorithm:
    """Algorithm is the strategy, or tuner, used to train the agent.

    Collaborators are attached through the ``set_*`` methods and read back
    through the matching ``get_*`` methods. The trainer, the LLM proxy and the
    adapter are stored as weak references, so the caller has to keep those
    objects alive. The store and the initial resources are stored directly.
    """

    _trainer_ref: weakref.ReferenceType[Trainer] | None = None
    _llm_proxy_ref: weakref.ReferenceType["LLMProxy"] | None = None
    _store: LightningStore | None = None
    _initial_resources: NamedResources | None = None
    _adapter_ref: weakref.ReferenceType[TraceAdapter[Any]] | None = None

    def is_async(self) -> bool:
        """Return True if the algorithm's `run` method is a coroutine function."""
        return inspect.iscoroutinefunction(self.run)

    def set_trainer(self, trainer: Trainer) -> None:
        """
        Set the trainer for this algorithm.

        Only a weak reference is kept; the caller must keep the trainer alive.

        Args:
            trainer: The Trainer instance that will handle training and validation.
        """
        self._trainer_ref = weakref.ref(trainer)

    def get_trainer(self) -> Trainer:
        """
        Get the trainer for this algorithm.

        Returns:
            The Trainer instance associated with this algorithm.

        Raises:
            ValueError: If no trainer has been set, or if the trainer has been
                garbage collected since it was set.
        """
        if self._trainer_ref is None:
            raise ValueError("Trainer has not been set for this algorithm.")
        trainer = self._trainer_ref()
        if trainer is None:
            raise ValueError("Trainer reference is no longer valid (object has been garbage collected).")
        return trainer

    def set_llm_proxy(self, llm_proxy: LLMProxy | None) -> None:
        """
        Set the LLM proxy for this algorithm to reuse when available.

        Only a weak reference is kept. Passing None clears the proxy.

        Args:
            llm_proxy: The LLMProxy instance configured by the trainer, if any.
        """
        self._llm_proxy_ref = weakref.ref(llm_proxy) if llm_proxy is not None else None

    def get_llm_proxy(self) -> Optional[LLMProxy]:
        """
        Retrieve the configured LLM proxy instance, if one has been set.

        Returns:
            The active LLMProxy instance or None when not configured.

        Raises:
            ValueError: If a proxy was set but has since been garbage collected.
        """
        if self._llm_proxy_ref is None:
            return None

        llm_proxy = self._llm_proxy_ref()
        if llm_proxy is None:
            raise ValueError("LLM proxy reference is no longer valid (object has been garbage collected).")

        return llm_proxy

    def set_adapter(self, adapter: TraceAdapter[Any]) -> None:
        """
        Set the adapter for this algorithm to collect and convert traces.

        Only a weak reference is kept; the caller must keep the adapter alive.
        """
        self._adapter_ref = weakref.ref(adapter)

    def get_adapter(self) -> TraceAdapter[Any]:
        """
        Retrieve the adapter previously set with `set_adapter`.

        Raises:
            ValueError: If no adapter has been set, or if the adapter has been
                garbage collected since it was set.
        """
        if self._adapter_ref is None:
            raise ValueError("Adapter has not been set for this algorithm.")
        adapter = self._adapter_ref()
        if adapter is None:
            raise ValueError("Adapter reference is no longer valid (object has been garbage collected).")
        return adapter

    def set_store(self, store: LightningStore) -> None:
        """
        Set the store for this algorithm to communicate with the runners.

        Store is set directly instead of using weakref because its copy is meant to be
        maintained throughout the algorithm's lifecycle.
        """
        self._store = store

    def get_store(self) -> LightningStore:
        """
        Retrieve the store for this algorithm to communicate with the runners.

        Raises:
            ValueError: If no store has been set.
        """
        if self._store is None:
            raise ValueError("Store has not been set for this algorithm.")
        return self._store

    def get_initial_resources(self) -> Optional[NamedResources]:
        """
        Get the initial resources for this algorithm, or None if none were set.
        """
        return self._initial_resources

    def set_initial_resources(self, resources: NamedResources) -> None:
        """
        Set the initial resources for this algorithm.
        """
        self._initial_resources = resources

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Forward the call to `run` with the same arguments."""
        return self.run(*args, **kwargs)

    def run(
        self,
        train_dataset: Optional[Dataset[Any]] = None,
        val_dataset: Optional[Dataset[Any]] = None,
    ) -> Union[None, Awaitable[None]]:
        """Subclasses should override this method to implement the algorithm.

        Args:
            train_dataset: The dataset to train on. Not all algorithms require a training dataset.
            val_dataset: The dataset to validate on. Not all algorithms require a validation dataset.

        Returns:
            Algorithm should refrain from returning anything. It should just run the algorithm.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement run().")

    def get_client(self) -> MantisdkClient:
        """Get the client used to communicate with the algorithm.

        If the algorithm does not require server-client communication, it can also
        create a mock client that never actually communicates with the algorithm.

        Deprecated and will be removed in a future version.

        Returns:
            The MantisdkClient instance associated with this algorithm.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement get_client().")
@@ -0,0 +1,264 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import inspect
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Awaitable,
11
+ Dict,
12
+ Generic,
13
+ Literal,
14
+ Optional,
15
+ Protocol,
16
+ TypeVar,
17
+ Union,
18
+ cast,
19
+ overload,
20
+ )
21
+
22
+ from mantisdk.adapter import TraceAdapter
23
+ from mantisdk.store.base import LightningStore
24
+ from mantisdk.types import Dataset, NamedResources
25
+
26
+ if TYPE_CHECKING:
27
+ from mantisdk.llm_proxy import LLMProxy
28
+
29
+ from .base import Algorithm
30
+
31
+ # Algorithm function signature types.
32
+ # Only a subset of the possible parameter combinations is modelled here;
33
+ # more can be added in the future.
34
+
35
+
36
class AlgorithmFuncSyncFull(Protocol):
    """Synchronous algorithm function that declares every injectable keyword argument."""

    def __call__(
        self,
        *,
        store: LightningStore,
        train_dataset: Optional[Dataset[Any]],
        val_dataset: Optional[Dataset[Any]],
        llm_proxy: Optional[LLMProxy],
        adapter: Optional[TraceAdapter[Any]],
        initial_resources: Optional[NamedResources],
    ) -> None:
        ...
47
+
48
+
49
class AlgorithmFuncSyncOnlyStore(Protocol):
    """Synchronous algorithm function that only declares the `store` keyword argument."""

    def __call__(
        self,
        *,
        store: LightningStore,
    ) -> None:
        ...
51
+
52
+
53
class AlgorithmFuncSyncOnlyDataset(Protocol):
    """Synchronous algorithm function that only declares the two dataset keyword arguments."""

    def __call__(
        self,
        *,
        train_dataset: Optional[Dataset[Any]],
        val_dataset: Optional[Dataset[Any]],
    ) -> None:
        ...
55
+
56
+
57
class AlgorithmFuncAsyncFull(Protocol):
    """Asynchronous algorithm function that declares every injectable keyword argument."""

    def __call__(
        self,
        *,
        store: LightningStore,
        train_dataset: Optional[Dataset[Any]],
        val_dataset: Optional[Dataset[Any]],
        llm_proxy: Optional[LLMProxy],
        adapter: Optional[TraceAdapter[Any]],
        initial_resources: Optional[NamedResources],
    ) -> Awaitable[None]:
        ...
68
+
69
+
70
class AlgorithmFuncAsyncOnlyStore(Protocol):
    """Asynchronous algorithm function that only declares the `store` keyword argument."""

    def __call__(
        self,
        *,
        store: LightningStore,
    ) -> Awaitable[None]:
        ...
72
+
73
+
74
class AlgorithmFuncAsyncOnlyDataset(Protocol):
    """Asynchronous algorithm function that only declares the two dataset keyword arguments."""

    def __call__(
        self,
        *,
        train_dataset: Optional[Dataset[Any]],
        val_dataset: Optional[Dataset[Any]],
    ) -> Awaitable[None]:
        ...
78
+
79
+
80
# Union of the explicitly typed asynchronous algorithm function signatures.
AlgorithmFuncAsync = Union[AlgorithmFuncAsyncOnlyStore, AlgorithmFuncAsyncOnlyDataset, AlgorithmFuncAsyncFull]

# Union of the explicitly typed synchronous algorithm function signatures.
AlgorithmFuncSync = Union[AlgorithmFuncSyncOnlyStore, AlgorithmFuncSyncOnlyDataset, AlgorithmFuncSyncFull]
83
+
84
+
85
class AlgorithmFuncSyncFallback(Protocol):
    """Catch-all protocol for synchronous callables with arbitrary arguments."""

    def __call__(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        ...
87
+
88
+
89
class AlgorithmFuncAsyncFallback(Protocol):
    """Catch-all protocol for callables with arbitrary arguments that return an awaitable."""

    def __call__(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Awaitable[Any]:
        ...
91
+
92
+
93
# Explicitly typed signatures plus the catch-all fallback, per sync/async flavour.
AlgorithmFuncSyncLike = Union[AlgorithmFuncSync, AlgorithmFuncSyncFallback]
AlgorithmFuncAsyncLike = Union[AlgorithmFuncAsync, AlgorithmFuncAsyncFallback]

# Any callable accepted as an algorithm function, sync or async.
AlgorithmFunc = Union[AlgorithmFuncSyncLike, AlgorithmFuncAsyncLike]


# Type-level flag used to parametrize FunctionalAlgorithm by whether the wrapped callable is async.
AsyncFlag = Literal[True, False]
AF = TypeVar("AF", bound=AsyncFlag)
101
+
102
+
103
class FunctionalAlgorithm(Algorithm, Generic[AF]):
    """An algorithm wrapper built from a callable implementation.

    Functional algorithms let you provide an ordinary function instead of
    subclassing [`Algorithm`][mantisdk.Algorithm]. The wrapper inspects
    the callable signature to supply optional dependencies
    such as the store, adapter, and LLM proxy.

    Calling the instance directly forwards the arguments to the wrapped
    function untouched; dependency injection happens only in `run`.
    """

    @overload
    def __init__(self: "FunctionalAlgorithm[Literal[False]]", algorithm_func: AlgorithmFuncSyncLike) -> None: ...

    @overload
    def __init__(self: "FunctionalAlgorithm[Literal[True]]", algorithm_func: AlgorithmFuncAsyncLike) -> None: ...

    def __init__(self, algorithm_func: Union[AlgorithmFuncSyncLike, AlgorithmFuncAsyncLike]) -> None:
        """Wrap a function that implements algorithm behaviour.

        Args:
            algorithm_func: Sync or async callable implementing the algorithm
                contract. Arguments are detected automatically based on the
                function signature.
        """
        super().__init__()
        self._algorithm_func = algorithm_func
        # The signature is captured once so that `run` can decide which keyword arguments to inject.
        self._sig = inspect.signature(algorithm_func)
        self._is_async = inspect.iscoroutinefunction(algorithm_func)

        # Copy function metadata to preserve type hints and other attributes
        functools.update_wrapper(self, algorithm_func)  # type: ignore

    def is_async(self) -> bool:
        """Return True if the wrapped callable is a coroutine function."""
        return self._is_async

    # NOTE: `__call__` is deliberately defined before the `run` overloads so that the
    # overload stubs are immediately followed by the `run` implementation.
    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Invoke the wrapped function directly, without injecting any dependencies."""
        return self._algorithm_func(*args, **kwargs)  # type: ignore

    @overload
    def run(
        self: "FunctionalAlgorithm[Literal[False]]",
        train_dataset: Optional[Dataset[Any]] = None,
        val_dataset: Optional[Dataset[Any]] = None,
    ) -> None: ...

    @overload
    def run(
        self: "FunctionalAlgorithm[Literal[True]]",
        train_dataset: Optional[Dataset[Any]] = None,
        val_dataset: Optional[Dataset[Any]] = None,
    ) -> Awaitable[None]: ...

    def run(
        self,
        train_dataset: Optional[Dataset[Any]] = None,
        val_dataset: Optional[Dataset[Any]] = None,
    ) -> Union[None, Awaitable[None]]:
        """Execute the wrapped function with injected dependencies.

        Only the keyword arguments named in the wrapped function's signature
        are passed (`store`, `adapter`, `llm_proxy`, `initial_resources`,
        `train_dataset`, `val_dataset`).

        Args:
            train_dataset: Optional training dataset passed through when the
                callable declares a `train_dataset` parameter.
            val_dataset: Optional validation dataset passed through when the
                callable declares a `val_dataset` parameter.

        Returns:
            None for sync callables or an awaitable when the callable is async.

        Raises:
            TypeError: If a dataset is provided but the function signature does
                not accept the corresponding argument.
        """
        params = self._sig.parameters
        kwargs: Dict[str, Any] = {}
        if "store" in params:
            kwargs["store"] = self.get_store()
        if "adapter" in params:
            kwargs["adapter"] = self.get_adapter()
        if "llm_proxy" in params:
            kwargs["llm_proxy"] = self.get_llm_proxy()
        if "initial_resources" in params:
            kwargs["initial_resources"] = self.get_initial_resources()
        if "train_dataset" in params:
            kwargs["train_dataset"] = train_dataset
        elif train_dataset is not None:
            raise TypeError(
                f"train_dataset is provided but not supported by the algorithm function: {self._algorithm_func}"
            )
        if "val_dataset" in params:
            kwargs["val_dataset"] = val_dataset
        elif val_dataset is not None:
            raise TypeError(
                f"val_dataset is provided but not supported by the algorithm function: {self._algorithm_func}"
            )
        # both sync and async functions can be called with the same signature
        result = self._algorithm_func(**kwargs)  # type: ignore[misc]
        if self._is_async:
            return cast(Awaitable[None], result)
        # The return value of a sync callable is intentionally discarded.
        return None
200
+
201
+
202
@overload
def algo(func: AlgorithmFuncAsync) -> FunctionalAlgorithm[Literal[True]]: ...


@overload
def algo(func: AlgorithmFuncAsyncFallback) -> FunctionalAlgorithm[Any]: ...


@overload
def algo(func: AlgorithmFuncSync) -> FunctionalAlgorithm[Literal[False]]: ...


@overload
def algo(func: AlgorithmFuncSyncFallback) -> FunctionalAlgorithm[Any]: ...


def algo(
    func: Union[
        AlgorithmFuncSync,
        AlgorithmFuncAsync,
        AlgorithmFuncSyncFallback,
        AlgorithmFuncAsyncFallback,
    ],
) -> Union[FunctionalAlgorithm[Literal[False]], FunctionalAlgorithm[Literal[True]]]:
    """Wrap a plain function in a [`FunctionalAlgorithm`][mantisdk.algorithm.decorator.FunctionalAlgorithm].

    Intended for use as a decorator. The resulting object looks at the
    function's parameter names to decide which runtime dependencies to pass
    when it is run, so an algorithm can be written as a single function.

    Args:
        func: Sync or async function holding the algorithm logic. It may
            declare any subset of these parameters:

            - `store`: [`LightningStore`][mantisdk.store.base.LightningStore],
            - `train_dataset`: [`Dataset`][mantisdk.Dataset],
            - `val_dataset`: [`Dataset`][mantisdk.Dataset],
            - `llm_proxy`: [`LLMProxy`][mantisdk.LLMProxy],
            - `adapter`: [`TraceAdapter`][mantisdk.TraceAdapter],
            - `initial_resources`: [`NamedResources`][mantisdk.NamedResources],

            Parameters that are not declared are not injected. Injection is
            keyed on explicit parameter names, so `*args` and `**kwargs`
            receive nothing.

    Returns:
        FunctionalAlgorithm that proxies the callable while exposing the
        `Algorithm` interface.

    Examples:
        ```python
        from mantisdk.algorithm.decorator import algo

        @algo
        def batching_algorithm(*, store, train_dataset, val_dataset):
            for sample in train_dataset:
                store.enqueue_rollout(input=sample, mode="train")

        @algo
        async def async_algorithm(*, store, train_dataset=None, val_dataset=None):
            await store.enqueue_rollout(input={"prompt": "hello"}, mode="train")
        ```
    """
    wrapped_algorithm = FunctionalAlgorithm(func)
    return wrapped_algorithm