maos-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,207 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
@@ -0,0 +1 @@
1
+ maos-agent
File without changes
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.4
2
+ Name: maos-agent
3
+ Version: 0.1.0
4
+ Summary: The Observability & Resilience SDK for Maos AI Agents
5
+ Project-URL: Homepage, https://github.com/maosproject-dev/maos-agent
6
+ Project-URL: Bug Tracker, https://github.com/maosproject-dev/maos-agent/issues
7
+ Author-email: Maos AI <support@maosproject.io>
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: System :: Monitoring
12
+ Requires-Python: >=3.9
13
+ Requires-Dist: prometheus-client>=0.17.0
14
+ Description-Content-Type: text/markdown
15
+
16
+ **CTO here.**
17
+
18
+ Here is the `README.md`.
19
+
20
+ It is written to be "Marketing-Engineering" aligned. It doesn't just say *how* to use it; it explains *why* a developer needs it (to stop their agents from dying silently on Spot instances).
21
+
22
+ I’ve added badges, a clear "Quick Start," and a section linking the metrics directly to the Grafana dashboard we just built.
23
+
24
+ ---
25
+
26
+ # Maos Agent SDK
27
+
28
+ The official Python SDK for building resilient, observable AI Agents on the **Maos Platform**.
29
+
30
+ **`maos-agent`** provides the "Day 2" primitives required to run autonomous agents in production:
31
+
32
+ 1. **Zero-Config Telemetry:** Automatically emits Prometheus metrics for every tool call, token used, and cognitive step.
33
+ 2. **Spot Instance Resilience:** Handles `SIGTERM` signals from Kubernetes to allow graceful state checkpointing before node termination.
34
+
35
+ ---
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install maos-agent
41
+
42
+ ```
43
+
44
+ ---
45
+
46
+ ## Quick Start
47
+
48
+ Wrap your existing agent code with the Maos decorators to instantly get Grafana dashboards and Spot interruption protection.
49
+
50
+ ```python
51
+ import time
52
+ import random
53
+ from maos_agent import MaosAgent, SpotInterruptionError
54
+
55
+ # 1. Initialize (Starts Prometheus server on port 8000)
56
+ agent = MaosAgent(service_name="financial-analyst", version="v1.2")
57
+
58
+ # 2. Define Tools (Auto-tracked for success/failure rates)
59
+ @agent.tool(name="stock_lookup")
60
+ def get_stock_price(ticker: str):
61
+ # Simulate work
62
+ if random.random() < 0.05:
63
+ raise ConnectionError("API Timeout") # Recorded as 'error' in Grafana
64
+ return 150.00
65
+
66
+ # 3. The Agent Loop
67
+ def run_job():
68
+ # Track duration, steps, and success automatically
69
+ with agent.task("analyze_portfolio") as task:
70
+ print("Starting analysis...")
71
+
72
+ for i in range(5):
73
+ # --- THE MAOS GUARANTEE ---
74
+ # Checks if K8s sent a termination signal (Spot reclaim).
75
+ # Raises SpotInterruptionError if node is draining.
76
+ agent.check_health()
77
+
78
+ # Record a "cognitive step" (thinking loop)
79
+ task.step()
80
+
81
+ price = get_stock_price("AAPL")
82
+ time.sleep(1)
83
+
84
+ if __name__ == "__main__":
85
+ try:
86
+ run_job()
87
+ except SpotInterruptionError:
88
+ print("🚨 SPOT RECLAIM DETECTED! SAVING STATE TO REDIS...")
89
+ # Checkpoint your agent's memory here so it can resume on a new node
90
+ exit(0)
91
+
92
+ ```
93
+
94
+ ---
95
+
96
+ ## Key Features
97
+
98
+ ### 1. Automatic Telemetry (The "Brain Scan")
99
+
100
+ Stop guessing if your agent is working. The SDK automatically exposes a `/metrics` endpoint on port `8000` (configurable) with standard Prometheus metrics:
101
+
102
+ | Metric Name | Type | Description |
103
+ | --- | --- | --- |
104
+ | `maos_agent_tool_calls_total` | Counter | Tracks tool usage + Success/Error rates. |
105
+ | `maos_agent_steps_per_goal` | Histogram | Detects "Loops of Death" (agents spinning in circles). |
106
+ | `maos_agent_token_usage_total` | Counter | Tracks cost (Input vs Output tokens). |
107
+ | `maos_agent_task_duration_seconds` | Histogram | End-to-end latency of jobs. |
108
+
109
+ *Compatible with the [Maos Agent Quality Dashboard](https://github.com/maos-ai/platform/tree/main/dashboards).*
110
+
111
+ ### 2. Graceful Shutdown (The "Money Saver")
112
+
113
+ Maos runs agents on Spot Instances to save you 90% on compute. However, Spot nodes can disappear with a 2-minute warning.
114
+
115
+ The `agent.check_health()` method abstracts the complexity of Kubernetes signal handling.
116
+
117
+ * **Normal operation:** Returns immediately.
118
+ * **During Drain:** Raises `SpotInterruptionError`.
119
+
120
+ **Best Practice:** Call `check_health()` inside your main `while` loop or before every LLM call.
121
+
122
+ ---
123
+
124
+ ## Configuration
125
+
126
+ You can configure the agent via environment variables or constructor arguments.
127
+
128
+ | Environment Variable | Default | Description |
129
+ | --- | --- | --- |
130
+ | `MAOS_SERVICE_NAME` | `unknown-agent` | The name of your agent (for filtering in Grafana). |
131
+ | `MAOS_METRICS_PORT` | `8000` | Port to expose Prometheus metrics. |
132
+ | `MAOS_LOG_LEVEL` | `INFO` | Logging verbosity. |
133
+
134
+ ---
135
+
136
+ ## Contributing
137
+
138
+ We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details.
139
+
140
+ 1. Fork the repo.
141
+ 2. Create a feature branch (`git checkout -b feature/langchain-integration`).
142
+ 3. Commit your changes.
143
+ 4. Open a Pull Request.
144
+
145
+ ---
146
+
147
+ **Built by [Maos AI](https://maosproject.io) — The Control Plane for Autonomous Compute.**
@@ -0,0 +1,132 @@
1
+ **CTO here.**
2
+
3
+ Here is the `README.md`.
4
+
5
+ It is written to be "Marketing-Engineering" aligned. It doesn't just say *how* to use it; it explains *why* a developer needs it (to stop their agents from dying silently on Spot instances).
6
+
7
+ I’ve added badges, a clear "Quick Start," and a section linking the metrics directly to the Grafana dashboard we just built.
8
+
9
+ ---
10
+
11
+ # Maos Agent SDK
12
+
13
+ The official Python SDK for building resilient, observable AI Agents on the **Maos Platform**.
14
+
15
+ **`maos-agent`** provides the "Day 2" primitives required to run autonomous agents in production:
16
+
17
+ 1. **Zero-Config Telemetry:** Automatically emits Prometheus metrics for every tool call, token used, and cognitive step.
18
+ 2. **Spot Instance Resilience:** Handles `SIGTERM` signals from Kubernetes to allow graceful state checkpointing before node termination.
19
+
20
+ ---
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install maos-agent
26
+
27
+ ```
28
+
29
+ ---
30
+
31
+ ## Quick Start
32
+
33
+ Wrap your existing agent code with the Maos decorators to instantly get Grafana dashboards and Spot interruption protection.
34
+
35
+ ```python
36
+ import time
37
+ import random
38
+ from maos_agent import MaosAgent, SpotInterruptionError
39
+
40
+ # 1. Initialize (Starts Prometheus server on port 8000)
41
+ agent = MaosAgent(service_name="financial-analyst", version="v1.2")
42
+
43
+ # 2. Define Tools (Auto-tracked for success/failure rates)
44
+ @agent.tool(name="stock_lookup")
45
+ def get_stock_price(ticker: str):
46
+ # Simulate work
47
+ if random.random() < 0.05:
48
+ raise ConnectionError("API Timeout") # Recorded as 'error' in Grafana
49
+ return 150.00
50
+
51
+ # 3. The Agent Loop
52
+ def run_job():
53
+ # Track duration, steps, and success automatically
54
+ with agent.task("analyze_portfolio") as task:
55
+ print("Starting analysis...")
56
+
57
+ for i in range(5):
58
+ # --- THE MAOS GUARANTEE ---
59
+ # Checks if K8s sent a termination signal (Spot reclaim).
60
+ # Raises SpotInterruptionError if node is draining.
61
+ agent.check_health()
62
+
63
+ # Record a "cognitive step" (thinking loop)
64
+ task.step()
65
+
66
+ price = get_stock_price("AAPL")
67
+ time.sleep(1)
68
+
69
+ if __name__ == "__main__":
70
+ try:
71
+ run_job()
72
+ except SpotInterruptionError:
73
+ print("🚨 SPOT RECLAIM DETECTED! SAVING STATE TO REDIS...")
74
+ # Checkpoint your agent's memory here so it can resume on a new node
75
+ exit(0)
76
+
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Key Features
82
+
83
+ ### 1. Automatic Telemetry (The "Brain Scan")
84
+
85
+ Stop guessing if your agent is working. The SDK automatically exposes a `/metrics` endpoint on port `8000` (configurable) with standard Prometheus metrics:
86
+
87
+ | Metric Name | Type | Description |
88
+ | --- | --- | --- |
89
+ | `maos_agent_tool_calls_total` | Counter | Tracks tool usage + Success/Error rates. |
90
+ | `maos_agent_steps_per_goal` | Histogram | Detects "Loops of Death" (agents spinning in circles). |
91
+ | `maos_agent_token_usage_total` | Counter | Tracks cost (Input vs Output tokens). |
92
+ | `maos_agent_task_duration_seconds` | Histogram | End-to-end latency of jobs. |
93
+
94
+ *Compatible with the [Maos Agent Quality Dashboard](https://github.com/maos-ai/platform/tree/main/dashboards).*
95
+
96
+ ### 2. Graceful Shutdown (The "Money Saver")
97
+
98
+ Maos runs agents on Spot Instances to save you 90% on compute. However, Spot nodes can disappear with a 2-minute warning.
99
+
100
+ The `agent.check_health()` method abstracts the complexity of Kubernetes signal handling.
101
+
102
+ * **Normal operation:** Returns immediately.
103
+ * **During Drain:** Raises `SpotInterruptionError`.
104
+
105
+ **Best Practice:** Call `check_health()` inside your main `while` loop or before every LLM call.
106
+
107
+ ---
108
+
109
+ ## Configuration
110
+
111
+ You can configure the agent via environment variables or constructor arguments.
112
+
113
+ | Environment Variable | Default | Description |
114
+ | --- | --- | --- |
115
+ | `MAOS_SERVICE_NAME` | `unknown-agent` | The name of your agent (for filtering in Grafana). |
116
+ | `MAOS_METRICS_PORT` | `8000` | Port to expose Prometheus metrics. |
117
+ | `MAOS_LOG_LEVEL` | `INFO` | Logging verbosity. |
118
+
119
+ ---
120
+
121
+ ## Contributing
122
+
123
+ We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details.
124
+
125
+ 1. Fork the repo.
126
+ 2. Create a feature branch (`git checkout -b feature/langchain-integration`).
127
+ 3. Commit your changes.
128
+ 4. Open a Pull Request.
129
+
130
+ ---
131
+
132
+ **Built by [Maos AI](https://maosproject.io) — The Control Plane for Autonomous Compute.**
@@ -0,0 +1,30 @@
1
from .decorators import TaskContext, instrument_tool
from .lifecycle import LifecycleManager, SpotInterruptionError
from .metrics import MetricsManager
4
+
5
class MaosAgent:
    """Facade tying together metrics, lifecycle handling, and decorators.

    Instantiating this class starts the Prometheus exporter (via
    MetricsManager) and installs shutdown signal handlers (via
    LifecycleManager).
    """

    def __init__(self, service_name: str, version: str = "v1.0", metrics_port: int = 8000):
        # service_name/version become Prometheus labels on every metric.
        self.service_name = service_name
        self.metrics = MetricsManager(service_name, version, metrics_port)
        self.lifecycle = LifecycleManager()

    def check_health(self):
        """
        Proxy to lifecycle check.
        Raises SpotInterruptionError if the node is dying.
        """
        self.lifecycle.check_health()

    def tool(self, name: "str | None" = None):
        """
        Decorator to track tool usage automatically.

        Args:
            name: Metric label for the tool; defaults to the wrapped
                function's own name when omitted.

        Usage: @agent.tool(name="search")
        """
        return instrument_tool(self.metrics, name)

    def task(self, name: str):
        """
        Context manager for the main job loop.
        Usage: with agent.task("analyze") as task: ...
        """
        return TaskContext(self.metrics, name)
@@ -0,0 +1,77 @@
1
+ import functools
2
+ import time
3
+ import logging
4
+
5
+ # We import metrics inside the methods to avoid circular imports
6
+ # or we pass the metrics manager object into these classes.
7
+
8
class TaskContext:
    """
    Context manager for a unit of work (Task).
    Tracks duration, step count, and final success/failure status.
    """

    def __init__(self, metrics, name: str):
        self.metrics = metrics
        self.name = name
        # Fallback only; re-armed in __enter__ so a context object created
        # ahead of time does not inflate the measured duration.
        self.start_time = time.time()
        self.steps = 0
        self.logger = logging.getLogger("maos.agent")

    def __enter__(self):
        # Start the clock when the block is actually entered.
        self.start_time = time.time()
        self.logger.info(f"Starting task: {self.name}")
        return self

    def step(self):
        """
        Record a 'cognitive step' (e.g., one LLM thought loop).
        """
        self.steps += 1

    def __exit__(self, exc_type, exc_val, exc_tb):
        duration = time.time() - self.start_time
        status = "error" if exc_type else "success"

        # 1. Record Success/Fail Counter
        self.metrics.record_task_success(self.name, status)

        # 2. Record Duration
        # We manually observe the histogram since we managed the time.
        # Imported lazily to avoid a circular import with .metrics.
        from .metrics import TASK_DURATION
        TASK_DURATION.labels(
            task_type=self.name,
            **self.metrics.labels
        ).observe(duration)

        # 3. Record Steps (The "Loop of Death" check)
        from .metrics import STEPS_PER_GOAL
        STEPS_PER_GOAL.labels(
            task_type=self.name,
            **self.metrics.labels
        ).observe(self.steps)

        if exc_type:
            self.logger.error(f"Task '{self.name}' failed: {exc_val}")
        # Implicit None return: exceptions propagate to the caller.
+
55
+
56
+ def instrument_tool(metrics, name: str = None):
57
+ """
58
+ Factory that returns the actual decorator.
59
+ We pass 'metrics' (the manager instance) in so the decorator knows where to record.
60
+ """
61
+ def decorator(func):
62
+ # Use the provided name or default to the function name
63
+ tool_name = name or func.__name__
64
+
65
+ @functools.wraps(func)
66
+ def wrapper(*args, **kwargs):
67
+ try:
68
+ result = func(*args, **kwargs)
69
+ # Record Success
70
+ metrics.record_tool(tool_name, status="success")
71
+ return result
72
+ except Exception as e:
73
+ # Record Failure & Re-raise
74
+ metrics.record_tool(tool_name, status="error")
75
+ raise e
76
+ return wrapper
77
+ return decorator
@@ -0,0 +1,36 @@
1
import time
import random
from maos_agent import MaosAgent, SpotInterruptionError

# Demo worker: exercises tool tracking, task metrics, and health checks.

# 1. Initialize
agent = MaosAgent(service_name="stock-analyst", version="v1.2")

# 2. Define a Tool (Auto-tracked)
@agent.tool(name="google_search")
def search(query):
    # Fail ~10% of the time so error rates show up on the dashboard.
    if random.random() < 0.1:
        raise Exception("Network Error")  # Will show as red in Grafana
    return "Result"

# 3. The Worker Loop
def process_job():
    print("Starting job...")

    # agent.task records duration, step count, and success/failure.
    with agent.task("daily_report") as task:
        for _ in range(5):
            agent.check_health()   # Abort early if the Spot node is draining
            task.step()            # One "thinking" iteration
            search("Apple Stock")
            time.sleep(1)

if __name__ == "__main__":
    try:
        process_job()
    except SpotInterruptionError:
        print("🚨 SAVING STATE TO REDIS BEFORE DEATH...")
        # Checkpoint logic here
        exit(0)
@@ -0,0 +1,28 @@
1
+ import signal
2
+ import sys
3
+ import logging
4
+
5
class SpotInterruptionError(Exception):
    """Raised when the environment signals a shutdown (e.g. Spot reclaim)."""
8
+
9
class LifecycleManager:
    """Watches for OS shutdown signals and surfaces them to the agent loop.

    Construction installs handlers for SIGTERM (e.g. Kubernetes node drain)
    and SIGINT (local Ctrl+C). The handler only flips a flag; check_health()
    converts that flag into a SpotInterruptionError at a safe point.
    """

    def __init__(self):
        self.should_exit = False
        self.logger = logging.getLogger("maos.lifecycle")

        # Register the same handler for cluster drains and local Ctrl+C.
        for sig in (signal.SIGTERM, signal.SIGINT):
            signal.signal(sig, self._handle_sigterm)

    def _handle_sigterm(self, signum, frame):
        # Flag only — actual teardown happens at the next check_health() call.
        self.logger.warning("⚠️ SIGTERM received from Kubernetes! Node is draining.")
        self.should_exit = True

    def check_health(self):
        """
        Call this inside your agent loop.
        If a kill signal was received, it raises an exception to break the loop safely.
        """
        if not self.should_exit:
            return
        raise SpotInterruptionError("Spot Instance Reclaim Imminent")
@@ -0,0 +1,63 @@
1
+ from prometheus_client import Counter, Histogram, start_http_server
2
+ import time
3
+
4
# --- Metric Definitions (Must match Grafana Queries) ---
# Defined at module scope so every MetricsManager instance shares the same
# collectors (prometheus_client registers metric names in a global registry).

# Histogram: maos_agent_task_duration_seconds
# Observed manually from TaskContext.__exit__ with the wall-clock duration.
TASK_DURATION = Histogram(
    'maos_agent_task_duration_seconds',
    'Time spent executing the agent task',
    ['task_type', 'service_name', 'version']
)

# Counter: maos_agent_tool_calls_total
# status label is "success" or "error" (set by MetricsManager.record_tool).
TOOL_CALLS = Counter(
    'maos_agent_tool_calls_total',
    'Total number of tool invocations',
    ['tool_name', 'status', 'service_name', 'version']
)

# Counter: maos_agent_token_usage_total
# type label is caller-supplied via record_tokens (defaults to "total").
TOKEN_USAGE = Counter(
    'maos_agent_token_usage_total',
    'Total LLM tokens consumed',
    ['model', 'type', 'service_name', 'version']
)

# Histogram: maos_agent_steps_per_goal
# Observed from TaskContext.__exit__ with the per-task step count.
STEPS_PER_GOAL = Histogram(
    'maos_agent_steps_per_goal',
    'Number of cognitive steps taken to solve a goal',
    ['task_type', 'service_name', 'version'],
    buckets=[1, 3, 5, 10, 20, 50]  # Optimized for "Loop of Death" detection
)

# Counter: maos_agent_task_success_total
# status label is "error"/"success" (set by MetricsManager.record_task_success).
TASK_SUCCESS = Counter(
    'maos_agent_task_success_total',
    'Total task completions',
    ['task_type', 'status', 'service_name', 'version']
)
41
+
42
class MetricsManager:
    """Thin wrapper around the module-level Prometheus collectors.

    Holds the per-service label set and stamps it onto every observation.
    """

    def __init__(self, service_name: str, version: str = "v1", port: int = 8000):
        # Labels applied to every metric emitted through this manager.
        self.labels = {"service_name": service_name, "version": version}
        # Start the Prometheus exporter server automatically; failure to
        # bind (e.g. port already in use) is deliberately non-fatal.
        try:
            start_http_server(port)
        except Exception as e:
            print(f"[Maos] Warning: Could not start metrics server: {e}")
        else:
            print(f"[Maos] Metrics server started on port {port}")

    def record_tool(self, tool_name: str, status: str = "success"):
        """Count one tool invocation with the given outcome status."""
        TOOL_CALLS.labels(tool_name=tool_name, status=status, **self.labels).inc()

    def record_tokens(self, count: int, model: str = "unknown", type: str = "total"):
        """Add `count` tokens for the given model/type combination."""
        # NOTE: `type` shadows the builtin but is kept for API compatibility.
        TOKEN_USAGE.labels(model=model, type=type, **self.labels).inc(count)

    def record_task_success(self, task_type: str, status: str):
        """Count one task completion ("success" or "error")."""
        TASK_SUCCESS.labels(task_type=task_type, status=status, **self.labels).inc()

    def task_timer(self, task_type: str):
        """Returns a context manager to time a task."""
        return TASK_DURATION.labels(task_type=task_type, **self.labels).time()
@@ -0,0 +1,29 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "maos-agent"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="Maos AI", email="support@maosproject.io" },
10
+ ]
11
+ description = "The Observability & Resilience SDK for Maos AI Agents"
12
+ readme = "README.md"
13
+ requires-python = ">=3.9"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: System :: Monitoring",
19
+ ]
20
+ dependencies = [
21
+ "prometheus-client>=0.17.0",
22
+ ]
23
+
24
+ [project.urls]
25
+ "Homepage" = "https://github.com/maosproject-dev/maos-agent"
26
+ "Bug Tracker" = "https://github.com/maosproject-dev/maos-agent/issues"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["maos_agent"]