maos-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maos_agent-0.1.0/.gitignore +207 -0
- maos_agent-0.1.0/.python-version +1 -0
- maos_agent-0.1.0/CONTRIBUTION.md +0 -0
- maos_agent-0.1.0/PKG-INFO +147 -0
- maos_agent-0.1.0/README.md +132 -0
- maos_agent-0.1.0/core.py +30 -0
- maos_agent-0.1.0/decorators.py +77 -0
- maos_agent-0.1.0/examples/simple-worker.py +36 -0
- maos_agent-0.1.0/lifecycle.py +28 -0
- maos_agent-0.1.0/metrics.py +63 -0
- maos_agent-0.1.0/pyproject.toml +29 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
maos-agent
|
|
File without changes
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: maos-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The Observability & Resilience SDK for Maos AI Agents
|
|
5
|
+
Project-URL: Homepage, https://github.com/maosproject-dev/maos-agent
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/maosproject-dev/maos-agent/issues
|
|
7
|
+
Author-email: Maos AI <support@maosproject.io>
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: System :: Monitoring
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Requires-Dist: prometheus-client>=0.17.0
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
|
|
25
|
+
|
|
26
|
+
# Maos Agent SDK
|
|
27
|
+
|
|
28
|
+
The official Python SDK for building resilient, observable AI Agents on the **Maos Platform**.
|
|
29
|
+
|
|
30
|
+
**`maos-agent`** provides the "Day 2" primitives required to run autonomous agents in production:
|
|
31
|
+
|
|
32
|
+
1. **Zero-Config Telemetry:** Automatically emits Prometheus metrics for every tool call, token used, and cognitive step.
|
|
33
|
+
2. **Spot Instance Resilience:** Handles `SIGTERM` signals from Kubernetes to allow graceful state checkpointing before node termination.
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install maos-agent
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Quick Start
|
|
47
|
+
|
|
48
|
+
Wrap your existing agent code with the Maos decorators to instantly get Grafana dashboards and Spot interruption protection.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import time
|
|
52
|
+
import random
|
|
53
|
+
from maos_agent import MaosAgent, SpotInterruptionError
|
|
54
|
+
|
|
55
|
+
# 1. Initialize (Starts Prometheus server on port 8000)
|
|
56
|
+
agent = MaosAgent(service_name="financial-analyst", version="v1.2")
|
|
57
|
+
|
|
58
|
+
# 2. Define Tools (Auto-tracked for success/failure rates)
|
|
59
|
+
@agent.tool(name="stock_lookup")
|
|
60
|
+
def get_stock_price(ticker: str):
|
|
61
|
+
# Simulate work
|
|
62
|
+
if random.random() < 0.05:
|
|
63
|
+
raise ConnectionError("API Timeout") # Recorded as 'error' in Grafana
|
|
64
|
+
return 150.00
|
|
65
|
+
|
|
66
|
+
# 3. The Agent Loop
|
|
67
|
+
def run_job():
|
|
68
|
+
# Track duration, steps, and success automatically
|
|
69
|
+
with agent.task("analyze_portfolio") as task:
|
|
70
|
+
print("Starting analysis...")
|
|
71
|
+
|
|
72
|
+
for i in range(5):
|
|
73
|
+
# --- THE MAOS GUARANTEE ---
|
|
74
|
+
# Checks if K8s sent a termination signal (Spot reclaim).
|
|
75
|
+
# Raises SpotInterruptionError if node is draining.
|
|
76
|
+
agent.check_health()
|
|
77
|
+
|
|
78
|
+
# Record a "cognitive step" (thinking loop)
|
|
79
|
+
task.step()
|
|
80
|
+
|
|
81
|
+
price = get_stock_price("AAPL")
|
|
82
|
+
time.sleep(1)
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
|
|
85
|
+
try:
|
|
86
|
+
run_job()
|
|
87
|
+
except SpotInterruptionError:
|
|
88
|
+
print("🚨 SPOT RECLAIM DETECTED! SAVING STATE TO REDIS...")
|
|
89
|
+
# Checkpoint your agent's memory here so it can resume on a new node
|
|
90
|
+
exit(0)
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Key Features
|
|
97
|
+
|
|
98
|
+
### 1. Automatic Telemetry (The "Brain Scan")
|
|
99
|
+
|
|
100
|
+
Stop guessing if your agent is working. The SDK automatically exposes a `/metrics` endpoint on port `8000` (configurable) with standard Prometheus metrics:
|
|
101
|
+
|
|
102
|
+
| Metric Name | Type | Description |
|
|
103
|
+
| --- | --- | --- |
|
|
104
|
+
| `maos_agent_tool_calls_total` | Counter | Tracks tool usage + Success/Error rates. |
|
|
105
|
+
| `maos_agent_steps_per_goal` | Histogram | Detects "Loops of Death" (agents spinning in circles). |
|
|
106
|
+
| `maos_agent_token_usage_total` | Counter | Tracks cost (Input vs Output tokens). |
|
|
107
|
+
| `maos_agent_task_duration_seconds` | Histogram | End-to-end latency of jobs. |
|
|
108
|
+
|
|
109
|
+
*Compatible with the [Maos Agent Quality Dashboard](https://github.com/maos-ai/platform/tree/main/dashboards).*
|
|
110
|
+
|
|
111
|
+
### 2. Graceful Shutdown (The "Money Saver")
|
|
112
|
+
|
|
113
|
+
Maos runs agents on Spot Instances to save you 90% on compute. However, Spot nodes can disappear with a 2-minute warning.
|
|
114
|
+
|
|
115
|
+
The `agent.check_health()` method abstracts the complexity of Kubernetes signal handling.
|
|
116
|
+
|
|
117
|
+
* **Normal operation:** Returns immediately.
|
|
118
|
+
* **During Drain:** Raises `SpotInterruptionError`.
|
|
119
|
+
|
|
120
|
+
**Best Practice:** Call `check_health()` inside your main `while` loop or before every LLM call.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
You can configure the agent via environment variables or constructor arguments.
|
|
127
|
+
|
|
128
|
+
| Environment Variable | Default | Description |
|
|
129
|
+
| --- | --- | --- |
|
|
130
|
+
| `MAOS_SERVICE_NAME` | `unknown-agent` | The name of your agent (for filtering in Grafana). |
|
|
131
|
+
| `MAOS_METRICS_PORT` | `8000` | Port to expose Prometheus metrics. |
|
|
132
|
+
| `MAOS_LOG_LEVEL` | `INFO` | Logging verbosity. |
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Contributing
|
|
137
|
+
|
|
138
|
+
We welcome contributions! Please see [CONTRIBUTION.md](CONTRIBUTION.md) for details.
|
|
139
|
+
|
|
140
|
+
1. Fork the repo.
|
|
141
|
+
2. Create a feature branch (`git checkout -b feature/langchain-integration`).
|
|
142
|
+
3. Commit your changes.
|
|
143
|
+
4. Open a Pull Request.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
**Built by [Maos AI](https://maosproject.io) — The Control Plane for Autonomous Compute.**
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
|
|
10
|
+
|
|
11
|
+
# Maos Agent SDK
|
|
12
|
+
|
|
13
|
+
The official Python SDK for building resilient, observable AI Agents on the **Maos Platform**.
|
|
14
|
+
|
|
15
|
+
**`maos-agent`** provides the "Day 2" primitives required to run autonomous agents in production:
|
|
16
|
+
|
|
17
|
+
1. **Zero-Config Telemetry:** Automatically emits Prometheus metrics for every tool call, token used, and cognitive step.
|
|
18
|
+
2. **Spot Instance Resilience:** Handles `SIGTERM` signals from Kubernetes to allow graceful state checkpointing before node termination.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install maos-agent
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
Wrap your existing agent code with the Maos decorators to instantly get Grafana dashboards and Spot interruption protection.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import time
|
|
37
|
+
import random
|
|
38
|
+
from maos_agent import MaosAgent, SpotInterruptionError
|
|
39
|
+
|
|
40
|
+
# 1. Initialize (Starts Prometheus server on port 8000)
|
|
41
|
+
agent = MaosAgent(service_name="financial-analyst", version="v1.2")
|
|
42
|
+
|
|
43
|
+
# 2. Define Tools (Auto-tracked for success/failure rates)
|
|
44
|
+
@agent.tool(name="stock_lookup")
|
|
45
|
+
def get_stock_price(ticker: str):
|
|
46
|
+
# Simulate work
|
|
47
|
+
if random.random() < 0.05:
|
|
48
|
+
raise ConnectionError("API Timeout") # Recorded as 'error' in Grafana
|
|
49
|
+
return 150.00
|
|
50
|
+
|
|
51
|
+
# 3. The Agent Loop
|
|
52
|
+
def run_job():
|
|
53
|
+
# Track duration, steps, and success automatically
|
|
54
|
+
with agent.task("analyze_portfolio") as task:
|
|
55
|
+
print("Starting analysis...")
|
|
56
|
+
|
|
57
|
+
for i in range(5):
|
|
58
|
+
# --- THE MAOS GUARANTEE ---
|
|
59
|
+
# Checks if K8s sent a termination signal (Spot reclaim).
|
|
60
|
+
# Raises SpotInterruptionError if node is draining.
|
|
61
|
+
agent.check_health()
|
|
62
|
+
|
|
63
|
+
# Record a "cognitive step" (thinking loop)
|
|
64
|
+
task.step()
|
|
65
|
+
|
|
66
|
+
price = get_stock_price("AAPL")
|
|
67
|
+
time.sleep(1)
|
|
68
|
+
|
|
69
|
+
if __name__ == "__main__":
|
|
70
|
+
try:
|
|
71
|
+
run_job()
|
|
72
|
+
except SpotInterruptionError:
|
|
73
|
+
print("🚨 SPOT RECLAIM DETECTED! SAVING STATE TO REDIS...")
|
|
74
|
+
# Checkpoint your agent's memory here so it can resume on a new node
|
|
75
|
+
exit(0)
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Key Features
|
|
82
|
+
|
|
83
|
+
### 1. Automatic Telemetry (The "Brain Scan")
|
|
84
|
+
|
|
85
|
+
Stop guessing if your agent is working. The SDK automatically exposes a `/metrics` endpoint on port `8000` (configurable) with standard Prometheus metrics:
|
|
86
|
+
|
|
87
|
+
| Metric Name | Type | Description |
|
|
88
|
+
| --- | --- | --- |
|
|
89
|
+
| `maos_agent_tool_calls_total` | Counter | Tracks tool usage + Success/Error rates. |
|
|
90
|
+
| `maos_agent_steps_per_goal` | Histogram | Detects "Loops of Death" (agents spinning in circles). |
|
|
91
|
+
| `maos_agent_token_usage_total` | Counter | Tracks cost (Input vs Output tokens). |
|
|
92
|
+
| `maos_agent_task_duration_seconds` | Histogram | End-to-end latency of jobs. |
|
|
93
|
+
|
|
94
|
+
*Compatible with the [Maos Agent Quality Dashboard](https://github.com/maos-ai/platform/tree/main/dashboards).*
|
|
95
|
+
|
|
96
|
+
### 2. Graceful Shutdown (The "Money Saver")
|
|
97
|
+
|
|
98
|
+
Maos runs agents on Spot Instances to save you 90% on compute. However, Spot nodes can disappear with a 2-minute warning.
|
|
99
|
+
|
|
100
|
+
The `agent.check_health()` method abstracts the complexity of Kubernetes signal handling.
|
|
101
|
+
|
|
102
|
+
* **Normal operation:** Returns immediately.
|
|
103
|
+
* **During Drain:** Raises `SpotInterruptionError`.
|
|
104
|
+
|
|
105
|
+
**Best Practice:** Call `check_health()` inside your main `while` loop or before every LLM call.
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Configuration
|
|
110
|
+
|
|
111
|
+
You can configure the agent via environment variables or constructor arguments.
|
|
112
|
+
|
|
113
|
+
| Environment Variable | Default | Description |
|
|
114
|
+
| --- | --- | --- |
|
|
115
|
+
| `MAOS_SERVICE_NAME` | `unknown-agent` | The name of your agent (for filtering in Grafana). |
|
|
116
|
+
| `MAOS_METRICS_PORT` | `8000` | Port to expose Prometheus metrics. |
|
|
117
|
+
| `MAOS_LOG_LEVEL` | `INFO` | Logging verbosity. |
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Contributing
|
|
122
|
+
|
|
123
|
+
We welcome contributions! Please see [CONTRIBUTION.md](CONTRIBUTION.md) for details.
|
|
124
|
+
|
|
125
|
+
1. Fork the repo.
|
|
126
|
+
2. Create a feature branch (`git checkout -b feature/langchain-integration`).
|
|
127
|
+
3. Commit your changes.
|
|
128
|
+
4. Open a Pull Request.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
**Built by [Maos AI](https://maosproject.io) — The Control Plane for Autonomous Compute.**
|
maos_agent-0.1.0/core.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .metrics import MetricsManager
|
|
2
|
+
from .lifecycle import LifecycleManager
|
|
3
|
+
from .decorators import TaskContext, instrument_tool
|
|
4
|
+
|
|
5
|
+
class MaosAgent:
    """Facade for the Maos SDK: metrics export plus lifecycle (shutdown) handling.

    Instantiating an agent starts the Prometheus exporter (via MetricsManager)
    and installs SIGTERM/SIGINT handlers (via LifecycleManager).
    """

    def __init__(self, service_name: str, version: str = "v1.0", metrics_port: int = 8000):
        self.service_name = service_name
        # MetricsManager starts the Prometheus exporter on `metrics_port`.
        self.metrics = MetricsManager(service_name, version, metrics_port)
        # LifecycleManager registers SIGTERM/SIGINT handlers for graceful drain.
        self.lifecycle = LifecycleManager()

    def check_health(self):
        """
        Proxy to lifecycle check.
        Raises SpotInterruptionError if the node is dying.
        """
        self.lifecycle.check_health()

    def tool(self, name: "str | None" = None):
        """
        Decorator to track tool usage automatically.

        `name` is the metric label for the tool; when omitted it defaults to
        the wrapped function's __name__ (see instrument_tool).

        Usage: @agent.tool(name="search")
        """
        return instrument_tool(self.metrics, name)

    def task(self, name: str):
        """
        Context manager for the main job loop.
        Usage: with agent.task("analyze") as task: ...
        """
        return TaskContext(self.metrics, name)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import time
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
# We import metrics inside the methods to avoid circular imports
|
|
6
|
+
# or we pass the metrics manager object into these classes.
|
|
7
|
+
|
|
8
|
+
class TaskContext:
    """
    Context manager for a unit of work (Task).
    Tracks duration, step count, and final success/failure status.
    """

    def __init__(self, metrics, name: str):
        self.metrics = metrics
        self.name = name
        # Fallback start time in case the context is never entered;
        # __enter__ resets it so setup time between construction and
        # entry is not billed to the task.
        self.start_time = time.time()
        self.steps = 0
        self.logger = logging.getLogger("maos.agent")

    def __enter__(self):
        # Restart the clock here: duration should measure only the
        # `with` body, not the gap since __init__.
        self.start_time = time.time()
        self.logger.info(f"Starting task: {self.name}")
        return self

    def step(self):
        """
        Record a 'cognitive step' (e.g., one LLM thought loop).
        """
        self.steps += 1

    def __exit__(self, exc_type, exc_val, exc_tb):
        duration = time.time() - self.start_time
        status = "error" if exc_type else "success"

        # 1. Record Success/Fail Counter
        self.metrics.record_task_success(self.name, status)

        # 2. Record Duration
        # We manually observe the histogram since we managed the time.
        # Imported locally to avoid a circular import with .metrics.
        from .metrics import TASK_DURATION
        TASK_DURATION.labels(
            task_type=self.name,
            **self.metrics.labels
        ).observe(duration)

        # 3. Record Steps (The "Loop of Death" check)
        from .metrics import STEPS_PER_GOAL
        STEPS_PER_GOAL.labels(
            task_type=self.name,
            **self.metrics.labels
        ).observe(self.steps)

        if exc_type:
            self.logger.error(f"Task '{self.name}' failed: {exc_val}")
        # Implicitly returns None (falsy) so any exception propagates.
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def instrument_tool(metrics, name: "str | None" = None):
    """
    Factory that returns the actual decorator.

    We pass 'metrics' (the manager instance) in so the decorator knows
    where to record outcomes.

    Args:
        metrics: Object exposing record_tool(tool_name, status=...).
        name: Metric label for the tool; defaults to the wrapped
            function's __name__ when omitted.
    """
    def decorator(func):
        # Use the provided name or default to the function name
        tool_name = name or func.__name__

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
            except Exception:
                # Record failure, then re-raise with the original
                # traceback intact (bare `raise`, not `raise e`).
                metrics.record_tool(tool_name, status="error")
                raise
            # Record success only after the call returned normally.
            metrics.record_tool(tool_name, status="success")
            return result
        return wrapper
    return decorator
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import time
import random
from maos_agent import MaosAgent, SpotInterruptionError

# Create the agent facade. This boots the Prometheus exporter and
# installs the SIGTERM/SIGINT handlers that check_health() polls.
agent = MaosAgent(service_name="stock-analyst", version="v1.2")

# Tools decorated with @agent.tool are counted automatically as
# success/error in the metrics backend.
@agent.tool(name="google_search")
def search(query):
    # Fail roughly one call in ten to exercise the error counter.
    if random.random() < 0.1:
        raise Exception("Network Error")  # Will show as red in Grafana
    return "Result"

def process_job():
    """Run one instrumented unit of work: five think/search iterations."""
    print("Starting job...")

    # agent.task() times the block, counts steps, and records the
    # final success/error status when the `with` block exits.
    with agent.task("daily_report") as task:
        for _ in range(5):
            # Raises SpotInterruptionError once a shutdown signal arrives.
            agent.check_health()

            task.step()  # one "cognitive" iteration
            search("Apple Stock")
            time.sleep(1)

if __name__ == "__main__":
    try:
        process_job()
    except SpotInterruptionError:
        print("🚨 SAVING STATE TO REDIS BEFORE DEATH...")
        # Checkpoint logic here
        exit(0)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import signal
|
|
2
|
+
import sys
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
class SpotInterruptionError(Exception):
    """Raised when the environment signals a shutdown (SIGTERM/SIGINT)."""
    pass

class LifecycleManager:
    """Turns POSIX shutdown signals into a flag that agent loops can poll.

    NOTE(review): signal.signal() only works from the main thread of the
    main interpreter — construct this object on the main thread.
    """

    def __init__(self):
        # Set by the signal handler; read by check_health().
        self.should_exit = False
        self.logger = logging.getLogger("maos.lifecycle")

        # Register the signal handlers
        signal.signal(signal.SIGTERM, self._handle_sigterm)
        signal.signal(signal.SIGINT, self._handle_sigterm)  # Handle Ctrl+C locally too

    def _handle_sigterm(self, signum, frame):
        # SIGINT (Ctrl+C) is routed here too, so log the signal we
        # actually received instead of always claiming SIGTERM.
        try:
            sig_name = signal.Signals(signum).name
        except ValueError:
            sig_name = str(signum)
        self.logger.warning(f"⚠️ {sig_name} received! Node is draining.")
        self.should_exit = True

    def check_health(self):
        """
        Call this inside your agent loop.
        If a kill signal was received, it raises an exception to break the loop safely.
        """
        if self.should_exit:
            raise SpotInterruptionError("Spot Instance Reclaim Imminent")
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from prometheus_client import Counter, Histogram, start_http_server
import time  # NOTE(review): appears unused here — TASK_DURATION.time() is the Histogram method, not this module; confirm before removing

# --- Metric Definitions (Must match Grafana Queries) ---
# Defined at module scope so every importer shares one set of series.
# NOTE(review): prometheus_client registers these in its default global
# registry on import — importing this module twice under different names
# would raise a duplicate-registration error; verify packaging keeps a
# single import path.

# Histogram: maos_agent_task_duration_seconds
# End-to-end latency of one task, labelled per task type and service.
TASK_DURATION = Histogram(
    'maos_agent_task_duration_seconds',
    'Time spent executing the agent task',
    ['task_type', 'service_name', 'version']
)

# Counter: maos_agent_tool_calls_total
# Incremented once per tool invocation; `status` is 'success' or 'error'.
TOOL_CALLS = Counter(
    'maos_agent_tool_calls_total',
    'Total number of tool invocations',
    ['tool_name', 'status', 'service_name', 'version']
)

# Counter: maos_agent_token_usage_total
# LLM token spend; `type` distinguishes e.g. input vs output tokens.
TOKEN_USAGE = Counter(
    'maos_agent_token_usage_total',
    'Total LLM tokens consumed',
    ['model', 'type', 'service_name', 'version']
)

# Histogram: maos_agent_steps_per_goal
# How many cognitive steps a task took; a fat right tail signals agents
# spinning in circles.
STEPS_PER_GOAL = Histogram(
    'maos_agent_steps_per_goal',
    'Number of cognitive steps taken to solve a goal',
    ['task_type', 'service_name', 'version'],
    buckets=[1, 3, 5, 10, 20, 50]  # Optimized for "Loop of Death" detection
)

# Counter: maos_agent_task_success_total
# One increment per finished task; `status` is 'success' or 'error'.
TASK_SUCCESS = Counter(
    'maos_agent_task_success_total',
    'Total task completions',
    ['task_type', 'status', 'service_name', 'version']
)
|
|
41
|
+
|
|
42
|
+
class MetricsManager:
    """Owns the shared label set and the Prometheus exporter HTTP server.

    Every series emitted by this process is tagged with
    (service_name, version) so dashboards can filter per agent.
    """

    def __init__(self, service_name: str, version: str = "v1", port: int = 8000):
        self.labels = {"service_name": service_name, "version": version}
        # Use the package logging convention (cf. maos.agent, maos.lifecycle)
        # instead of bare print().
        self.logger = logging.getLogger("maos.metrics")
        # Start the Prometheus exporter server automatically. Best-effort:
        # a failure (e.g. port already in use) should not kill the agent,
        # so it is logged and swallowed.
        try:
            start_http_server(port)
            self.logger.info("[Maos] Metrics server started on port %s", port)
        except Exception as e:
            self.logger.warning("[Maos] Warning: Could not start metrics server: %s", e)

    def record_tool(self, tool_name: str, status: str = "success"):
        """Count one tool invocation with its outcome ('success'/'error')."""
        TOOL_CALLS.labels(tool_name=tool_name, status=status, **self.labels).inc()

    def record_tokens(self, count: int, model: str = "unknown", type: str = "total"):
        """Add `count` tokens to the usage counter.

        NOTE: `type` shadows the builtin, but it is part of the public
        keyword interface, so the name is kept for backward compatibility.
        """
        TOKEN_USAGE.labels(model=model, type=type, **self.labels).inc(count)

    def record_task_success(self, task_type: str, status: str):
        """Count one task completion with its final status."""
        TASK_SUCCESS.labels(task_type=task_type, status=status, **self.labels).inc()

    def task_timer(self, task_type: str):
        """Returns a context manager to time a task."""
        return TASK_DURATION.labels(task_type=task_type, **self.labels).time()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "maos-agent"
version = "0.1.0"
authors = [
  { name="Maos AI", email="support@maosproject.io" },
]
description = "The Observability & Resilience SDK for Maos AI Agents"
readme = "README.md"
requires-python = ">=3.9"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Topic :: System :: Monitoring",
]
dependencies = [
    "prometheus-client>=0.17.0",
]

[project.urls]
"Homepage" = "https://github.com/maosproject-dev/maos-agent"
"Bug Tracker" = "https://github.com/maosproject-dev/maos-agent/issues"

# NOTE(review): the sdist lists core.py, metrics.py, lifecycle.py,
# decorators.py at the archive root — not under a maos_agent/ directory —
# and no __init__.py is present, yet the wheel target below expects a
# "maos_agent" package. Verify the built wheel actually contains an
# importable `maos_agent` package (README examples do
# `from maos_agent import MaosAgent`).
[tool.hatch.build.targets.wheel]
packages = ["maos_agent"]
|