cortexops 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ .venv/
6
+ venv/
7
+ .env
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ PKG-INFO
12
+ *.whl
13
+ *.tar.gz
14
+
15
+ # Test / lint caches
16
+ .pytest_cache/
17
+ .ruff_cache/
18
+ .mypy_cache/
19
+
20
+ # Package managers
21
+ uv.lock
22
+ .python-version
23
+
24
+ # Database
25
+ *.db
26
+ *.sqlite
27
+
28
+ # IDE
29
+ .vscode/
30
+ .idea/
31
+
32
+ # OS
33
+ .DS_Store
34
+ Thumbs.db
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 CortexOps Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,169 @@
1
+ Metadata-Version: 2.4
2
+ Name: cortexops
3
+ Version: 0.1.0
4
+ Summary: Reliability infrastructure for AI agents — evaluation, observability, and regression testing
5
+ Project-URL: Homepage, https://cortexops.ai
6
+ Project-URL: Repository, https://github.com/ashishodu2023/cortexops
7
+ Project-URL: Documentation, https://docs.cortexops.ai
8
+ Project-URL: Bug Tracker, https://github.com/ashishodu2023/cortexops/issues
9
+ Project-URL: Changelog, https://github.com/ashishodu2023/cortexops/releases
10
+ Author-email: Ashish <ashishodu2023@gmail.com>
11
+ License: MIT License
12
+
13
+ Copyright (c) 2025 CortexOps Contributors
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ of this software and associated documentation files (the "Software"), to deal
17
+ in the Software without restriction, including without limitation the rights
18
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19
+ copies of the Software, and to permit persons to whom the Software is
20
+ furnished to do so, subject to the following conditions:
21
+
22
+ The above copyright notice and this permission notice shall be included in all
23
+ copies or substantial portions of the Software.
24
+
25
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
+ SOFTWARE.
32
+ License-File: LICENSE
33
+ Keywords: agents,ai,autogen,crewai,evaluation,langgraph,llm,observability,testing
34
+ Classifier: Development Status :: 3 - Alpha
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Operating System :: OS Independent
38
+ Classifier: Programming Language :: Python :: 3
39
+ Classifier: Programming Language :: Python :: 3.10
40
+ Classifier: Programming Language :: Python :: 3.11
41
+ Classifier: Programming Language :: Python :: 3.12
42
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
43
+ Classifier: Topic :: Software Development :: Quality Assurance
44
+ Classifier: Topic :: Software Development :: Testing
45
+ Classifier: Typing :: Typed
46
+ Requires-Python: >=3.10
47
+ Requires-Dist: pydantic>=2.0
48
+ Requires-Dist: pyyaml>=6.0
49
+ Requires-Dist: setuptools>=82.0.1
50
+ Provides-Extra: all
51
+ Requires-Dist: httpx>=0.27; extra == 'all'
52
+ Provides-Extra: dev
53
+ Requires-Dist: httpx>=0.27; extra == 'dev'
54
+ Requires-Dist: mypy>=1.10; extra == 'dev'
55
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
56
+ Requires-Dist: pytest>=8.0; extra == 'dev'
57
+ Requires-Dist: ruff>=0.4; extra == 'dev'
58
+ Provides-Extra: http
59
+ Requires-Dist: httpx>=0.27; extra == 'http'
60
+ Provides-Extra: llm
61
+ Requires-Dist: httpx>=0.27; extra == 'llm'
62
+ Description-Content-Type: text/markdown
63
+
64
+ # CortexOps
65
+
66
+ **Reliability infrastructure for AI agents.**
67
+ Evaluate · Observe · Operate — for LangGraph, CrewAI, and AutoGen.
68
+
69
+ [![PyPI version](https://img.shields.io/pypi/v/cortexops.svg)](https://pypi.org/project/cortexops/)
70
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
71
+ [![CI](https://github.com/ashishodu2023/cortexops/actions/workflows/eval.yml/badge.svg)](https://github.com/ashishodu2023/cortexops/actions/workflows/eval.yml)
72
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/ashishodu2023/cortexops/blob/main/LICENSE)
73
+
74
+ ---
75
+
76
+ ## The problem
77
+
78
+ You deployed an agent. You have no idea if it regressed overnight.
79
+
80
+ No standard eval format. No failure traces. No CI gate before the next prompt change ships.
81
+ CortexOps fixes that.
82
+
83
+ ---
84
+
85
+ ## Install
86
+
87
+ ```bash
88
+ pip install cortexops
89
+
90
+ # With HTTP client (for pushing traces to hosted API):
91
+ pip install "cortexops[http]"
92
+
93
+ # With LLM judge support:
94
+ pip install "cortexops[llm]"
95
+ ```
96
+
97
+ ---
98
+
99
+ ## Quickstart
100
+
101
+ ```python
102
+ from cortexops import CortexTracer, EvalSuite
103
+
104
+ # Wrap your LangGraph app — zero refactor required
105
+ tracer = CortexTracer(project="payments-agent")
106
+ graph = tracer.wrap(your_langgraph_app)
107
+
108
+ # Run evaluations against a golden dataset
109
+ results = EvalSuite.run(
110
+ dataset="golden_v1.yaml",
111
+ agent=graph,
112
+ )
113
+ print(results.summary())
114
+ ```
115
+
116
+ ---
117
+
118
+ ## Golden dataset (YAML)
119
+
120
+ ```yaml
121
+ version: 1
122
+ project: payments-agent
123
+
124
+ cases:
125
+ - id: refund_lookup_01
126
+ input: "What is the status of refund REF-8821?"
127
+ expected_tool_calls: [lookup_refund]
128
+ expected_output_contains: ["approved", "REF-8821"]
129
+ max_latency_ms: 3000
130
+
131
+ - id: open_ended_explanation_01
132
+ input: "Why was my refund rejected?"
133
+ judge: llm
134
+ judge_criteria: >
135
+ The response must explain the rejection reason clearly,
136
+ be empathetic, and offer a concrete next step. No jargon.
137
+ ```
138
+
139
+ ---
140
+
141
+ ## CI gate
142
+
143
+ ```bash
144
+ cortexops eval run \
145
+ --dataset golden_v1.yaml \
146
+ --fail-on "task_completion < 0.90"
147
+ ```
148
+
149
+ The command exits with a non-zero status if the threshold is not met, which blocks the PR.
150
+
151
+ ---
152
+
153
+ ## Built-in metrics
154
+
155
+ | Metric | What it checks |
156
+ |---|---|
157
+ | `task_completion` | Non-empty, non-error output with expected content |
158
+ | `tool_accuracy` | Expected tool calls were actually made |
159
+ | `latency` | Response within `max_latency_ms` budget |
160
+ | `hallucination` | Fabrication signals in output |
161
+ | `llm_judge` | GPT-4o scores against natural-language criteria |
162
+
163
+ ---
164
+
165
+ ## Links
166
+
167
+ - **Docs**: [docs.cortexops.ai](https://docs.cortexops.ai)
168
+ - **Repo**: [github.com/ashishodu2023/cortexops](https://github.com/ashishodu2023/cortexops)
169
+ - **Issues**: [GitHub Issues](https://github.com/ashishodu2023/cortexops/issues)
@@ -0,0 +1,106 @@
1
+ # CortexOps
2
+
3
+ **Reliability infrastructure for AI agents.**
4
+ Evaluate · Observe · Operate — for LangGraph, CrewAI, and AutoGen.
5
+
6
+ [![PyPI version](https://img.shields.io/pypi/v/cortexops.svg)](https://pypi.org/project/cortexops/)
7
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
8
+ [![CI](https://github.com/ashishodu2023/cortexops/actions/workflows/eval.yml/badge.svg)](https://github.com/ashishodu2023/cortexops/actions/workflows/eval.yml)
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/ashishodu2023/cortexops/blob/main/LICENSE)
10
+
11
+ ---
12
+
13
+ ## The problem
14
+
15
+ You deployed an agent. You have no idea if it regressed overnight.
16
+
17
+ No standard eval format. No failure traces. No CI gate before the next prompt change ships.
18
+ CortexOps fixes that.
19
+
20
+ ---
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install cortexops
26
+
27
+ # With HTTP client (for pushing traces to hosted API):
28
+ pip install "cortexops[http]"
29
+
30
+ # With LLM judge support:
31
+ pip install "cortexops[llm]"
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Quickstart
37
+
38
+ ```python
39
+ from cortexops import CortexTracer, EvalSuite
40
+
41
+ # Wrap your LangGraph app — zero refactor required
42
+ tracer = CortexTracer(project="payments-agent")
43
+ graph = tracer.wrap(your_langgraph_app)
44
+
45
+ # Run evaluations against a golden dataset
46
+ results = EvalSuite.run(
47
+ dataset="golden_v1.yaml",
48
+ agent=graph,
49
+ )
50
+ print(results.summary())
51
+ ```
52
+
53
+ ---
54
+
55
+ ## Golden dataset (YAML)
56
+
57
+ ```yaml
58
+ version: 1
59
+ project: payments-agent
60
+
61
+ cases:
62
+ - id: refund_lookup_01
63
+ input: "What is the status of refund REF-8821?"
64
+ expected_tool_calls: [lookup_refund]
65
+ expected_output_contains: ["approved", "REF-8821"]
66
+ max_latency_ms: 3000
67
+
68
+ - id: open_ended_explanation_01
69
+ input: "Why was my refund rejected?"
70
+ judge: llm
71
+ judge_criteria: >
72
+ The response must explain the rejection reason clearly,
73
+ be empathetic, and offer a concrete next step. No jargon.
74
+ ```
75
+
76
+ ---
77
+
78
+ ## CI gate
79
+
80
+ ```bash
81
+ cortexops eval run \
82
+ --dataset golden_v1.yaml \
83
+ --fail-on "task_completion < 0.90"
84
+ ```
85
+
86
+ The command exits with a non-zero status if the threshold is not met, which blocks the PR.
87
+
88
+ ---
89
+
90
+ ## Built-in metrics
91
+
92
+ | Metric | What it checks |
93
+ |---|---|
94
+ | `task_completion` | Non-empty, non-error output with expected content |
95
+ | `tool_accuracy` | Expected tool calls were actually made |
96
+ | `latency` | Response within `max_latency_ms` budget |
97
+ | `hallucination` | Fabrication signals in output |
98
+ | `llm_judge` | GPT-4o scores against natural-language criteria |
99
+
100
+ ---
101
+
102
+ ## Links
103
+
104
+ - **Docs**: [docs.cortexops.ai](https://docs.cortexops.ai)
105
+ - **Repo**: [github.com/ashishodu2023/cortexops](https://github.com/ashishodu2023/cortexops)
106
+ - **Issues**: [GitHub Issues](https://github.com/ashishodu2023/cortexops/issues)
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 CortexOps Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,106 @@
1
+ # CortexOps
2
+
3
+ **Reliability infrastructure for AI agents.**
4
+ Evaluate · Observe · Operate — for LangGraph, CrewAI, and AutoGen.
5
+
6
+ [![PyPI version](https://img.shields.io/pypi/v/cortexops.svg)](https://pypi.org/project/cortexops/)
7
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
8
+ [![CI](https://github.com/ashishodu2023/cortexops/actions/workflows/eval.yml/badge.svg)](https://github.com/ashishodu2023/cortexops/actions/workflows/eval.yml)
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/ashishodu2023/cortexops/blob/main/LICENSE)
10
+
11
+ ---
12
+
13
+ ## The problem
14
+
15
+ You deployed an agent. You have no idea if it regressed overnight.
16
+
17
+ No standard eval format. No failure traces. No CI gate before the next prompt change ships.
18
+ CortexOps fixes that.
19
+
20
+ ---
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install cortexops
26
+
27
+ # With HTTP client (for pushing traces to hosted API):
28
+ pip install "cortexops[http]"
29
+
30
+ # With LLM judge support:
31
+ pip install "cortexops[llm]"
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Quickstart
37
+
38
+ ```python
39
+ from cortexops import CortexTracer, EvalSuite
40
+
41
+ # Wrap your LangGraph app — zero refactor required
42
+ tracer = CortexTracer(project="payments-agent")
43
+ graph = tracer.wrap(your_langgraph_app)
44
+
45
+ # Run evaluations against a golden dataset
46
+ results = EvalSuite.run(
47
+ dataset="golden_v1.yaml",
48
+ agent=graph,
49
+ )
50
+ print(results.summary())
51
+ ```
52
+
53
+ ---
54
+
55
+ ## Golden dataset (YAML)
56
+
57
+ ```yaml
58
+ version: 1
59
+ project: payments-agent
60
+
61
+ cases:
62
+ - id: refund_lookup_01
63
+ input: "What is the status of refund REF-8821?"
64
+ expected_tool_calls: [lookup_refund]
65
+ expected_output_contains: ["approved", "REF-8821"]
66
+ max_latency_ms: 3000
67
+
68
+ - id: open_ended_explanation_01
69
+ input: "Why was my refund rejected?"
70
+ judge: llm
71
+ judge_criteria: >
72
+ The response must explain the rejection reason clearly,
73
+ be empathetic, and offer a concrete next step. No jargon.
74
+ ```
75
+
76
+ ---
77
+
78
+ ## CI gate
79
+
80
+ ```bash
81
+ cortexops eval run \
82
+ --dataset golden_v1.yaml \
83
+ --fail-on "task_completion < 0.90"
84
+ ```
85
+
86
+ The command exits with a non-zero status if the threshold is not met, which blocks the PR.
87
+
88
+ ---
89
+
90
+ ## Built-in metrics
91
+
92
+ | Metric | What it checks |
93
+ |---|---|
94
+ | `task_completion` | Non-empty, non-error output with expected content |
95
+ | `tool_accuracy` | Expected tool calls were actually made |
96
+ | `latency` | Response within `max_latency_ms` budget |
97
+ | `hallucination` | Fabrication signals in output |
98
+ | `llm_judge` | GPT-4o scores against natural-language criteria |
99
+
100
+ ---
101
+
102
+ ## Links
103
+
104
+ - **Docs**: [docs.cortexops.ai](https://docs.cortexops.ai)
105
+ - **Repo**: [github.com/ashishodu2023/cortexops](https://github.com/ashishodu2023/cortexops)
106
+ - **Issues**: [GitHub Issues](https://github.com/ashishodu2023/cortexops/issues)
@@ -0,0 +1,58 @@
1
+ """CortexOps — Reliability infrastructure for AI agents.
2
+
3
+ Quickstart:
4
+ from cortexops import CortexTracer, EvalSuite
5
+
6
+ tracer = CortexTracer(project="my-agent")
7
+ graph = tracer.wrap(your_langgraph_app)
8
+
9
+ results = EvalSuite.run(dataset="golden_v1.yaml", agent=graph)
10
+ print(results.summary())
11
+ """
12
+
13
+ from .client import CortexClient
14
+ from .eval import EvalSuite, EvalThresholdError
15
+ from .judge import LLMJudgeMetric
16
+ from .metrics import (
17
+ HallucinationMetric,
18
+ LatencyMetric,
19
+ Metric,
20
+ TaskCompletionMetric,
21
+ ToolAccuracyMetric,
22
+ )
23
+ from .models import (
24
+ CaseResult,
25
+ EvalCase,
26
+ EvalDataset,
27
+ EvalSummary,
28
+ FailureKind,
29
+ RunStatus,
30
+ Trace,
31
+ TraceNode,
32
+ ToolCall,
33
+ )
34
+ from .tracer import CortexTracer
35
+
36
+ __version__ = "0.1.0"
37
+
38
+ __all__ = [
39
+ "CortexTracer",
40
+ "EvalSuite",
41
+ "EvalThresholdError",
42
+ "CortexClient",
43
+ "Metric",
44
+ "TaskCompletionMetric",
45
+ "ToolAccuracyMetric",
46
+ "LatencyMetric",
47
+ "HallucinationMetric",
48
+ "LLMJudgeMetric",
49
+ "Trace",
50
+ "TraceNode",
51
+ "ToolCall",
52
+ "EvalCase",
53
+ "EvalDataset",
54
+ "EvalSummary",
55
+ "CaseResult",
56
+ "FailureKind",
57
+ "RunStatus",
58
+ ]