agi-pragma 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agi_pragma-1.0.0/LICENSE +17 -0
- agi_pragma-1.0.0/PKG-INFO +394 -0
- agi_pragma-1.0.0/README.md +354 -0
- agi_pragma-1.0.0/agi_pragma/__init__.py +51 -0
- agi_pragma-1.0.0/agi_pragma/integrations/__init__.py +0 -0
- agi_pragma-1.0.0/agi_pragma/integrations/autogen.py +274 -0
- agi_pragma-1.0.0/agi_pragma/integrations/langgraph.py +250 -0
- agi_pragma-1.0.0/agi_pragma/integrations/llamaindex.py +275 -0
- agi_pragma-1.0.0/agi_pragma.egg-info/PKG-INFO +394 -0
- agi_pragma-1.0.0/agi_pragma.egg-info/SOURCES.txt +53 -0
- agi_pragma-1.0.0/agi_pragma.egg-info/dependency_links.txt +1 -0
- agi_pragma-1.0.0/agi_pragma.egg-info/requires.txt +23 -0
- agi_pragma-1.0.0/agi_pragma.egg-info/top_level.txt +3 -0
- agi_pragma-1.0.0/core/__init__.py +1 -0
- agi_pragma-1.0.0/core/agent_collaboration.py +20 -0
- agi_pragma-1.0.0/core/agent_swarm.py +31 -0
- agi_pragma-1.0.0/core/bayesian_updater.py +27 -0
- agi_pragma-1.0.0/core/circuit_breaker.py +19 -0
- agi_pragma-1.0.0/core/critical_path_analyzer.py +38 -0
- agi_pragma-1.0.0/core/decision_tree.py +24 -0
- agi_pragma-1.0.0/core/episodic_memory.py +95 -0
- agi_pragma-1.0.0/core/fmea_engine.py +43 -0
- agi_pragma-1.0.0/core/integration_demo.py +20 -0
- agi_pragma-1.0.0/core/main_engine.py +57 -0
- agi_pragma-1.0.0/core/reasoning.py +19 -0
- agi_pragma-1.0.0/core/simulation_engine.py +14 -0
- agi_pragma-1.0.0/core/tornado_analysis.py +16 -0
- agi_pragma-1.0.0/demos/__init__.py +0 -0
- agi_pragma-1.0.0/demos/dic_api/__init__.py +0 -0
- agi_pragma-1.0.0/demos/dic_api/main.py +210 -0
- agi_pragma-1.0.0/demos/dic_api/models.py +100 -0
- agi_pragma-1.0.0/demos/dic_db/__init__.py +0 -0
- agi_pragma-1.0.0/demos/dic_db/bayes.py +18 -0
- agi_pragma-1.0.0/demos/dic_db/circuit_breaker.py +59 -0
- agi_pragma-1.0.0/demos/dic_db/critical_path.py +98 -0
- agi_pragma-1.0.0/demos/dic_db/db_action.py +31 -0
- agi_pragma-1.0.0/demos/dic_db/db_engine.py +149 -0
- agi_pragma-1.0.0/demos/dic_db/dic_governor.py +176 -0
- agi_pragma-1.0.0/demos/dic_db/mock_actor.py +57 -0
- agi_pragma-1.0.0/demos/dic_db/risk_fmea.py +140 -0
- agi_pragma-1.0.0/demos/dic_db/run.py +203 -0
- agi_pragma-1.0.0/demos/dic_llm/__init__.py +0 -0
- agi_pragma-1.0.0/demos/dic_llm/bayes.py +22 -0
- agi_pragma-1.0.0/demos/dic_llm/circuit_breaker.py +82 -0
- agi_pragma-1.0.0/demos/dic_llm/critical_path.py +89 -0
- agi_pragma-1.0.0/demos/dic_llm/dic_governor.py +221 -0
- agi_pragma-1.0.0/demos/dic_llm/executor.py +71 -0
- agi_pragma-1.0.0/demos/dic_llm/file_action.py +22 -0
- agi_pragma-1.0.0/demos/dic_llm/llm_actor.py +116 -0
- agi_pragma-1.0.0/demos/dic_llm/mock_actor.py +46 -0
- agi_pragma-1.0.0/demos/dic_llm/risk_fmea.py +92 -0
- agi_pragma-1.0.0/demos/dic_llm/run.py +229 -0
- agi_pragma-1.0.0/pyproject.toml +48 -0
- agi_pragma-1.0.0/setup.cfg +4 -0
- agi_pragma-1.0.0/setup.py +7 -0
agi_pragma-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
The MIT License (MIT) + Commons Clause
|
|
2
|
+
|
|
3
|
+
Copyright © 2025-2026 Rafał Żabiński – original concept of "reverse-reality AGI Testing Ground"
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
ADDITIONAL "Commons Clause" CONDITION:
|
|
10
|
+
|
|
11
|
+
The Software may NOT be used as part of any commercial product, service, or offering that is sold, licensed, rented, or otherwise provided to third parties for direct revenue generation (including training paid/protected models, SaaS, enterprise tools, etc.) without prior written permission from the copyright holder.
|
|
12
|
+
|
|
13
|
+
For commercial/enterprise licensing, consulting, or partnership contact:
|
|
14
|
+
zabinskirafal@outlook.com
|
|
15
|
+
LinkedIn: www.linkedin.com/in/zabinskirafal
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND […]
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agi-pragma
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AI Action Firewall — seven-stage Decision Intelligence Core for safe agentic AI
|
|
5
|
+
Author-email: Rafał Żabiński <zabinskirafal@outlook.com>
|
|
6
|
+
License: MIT with Commons Clause — see LICENSE
|
|
7
|
+
Project-URL: Homepage, https://github.com/zabinskirafal/AGI-Pragma-Core
|
|
8
|
+
Project-URL: Repository, https://github.com/zabinskirafal/AGI-Pragma-Core
|
|
9
|
+
Project-URL: Paper, https://github.com/zabinskirafal/AGI-Pragma-Core/blob/main/docs/arxiv/main.tex
|
|
10
|
+
Keywords: ai-safety,agentic-ai,llm,decision-intelligence,fmea,risk-assessment,ai-firewall
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Security
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: numpy>=1.21.0
|
|
23
|
+
Requires-Dist: scipy>=1.7.0
|
|
24
|
+
Provides-Extra: api
|
|
25
|
+
Requires-Dist: fastapi>=0.100.0; extra == "api"
|
|
26
|
+
Requires-Dist: uvicorn>=0.20.0; extra == "api"
|
|
27
|
+
Provides-Extra: llm
|
|
28
|
+
Requires-Dist: anthropic>=0.20.0; extra == "llm"
|
|
29
|
+
Provides-Extra: langgraph
|
|
30
|
+
Requires-Dist: langgraph>=0.2.0; extra == "langgraph"
|
|
31
|
+
Requires-Dist: langchain-core>=0.2.0; extra == "langgraph"
|
|
32
|
+
Provides-Extra: autogen
|
|
33
|
+
Requires-Dist: pyautogen>=0.7.0; extra == "autogen"
|
|
34
|
+
Provides-Extra: llamaindex
|
|
35
|
+
Requires-Dist: llama-index-core>=0.14.0; extra == "llamaindex"
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# AGI Pragma
|
|
42
|
+
**AI Action Firewall — Safe execution layer for AI agents**
|
|
43
|
+
|
|
44
|
+
> AGI Pragma prevents AI agents from executing dangerous actions before they happen.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
### 1 — Python SDK
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install agi-pragma
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from agi_pragma import DICGovernor, FileAction, FileOp
|
|
58
|
+
|
|
59
|
+
gov = DICGovernor()
|
|
60
|
+
|
|
61
|
+
# WRITE — approved (RPN 504, below threshold)
|
|
62
|
+
decision = gov.evaluate(FileAction(
|
|
63
|
+
op=FileOp.WRITE, path="plan.md",
|
|
64
|
+
content="project notes", reason="save draft"
|
|
65
|
+
))
|
|
66
|
+
print(decision.approved, decision.max_rpn) # True 504
|
|
67
|
+
|
|
68
|
+
# DELETE — blocked (RPN 3150, exceeds threshold 2400)
|
|
69
|
+
decision = gov.evaluate(FileAction(
|
|
70
|
+
op=FileOp.DELETE, path="users.csv", reason="clean up"
|
|
71
|
+
))
|
|
72
|
+
print(decision.approved, decision.block_reason) # False RPN 3150 ≥ threshold 2400
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 2 — REST API
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Start the server
|
|
79
|
+
pip install "agi-pragma[api]"
|
|
80
|
+
uvicorn demos.dic_api.main:app --reload
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# Evaluate a proposed action
|
|
85
|
+
curl -s -X POST http://localhost:8000/evaluate \
|
|
86
|
+
-H "Content-Type: application/json" \
|
|
87
|
+
-d '{"op": "delete", "path": "users.csv", "reason": "clean up"}' \
|
|
88
|
+
| python3 -m json.tool
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"approved": false,
|
|
94
|
+
"block_reason": "RPN 3150 ≥ threshold 2400",
|
|
95
|
+
"max_rpn": 3150,
|
|
96
|
+
"utility": -7.75
|
|
97
|
+
}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 3 — LangGraph Integration
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
pip install "agi-pragma[langgraph]"
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from langgraph.graph import StateGraph
|
|
108
|
+
from agi_pragma.integrations.langgraph import DICGuardNode, dic_conditional_edge
|
|
109
|
+
|
|
110
|
+
guard = DICGuardNode() # wraps DICGovernor; shared across the graph
|
|
111
|
+
|
|
112
|
+
graph = StateGraph(AgentState)
|
|
113
|
+
graph.add_node("agent", agent_node)
|
|
114
|
+
graph.add_node("dic_guard", guard)
|
|
115
|
+
graph.add_node("tools", tool_node)
|
|
116
|
+
|
|
117
|
+
graph.set_entry_point("agent")
|
|
118
|
+
graph.add_edge("agent", "dic_guard")
|
|
119
|
+
|
|
120
|
+
# approved → run tools; blocked → back to agent to re-plan
|
|
121
|
+
graph.add_conditional_edges(
|
|
122
|
+
"dic_guard",
|
|
123
|
+
dic_conditional_edge,
|
|
124
|
+
{"approved": "tools", "blocked": "agent"},
|
|
125
|
+
)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
See [docs/integrations/langgraph.md](docs/integrations/langgraph.md) for the full usage guide.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Overview
|
|
133
|
+
|
|
134
|
+
**AGI Pragma** is an **AI Action Firewall**: a structured pre-execution governance layer that sits between an AI agent and the real world, evaluating every proposed action for risk before it is allowed to execute.
|
|
135
|
+
|
|
136
|
+
It does **not** attempt to replicate human cognition, consciousness, or emotions.
|
|
137
|
+
Instead, it enforces **systematic risk evaluation** at the point of action:
|
|
138
|
+
filtering proposals, scoring failure modes, and blocking irreversible operations
|
|
139
|
+
**before they cause harm**.
|
|
140
|
+
|
|
141
|
+
> An AI agent that cannot delete a database table it shouldn't delete,
|
|
142
|
+
> overwrite a file it shouldn't overwrite, or execute a command it shouldn't execute
|
|
143
|
+
> — not because it was prompted to behave, but because a hard enforcement layer blocked it.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## What AGI Pragma Is / Is Not
|
|
148
|
+
|
|
149
|
+
### AGI Pragma IS
|
|
150
|
+
- an **AI Action Firewall** — hard pre-execution enforcement for agentic AI systems
|
|
151
|
+
- a **Decision Intelligence Core (DIC)** built around explicit, auditable decision gates
|
|
152
|
+
- a research artifact with reproducible benchmarks and full audit traces per decision
|
|
153
|
+
- a foundation for safety-oriented autonomous systems and LLM agent governance
|
|
154
|
+
|
|
155
|
+
### AGI Pragma IS NOT
|
|
156
|
+
- a human-like AGI
|
|
157
|
+
- a black-box learning system
|
|
158
|
+
- a reward-maximization benchmark
|
|
159
|
+
- a production-ready general intelligence
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Core Architecture — Decision Intelligence Core (DIC)
|
|
164
|
+
|
|
165
|
+
Each decision follows a fixed and auditable pipeline:
|
|
166
|
+
|
|
167
|
+
**1. Branching** — enumerate feasible actions, eliminate invalid ones.
|
|
168
|
+
|
|
169
|
+
**2. Critical Path Estimation** — Monte Carlo rollouts estimate:
|
|
170
|
+
- probability of catastrophic failure,
|
|
171
|
+
- probability of entering irreversible traps,
|
|
172
|
+
- expected steps until failure.
|
|
173
|
+
|
|
174
|
+
**3. Risk Assessment (FMEA)** — each action scored by:
|
|
175
|
+
- Severity (S) × Occurrence (O) × Detection difficulty (D) = **RPN**
|
|
176
|
+
|
|
177
|
+
**4. Decision Integrity Gate** — actions exceeding risk threshold are blocked before execution.
|
|
178
|
+
|
|
179
|
+
**5. Circuit Breaker** — autonomy dynamically constrained:
|
|
180
|
+
- OK → WARN → SLOW → STOP
|
|
181
|
+
|
|
182
|
+
**6. Decision Selection** — a utility function balances survival probability, goal progress, and residual risk.
|
|
183
|
+
|
|
184
|
+
**7. Belief Update** — Bayesian trackers update internal hazard estimates.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Safety Model
|
|
189
|
+
|
|
190
|
+
Safety in AGI Pragma is **preventive**, not reactive.
|
|
191
|
+
|
|
192
|
+
- self-harm equals failure,
|
|
193
|
+
- no action bypasses risk evaluation,
|
|
194
|
+
- all decisions are auditable,
|
|
195
|
+
- autonomy is conditional, not absolute.
|
|
196
|
+
|
|
197
|
+
See: [docs/safety.md](docs/safety.md)
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Benchmark Results — Snake
|
|
202
|
+
|
|
203
|
+
**Agent:** PragmaSnakeAgent
|
|
204
|
+
**Environment:** SnakeEnv 10×10
|
|
205
|
+
|
|
206
|
+
### v1.0 — 50 episodes (2026-04-05)
|
|
207
|
+
|
|
208
|
+
| Metric | Value |
|
|
209
|
+
|------------------------|---------------|
|
|
210
|
+
| Average score | 22.8 |
|
|
211
|
+
| Min / Max score | 9 / 33 |
|
|
212
|
+
| Average reward | 102.4 |
|
|
213
|
+
| Average steps | 201 |
|
|
214
|
+
| Survived to step limit | 2/50 |
|
|
215
|
+
| Scores ≥ 25 | 21/50 (42%) |
|
|
216
|
+
| Scores < 15 | 4/50 (8%) |
|
|
217
|
+
|
|
218
|
+
### v0.1 — 10 episodes (2026-04-04) — initial run
|
|
219
|
+
|
|
220
|
+
| Metric | Value |
|
|
221
|
+
|------------------------|---------------|
|
|
222
|
+
| Average score | 25.0 |
|
|
223
|
+
| Min / Max score | 18 / 33 |
|
|
224
|
+
| Average reward | 113.1 |
|
|
225
|
+
| Average steps | 214 |
|
|
226
|
+
| Survived to step limit | 0/10 |
|
|
227
|
+
|
|
228
|
+
> Note: v0.1 used only 10 seeds — higher average reflects small sample size.
|
|
229
|
+
> v1.0 with 50 seeds gives a more reliable picture of agent behavior.
|
|
230
|
+
|
|
231
|
+
### Key finding — passive vs active agent
|
|
232
|
+
|
|
233
|
+
| Config | Avg score | Avg reward |
|
|
234
|
+
|-----------------------------|-----------|------------|
|
|
235
|
+
| dist weight = 0.2 (passive) | 0.4 | ~0 |
|
|
236
|
+
| dist weight = 1.5 (active) | 22.8 | 102.4 |
|
|
237
|
+
|
|
238
|
+
**One parameter change produced a 57× improvement in score.**
|
|
239
|
+
|
|
240
|
+
### Interpretation
|
|
241
|
+
|
|
242
|
+
42% of episodes scored 25 or above.
|
|
243
|
+
Only 8% of episodes scored below 15 — rare failures, not systemic.
|
|
244
|
+
The agent accepts risk to pursue goals and dies actively, not passively.
|
|
245
|
+
|
|
246
|
+
This confirms the core AGI Pragma trade-off:
|
|
247
|
+
**safety ≠ passivity. Controlled risk is required for goal achievement.**
|
|
248
|
+
|
|
249
|
+
To run the benchmark (50 episodes, results written to `artifacts/snake/`):
|
|
250
|
+
```bash
|
|
251
|
+
python3 -m benchmarks.snake.run
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
See: [docs/benchmarks/snake.md](docs/benchmarks/snake.md)
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Benchmark Results — Maze
|
|
259
|
+
|
|
260
|
+
**Agent:** PragmaMazeAgent
|
|
261
|
+
**Environment:** MazeEnv 15×15 (recursive backtracker generation)
|
|
262
|
+
|
|
263
|
+
### v2.0 — 50 episodes (2026-04-05)
|
|
264
|
+
|
|
265
|
+
| Metric | Value |
|
|
266
|
+
|-------------------------------------|----------------|
|
|
267
|
+
| Solved | 50 / 50 (100%) |
|
|
268
|
+
| Steps — avg / min / max | 46.1 / 24 / 76 |
|
|
269
|
+
| Score (steps remaining) — avg / min / max | 253.9 / 224 / 276 |
|
|
270
|
+
|
|
271
|
+
### Key finding — BFS distance vs manhattan distance
|
|
272
|
+
|
|
273
|
+
| Utility signal | Solved | Avg steps |
|
|
274
|
+
|----------------|--------|-----------|
|
|
275
|
+
| Manhattan distance (v1.1) | 4/50 (8%) | 277.9 |
|
|
276
|
+
| BFS path distance (v2.0) | 50/50 (100%) | 46.1 |
|
|
277
|
+
|
|
278
|
+
**One signal change produced a 6× reduction in average steps (277.9 → 46.1) and a 12.5× higher solve rate (8% → 100%).**
|
|
279
|
+
|
|
280
|
+
### Interpretation
|
|
281
|
+
|
|
282
|
+
Manhattan distance is unreliable in mazes where walls force long detours.
|
|
283
|
+
Replacing it with exact BFS path distance — precomputed once per maze, O(1) per lookup —
|
|
284
|
+
gave the utility function accurate topological information and immediately solved all episodes.
|
|
285
|
+
|
|
286
|
+
The FMEA and circuit breaker operated correctly throughout; the failure in v1.x was
|
|
287
|
+
a utility signal problem, not a safety pipeline problem.
|
|
288
|
+
|
|
289
|
+
To run the benchmark (50 episodes, results written to `artifacts/maze/`):
|
|
290
|
+
```bash
|
|
291
|
+
python3 -m benchmarks.maze.run
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
See: [docs/benchmarks/maze.md](docs/benchmarks/maze.md)
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Benchmark Results — Dynamic Threat Gridworld
|
|
299
|
+
|
|
300
|
+
**Agent:** PragmaGridworldAgent
|
|
301
|
+
**Environment:** GridworldEnv 15×15, 5 wandering hazards
|
|
302
|
+
|
|
303
|
+
### v1.0 — 50 episodes (2026-04-06)
|
|
304
|
+
|
|
305
|
+
| Metric | Value |
|
|
306
|
+
|-------------------------------------|----------------|
|
|
307
|
+
| Solved | 39 / 50 (78%) |
|
|
308
|
+
| Killed by hazard | 11 / 50 (22%) |
|
|
309
|
+
| Timed out | 0 / 50 |
|
|
310
|
+
| Steps — avg / min / max | 22.8 / 9 / 24 |
|
|
311
|
+
| Score when solved (steps remaining) | 276 |
|
|
312
|
+
|
|
313
|
+
### Key finding — p_death signal is load-bearing
|
|
314
|
+
|
|
315
|
+
Unlike Snake and Maze, where the Monte Carlo risk signal was saturated or secondary,
|
|
316
|
+
the gridworld is the first benchmark where `p_death` varies meaningfully across
|
|
317
|
+
candidate actions at each step. Moving toward a hazard cluster scores higher
|
|
318
|
+
`p_death` than WAIT or evasion — the FMEA and Critical Path stages are actively
|
|
319
|
+
driving decisions, not just gating them.
|
|
320
|
+
|
|
321
|
+
The circuit breaker operates in **WARN/SLOW** range throughout (RPN 180–200),
|
|
322
|
+
constraining autonomy proportionally without collapsing into full conservatism.
|
|
323
|
+
|
|
324
|
+
### Interpretation
|
|
325
|
+
|
|
326
|
+
The 22% failure rate reflects genuine stochastic risk — some hazard configurations
|
|
327
|
+
cross the direct path regardless of decision quality. The absence of timeouts confirms the
|
|
328
|
+
agent always makes decisive forward progress.
|
|
329
|
+
|
|
330
|
+
**Safety ≠ passivity** holds across all three benchmarks: the agent accepts risk
|
|
331
|
+
to pursue the goal, and the safety pipeline constrains rather than blocks autonomous action.
|
|
332
|
+
|
|
333
|
+
To run the benchmark (50 episodes, results written to `artifacts/gridworld/`):
|
|
334
|
+
```bash
|
|
335
|
+
python3 -m benchmarks.gridworld.run
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
See: [docs/benchmarks/gridworld.md](docs/benchmarks/gridworld.md)
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## Methodology
|
|
343
|
+
|
|
344
|
+
See: [docs/Methodology.md](docs/Methodology.md)
|
|
345
|
+
|
|
346
|
+
---
|
|
347
|
+
|
|
348
|
+
## Reproducibility
|
|
349
|
+
|
|
350
|
+
Each benchmark run produces:
|
|
351
|
+
- decision-level logs (JSONL)
|
|
352
|
+
- episode summaries (JSON)
|
|
353
|
+
- reproducible configurations
|
|
354
|
+
|
|
355
|
+
---
|
|
356
|
+
|
|
357
|
+
## Related Projects
|
|
358
|
+
|
|
359
|
+
**ChaosGym / Reverse Reality Sandbox** — physics-breaking simulation environment
|
|
360
|
+
designed to stress-test AGI Pragma's decision integrity under non-stationary rules.
|
|
361
|
+
|
|
362
|
+
- [AGI-Development](https://github.com/zabinskirafal/AGI-Development) — iterative development history and experimental branches of the AGI Pragma framework
|
|
363
|
+
- [developmental-agi-sandbox](https://github.com/zabinskirafal/developmental-agi-sandbox) — Unity-based reverse-physics sandbox environment for testing AGI Pragma under non-stationary world rules
|
|
364
|
+
|
|
365
|
+
---
|
|
366
|
+
|
|
367
|
+
## Licensing & Commercial Use
|
|
368
|
+
|
|
369
|
+
**Author:** Rafał Żabiński
|
|
370
|
+
|
|
371
|
+
**Free use:** academic research, education, non-commercial projects, open-source experimentation.
|
|
372
|
+
|
|
373
|
+
**Commercial use:** requires a separate written agreement with the author.
|
|
374
|
+
|
|
375
|
+
zabinskirafal@outlook.com
|
|
376
|
+
https://www.linkedin.com/in/zabinskirafal
|
|
377
|
+
|
|
378
|
+
---
|
|
379
|
+
|
|
380
|
+
## Project Status
|
|
381
|
+
|
|
382
|
+
Current version: **v1.0.0** (PyPI package release)
|
|
383
|
+
|
|
384
|
+
AGI Pragma is an active research program, not a finished product.
|
|
385
|
+
|
|
386
|
+
Future work includes additional benchmarks, stronger baselines, and formal evaluation protocols.
|
|
387
|
+
|
|
388
|
+
---
|
|
389
|
+
|
|
390
|
+
## Citation
|
|
391
|
+
|
|
392
|
+
If you use this work in research, please cite via: [CITATION.cff](CITATION.cff)
|
|
393
|
+
|
|
394
|
+
**Rafał Żabiński** — Founder and original author (January 2026)
|