cognicore-env 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognicore_env-0.1.0/.gitignore +14 -0
- cognicore_env-0.1.0/LICENSE +21 -0
- cognicore_env-0.1.0/PKG-INFO +248 -0
- cognicore_env-0.1.0/README.md +211 -0
- cognicore_env-0.1.0/cognicore/__init__.py +99 -0
- cognicore_env-0.1.0/cognicore/adapters/__init__.py +5 -0
- cognicore_env-0.1.0/cognicore/adapters/gymnasium.py +142 -0
- cognicore_env-0.1.0/cognicore/agents/__init__.py +5 -0
- cognicore_env-0.1.0/cognicore/agents/base_agent.py +119 -0
- cognicore_env-0.1.0/cognicore/cli.py +244 -0
- cognicore_env-0.1.0/cognicore/core/__init__.py +21 -0
- cognicore_env-0.1.0/cognicore/core/base_env.py +521 -0
- cognicore_env-0.1.0/cognicore/core/spaces.py +149 -0
- cognicore_env-0.1.0/cognicore/core/types.py +233 -0
- cognicore_env-0.1.0/cognicore/dashboard.py +629 -0
- cognicore_env-0.1.0/cognicore/envs/__init__.py +5 -0
- cognicore_env-0.1.0/cognicore/envs/code_debugging.py +116 -0
- cognicore_env-0.1.0/cognicore/envs/conversation.py +106 -0
- cognicore_env-0.1.0/cognicore/envs/data/__init__.py +1 -0
- cognicore_env-0.1.0/cognicore/envs/data/code_cases.py +454 -0
- cognicore_env-0.1.0/cognicore/envs/data/conversation_cases.py +575 -0
- cognicore_env-0.1.0/cognicore/envs/data/graders.py +81 -0
- cognicore_env-0.1.0/cognicore/envs/data/math_cases.py +351 -0
- cognicore_env-0.1.0/cognicore/envs/data/planning_cases.py +637 -0
- cognicore_env-0.1.0/cognicore/envs/data/safety_cases.py +362 -0
- cognicore_env-0.1.0/cognicore/envs/data/summarization_cases.py +340 -0
- cognicore_env-0.1.0/cognicore/envs/math_reasoning.py +99 -0
- cognicore_env-0.1.0/cognicore/envs/multi_step_planning.py +107 -0
- cognicore_env-0.1.0/cognicore/envs/registry.py +284 -0
- cognicore_env-0.1.0/cognicore/envs/safety_classification.py +120 -0
- cognicore_env-0.1.0/cognicore/envs/text_summarization.py +57 -0
- cognicore_env-0.1.0/cognicore/finetuning.py +201 -0
- cognicore_env-0.1.0/cognicore/leaderboard.py +148 -0
- cognicore_env-0.1.0/cognicore/llm/__init__.py +1 -0
- cognicore_env-0.1.0/cognicore/llm/gemini.py +119 -0
- cognicore_env-0.1.0/cognicore/memory/__init__.py +4 -0
- cognicore_env-0.1.0/cognicore/memory/vector_memory.py +130 -0
- cognicore_env-0.1.0/cognicore/memory_manager.py +165 -0
- cognicore_env-0.1.0/cognicore/middleware/__init__.py +15 -0
- cognicore_env-0.1.0/cognicore/middleware/memory.py +197 -0
- cognicore_env-0.1.0/cognicore/middleware/propose_revise.py +168 -0
- cognicore_env-0.1.0/cognicore/middleware/reflection.py +177 -0
- cognicore_env-0.1.0/cognicore/middleware/rewards.py +120 -0
- cognicore_env-0.1.0/cognicore/middleware/safety_monitor.py +144 -0
- cognicore_env-0.1.0/cognicore/multi_agent.py +245 -0
- cognicore_env-0.1.0/cognicore/reflection/__init__.py +4 -0
- cognicore_env-0.1.0/cognicore/reflection/reflection.py +138 -0
- cognicore_env-0.1.0/cognicore/rl/__init__.py +1 -0
- cognicore_env-0.1.0/cognicore/rl/agent.py +94 -0
- cognicore_env-0.1.0/cognicore/safety/__init__.py +4 -0
- cognicore_env-0.1.0/cognicore/safety/safety.py +71 -0
- cognicore_env-0.1.0/cognicore/server/__init__.py +5 -0
- cognicore_env-0.1.0/cognicore/server/app.py +256 -0
- cognicore_env-0.1.0/cognicore/utils/__init__.py +1 -0
- cognicore_env-0.1.0/cognicore/utils/logging.py +73 -0
- cognicore_env-0.1.0/examples/custom_env.py +182 -0
- cognicore_env-0.1.0/examples/llm_agent.py +240 -0
- cognicore_env-0.1.0/examples/quickstart.py +71 -0
- cognicore_env-0.1.0/pyproject.toml +54 -0
- cognicore_env-0.1.0/tests/test_base_env.py +210 -0
- cognicore_env-0.1.0/tests/test_code_env.py +88 -0
- cognicore_env-0.1.0/tests/test_conversation_env.py +99 -0
- cognicore_env-0.1.0/tests/test_gymnasium_adapter.py +96 -0
- cognicore_env-0.1.0/tests/test_math_env.py +78 -0
- cognicore_env-0.1.0/tests/test_memory.py +121 -0
- cognicore_env-0.1.0/tests/test_new_features.py +268 -0
- cognicore_env-0.1.0/tests/test_planning_env.py +108 -0
- cognicore_env-0.1.0/tests/test_propose_revise.py +100 -0
- cognicore_env-0.1.0/tests/test_registry.py +67 -0
- cognicore_env-0.1.0/tests/test_rewards.py +127 -0
- cognicore_env-0.1.0/tests/test_safety_env.py +200 -0
- cognicore_env-0.1.0/tests/test_server.py +178 -0
- cognicore_env-0.1.0/tests/test_summarization_env.py +60 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 CogniCore Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cognicore-env
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cognitive environments for AI — Memory, Reflection, and Structured Rewards built into every environment.
|
|
5
|
+
Project-URL: Homepage, https://github.com/cognicore/cognicore
|
|
6
|
+
Project-URL: Documentation, https://cognicore.readthedocs.io
|
|
7
|
+
Project-URL: Repository, https://github.com/cognicore/cognicore
|
|
8
|
+
Project-URL: Issues, https://github.com/cognicore/cognicore/issues
|
|
9
|
+
Author: CogniCore Team
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai,cognitive,environments,memory,reflection,reinforcement-learning
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: cognicore-env[dev,llm,server]; extra == 'all'
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
30
|
+
Provides-Extra: llm
|
|
31
|
+
Requires-Dist: openai>=1.0; extra == 'llm'
|
|
32
|
+
Provides-Extra: server
|
|
33
|
+
Requires-Dist: fastapi>=0.110; extra == 'server'
|
|
34
|
+
Requires-Dist: pydantic>=2.0; extra == 'server'
|
|
35
|
+
Requires-Dist: uvicorn>=0.27; extra == 'server'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# CogniCore
|
|
39
|
+
|
|
40
|
+
**Cognitive Environments for AI** — Memory, Reflection, and Structured Rewards built into every environment.
|
|
41
|
+
|
|
42
|
+
CogniCore is a Python framework where every environment comes with built-in cognitive infrastructure that no other framework provides:
|
|
43
|
+
|
|
44
|
+
| Feature | Gymnasium | CogniCore |
|
|
45
|
+
|---------|-----------|-----------|
|
|
46
|
+
| Memory across episodes | No | **Yes** — agents learn from past mistakes |
|
|
47
|
+
| Reflection hints | No | **Yes** — environment tells agents what they're doing wrong |
|
|
48
|
+
| Structured rewards | Single float | **8-component** reward signal |
|
|
49
|
+
| PROPOSE → Revise | No | **Yes** — explore before committing |
|
|
50
|
+
| Works with any AI type | RL only | **LLM, RL, classifier, rule-based** |
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Core framework (zero dependencies)
|
|
56
|
+
pip install cognicore-env
|
|
57
|
+
|
|
58
|
+
# With LLM support
|
|
59
|
+
pip install cognicore-env[llm]
|
|
60
|
+
|
|
61
|
+
# With API server
|
|
62
|
+
pip install cognicore-env[server]
|
|
63
|
+
|
|
64
|
+
# Everything
|
|
65
|
+
pip install cognicore-env[all]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Quick Start
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import cognicore
|
|
72
|
+
|
|
73
|
+
# Create an environment
|
|
74
|
+
env = cognicore.make("SafetyClassification-v1", difficulty="easy")
|
|
75
|
+
obs = env.reset()
|
|
76
|
+
|
|
77
|
+
while True:
|
|
78
|
+
action = {"classification": "SAFE"} # your agent here
|
|
79
|
+
obs, reward, done, truncated, info = env.step(action)
|
|
80
|
+
|
|
81
|
+
# 8-component structured reward
|
|
82
|
+
print(f"Total: {reward.total:.2f}")
|
|
83
|
+
print(f" Base: {reward.base_score}")
|
|
84
|
+
print(f" Memory bonus: {reward.memory_bonus}")
|
|
85
|
+
print(f" Streak penalty: {reward.streak_penalty}")
|
|
86
|
+
|
|
87
|
+
if done:
|
|
88
|
+
break
|
|
89
|
+
|
|
90
|
+
print(env.episode_stats())
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Environments
|
|
94
|
+
|
|
95
|
+
CogniCore ships with **5 environment domains** (20 registered IDs):
|
|
96
|
+
|
|
97
|
+
### Safety Classification
|
|
98
|
+
Classify AI responses as SAFE / UNSAFE / NEEDS_REVIEW.
|
|
99
|
+
```python
|
|
100
|
+
env = cognicore.make("SafetyClassification-v1", difficulty="hard")
|
|
101
|
+
obs, reward, done, _, info = env.step({"classification": "UNSAFE"})
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Math Reasoning
|
|
105
|
+
Solve arithmetic, algebra, and advanced math problems.
|
|
106
|
+
```python
|
|
107
|
+
env = cognicore.make("MathReasoning-v1", difficulty="medium")
|
|
108
|
+
obs, reward, done, _, info = env.step({"answer": 42})
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Code Debugging
|
|
112
|
+
Find and fix bugs in Python code snippets.
|
|
113
|
+
```python
|
|
114
|
+
env = cognicore.make("CodeDebugging-v1", difficulty="hard")
|
|
115
|
+
obs, reward, done, _, info = env.step({"bug_line": 4, "fix_type": "security_vulnerability"})
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Conversation / Negotiation
|
|
119
|
+
Choose the best response in dialogue scenarios.
|
|
120
|
+
```python
|
|
121
|
+
env = cognicore.make("Conversation-v1", difficulty="medium")
|
|
122
|
+
obs, reward, done, _, info = env.step({"response": "empathetic_action"})
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Multi-Step Planning
|
|
126
|
+
Order steps correctly to solve planning problems.
|
|
127
|
+
```python
|
|
128
|
+
env = cognicore.make("Planning-v1", difficulty="hard")
|
|
129
|
+
obs, reward, done, _, info = env.step({"order": ["A", "B", "C", "D", "E"]})
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## The 8-Component Structured Reward
|
|
133
|
+
|
|
134
|
+
Every `step()` returns a `StructuredReward` — not just a float:
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
StructuredReward(
|
|
138
|
+
base_score = 1.0 # From environment grader
|
|
139
|
+
memory_bonus = 0.05 # Consistency with past successes
|
|
140
|
+
reflection_bonus = 0.03 # Followed a reflection hint
|
|
141
|
+
streak_penalty = 0.00 # Penalty for consecutive failures
|
|
142
|
+
propose_bonus = 0.05 # Improved via PROPOSE → Revise
|
|
143
|
+
novelty_bonus = 0.04 # Correctly handled new category
|
|
144
|
+
confidence_cal = 0.02 # Well-calibrated confidence
|
|
145
|
+
time_decay = -0.01 # Speed penalty
|
|
146
|
+
─────────────────────────
|
|
147
|
+
total = 1.18 # Sum of all components
|
|
148
|
+
)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## PROPOSE → Revise Protocol
|
|
152
|
+
|
|
153
|
+
Agents can explore before committing:
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
# 1. Propose (no grading)
|
|
157
|
+
feedback = env.propose({"classification": "UNSAFE"})
|
|
158
|
+
print(feedback.reflection_hint) # "This category was often SAFE"
|
|
159
|
+
print(feedback.confidence_estimate) # 0.34
|
|
160
|
+
|
|
161
|
+
# 2. Revise (graded)
|
|
162
|
+
obs, reward, done, _, info = env.revise({"classification": "SAFE"})
|
|
163
|
+
# If improved → propose_bonus in reward
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Build Your Own Environment
|
|
167
|
+
|
|
168
|
+
Subclass `CogniCoreEnv` and implement 4 methods:
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from cognicore import CogniCoreEnv, EvalResult
|
|
172
|
+
|
|
173
|
+
class MyEnv(CogniCoreEnv):
|
|
174
|
+
def _setup(self, **kwargs):
|
|
175
|
+
pass # Define spaces, load data
|
|
176
|
+
|
|
177
|
+
def _generate_tasks(self):
|
|
178
|
+
return [{"q": "2+2", "a": 4, "category": "math"}]
|
|
179
|
+
|
|
180
|
+
def _evaluate(self, action):
|
|
181
|
+
task = self._tasks[self._current_step]
|
|
182
|
+
correct = action.get("answer") == task["a"]
|
|
183
|
+
return EvalResult(
|
|
184
|
+
base_score=1.0 if correct else 0.0,
|
|
185
|
+
correct=correct,
|
|
186
|
+
category=task["category"],
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
def _get_obs(self):
|
|
190
|
+
return {"question": self._tasks[self._current_step]["q"]}
|
|
191
|
+
|
|
192
|
+
# That's it! Memory, reflection, rewards all work automatically.
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## CLI
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
# List environments
|
|
199
|
+
cognicore list
|
|
200
|
+
|
|
201
|
+
# Run with a random agent
|
|
202
|
+
cognicore run SafetyClassification-v1 --difficulty hard --episodes 3 -v
|
|
203
|
+
|
|
204
|
+
# Show environment info
|
|
205
|
+
cognicore info MathReasoning-v1
|
|
206
|
+
|
|
207
|
+
# Start API server
|
|
208
|
+
cognicore serve --port 8000
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## REST API
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
# Start server
|
|
215
|
+
cognicore serve
|
|
216
|
+
|
|
217
|
+
# Create session
|
|
218
|
+
curl -X POST http://localhost:8000/envs/SafetyClassification-v1/create \
|
|
219
|
+
-H "Content-Type: application/json" \
|
|
220
|
+
-d '{"difficulty": "easy"}'
|
|
221
|
+
|
|
222
|
+
# Reset
|
|
223
|
+
curl -X POST http://localhost:8000/sessions/{sid}/reset
|
|
224
|
+
|
|
225
|
+
# Step
|
|
226
|
+
curl -X POST http://localhost:8000/sessions/{sid}/step \
|
|
227
|
+
-H "Content-Type: application/json" \
|
|
228
|
+
-d '{"action": {"classification": "SAFE"}}'
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Interactive docs at `http://localhost:8000/docs`.
|
|
232
|
+
|
|
233
|
+
## Architecture
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
cognicore/
|
|
237
|
+
├── core/ # Base env, types, spaces
|
|
238
|
+
├── middleware/ # Memory, Reflection, Rewards, Propose-Revise, Safety Monitor
|
|
239
|
+
├── envs/ # 5 built-in environments + registry
|
|
240
|
+
├── agents/ # BaseAgent ABC + RandomAgent
|
|
241
|
+
├── server/ # FastAPI REST API
|
|
242
|
+
├── cli.py # Command-line interface
|
|
243
|
+
└── utils/ # Logging utilities
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# CogniCore
|
|
2
|
+
|
|
3
|
+
**Cognitive Environments for AI** — Memory, Reflection, and Structured Rewards built into every environment.
|
|
4
|
+
|
|
5
|
+
CogniCore is a Python framework where every environment comes with built-in cognitive infrastructure that no other framework provides:
|
|
6
|
+
|
|
7
|
+
| Feature | Gymnasium | CogniCore |
|
|
8
|
+
|---------|-----------|-----------|
|
|
9
|
+
| Memory across episodes | No | **Yes** — agents learn from past mistakes |
|
|
10
|
+
| Reflection hints | No | **Yes** — environment tells agents what they're doing wrong |
|
|
11
|
+
| Structured rewards | Single float | **8-component** reward signal |
|
|
12
|
+
| PROPOSE → Revise | No | **Yes** — explore before committing |
|
|
13
|
+
| Works with any AI type | RL only | **LLM, RL, classifier, rule-based** |
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Core framework (zero dependencies)
|
|
19
|
+
pip install cognicore-env
|
|
20
|
+
|
|
21
|
+
# With LLM support
|
|
22
|
+
pip install cognicore-env[llm]
|
|
23
|
+
|
|
24
|
+
# With API server
|
|
25
|
+
pip install cognicore-env[server]
|
|
26
|
+
|
|
27
|
+
# Everything
|
|
28
|
+
pip install cognicore-env[all]
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import cognicore
|
|
35
|
+
|
|
36
|
+
# Create an environment
|
|
37
|
+
env = cognicore.make("SafetyClassification-v1", difficulty="easy")
|
|
38
|
+
obs = env.reset()
|
|
39
|
+
|
|
40
|
+
while True:
|
|
41
|
+
action = {"classification": "SAFE"} # your agent here
|
|
42
|
+
obs, reward, done, truncated, info = env.step(action)
|
|
43
|
+
|
|
44
|
+
# 8-component structured reward
|
|
45
|
+
print(f"Total: {reward.total:.2f}")
|
|
46
|
+
print(f" Base: {reward.base_score}")
|
|
47
|
+
print(f" Memory bonus: {reward.memory_bonus}")
|
|
48
|
+
print(f" Streak penalty: {reward.streak_penalty}")
|
|
49
|
+
|
|
50
|
+
if done:
|
|
51
|
+
break
|
|
52
|
+
|
|
53
|
+
print(env.episode_stats())
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Environments
|
|
57
|
+
|
|
58
|
+
CogniCore ships with **5 environment domains** (20 registered IDs):
|
|
59
|
+
|
|
60
|
+
### Safety Classification
|
|
61
|
+
Classify AI responses as SAFE / UNSAFE / NEEDS_REVIEW.
|
|
62
|
+
```python
|
|
63
|
+
env = cognicore.make("SafetyClassification-v1", difficulty="hard")
|
|
64
|
+
obs, reward, done, _, info = env.step({"classification": "UNSAFE"})
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Math Reasoning
|
|
68
|
+
Solve arithmetic, algebra, and advanced math problems.
|
|
69
|
+
```python
|
|
70
|
+
env = cognicore.make("MathReasoning-v1", difficulty="medium")
|
|
71
|
+
obs, reward, done, _, info = env.step({"answer": 42})
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Code Debugging
|
|
75
|
+
Find and fix bugs in Python code snippets.
|
|
76
|
+
```python
|
|
77
|
+
env = cognicore.make("CodeDebugging-v1", difficulty="hard")
|
|
78
|
+
obs, reward, done, _, info = env.step({"bug_line": 4, "fix_type": "security_vulnerability"})
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Conversation / Negotiation
|
|
82
|
+
Choose the best response in dialogue scenarios.
|
|
83
|
+
```python
|
|
84
|
+
env = cognicore.make("Conversation-v1", difficulty="medium")
|
|
85
|
+
obs, reward, done, _, info = env.step({"response": "empathetic_action"})
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Multi-Step Planning
|
|
89
|
+
Order steps correctly to solve planning problems.
|
|
90
|
+
```python
|
|
91
|
+
env = cognicore.make("Planning-v1", difficulty="hard")
|
|
92
|
+
obs, reward, done, _, info = env.step({"order": ["A", "B", "C", "D", "E"]})
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## The 8-Component Structured Reward
|
|
96
|
+
|
|
97
|
+
Every `step()` returns a `StructuredReward` — not just a float:
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
StructuredReward(
|
|
101
|
+
base_score = 1.0 # From environment grader
|
|
102
|
+
memory_bonus = 0.05 # Consistency with past successes
|
|
103
|
+
reflection_bonus = 0.03 # Followed a reflection hint
|
|
104
|
+
streak_penalty = 0.00 # Penalty for consecutive failures
|
|
105
|
+
propose_bonus = 0.05 # Improved via PROPOSE → Revise
|
|
106
|
+
novelty_bonus = 0.04 # Correctly handled new category
|
|
107
|
+
confidence_cal = 0.02 # Well-calibrated confidence
|
|
108
|
+
time_decay = -0.01 # Speed penalty
|
|
109
|
+
─────────────────────────
|
|
110
|
+
total = 1.18 # Sum of all components
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## PROPOSE → Revise Protocol
|
|
115
|
+
|
|
116
|
+
Agents can explore before committing:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
# 1. Propose (no grading)
|
|
120
|
+
feedback = env.propose({"classification": "UNSAFE"})
|
|
121
|
+
print(feedback.reflection_hint) # "This category was often SAFE"
|
|
122
|
+
print(feedback.confidence_estimate) # 0.34
|
|
123
|
+
|
|
124
|
+
# 2. Revise (graded)
|
|
125
|
+
obs, reward, done, _, info = env.revise({"classification": "SAFE"})
|
|
126
|
+
# If improved → propose_bonus in reward
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Build Your Own Environment
|
|
130
|
+
|
|
131
|
+
Subclass `CogniCoreEnv` and implement 4 methods:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from cognicore import CogniCoreEnv, EvalResult
|
|
135
|
+
|
|
136
|
+
class MyEnv(CogniCoreEnv):
|
|
137
|
+
def _setup(self, **kwargs):
|
|
138
|
+
pass # Define spaces, load data
|
|
139
|
+
|
|
140
|
+
def _generate_tasks(self):
|
|
141
|
+
return [{"q": "2+2", "a": 4, "category": "math"}]
|
|
142
|
+
|
|
143
|
+
def _evaluate(self, action):
|
|
144
|
+
task = self._tasks[self._current_step]
|
|
145
|
+
correct = action.get("answer") == task["a"]
|
|
146
|
+
return EvalResult(
|
|
147
|
+
base_score=1.0 if correct else 0.0,
|
|
148
|
+
correct=correct,
|
|
149
|
+
category=task["category"],
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
def _get_obs(self):
|
|
153
|
+
return {"question": self._tasks[self._current_step]["q"]}
|
|
154
|
+
|
|
155
|
+
# That's it! Memory, reflection, rewards all work automatically.
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## CLI
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# List environments
|
|
162
|
+
cognicore list
|
|
163
|
+
|
|
164
|
+
# Run with a random agent
|
|
165
|
+
cognicore run SafetyClassification-v1 --difficulty hard --episodes 3 -v
|
|
166
|
+
|
|
167
|
+
# Show environment info
|
|
168
|
+
cognicore info MathReasoning-v1
|
|
169
|
+
|
|
170
|
+
# Start API server
|
|
171
|
+
cognicore serve --port 8000
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## REST API
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
# Start server
|
|
178
|
+
cognicore serve
|
|
179
|
+
|
|
180
|
+
# Create session
|
|
181
|
+
curl -X POST http://localhost:8000/envs/SafetyClassification-v1/create \
|
|
182
|
+
-H "Content-Type: application/json" \
|
|
183
|
+
-d '{"difficulty": "easy"}'
|
|
184
|
+
|
|
185
|
+
# Reset
|
|
186
|
+
curl -X POST http://localhost:8000/sessions/{sid}/reset
|
|
187
|
+
|
|
188
|
+
# Step
|
|
189
|
+
curl -X POST http://localhost:8000/sessions/{sid}/step \
|
|
190
|
+
-H "Content-Type: application/json" \
|
|
191
|
+
-d '{"action": {"classification": "SAFE"}}'
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Interactive docs at `http://localhost:8000/docs`.
|
|
195
|
+
|
|
196
|
+
## Architecture
|
|
197
|
+
|
|
198
|
+
```
|
|
199
|
+
cognicore/
|
|
200
|
+
├── core/ # Base env, types, spaces
|
|
201
|
+
├── middleware/ # Memory, Reflection, Rewards, Propose-Revise, Safety Monitor
|
|
202
|
+
├── envs/ # 5 built-in environments + registry
|
|
203
|
+
├── agents/ # BaseAgent ABC + RandomAgent
|
|
204
|
+
├── server/ # FastAPI REST API
|
|
205
|
+
├── cli.py # Command-line interface
|
|
206
|
+
└── utils/ # Logging utilities
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## License
|
|
210
|
+
|
|
211
|
+
MIT
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CogniCore — Cognitive Environments for AI.
|
|
3
|
+
|
|
4
|
+
Every environment gets Memory, Reflection, Structured Rewards,
|
|
5
|
+
and PROPOSE→Revise built in. Any AI agent (LLM, RL, classifier,
|
|
6
|
+
rule-based) can plug in and learn from experience.
|
|
7
|
+
|
|
8
|
+
Quick start::
|
|
9
|
+
|
|
10
|
+
import cognicore
|
|
11
|
+
|
|
12
|
+
env = cognicore.make("SafetyClassification-v1", difficulty="easy")
|
|
13
|
+
obs = env.reset()
|
|
14
|
+
|
|
15
|
+
while True:
|
|
16
|
+
action = {"classification": "SAFE"} # your agent here
|
|
17
|
+
obs, reward, done, truncated, info = env.step(action)
|
|
18
|
+
print(f"Reward: {reward.total:.2f}")
|
|
19
|
+
if done:
|
|
20
|
+
break
|
|
21
|
+
|
|
22
|
+
print(env.episode_stats())
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.0"
|
|
26
|
+
|
|
27
|
+
# Core
|
|
28
|
+
from cognicore.core.base_env import CogniCoreEnv
|
|
29
|
+
from cognicore.core.types import (
|
|
30
|
+
CogniCoreConfig,
|
|
31
|
+
EpisodeStats,
|
|
32
|
+
EvalResult,
|
|
33
|
+
ProposalFeedback,
|
|
34
|
+
StepResult,
|
|
35
|
+
StructuredReward,
|
|
36
|
+
)
|
|
37
|
+
from cognicore.core.spaces import DiscreteSpace, DictSpace, TextSpace
|
|
38
|
+
|
|
39
|
+
# Registry
|
|
40
|
+
from cognicore.envs.registry import make, register, list_envs
|
|
41
|
+
|
|
42
|
+
# Agents
|
|
43
|
+
from cognicore.agents.base_agent import BaseAgent, RandomAgent
|
|
44
|
+
|
|
45
|
+
# Middleware (importable for custom usage)
|
|
46
|
+
from cognicore.middleware.memory import Memory
|
|
47
|
+
from cognicore.middleware.reflection import ReflectionEngine
|
|
48
|
+
from cognicore.middleware.safety_monitor import SafetyMonitor
|
|
49
|
+
|
|
50
|
+
# Adapters
|
|
51
|
+
from cognicore.adapters.gymnasium import GymnasiumAdapter
|
|
52
|
+
|
|
53
|
+
# Persistence & Leaderboard
|
|
54
|
+
from cognicore.memory_manager import MemoryManager
|
|
55
|
+
from cognicore.leaderboard import Leaderboard
|
|
56
|
+
|
|
57
|
+
# Fine-tuning
|
|
58
|
+
from cognicore.finetuning import EpisodeRecorder
|
|
59
|
+
|
|
60
|
+
# Multi-agent
|
|
61
|
+
from cognicore.multi_agent import MultiAgentEnv, DebateEnv
|
|
62
|
+
|
|
63
|
+
__all__ = [
|
|
64
|
+
# Version
|
|
65
|
+
"__version__",
|
|
66
|
+
# Core
|
|
67
|
+
"CogniCoreEnv",
|
|
68
|
+
"CogniCoreConfig",
|
|
69
|
+
"StructuredReward",
|
|
70
|
+
"EvalResult",
|
|
71
|
+
"StepResult",
|
|
72
|
+
"EpisodeStats",
|
|
73
|
+
"ProposalFeedback",
|
|
74
|
+
# Spaces
|
|
75
|
+
"DiscreteSpace",
|
|
76
|
+
"DictSpace",
|
|
77
|
+
"TextSpace",
|
|
78
|
+
# Registry
|
|
79
|
+
"make",
|
|
80
|
+
"register",
|
|
81
|
+
"list_envs",
|
|
82
|
+
# Agents
|
|
83
|
+
"BaseAgent",
|
|
84
|
+
"RandomAgent",
|
|
85
|
+
# Middleware
|
|
86
|
+
"Memory",
|
|
87
|
+
"ReflectionEngine",
|
|
88
|
+
"SafetyMonitor",
|
|
89
|
+
# Adapters
|
|
90
|
+
"GymnasiumAdapter",
|
|
91
|
+
# Persistence
|
|
92
|
+
"MemoryManager",
|
|
93
|
+
"Leaderboard",
|
|
94
|
+
# Fine-tuning
|
|
95
|
+
"EpisodeRecorder",
|
|
96
|
+
# Multi-agent
|
|
97
|
+
"MultiAgentEnv",
|
|
98
|
+
"DebateEnv",
|
|
99
|
+
]
|