memla 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memla-0.1.0/PKG-INFO +171 -0
- memla-0.1.0/README.md +151 -0
- memla-0.1.0/memla.egg-info/PKG-INFO +171 -0
- memla-0.1.0/memla.egg-info/SOURCES.txt +62 -0
- memla-0.1.0/memla.egg-info/dependency_links.txt +1 -0
- memla-0.1.0/memla.egg-info/entry_points.txt +2 -0
- memla-0.1.0/memla.egg-info/requires.txt +12 -0
- memla-0.1.0/memla.egg-info/top_level.txt +1 -0
- memla-0.1.0/memory_system/__init__.py +2 -0
- memla-0.1.0/memory_system/adapters/__init__.py +7 -0
- memla-0.1.0/memory_system/adapters/ewc.py +201 -0
- memla-0.1.0/memory_system/adapters/gradient_pass.py +226 -0
- memla-0.1.0/memory_system/adapters/lora_manager.py +281 -0
- memla-0.1.0/memory_system/adapters/merge.py +400 -0
- memla-0.1.0/memory_system/cli.py +599 -0
- memla-0.1.0/memory_system/distillation/__init__.py +175 -0
- memla-0.1.0/memory_system/distillation/abstract_c2a_benchmark.py +264 -0
- memla-0.1.0/memory_system/distillation/acquisition_pack_builder.py +935 -0
- memla-0.1.0/memory_system/distillation/batch_runner.py +659 -0
- memla-0.1.0/memory_system/distillation/coding_compile_loop.py +645 -0
- memla-0.1.0/memory_system/distillation/coding_log.py +1267 -0
- memla-0.1.0/memory_system/distillation/coding_proxy.py +704 -0
- memla-0.1.0/memory_system/distillation/comparison_runner.py +295 -0
- memla-0.1.0/memory_system/distillation/compile_loop_benchmark.py +407 -0
- memla-0.1.0/memory_system/distillation/constraint_graph.py +3145 -0
- memla-0.1.0/memory_system/distillation/demo_runner.py +335 -0
- memla-0.1.0/memory_system/distillation/diligence_packet_builder.py +648 -0
- memla-0.1.0/memory_system/distillation/eval_harness.py +137 -0
- memla-0.1.0/memory_system/distillation/exporter.py +105 -0
- memla-0.1.0/memory_system/distillation/git_history_cases.py +469 -0
- memla-0.1.0/memory_system/distillation/hypothesis_benchmark.py +456 -0
- memla-0.1.0/memory_system/distillation/math_c2a_benchmark.py +2148 -0
- memla-0.1.0/memory_system/distillation/patch_execution_benchmark.py +2428 -0
- memla-0.1.0/memory_system/distillation/pitch_pack_builder.py +337 -0
- memla-0.1.0/memory_system/distillation/seed_runner.py +360 -0
- memla-0.1.0/memory_system/distillation/thesis_pack_builder.py +474 -0
- memla-0.1.0/memory_system/distillation/transfer_eval.py +208 -0
- memla-0.1.0/memory_system/distillation/workflow_planner.py +1419 -0
- memla-0.1.0/memory_system/distillation/workspace_capture.py +77 -0
- memla-0.1.0/memory_system/main.py +259 -0
- memla-0.1.0/memory_system/memory/__init__.py +2 -0
- memla-0.1.0/memory_system/memory/chunk_manager.py +1692 -0
- memla-0.1.0/memory_system/memory/consolidator.py +168 -0
- memla-0.1.0/memory_system/memory/episode_log.py +929 -0
- memla-0.1.0/memory_system/memory/lazy_import.py +220 -0
- memla-0.1.0/memory_system/memory/llm_extractor.py +75 -0
- memla-0.1.0/memory_system/middleware/__init__.py +2 -0
- memla-0.1.0/memory_system/middleware/context_builder.py +173 -0
- memla-0.1.0/memory_system/middleware/quality.py +170 -0
- memla-0.1.0/memory_system/middleware/ttt_layer.py +295 -0
- memla-0.1.0/memory_system/ollama_client.py +214 -0
- memla-0.1.0/memory_system/projection/__init__.py +7 -0
- memla-0.1.0/memory_system/projection/gradient_filter.py +243 -0
- memla-0.1.0/memory_system/reasoning/__init__.py +0 -0
- memla-0.1.0/memory_system/reasoning/trajectory.py +212 -0
- memla-0.1.0/memory_system/sync.py +189 -0
- memla-0.1.0/pyproject.toml +36 -0
- memla-0.1.0/setup.cfg +4 -0
- memla-0.1.0/tests/test_ollama_client.py +68 -0
- memla-0.1.0/tests/test_step13_coding_compile_loop.py +234 -0
- memla-0.1.0/tests/test_step14_compile_loop_benchmark.py +166 -0
- memla-0.1.0/tests/test_step15_patch_execution_benchmark.py +659 -0
- memla-0.1.0/tests/test_step16_math_c2a_benchmark.py +314 -0
- memla-0.1.0/tests/test_step17_memla_cli.py +258 -0
memla-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: memla
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Memla CLI for bounded coding and math runtimes.
|
|
5
|
+
Author-email: Jackfarmer2328 <samatarsalahudeen@gmail.com>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: requests<3,>=2.31
|
|
9
|
+
Requires-Dist: torch>=2.1
|
|
10
|
+
Requires-Dist: transformers>=4.40
|
|
11
|
+
Requires-Dist: peft>=0.7
|
|
12
|
+
Requires-Dist: safetensors>=0.4
|
|
13
|
+
Requires-Dist: sentence-transformers>=2.2
|
|
14
|
+
Requires-Dist: fastapi>=0.100
|
|
15
|
+
Requires-Dist: uvicorn>=0.20
|
|
16
|
+
Requires-Dist: fastmcp>=2.0
|
|
17
|
+
Requires-Dist: bitsandbytes>=0.41
|
|
18
|
+
Requires-Dist: accelerate>=0.25
|
|
19
|
+
Requires-Dist: sympy>=1.12
|
|
20
|
+
|
|
21
|
+
# Memla CLI
|
|
22
|
+
|
|
23
|
+
Memla is a bounded runtime that helps smaller local models make better technical decisions inside verifier-backed loops.
|
|
24
|
+
|
|
25
|
+
This public repo is the CLI-first version of Memla:
|
|
26
|
+
- local coding workflow planning and execution
|
|
27
|
+
- coding benchmarks for patch execution and compile/backtest loops
|
|
28
|
+
- bounded math benchmarks for decision-layer proof
|
|
29
|
+
- a small proof packet showing the current strongest results
|
|
30
|
+
|
|
31
|
+
It is intentionally narrower than the internal research repo.
|
|
32
|
+
|
|
33
|
+
## What is here
|
|
34
|
+
|
|
35
|
+
- `memla.py`
|
|
36
|
+
- top-level CLI entry point
|
|
37
|
+
- `memory_system/`
|
|
38
|
+
- the runtime, coding session, patch benchmark, compile benchmark, math benchmark, and pack builders
|
|
39
|
+
- `cases/`
|
|
40
|
+
- small bundled case files for quick benchmark runs
|
|
41
|
+
- `proof/`
|
|
42
|
+
- the current proof packet and key benchmark artifacts
|
|
43
|
+
- `tests/`
|
|
44
|
+
- focused tests for the coding, math, and CLI surfaces
|
|
45
|
+
|
|
46
|
+
## Current claim
|
|
47
|
+
|
|
48
|
+
The current public proof packet is in:
|
|
49
|
+
- `proof/current_pack/`
|
|
50
|
+
|
|
51
|
+
Current strongest bounded claim:
|
|
52
|
+
- on coding, `qwen3.5:9b + Memla` beat local `qwen2.5:32b` raw on execution outcome in the OAuth patch benchmark
|
|
53
|
+
- on math, `qwen3.5:4b + Memla` matched `qwen2.5:32b` raw on the bounded harder pack
|
|
54
|
+
- on ambiguous math decision states, Memla lifted both `4b` and `9b` to perfect choice accuracy on the tested slice
|
|
55
|
+
|
|
56
|
+
This is not a claim of universal model parity. It is a claim about bounded runtimes with verifiers.
|
|
57
|
+
|
|
58
|
+
## Quick start
|
|
59
|
+
|
|
60
|
+
Prerequisites:
|
|
61
|
+
- Python 3.11+
|
|
62
|
+
- Ollama running locally
|
|
63
|
+
- one or more local models already pulled
|
|
64
|
+
|
|
65
|
+
Install:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
py -3 -m pip install .
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Smoke-check the CLI:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
memla --help
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run a local environment check:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
memla doctor --repo-root . --model qwen3.5:9b
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Main commands
|
|
84
|
+
|
|
85
|
+
Build a workflow plan inside a repo:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
memla coding plan --prompt "Fix the auth regression" --repo-root .
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Run a bounded coding turn with optional verification:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
memla coding run --prompt "Repair the failing auth tests" --repo-root . --test-command "pytest -q"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Run the patch execution benchmark:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
memla coding benchmark-patch --pack path/to/git_history_case_pack.json --raw-model qwen2.5:32b --memla-model qwen3.5:9b
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Run the compile-loop benchmark:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
memla coding benchmark-compile --cases cases/coding_eval_cases.jsonl --repo-root . --model qwen3.5:9b
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Run the bounded math benchmark:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
memla math benchmark --cases cases/math_linear_c2a_v2_harder.jsonl --teacher-model qwen2.5:32b --student-models qwen3.5:4b qwen3.5:9b --executor-mode stepwise_rerank --teacher-trace-source hybrid
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Rebuild the proof packet:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
memla pack thesis --coding proof/current_pack/frozen/coding_patch_execution_report.json --math-rerank proof/current_pack/frozen/math_step_rerank_report.json --math-progress proof/current_pack/frozen/math_progress_report.json
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Benchmark commands write report bundles under `./memla_reports/` by default.
|
|
122
|
+
|
|
123
|
+
Publish the current proof pack to the repo root for Vercel:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
memla pack publish-site --source proof/current_pack --out-dir .
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Proof artifacts
|
|
130
|
+
|
|
131
|
+
Main public proof packet:
|
|
132
|
+
- `proof/current_pack/`
|
|
133
|
+
|
|
134
|
+
Supporting reports:
|
|
135
|
+
- `proof/reports/coding_oauth_patch/`
|
|
136
|
+
- `proof/reports/coding_fastapi_patch/`
|
|
137
|
+
- `proof/reports/coding_fastapi_compile/`
|
|
138
|
+
- `proof/reports/math_qwen32b_rerank/`
|
|
139
|
+
- `proof/reports/math_qwen32b_stepwise/`
|
|
140
|
+
|
|
141
|
+
## Tests
|
|
142
|
+
|
|
143
|
+
Focused verification:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
py -3 -m pytest -q tests/test_step13_coding_compile_loop.py tests/test_step14_compile_loop_benchmark.py tests/test_step15_patch_execution_benchmark.py tests/test_step16_math_c2a_benchmark.py tests/test_step17_memla_cli.py
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Product direction
|
|
150
|
+
|
|
151
|
+
Memla is being packaged as:
|
|
152
|
+
- a local/private coding runtime for smaller models
|
|
153
|
+
- a CLI first, not a chat app first
|
|
154
|
+
- a verifier-backed system, not a prompt wrapper
|
|
155
|
+
|
|
156
|
+
The wedge is simple:
|
|
157
|
+
|
|
158
|
+
**make local 9b/14b/32b coding models more execution-capable than their raw form.**
|
|
159
|
+
|
|
160
|
+
## GitHub and Vercel
|
|
161
|
+
|
|
162
|
+
This repo is set up so GitHub and Vercel can work together cleanly:
|
|
163
|
+
- GitHub Actions runs the focused CLI/benchmark test suite on push and pull request
|
|
164
|
+
- `memla pack publish-site --source proof/current_pack --out-dir .` syncs the current proof pack to the repo root
|
|
165
|
+
- the root `vercel.json` is written as a static deployment config so Vercel can deploy the proof site directly from the repo root
|
|
166
|
+
|
|
167
|
+
Practical flow:
|
|
168
|
+
1. update proof artifacts
|
|
169
|
+
2. run `memla pack publish-site`
|
|
170
|
+
3. push to GitHub
|
|
171
|
+
4. let Vercel redeploy from the latest commit
|
memla-0.1.0/README.md
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Memla CLI
|
|
2
|
+
|
|
3
|
+
Memla is a bounded runtime that helps smaller local models make better technical decisions inside verifier-backed loops.
|
|
4
|
+
|
|
5
|
+
This public repo is the CLI-first version of Memla:
|
|
6
|
+
- local coding workflow planning and execution
|
|
7
|
+
- coding benchmarks for patch execution and compile/backtest loops
|
|
8
|
+
- bounded math benchmarks for decision-layer proof
|
|
9
|
+
- a small proof packet showing the current strongest results
|
|
10
|
+
|
|
11
|
+
It is intentionally narrower than the internal research repo.
|
|
12
|
+
|
|
13
|
+
## What is here
|
|
14
|
+
|
|
15
|
+
- `memla.py`
|
|
16
|
+
- top-level CLI entry point
|
|
17
|
+
- `memory_system/`
|
|
18
|
+
- the runtime, coding session, patch benchmark, compile benchmark, math benchmark, and pack builders
|
|
19
|
+
- `cases/`
|
|
20
|
+
- small bundled case files for quick benchmark runs
|
|
21
|
+
- `proof/`
|
|
22
|
+
- the current proof packet and key benchmark artifacts
|
|
23
|
+
- `tests/`
|
|
24
|
+
- focused tests for the coding, math, and CLI surfaces
|
|
25
|
+
|
|
26
|
+
## Current claim
|
|
27
|
+
|
|
28
|
+
The current public proof packet is in:
|
|
29
|
+
- `proof/current_pack/`
|
|
30
|
+
|
|
31
|
+
Current strongest bounded claim:
|
|
32
|
+
- on coding, `qwen3.5:9b + Memla` beat local `qwen2.5:32b` raw on execution outcome in the OAuth patch benchmark
|
|
33
|
+
- on math, `qwen3.5:4b + Memla` matched `qwen2.5:32b` raw on the bounded harder pack
|
|
34
|
+
- on ambiguous math decision states, Memla lifted both `4b` and `9b` to perfect choice accuracy on the tested slice
|
|
35
|
+
|
|
36
|
+
This is not a claim of universal model parity. It is a claim about bounded runtimes with verifiers.
|
|
37
|
+
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
Prerequisites:
|
|
41
|
+
- Python 3.11+
|
|
42
|
+
- Ollama running locally
|
|
43
|
+
- one or more local models already pulled
|
|
44
|
+
|
|
45
|
+
Install:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
py -3 -m pip install .
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Smoke-check the CLI:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
memla --help
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Run a local environment check:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
memla doctor --repo-root . --model qwen3.5:9b
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Main commands
|
|
64
|
+
|
|
65
|
+
Build a workflow plan inside a repo:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
memla coding plan --prompt "Fix the auth regression" --repo-root .
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Run a bounded coding turn with optional verification:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
memla coding run --prompt "Repair the failing auth tests" --repo-root . --test-command "pytest -q"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run the patch execution benchmark:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
memla coding benchmark-patch --pack path/to/git_history_case_pack.json --raw-model qwen2.5:32b --memla-model qwen3.5:9b
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Run the compile-loop benchmark:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
memla coding benchmark-compile --cases cases/coding_eval_cases.jsonl --repo-root . --model qwen3.5:9b
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Run the bounded math benchmark:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
memla math benchmark --cases cases/math_linear_c2a_v2_harder.jsonl --teacher-model qwen2.5:32b --student-models qwen3.5:4b qwen3.5:9b --executor-mode stepwise_rerank --teacher-trace-source hybrid
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Rebuild the proof packet:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
memla pack thesis --coding proof/current_pack/frozen/coding_patch_execution_report.json --math-rerank proof/current_pack/frozen/math_step_rerank_report.json --math-progress proof/current_pack/frozen/math_progress_report.json
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Benchmark commands write report bundles under `./memla_reports/` by default.
|
|
102
|
+
|
|
103
|
+
Publish the current proof pack to the repo root for Vercel:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
memla pack publish-site --source proof/current_pack --out-dir .
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Proof artifacts
|
|
110
|
+
|
|
111
|
+
Main public proof packet:
|
|
112
|
+
- `proof/current_pack/`
|
|
113
|
+
|
|
114
|
+
Supporting reports:
|
|
115
|
+
- `proof/reports/coding_oauth_patch/`
|
|
116
|
+
- `proof/reports/coding_fastapi_patch/`
|
|
117
|
+
- `proof/reports/coding_fastapi_compile/`
|
|
118
|
+
- `proof/reports/math_qwen32b_rerank/`
|
|
119
|
+
- `proof/reports/math_qwen32b_stepwise/`
|
|
120
|
+
|
|
121
|
+
## Tests
|
|
122
|
+
|
|
123
|
+
Focused verification:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
py -3 -m pytest -q tests/test_step13_coding_compile_loop.py tests/test_step14_compile_loop_benchmark.py tests/test_step15_patch_execution_benchmark.py tests/test_step16_math_c2a_benchmark.py tests/test_step17_memla_cli.py
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Product direction
|
|
130
|
+
|
|
131
|
+
Memla is being packaged as:
|
|
132
|
+
- a local/private coding runtime for smaller models
|
|
133
|
+
- a CLI first, not a chat app first
|
|
134
|
+
- a verifier-backed system, not a prompt wrapper
|
|
135
|
+
|
|
136
|
+
The wedge is simple:
|
|
137
|
+
|
|
138
|
+
**make local 9b/14b/32b coding models more execution-capable than their raw form.**
|
|
139
|
+
|
|
140
|
+
## GitHub and Vercel
|
|
141
|
+
|
|
142
|
+
This repo is set up so GitHub and Vercel can work together cleanly:
|
|
143
|
+
- GitHub Actions runs the focused CLI/benchmark test suite on push and pull request
|
|
144
|
+
- `memla pack publish-site --source proof/current_pack --out-dir .` syncs the current proof pack to the repo root
|
|
145
|
+
- the root `vercel.json` is written as a static deployment config so Vercel can deploy the proof site directly from the repo root
|
|
146
|
+
|
|
147
|
+
Practical flow:
|
|
148
|
+
1. update proof artifacts
|
|
149
|
+
2. run `memla pack publish-site`
|
|
150
|
+
3. push to GitHub
|
|
151
|
+
4. let Vercel redeploy from the latest commit
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: memla
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Memla CLI for bounded coding and math runtimes.
|
|
5
|
+
Author-email: Jackfarmer2328 <samatarsalahudeen@gmail.com>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: requests<3,>=2.31
|
|
9
|
+
Requires-Dist: torch>=2.1
|
|
10
|
+
Requires-Dist: transformers>=4.40
|
|
11
|
+
Requires-Dist: peft>=0.7
|
|
12
|
+
Requires-Dist: safetensors>=0.4
|
|
13
|
+
Requires-Dist: sentence-transformers>=2.2
|
|
14
|
+
Requires-Dist: fastapi>=0.100
|
|
15
|
+
Requires-Dist: uvicorn>=0.20
|
|
16
|
+
Requires-Dist: fastmcp>=2.0
|
|
17
|
+
Requires-Dist: bitsandbytes>=0.41
|
|
18
|
+
Requires-Dist: accelerate>=0.25
|
|
19
|
+
Requires-Dist: sympy>=1.12
|
|
20
|
+
|
|
21
|
+
# Memla CLI
|
|
22
|
+
|
|
23
|
+
Memla is a bounded runtime that helps smaller local models make better technical decisions inside verifier-backed loops.
|
|
24
|
+
|
|
25
|
+
This public repo is the CLI-first version of Memla:
|
|
26
|
+
- local coding workflow planning and execution
|
|
27
|
+
- coding benchmarks for patch execution and compile/backtest loops
|
|
28
|
+
- bounded math benchmarks for decision-layer proof
|
|
29
|
+
- a small proof packet showing the current strongest results
|
|
30
|
+
|
|
31
|
+
It is intentionally narrower than the internal research repo.
|
|
32
|
+
|
|
33
|
+
## What is here
|
|
34
|
+
|
|
35
|
+
- `memla.py`
|
|
36
|
+
- top-level CLI entry point
|
|
37
|
+
- `memory_system/`
|
|
38
|
+
- the runtime, coding session, patch benchmark, compile benchmark, math benchmark, and pack builders
|
|
39
|
+
- `cases/`
|
|
40
|
+
- small bundled case files for quick benchmark runs
|
|
41
|
+
- `proof/`
|
|
42
|
+
- the current proof packet and key benchmark artifacts
|
|
43
|
+
- `tests/`
|
|
44
|
+
- focused tests for the coding, math, and CLI surfaces
|
|
45
|
+
|
|
46
|
+
## Current claim
|
|
47
|
+
|
|
48
|
+
The current public proof packet is in:
|
|
49
|
+
- `proof/current_pack/`
|
|
50
|
+
|
|
51
|
+
Current strongest bounded claim:
|
|
52
|
+
- on coding, `qwen3.5:9b + Memla` beat local `qwen2.5:32b` raw on execution outcome in the OAuth patch benchmark
|
|
53
|
+
- on math, `qwen3.5:4b + Memla` matched `qwen2.5:32b` raw on the bounded harder pack
|
|
54
|
+
- on ambiguous math decision states, Memla lifted both `4b` and `9b` to perfect choice accuracy on the tested slice
|
|
55
|
+
|
|
56
|
+
This is not a claim of universal model parity. It is a claim about bounded runtimes with verifiers.
|
|
57
|
+
|
|
58
|
+
## Quick start
|
|
59
|
+
|
|
60
|
+
Prerequisites:
|
|
61
|
+
- Python 3.11+
|
|
62
|
+
- Ollama running locally
|
|
63
|
+
- one or more local models already pulled
|
|
64
|
+
|
|
65
|
+
Install:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
py -3 -m pip install .
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Smoke-check the CLI:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
memla --help
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run a local environment check:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
memla doctor --repo-root . --model qwen3.5:9b
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Main commands
|
|
84
|
+
|
|
85
|
+
Build a workflow plan inside a repo:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
memla coding plan --prompt "Fix the auth regression" --repo-root .
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Run a bounded coding turn with optional verification:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
memla coding run --prompt "Repair the failing auth tests" --repo-root . --test-command "pytest -q"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Run the patch execution benchmark:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
memla coding benchmark-patch --pack path/to/git_history_case_pack.json --raw-model qwen2.5:32b --memla-model qwen3.5:9b
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Run the compile-loop benchmark:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
memla coding benchmark-compile --cases cases/coding_eval_cases.jsonl --repo-root . --model qwen3.5:9b
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Run the bounded math benchmark:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
memla math benchmark --cases cases/math_linear_c2a_v2_harder.jsonl --teacher-model qwen2.5:32b --student-models qwen3.5:4b qwen3.5:9b --executor-mode stepwise_rerank --teacher-trace-source hybrid
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Rebuild the proof packet:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
memla pack thesis --coding proof/current_pack/frozen/coding_patch_execution_report.json --math-rerank proof/current_pack/frozen/math_step_rerank_report.json --math-progress proof/current_pack/frozen/math_progress_report.json
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Benchmark commands write report bundles under `./memla_reports/` by default.
|
|
122
|
+
|
|
123
|
+
Publish the current proof pack to the repo root for Vercel:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
memla pack publish-site --source proof/current_pack --out-dir .
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Proof artifacts
|
|
130
|
+
|
|
131
|
+
Main public proof packet:
|
|
132
|
+
- `proof/current_pack/`
|
|
133
|
+
|
|
134
|
+
Supporting reports:
|
|
135
|
+
- `proof/reports/coding_oauth_patch/`
|
|
136
|
+
- `proof/reports/coding_fastapi_patch/`
|
|
137
|
+
- `proof/reports/coding_fastapi_compile/`
|
|
138
|
+
- `proof/reports/math_qwen32b_rerank/`
|
|
139
|
+
- `proof/reports/math_qwen32b_stepwise/`
|
|
140
|
+
|
|
141
|
+
## Tests
|
|
142
|
+
|
|
143
|
+
Focused verification:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
py -3 -m pytest -q tests/test_step13_coding_compile_loop.py tests/test_step14_compile_loop_benchmark.py tests/test_step15_patch_execution_benchmark.py tests/test_step16_math_c2a_benchmark.py tests/test_step17_memla_cli.py
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Product direction
|
|
150
|
+
|
|
151
|
+
Memla is being packaged as:
|
|
152
|
+
- a local/private coding runtime for smaller models
|
|
153
|
+
- a CLI first, not a chat app first
|
|
154
|
+
- a verifier-backed system, not a prompt wrapper
|
|
155
|
+
|
|
156
|
+
The wedge is simple:
|
|
157
|
+
|
|
158
|
+
**make local 9b/14b/32b coding models more execution-capable than their raw form.**
|
|
159
|
+
|
|
160
|
+
## GitHub and Vercel
|
|
161
|
+
|
|
162
|
+
This repo is set up so GitHub and Vercel can work together cleanly:
|
|
163
|
+
- GitHub Actions runs the focused CLI/benchmark test suite on push and pull request
|
|
164
|
+
- `memla pack publish-site --source proof/current_pack --out-dir .` syncs the current proof pack to the repo root
|
|
165
|
+
- the root `vercel.json` is written as a static deployment config so Vercel can deploy the proof site directly from the repo root
|
|
166
|
+
|
|
167
|
+
Practical flow:
|
|
168
|
+
1. update proof artifacts
|
|
169
|
+
2. run `memla pack publish-site`
|
|
170
|
+
3. push to GitHub
|
|
171
|
+
4. let Vercel redeploy from the latest commit
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
memla.egg-info/PKG-INFO
|
|
4
|
+
memla.egg-info/SOURCES.txt
|
|
5
|
+
memla.egg-info/dependency_links.txt
|
|
6
|
+
memla.egg-info/entry_points.txt
|
|
7
|
+
memla.egg-info/requires.txt
|
|
8
|
+
memla.egg-info/top_level.txt
|
|
9
|
+
memory_system/__init__.py
|
|
10
|
+
memory_system/cli.py
|
|
11
|
+
memory_system/main.py
|
|
12
|
+
memory_system/ollama_client.py
|
|
13
|
+
memory_system/sync.py
|
|
14
|
+
memory_system/adapters/__init__.py
|
|
15
|
+
memory_system/adapters/ewc.py
|
|
16
|
+
memory_system/adapters/gradient_pass.py
|
|
17
|
+
memory_system/adapters/lora_manager.py
|
|
18
|
+
memory_system/adapters/merge.py
|
|
19
|
+
memory_system/distillation/__init__.py
|
|
20
|
+
memory_system/distillation/abstract_c2a_benchmark.py
|
|
21
|
+
memory_system/distillation/acquisition_pack_builder.py
|
|
22
|
+
memory_system/distillation/batch_runner.py
|
|
23
|
+
memory_system/distillation/coding_compile_loop.py
|
|
24
|
+
memory_system/distillation/coding_log.py
|
|
25
|
+
memory_system/distillation/coding_proxy.py
|
|
26
|
+
memory_system/distillation/comparison_runner.py
|
|
27
|
+
memory_system/distillation/compile_loop_benchmark.py
|
|
28
|
+
memory_system/distillation/constraint_graph.py
|
|
29
|
+
memory_system/distillation/demo_runner.py
|
|
30
|
+
memory_system/distillation/diligence_packet_builder.py
|
|
31
|
+
memory_system/distillation/eval_harness.py
|
|
32
|
+
memory_system/distillation/exporter.py
|
|
33
|
+
memory_system/distillation/git_history_cases.py
|
|
34
|
+
memory_system/distillation/hypothesis_benchmark.py
|
|
35
|
+
memory_system/distillation/math_c2a_benchmark.py
|
|
36
|
+
memory_system/distillation/patch_execution_benchmark.py
|
|
37
|
+
memory_system/distillation/pitch_pack_builder.py
|
|
38
|
+
memory_system/distillation/seed_runner.py
|
|
39
|
+
memory_system/distillation/thesis_pack_builder.py
|
|
40
|
+
memory_system/distillation/transfer_eval.py
|
|
41
|
+
memory_system/distillation/workflow_planner.py
|
|
42
|
+
memory_system/distillation/workspace_capture.py
|
|
43
|
+
memory_system/memory/__init__.py
|
|
44
|
+
memory_system/memory/chunk_manager.py
|
|
45
|
+
memory_system/memory/consolidator.py
|
|
46
|
+
memory_system/memory/episode_log.py
|
|
47
|
+
memory_system/memory/lazy_import.py
|
|
48
|
+
memory_system/memory/llm_extractor.py
|
|
49
|
+
memory_system/middleware/__init__.py
|
|
50
|
+
memory_system/middleware/context_builder.py
|
|
51
|
+
memory_system/middleware/quality.py
|
|
52
|
+
memory_system/middleware/ttt_layer.py
|
|
53
|
+
memory_system/projection/__init__.py
|
|
54
|
+
memory_system/projection/gradient_filter.py
|
|
55
|
+
memory_system/reasoning/__init__.py
|
|
56
|
+
memory_system/reasoning/trajectory.py
|
|
57
|
+
tests/test_ollama_client.py
|
|
58
|
+
tests/test_step13_coding_compile_loop.py
|
|
59
|
+
tests/test_step14_compile_loop_benchmark.py
|
|
60
|
+
tests/test_step15_patch_execution_benchmark.py
|
|
61
|
+
tests/test_step16_math_c2a_benchmark.py
|
|
62
|
+
tests/test_step17_memla_cli.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
memory_system
|