mentar 0.1.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mentar-0.1.0.dev0/PKG-INFO +178 -0
- mentar-0.1.0.dev0/README.md +151 -0
- mentar-0.1.0.dev0/pyproject.toml +72 -0
- mentar-0.1.0.dev0/setup.cfg +4 -0
- mentar-0.1.0.dev0/src/mentar/__init__.py +6 -0
- mentar-0.1.0.dev0/src/mentar/cli/__init__.py +1 -0
- mentar-0.1.0.dev0/src/mentar/cli/__main__.py +62 -0
- mentar-0.1.0.dev0/src/mentar/db/__init__.py +4 -0
- mentar-0.1.0.dev0/src/mentar/db/store.py +416 -0
- mentar-0.1.0.dev0/src/mentar/dialogue/__init__.py +4 -0
- mentar-0.1.0.dev0/src/mentar/engine/__init__.py +4 -0
- mentar-0.1.0.dev0/src/mentar/engine/bkt.py +99 -0
- mentar-0.1.0.dev0/src/mentar/engine/fringe.py +104 -0
- mentar-0.1.0.dev0/src/mentar/engine/probe_classify.py +79 -0
- mentar-0.1.0.dev0/src/mentar/eval/__init__.py +4 -0
- mentar-0.1.0.dev0/src/mentar/eval/verify_numeric.py +619 -0
- mentar-0.1.0.dev0/src/mentar/grounding/__init__.py +65 -0
- mentar-0.1.0.dev0/src/mentar/grounding/cache.py +127 -0
- mentar-0.1.0.dev0/src/mentar/grounding/reader.py +271 -0
- mentar-0.1.0.dev0/src/mentar/grounding/resolve.py +125 -0
- mentar-0.1.0.dev0/src/mentar/grounding/source_map.py +120 -0
- mentar-0.1.0.dev0/src/mentar/grounding/sources.py +267 -0
- mentar-0.1.0.dev0/src/mentar/grounding/wrapper.py +50 -0
- mentar-0.1.0.dev0/src/mentar/inference/__init__.py +7 -0
- mentar-0.1.0.dev0/src/mentar/safety/__init__.py +4 -0
- mentar-0.1.0.dev0/src/mentar/safety/escalation.py +316 -0
- mentar-0.1.0.dev0/src/mentar/tools/__init__.py +4 -0
- mentar-0.1.0.dev0/src/mentar/tools/validate_template.py +322 -0
- mentar-0.1.0.dev0/src/mentar.egg-info/PKG-INFO +178 -0
- mentar-0.1.0.dev0/src/mentar.egg-info/SOURCES.txt +34 -0
- mentar-0.1.0.dev0/src/mentar.egg-info/dependency_links.txt +1 -0
- mentar-0.1.0.dev0/src/mentar.egg-info/entry_points.txt +2 -0
- mentar-0.1.0.dev0/src/mentar.egg-info/requires.txt +15 -0
- mentar-0.1.0.dev0/src/mentar.egg-info/top_level.txt +1 -0
- mentar-0.1.0.dev0/tests/test_prompt_registry.py +131 -0
- mentar-0.1.0.dev0/tests/test_smoke.py +8 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mentar
|
|
3
|
+
Version: 0.1.0.dev0
|
|
4
|
+
Summary: OSS-first, local-first AI tutor for children — supplements school, never replaces it.
|
|
5
|
+
Author: Mentar maintainers
|
|
6
|
+
License: TBD
|
|
7
|
+
Keywords: tutor,education,kids,oss,local-llm,kst,bkt
|
|
8
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
9
|
+
Classifier: Intended Audience :: Education
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Topic :: Education
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: pyyaml>=6.0
|
|
16
|
+
Requires-Dist: pyBKT>=1.4
|
|
17
|
+
Requires-Dist: libzim>=3.10
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
21
|
+
Requires-Dist: hypothesis>=6.100; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
23
|
+
Provides-Extra: inference-ollama
|
|
24
|
+
Requires-Dist: ollama>=0.3; extra == "inference-ollama"
|
|
25
|
+
Provides-Extra: nas
|
|
26
|
+
Requires-Dist: smbprotocol>=1.12; extra == "nas"
|
|
27
|
+
|
|
28
|
+
# Mentar
|
|
29
|
+
|
|
30
|
+
**OSS-first AI tutor for children that supplements — never replaces — school education.**
|
|
31
|
+
|
|
32
|
+
Local LLM hosting. Curriculum-templated by country and year level. Built-in kid safety from day one.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## What it is
|
|
37
|
+
|
|
38
|
+
Mentar is an open-source tutoring framework that lets parents run an AI tutor on their own hardware, with no data leaving the device and no per-seat API fees. The core is three components:
|
|
39
|
+
|
|
40
|
+
- **Template engine** — Markdown curriculum files per country + year/grade level, used as learning guidelines. Community-extensible.
|
|
41
|
+
- **Dialogue framework** — Scaffolds tutoring conversations within the bounds of the active curriculum template.
|
|
42
|
+
- **Safety layer** — Content guardrails and age-mode logic baked in, not bolted on. This is the non-negotiable bar the project must clear to justify existing.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## How this is built — an honesty note
|
|
47
|
+
|
|
48
|
+
Mentar is, candidly, **AI-built software**. The great majority of the code, tests, and docs in this
|
|
49
|
+
repo are written by AI agents working under a human maintainer's direction, decisions, and review —
|
|
50
|
+
**not hand-written by a person**. In that sense it is close to "vibe coding," even though it follows
|
|
51
|
+
deliberate engineering discipline: spec-first design, test-driven development (150+ tests gating
|
|
52
|
+
changes), design docs before code, versioned prompts, and code review. Those principles raise the
|
|
53
|
+
quality bar — but they don't change that underlying fact, and we'd rather be upfront about it.
|
|
54
|
+
|
|
55
|
+
What this means for you:
|
|
56
|
+
|
|
57
|
+
- **The human makes the decisions** (scope, safety thresholds, model choices, architecture); the AI
|
|
58
|
+
executes and advises. Changes are test-gated and reviewed — but the author is AI.
|
|
59
|
+
- **It has not had a professional, independent audit.** In particular, the **child-safety** code and
|
|
60
|
+
spec are AI-authored and reviewed by AI plus the maintainer — *not* by a qualified safeguarding,
|
|
61
|
+
security, or child-development professional. The safety spec's own rollout guards
|
|
62
|
+
([`docs/SAFETY.md`](docs/SAFETY.md)) require that review **before** any use beyond a single,
|
|
63
|
+
supervised pilot.
|
|
64
|
+
- Treat the project accordingly: carefully built and openly documented, but **not yet independently
|
|
65
|
+
verified**. Read the code, run the tests, and do not put it in front of a real child outside a
|
|
66
|
+
supervised pilot until the open safety items are closed.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Why local-first
|
|
71
|
+
|
|
72
|
+
Two reasons:
|
|
73
|
+
|
|
74
|
+
1. **Privacy** — children's data never leaves the device. No operator collects it. This is also a major compliance advantage (see `compliance/`).
|
|
75
|
+
2. **Cost** — no per-seat API fees. A parent with a capable laptop or homelab machine pays nothing to run inference.
|
|
76
|
+
|
|
77
|
+
A paid hosted-inference tier (for non-technical parents) is a planned future bridge, but it carries its own heavier compliance obligations. The OSS local edition stays deliberately data-light by design.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Architecture
|
|
82
|
+
|
|
83
|
+
The codebase uses a Python **src-layout** (`src/mentar/`); specs and the safety spec live
|
|
84
|
+
under `docs/` (not in a top-level `safety/`). See [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md)
|
|
85
|
+
for the authoritative layout.
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
mentar/
|
|
89
|
+
├── curriculum/ # Markdown curriculum templates (concept graphs)
|
|
90
|
+
│ ├── _template.md # Authoring format for new curricula
|
|
91
|
+
│ └── templates/
|
|
92
|
+
│ └── _pilot/ # Phase-0 fractions pilot graph (more to follow)
|
|
93
|
+
├── prompts/ # Versioned prompt templates + prompts/README.md registry (W6.2)
|
|
94
|
+
├── src/mentar/ # Python package (src-layout)
|
|
95
|
+
│ ├── engine/ # Concept graph (KST), BKT mastery, fringe, probe classifier
|
|
96
|
+
│ ├── dialogue/ # Turn-loop controller (session state machine)
|
|
97
|
+
│ ├── safety/ # Safety-layer implementation (escalation, filters)
|
|
98
|
+
│ ├── inference/ # LLM abstraction layer (swappable backends)
|
|
99
|
+
│ ├── eval/ # Deterministic verifiers + model-eval harness
|
|
100
|
+
│ ├── db/ # Local SQLite store (schema + access)
|
|
101
|
+
│ ├── tools/ # Template validator, etc.
|
|
102
|
+
│ └── cli/ # Command-line entry points
|
|
103
|
+
├── tests/ # Mirrors the src/ layout
|
|
104
|
+
├── docs/ # SPEC, PHASE0(+_STATUS), SAFETY, SESSION_FSM, ARCHITECTURE,
|
|
105
|
+
│ # TESTS, CONTENT_LICENSES, PILOT_CONSENT, design/, research/
|
|
106
|
+
├── compliance/ # Compliance coverage-status map (points back to docs/)
|
|
107
|
+
└── eval/ # Eval datasets/outputs (data is gitignored)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Curriculum templates
|
|
113
|
+
|
|
114
|
+
Templates are simple Markdown files that define what topics a child at a given country + year level should be learning. They are **guidelines**, not scripts — the dialogue framework uses them to keep sessions on-topic and age-appropriate.
|
|
115
|
+
|
|
116
|
+
Anyone can add a new country or year-level template. See `curriculum/_template.md` for the format.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Safety
|
|
121
|
+
|
|
122
|
+
Kid-safe content blocks and age-appropriate responses are non-negotiable and built in from the start. See [`docs/SAFETY.md`](docs/SAFETY.md) for the full 6-layer spec (implementation lives in `src/mentar/safety/`).
|
|
123
|
+
|
|
124
|
+
Key commitments:
|
|
125
|
+
- No dark patterns, no compulsive gamification mechanics (legal line under EU AI Act Article 5)
|
|
126
|
+
- No emotion recognition or mood inference
|
|
127
|
+
- Under-13: parent-mediated mode (parent in the loop, child never alone with AI)
|
|
128
|
+
- 13+: more independent with parental oversight available
|
|
129
|
+
- Hard block: model must never produce sexual content involving minors
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Compliance
|
|
134
|
+
|
|
135
|
+
The OSS local edition is data-light by design, which removes most direct developer exposure under COPPA, GDPR-K, and similar frameworks. However, obligations are real and documented.
|
|
136
|
+
|
|
137
|
+
See `compliance/README.md` for coverage status — what's mapped, what's incomplete, and where contributors can help.
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## LLMs
|
|
142
|
+
|
|
143
|
+
Mentar is designed to work with smaller OSS models suited to educational dialogue. Low hallucination is critical for a children's tutor. The inference layer is abstracted so users can swap models.
|
|
144
|
+
|
|
145
|
+
Current evaluation status: see `docs/llm-compatibility.md`.
|
|
146
|
+
|
|
147
|
+
Hardware requirements: see `docs/hardware-requirements.md`.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Documentation
|
|
152
|
+
|
|
153
|
+
Full index: **[`docs/README.md`](docs/README.md)**. Highlights:
|
|
154
|
+
|
|
155
|
+
- **[Spec](docs/SPEC.md)** · **[Live status](docs/PHASE0_STATUS.md)** · **[Architecture](docs/ARCHITECTURE.md)**
|
|
156
|
+
- **[Safety spec](docs/SAFETY.md)** (6-layer, non-negotiable) · **[Pilot consent](docs/PILOT_CONSENT.md)**
|
|
157
|
+
- **[Session state machine](docs/SESSION_FSM.md)** · **[Test plan](docs/TESTS.md)**
|
|
158
|
+
- Model evaluation — **[results, plain-language](docs/EVAL_RESULTS.md)** · **[roster & plan](docs/MODEL.md)** · **[eval tooling](eval/README.md)**
|
|
159
|
+
- **[Content licences](docs/CONTENT_LICENSES.md)** · **[Compliance status](compliance/README.md)** · **[Config & grounding sources](config/README.md)**
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Contributing
|
|
164
|
+
|
|
165
|
+
- Add or improve a curriculum template under `curriculum/templates/` (see `curriculum/_template.md` for the format)
|
|
166
|
+
- Improve the safety spec in `docs/SAFETY.md`
|
|
167
|
+
- Fill compliance gaps flagged in `compliance/README.md`
|
|
168
|
+
- Test and document model compatibility in `docs/llm-compatibility.md`
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Status
|
|
173
|
+
|
|
174
|
+
Early-stage, in active **Phase 0** (a single-subject fractions pilot). The architecture,
|
|
175
|
+
safety spec, session state machine, learner data model, and core engine pieces (concept
|
|
176
|
+
graph, BKT mastery, verifiers, escalation, prompt registry) are in place; the end-to-end
|
|
177
|
+
dialogue loop and the local model evaluation are the next milestones. Live status tracker:
|
|
178
|
+
[`docs/PHASE0_STATUS.md`](docs/PHASE0_STATUS.md).
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Mentar
|
|
2
|
+
|
|
3
|
+
**OSS-first AI tutor for children that supplements — never replaces — school education.**
|
|
4
|
+
|
|
5
|
+
Local LLM hosting. Curriculum-templated by country and year level. Built-in kid safety from day one.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What it is
|
|
10
|
+
|
|
11
|
+
Mentar is an open-source tutoring framework that lets parents run an AI tutor on their own hardware, with no data leaving the device and no per-seat API fees. The core is three components:
|
|
12
|
+
|
|
13
|
+
- **Template engine** — Markdown curriculum files per country + year/grade level, used as learning guidelines. Community-extensible.
|
|
14
|
+
- **Dialogue framework** — Scaffolds tutoring conversations within the bounds of the active curriculum template.
|
|
15
|
+
- **Safety layer** — Content guardrails and age-mode logic baked in, not bolted on. This is the non-negotiable bar the project must clear to justify existing.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## How this is built — an honesty note
|
|
20
|
+
|
|
21
|
+
Mentar is, candidly, **AI-built software**. The great majority of the code, tests, and docs in this
|
|
22
|
+
repo are written by AI agents working under a human maintainer's direction, decisions, and review —
|
|
23
|
+
**not hand-written by a person**. In that sense it is close to "vibe coding," even though it follows
|
|
24
|
+
deliberate engineering discipline: spec-first design, test-driven development (150+ tests gating
|
|
25
|
+
changes), design docs before code, versioned prompts, and code review. Those principles raise the
|
|
26
|
+
quality bar — but they don't change that underlying fact, and we'd rather be upfront about it.
|
|
27
|
+
|
|
28
|
+
What this means for you:
|
|
29
|
+
|
|
30
|
+
- **The human makes the decisions** (scope, safety thresholds, model choices, architecture); the AI
|
|
31
|
+
executes and advises. Changes are test-gated and reviewed — but the author is AI.
|
|
32
|
+
- **It has not had a professional, independent audit.** In particular, the **child-safety** code and
|
|
33
|
+
spec are AI-authored and reviewed by AI plus the maintainer — *not* by a qualified safeguarding,
|
|
34
|
+
security, or child-development professional. The safety spec's own rollout guards
|
|
35
|
+
([`docs/SAFETY.md`](docs/SAFETY.md)) require that review **before** any use beyond a single,
|
|
36
|
+
supervised pilot.
|
|
37
|
+
- Treat the project accordingly: carefully built and openly documented, but **not yet independently
|
|
38
|
+
verified**. Read the code, run the tests, and do not put it in front of a real child outside a
|
|
39
|
+
supervised pilot until the open safety items are closed.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Why local-first
|
|
44
|
+
|
|
45
|
+
Two reasons:
|
|
46
|
+
|
|
47
|
+
1. **Privacy** — children's data never leaves the device. No operator collects it. This is also a major compliance advantage (see `compliance/`).
|
|
48
|
+
2. **Cost** — no per-seat API fees. A parent with a capable laptop or homelab machine pays nothing to run inference.
|
|
49
|
+
|
|
50
|
+
A paid hosted-inference tier (for non-technical parents) is a planned future bridge, but it carries its own heavier compliance obligations. The OSS local edition stays deliberately data-light by design.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Architecture
|
|
55
|
+
|
|
56
|
+
The codebase uses a Python **src-layout** (`src/mentar/`); specs and the safety spec live
|
|
57
|
+
under `docs/` (not in a top-level `safety/`). See [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md)
|
|
58
|
+
for the authoritative layout.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
mentar/
|
|
62
|
+
├── curriculum/ # Markdown curriculum templates (concept graphs)
|
|
63
|
+
│ ├── _template.md # Authoring format for new curricula
|
|
64
|
+
│ └── templates/
|
|
65
|
+
│ └── _pilot/ # Phase-0 fractions pilot graph (more to follow)
|
|
66
|
+
├── prompts/ # Versioned prompt templates + prompts/README.md registry (W6.2)
|
|
67
|
+
├── src/mentar/ # Python package (src-layout)
|
|
68
|
+
│ ├── engine/ # Concept graph (KST), BKT mastery, fringe, probe classifier
|
|
69
|
+
│ ├── dialogue/ # Turn-loop controller (session state machine)
|
|
70
|
+
│ ├── safety/ # Safety-layer implementation (escalation, filters)
|
|
71
|
+
│ ├── inference/ # LLM abstraction layer (swappable backends)
|
|
72
|
+
│ ├── eval/ # Deterministic verifiers + model-eval harness
|
|
73
|
+
│ ├── db/ # Local SQLite store (schema + access)
|
|
74
|
+
│ ├── tools/ # Template validator, etc.
|
|
75
|
+
│ └── cli/ # Command-line entry points
|
|
76
|
+
├── tests/ # Mirrors the src/ layout
|
|
77
|
+
├── docs/ # SPEC, PHASE0(+_STATUS), SAFETY, SESSION_FSM, ARCHITECTURE,
|
|
78
|
+
│ # TESTS, CONTENT_LICENSES, PILOT_CONSENT, design/, research/
|
|
79
|
+
├── compliance/ # Compliance coverage-status map (points back to docs/)
|
|
80
|
+
└── eval/ # Eval datasets/outputs (data is gitignored)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Curriculum templates
|
|
86
|
+
|
|
87
|
+
Templates are simple Markdown files that define what topics a child at a given country + year level should be learning. They are **guidelines**, not scripts — the dialogue framework uses them to keep sessions on-topic and age-appropriate.
|
|
88
|
+
|
|
89
|
+
Anyone can add a new country or year-level template. See `curriculum/_template.md` for the format.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Safety
|
|
94
|
+
|
|
95
|
+
Kid-safe content blocks and age-appropriate responses are non-negotiable and built in from the start. See [`docs/SAFETY.md`](docs/SAFETY.md) for the full 6-layer spec (implementation lives in `src/mentar/safety/`).
|
|
96
|
+
|
|
97
|
+
Key commitments:
|
|
98
|
+
- No dark patterns, no compulsive gamification mechanics (legal line under EU AI Act Article 5)
|
|
99
|
+
- No emotion recognition or mood inference
|
|
100
|
+
- Under-13: parent-mediated mode (parent in the loop, child never alone with AI)
|
|
101
|
+
- 13+: more independent with parental oversight available
|
|
102
|
+
- Hard block: model must never produce sexual content involving minors
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Compliance
|
|
107
|
+
|
|
108
|
+
The OSS local edition is data-light by design, which removes most direct developer exposure under COPPA, GDPR-K, and similar frameworks. However, obligations are real and documented.
|
|
109
|
+
|
|
110
|
+
See `compliance/README.md` for coverage status — what's mapped, what's incomplete, and where contributors can help.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## LLMs
|
|
115
|
+
|
|
116
|
+
Mentar is designed to work with smaller OSS models suited to educational dialogue. Low hallucination is critical for a children's tutor. The inference layer is abstracted so users can swap models.
|
|
117
|
+
|
|
118
|
+
Current evaluation status: see `docs/llm-compatibility.md`.
|
|
119
|
+
|
|
120
|
+
Hardware requirements: see `docs/hardware-requirements.md`.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Documentation
|
|
125
|
+
|
|
126
|
+
Full index: **[`docs/README.md`](docs/README.md)**. Highlights:
|
|
127
|
+
|
|
128
|
+
- **[Spec](docs/SPEC.md)** · **[Live status](docs/PHASE0_STATUS.md)** · **[Architecture](docs/ARCHITECTURE.md)**
|
|
129
|
+
- **[Safety spec](docs/SAFETY.md)** (6-layer, non-negotiable) · **[Pilot consent](docs/PILOT_CONSENT.md)**
|
|
130
|
+
- **[Session state machine](docs/SESSION_FSM.md)** · **[Test plan](docs/TESTS.md)**
|
|
131
|
+
- Model evaluation — **[results, plain-language](docs/EVAL_RESULTS.md)** · **[roster & plan](docs/MODEL.md)** · **[eval tooling](eval/README.md)**
|
|
132
|
+
- **[Content licences](docs/CONTENT_LICENSES.md)** · **[Compliance status](compliance/README.md)** · **[Config & grounding sources](config/README.md)**
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Contributing
|
|
137
|
+
|
|
138
|
+
- Add or improve a curriculum template under `curriculum/templates/` (see `curriculum/_template.md` for the format)
|
|
139
|
+
- Improve the safety spec in `docs/SAFETY.md`
|
|
140
|
+
- Fill compliance gaps flagged in `compliance/README.md`
|
|
141
|
+
- Test and document model compatibility in `docs/llm-compatibility.md`
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Status
|
|
146
|
+
|
|
147
|
+
Early-stage, in active **Phase 0** (a single-subject fractions pilot). The architecture,
|
|
148
|
+
safety spec, session state machine, learner data model, and core engine pieces (concept
|
|
149
|
+
graph, BKT mastery, verifiers, escalation, prompt registry) are in place; the end-to-end
|
|
150
|
+
dialogue loop and the local model evaluation are the next milestones. Live status tracker:
|
|
151
|
+
[`docs/PHASE0_STATUS.md`](docs/PHASE0_STATUS.md).
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "mentar"
|
|
7
|
+
version = "0.1.0.dev0"
|
|
8
|
+
description = "OSS-first, local-first AI tutor for children — supplements school, never replaces it."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "TBD" }
|
|
12
|
+
authors = [{ name = "Mentar maintainers" }]
|
|
13
|
+
keywords = ["tutor", "education", "kids", "oss", "local-llm", "kst", "bkt"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
16
|
+
"Intended Audience :: Education",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Topic :: Education",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
dependencies = [
|
|
23
|
+
"pyyaml>=6.0",
|
|
24
|
+
"pyBKT>=1.4",
|
|
25
|
+
"libzim>=3.10",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
dev = [
|
|
30
|
+
"pytest>=8.0",
|
|
31
|
+
"pytest-cov>=4.1",
|
|
32
|
+
"hypothesis>=6.100",
|
|
33
|
+
"ruff>=0.5",
|
|
34
|
+
]
|
|
35
|
+
inference-ollama = [
|
|
36
|
+
"ollama>=0.3",
|
|
37
|
+
]
|
|
38
|
+
nas = [
|
|
39
|
+
# SMB/Samba client for reading/downloading ZIMs from a NAS via smb:// URLs.
|
|
40
|
+
# Not needed when the share is mounted in the OS (libzim reads the mount directly).
|
|
41
|
+
"smbprotocol>=1.12",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[project.scripts]
|
|
45
|
+
mentar = "mentar.cli.__main__:main"
|
|
46
|
+
|
|
47
|
+
[tool.setuptools.packages.find]
|
|
48
|
+
where = ["src"]
|
|
49
|
+
include = ["mentar*"]
|
|
50
|
+
|
|
51
|
+
[tool.pytest.ini_options]
|
|
52
|
+
minversion = "8.0"
|
|
53
|
+
testpaths = ["tests"]
|
|
54
|
+
python_files = ["test_*.py"]
|
|
55
|
+
addopts = [
|
|
56
|
+
"-ra",
|
|
57
|
+
"--strict-markers",
|
|
58
|
+
"--strict-config",
|
|
59
|
+
]
|
|
60
|
+
markers = [
|
|
61
|
+
"llm: tests that require a live LLM backend (skipped on the build host; run on eval host)",
|
|
62
|
+
"slow: tests that take >5s",
|
|
63
|
+
"human_gated: tests that require a human action (consent, review, child session)",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.ruff]
|
|
67
|
+
target-version = "py311"
|
|
68
|
+
line-length = 100
|
|
69
|
+
|
|
70
|
+
[tool.ruff.lint]
|
|
71
|
+
select = ["E", "F", "W", "I", "B", "UP"]
|
|
72
|
+
ignore = ["E501"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Mentar CLI entry: `mentar serve`, `mentar eval`, `mentar validate-template`."""
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""CLI entry point. Wired in pyproject.toml [project.scripts].
|
|
2
|
+
|
|
3
|
+
Subcommands:
|
|
4
|
+
serve — Start a pilot tutoring session (stub).
|
|
5
|
+
eval — Run the eval harness (stub).
|
|
6
|
+
validate-template — Validate a curriculum template against the W3.1 schema.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main(argv: list[str] | None = None) -> int:
|
|
16
|
+
parser = argparse.ArgumentParser(prog="mentar")
|
|
17
|
+
sub = parser.add_subparsers(dest="cmd", required=True)
|
|
18
|
+
sub.add_parser("serve", help="Start a pilot tutoring session (stub).")
|
|
19
|
+
sub.add_parser("eval", help="Run the eval harness (stub).")
|
|
20
|
+
vt = sub.add_parser(
|
|
21
|
+
"validate-template",
|
|
22
|
+
help="Validate a curriculum template against the W3.1 schema.",
|
|
23
|
+
)
|
|
24
|
+
vt.add_argument("path", help="Path to curriculum template Markdown file.")
|
|
25
|
+
|
|
26
|
+
args = parser.parse_args(argv)
|
|
27
|
+
|
|
28
|
+
if args.cmd == "validate-template":
|
|
29
|
+
from mentar.tools.validate_template import validate
|
|
30
|
+
|
|
31
|
+
result = validate(args.path)
|
|
32
|
+
|
|
33
|
+
for w in result.warnings:
|
|
34
|
+
print(f"WARNING: {w}", file=sys.stderr)
|
|
35
|
+
|
|
36
|
+
for e in result.errors:
|
|
37
|
+
print(f"ERROR: {e}", file=sys.stderr)
|
|
38
|
+
|
|
39
|
+
if result.ok:
|
|
40
|
+
n = len(result.concept_ids)
|
|
41
|
+
print(
|
|
42
|
+
f"OK: {args.path} — {n} concept(s); "
|
|
43
|
+
f"roots={result.roots}; leaves={result.leaves}",
|
|
44
|
+
file=sys.stdout,
|
|
45
|
+
)
|
|
46
|
+
if result.warnings:
|
|
47
|
+
print(f" {len(result.warnings)} warning(s) — see stderr.", file=sys.stdout)
|
|
48
|
+
else:
|
|
49
|
+
print(
|
|
50
|
+
f"FAIL: {args.path} — {len(result.errors)} error(s).",
|
|
51
|
+
file=sys.stdout,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
return 0 if result.ok else 1
|
|
55
|
+
|
|
56
|
+
# stubs
|
|
57
|
+
print(f"mentar: '{args.cmd}' not implemented yet (stub).", file=sys.stderr)
|
|
58
|
+
return 1
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
if __name__ == "__main__":
|
|
62
|
+
raise SystemExit(main())
|