simula 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
simula-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Peter Ofovik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,8 @@
1
+ include README.md
2
+ include LICENSE
3
+ include PLAN.md
4
+ include PRINCIPLES.md
5
+ include simula.toml.example
6
+ recursive-include schemas *.json
7
+ recursive-include grammars *.gbnf
8
+ include simula/py.typed
simula-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: simula
3
+ Version: 0.1.0
4
+ Summary: A local-first engine for generating and inhabiting worlds and personas from your own materials.
5
+ Author-email: Peter Ofovik <pedjaurosevic@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/pedjaurosevic/simula
8
+ Project-URL: Repository, https://github.com/pedjaurosevic/simula
9
+ Keywords: llm,simulation,worldbuilding,persona,local-first,llama.cpp,gbnf
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Operating System :: OS Independent
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: platformdirs>=4.0
23
+ Provides-Extra: openai
24
+ Requires-Dist: requests>=2.28; extra == "openai"
25
+ Provides-Extra: embeddings
26
+ Requires-Dist: sentence-transformers>=2.2; extra == "embeddings"
27
+ Dynamic: license-file
28
+
29
+ # simula
30
+
31
+ **A local-first engine for generating and inhabiting worlds and personas from your own materials.**
32
+
33
+ One engine, two blueprint types (`world` | `persona`), one unified entity model (`Simulacrum`).
34
+ Local-first (llama.cpp + GBNF for hard-constrained output), but always able to run against any
35
+ OpenAI-compatible endpoint.
36
+
37
+ > **Status:** early alpha (Phase 0). The core is still a skeleton — see [`PLAN.md`](PLAN.md) for the
38
+ > implementation phases and [`PRINCIPLES.md`](PRINCIPLES.md) for the empirically derived lessons
39
+ > that drive the design.
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install simula
45
+ ```
46
+
47
+ ## Quick start
48
+
49
+ ```bash
50
+ simula --version
51
+ simula init # create a workspace (materials/ blueprints/ saves/ evals/)
52
+ simula where # print the workspace path
53
+ ```
54
+
55
+ The workspace lives at a platform-appropriate path (via `platformdirs`), falling back to
56
+ `~/simula-workspace`. **No corpus is ever shipped** — you bring your own materials.
57
+
58
+ ## Configuration
59
+
60
+ Copy `simula.toml.example` into your workspace as `simula.toml` and edit the backend (llama.cpp or
61
+ OpenAI-compatible), embeddings, RAG, and experience mode (`world` | `persona`).
62
+
63
+ ## Design in brief
64
+
65
+ - **Constrained output is the reliability backbone:** GBNF on llama.cpp's `/completion`,
66
+ `json_schema` on OpenAI-compatible backends, with a parse-and-repair fallback.
67
+ - **Minimal prompt:** a commit directive + the blueprint spine + pointers into your materials (RAG),
68
+ not a large ontology.
69
+ - **Local-first and private:** embeddings and generation can stay on your own machine.
70
+ - **The engine holds the truth:** the LLM only *proposes* structured changes; the engine validates
71
+ and applies them against authoritative state.
72
+
73
+ ## Documentation
74
+
75
+ Full docs: **https://pedjaurosevic.github.io/simula/**
76
+
77
+ ## License
78
+
79
+ MIT — see [LICENSE](LICENSE).
simula-0.1.0/PLAN.md ADDED
@@ -0,0 +1,275 @@
1
+ # simula — PLAN
2
+
3
+ > A local-first engine for generating and inhabiting fashioned worlds and personas from the
4
+ > user's own materials. One engine, two blueprint types, one unified entity model.
5
+ > The name carries a question, not a claim: is a fashioned mind/world real? (PKD)
6
+
7
+ This document is the design plan. Code, schemas, and config are all in English (portability, GitHub).
8
+
9
+ ---
10
+
11
+ ## 0. What simula is (and is NOT)
12
+
13
+ **It is:** a thin harness that (1) takes the user's materials (books, texts), (2) distills a
14
+ *blueprint* from them (the spine of a world or a persona — mostly pointers into the material plus a
15
+ tiny summary), and (3) runs an interactive, stateful, memory-bearing experience in which the LLM
16
+ **proposes** structured changes while the engine **holds the truth**.
17
+
18
+ **It is NOT:** a world generator "out of thin air," an elaborate cognitive architecture, or a Zork
19
+ fork. Zork is a fixed authored world; we build a world *from a corpus*. (See `PRINCIPLES.md` for
20
+ why — these are lessons from our experiment series, not style.)
21
+
22
+ Two implementations over the same core:
23
+ - **simula worldbuilding** — a TUI/web game; the user walks through a world generated from their
24
+ books. The founding idea; we build it first.
25
+ - **simula persona** — the same concept applied to creating a persona (Big Five / OCEAN substrate
26
+ via IPIP, public domain × the user's material). Second phase.
27
+
28
+ A key idea: a persona created in *persona* mode can live inside a world from *worldbuilding* mode.
29
+ This is NOT an add-on — it is exactly what the unified entity model exists for (section 4).
30
+
31
+ ---
32
+
33
+ ## 1. The unified entity model — the heart of the design
34
+
35
+ Everything in simula is a **simulacrum**:
36
+
37
+ ```
38
+ Simulacrum = (Blueprint, State, Memory, Contract)
39
+ ```
40
+
41
+ - **Blueprint** — the distilled spine: WHAT the entity is. Mostly pointers into materials plus a
42
+ short summary. NOT a large ontology (the B'=B lesson: ontology is decoration; grounding on the
43
+ text carries the consequence).
44
+ - **State** — the deterministic source of truth held by the *engine*, not the model.
45
+ - **Memory** — short-term (transcript window) + long-term (fact ledger, history) + search (RAG)
46
+ over both. This is the "remember."
47
+ - **Contract** — the grammar/schema the LLM MUST emit to propose a delta (narration + state change).
48
+ Structure at the boundary, free reasoning inside.
49
+
50
+ Everything follows from this:
51
+ - **World** = a simulacrum whose state is the *environment* (places, objects, NPCs, time), and the
52
+ loop is exploration.
53
+ - **Persona** = a simulacrum whose state is the *agent* (mood, knowledge, relationship, goals), and
54
+ the loop is conversation.
55
+ - **NPC** = a persona-simulacrum *embedded* in a world-simulacrum. When the player talks to it, the
56
+ world loop delegates that entity's turn to the persona loop, then returns the result as a world
57
+ delta.
58
+
59
+ That is why persona-in-world is a **composition of simulacra**, not new machinery. You build one
60
+ entity abstraction and two blueprint schemas; the crossover is free (but *ships* last — section 4).
61
+
62
+ ---
63
+
64
+ ## 2. Layers
65
+
66
+ ```
67
+ ┌─────────────────────────────────────────────────────────┐
68
+ │ Clients (thin): TUI (Textual) | Web UI (FastAPI) │
69
+ ├─────────────────────────────────────────────────────────┤
70
+ │ simula-core (library) │
71
+ │ • Backend abstraction (llama.cpp local | OpenAI-compat)│
72
+ │ • Constrained output (GBNF local | json_schema/tools) │
73
+ │ • RAG (sqlite-vec + FTS5, e5-small embeddings) │
74
+ │ • State engine (sqlite, source of truth) + Fact ledger │
75
+ │ • Memory (short/long-term + search) │
76
+ │ • Turn loop (ORORO-minimal) │
77
+ │ • Eval rig (style-fidelity, drift, commit-rate) │
78
+ ├─────────────────────────────────────────────────────────┤
79
+ │ Blueprint layer: World blueprint | Persona blueprint │
80
+ ├─────────────────────────────────────────────────────────┤
81
+ │ Workspace: ~/simula-workspace/ (sqlite, materials) │
82
+ └─────────────────────────────────────────────────────────┘
83
+ ```
84
+
85
+ Clients are thin — all logic is in `simula-core`. TUI and web are two skins over the same core.
86
+
87
+ ---
88
+
89
+ ## 3. Turn loop (ORORO-minimal)
90
+
91
+ One turn, identical for world and persona (the difference is only the blueprint and the delta
92
+ schema):
93
+
94
+ 1. **Observe** — take the user's input.
95
+ 2. **Retrieve** — RAG fetches relevant passages from the materials + relevant facts from the ledger
96
+ (grounding against *narrative* drift, not just bookkeeping drift).
97
+ 3. **React** — assemble a MINIMAL prompt: commit directive + blueprint spine + retrieved exemplars
98
+ + current state + transcript window + user input.
99
+ 4. **Constrain** — call the backend with a contract (GBNF locally / json_schema on OpenAI-compat).
100
+ Output = `{ narration, deltas[] }`, guaranteed parsable.
101
+ 5. **Validate & apply** — the engine checks deltas against state and the ledger (reject invalid ones
102
+ — e.g., taking an object that isn't there), applies the valid ones, writes to the ledger.
103
+ 6. **Persist & render** — save state, render the narration.
104
+
105
+ The commit directive is the most valuable part of the prompt (lesson from the tests: the model's
106
+ failure mode is hedging → generic mush). Roughly: *"Commit to a concrete, tangible detail rooted in
107
+ the texture of this world/persona. Never retreat into generic fantasy or vagueness."*
108
+
109
+ ---
110
+
111
+ ## 4. Persona-in-world — my view
112
+
113
+ The idea is good and not peripheral: it is the *reason* the entity model is unified. If both world
114
+ and persona are "simulacrum = (blueprint, state, memory, contract)," then a persona in a world is
115
+ just a rich NPC — the world loop delegates its turn to the persona loop. The abstraction allows it
116
+ from day one, so we should design it *in* immediately (uniform entities), even before we use it.
117
+
118
+ But to be honest about the cost, so that we don't build it too early:
119
+ - Two LLM-driven entities, each with their own state, mean *two* calls per turn (latency) and
120
+ *multiplied* drift risk — the persona must stay true to itself AND fit the world's tone.
121
+ - That is the hardest coherence case. The "measure where it fails" lesson: don't build the hardest
122
+ thing first.
123
+
124
+ So: **the architecture allows it from the start, but it ships last** (Phase 6), only once the eval
125
+ rig shows that the drift of a *single* entity is under control. Love the idea, defer the execution.
126
+
127
+ ---
128
+
129
+ ## 5. Backend abstraction (local-first, but always OpenAI-compat)
130
+
131
+ One interface, two adapters (see `simula/backends.py`):
132
+
133
+ ```
134
+ complete(messages, *, contract=None, temperature, max_tokens) -> str
135
+ ```
136
+
137
+ - **LlamaCppBackend** (primary): HTTP to a local server (:18083). Constrained output via a
138
+ **GBNF grammar** (the `grammar` field) — a guarantee of valid JSON at decode time. Embeddings
139
+ local (e5-small). This is the default.
140
+ - **OpenAICompatBackend**: any OpenAI-compatible endpoint + key + model. Constrained output via
141
+ `response_format: json_schema` or tool-calling; a parse-and-repair fallback loop if the model
142
+ doesn't support it. Embeddings: either their endpoint or still local e5 (recommended: local e5,
143
+ to avoid coupling).
144
+
145
+ The backend choice lives in `simula.toml` in the workspace. The "Contract" is the structured-output
146
+ abstraction implemented differently per backend — it is the reliability backbone.
147
+
148
+ ---
149
+
150
+ ## 6. Workspace (installed on the user's machine)
151
+
152
+ Location via `platformdirs` (cross-platform), default `~/simula-workspace/`:
153
+
154
+ ```
155
+ simula-workspace/
156
+ simula.toml # config: backend, endpoint, model, key, experience mode
157
+ materials/ # the user's books/texts (theirs, local)
158
+ library.sqlite # RAG (sqlite-vec + FTS5) + state + memory + ledger
159
+ blueprints/ # distilled world/persona blueprints (JSON)
160
+ saves/ # experience snapshots / transcripts
161
+ evals/ # eval rig results
162
+ ```
163
+
164
+ The user adds books through the TUI or web (which copies them into `materials/` and runs the
165
+ ingest). Everything is inspectable and portable. **We do not ship a corpus** — the user brings
166
+ their own (section 9).
167
+
168
+ ---
169
+
170
+ ## 7. Cross-platform
171
+
172
+ The core is Python (Linux/Mac/Windows). Rules:
173
+ - No Linux-only dependencies; `pathlib` everywhere, no bash assumptions in the core.
174
+ - TUI: **Textual** (modern, cross-platform). Web: **FastAPI** + a thin frontend.
175
+ - sqlite-vec works cross-platform; e5-small via `sentence-transformers`/`llama.cpp` embeddings.
176
+ - The llama.cpp backend is just HTTP to a server the user runs (or uses OpenAI-compat), so the core
177
+ doesn't depend on the server's platform.
178
+ - Workspace locations via `platformdirs`.
179
+
180
+ Development happens on Linux Mint; CI also tests Windows and macOS (a matrix) before every release.
181
+
182
+ ---
183
+
184
+ ## 8. Reliability: against drift
185
+
186
+ Two levels of drift, two cures:
187
+ - **Bookkeeping** (inventory, location): state outside the model + constrained deltas + validation
188
+ in the engine. The model proposes, the engine adjudicates.
189
+ - **Narrative/tonal/factual** (forgets an established fact, betrays the tone, contradicts itself):
190
+ RAG grounding on the materials every turn + a **fact ledger** (a running record of established
191
+ facts) that is both retrieved and used to validate contradictions.
192
+
193
+ The second level is the real hard problem (the "measure where it fails" lesson). simula's
194
+ engineering value lives there, not in the beauty of a single turn.
195
+
196
+ ---
197
+
198
+ ## 9. Copyright / IP (important for a public GitHub)
199
+
200
+ - We ship the **engine, never the corpus**. The user brings their own books into `materials/`.
201
+ - The blueprint extracts *texture* (tone, motifs, structure, lexicon as pointers), it does not
202
+ reproduce text. Add a guard against long verbatim passages in the narration.
203
+ - Result: a world *in the texture* of some author, not a transcript. Legal hygiene and better art.
204
+
205
+ ---
206
+
207
+ ## 10. Eval rig — the backbone, not an afterthought
208
+
209
+ Repurposes our apparatus from the test series (conditions, ablation, pre-registration). It measures:
210
+ - **style-fidelity** — embedding distance of the output to the corpus (does it sound like the
211
+ world/persona).
212
+ - **drift** — the number of contradictions of the output against the fact ledger across N turns.
213
+ - **commit-rate** — the fraction of turns with a concrete, anchored detail vs generic mush
214
+ (anti-mush; measures whether the commit directive works).
215
+
216
+ Every prompt/RAG/backend change passes a fast ablation before adoption. Every phase has an eval gate.
217
+
218
+ ---
219
+
220
+ ## 11. Persona blueprint — Big Five / OCEAN via IPIP (and NOT MBTI/16Personalities)
221
+
222
+ The persona substrate is **Big Five / OCEAN**, instantiated via **IPIP** items — which are in the
223
+ **public domain** (https://ipip.ori.org), free to copy, modify, translate, and use commercially,
224
+ without permission or fee. This is a deliberate choice, both legal and scientific:
225
+
226
+ - **Legally (important for a public GitHub release):** MBTI is a trademark of the Myers-Briggs
227
+ company and a licensed instrument; 16Personalities is NOT MBTI but the NERIS framework with its
228
+ own brand, descriptions, archetype names ("Advocate," etc.), and graphics — all their property.
229
+ We do not reproduce their text, type names, or brand. The typology itself (the idea of axes,
230
+ four-letter labels) is not copyrightable, but because we ship a public, commercially usable
231
+ engine, we do not rely on a "hobby/personal" exception — we choose a clean public-domain
232
+ substrate. (This is not legal advice; specifics vary for Serbia/EU vs US users of the repo.)
233
+ - **Scientifically:** OCEAN is empirically robust and falsifiable, unlike MBTI dichotomies. That
234
+ matches the ethos of the test series (PRINCIPLES.md): measurable and honest, not a clinical claim.
235
+
236
+ Mapping: five **continuous** axes (O, C, E, A, N in [0,1]) → behavioral tendencies, register/voice
237
+ (from the materials if the persona is "after" a corpus), values, mannerisms, and its own "history" in
238
+ memory. If a discrete seed is needed for generative convenience, we **bucket the continuous scores
239
+ into our own archetypes with our own names and descriptions** (the `archetype` field, optional) —
240
+ never anyone else's names or text. See `schemas/persona_blueprint.schema.json` (`ocean` instead of
241
+ the old `lattice`).
242
+
243
+ ---
244
+
245
+ ## 12. Build phases (each with an eval gate)
246
+
247
+ - **Phase 0 — skeleton.** Workspace bootstrap, config, backend adapter (llama.cpp + openai),
248
+ contract abstraction, a "hello world" turn loop end-to-end on *empty* content. (First make it work
249
+ end-to-end on empty, then add pieces.)
250
+ - **Phase 1 — ingest + RAG.** Add books → chunk → embed → retrieve. Gate: retrieval relevance on
251
+ manual queries.
252
+ - **Phase 2 — worldbuilding (the founding idea).** World DISTILL (corpus → world blueprint) +
253
+ worldbuilding PLAY loop + STATE + fact ledger. TUI client. First playable world: **Kipple** (PKD).
254
+ - **Phase 3 — eval rig.** style-fidelity, drift, commit-rate; ablate prompt/RAG choices. Gate:
255
+ drift under control across a long session.
256
+ - **Phase 4 — persona.** Persona DISTILL (OCEAN/IPIP substrate × material → persona blueprint) +
257
+ persona PLAY loop (conversation, consistency).
258
+ - **Phase 5 — web UI.** A thin client over the core (FastAPI). Adding books through the web.
259
+ - **Phase 6 — persona-in-world.** The uniform entity model pays off: a persona as a rich NPC. Only
260
+ once Phase 3 shows controlled drift for a single entity.
261
+
262
+ The rule across all phases: add a component only when ablation shows a delta or prevents a
263
+ regression. The thinnest harness that works, then growth by proof — the inverse of "970/1000 of
264
+ machinery richness."
265
+
266
+ ---
267
+
268
+ ## 13. Explicit non-goals (anti-inflation)
269
+
270
+ - No large world ontology in the prompt (pointers + spine instead).
271
+ - No rigid multi-step reasoning templates in the system prompt (rigidity hurts — see dim_masina in
272
+ `PRINCIPLES.md`).
273
+ - No self-modifying "super-exo" layer in v1 (defeasible heuristics later, if ablation asks).
274
+ - No LangChain or heavy abstractions — direct calls.
275
+ - No Z-machine/Zork fork — the wrong foundation for corpus→world.
@@ -0,0 +1,48 @@
1
+ # PRINCIPLES — lessons the test series earned
2
+
3
+ This file exists so that a future builder (human or agent) does not push the project back into
4
+ over-design. Everything below is *empirically derived* from a series of experiments on sign-type
5
+ discrimination on Gemma 12B (local, llama.cpp), not a stylistic preference. The details are in the
6
+ project history; here are the engineering consequences.
7
+
8
+ ## 1. The 12B model's failure mode is OVER-HEDGING, not false affirmation.
9
+ Without a frame, the model rejects even valid conclusions. Replicated across three versions
10
+ (license-acc bare: 0.00 → 0.14 → 0.44; always the lowest). Consequence: the most valuable part of
11
+ the prompt is the *commit directive* ("commit to the concrete, don't hedge without reason"). In
12
+ narration this translates to: a concrete, anchored detail instead of generic mush.
13
+
14
+ ## 2. The content of the frame is often irrelevant; the effect is "attention/de-hedging," not ontology.
15
+ B' = B = 1.00: empty hermeneutic jargon licenses just as well as naming the exact concepts. Two
16
+ full conditions maxed out every item. Consequence: **ontology/persona is decoration until ablation
17
+ proves otherwise.** Blueprint = pointers into the material + a tiny spine, not a large ontology.
18
+
19
+ ## 3. An elaborate procedure does not beat plain framing — and can trip you up.
20
+ Operational procedure C did not outperform priming, and in one case (dim_masina) its own rigid step
21
+ led it into an error that a freer model avoided. Consequence: no rigid reasoning templates in the
22
+ system prompt. Structure goes on the I/O boundary, not into the model's head.
23
+
24
+ ## 4. State and facts live OUTSIDE the model; the model proposes constrained deltas.
25
+ The only structure that worked everywhere and never hurt was forced, parsable output. Consequence:
26
+ GBNF (local) / json_schema (OpenAI-compat) for deltas; the engine is the source of truth.
27
+
28
+ ## 5. Drift is the real front. Measure where the model actually fails.
29
+ Ceiling-saturated metrics teach nothing (block-acc was 1.00 everywhere → a dead signal).
30
+ Consequence: don't spend measurement on what the model already passes; target long-horizon
31
+ coherence (narrative/factual drift), not the beauty of a single turn.
32
+
33
+ ## 6. The eval rig is the product; the skeleton/prompt is a consumable.
34
+ Every "richer is better" intuition fell as soon as a real control was added — three times.
35
+ Consequence: on 12B you don't trust prompt intuition; every change passes a differential eval
36
+ (condition A/B + ablation + a pre-registered threshold). It is a permanent part of the system, not a
37
+ one-off experiment.
38
+
39
+ ## 7. The lessons are scale-dependent.
40
+ On 4B, empty verbosity HURTS, and naming the types helps (the opposite of 12B). Consequence: if
41
+ simula falls back to a smaller model (fallback machines), the prompt strategy is NOT the same — more
42
+ skeleton for the smaller, less for the larger. Don't assume the 12B recipe works on 4B.
43
+
44
+ ## 8. Build the thinnest, grow by proof.
45
+ The strongest move of the whole series would be: start from the thinnest harness, add piece by piece
46
+ only when ablation shows a delta. This is the inverse of plans that score high on *machinery
47
+ richness* they cannot show actually works. Tokens and rigidity are paid for; the benefit must be
48
+ proven.
simula-0.1.0/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # simula
2
+
3
+ **A local-first engine for generating and inhabiting worlds and personas from your own materials.**
4
+
5
+ One engine, two blueprint types (`world` | `persona`), one unified entity model (`Simulacrum`).
6
+ Local-first (llama.cpp + GBNF for hard-constrained output), but always able to run against any
7
+ OpenAI-compatible endpoint.
8
+
9
+ > **Status:** early alpha (Phase 0). The core is still a skeleton — see [`PLAN.md`](PLAN.md) for the
10
+ > implementation phases and [`PRINCIPLES.md`](PRINCIPLES.md) for the empirically derived lessons
11
+ > that drive the design.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pip install simula
17
+ ```
18
+
19
+ ## Quick start
20
+
21
+ ```bash
22
+ simula --version
23
+ simula init # create a workspace (materials/ blueprints/ saves/ evals/)
24
+ simula where # print the workspace path
25
+ ```
26
+
27
+ The workspace lives at a platform-appropriate path (via `platformdirs`), falling back to
28
+ `~/simula-workspace`. **No corpus is ever shipped** — you bring your own materials.
29
+
30
+ ## Configuration
31
+
32
+ Copy `simula.toml.example` into your workspace as `simula.toml` and edit the backend (llama.cpp or
33
+ OpenAI-compatible), embeddings, RAG, and experience mode (`world` | `persona`).
34
+
35
+ ## Design in brief
36
+
37
+ - **Constrained output is the reliability backbone:** GBNF on llama.cpp's `/completion`,
38
+ `json_schema` on OpenAI-compatible backends, with a parse-and-repair fallback.
39
+ - **Minimal prompt:** a commit directive + the blueprint spine + pointers into your materials (RAG),
40
+ not a large ontology.
41
+ - **Local-first and private:** embeddings and generation can stay on your own machine.
42
+ - **The engine holds the truth:** the LLM only *proposes* structured changes; the engine validates
43
+ and applies them against authoritative state.
44
+
45
+ ## Documentation
46
+
47
+ Full docs: **https://pedjaurosevic.github.io/simula/**
48
+
49
+ ## License
50
+
51
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,31 @@
1
+ # turn_output.gbnf
2
+ # Hard-constrains llama.cpp output to a valid TurnOutput JSON object (see schemas/turn_output.schema.json).
3
+ # Pass via the `grammar` field of llama.cpp's native /completion endpoint.
4
+ # This is the reliability backbone on the local backend: valid structure is GUARANTEED at decode time,
5
+ # not hoped for. (PRINCIPLES.md #4)
6
+ # Key order is fixed by this grammar: narration, deltas; and within a delta: op, target, [value], [reason].
7
+ root ::= obj
8
+ obj ::= "{" ws "\"narration\"" ws ":" ws string ws "," ws "\"deltas\"" ws ":" ws deltas ws "}"
9
+ # A GBNF rule may only continue across lines inside parentheses (or right after `|`), hence the outer ( ... ) on delta.
10
+ deltas ::= "[" ws ( delta ( ws "," ws delta )* )? ws "]"
11
+ delta ::= ( "{" ws
12
+ "\"op\"" ws ":" ws op ws "," ws
13
+ "\"target\"" ws ":" ws string
14
+ ( ws "," ws "\"value\"" ws ":" ws value )?
15
+ ( ws "," ws "\"reason\"" ws ":" ws string )?
16
+ ws "}" )
17
+
18
+ op ::= "\"set\"" | "\"add\"" | "\"remove\"" | "\"move\"" | "\"flag\"" | "\"fact\""
19
+
20
+ value ::= string | number | "true" | "false" | "null"
21
+ # JSON strings must not contain raw control characters (U+0000-U+001F); they have to be escaped. DEL is excluded too.
22
+ string ::= "\"" char* "\""
23
+ char ::= [^"\\\x7F\x00-\x1F] | "\\" ( ["\\/bfnrt] | "u" hex hex hex hex )
24
+ hex ::= [0-9a-fA-F]
25
+
26
+ number ::= "-"? int frac? exp?
27
+ int ::= "0" | [1-9] [0-9]*
28
+ frac ::= "." [0-9]+
29
+ exp ::= ("e" | "E") ("+" | "-")? [0-9]+
30
+ # NOTE(review): ws is unbounded, so a model may emit whitespace until max_tokens; consider capping it.
31
+ ws ::= [ \t\n\r]*
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+ # Package metadata. User-facing strings stay in English, matching README.md and PLAN.md.
5
+ [project]
6
+ name = "simula"
7
+ version = "0.1.0"
8
+ description = "A local-first engine for generating and inhabiting worlds and personas from your own materials."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Peter Ofovik", email = "pedjaurosevic@gmail.com" }]
13
+ keywords = ["llm", "simulation", "worldbuilding", "persona", "local-first", "llama.cpp", "gbnf"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ "Operating System :: OS Independent",
24
+ ]
25
+ dependencies = [
26
+ "platformdirs>=4.0",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ openai = ["requests>=2.28"]
31
+ embeddings = ["sentence-transformers>=2.2"]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/pedjaurosevic/simula"
35
+ Repository = "https://github.com/pedjaurosevic/simula"
36
+
37
+ [project.scripts]
38
+ simula = "simula.__main__:main"
39
+
40
+ [tool.setuptools]
41
+ packages = ["simula"]
42
+
43
+ [tool.setuptools.package-data]
44
+ simula = ["py.typed"]
@@ -0,0 +1,68 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "PersonaBlueprint",
4
+ "description": "Distilled spine of a persona. The trait substrate is Big Five / OCEAN, instantiated via public-domain IPIP item pools (https://ipip.ori.org, public domain). Used ONLY as a deterministic character-generation substrate, NOT as validated clinical psychology (see PLAN.md #11). No MBTI/16Personalities/NERIS marks, type names, or descriptions are used (legal hygiene for public release). Mostly pointers into materials + tiny spine, same discipline as WorldBlueprint.",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": ["id", "name", "ocean", "voice", "values"],
8
+ "properties": {
9
+ "id": { "type": "string" },
10
+ "name": { "type": "string" },
11
+ "source_note": { "type": "string" },
12
+ "ocean": {
13
+ "type": "object",
14
+ "additionalProperties": false,
15
+ "description": "Big Five / OCEAN as a generative substrate (IPIP, public domain). Each axis is a continuous position in [0,1], not a diagnosis. Continuous so the engine can derive tendencies; discretize into archetypes only if desired.",
16
+ "required": ["openness", "conscientiousness", "extraversion", "agreeableness", "neuroticism"],
17
+ "properties": {
18
+ "openness": { "type": "number", "minimum": 0, "maximum": 1 },
19
+ "conscientiousness": { "type": "number", "minimum": 0, "maximum": 1 },
20
+ "extraversion": { "type": "number", "minimum": 0, "maximum": 1 },
21
+ "agreeableness": { "type": "number", "minimum": 0, "maximum": 1 },
22
+ "neuroticism": { "type": "number", "minimum": 0, "maximum": 1 }
23
+ }
24
+ },
25
+ "archetype": {
26
+ "type": "object",
27
+ "additionalProperties": false,
28
+ "description": "OPTIONAL discrete seed for generative convenience: our OWN named archetype derived by bucketing the continuous OCEAN scores. Name and description MUST be original (never 'Advocate', 'Logician', etc. or any NERIS/MBTI text).",
29
+ "required": ["name", "summary"],
30
+ "properties": {
31
+ "name": { "type": "string", "description": "Original archetype label. Project-owned, not borrowed from any type system." },
32
+ "summary": { "type": "string", "description": "1-2 sentence original description of the archetype's flavor." }
33
+ }
34
+ },
35
+ "voice": {
36
+ "type": "object",
37
+ "additionalProperties": false,
38
+ "required": ["summary", "exemplar_refs"],
39
+ "properties": {
40
+ "summary": { "type": "string", "description": "Register, cadence, verbal tics in 1-3 sentences." },
41
+ "exemplar_refs": {
42
+ "type": "array",
43
+ "items": { "type": "string" },
44
+ "description": "RAG chunk ids exemplifying the voice (if persona is derived from a corpus). POINTERS only."
45
+ }
46
+ }
47
+ },
48
+ "values": {
49
+ "type": "array",
50
+ "items": { "type": "string" },
51
+ "description": "What the persona cares about / will defend or pursue."
52
+ },
53
+ "quirks": {
54
+ "type": "array",
55
+ "items": { "type": "string" },
56
+ "description": "Behavioral tendencies / mannerisms derived from the OCEAN substrate + materials."
57
+ },
58
+ "boundaries": {
59
+ "type": "array",
60
+ "items": { "type": "string" },
61
+ "description": "Consistency constraints the engine enforces (things the persona would not say/do)."
62
+ },
63
+ "history": {
64
+ "type": "string",
65
+ "description": "Backstory seed that seeds long-term memory at instantiation."
66
+ }
67
+ }
68
+ }
@@ -0,0 +1,42 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "TurnOutput",
4
+ "description": "What the LLM must emit each turn. The engine owns truth; the model only PROPOSES deltas. Validated and applied by the engine, never trusted blindly.",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": ["narration", "deltas"],
8
+ "properties": {
9
+ "narration": {
10
+ "type": "string",
11
+ "description": "Prose shown to the player. Must be concrete and grounded in blueprint texture; never generic mush (see commit-directive)."
12
+ },
13
+ "deltas": {
14
+ "type": "array",
15
+ "description": "Proposed state changes. Empty array is valid (pure description / refused action).",
16
+ "items": {
17
+ "type": "object",
18
+ "additionalProperties": false,
19
+ "required": ["op", "target"],
20
+ "properties": {
21
+ "op": {
22
+ "type": "string",
23
+ "enum": ["set", "add", "remove", "move", "flag", "fact"],
24
+ "description": "set: set a state field; add/remove: inventory or set membership; move: relocate entity; flag: boolean world flag; fact: append an established fact to the ledger."
25
+ },
26
+ "target": {
27
+ "type": "string",
28
+ "description": "Dotted path or entity id the op applies to, e.g. 'player.location', 'inventory', 'npc.rachel.mood'."
29
+ },
30
+ "value": {
31
+ "description": "New value / item / fact text. Type depends on op.",
32
+ "type": ["string", "number", "boolean", "null"]
33
+ },
34
+ "reason": {
35
+ "type": "string",
36
+ "description": "Optional short justification, grounded in retrieved material or ledger. Used by the engine for validation/audit."
37
+ }
38
+ }
39
+ }
40
+ }
41
+ }
42
+ }
@@ -0,0 +1,71 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "WorldBlueprint",
4
+ "description": "Distilled spine of a world. Deliberately small: mostly POINTERS into the user's materials plus a tiny summary. Not a big ontology (see PRINCIPLES.md #2).",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": ["id", "title", "tone", "rules", "lexicon", "seeds"],
8
+ "properties": {
9
+ "id": { "type": "string" },
10
+ "title": { "type": "string" },
11
+ "source_note": {
12
+ "type": "string",
13
+ "description": "Short note on provenance. Engine never ships source text; this references the user's own materials."
14
+ },
15
+ "tone": {
16
+ "type": "object",
17
+ "additionalProperties": false,
18
+ "required": ["summary", "exemplar_refs"],
19
+ "properties": {
20
+ "summary": { "type": "string", "description": "1-3 sentences on register/mood." },
21
+ "exemplar_refs": {
22
+ "type": "array",
23
+ "items": { "type": "string" },
24
+ "description": "RAG chunk ids that exemplify the tone. POINTERS, not copied text."
25
+ }
26
+ }
27
+ },
28
+ "rules": {
29
+ "type": "array",
30
+ "items": { "type": "string" },
31
+ "description": "The world's 'physics'/conventions in a few terse lines (e.g. 'reality is unreliable; objects may be simulacra')."
32
+ },
33
+ "lexicon": {
34
+ "type": "array",
35
+ "items": { "type": "string" },
36
+ "description": "Signature vocabulary/coinages (e.g. 'kipple', 'kibble', 'empathy box')."
37
+ },
38
+ "seeds": {
39
+ "type": "object",
40
+ "additionalProperties": false,
41
+ "description": "Starting scaffolding: a few places/factions/archetypes as light seeds, each linking to material.",
42
+ "properties": {
43
+ "places": { "type": "array", "items": { "$ref": "#/$defs/seed" } },
44
+ "factions": { "type": "array", "items": { "$ref": "#/$defs/seed" } },
45
+ "archetypes": { "type": "array", "items": { "$ref": "#/$defs/seed" } }
46
+ }
47
+ },
48
+ "opening": {
49
+ "type": "object",
50
+ "additionalProperties": false,
51
+ "description": "Initial player state for a new save.",
52
+ "properties": {
53
+ "location": { "type": "string" },
54
+ "inventory": { "type": "array", "items": { "type": "string" } },
55
+ "intro": { "type": "string" }
56
+ }
57
+ }
58
+ },
59
+ "$defs": {
60
+ "seed": {
61
+ "type": "object",
62
+ "additionalProperties": false,
63
+ "required": ["name"],
64
+ "properties": {
65
+ "name": { "type": "string" },
66
+ "note": { "type": "string" },
67
+ "material_refs": { "type": "array", "items": { "type": "string" } }
68
+ }
69
+ }
70
+ }
71
+ }
simula-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,15 @@
1
+ """simula — a local-first engine for generating and inhabiting worlds and personas from the
2
+ user's own materials. See PLAN.md and PRINCIPLES.md.
3
+
4
+ One engine, two blueprint types (world | persona), one unified entity model (Simulacrum).
5
+ """
6
+ from .backends import Backend, Contract, Message, from_config
7
+ from .loop import Simulacrum, TurnResult, run_turn
8
+ from .workspace import bootstrap_workspace, default_workspace
9
+
10
+ __all__ = [
11
+ "Backend", "Contract", "Message", "from_config",
12
+ "Simulacrum", "TurnResult", "run_turn",
13
+ "bootstrap_workspace", "default_workspace",
14
+ ]
15
+ __version__ = "0.1.0"
@@ -0,0 +1,42 @@
1
+ """Minimal CLI entry point for `simula`.
2
+
3
+ Currently (Phase 0) it only exposes `--version`, `init` (workspace bootstrap), and `where`. The
4
+ engine is still a skeleton; see PLAN.md for the implementation phases.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import argparse
9
+
10
+ from . import __version__
11
+ from .workspace import bootstrap_workspace, default_workspace
12
+
13
+
14
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the selected `simula` subcommand.

    Subcommands:
        init [path]  Create the workspace folder tree at `path`, or at the default
                     workspace location when no path is given.
        where        Print the default workspace path.

    With no subcommand the help text is printed.

    Args:
        argv: Arguments to parse. None is handed straight to argparse, which then
            reads the process arguments.

    Returns:
        The process exit code; every path through this function returns 0. argparse
        itself may exit the process (e.g. for `--version` or invalid arguments).
    """
    # Local import: this module's top-level imports do not include pathlib.
    from pathlib import Path

    parser = argparse.ArgumentParser(
        prog="simula",
        description="A local-first engine for generating and inhabiting worlds and personas.",
    )
    parser.add_argument("--version", action="version", version=f"simula {__version__}")
    sub = parser.add_subparsers(dest="command")

    p_init = sub.add_parser("init", help="Create the workspace folder tree.")
    p_init.add_argument("path", nargs="?", default=None, help="Path (default: platform default).")

    sub.add_parser("where", help="Print the default workspace path.")

    args = parser.parse_args(argv)

    if args.command == "init":
        # argparse yields a plain str, but bootstrap_workspace() is declared to take a Path
        # and calls Path methods on it. Convert here; a missing/empty path means "default".
        target = Path(args.path) if args.path else None
        ws = bootstrap_workspace(target)
        print(f"Workspace ready: {ws}")
        return 0
    if args.command == "where":
        print(default_workspace())
        return 0

    parser.print_help()
    return 0
39
+
40
+
41
+ if __name__ == "__main__":
42
+ raise SystemExit(main())
@@ -0,0 +1,109 @@
1
+ """Backend abstraction for simula.
2
+
3
+ One interface, two adapters. Local-first (llama.cpp + GBNF) but always able to run against
4
+ any OpenAI-compatible endpoint. "Constrained output" is the reliability backbone and is
5
+ implemented differently per backend (PLAN.md #5, PRINCIPLES.md #4).
6
+
7
+ This is a SKELETON: contracts + docstrings. Implement per build phases. stdlib only at the
8
+ contract level; concrete adapters may use `requests`/`httpx` and `sentence-transformers`.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import Protocol, Sequence
15
+
16
+
17
@dataclass(frozen=True)
class Message:
    """One immutable chat message handed to a backend."""

    # Who is speaking: "system" | "user" | "assistant".
    role: str
    # The message text.
    content: str
21
+
22
+
23
@dataclass(frozen=True)
class Contract:
    """Describes how structured output should be constrained; the backend decides how to honor it.

    Exactly one of `gbnf_path` / `json_schema` is the primary mechanism. Backends fall back
    to a parse-and-repair loop if neither is supported.
    """

    # GBNF grammar file; used by llama.cpp's native /completion endpoint.
    gbnf_path: Path | None = None
    # JSON Schema; used by OpenAI-compatible backends (response_format/tools).
    json_schema: dict | None = None
32
+
33
+
34
class Backend(Protocol):
    """Interface for text generation and embeddings.

    Implementations MUST guarantee that, when a Contract is given, the returned string
    parses against it, raising on irrecoverable failure.
    """

    def complete(
        self,
        messages: Sequence[Message],
        *,
        contract: Contract | None = None,
        temperature: float = 0.2,
        max_tokens: int = 800,
    ) -> str:
        """Generate text for `messages`, optionally constrained by `contract`."""
        ...

    def embed(self, texts: Sequence[str]) -> list[list[float]]:
        """Return embedding vectors for `texts`."""
        ...
50
+
51
+
52
class LlamaCppBackend:
    """The default, local backend: talks to a llama.cpp server (e.g. :18083).

    Constrained output: prefer the native /completion endpoint with a `grammar` (GBNF)
    field, which guarantees valid structure at decode time. Embeddings stay local (e5-small).
    """

    def __init__(self, endpoint: str, model: str, *, prefer_native_grammar: bool = True) -> None:
        self.endpoint, self.model = endpoint, model
        self.prefer_native_grammar = prefer_native_grammar

    def complete(self, messages, *, contract=None, temperature=0.2, max_tokens=800) -> str:
        """Skeleton: not implemented yet (Phase 0); always raises NotImplementedError."""
        # Planned behavior:
        # - With contract.gbnf_path and prefer_native_grammar: POST /completion with
        #   {"prompt": render(messages), "grammar": gbnf_text, "temperature": ..., "n_predict": ...}
        #   (Gemma: fold system into the prompt; see PRINCIPLES.md note on system role.)
        # - Otherwise: POST /v1/chat/completions (no hard grammar; rely on the repair loop).
        raise NotImplementedError

    def embed(self, texts) -> list[list[float]]:
        """Skeleton: not implemented yet (Phase 1); always raises NotImplementedError."""
        # Planned: local e5-small (sentence-transformers) or llama.cpp /embedding.
        raise NotImplementedError
75
+
76
+
77
class OpenAICompatBackend:
    """Backend for any OpenAI-compatible endpoint, given a base URL, an API key and a model.

    Never store the key in config; read it from the environment.
    """

    def __init__(self, base_url: str, api_key: str, model: str, *, structured_output: str = "json_schema") -> None:
        self.base_url, self.api_key, self.model = base_url, api_key, model
        # Strategy name: "json_schema" | "tools" | "repair".
        self.structured_output = structured_output

    def complete(self, messages, *, contract=None, temperature=0.2, max_tokens=800) -> str:
        """Skeleton: not implemented yet (Phase 0); always raises NotImplementedError."""
        # Planned behavior per structured_output value:
        # - "json_schema": pass response_format with contract.json_schema.
        # - "tools": expose a single tool whose params == contract.json_schema; force tool_choice.
        # - "repair": free generation + JSON extraction + one repair retry.
        raise NotImplementedError

    def embed(self, texts) -> list[list[float]]:
        """Skeleton: not implemented yet; always raises NotImplementedError."""
        # Planned: default to local e5 even here (decoupling); only use remote embeddings
        # if configured.
        raise NotImplementedError
96
+
97
+
98
def from_config(cfg: dict) -> Backend:
    """Construct the backend selected by a parsed simula.toml dict. See simula.toml.example.

    Args:
        cfg: Parsed configuration. `cfg["backend"]["kind"]` picks the backend and
            `cfg["backend"][<kind>]` supplies its settings.

    Returns:
        A `LlamaCppBackend` for kind "llamacpp", or an `OpenAICompatBackend` for kind
        "openai_compat".

    Raises:
        KeyError: A required config key is missing, or the environment variable named by
            `backend.openai_compat.api_key_env` is not set.
        ValueError: `backend.kind` is not a known backend.
    """
    kind = cfg["backend"]["kind"]
    if kind == "llamacpp":
        c = cfg["backend"]["llamacpp"]
        return LlamaCppBackend(
            c["endpoint"],
            c["model"],
            prefer_native_grammar=c.get("prefer_native_grammar", True),
        )
    if kind == "openai_compat":
        import os

        c = cfg["backend"]["openai_compat"]
        # The config only names the environment variable; the key itself is read from the
        # environment so it is never stored in the config file.
        env_name = c["api_key_env"]
        api_key = os.environ.get(env_name)
        if api_key is None:
            # Still a KeyError (callers catching it keep working), but it now says which
            # setting and which variable need attention.
            raise KeyError(
                f"environment variable {env_name!r} (named by backend.openai_compat.api_key_env) "
                "is not set"
            )
        return OpenAICompatBackend(
            c["base_url"],
            api_key,
            c["model"],
            structured_output=c.get("structured_output", "json_schema"),
        )
    raise ValueError(f"unknown backend.kind: {kind}")
@@ -0,0 +1,74 @@
1
+ """The turn loop and the unified entity model.
2
+
3
+ Everything is a Simulacrum = (blueprint, state, memory, contract). A World is a simulacrum of
4
+ place; a Persona is a simulacrum of agent; an NPC is a Persona embedded in a World. The loop is
5
+ the same for both; only blueprint and applied-delta semantics differ. (PLAN.md #1, #3)
6
+
7
+ SKELETON: contracts + docstrings.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from .backends import Backend, Contract, Message
16
+
17
+
18
@dataclass
class Simulacrum:
    """A world or a persona (an NPC being a persona placed in a world)."""

    # Identifier of this simulacrum.
    id: str
    # Either "world" or "persona".
    kind: str
    # Blueprint data, validated against schemas/*.schema.json.
    blueprint: dict
    # Engine-owned source of truth; defaults to a fresh empty dict per instance.
    state: dict = field(default_factory=dict)
    # Memory and the ledger live in sqlite (library.sqlite); they are referenced by id
    # rather than held in RAM.
26
+
27
+
28
@dataclass
class TurnResult:
    """What a single turn produced: the narration plus the fate of each proposed delta."""

    # Narration text of the turn.
    narration: str
    # Deltas the engine actually applied (after validation).
    applied: list[dict]
    # Deltas rejected as invalid against state/ledger; kept for audit.
    rejected: list[dict]
33
+
34
+
35
+ COMMIT_DIRECTIVE = (
36
+ "Commit to a concrete, tangible detail rooted in the texture of this world/persona. "
37
+ "Never retreat into generic fantasy or vagueness. If an action is not feasible, say why, "
38
+ "concretely."
39
+ ) # The single highest-value prompt content (PRINCIPLES.md #1).
40
+
41
+
42
def run_turn(
    sim: Simulacrum,
    player_input: str,
    backend: Backend,
    *,
    retrieve,  # callable(query, top_k) -> list[chunk]
    ledger,  # fact ledger interface (read/append/contradicts)
    transcript_window: list[Message],
    contract: Contract,
    temperature: float = 0.2,
    max_tokens: int = 800,
) -> TurnResult:
    """Run one ORORO-minimal turn (PLAN.md #3). Skeleton: currently raises NotImplementedError.

    Intended steps:

    1. Observe   - `player_input` is given.
    2. Retrieve  - fetch grounding from materials plus relevant ledger facts.
    3. React     - assemble a MINIMAL prompt (commit-directive + blueprint spine +
                   exemplars + current state + transcript window + input).
    4. Constrain - backend.complete(..., contract=contract) -> guaranteed-parsable TurnOutput.
    5. Validate  - check each delta against state + ledger; apply the valid, reject the invalid.
    6. Persist   - left to the caller, who persists state and appends to ledger/transcript.

    Returns the narration together with the applied/rejected deltas. It does NOT mutate
    sqlite directly; the caller persists, which keeps this function pure-ish and testable
    for the eval rig.
    """
    raise NotImplementedError
68
+
69
+
70
def build_prompt(
    sim: Simulacrum,
    player_input: str,
    grounding: list,
    state: dict,
    transcript_window: list[Message],
) -> list[Message]:
    """Assemble the minimal prompt. Skeleton: currently raises NotImplementedError.

    The prompt is meant to stay thin: the blueprint spine plus pointers-grounding, not a
    big ontology (PRINCIPLES.md #2).
    """
    raise NotImplementedError
File without changes
@@ -0,0 +1,35 @@
1
+ """Workspace bootstrap. Cross-platform via platformdirs + pathlib (PLAN.md #6, #7).
2
+
3
+ Creates ~/simula-workspace (or platform default) with the standard layout, and never ships
4
+ any corpus: the user supplies their own materials (PLAN.md #9).
5
+
6
+ SKELETON.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ LAYOUT = ["materials", "blueprints", "saves", "evals"]
13
+
14
+
15
def default_workspace() -> Path:
    """Return the workspace path used when the caller does not supply one.

    The path is `<platformdirs user data dir for "simula">/workspace`. If platformdirs
    cannot be imported or its lookup raises, the result is `~/simula-workspace` instead.
    Nothing is created on disk here.
    """
    try:
        from platformdirs import user_data_dir

        data_root = Path(user_data_dir("simula"))
    except Exception:
        # Deliberate best-effort: any failure on the platformdirs route uses the home fallback.
        return Path.home() / "simula-workspace"
    return data_root / "workspace"
22
+
23
+
24
def bootstrap_workspace(path: Path | str | None = None) -> Path:
    """Create the workspace folder tree if missing and return its path.

    Args:
        path: Workspace root, as a `Path` or a plain string (the CLI passes the raw
            command-line string). None or an empty string selects `default_workspace()`.

    Returns:
        The workspace root as a `Path`. The root and one sub-directory per entry in
        `LAYOUT` exist when this returns; directories that already exist are left alone.
    """
    # Coerce to Path: a str has neither .mkdir() nor the `/` operator used below, so a
    # string argument used to fail with AttributeError.
    ws = Path(path) if path else default_workspace()
    ws.mkdir(parents=True, exist_ok=True)
    for sub in LAYOUT:
        (ws / sub).mkdir(exist_ok=True)
    cfg = ws / "simula.toml"
    if not cfg.exists():
        # Phase 0: copy simula.toml.example into place. (Not implemented yet, so no
        # config file is written.)
        pass
    # Phase 1: initialize library.sqlite (sqlite-vec + FTS5 tables).
    return ws
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: simula
3
+ Version: 0.1.0
4
+ Summary: Lokalno-prvi pogon za sazdavanje i naseljavanje svetova i persona iz korisnikovih materijala.
5
+ Author-email: Peter Ofovik <pedjaurosevic@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/pedjaurosevic/simula
8
+ Project-URL: Repository, https://github.com/pedjaurosevic/simula
9
+ Keywords: llm,simulation,worldbuilding,persona,local-first,llama.cpp,gbnf
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Operating System :: OS Independent
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: platformdirs>=4.0
23
+ Provides-Extra: openai
24
+ Requires-Dist: requests>=2.28; extra == "openai"
25
+ Provides-Extra: embeddings
26
+ Requires-Dist: sentence-transformers>=2.2; extra == "embeddings"
27
+ Dynamic: license-file
28
+
29
+ # simula
30
+
31
+ **A local-first engine for generating and inhabiting worlds and personas from your own materials.**
32
+
33
+ One engine, two blueprint types (`world` | `persona`), one unified entity model (`Simulacrum`).
34
+ Local-first (llama.cpp + GBNF for hard-constrained output), but always able to run against any
35
+ OpenAI-compatible endpoint.
36
+
37
+ > **Status:** early alpha (Phase 0). The core is still a skeleton — see [`PLAN.md`](PLAN.md) for the
38
+ > implementation phases and [`PRINCIPLES.md`](PRINCIPLES.md) for the empirically derived lessons
39
+ > that drive the design.
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install simula
45
+ ```
46
+
47
+ ## Quick start
48
+
49
+ ```bash
50
+ simula --version
51
+ simula init # create a workspace (materials/ blueprints/ saves/ evals/)
52
+ simula where # print the workspace path
53
+ ```
54
+
55
+ The workspace lives at a platform-appropriate path (via `platformdirs`), falling back to
56
+ `~/simula-workspace`. **No corpus is ever shipped** — you bring your own materials.
57
+
58
+ ## Configuration
59
+
60
+ Copy `simula.toml.example` into your workspace as `simula.toml` and edit the backend (llama.cpp or
61
+ OpenAI-compatible), embeddings, RAG, and experience mode (`world` | `persona`).
62
+
63
+ ## Design in brief
64
+
65
+ - **Constrained output is the reliability backbone:** GBNF on llama.cpp's `/completion`,
66
+ `json_schema` on OpenAI-compatible backends, with a parse-and-repair fallback.
67
+ - **Minimal prompt:** a commit directive + the blueprint spine + pointers into your materials (RAG),
68
+ not a large ontology.
69
+ - **Local-first and private:** embeddings and generation can stay on your own machine.
70
+ - **The engine holds the truth:** the LLM only *proposes* structured changes; the engine validates
71
+ and applies them against authoritative state.
72
+
73
+ ## Documentation
74
+
75
+ Full docs: **https://pedjaurosevic.github.io/simula/**
76
+
77
+ ## License
78
+
79
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,23 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ PLAN.md
4
+ PRINCIPLES.md
5
+ README.md
6
+ pyproject.toml
7
+ simula.toml.example
8
+ grammars/turn_output.gbnf
9
+ schemas/persona_blueprint.schema.json
10
+ schemas/turn_output.schema.json
11
+ schemas/world_blueprint.schema.json
12
+ simula/__init__.py
13
+ simula/__main__.py
14
+ simula/backends.py
15
+ simula/loop.py
16
+ simula/py.typed
17
+ simula/workspace.py
18
+ simula.egg-info/PKG-INFO
19
+ simula.egg-info/SOURCES.txt
20
+ simula.egg-info/dependency_links.txt
21
+ simula.egg-info/entry_points.txt
22
+ simula.egg-info/requires.txt
23
+ simula.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ simula = simula.__main__:main
@@ -0,0 +1,7 @@
1
+ platformdirs>=4.0
2
+
3
+ [embeddings]
4
+ sentence-transformers>=2.2
5
+
6
+ [openai]
7
+ requests>=2.28
@@ -0,0 +1 @@
1
+ simula
@@ -0,0 +1,44 @@
1
+ # simula.toml.example
2
+ # Copy to your workspace as simula.toml and edit.
3
+ # Local-first, but always able to run against any OpenAI-compatible endpoint.
4
+
5
+ [backend]
6
+ # "llamacpp" (default, local) | "openai_compat"
7
+ kind = "llamacpp"
8
+
9
+ [backend.llamacpp]
10
+ endpoint = "http://127.0.0.1:18083"
11
+ # llama.cpp serves an OpenAI-compatible /v1/chat/completions AND native /completion.
12
+ # Native /completion is preferred because it accepts a GBNF `grammar` for hard-constrained output.
13
+ model = "gemma-4-12b-it" # alias only; the loaded GGUF is what matters
14
+ prefer_native_grammar = true
15
+
16
+ [backend.openai_compat]
17
+ # Used when backend.kind = "openai_compat". Works with OpenAI, or any compatible server.
18
+ base_url = "https://api.openai.com/v1"
19
+ api_key_env = "OPENAI_API_KEY" # read from environment, never store the key here
20
+ model = "gpt-4o-mini"
21
+ # Structured output strategy for this backend: "json_schema" | "tools" | "repair"
22
+ structured_output = "json_schema"
23
+
24
+ [embeddings]
25
+ # Embeddings stay local by default to avoid coupling, even in openai_compat mode.
26
+ kind = "local_e5" # intfloat/multilingual-e5-small
27
+ # kind = "openai_compat" # alternatively, use the remote endpoint above
28
+
29
+ [generation]
30
+ temperature = 0.2 # low for consistency; tune via eval rig
31
+ max_tokens = 800
32
+
33
+ [experience]
34
+ # "world" | "persona"
35
+ mode = "world"
36
+ blueprint = "blueprints/kipple.world.json"
37
+
38
+ [rag]
39
+ top_k = 6
40
+ hybrid = true # sqlite-vec (dense) + FTS5 (lexical)
41
+
42
+ [workspace]
43
+ # Leave empty to use the platform default (chosen via platformdirs; falls back to ~/simula-workspace).
44
+ path = ""