smallwords 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smallwords-0.1.0/LICENSE +21 -0
- smallwords-0.1.0/MANIFEST.in +11 -0
- smallwords-0.1.0/PKG-INFO +175 -0
- smallwords-0.1.0/README.md +138 -0
- smallwords-0.1.0/RELEASING.md +39 -0
- smallwords-0.1.0/docs/conf.py +39 -0
- smallwords-0.1.0/docs/index.rst +164 -0
- smallwords-0.1.0/examples/README.md +35 -0
- smallwords-0.1.0/examples/_shared.py +67 -0
- smallwords-0.1.0/examples/pirate_greeting.py +116 -0
- smallwords-0.1.0/examples/readme_bridge_contrast.py +124 -0
- smallwords-0.1.0/examples/rewrite_technical_passage.py +128 -0
- smallwords-0.1.0/pyproject.toml +73 -0
- smallwords-0.1.0/scripts/check_documentation.py +75 -0
- smallwords-0.1.0/setup.cfg +4 -0
- smallwords-0.1.0/src/smallwords/__init__.py +29 -0
- smallwords-0.1.0/src/smallwords/_constraints.py +184 -0
- smallwords-0.1.0/src/smallwords/_spec_utils.py +26 -0
- smallwords-0.1.0/src/smallwords/_variant_data.py +288 -0
- smallwords-0.1.0/src/smallwords/data/__init__.py +4 -0
- smallwords-0.1.0/src/smallwords/data/basic_english_850.txt +856 -0
- smallwords-0.1.0/src/smallwords/data/moby_freq_alpha_898.txt +904 -0
- smallwords-0.1.0/src/smallwords/data/special_english_1475.txt +1481 -0
- smallwords-0.1.0/src/smallwords/grammar_builder.py +80 -0
- smallwords-0.1.0/src/smallwords/input_words.py +59 -0
- smallwords-0.1.0/src/smallwords/json_schema.py +42 -0
- smallwords-0.1.0/src/smallwords/prompts.py +62 -0
- smallwords-0.1.0/src/smallwords/py.typed +1 -0
- smallwords-0.1.0/src/smallwords/remix.py +73 -0
- smallwords-0.1.0/src/smallwords/resources.py +80 -0
- smallwords-0.1.0/src/smallwords/themes/__init__.py +13 -0
- smallwords-0.1.0/src/smallwords/themes/caveman.py +73 -0
- smallwords-0.1.0/src/smallwords/themes/pirate.py +76 -0
- smallwords-0.1.0/src/smallwords/types.py +108 -0
- smallwords-0.1.0/src/smallwords/validation.py +61 -0
- smallwords-0.1.0/src/smallwords/variants.py +123 -0
- smallwords-0.1.0/src/smallwords/wordlists.py +124 -0
- smallwords-0.1.0/src/smallwords.egg-info/PKG-INFO +175 -0
- smallwords-0.1.0/src/smallwords.egg-info/SOURCES.txt +46 -0
- smallwords-0.1.0/src/smallwords.egg-info/dependency_links.txt +1 -0
- smallwords-0.1.0/src/smallwords.egg-info/requires.txt +9 -0
- smallwords-0.1.0/src/smallwords.egg-info/top_level.txt +1 -0
- smallwords-0.1.0/tests/test_basics.py +73 -0
- smallwords-0.1.0/tests/test_examples.py +45 -0
- smallwords-0.1.0/tests/test_package_surface.py +61 -0
- smallwords-0.1.0/tests/test_prompt_helpers.py +70 -0
- smallwords-0.1.0/tests/test_schema.py +145 -0
- smallwords-0.1.0/tests/test_wordlists.py +84 -0
smallwords-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Chris McComb
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: smallwords
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Controlled-vocabulary prompts plus portable GBNF and JSON Schema resources for small-word English generation.
|
|
5
|
+
Author: Christopher McComb
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/cmccomb/smallwords
|
|
8
|
+
Project-URL: Documentation, https://cmccomb.github.io/smallwords/
|
|
9
|
+
Project-URL: Repository, https://github.com/cmccomb/smallwords.git
|
|
10
|
+
Project-URL: Issues, https://github.com/cmccomb/smallwords/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/cmccomb/smallwords/releases
|
|
12
|
+
Keywords: grammar,gbnf,json-schema,structured-output,controlled-language,simple-english
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
30
|
+
Requires-Dist: pydata-sphinx-theme>=0.16; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=7.1; extra == "dev"
|
|
33
|
+
Requires-Dist: ruff>=0.11; extra == "dev"
|
|
34
|
+
Requires-Dist: sphinx<9,>=8.1; extra == "dev"
|
|
35
|
+
Requires-Dist: twine>=6.1; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# smallwords
|
|
39
|
+
|
|
40
|
+
[CI status](https://github.com/cmccomb/smallwords/actions/workflows/ci.yml)
|
|
41
|
+
[PyPI package](https://pypi.org/project/smallwords/)
|
|
42
|
+
[Supported Python versions](https://pypi.org/project/smallwords/)
|
|
43
|
+
[MIT License](https://github.com/cmccomb/smallwords/blob/main/LICENSE)
|
|
44
|
+
[Documentation](https://cmccomb.github.io/smallwords/)
|
|
45
|
+
|
|
46
|
+
`smallwords` is a tiny Python package for controlled-vocabulary prompting plus
|
|
47
|
+
portable output resources. It keeps one wordlist at the center of the workflow
|
|
48
|
+
so prompt text, GBNF, JSON Schema, and post-generation validation all stay in
|
|
49
|
+
sync.
|
|
50
|
+
|
|
51
|
+
The package ships with a small set of bundled wordlists: direct source-backed
|
|
52
|
+
lists such as `moby_898`, `basic_850`, and `special_english_1475`, plus a
|
|
53
|
+
couple of intentionally themed remixes. By default, the built-ins also allow
|
|
54
|
+
slight family variants such as `go`, `goes`, and `going`.
|
|
55
|
+
|
|
56
|
+
It supports Python 3.10 and newer.
|
|
57
|
+
|
|
58
|
+
The hosted API-and-examples docs live at
|
|
59
|
+
[`cmccomb.github.io/smallwords`](https://cmccomb.github.io/smallwords/).
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install smallwords
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
For local development, create and activate a virtualenv first:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
python -m venv .venv
|
|
71
|
+
source .venv/bin/activate
|
|
72
|
+
python -m pip install -e ".[dev]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Quick Start
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from smallwords import OutputResources, OutputShape, allow_input_words, is_compliant
|
|
79
|
+
from smallwords.prompts import build_prompt
|
|
80
|
+
|
|
81
|
+
shape = OutputShape(max_words_per_line=24, max_lines=1)
|
|
82
|
+
spec = allow_input_words("basic_850", "How does a bridge work?")
|
|
83
|
+
resources = OutputResources.from_wordlist(spec, shape=shape)
|
|
84
|
+
prompt = build_prompt("explain", "How does a bridge work?", wordlist=spec)
|
|
85
|
+
schema = resources.json_schema(key="answer", title="bridge_explanation")
|
|
86
|
+
|
|
87
|
+
text = "A bridge is a structure that helps people and things move across a river or a deep place."
|
|
88
|
+
ok = is_compliant(text, spec)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The contrast is the point. `build_prompt(...)` is the soft instruction layer.
|
|
92
|
+
`OutputResources` gives you the matching hard constraints in both GBNF and JSON
|
|
93
|
+
Schema form. `is_compliant(...)` is the lightweight offline check.
|
|
94
|
+
|
|
95
|
+
If you want the model to be able to repeat topic or question terms such as
|
|
96
|
+
`bridge`, `neighbor`, or `order`, use `allow_input_words(...)` once and pass
|
|
97
|
+
that derived spec into the prompt, resources, and validation helpers together.
|
|
98
|
+
|
|
99
|
+
## Built-In Wordlists
|
|
100
|
+
|
|
101
|
+
- `moby_898`: the full normalized alpha-only Moby Words II frequency list
|
|
102
|
+
- `basic_850`: Charles Ogden's Basic English 850
|
|
103
|
+
- `special_english_1475`: Voice of America Special English
|
|
104
|
+
- `caveman_898`: a size-neutral surface-only `moby_898` remix with caveman adjustments
|
|
105
|
+
- `pirate_898`: a size-neutral `moby_898` remix with pirate adjustments
|
|
106
|
+
|
|
107
|
+
The bundled text files live in `src/smallwords/data/`. `moby_898`,
|
|
108
|
+
`basic_850`, and `special_english_1475` are direct source-backed lists.
|
|
109
|
+
`caveman_898` and `pirate_898` are derived size-neutral remixes built on top of
|
|
110
|
+
`moby_898`.
|
|
111
|
+
|
|
112
|
+
The themed remixes live in `src/smallwords/themes/caveman.py` and
|
|
113
|
+
`src/smallwords/themes/pirate.py`. If you want to build your own, use
|
|
114
|
+
`remix_wordlist(...)` with a base list plus curated additions and removals.
|
|
115
|
+
|
|
116
|
+
## Contrastive Example
|
|
117
|
+
|
|
118
|
+
This is the clearest way to see what `smallwords` is trying to do. Both blocks
|
|
119
|
+
below are genuine local Qwen outputs from April 5, 2026. The first uses a plain
|
|
120
|
+
prompt. The second uses the same base prompt plus an explicit `basic_850`
|
|
121
|
+
vocabulary list, the topic word `bridge`, and the generated GBNF.
|
|
122
|
+
|
|
123
|
+
A plain prompt stays fairly natural:
|
|
124
|
+
|
|
125
|
+
> A bridge connects two points, usually across a body of water or a gap,
|
|
126
|
+
> allowing people and vehicles to cross safely.
|
|
127
|
+
|
|
128
|
+
A constrained `basic_850 + topic words` run stays simpler while still sounding
|
|
129
|
+
reasonably natural:
|
|
130
|
+
|
|
131
|
+
> A bridge is a structure that helps people and things move across a river or a
|
|
132
|
+
> deep place.
|
|
133
|
+
|
|
134
|
+
These runs use `llama-server` from `llama.cpp` and
|
|
135
|
+
[`Qwen/Qwen3-8B-GGUF`](https://huggingface.co/Qwen/Qwen3-8B-GGUF)
|
|
136
|
+
via
|
|
137
|
+
[`bartowski/Qwen_Qwen3-8B-GGUF`](https://huggingface.co/bartowski/Qwen_Qwen3-8B-GGUF).
|
|
138
|
+
|
|
139
|
+
Reproduce that comparison from a clone of the repository with an activated
|
|
140
|
+
virtualenv:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
llama-server -hf bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m --host 127.0.0.1 --port 8080 --reasoning-budget 0 --log-disable
|
|
144
|
+
python examples/readme_bridge_contrast.py
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Examples
|
|
148
|
+
|
|
149
|
+
See the repository's
|
|
150
|
+
[`examples/README.md`](https://github.com/cmccomb/smallwords/blob/main/examples/README.md)
|
|
151
|
+
for the runnable examples. The current example set is live-model based:
|
|
152
|
+
the README bridge contrast, a focused pirate greeting, and a focused technical
|
|
153
|
+
rewrite all call a live `llama-server` model with a prompt plus generated
|
|
154
|
+
grammar.
|
|
155
|
+
|
|
156
|
+
## Development
|
|
157
|
+
|
|
158
|
+
Run these commands from an activated virtualenv:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
python -m pip install -e ".[dev]"
|
|
162
|
+
python -m ruff check .
|
|
163
|
+
python -m ruff format --check .
|
|
164
|
+
python -m pytest
|
|
165
|
+
python scripts/check_documentation.py
|
|
166
|
+
python -m sphinx -W --keep-going -b html docs docs/_build/html
|
|
167
|
+
python -m build
|
|
168
|
+
python -m twine check --strict dist/*
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
CI runs linting, tests, the documentation policy check, a `>=90%` coverage
|
|
172
|
+
gate, a Sphinx docs build, and a package build on GitHub Actions.
|
|
173
|
+
|
|
174
|
+
For release steps and Trusted Publishing setup, see
|
|
175
|
+
[`RELEASING.md`](https://github.com/cmccomb/smallwords/blob/main/RELEASING.md).
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# smallwords
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/cmccomb/smallwords/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI package](https://pypi.org/project/smallwords/)
|
|
5
|
+
[Supported Python versions](https://pypi.org/project/smallwords/)
|
|
6
|
+
[MIT License](https://github.com/cmccomb/smallwords/blob/main/LICENSE)
|
|
7
|
+
[Documentation](https://cmccomb.github.io/smallwords/)
|
|
8
|
+
|
|
9
|
+
`smallwords` is a tiny Python package for controlled-vocabulary prompting plus
|
|
10
|
+
portable output resources. It keeps one wordlist at the center of the workflow
|
|
11
|
+
so prompt text, GBNF, JSON Schema, and post-generation validation all stay in
|
|
12
|
+
sync.
|
|
13
|
+
|
|
14
|
+
The package ships with a small set of bundled wordlists: direct source-backed
|
|
15
|
+
lists such as `moby_898`, `basic_850`, and `special_english_1475`, plus a
|
|
16
|
+
couple of intentionally themed remixes. By default, the built-ins also allow
|
|
17
|
+
slight family variants such as `go`, `goes`, and `going`.
|
|
18
|
+
|
|
19
|
+
It supports Python 3.10 and newer.
|
|
20
|
+
|
|
21
|
+
The hosted API-and-examples docs live at
|
|
22
|
+
[`cmccomb.github.io/smallwords`](https://cmccomb.github.io/smallwords/).
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install smallwords
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
For local development, create and activate a virtualenv first:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
python -m venv .venv
|
|
34
|
+
source .venv/bin/activate
|
|
35
|
+
python -m pip install -e ".[dev]"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quick Start
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from smallwords import OutputResources, OutputShape, allow_input_words, is_compliant
|
|
42
|
+
from smallwords.prompts import build_prompt
|
|
43
|
+
|
|
44
|
+
shape = OutputShape(max_words_per_line=24, max_lines=1)
|
|
45
|
+
spec = allow_input_words("basic_850", "How does a bridge work?")
|
|
46
|
+
resources = OutputResources.from_wordlist(spec, shape=shape)
|
|
47
|
+
prompt = build_prompt("explain", "How does a bridge work?", wordlist=spec)
|
|
48
|
+
schema = resources.json_schema(key="answer", title="bridge_explanation")
|
|
49
|
+
|
|
50
|
+
text = "A bridge is a structure that helps people and things move across a river or a deep place."
|
|
51
|
+
ok = is_compliant(text, spec)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The contrast is the point. `build_prompt(...)` is the soft instruction layer.
|
|
55
|
+
`OutputResources` gives you the matching hard constraints in both GBNF and JSON
|
|
56
|
+
Schema form. `is_compliant(...)` is the lightweight offline check.
|
|
57
|
+
|
|
58
|
+
If you want the model to be able to repeat topic or question terms such as
|
|
59
|
+
`bridge`, `neighbor`, or `order`, use `allow_input_words(...)` once and pass
|
|
60
|
+
that derived spec into the prompt, resources, and validation helpers together.
|
|
61
|
+
|
|
62
|
+
## Built-In Wordlists
|
|
63
|
+
|
|
64
|
+
- `moby_898`: the full normalized alpha-only Moby Words II frequency list
|
|
65
|
+
- `basic_850`: Charles Ogden's Basic English 850
|
|
66
|
+
- `special_english_1475`: Voice of America Special English
|
|
67
|
+
- `caveman_898`: a size-neutral surface-only `moby_898` remix with caveman adjustments
|
|
68
|
+
- `pirate_898`: a size-neutral `moby_898` remix with pirate adjustments
|
|
69
|
+
|
|
70
|
+
The bundled text files live in `src/smallwords/data/`. `moby_898`,
|
|
71
|
+
`basic_850`, and `special_english_1475` are direct source-backed lists.
|
|
72
|
+
`caveman_898` and `pirate_898` are derived size-neutral remixes built on top of
|
|
73
|
+
`moby_898`.
|
|
74
|
+
|
|
75
|
+
The themed remixes live in `src/smallwords/themes/caveman.py` and
|
|
76
|
+
`src/smallwords/themes/pirate.py`. If you want to build your own, use
|
|
77
|
+
`remix_wordlist(...)` with a base list plus curated additions and removals.
|
|
78
|
+
|
|
79
|
+
## Contrastive Example
|
|
80
|
+
|
|
81
|
+
This is the clearest way to see what `smallwords` is trying to do. Both blocks
|
|
82
|
+
below are genuine local Qwen outputs from April 5, 2026. The first uses a plain
|
|
83
|
+
prompt. The second uses the same base prompt plus an explicit `basic_850`
|
|
84
|
+
vocabulary list, the topic word `bridge`, and the generated GBNF.
|
|
85
|
+
|
|
86
|
+
A plain prompt stays fairly natural:
|
|
87
|
+
|
|
88
|
+
> A bridge connects two points, usually across a body of water or a gap,
|
|
89
|
+
> allowing people and vehicles to cross safely.
|
|
90
|
+
|
|
91
|
+
A constrained `basic_850 + topic words` run stays simpler while still sounding
|
|
92
|
+
reasonably natural:
|
|
93
|
+
|
|
94
|
+
> A bridge is a structure that helps people and things move across a river or a
|
|
95
|
+
> deep place.
|
|
96
|
+
|
|
97
|
+
These runs use `llama-server` from `llama.cpp` and
|
|
98
|
+
[`Qwen/Qwen3-8B-GGUF`](https://huggingface.co/Qwen/Qwen3-8B-GGUF)
|
|
99
|
+
via
|
|
100
|
+
[`bartowski/Qwen_Qwen3-8B-GGUF`](https://huggingface.co/bartowski/Qwen_Qwen3-8B-GGUF).
|
|
101
|
+
|
|
102
|
+
Reproduce that comparison from a clone of the repository with an activated
|
|
103
|
+
virtualenv:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
llama-server -hf bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m --host 127.0.0.1 --port 8080 --reasoning-budget 0 --log-disable
|
|
107
|
+
python examples/readme_bridge_contrast.py
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Examples
|
|
111
|
+
|
|
112
|
+
See the repository's
|
|
113
|
+
[`examples/README.md`](https://github.com/cmccomb/smallwords/blob/main/examples/README.md)
|
|
114
|
+
for the runnable examples. The current example set is live-model based:
|
|
115
|
+
the README bridge contrast, a focused pirate greeting, and a focused technical
|
|
116
|
+
rewrite all call a live `llama-server` model with a prompt plus generated
|
|
117
|
+
grammar.
|
|
118
|
+
|
|
119
|
+
## Development
|
|
120
|
+
|
|
121
|
+
Run these commands from an activated virtualenv:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
python -m pip install -e ".[dev]"
|
|
125
|
+
python -m ruff check .
|
|
126
|
+
python -m ruff format --check .
|
|
127
|
+
python -m pytest
|
|
128
|
+
python scripts/check_documentation.py
|
|
129
|
+
python -m sphinx -W --keep-going -b html docs docs/_build/html
|
|
130
|
+
python -m build
|
|
131
|
+
python -m twine check --strict dist/*
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
CI runs linting, tests, the documentation policy check, a `>=90%` coverage
|
|
135
|
+
gate, a Sphinx docs build, and a package build on GitHub Actions.
|
|
136
|
+
|
|
137
|
+
For release steps and Trusted Publishing setup, see
|
|
138
|
+
[`RELEASING.md`](https://github.com/cmccomb/smallwords/blob/main/RELEASING.md).
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Releasing `smallwords`
|
|
2
|
+
|
|
3
|
+
This repository is set up for PyPI Trusted Publishing through GitHub Actions.
|
|
4
|
+
|
|
5
|
+
Before the first release:
|
|
6
|
+
|
|
7
|
+
1. Create the `smallwords` project on PyPI.
|
|
8
|
+
2. Configure a Trusted Publisher on PyPI for this GitHub repository.
|
|
9
|
+
3. Add the `pypi` GitHub Actions environment if you want environment-level protection rules.
|
|
10
|
+
|
|
11
|
+
Recommended release flow:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
python -m venv .venv
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
python -m pip install -e ".[dev]"
|
|
17
|
+
python -m ruff check .
|
|
18
|
+
python -m ruff format --check .
|
|
19
|
+
python -m pytest
|
|
20
|
+
python scripts/check_documentation.py
|
|
21
|
+
python -m sphinx -W --keep-going -b html docs docs/_build/html
|
|
22
|
+
python -m build
|
|
23
|
+
python -m twine check --strict dist/*
|
|
24
|
+
python -m venv /tmp/smallwords-release-check
|
|
25
|
+
/tmp/smallwords-release-check/bin/python -m pip install --upgrade pip
|
|
26
|
+
/tmp/smallwords-release-check/bin/python -m pip install dist/*.whl
|
|
27
|
+
/tmp/smallwords-release-check/bin/python -c "import smallwords; print(smallwords.__version__)"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Then:
|
|
31
|
+
|
|
32
|
+
1. Update `version` in `pyproject.toml`.
|
|
33
|
+
2. Commit the release.
|
|
34
|
+
3. Create and push a Git tag.
|
|
35
|
+
4. Publish a GitHub release from that tag.
|
|
36
|
+
|
|
37
|
+
The `Publish` workflow builds the distributions, validates them with `twine`,
|
|
38
|
+
and uploads them to PyPI through Trusted Publishing. The separate `Docs`
|
|
39
|
+
workflow builds the Sphinx site and deploys it to GitHub Pages from `main`.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Sphinx configuration for the compact project docs site."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Add the source tree so autodoc can import the local package during docs builds.
|
|
9
|
+
ROOT = Path(__file__).resolve().parents[1]
|
|
10
|
+
SRC = ROOT / "src"
|
|
11
|
+
if str(SRC) not in sys.path:
|
|
12
|
+
sys.path.insert(0, str(SRC))
|
|
13
|
+
|
|
14
|
+
from smallwords import __version__ # noqa: E402
|
|
15
|
+
|
|
16
|
+
project = "smallwords"
|
|
17
|
+
author = "Christopher McComb"
|
|
18
|
+
copyright = "2026, Christopher McComb"
|
|
19
|
+
version = __version__
|
|
20
|
+
release = __version__
|
|
21
|
+
|
|
22
|
+
extensions = [
|
|
23
|
+
"sphinx.ext.autodoc",
|
|
24
|
+
"sphinx.ext.napoleon",
|
|
25
|
+
"sphinx.ext.viewcode",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
|
29
|
+
autodoc_member_order = "bysource"
|
|
30
|
+
autodoc_typehints = "description"
|
|
31
|
+
napoleon_google_docstring = True
|
|
32
|
+
napoleon_numpy_docstring = False
|
|
33
|
+
|
|
34
|
+
html_theme = "pydata_sphinx_theme"
|
|
35
|
+
html_title = "smallwords"
|
|
36
|
+
html_theme_options = {
|
|
37
|
+
"github_url": "https://github.com/cmccomb/smallwords",
|
|
38
|
+
"show_toc_level": 2,
|
|
39
|
+
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
smallwords
|
|
2
|
+
==========
|
|
3
|
+
|
|
4
|
+
Large language models can follow "write simply" instructions, but that soft
|
|
5
|
+
guidance is often insufficient when a workflow also needs reproducible wording,
|
|
6
|
+
portable output constraints, or offline validation. Prompt-only approaches are
|
|
7
|
+
easy to start and hard to trust. They leave the active vocabulary implicit,
|
|
8
|
+
make failures difficult to diagnose, and force each integration to recreate the
|
|
9
|
+
same constraint logic in a different format.
|
|
10
|
+
|
|
11
|
+
``smallwords`` addresses that gap by keeping one controlled vocabulary at the
|
|
12
|
+
center of the workflow. From that single specification, the package can build
|
|
13
|
+
prompt text, GBNF, JSON Schema, and validation checks that stay aligned.
|
|
14
|
+
|
|
15
|
+
Install
|
|
16
|
+
-------
|
|
17
|
+
|
|
18
|
+
.. code-block:: bash
|
|
19
|
+
|
|
20
|
+
   pip install smallwords
|
|
21
|
+
|
|
22
|
+
Quick API
|
|
23
|
+
---------
|
|
24
|
+
|
|
25
|
+
Most integrations follow the same chain: choose a vocabulary, optionally add
|
|
26
|
+
task words, build the matching prompt and portable resources, then validate the
|
|
27
|
+
result.
|
|
28
|
+
|
|
29
|
+
.. autofunction:: smallwords.allow_input_words
|
|
30
|
+
|
|
31
|
+
.. autofunction:: smallwords.get_wordlist
|
|
32
|
+
|
|
33
|
+
.. autofunction:: smallwords.list_wordlists
|
|
34
|
+
|
|
35
|
+
.. autoclass:: smallwords.OutputShape
|
|
36
|
+
   :members:
|
|
37
|
+
|
|
38
|
+
.. autoclass:: smallwords.OutputResources
|
|
39
|
+
   :members:
|
|
40
|
+
|
|
41
|
+
.. autofunction:: smallwords.prompts.build_prompt
|
|
42
|
+
|
|
43
|
+
.. autofunction:: smallwords.is_compliant
|
|
44
|
+
|
|
45
|
+
.. autofunction:: smallwords.out_of_vocab
|
|
46
|
+
|
|
47
|
+
Built-ins
|
|
48
|
+
---------
|
|
49
|
+
|
|
50
|
+
The installed catalog currently includes:
|
|
51
|
+
|
|
52
|
+
- ``moby_898``
|
|
53
|
+
- ``basic_850``
|
|
54
|
+
- ``special_english_1475``
|
|
55
|
+
- ``caveman_898``
|
|
56
|
+
- ``pirate_898``
|
|
57
|
+
|
|
58
|
+
Use :func:`smallwords.list_wordlists` to inspect the installed catalog. Use
|
|
59
|
+
:func:`smallwords.get_wordlist` when a workflow needs the underlying
|
|
60
|
+
specification object directly.
|
|
61
|
+
|
|
62
|
+
Examples With Results
|
|
63
|
+
---------------------
|
|
64
|
+
|
|
65
|
+
The examples below were produced locally on April 5, 2026 with
|
|
66
|
+
``llama-server`` and ``bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m``.
|
|
67
|
+
|
|
68
|
+
Bridge Contrast
|
|
69
|
+
~~~~~~~~~~~~~~~
|
|
70
|
+
|
|
71
|
+
This comparison isolates the package's core claim. A plain prompt can already
|
|
72
|
+
produce a reasonable answer. However, it does not expose an explicit response
|
|
73
|
+
contract. The constrained run uses the same task, adds the active vocabulary to
|
|
74
|
+
the prompt, and applies the matching grammar.
|
|
75
|
+
|
|
76
|
+
.. code-block:: python
|
|
77
|
+
|
|
78
|
+
from smallwords import OutputResources, OutputShape, allow_input_words
|
|
79
|
+
from smallwords.prompts import build_prompt
|
|
80
|
+
|
|
81
|
+
spec = allow_input_words("basic_850", "How does a bridge work?")
|
|
82
|
+
shape = OutputShape(max_words_per_line=24, max_lines=1)
|
|
83
|
+
prompt = build_prompt("explain", "How does a bridge work?", wordlist=spec)
|
|
84
|
+
resources = OutputResources.from_wordlist(spec, shape=shape)
|
|
85
|
+
|
|
86
|
+
Plain prompt result:
|
|
87
|
+
|
|
88
|
+
A bridge connects two points, usually across a body of water or a gap, allowing people and vehicles to cross safely.
|
|
89
|
+
|
|
90
|
+
Constrained result:
|
|
91
|
+
|
|
92
|
+
A bridge is a structure that helps people and things move across a river or a deep place.
|
|
93
|
+
|
|
94
|
+
See ``examples/readme_bridge_contrast.py`` for the full prompt-plus-grammar run.
|
|
95
|
+
|
|
96
|
+
Pirate Greeting
|
|
97
|
+
~~~~~~~~~~~~~~~
|
|
98
|
+
|
|
99
|
+
This example starts from the built-in ``pirate_898`` list, selects a tiny
|
|
100
|
+
greeting-focused vocabulary, and then applies a matching grammar.
|
|
101
|
+
|
|
102
|
+
.. code-block:: python
|
|
103
|
+
|
|
104
|
+
from smallwords import OutputResources, OutputShape, WordlistSpec, get_wordlist
|
|
105
|
+
from smallwords.prompts import build_prompt
|
|
106
|
+
|
|
107
|
+
base = get_wordlist("pirate_898")
|
|
108
|
+
spec = WordlistSpec(
|
|
109
|
+
name="pirate_898_greeting_focus",
|
|
110
|
+
words=("ahoy", "good", "matey", "meet", "to", "you"),
|
|
111
|
+
source_name="Selected surface forms from pirate_898 for the pirate greeting example",
|
|
112
|
+
source_urls=base.source_urls,
|
|
113
|
+
license_name=base.license_name,
|
|
114
|
+
allowed_punctuation=(".",),
|
|
115
|
+
variant_mode="surface_only",
|
|
116
|
+
)
|
|
117
|
+
shape = OutputShape(min_words_per_line=6, max_words_per_line=6, max_lines=1)
|
|
118
|
+
prompt = build_prompt(
|
|
119
|
+
"answer",
|
|
120
|
+
"A pirate meets a new friend on a ship. What short friendly greeting should the pirate say?",
|
|
121
|
+
wordlist=spec,
|
|
122
|
+
)
|
|
123
|
+
resources = OutputResources.from_wordlist(spec, shape=shape)
|
|
124
|
+
|
|
125
|
+
Constrained result:
|
|
126
|
+
|
|
127
|
+
Ahoy matey good to meet you.
|
|
128
|
+
|
|
129
|
+
Technical Rewrite
|
|
130
|
+
~~~~~~~~~~~~~~~~~
|
|
131
|
+
|
|
132
|
+
This example selects a compact rewrite vocabulary from ``basic_850`` that
|
|
133
|
+
excludes the source terminology altogether. The output shape then forces one
|
|
134
|
+
short ten-word sentence.
|
|
135
|
+
|
|
136
|
+
.. code-block:: python
|
|
137
|
+
|
|
138
|
+
from smallwords import OutputResources, OutputShape, WordlistSpec, get_wordlist
|
|
139
|
+
from smallwords.prompts import build_prompt
|
|
140
|
+
|
|
141
|
+
base = get_wordlist("basic_850")
|
|
142
|
+
text = (
|
|
143
|
+
"The thermal controller derates propulsion output after the sensor array "
|
|
144
|
+
"reports an overtemperature fault."
|
|
145
|
+
)
|
|
146
|
+
spec = WordlistSpec(
|
|
147
|
+
name="basic_850_rewrite_focus",
|
|
148
|
+
words=("be", "cut", "engine", "heat", "high", "if", "power", "system", "this", "very", "when"),
|
|
149
|
+
source_name="Selected from basic_850 for the rewrite example",
|
|
150
|
+
source_urls=base.source_urls,
|
|
151
|
+
license_name=base.license_name,
|
|
152
|
+
allowed_punctuation=(".",),
|
|
153
|
+
)
|
|
154
|
+
shape = OutputShape(min_words_per_line=10, max_words_per_line=10, max_lines=1)
|
|
155
|
+
prompt = build_prompt("rewrite", text, wordlist=spec)
|
|
156
|
+
resources = OutputResources.from_wordlist(spec, shape=shape)
|
|
157
|
+
|
|
158
|
+
Constrained result:
|
|
159
|
+
|
|
160
|
+
When engine heat is very high this system cuts power.
|
|
161
|
+
|
|
162
|
+
The live scripts in ``examples/`` print the full prompt, grammar, schema, and
|
|
163
|
+
validation details. That fuller output matters because it lets a reader inspect
|
|
164
|
+
not only the answer, but also the exact constraint setup that produced it.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
This directory keeps three live llama.cpp examples:
|
|
4
|
+
|
|
5
|
+
- `readme_bridge_contrast.py`: the bridge comparison used in the root README
|
|
6
|
+
- `pirate_greeting.py`: a focused pirate greeting built from `pirate_898`
|
|
7
|
+
- `rewrite_technical_passage.py`: a focused technical rewrite built from `basic_850`
|
|
8
|
+
|
|
9
|
+
Create and activate a virtualenv first:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
python -m venv .venv
|
|
13
|
+
source .venv/bin/activate
|
|
14
|
+
python -m pip install -e ".[dev]"
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Then start a server, for example:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
llama-server -hf bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m --host 127.0.0.1 --port 8080 --reasoning-budget 0 --log-disable
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Run the examples from the project root:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
python examples/pirate_greeting.py
|
|
27
|
+
python examples/rewrite_technical_passage.py
|
|
28
|
+
python examples/readme_bridge_contrast.py
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
If your server uses a different address, set `SMALLWORDS_LLAMA_BASE_URL`.
|
|
32
|
+
|
|
33
|
+
Each script prints the prompt, output shape, matching grammar and schema
|
|
34
|
+
resources, the generated response, and whether the response stayed inside the
|
|
35
|
+
chosen vocabulary.
|