smallwords 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. smallwords-0.1.0/LICENSE +21 -0
  2. smallwords-0.1.0/MANIFEST.in +11 -0
  3. smallwords-0.1.0/PKG-INFO +175 -0
  4. smallwords-0.1.0/README.md +138 -0
  5. smallwords-0.1.0/RELEASING.md +39 -0
  6. smallwords-0.1.0/docs/conf.py +39 -0
  7. smallwords-0.1.0/docs/index.rst +164 -0
  8. smallwords-0.1.0/examples/README.md +35 -0
  9. smallwords-0.1.0/examples/_shared.py +67 -0
  10. smallwords-0.1.0/examples/pirate_greeting.py +116 -0
  11. smallwords-0.1.0/examples/readme_bridge_contrast.py +124 -0
  12. smallwords-0.1.0/examples/rewrite_technical_passage.py +128 -0
  13. smallwords-0.1.0/pyproject.toml +73 -0
  14. smallwords-0.1.0/scripts/check_documentation.py +75 -0
  15. smallwords-0.1.0/setup.cfg +4 -0
  16. smallwords-0.1.0/src/smallwords/__init__.py +29 -0
  17. smallwords-0.1.0/src/smallwords/_constraints.py +184 -0
  18. smallwords-0.1.0/src/smallwords/_spec_utils.py +26 -0
  19. smallwords-0.1.0/src/smallwords/_variant_data.py +288 -0
  20. smallwords-0.1.0/src/smallwords/data/__init__.py +4 -0
  21. smallwords-0.1.0/src/smallwords/data/basic_english_850.txt +856 -0
  22. smallwords-0.1.0/src/smallwords/data/moby_freq_alpha_898.txt +904 -0
  23. smallwords-0.1.0/src/smallwords/data/special_english_1475.txt +1481 -0
  24. smallwords-0.1.0/src/smallwords/grammar_builder.py +80 -0
  25. smallwords-0.1.0/src/smallwords/input_words.py +59 -0
  26. smallwords-0.1.0/src/smallwords/json_schema.py +42 -0
  27. smallwords-0.1.0/src/smallwords/prompts.py +62 -0
  28. smallwords-0.1.0/src/smallwords/py.typed +1 -0
  29. smallwords-0.1.0/src/smallwords/remix.py +73 -0
  30. smallwords-0.1.0/src/smallwords/resources.py +80 -0
  31. smallwords-0.1.0/src/smallwords/themes/__init__.py +13 -0
  32. smallwords-0.1.0/src/smallwords/themes/caveman.py +73 -0
  33. smallwords-0.1.0/src/smallwords/themes/pirate.py +76 -0
  34. smallwords-0.1.0/src/smallwords/types.py +108 -0
  35. smallwords-0.1.0/src/smallwords/validation.py +61 -0
  36. smallwords-0.1.0/src/smallwords/variants.py +123 -0
  37. smallwords-0.1.0/src/smallwords/wordlists.py +124 -0
  38. smallwords-0.1.0/src/smallwords.egg-info/PKG-INFO +175 -0
  39. smallwords-0.1.0/src/smallwords.egg-info/SOURCES.txt +46 -0
  40. smallwords-0.1.0/src/smallwords.egg-info/dependency_links.txt +1 -0
  41. smallwords-0.1.0/src/smallwords.egg-info/requires.txt +9 -0
  42. smallwords-0.1.0/src/smallwords.egg-info/top_level.txt +1 -0
  43. smallwords-0.1.0/tests/test_basics.py +73 -0
  44. smallwords-0.1.0/tests/test_examples.py +45 -0
  45. smallwords-0.1.0/tests/test_package_surface.py +61 -0
  46. smallwords-0.1.0/tests/test_prompt_helpers.py +70 -0
  47. smallwords-0.1.0/tests/test_schema.py +145 -0
  48. smallwords-0.1.0/tests/test_wordlists.py +84 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Chris McComb
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,11 @@
1
+ include LICENSE
2
+ include README.md
3
+ include RELEASING.md
4
+ graft docs
5
+ graft examples
6
+ graft scripts
7
+ graft tests
8
+ prune docs/_build
9
+ global-exclude __pycache__
10
+ global-exclude *.py[cod]
11
+ global-exclude .DS_Store
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: smallwords
3
+ Version: 0.1.0
4
+ Summary: Controlled-vocabulary prompts plus portable GBNF and JSON Schema resources for small-word English generation.
5
+ Author: Christopher McComb
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/cmccomb/smallwords
8
+ Project-URL: Documentation, https://cmccomb.github.io/smallwords/
9
+ Project-URL: Repository, https://github.com/cmccomb/smallwords.git
10
+ Project-URL: Issues, https://github.com/cmccomb/smallwords/issues
11
+ Project-URL: Changelog, https://github.com/cmccomb/smallwords/releases
12
+ Keywords: grammar,gbnf,json-schema,structured-output,controlled-language,simple-english
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Operating System :: OS Independent
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Topic :: Text Processing :: Linguistic
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.10
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Provides-Extra: dev
29
+ Requires-Dist: build>=1.2; extra == "dev"
30
+ Requires-Dist: pydata-sphinx-theme>=0.16; extra == "dev"
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=7.1; extra == "dev"
33
+ Requires-Dist: ruff>=0.11; extra == "dev"
34
+ Requires-Dist: sphinx<9,>=8.1; extra == "dev"
35
+ Requires-Dist: twine>=6.1; extra == "dev"
36
+ Dynamic: license-file
37
+
38
+ # smallwords
39
+
40
+ [![CI](https://github.com/cmccomb/smallwords/actions/workflows/ci.yml/badge.svg)](https://github.com/cmccomb/smallwords/actions/workflows/ci.yml)
41
+ [![PyPI version](https://img.shields.io/pypi/v/smallwords)](https://pypi.org/project/smallwords/)
42
+ [![Python versions](https://img.shields.io/pypi/pyversions/smallwords)](https://pypi.org/project/smallwords/)
43
+ [![License](https://img.shields.io/github/license/cmccomb/smallwords)](https://github.com/cmccomb/smallwords/blob/main/LICENSE)
44
+ [![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-4c1)](https://cmccomb.github.io/smallwords/)
45
+
46
+ `smallwords` is a tiny Python package for controlled-vocabulary prompting plus
47
+ portable output resources. It keeps one wordlist at the center of the workflow
48
+ so prompt text, GBNF, JSON Schema, and post-generation validation all stay in
49
+ sync.
50
+
51
+ The package ships with a small set of bundled wordlists: direct source-backed
52
+ lists such as `moby_898`, `basic_850`, and `special_english_1475`, plus a
53
+ couple of intentionally themed remixes. By default, the built-ins also allow
54
+ slight family variants such as `go`, `goes`, and `going`.
55
+
56
+ It supports Python 3.10 and newer.
57
+
58
+ The hosted API-and-examples docs live at
59
+ [`cmccomb.github.io/smallwords`](https://cmccomb.github.io/smallwords/).
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ pip install smallwords
65
+ ```
66
+
67
+ For local development, create and activate a virtualenv first:
68
+
69
+ ```bash
70
+ python -m venv .venv
71
+ source .venv/bin/activate
72
+ python -m pip install -e ".[dev]"
73
+ ```
74
+
75
+ ## Quick Start
76
+
77
+ ```python
78
+ from smallwords import OutputResources, OutputShape, allow_input_words, is_compliant
79
+ from smallwords.prompts import build_prompt
80
+
81
+ shape = OutputShape(max_words_per_line=24, max_lines=1)
82
+ spec = allow_input_words("basic_850", "How does a bridge work?")
83
+ resources = OutputResources.from_wordlist(spec, shape=shape)
84
+ prompt = build_prompt("explain", "How does a bridge work?", wordlist=spec)
85
+ schema = resources.json_schema(key="answer", title="bridge_explanation")
86
+
87
+ text = "A bridge is a structure that helps people and things move across a river or a deep place."
88
+ ok = is_compliant(text, spec)
89
+ ```
90
+
91
+ The contrast between soft and hard constraints is the point. `build_prompt(...)` is the soft instruction layer.
92
+ `OutputResources` gives you the matching hard constraints in both GBNF and JSON
93
+ Schema form. `is_compliant(...)` is the lightweight offline check.
94
+
95
+ If you want the model to be able to repeat topic or question terms such as
96
+ `bridge`, `neighbor`, or `order`, use `allow_input_words(...)` once and pass
97
+ that derived spec into the prompt, resources, and validation helpers together.
98
+
99
+ ## Built-In Wordlists
100
+
101
+ - `moby_898`: the full normalized alpha-only Moby Words II frequency list
102
+ - `basic_850`: Charles Ogden's Basic English 850
103
+ - `special_english_1475`: Voice of America Special English
104
+ - `caveman_898`: a size-neutral, surface-only `moby_898` remix with caveman adjustments
105
+ - `pirate_898`: a size-neutral `moby_898` remix with pirate adjustments
106
+
107
+ The bundled text files live in `src/smallwords/data/`. `moby_898`,
108
+ `basic_850`, and `special_english_1475` are direct source-backed lists.
109
+ `caveman_898` and `pirate_898` are derived size-neutral remixes built on top of
110
+ `moby_898`.
111
+
112
+ The themed remixes live in `src/smallwords/themes/caveman.py` and
113
+ `src/smallwords/themes/pirate.py`. If you want to build your own, use
114
+ `remix_wordlist(...)` with a base list plus curated additions and removals.
115
+
116
+ ## Contrastive Example
117
+
118
+ This is the clearest way to see what `smallwords` is trying to do. Both blocks
119
+ below are genuine local Qwen outputs from April 5, 2026. The first uses a plain
120
+ prompt. The second uses the same base prompt plus an explicit `basic_850`
121
+ vocabulary list, the topic word `bridge`, and the generated GBNF.
122
+
123
+ A plain prompt stays fairly natural:
124
+
125
+ > A bridge connects two points, usually across a body of water or a gap,
126
+ > allowing people and vehicles to cross safely.
127
+
128
+ A constrained `basic_850 + topic words` run stays simpler while still sounding
129
+ reasonably natural:
130
+
131
+ > A bridge is a structure that helps people and things move across a river or a
132
+ > deep place.
133
+
134
+ These runs use `llama-server` from `llama.cpp` and
135
+ [`Qwen/Qwen3-8B-GGUF`](https://huggingface.co/Qwen/Qwen3-8B-GGUF)
136
+ via
137
+ [`bartowski/Qwen_Qwen3-8B-GGUF`](https://huggingface.co/bartowski/Qwen_Qwen3-8B-GGUF).
138
+
139
+ Reproduce that comparison from a clone of the repository with an activated
140
+ virtualenv:
141
+
142
+ ```bash
143
+ llama-server -hf bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m --host 127.0.0.1 --port 8080 --reasoning-budget 0 --log-disable
144
+ python examples/readme_bridge_contrast.py
145
+ ```
146
+
147
+ ## Examples
148
+
149
+ See the repository's
150
+ [`examples/README.md`](https://github.com/cmccomb/smallwords/blob/main/examples/README.md)
151
+ for the runnable examples. The current example set is live-model based:
152
+ the README bridge contrast, a focused pirate greeting, and a focused technical
153
+ rewrite all call a live `llama-server` model with a prompt plus generated
154
+ grammar.
155
+
156
+ ## Development
157
+
158
+ Run these commands from an activated virtualenv:
159
+
160
+ ```bash
161
+ python -m pip install -e ".[dev]"
162
+ python -m ruff check .
163
+ python -m ruff format --check .
164
+ python -m pytest
165
+ python scripts/check_documentation.py
166
+ python -m sphinx -W --keep-going -b html docs docs/_build/html
167
+ python -m build
168
+ python -m twine check --strict dist/*
169
+ ```
170
+
171
+ CI runs linting, tests, the documentation policy check, a `>=90%` coverage
172
+ gate, a Sphinx docs build, and a package build on GitHub Actions.
173
+
174
+ For release steps and Trusted Publishing setup, see
175
+ [`RELEASING.md`](https://github.com/cmccomb/smallwords/blob/main/RELEASING.md).
@@ -0,0 +1,138 @@
1
+ # smallwords
2
+
3
+ [![CI](https://github.com/cmccomb/smallwords/actions/workflows/ci.yml/badge.svg)](https://github.com/cmccomb/smallwords/actions/workflows/ci.yml)
4
+ [![PyPI version](https://img.shields.io/pypi/v/smallwords)](https://pypi.org/project/smallwords/)
5
+ [![Python versions](https://img.shields.io/pypi/pyversions/smallwords)](https://pypi.org/project/smallwords/)
6
+ [![License](https://img.shields.io/github/license/cmccomb/smallwords)](https://github.com/cmccomb/smallwords/blob/main/LICENSE)
7
+ [![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-4c1)](https://cmccomb.github.io/smallwords/)
8
+
9
+ `smallwords` is a tiny Python package for controlled-vocabulary prompting plus
10
+ portable output resources. It keeps one wordlist at the center of the workflow
11
+ so prompt text, GBNF, JSON Schema, and post-generation validation all stay in
12
+ sync.
13
+
14
+ The package ships with a small set of bundled wordlists: direct source-backed
15
+ lists such as `moby_898`, `basic_850`, and `special_english_1475`, plus a
16
+ couple of intentionally themed remixes. By default, the built-ins also allow
17
+ slight family variants such as `go`, `goes`, and `going`.
18
+
19
+ It supports Python 3.10 and newer.
20
+
21
+ The hosted API-and-examples docs live at
22
+ [`cmccomb.github.io/smallwords`](https://cmccomb.github.io/smallwords/).
23
+
24
+ ## Installation
25
+
26
+ ```bash
27
+ pip install smallwords
28
+ ```
29
+
30
+ For local development, create and activate a virtualenv first:
31
+
32
+ ```bash
33
+ python -m venv .venv
34
+ source .venv/bin/activate
35
+ python -m pip install -e ".[dev]"
36
+ ```
37
+
38
+ ## Quick Start
39
+
40
+ ```python
41
+ from smallwords import OutputResources, OutputShape, allow_input_words, is_compliant
42
+ from smallwords.prompts import build_prompt
43
+
44
+ shape = OutputShape(max_words_per_line=24, max_lines=1)
45
+ spec = allow_input_words("basic_850", "How does a bridge work?")
46
+ resources = OutputResources.from_wordlist(spec, shape=shape)
47
+ prompt = build_prompt("explain", "How does a bridge work?", wordlist=spec)
48
+ schema = resources.json_schema(key="answer", title="bridge_explanation")
49
+
50
+ text = "A bridge is a structure that helps people and things move across a river or a deep place."
51
+ ok = is_compliant(text, spec)
52
+ ```
53
+
54
+ The contrast between soft and hard constraints is the point. `build_prompt(...)` is the soft instruction layer.
55
+ `OutputResources` gives you the matching hard constraints in both GBNF and JSON
56
+ Schema form. `is_compliant(...)` is the lightweight offline check.
57
+
58
+ If you want the model to be able to repeat topic or question terms such as
59
+ `bridge`, `neighbor`, or `order`, use `allow_input_words(...)` once and pass
60
+ that derived spec into the prompt, resources, and validation helpers together.
61
+
62
+ ## Built-In Wordlists
63
+
64
+ - `moby_898`: the full normalized alpha-only Moby Words II frequency list
65
+ - `basic_850`: Charles Ogden's Basic English 850
66
+ - `special_english_1475`: Voice of America Special English
67
+ - `caveman_898`: a size-neutral, surface-only `moby_898` remix with caveman adjustments
68
+ - `pirate_898`: a size-neutral `moby_898` remix with pirate adjustments
69
+
70
+ The bundled text files live in `src/smallwords/data/`. `moby_898`,
71
+ `basic_850`, and `special_english_1475` are direct source-backed lists.
72
+ `caveman_898` and `pirate_898` are derived size-neutral remixes built on top of
73
+ `moby_898`.
74
+
75
+ The themed remixes live in `src/smallwords/themes/caveman.py` and
76
+ `src/smallwords/themes/pirate.py`. If you want to build your own, use
77
+ `remix_wordlist(...)` with a base list plus curated additions and removals.
78
+
79
+ ## Contrastive Example
80
+
81
+ This is the clearest way to see what `smallwords` is trying to do. Both blocks
82
+ below are genuine local Qwen outputs from April 5, 2026. The first uses a plain
83
+ prompt. The second uses the same base prompt plus an explicit `basic_850`
84
+ vocabulary list, the topic word `bridge`, and the generated GBNF.
85
+
86
+ A plain prompt stays fairly natural:
87
+
88
+ > A bridge connects two points, usually across a body of water or a gap,
89
+ > allowing people and vehicles to cross safely.
90
+
91
+ A constrained `basic_850 + topic words` run stays simpler while still sounding
92
+ reasonably natural:
93
+
94
+ > A bridge is a structure that helps people and things move across a river or a
95
+ > deep place.
96
+
97
+ These runs use `llama-server` from `llama.cpp` and
98
+ [`Qwen/Qwen3-8B-GGUF`](https://huggingface.co/Qwen/Qwen3-8B-GGUF)
99
+ via
100
+ [`bartowski/Qwen_Qwen3-8B-GGUF`](https://huggingface.co/bartowski/Qwen_Qwen3-8B-GGUF).
101
+
102
+ Reproduce that comparison from a clone of the repository with an activated
103
+ virtualenv:
104
+
105
+ ```bash
106
+ llama-server -hf bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m --host 127.0.0.1 --port 8080 --reasoning-budget 0 --log-disable
107
+ python examples/readme_bridge_contrast.py
108
+ ```
109
+
110
+ ## Examples
111
+
112
+ See the repository's
113
+ [`examples/README.md`](https://github.com/cmccomb/smallwords/blob/main/examples/README.md)
114
+ for the runnable examples. The current example set is live-model based:
115
+ the README bridge contrast, a focused pirate greeting, and a focused technical
116
+ rewrite all call a live `llama-server` model with a prompt plus generated
117
+ grammar.
118
+
119
+ ## Development
120
+
121
+ Run these commands from an activated virtualenv:
122
+
123
+ ```bash
124
+ python -m pip install -e ".[dev]"
125
+ python -m ruff check .
126
+ python -m ruff format --check .
127
+ python -m pytest
128
+ python scripts/check_documentation.py
129
+ python -m sphinx -W --keep-going -b html docs docs/_build/html
130
+ python -m build
131
+ python -m twine check --strict dist/*
132
+ ```
133
+
134
+ CI runs linting, tests, the documentation policy check, a `>=90%` coverage
135
+ gate, a Sphinx docs build, and a package build on GitHub Actions.
136
+
137
+ For release steps and Trusted Publishing setup, see
138
+ [`RELEASING.md`](https://github.com/cmccomb/smallwords/blob/main/RELEASING.md).
@@ -0,0 +1,39 @@
1
+ # Releasing `smallwords`
2
+
3
+ This repository is set up for PyPI Trusted Publishing through GitHub Actions.
4
+
5
+ Before the first release:
6
+
7
+ 1. Create the `smallwords` project on PyPI.
8
+ 2. Configure a Trusted Publisher on PyPI for this GitHub repository.
9
+ 3. Add the `pypi` GitHub Actions environment if you want environment-level protection rules.
10
+
11
+ Recommended release flow:
12
+
13
+ ```bash
14
+ python -m venv .venv
15
+ source .venv/bin/activate
16
+ python -m pip install -e ".[dev]"
17
+ python -m ruff check .
18
+ python -m ruff format --check .
19
+ python -m pytest
20
+ python scripts/check_documentation.py
21
+ python -m sphinx -W --keep-going -b html docs docs/_build/html
22
+ python -m build
23
+ python -m twine check --strict dist/*
24
+ python -m venv /tmp/smallwords-release-check
25
+ /tmp/smallwords-release-check/bin/python -m pip install --upgrade pip
26
+ /tmp/smallwords-release-check/bin/python -m pip install dist/*.whl
27
+ /tmp/smallwords-release-check/bin/python -c "import smallwords; print(smallwords.__version__)"
28
+ ```
29
+
30
+ Then:
31
+
32
+ 1. Update `version` in `pyproject.toml`.
33
+ 2. Commit the release.
34
+ 3. Create and push a Git tag.
35
+ 4. Publish a GitHub release from that tag.
36
+
37
+ The `Publish` workflow builds the distributions, validates them with `twine`,
38
+ and uploads them to PyPI through Trusted Publishing. The separate `Docs`
39
+ workflow builds the Sphinx site and deploys it to GitHub Pages from `main`.
@@ -0,0 +1,39 @@
1
+ """Sphinx configuration for the compact project docs site."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ # Add the source tree so autodoc can import the local package during docs builds.
9
+ ROOT = Path(__file__).resolve().parents[1]
10
+ SRC = ROOT / "src"
11
+ if str(SRC) not in sys.path:
12
+ sys.path.insert(0, str(SRC))
13
+
14
+ from smallwords import __version__ # noqa: E402
15
+
16
+ project = "smallwords"
17
+ author = "Christopher McComb"
18
+ copyright = "2026, Christopher McComb"
19
+ version = __version__
20
+ release = __version__
21
+
22
+ extensions = [
23
+ "sphinx.ext.autodoc",
24
+ "sphinx.ext.napoleon",
25
+ "sphinx.ext.viewcode",
26
+ ]
27
+
28
+ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
29
+ autodoc_member_order = "bysource"
30
+ autodoc_typehints = "description"
31
+ napoleon_google_docstring = True
32
+ napoleon_numpy_docstring = False
33
+
34
+ html_theme = "pydata_sphinx_theme"
35
+ html_title = "smallwords"
36
+ html_theme_options = {
37
+ "github_url": "https://github.com/cmccomb/smallwords",
38
+ "show_toc_level": 2,
39
+ }
@@ -0,0 +1,164 @@
1
+ smallwords
2
+ ==========
3
+
4
+ Large language models can follow "write simply" instructions, but that soft
5
+ guidance is often insufficient when a workflow also needs reproducible wording,
6
+ portable output constraints, or offline validation. Prompt-only approaches are
7
+ easy to start and hard to trust. They leave the active vocabulary implicit,
8
+ make failures difficult to diagnose, and force each integration to recreate the
9
+ same constraint logic in a different format.
10
+
11
+ ``smallwords`` addresses that gap by keeping one controlled vocabulary at the
12
+ center of the workflow. From that single specification, the package can build
13
+ prompt text, GBNF, JSON Schema, and validation checks that stay aligned.
14
+
15
+ Install
16
+ -------
17
+
18
+ .. code-block:: bash
19
+
20
+ pip install smallwords
21
+
22
+ Quick API
23
+ ---------
24
+
25
+ Most integrations follow the same chain: choose a vocabulary, optionally add
26
+ task words, build the matching prompt and portable resources, then validate the
27
+ result.
28
+
29
+ .. autofunction:: smallwords.allow_input_words
30
+
31
+ .. autofunction:: smallwords.get_wordlist
32
+
33
+ .. autofunction:: smallwords.list_wordlists
34
+
35
+ .. autoclass:: smallwords.OutputShape
36
+ :members:
37
+
38
+ .. autoclass:: smallwords.OutputResources
39
+ :members:
40
+
41
+ .. autofunction:: smallwords.prompts.build_prompt
42
+
43
+ .. autofunction:: smallwords.is_compliant
44
+
45
+ .. autofunction:: smallwords.out_of_vocab
46
+
47
+ Built-ins
48
+ ---------
49
+
50
+ The installed catalog currently includes:
51
+
52
+ - ``moby_898``
53
+ - ``basic_850``
54
+ - ``special_english_1475``
55
+ - ``caveman_898``
56
+ - ``pirate_898``
57
+
58
+ Use :func:`smallwords.list_wordlists` to inspect the installed catalog. Use
59
+ :func:`smallwords.get_wordlist` when a workflow needs the underlying
60
+ specification object directly.
61
+
62
+ Examples With Results
63
+ ---------------------
64
+
65
+ The examples below were produced locally on April 5, 2026 with
66
+ ``llama-server`` and ``bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m``.
67
+
68
+ Bridge Contrast
69
+ ~~~~~~~~~~~~~~~
70
+
71
+ This comparison isolates the package's core claim. A plain prompt can already
72
+ produce a reasonable answer. However, it does not expose an explicit response
73
+ contract. The constrained run uses the same task, adds the active vocabulary to
74
+ the prompt, and applies the matching grammar.
75
+
76
+ .. code-block:: python
77
+
78
+ from smallwords import OutputResources, OutputShape, allow_input_words
79
+ from smallwords.prompts import build_prompt
80
+
81
+ spec = allow_input_words("basic_850", "How does a bridge work?")
82
+ shape = OutputShape(max_words_per_line=24, max_lines=1)
83
+ prompt = build_prompt("explain", "How does a bridge work?", wordlist=spec)
84
+ resources = OutputResources.from_wordlist(spec, shape=shape)
85
+
86
+ Plain prompt result:
87
+
88
+ A bridge connects two points, usually across a body of water or a gap, allowing people and vehicles to cross safely.
89
+
90
+ Constrained result:
91
+
92
+ A bridge is a structure that helps people and things move across a river or a deep place.
93
+
94
+ See ``examples/readme_bridge_contrast.py`` for the full prompt-plus-grammar run.
95
+
96
+ Pirate Greeting
97
+ ~~~~~~~~~~~~~~~
98
+
99
+ This example starts from the built-in ``pirate_898`` list, selects a tiny
100
+ greeting-focused vocabulary, and then applies a matching grammar.
101
+
102
+ .. code-block:: python
103
+
104
+ from smallwords import OutputResources, OutputShape, WordlistSpec, get_wordlist
105
+ from smallwords.prompts import build_prompt
106
+
107
+ base = get_wordlist("pirate_898")
108
+ spec = WordlistSpec(
109
+ name="pirate_898_greeting_focus",
110
+ words=("ahoy", "good", "matey", "meet", "to", "you"),
111
+ source_name="Selected surface forms from pirate_898 for the pirate greeting example",
112
+ source_urls=base.source_urls,
113
+ license_name=base.license_name,
114
+ allowed_punctuation=(".",),
115
+ variant_mode="surface_only",
116
+ )
117
+ shape = OutputShape(min_words_per_line=6, max_words_per_line=6, max_lines=1)
118
+ prompt = build_prompt(
119
+ "answer",
120
+ "A pirate meets a new friend on a ship. What short friendly greeting should the pirate say?",
121
+ wordlist=spec,
122
+ )
123
+ resources = OutputResources.from_wordlist(spec, shape=shape)
124
+
125
+ Constrained result:
126
+
127
+ Ahoy matey good to meet you.
128
+
129
+ Technical Rewrite
130
+ ~~~~~~~~~~~~~~~~~
131
+
132
+ This example selects a compact rewrite vocabulary from ``basic_850`` that
133
+ excludes the source terminology altogether. The output shape then forces one
134
+ short ten-word sentence.
135
+
136
+ .. code-block:: python
137
+
138
+ from smallwords import OutputResources, OutputShape, WordlistSpec, get_wordlist
139
+ from smallwords.prompts import build_prompt
140
+
141
+ base = get_wordlist("basic_850")
142
+ text = (
143
+ "The thermal controller derates propulsion output after the sensor array "
144
+ "reports an overtemperature fault."
145
+ )
146
+ spec = WordlistSpec(
147
+ name="basic_850_rewrite_focus",
148
+ words=("be", "cut", "engine", "heat", "high", "if", "power", "system", "this", "very", "when"),
149
+ source_name="Selected from basic_850 for the rewrite example",
150
+ source_urls=base.source_urls,
151
+ license_name=base.license_name,
152
+ allowed_punctuation=(".",),
153
+ )
154
+ shape = OutputShape(min_words_per_line=10, max_words_per_line=10, max_lines=1)
155
+ prompt = build_prompt("rewrite", text, wordlist=spec)
156
+ resources = OutputResources.from_wordlist(spec, shape=shape)
157
+
158
+ Constrained result:
159
+
160
+ When engine heat is very high this system cuts power.
161
+
162
+ The live scripts in ``examples/`` print the full prompt, grammar, schema, and
163
+ validation details. That fuller output matters because it lets a reader inspect
164
+ not only the answer, but also the exact constraint setup that produced it.
@@ -0,0 +1,35 @@
1
+ # Examples
2
+
3
+ This directory keeps three live llama.cpp examples:
4
+
5
+ - `readme_bridge_contrast.py`: the bridge comparison used in the root README
6
+ - `pirate_greeting.py`: a focused pirate greeting built from `pirate_898`
7
+ - `rewrite_technical_passage.py`: a focused technical rewrite built from `basic_850`
8
+
9
+ Create and activate a virtualenv first:
10
+
11
+ ```bash
12
+ python -m venv .venv
13
+ source .venv/bin/activate
14
+ python -m pip install -e ".[dev]"
15
+ ```
16
+
17
+ Then start a server, for example:
18
+
19
+ ```bash
20
+ llama-server -hf bartowski/Qwen_Qwen3-8B-GGUF:q4_k_m --host 127.0.0.1 --port 8080 --reasoning-budget 0 --log-disable
21
+ ```
22
+
23
+ Run the examples from the project root:
24
+
25
+ ```bash
26
+ python examples/pirate_greeting.py
27
+ python examples/rewrite_technical_passage.py
28
+ python examples/readme_bridge_contrast.py
29
+ ```
30
+
31
+ If your server uses a different address, set `SMALLWORDS_LLAMA_BASE_URL`.
32
+
33
+ Each script prints the prompt, output shape, matching grammar and schema
34
+ resources, the generated response, and whether the response stayed inside the
35
+ chosen vocabulary.