optalph 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optalph-1.0.0/.github/workflows/ci.yml +46 -0
- optalph-1.0.0/.gitignore +13 -0
- optalph-1.0.0/.python-version +1 -0
- optalph-1.0.0/LICENSE +21 -0
- optalph-1.0.0/PKG-INFO +243 -0
- optalph-1.0.0/PLAN.md +78 -0
- optalph-1.0.0/README.md +208 -0
- optalph-1.0.0/benchmark_results.csv +41 -0
- optalph-1.0.0/pyproject.toml +73 -0
- optalph-1.0.0/src/optalph/__init__.py +43 -0
- optalph-1.0.0/src/optalph/__main__.py +4 -0
- optalph-1.0.0/src/optalph/_base.py +63 -0
- optalph-1.0.0/src/optalph/_cli.py +132 -0
- optalph-1.0.0/src/optalph/config.py +67 -0
- optalph-1.0.0/src/optalph/dictionary.py +249 -0
- optalph-1.0.0/src/optalph/evaluator.py +242 -0
- optalph-1.0.0/src/optalph/frequencies.py +68 -0
- optalph-1.0.0/src/optalph/grid_evaluator.py +162 -0
- optalph-1.0.0/src/optalph/main.py +366 -0
- optalph-1.0.0/src/optalph/optimizer.py +546 -0
- optalph-1.0.0/src/optalph/py.typed +0 -0
- optalph-1.0.0/src/optalph/reporting.py +170 -0
- optalph-1.0.0/src/optalph/rules.py +158 -0
- optalph-1.0.0/tests/test_cli.py +161 -0
- optalph-1.0.0/tests/test_config.py +124 -0
- optalph-1.0.0/tests/test_dictionary.py +56 -0
- optalph-1.0.0/tests/test_evaluator.py +334 -0
- optalph-1.0.0/tests/test_frequencies.py +78 -0
- optalph-1.0.0/tests/test_grid.py +255 -0
- optalph-1.0.0/tests/test_optimizer.py +238 -0
- optalph-1.0.0/tests/test_optimizer_features.py +135 -0
- optalph-1.0.0/tests/test_properties.py +286 -0
- optalph-1.0.0/tests/test_rules.py +96 -0
- optalph-1.0.0/uv.lock +482 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
tags: ["v*"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [main, master]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
lint:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: astral-sh/setup-uv@v4
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.13"
|
|
18
|
+
enable-cache: true
|
|
19
|
+
- run: uv run --frozen --extra dev ruff check .
|
|
20
|
+
- run: uv run --frozen --extra dev mypy src/optalph/
|
|
21
|
+
|
|
22
|
+
test:
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
strategy:
|
|
25
|
+
matrix:
|
|
26
|
+
python-version: ["3.10", "3.12", "3.13"]
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
- uses: astral-sh/setup-uv@v4
|
|
30
|
+
with:
|
|
31
|
+
python-version: ${{ matrix.python-version }}
|
|
32
|
+
enable-cache: true
|
|
33
|
+
- run: uv run --frozen --extra dev pytest -q
|
|
34
|
+
|
|
35
|
+
publish:
|
|
36
|
+
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
|
|
37
|
+
needs: [lint, test]
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
environment: pypi
|
|
40
|
+
permissions:
|
|
41
|
+
id-token: write
|
|
42
|
+
steps:
|
|
43
|
+
- uses: actions/checkout@v4
|
|
44
|
+
- uses: astral-sh/setup-uv@v4
|
|
45
|
+
- run: uv build
|
|
46
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
optalph-1.0.0/.gitignore
ADDED
|
(13 added lines not included in this extract)
|
optalph-1.0.0/.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.14
|
optalph-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2026 optalph contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
optalph-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: optalph
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Optimize alphabet permutation to maximize dictionary word coverage
|
|
5
|
+
Project-URL: Repository, https://github.com/4bstr4ct/optalph
|
|
6
|
+
Project-URL: Bug-Tracker, https://github.com/4bstr4ct/optalph/issues
|
|
7
|
+
Author: optalph contributors
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: alphabet,english,iterated-local-search,optimization,russian,simulated-annealing
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: typing-extensions>=4.0
|
|
23
|
+
Provides-Extra: dawg
|
|
24
|
+
Requires-Dist: dawg-python>=0.7.2; extra == 'dawg'
|
|
25
|
+
Requires-Dist: pymorphy2-dicts-ru>=2.4.417127.4579844; extra == 'dawg'
|
|
26
|
+
Requires-Dist: pymorphy2>=0.9.1; extra == 'dawg'
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: hypothesis<6.155,>=6.131.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=9.0.3; extra == 'dev'
|
|
31
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
32
|
+
Provides-Extra: tqdm
|
|
33
|
+
Requires-Dist: tqdm>=4.60; extra == 'tqdm'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# optalph
|
|
37
|
+
|
|
38
|
+
**Optimize alphabet permutation to maximize dictionary word coverage.**
|
|
39
|
+
|
|
40
|
+
[CI](https://github.com/4bstr4ct/optalph/actions/workflows/ci.yml)
|
|
41
|
+
[PyPI](https://pypi.org/project/optalph/)
|
|
42
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
43
|
+
[License: MIT](LICENSE)
|
|
44
|
+
|
|
45
|
+
Supports Russian and English alphabets (auto-detected from dictionary), incremental evaluation, 2D grid extraction (Boggle-like), weighted scoring, and parallel multistart.
|
|
46
|
+
|
|
47
|
+
Requires Python >= 3.10.
|
|
48
|
+
|
|
49
|
+
## Install
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install optalph
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Or with [pipx](https://pypa.github.io/pipx/) (recommended for CLI tools):
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pipx install optalph
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Optional dependencies
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Progress bar
|
|
65
|
+
pip install optalph[tqdm]
|
|
66
|
+
|
|
67
|
+
# Russian DAWG dictionary (fast, 1.5M word forms)
|
|
68
|
+
pip install optalph[dawg]
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Easiest: download English dictionary automatically
|
|
75
|
+
optalph --download english -w 3 4 5 --iter 500000
|
|
76
|
+
|
|
77
|
+
# Russian with DAWG dictionary
|
|
78
|
+
optalph -w 3 4 5
|
|
79
|
+
|
|
80
|
+
# Custom dictionary file
|
|
81
|
+
optalph -w 3 4 5 --dict-file words.txt --no-dawg
|
|
82
|
+
|
|
83
|
+
# Weighted scoring (frequency file: "word count" per line)
|
|
84
|
+
optalph -w 3 4 5 --freq-file freqs.txt --dict-file words.txt --no-dawg
|
|
85
|
+
|
|
86
|
+
# 2D Boggle-like grid
|
|
87
|
+
optalph --grid-rows 4 --grid-cols 8 --dict-file words.txt --no-dawg
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Alphabet is auto-detected from the dictionary. Use `--alphabet` to override.
|
|
91
|
+
|
|
92
|
+
### Dictionary Sources
|
|
93
|
+
|
|
94
|
+
- **`--download english`**: Downloads and caches ~370k English words in `~/.optalph/`
|
|
95
|
+
- **`--download russian`**: Downloads and caches Russian words in `~/.optalph/`
|
|
96
|
+
- **DAWG**: `pymorphy2-dicts-ru` (install via `[dawg]` extra, provides 1.5M Russian word forms)
|
|
97
|
+
- **Custom**: any UTF-8 text file, one word per line
|
|
98
|
+
|
|
99
|
+
## Optimization Methods
|
|
100
|
+
|
|
101
|
+
| Method | Flag | Description |
|
|
102
|
+
|--------|------|-------------|
|
|
103
|
+
| SA | `--method sa` (default) | Simulated annealing with an optional tabu list (`--tabu`) |
|
|
104
|
+
| ILS | `--method ils` | Iterated local search — SA with perturbation restarts |
|
|
105
|
+
| Multistart | `-r N` | N independent SA runs, best result wins |
|
|
106
|
+
| Parallel | `-r N --parallel` | Multistart with ProcessPoolExecutor |
|
|
107
|
+
|
|
108
|
+
### Key Parameters
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
--iter N # Total iterations (default: 500000)
|
|
112
|
+
--t-start T # Starting temperature (default: 100)
|
|
113
|
+
--cooling R # Cooling rate (default: 0.9999)
|
|
114
|
+
--tabu # Enable tabu list (prevents cycling)
|
|
115
|
+
--tabu-tenure N # Tabu tenure (default: 20)
|
|
116
|
+
--seed N # Reproducible results
|
|
117
|
+
--incremental # Use incremental evaluator (faster for large alphabets)
|
|
118
|
+
--ils-steps N # ILS perturbation steps (default: 10)
|
|
119
|
+
--ils-perturb N # ILS perturbation strength (default: 3)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Extraction Rules
|
|
123
|
+
|
|
124
|
+
- **Adjacent(k)**: Contiguous k-character substrings (e.g. `-w 3` → all 3-letter substrings)
|
|
125
|
+
- **SkipAdjacent(k, step)**: Every `step`-th character in a k-length window (`--skip k step`)
|
|
126
|
+
- **GridRule(rows, cols)**: 2D grid, 8-directional word extraction (`--grid-rows --grid-cols`)
|
|
127
|
+
- Multiple windows are composed: `-w 3 4 5` → Adjacent(3)+Adjacent(4)+Adjacent(5)
|
|
128
|
+
|
|
129
|
+
## Example Results
|
|
130
|
+
|
|
131
|
+
### English (26 letters, ~370k words)
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
$ optalph --download english -w 3 4 5 --iter 500000 --seed 42 --tabu
|
|
135
|
+
|
|
136
|
+
Best score: 29/25237 (initial: 7, +22)
|
|
137
|
+
Best permutation: jclownqkhumpfgridestabvxyz
|
|
138
|
+
jclownqkhumpfgridestabvxyz
|
|
139
|
+
^^^ jcl
|
|
140
|
+
^^^ clo
|
|
141
|
+
^^^^ clow
|
|
142
|
+
^^^^^ clown
|
|
143
|
+
^^^ low
|
|
144
|
+
^^^^ lown
|
|
145
|
+
^^^ own
|
|
146
|
+
Words: jcl, clo, clow, clown, low, lown, own, khu, hum, hump, ump, ...
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Russian with weighted scoring (33 letters, ~1.5M forms)
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
$ optalph -w 3 4 5 --iter 500000 --seed 42 --freq-file ru_50k.txt --tabu
|
|
153
|
+
|
|
154
|
+
Best score: 129.5 (initial: 12.3, +117.3)
|
|
155
|
+
Best permutation: фбризъцшщёкгспалевыйдячьюхнэтомуж
|
|
156
|
+
Words: бри, бриз, риз, изъ, щёк, спа, спал, пал, лев, левы, левый, ...
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Library Usage
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
from optalph import (
|
|
163
|
+
evaluate, evaluate_detailed, IncrementalEvaluator,
|
|
164
|
+
simulated_annealing, iterated_local_search, multistart_sa,
|
|
165
|
+
greedy_init, build_rules, RUSSIAN_ALPHABET, ENGLISH_ALPHABET,
|
|
166
|
+
load_dictionary, download_dictionary, load_frequencies,
|
|
167
|
+
OptimizerConfig, GridRule, IncrementalGridEvaluator,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# Download and load dictionary
|
|
171
|
+
download_dictionary("english")
|
|
172
|
+
dictionary = load_dictionary(download="english", use_dawg=False, min_len=3, max_len=5)
|
|
173
|
+
|
|
174
|
+
# Build rules
|
|
175
|
+
rules = build_rules(adjacent_windows=[3, 4, 5])
|
|
176
|
+
|
|
177
|
+
# Evaluate
|
|
178
|
+
score = evaluate(ENGLISH_ALPHABET, rules, dictionary)
|
|
179
|
+
score, valid, invalid, breakdown = evaluate_detailed(ENGLISH_ALPHABET, rules, dictionary)
|
|
180
|
+
|
|
181
|
+
# Greedy initialization (bigram frequencies → Hamiltonian path)
|
|
182
|
+
initial = greedy_init(ENGLISH_ALPHABET, dictionary, window_sizes=[3, 4, 5])
|
|
183
|
+
|
|
184
|
+
# SA optimization
|
|
185
|
+
best, best_score, history, acc = simulated_annealing(
|
|
186
|
+
initial=initial,
|
|
187
|
+
score_fn=lambda p: evaluate(p, rules, dictionary),
|
|
188
|
+
config=OptimizerConfig(max_iterations=500000),
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# Incremental evaluator (Adjacent rules only, O(1) per swap)
|
|
192
|
+
incr = IncrementalEvaluator.from_rules(rules, dictionary, len(ENGLISH_ALPHABET))
|
|
193
|
+
best, best_score, history, acc = simulated_annealing(
|
|
194
|
+
initial=initial, evaluator=incr,
|
|
195
|
+
config=OptimizerConfig(max_iterations=500000),
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# ILS (SA + perturbation restarts)
|
|
199
|
+
best, best_score, history, _ = iterated_local_search(
|
|
200
|
+
initial=initial, evaluator=incr,
|
|
201
|
+
config=OptimizerConfig(max_iterations=500000),
|
|
202
|
+
perturbation_strength=3, n_perturbations=5,
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
# Multistart (parallel)
|
|
206
|
+
best, best_score, history, acc = multistart_sa(
|
|
207
|
+
ENGLISH_ALPHABET,
|
|
208
|
+
score_fn=lambda p: evaluate(p, rules, dictionary),
|
|
209
|
+
config=OptimizerConfig(max_iterations=100000),
|
|
210
|
+
n_restarts=4, parallel=True,
|
|
211
|
+
rules=rules, dictionary=dictionary,
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# Weighted scoring
|
|
215
|
+
weights = load_frequencies("freqs.txt")
|
|
216
|
+
score = evaluate(ENGLISH_ALPHABET, rules, dictionary, weights=weights)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Algorithm Details
|
|
220
|
+
|
|
221
|
+
**Simulated Annealing** with:
|
|
222
|
+
- Geometric cooling schedule (T *= cooling_rate)
|
|
223
|
+
- Tabu list with aspiration criterion (override tabu if move improves current score)
|
|
224
|
+
- Plateau detection with scaling limit (`max(1000, max_iterations/100)`) and decaying reheat
|
|
225
|
+
- Greedy bigram initialization (+30% starting score over random)
|
|
226
|
+
- Parallel multistart via ProcessPoolExecutor
|
|
227
|
+
|
|
228
|
+
**Iterated Local Search**: SA → perturb (k random swaps) → SA → repeat. The iteration budget is divided equally across steps, and each step gets its own random seed and a recalculated cooling schedule.
|
|
229
|
+
|
|
230
|
+
**Incremental evaluation**: O(1) per swap attempt (only affected slots re-evaluated) vs O(n) full evaluation. Supported for Adjacent and Grid rules.
|
|
231
|
+
|
|
232
|
+
## Development
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
uv sync --extra dev
|
|
236
|
+
uv run ruff check .
|
|
237
|
+
uv run mypy src/optalph/
|
|
238
|
+
uv run pytest -q
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## License
|
|
242
|
+
|
|
243
|
+
MIT
|
optalph-1.0.0/PLAN.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# optalph Roadmap
|
|
2
|
+
|
|
3
|
+
## Done
|
|
4
|
+
|
|
5
|
+
### v0.1.0–v0.3.0: Core
|
|
6
|
+
- SA/GA/memetic optimization with weighted scoring
|
|
7
|
+
- IncrementalEvaluator (slot-based, snapshot undo, deep clone)
|
|
8
|
+
- Extraction rules: Adjacent, SkipAdjacent, Compose
|
|
9
|
+
- Annotated output, convergence plot, --save JSON
|
|
10
|
+
- tqdm, DAWG optional, English alphabet, auto-detect
|
|
11
|
+
|
|
12
|
+
### v0.4.0: Grid & Properties
|
|
13
|
+
- GridRule(rows, cols) with 8-directional extraction
|
|
14
|
+
- IncrementalGridEvaluator with _line_slots reverse mapping
|
|
15
|
+
- _IncrementalBase Generic ABC (shared slot management)
|
|
16
|
+
- 200 tests, including 14 Hypothesis property tests
|
|
17
|
+
- detect_alphabet() from dictionary file
|
|
18
|
+
|
|
19
|
+
### v0.5.0: Benchmark-Driven Rewrite
|
|
20
|
+
- **Removed** GA, memetic, adaptive cooling (benchmark-proven inferior)
|
|
21
|
+
- **Fixed** critical tabu_set eviction bug (deque auto-evicts, set never shrunk → 100% saturation)
|
|
22
|
+
- **Fixed** tabu aspiration criterion (global→current best, unblocks improving moves)
|
|
23
|
+
- **Fixed** tabu clear on reheat (frozen search after plateau)
|
|
24
|
+
- **Fixed** ILS seeding (all inner SA got same seed → zero diversity)
|
|
25
|
+
- **Fixed** ILS budget division (each step got full budget → 11× overspend)
|
|
26
|
+
- **Fixed** parallel SA pickle error (pass rules/dict/weights, not closure)
|
|
27
|
+
- **Updated** defaults: T_start=100 (was 1000), cooling=0.9999 (was 0.9995)
|
|
28
|
+
- Benchmark results (English 26-letter, 5 seeds):
|
|
29
|
+
|
|
30
|
+
| Method | Mean Score | Time |
|
|
31
|
+
|--------|-----------|------|
|
|
32
|
+
| ILS(5) | 19.6 | 4.6s |
|
|
33
|
+
| SA+tabu | 19.2 | 1.6s |
|
|
34
|
+
| SA | 18.2 | 0.9s |
|
|
35
|
+
|
|
36
|
+
## Next: v0.6.0 — Quality & Polish
|
|
37
|
+
|
|
38
|
+
### Smart initialization
|
|
39
|
+
- **Effort:** 2-3h
|
|
40
|
+
- **What:** Greedy init — place frequent bigrams adjacent, then SA refines. Could also use letter-pair frequency from dictionary to seed the permutation.
|
|
41
|
+
- **Why:** Random init wastes early SA iterations. A greedy start could increase coverage 2-3×.
|
|
42
|
+
|
|
43
|
+
### Dictionary download on demand
|
|
44
|
+
- **Effort:** 1-2h
|
|
45
|
+
- **What:** `load_dictionary(download=True)` — fetch from GitHub URL on first use, cache in `~/.optalph/`. English from dwyl/english-words, Russian from built-in or URL.
|
|
46
|
+
- **Why:** Package can't bundle 35MB data. Users need zero-config dictionary access.
|
|
47
|
+
- **Dep:** `requests` or `urllib` for download
|
|
48
|
+
|
|
49
|
+
### Better coverage metrics
|
|
50
|
+
- **Effort:** ~1h
|
|
51
|
+
- **What:** Show coverage as a percentage of eligible words, not just a raw count, where "eligible" means dictionary words of a matching length. Also show the theoretical maximum (all positions filled).
|
|
52
|
+
- **Why:** "28/25237 (0.1%)" looks bad but max is ~24 positions × words-per-position. Contextual metrics are more meaningful.
|
|
53
|
+
|
|
54
|
+
## Next: v0.7.0 — Visibility & Distribution
|
|
55
|
+
|
|
56
|
+
### GitHub repo + CI
|
|
57
|
+
- **Effort:** 1h
|
|
58
|
+
- **What:** Push to GitHub, add GitHub Actions (pytest + mypy + ruff), test coverage badge
|
|
59
|
+
- **Why:** Home for the project, bug reports, PRs
|
|
60
|
+
|
|
61
|
+
### PyPI publish
|
|
62
|
+
- **Effort:** 30min
|
|
63
|
+
- **What:** `uv publish`, proper classifiers, changelog
|
|
64
|
+
- **Dep:** GitHub repo first
|
|
65
|
+
|
|
66
|
+
### Streamlit web demo
|
|
67
|
+
- **Effort:** 2-3h
|
|
68
|
+
- **What:** Pick alphabet, choose rules, run SA, show annotated permutation + convergence plot
|
|
69
|
+
- **Why:** Roughly 10× the visibility of a CLI-only tool
|
|
70
|
+
|
|
71
|
+
## Backlog
|
|
72
|
+
|
|
73
|
+
- Result comparison mode (`--compare prev.json`)
|
|
74
|
+
- Custom cooling schedule functions
|
|
75
|
+
- Rich library for terminal output (table, progress, colored)
|
|
76
|
+
- IncrementalEvaluator for SkipAdjacent (complex, low ROI)
|
|
77
|
+
- REST API / HTTP endpoint
|
|
78
|
+
- Word frequency integration with NLTK/norvig data
|
optalph-1.0.0/README.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# optalph
|
|
2
|
+
|
|
3
|
+
**Optimize alphabet permutation to maximize dictionary word coverage.**
|
|
4
|
+
|
|
5
|
+
[CI](https://github.com/4bstr4ct/optalph/actions/workflows/ci.yml)
|
|
6
|
+
[PyPI](https://pypi.org/project/optalph/)
|
|
7
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
|
|
10
|
+
Supports Russian and English alphabets (auto-detected from dictionary), incremental evaluation, 2D grid extraction (Boggle-like), weighted scoring, and parallel multistart.
|
|
11
|
+
|
|
12
|
+
Requires Python >= 3.10.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install optalph
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Or with [pipx](https://pypa.github.io/pipx/) (recommended for CLI tools):
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pipx install optalph
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Optional dependencies
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Progress bar
|
|
30
|
+
pip install optalph[tqdm]
|
|
31
|
+
|
|
32
|
+
# Russian DAWG dictionary (fast, 1.5M word forms)
|
|
33
|
+
pip install optalph[dawg]
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Easiest: download English dictionary automatically
|
|
40
|
+
optalph --download english -w 3 4 5 --iter 500000
|
|
41
|
+
|
|
42
|
+
# Russian with DAWG dictionary
|
|
43
|
+
optalph -w 3 4 5
|
|
44
|
+
|
|
45
|
+
# Custom dictionary file
|
|
46
|
+
optalph -w 3 4 5 --dict-file words.txt --no-dawg
|
|
47
|
+
|
|
48
|
+
# Weighted scoring (frequency file: "word count" per line)
|
|
49
|
+
optalph -w 3 4 5 --freq-file freqs.txt --dict-file words.txt --no-dawg
|
|
50
|
+
|
|
51
|
+
# 2D Boggle-like grid
|
|
52
|
+
optalph --grid-rows 4 --grid-cols 8 --dict-file words.txt --no-dawg
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Alphabet is auto-detected from the dictionary. Use `--alphabet` to override.
|
|
56
|
+
|
|
57
|
+
### Dictionary Sources
|
|
58
|
+
|
|
59
|
+
- **`--download english`**: Downloads and caches ~370k English words in `~/.optalph/`
|
|
60
|
+
- **`--download russian`**: Downloads and caches Russian words in `~/.optalph/`
|
|
61
|
+
- **DAWG**: `pymorphy2-dicts-ru` (install via `[dawg]` extra, provides 1.5M Russian word forms)
|
|
62
|
+
- **Custom**: any UTF-8 text file, one word per line
|
|
63
|
+
|
|
64
|
+
## Optimization Methods
|
|
65
|
+
|
|
66
|
+
| Method | Flag | Description |
|
|
67
|
+
|--------|------|-------------|
|
|
68
|
+
| SA | `--method sa` (default) | Simulated annealing with an optional tabu list (`--tabu`) |
|
|
69
|
+
| ILS | `--method ils` | Iterated local search — SA with perturbation restarts |
|
|
70
|
+
| Multistart | `-r N` | N independent SA runs, best result wins |
|
|
71
|
+
| Parallel | `-r N --parallel` | Multistart with ProcessPoolExecutor |
|
|
72
|
+
|
|
73
|
+
### Key Parameters
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
--iter N # Total iterations (default: 500000)
|
|
77
|
+
--t-start T # Starting temperature (default: 100)
|
|
78
|
+
--cooling R # Cooling rate (default: 0.9999)
|
|
79
|
+
--tabu # Enable tabu list (prevents cycling)
|
|
80
|
+
--tabu-tenure N # Tabu tenure (default: 20)
|
|
81
|
+
--seed N # Reproducible results
|
|
82
|
+
--incremental # Use incremental evaluator (faster for large alphabets)
|
|
83
|
+
--ils-steps N # ILS perturbation steps (default: 10)
|
|
84
|
+
--ils-perturb N # ILS perturbation strength (default: 3)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Extraction Rules
|
|
88
|
+
|
|
89
|
+
- **Adjacent(k)**: Contiguous k-character substrings (e.g. `-w 3` → all 3-letter substrings)
|
|
90
|
+
- **SkipAdjacent(k, step)**: Every `step`-th character in a k-length window (`--skip k step`)
|
|
91
|
+
- **GridRule(rows, cols)**: 2D grid, 8-directional word extraction (`--grid-rows --grid-cols`)
|
|
92
|
+
- Multiple windows are composed: `-w 3 4 5` → Adjacent(3)+Adjacent(4)+Adjacent(5)
|
|
93
|
+
|
|
94
|
+
## Example Results
|
|
95
|
+
|
|
96
|
+
### English (26 letters, ~370k words)
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
$ optalph --download english -w 3 4 5 --iter 500000 --seed 42 --tabu
|
|
100
|
+
|
|
101
|
+
Best score: 29/25237 (initial: 7, +22)
|
|
102
|
+
Best permutation: jclownqkhumpfgridestabvxyz
|
|
103
|
+
jclownqkhumpfgridestabvxyz
|
|
104
|
+
^^^ jcl
|
|
105
|
+
^^^ clo
|
|
106
|
+
^^^^ clow
|
|
107
|
+
^^^^^ clown
|
|
108
|
+
^^^ low
|
|
109
|
+
^^^^ lown
|
|
110
|
+
^^^ own
|
|
111
|
+
Words: jcl, clo, clow, clown, low, lown, own, khu, hum, hump, ump, ...
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Russian with weighted scoring (33 letters, ~1.5M forms)
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
$ optalph -w 3 4 5 --iter 500000 --seed 42 --freq-file ru_50k.txt --tabu
|
|
118
|
+
|
|
119
|
+
Best score: 129.5 (initial: 12.3, +117.3)
|
|
120
|
+
Best permutation: фбризъцшщёкгспалевыйдячьюхнэтомуж
|
|
121
|
+
Words: бри, бриз, риз, изъ, щёк, спа, спал, пал, лев, левы, левый, ...
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Library Usage
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from optalph import (
|
|
128
|
+
evaluate, evaluate_detailed, IncrementalEvaluator,
|
|
129
|
+
simulated_annealing, iterated_local_search, multistart_sa,
|
|
130
|
+
greedy_init, build_rules, RUSSIAN_ALPHABET, ENGLISH_ALPHABET,
|
|
131
|
+
load_dictionary, download_dictionary, load_frequencies,
|
|
132
|
+
OptimizerConfig, GridRule, IncrementalGridEvaluator,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Download and load dictionary
|
|
136
|
+
download_dictionary("english")
|
|
137
|
+
dictionary = load_dictionary(download="english", use_dawg=False, min_len=3, max_len=5)
|
|
138
|
+
|
|
139
|
+
# Build rules
|
|
140
|
+
rules = build_rules(adjacent_windows=[3, 4, 5])
|
|
141
|
+
|
|
142
|
+
# Evaluate
|
|
143
|
+
score = evaluate(ENGLISH_ALPHABET, rules, dictionary)
|
|
144
|
+
score, valid, invalid, breakdown = evaluate_detailed(ENGLISH_ALPHABET, rules, dictionary)
|
|
145
|
+
|
|
146
|
+
# Greedy initialization (bigram frequencies → Hamiltonian path)
|
|
147
|
+
initial = greedy_init(ENGLISH_ALPHABET, dictionary, window_sizes=[3, 4, 5])
|
|
148
|
+
|
|
149
|
+
# SA optimization
|
|
150
|
+
best, best_score, history, acc = simulated_annealing(
|
|
151
|
+
initial=initial,
|
|
152
|
+
score_fn=lambda p: evaluate(p, rules, dictionary),
|
|
153
|
+
config=OptimizerConfig(max_iterations=500000),
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
# Incremental evaluator (Adjacent rules only, O(1) per swap)
|
|
157
|
+
incr = IncrementalEvaluator.from_rules(rules, dictionary, len(ENGLISH_ALPHABET))
|
|
158
|
+
best, best_score, history, acc = simulated_annealing(
|
|
159
|
+
initial=initial, evaluator=incr,
|
|
160
|
+
config=OptimizerConfig(max_iterations=500000),
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# ILS (SA + perturbation restarts)
|
|
164
|
+
best, best_score, history, _ = iterated_local_search(
|
|
165
|
+
initial=initial, evaluator=incr,
|
|
166
|
+
config=OptimizerConfig(max_iterations=500000),
|
|
167
|
+
perturbation_strength=3, n_perturbations=5,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# Multistart (parallel)
|
|
171
|
+
best, best_score, history, acc = multistart_sa(
|
|
172
|
+
ENGLISH_ALPHABET,
|
|
173
|
+
score_fn=lambda p: evaluate(p, rules, dictionary),
|
|
174
|
+
config=OptimizerConfig(max_iterations=100000),
|
|
175
|
+
n_restarts=4, parallel=True,
|
|
176
|
+
rules=rules, dictionary=dictionary,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Weighted scoring
|
|
180
|
+
weights = load_frequencies("freqs.txt")
|
|
181
|
+
score = evaluate(ENGLISH_ALPHABET, rules, dictionary, weights=weights)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Algorithm Details
|
|
185
|
+
|
|
186
|
+
**Simulated Annealing** with:
|
|
187
|
+
- Geometric cooling schedule (T *= cooling_rate)
|
|
188
|
+
- Tabu list with aspiration criterion (override tabu if move improves current score)
|
|
189
|
+
- Plateau detection with scaling limit (`max(1000, max_iterations/100)`) and decaying reheat
|
|
190
|
+
- Greedy bigram initialization (+30% starting score over random)
|
|
191
|
+
- Parallel multistart via ProcessPoolExecutor
|
|
192
|
+
|
|
193
|
+
**Iterated Local Search**: SA → perturb (k random swaps) → SA → repeat. The iteration budget is divided equally across steps, and each step gets its own random seed and a recalculated cooling schedule.
|
|
194
|
+
|
|
195
|
+
**Incremental evaluation**: O(1) per swap attempt (only affected slots re-evaluated) vs O(n) full evaluation. Supported for Adjacent and Grid rules.
|
|
196
|
+
|
|
197
|
+
## Development
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
uv sync --extra dev
|
|
201
|
+
uv run ruff check .
|
|
202
|
+
uv run mypy src/optalph/
|
|
203
|
+
uv run pytest -q
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
method,config,seed,score,elapsed,iterations,acceptance_rate
|
|
2
|
+
SA,{},42,18.0,0.9331065740007034,500000,0.52283955308961
|
|
3
|
+
SA,{},43,19.0,0.9003044849996513,500000,0.516368255898544
|
|
4
|
+
SA,{},44,18.0,0.909148651000578,500000,0.5180186538398897
|
|
5
|
+
SA,{},45,18.0,0.9672747950007761,500000,0.5158579354824699
|
|
6
|
+
SA,{},46,18.0,0.9556550019988208,500000,0.5199947882170274
|
|
7
|
+
SA+tabu,{},42,19.0,1.6473954120010603,500000,0.2793829502690262
|
|
8
|
+
SA+tabu,{},43,19.0,1.5804396199982875,500000,0.2910714859437751
|
|
9
|
+
SA+tabu,{},44,20.0,1.5668150679994142,500000,0.28163005650563183
|
|
10
|
+
SA+tabu,{},45,19.0,1.5041171130014845,500000,0.28891682513751865
|
|
11
|
+
SA+tabu,{},46,19.0,1.7165438679985527,500000,0.25414203506567806
|
|
12
|
+
SA+incr,{},42,18.0,1.2260328290012694,500000,0.52283955308961
|
|
13
|
+
SA+incr,{},43,19.0,1.1446929390003788,500000,0.516368255898544
|
|
14
|
+
SA+incr,{},44,18.0,1.2415487660000508,500000,0.5180186538398897
|
|
15
|
+
SA+incr,{},45,18.0,1.2080060460011737,500000,0.5158579354824699
|
|
16
|
+
SA+incr,{},46,18.0,1.1524075500001345,500000,0.5199947882170274
|
|
17
|
+
ILS(3),{},42,18.0,3.393292286998985,500000,0.0
|
|
18
|
+
ILS(3),{},43,19.0,3.529312396000023,500000,0.0
|
|
19
|
+
ILS(3),{},44,20.0,3.638003642001422,500000,0.0
|
|
20
|
+
ILS(3),{},45,20.0,3.622072569998636,500000,0.0
|
|
21
|
+
ILS(3),{},46,19.0,3.5928619589994923,500000,0.0
|
|
22
|
+
ILS(5),{},42,20.0,4.7638063819995295,500000,0.0
|
|
23
|
+
ILS(5),{},43,19.0,4.665814296999088,500000,0.0
|
|
24
|
+
ILS(5),{},44,20.0,4.5788373529994715,500000,0.0
|
|
25
|
+
ILS(5),{},45,20.0,4.445144731000255,500000,0.0
|
|
26
|
+
ILS(5),{},46,19.0,4.414251428999705,500000,0.0
|
|
27
|
+
ILS(5)+incr,{},42,20.0,5.8953090720006,500000,0.0
|
|
28
|
+
ILS(5)+incr,{},43,19.0,5.864872490999915,500000,0.0
|
|
29
|
+
ILS(5)+incr,{},44,20.0,5.947618627000338,500000,0.0
|
|
30
|
+
ILS(5)+incr,{},45,20.0,5.740321225999651,500000,0.0
|
|
31
|
+
ILS(5)+incr,{},46,19.0,6.091732259999844,500000,0.0
|
|
32
|
+
"MultiSA(4,seq)",{},42,19.0,3.2937221880001744,2000000,0.5198916383456932
|
|
33
|
+
"MultiSA(4,seq)",{},43,18.0,3.3281649869986722,2000000,0.5206326887371198
|
|
34
|
+
"MultiSA(4,seq)",{},44,18.0,3.495989789000305,2000000,0.520838988479788
|
|
35
|
+
"MultiSA(4,seq)",{},45,19.0,3.4421152000013535,2000000,0.5206598334401025
|
|
36
|
+
"MultiSA(4,seq)",{},46,20.0,3.4699412529989786,2000000,0.5196581939000423
|
|
37
|
+
"MultiSA(4,par)",{},42,19.0,1.2119941180008027,2000000,0.5198916383456932
|
|
38
|
+
"MultiSA(4,par)",{},43,18.0,1.0325349449994974,2000000,0.5206326887371198
|
|
39
|
+
"MultiSA(4,par)",{},44,18.0,1.0071037489997252,2000000,0.520838988479788
|
|
40
|
+
"MultiSA(4,par)",{},45,19.0,0.9964653509996424,2000000,0.5206598334401025
|
|
41
|
+
"MultiSA(4,par)",{},46,20.0,1.0318838949988276,2000000,0.5196581939000423
|