merkmal 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- merkmal-0.1.0/LICENSE +21 -0
- merkmal-0.1.0/PKG-INFO +232 -0
- merkmal-0.1.0/README.md +201 -0
- merkmal-0.1.0/pyproject.toml +75 -0
- merkmal-0.1.0/setup.cfg +4 -0
- merkmal-0.1.0/src/merkmal/__init__.py +119 -0
- merkmal-0.1.0/src/merkmal/analysis.py +485 -0
- merkmal-0.1.0/src/merkmal/audit.py +137 -0
- merkmal-0.1.0/src/merkmal/common.py +48 -0
- merkmal-0.1.0/src/merkmal/data/classes.tsv +21 -0
- merkmal-0.1.0/src/merkmal/data/classfeat/weights.json +303 -0
- merkmal-0.1.0/src/merkmal/data/features.tsv +123 -0
- merkmal-0.1.0/src/merkmal/data/pbase/ipa2allfeatures.csv +1274 -0
- merkmal-0.1.0/src/merkmal/data/pbase/pbase_license.txt +107 -0
- merkmal-0.1.0/src/merkmal/data/pbase/readme.txt +24 -0
- merkmal-0.1.0/src/merkmal/data/pbase/seg_convert.csv +2112 -0
- merkmal-0.1.0/src/merkmal/data/phoible/segments.tsv +3143 -0
- merkmal-0.1.0/src/merkmal/data/sounds.tsv +779 -0
- merkmal-0.1.0/src/merkmal/data/transcriptions/upa.tsv +119 -0
- merkmal-0.1.0/src/merkmal/dataset.py +92 -0
- merkmal-0.1.0/src/merkmal/exporters.py +166 -0
- merkmal-0.1.0/src/merkmal/geometry.py +606 -0
- merkmal-0.1.0/src/merkmal/protocol.py +50 -0
- merkmal-0.1.0/src/merkmal/registry.py +258 -0
- merkmal-0.1.0/src/merkmal/representations.py +52 -0
- merkmal-0.1.0/src/merkmal/resources.py +99 -0
- merkmal-0.1.0/src/merkmal/segmentation.py +125 -0
- merkmal-0.1.0/src/merkmal/systems/__init__.py +19 -0
- merkmal-0.1.0/src/merkmal/systems/broad.py +38 -0
- merkmal-0.1.0/src/merkmal/systems/categorical.py +673 -0
- merkmal-0.1.0/src/merkmal/systems/classfeat.py +687 -0
- merkmal-0.1.0/src/merkmal/systems/descriptive.py +36 -0
- merkmal-0.1.0/src/merkmal/systems/distinctive.py +357 -0
- merkmal-0.1.0/src/merkmal/systems/pbase.py +354 -0
- merkmal-0.1.0/src/merkmal/systems/phoible.py +224 -0
- merkmal-0.1.0/src/merkmal/upa.py +440 -0
- merkmal-0.1.0/src/merkmal.egg-info/PKG-INFO +232 -0
- merkmal-0.1.0/src/merkmal.egg-info/SOURCES.txt +55 -0
- merkmal-0.1.0/src/merkmal.egg-info/dependency_links.txt +1 -0
- merkmal-0.1.0/src/merkmal.egg-info/requires.txt +7 -0
- merkmal-0.1.0/src/merkmal.egg-info/top_level.txt +1 -0
- merkmal-0.1.0/tests/test_analysis.py +241 -0
- merkmal-0.1.0/tests/test_audit.py +34 -0
- merkmal-0.1.0/tests/test_broad.py +88 -0
- merkmal-0.1.0/tests/test_classfeat.py +266 -0
- merkmal-0.1.0/tests/test_dataset.py +56 -0
- merkmal-0.1.0/tests/test_descriptive.py +140 -0
- merkmal-0.1.0/tests/test_distinctive.py +93 -0
- merkmal-0.1.0/tests/test_exporters.py +62 -0
- merkmal-0.1.0/tests/test_functional_api.py +38 -0
- merkmal-0.1.0/tests/test_geometry.py +329 -0
- merkmal-0.1.0/tests/test_pbase.py +109 -0
- merkmal-0.1.0/tests/test_phoible.py +100 -0
- merkmal-0.1.0/tests/test_registry.py +42 -0
- merkmal-0.1.0/tests/test_representations.py +17 -0
- merkmal-0.1.0/tests/test_segmentation.py +126 -0
- merkmal-0.1.0/tests/test_upa.py +538 -0
merkmal-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2019-2026 Tiago Tresoldi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
merkmal-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: merkmal
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Standalone phonological feature systems for historical linguistics
|
|
5
|
+
Author-email: Tiago Tresoldi <tiago.tresoldi@lingfil.uu.se>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tresoldi/merkmal
|
|
8
|
+
Project-URL: Documentation, https://github.com/tresoldi/merkmal#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/tresoldi/merkmal
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/tresoldi/merkmal/issues
|
|
11
|
+
Keywords: phonology,phonological features,historical linguistics,sequence alignment,cognate detection
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.12
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff; extra == "dev"
|
|
28
|
+
Requires-Dist: mypy; extra == "dev"
|
|
29
|
+
Requires-Dist: build; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# merkmal
|
|
33
|
+
|
|
34
|
+
`merkmal` is a standalone Python package for manipulating phonological
|
|
35
|
+
features. Zero runtime dependencies, Python 3.12+.
|
|
36
|
+
|
|
37
|
+
It provides:
|
|
38
|
+
|
|
39
|
+
- bundled phonological feature datasets
|
|
40
|
+
- pluggable feature systems (9 built-in)
|
|
41
|
+
- feature geometry and distance functions (Clements & Hume 1995)
|
|
42
|
+
- tonal geometry (Yip/Bao)
|
|
43
|
+
- query and analysis helpers for graphemes and feature sets
|
|
44
|
+
- UPA transcription support
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
Install from PyPI:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install merkmal
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Development install:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
git clone https://github.com/tresoldi/merkmal.git
|
|
58
|
+
cd merkmal
|
|
59
|
+
pip install -e ".[dev]"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Run checks:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
ruff check .
|
|
66
|
+
mypy src
|
|
67
|
+
pytest -q
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Quick start
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
import merkmal
|
|
74
|
+
|
|
75
|
+
# Built-in systems
|
|
76
|
+
print(merkmal.list_systems())
|
|
77
|
+
# ['descriptive', 'broad', 'distinctive', 'pbase-hc', 'pbase-jfh',
|
|
78
|
+
# 'pbase-spe', 'pbase-uftc', 'phoible', 'classfeat']
|
|
79
|
+
|
|
80
|
+
# Basic grapheme lookup
|
|
81
|
+
print(merkmal.get_features("p"))
|
|
82
|
+
# frozenset({'consonant', 'voiceless', 'bilabial', 'stop'})
|
|
83
|
+
|
|
84
|
+
# Predefined sound classes
|
|
85
|
+
print(merkmal.get_class_features("V"))
|
|
86
|
+
# frozenset({'vowel'})
|
|
87
|
+
|
|
88
|
+
# Distance
|
|
89
|
+
print(merkmal.distance("a", "e"))
|
|
90
|
+
print(merkmal.distance("p", "b", system="classfeat"))
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Systems
|
|
94
|
+
|
|
95
|
+
| System | Type | Features | Distance |
|
|
96
|
+
|--------|------|----------|----------|
|
|
97
|
+
| `descriptive` | categorical | articulatory | geometry-weighted |
|
|
98
|
+
| `broad` | categorical | simplified | geometry-weighted |
|
|
99
|
+
| `distinctive` | privative | Clements & Hume | geometry-weighted |
|
|
100
|
+
| `pbase-hc`, `pbase-jfh`, `pbase-spe`, `pbase-uftc` | multi-state | 4 theoretical families | geometry-weighted |
|
|
101
|
+
| `phoible` | binary | 37 features | Hamming |
|
|
102
|
+
| `classfeat` | hybrid | sound classes + continuous | trained weights |
|
|
103
|
+
|
|
104
|
+
All systems implement the same `FeatureSystem` protocol. Distances, queries,
|
|
105
|
+
matrices, and natural class derivation work across all of them.
|
|
106
|
+
|
|
107
|
+
## Working with systems
|
|
108
|
+
|
|
109
|
+
You can use the lazy default registry through top-level helpers, or work
|
|
110
|
+
with a specific system object.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
import merkmal
|
|
114
|
+
|
|
115
|
+
descriptive = merkmal.get_system("descriptive")
|
|
116
|
+
distinctive = merkmal.get_system("distinctive")
|
|
117
|
+
pbase = merkmal.get_system("pbase-hc")
|
|
118
|
+
|
|
119
|
+
print(descriptive.grapheme_to_features("a"))
|
|
120
|
+
print(distinctive.grapheme_to_features("a"))
|
|
121
|
+
print(pbase.grapheme_to_representation("a"))
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Exact reverse lookup is available when a native representation maps directly to
|
|
125
|
+
a known grapheme.
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
descriptive = merkmal.get_system("descriptive")
|
|
129
|
+
|
|
130
|
+
grapheme = descriptive.features_to_grapheme(
|
|
131
|
+
frozenset({"consonant", "voiced", "bilabial", "stop"})
|
|
132
|
+
)
|
|
133
|
+
print(grapheme)
|
|
134
|
+
# 'b'
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Feature queries
|
|
138
|
+
|
|
139
|
+
Use `features_to_graphemes(...)` to find all graphemes matching a feature set.
|
|
140
|
+
Matching is partial by default.
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
import merkmal
|
|
144
|
+
|
|
145
|
+
vowels = merkmal.features_to_graphemes(frozenset({"vowel"}))
|
|
146
|
+
print(vowels[:10])
|
|
147
|
+
|
|
148
|
+
# Exact matching
|
|
149
|
+
features = merkmal.get_features("a")
|
|
150
|
+
print(merkmal.features_to_graphemes(features, exact=True))
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Natural classes and matrices
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
import merkmal
|
|
157
|
+
|
|
158
|
+
# Shared features of a segment set
|
|
159
|
+
print(merkmal.derive_class_features(["p", "t", "k"]))
|
|
160
|
+
# frozenset({'consonant', 'voiceless', 'stop'})
|
|
161
|
+
|
|
162
|
+
# Minimal distinguishing matrix
|
|
163
|
+
matrix = merkmal.minimal_matrix(["t", "d", "s"])
|
|
164
|
+
print(merkmal.tabulate_matrix(matrix))
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
```text
|
|
168
|
+
grapheme | continuant | voiced
|
|
169
|
+
---------+------------+-------
|
|
170
|
+
t | False | False
|
|
171
|
+
d | False | True
|
|
172
|
+
s | True | False
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Distance
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
import merkmal
|
|
179
|
+
|
|
180
|
+
print(merkmal.distance("a", "e"))
|
|
181
|
+
print(merkmal.distance("a", "u"))
|
|
182
|
+
print(merkmal.distance("p", "b"))
|
|
183
|
+
print(merkmal.distance("t", "d", system="pbase-hc"))
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
You can also supply a precomputed nested dictionary:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
precomputed = {"a": {"e": 1.5, "u": 2.0}, "p": {"b": 0.5}}
|
|
190
|
+
print(merkmal.distance("a", "e", precomputed=precomputed))
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Multi-state systems (P-base)
|
|
194
|
+
|
|
195
|
+
P-base-derived systems expose multi-state values (`+`, `-`, `n`, `.`, `o`, `x`)
|
|
196
|
+
through `FeatureState`.
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
import merkmal
|
|
200
|
+
|
|
201
|
+
rep = merkmal.get_representation("a", system="pbase-hc")
|
|
202
|
+
print(rep.values["syllabic"])
|
|
203
|
+
# FeatureState.POSITIVE
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
The bundled P-base table is derived, not verbatim. Duplicate rows with
|
|
207
|
+
conflicting values have the conflicting cells downgraded to `.`
|
|
208
|
+
(`FeatureState.DOT`). The P-base data retains its own attribution and license
|
|
209
|
+
notice in `src/merkmal/data/pbase/`.
|
|
210
|
+
|
|
211
|
+
## Custom datasets
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from merkmal import create_registry, load_dataset
|
|
215
|
+
|
|
216
|
+
dataset = load_dataset(directory="my_feature_data")
|
|
217
|
+
registry = create_registry(dataset=dataset)
|
|
218
|
+
system = registry.get_system("descriptive")
|
|
219
|
+
print(system.grapheme_to_features("k"))
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Expected files in `my_feature_data/`: `sounds.tsv`, `classes.tsv`, `features.tsv`.
|
|
223
|
+
|
|
224
|
+
## Documentation
|
|
225
|
+
|
|
226
|
+
See the [tutorials](docs/tutorials/) for worked examples covering phonological
|
|
227
|
+
features, typology, historical linguistics, cognate detection, and UPA
|
|
228
|
+
transcription.
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
|
|
232
|
+
MIT. See [LICENSE](LICENSE).
|
merkmal-0.1.0/README.md
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# merkmal
|
|
2
|
+
|
|
3
|
+
`merkmal` is a standalone Python package for manipulating phonological
|
|
4
|
+
features. Zero runtime dependencies, Python 3.12+.
|
|
5
|
+
|
|
6
|
+
It provides:
|
|
7
|
+
|
|
8
|
+
- bundled phonological feature datasets
|
|
9
|
+
- pluggable feature systems (9 built-in)
|
|
10
|
+
- feature geometry and distance functions (Clements & Hume 1995)
|
|
11
|
+
- tonal geometry (Yip/Bao)
|
|
12
|
+
- query and analysis helpers for graphemes and feature sets
|
|
13
|
+
- UPA transcription support
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
Install from PyPI:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install merkmal
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Development install:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/tresoldi/merkmal.git
|
|
27
|
+
cd merkmal
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Run checks:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
ruff check .
|
|
35
|
+
mypy src
|
|
36
|
+
pytest -q
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import merkmal
|
|
43
|
+
|
|
44
|
+
# Built-in systems
|
|
45
|
+
print(merkmal.list_systems())
|
|
46
|
+
# ['descriptive', 'broad', 'distinctive', 'pbase-hc', 'pbase-jfh',
|
|
47
|
+
# 'pbase-spe', 'pbase-uftc', 'phoible', 'classfeat']
|
|
48
|
+
|
|
49
|
+
# Basic grapheme lookup
|
|
50
|
+
print(merkmal.get_features("p"))
|
|
51
|
+
# frozenset({'consonant', 'voiceless', 'bilabial', 'stop'})
|
|
52
|
+
|
|
53
|
+
# Predefined sound classes
|
|
54
|
+
print(merkmal.get_class_features("V"))
|
|
55
|
+
# frozenset({'vowel'})
|
|
56
|
+
|
|
57
|
+
# Distance
|
|
58
|
+
print(merkmal.distance("a", "e"))
|
|
59
|
+
print(merkmal.distance("p", "b", system="classfeat"))
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Systems
|
|
63
|
+
|
|
64
|
+
| System | Type | Features | Distance |
|
|
65
|
+
|--------|------|----------|----------|
|
|
66
|
+
| `descriptive` | categorical | articulatory | geometry-weighted |
|
|
67
|
+
| `broad` | categorical | simplified | geometry-weighted |
|
|
68
|
+
| `distinctive` | privative | Clements & Hume | geometry-weighted |
|
|
69
|
+
| `pbase-hc`, `pbase-jfh`, `pbase-spe`, `pbase-uftc` | multi-state | 4 theoretical families | geometry-weighted |
|
|
70
|
+
| `phoible` | binary | 37 features | Hamming |
|
|
71
|
+
| `classfeat` | hybrid | sound classes + continuous | trained weights |
|
|
72
|
+
|
|
73
|
+
All systems implement the same `FeatureSystem` protocol. Distances, queries,
|
|
74
|
+
matrices, and natural class derivation work across all of them.
|
|
75
|
+
|
|
76
|
+
## Working with systems
|
|
77
|
+
|
|
78
|
+
You can use the lazy default registry through top-level helpers, or work
|
|
79
|
+
with a specific system object.
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import merkmal
|
|
83
|
+
|
|
84
|
+
descriptive = merkmal.get_system("descriptive")
|
|
85
|
+
distinctive = merkmal.get_system("distinctive")
|
|
86
|
+
pbase = merkmal.get_system("pbase-hc")
|
|
87
|
+
|
|
88
|
+
print(descriptive.grapheme_to_features("a"))
|
|
89
|
+
print(distinctive.grapheme_to_features("a"))
|
|
90
|
+
print(pbase.grapheme_to_representation("a"))
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Exact reverse lookup is available when a native representation maps directly to
|
|
94
|
+
a known grapheme.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
descriptive = merkmal.get_system("descriptive")
|
|
98
|
+
|
|
99
|
+
grapheme = descriptive.features_to_grapheme(
|
|
100
|
+
frozenset({"consonant", "voiced", "bilabial", "stop"})
|
|
101
|
+
)
|
|
102
|
+
print(grapheme)
|
|
103
|
+
# 'b'
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Feature queries
|
|
107
|
+
|
|
108
|
+
Use `features_to_graphemes(...)` to find all graphemes matching a feature set.
|
|
109
|
+
Matching is partial by default.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
import merkmal
|
|
113
|
+
|
|
114
|
+
vowels = merkmal.features_to_graphemes(frozenset({"vowel"}))
|
|
115
|
+
print(vowels[:10])
|
|
116
|
+
|
|
117
|
+
# Exact matching
|
|
118
|
+
features = merkmal.get_features("a")
|
|
119
|
+
print(merkmal.features_to_graphemes(features, exact=True))
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Natural classes and matrices
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import merkmal
|
|
126
|
+
|
|
127
|
+
# Shared features of a segment set
|
|
128
|
+
print(merkmal.derive_class_features(["p", "t", "k"]))
|
|
129
|
+
# frozenset({'consonant', 'voiceless', 'stop'})
|
|
130
|
+
|
|
131
|
+
# Minimal distinguishing matrix
|
|
132
|
+
matrix = merkmal.minimal_matrix(["t", "d", "s"])
|
|
133
|
+
print(merkmal.tabulate_matrix(matrix))
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
```text
|
|
137
|
+
grapheme | continuant | voiced
|
|
138
|
+
---------+------------+-------
|
|
139
|
+
t | False | False
|
|
140
|
+
d | False | True
|
|
141
|
+
s | True | False
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Distance
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
import merkmal
|
|
148
|
+
|
|
149
|
+
print(merkmal.distance("a", "e"))
|
|
150
|
+
print(merkmal.distance("a", "u"))
|
|
151
|
+
print(merkmal.distance("p", "b"))
|
|
152
|
+
print(merkmal.distance("t", "d", system="pbase-hc"))
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
You can also supply a precomputed nested dictionary:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
precomputed = {"a": {"e": 1.5, "u": 2.0}, "p": {"b": 0.5}}
|
|
159
|
+
print(merkmal.distance("a", "e", precomputed=precomputed))
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Multi-state systems (P-base)
|
|
163
|
+
|
|
164
|
+
P-base-derived systems expose multi-state values (`+`, `-`, `n`, `.`, `o`, `x`)
|
|
165
|
+
through `FeatureState`.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
import merkmal
|
|
169
|
+
|
|
170
|
+
rep = merkmal.get_representation("a", system="pbase-hc")
|
|
171
|
+
print(rep.values["syllabic"])
|
|
172
|
+
# FeatureState.POSITIVE
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The bundled P-base table is derived, not verbatim. Duplicate rows with
|
|
176
|
+
conflicting values have the conflicting cells downgraded to `.`
|
|
177
|
+
(`FeatureState.DOT`). The P-base data retains its own attribution and license
|
|
178
|
+
notice in `src/merkmal/data/pbase/`.
|
|
179
|
+
|
|
180
|
+
## Custom datasets
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from merkmal import create_registry, load_dataset
|
|
184
|
+
|
|
185
|
+
dataset = load_dataset(directory="my_feature_data")
|
|
186
|
+
registry = create_registry(dataset=dataset)
|
|
187
|
+
system = registry.get_system("descriptive")
|
|
188
|
+
print(system.grapheme_to_features("k"))
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Expected files in `my_feature_data/`: `sounds.tsv`, `classes.tsv`, `features.tsv`.
|
|
192
|
+
|
|
193
|
+
## Documentation
|
|
194
|
+
|
|
195
|
+
See the [tutorials](docs/tutorials/) for worked examples covering phonological
|
|
196
|
+
features, typology, historical linguistics, cognate detection, and UPA
|
|
197
|
+
transcription.
|
|
198
|
+
|
|
199
|
+
## License
|
|
200
|
+
|
|
201
|
+
MIT. See [LICENSE](LICENSE).
|
merkmal-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"]  # the SPDX `license = "MIT"` string and `license-files` key below (PEP 639) are rejected by older setuptools
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "merkmal"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Standalone phonological feature systems for historical linguistics"
|
|
9
|
+
authors = [
|
|
10
|
+
{name = "Tiago Tresoldi", email = "tiago.tresoldi@lingfil.uu.se"}
|
|
11
|
+
]
|
|
12
|
+
license = "MIT"
|
|
13
|
+
license-files = ["LICENSE"]
|
|
14
|
+
readme = "README.md"
|
|
15
|
+
requires-python = ">=3.12"
|
|
16
|
+
keywords = [
|
|
17
|
+
"phonology",
|
|
18
|
+
"phonological features",
|
|
19
|
+
"historical linguistics",
|
|
20
|
+
"sequence alignment",
|
|
21
|
+
"cognate detection",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3.12",
|
|
29
|
+
"Programming Language :: Python :: 3.13",
|
|
30
|
+
"Topic :: Scientific/Engineering",
|
|
31
|
+
"Topic :: Text Processing :: Linguistic",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
]
|
|
34
|
+
dependencies = []
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = ["pytest>=7.0", "pytest-cov", "ruff", "mypy", "build"]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/tresoldi/merkmal"
|
|
41
|
+
Documentation = "https://github.com/tresoldi/merkmal#readme"
|
|
42
|
+
Repository = "https://github.com/tresoldi/merkmal"
|
|
43
|
+
"Bug Tracker" = "https://github.com/tresoldi/merkmal/issues"
|
|
44
|
+
|
|
45
|
+
[tool.setuptools]
|
|
46
|
+
package-dir = {"" = "src"}
|
|
47
|
+
include-package-data = true
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["src"]
|
|
51
|
+
include = ["merkmal*"]
|
|
52
|
+
|
|
53
|
+
[tool.setuptools.package-data]
|
|
54
|
+
merkmal = ["data/*.tsv", "data/pbase/*", "data/phoible/*", "data/classfeat/*", "data/transcriptions/*"]
|
|
55
|
+
|
|
56
|
+
[tool.ruff]
|
|
57
|
+
target-version = "py312"
|
|
58
|
+
line-length = 99
|
|
59
|
+
|
|
60
|
+
[tool.ruff.lint]
|
|
61
|
+
select = ["E", "F", "W", "I", "N", "UP", "B", "A", "SIM", "TCH"]
|
|
62
|
+
|
|
63
|
+
[tool.mypy]
|
|
64
|
+
python_version = "3.12"
|
|
65
|
+
strict = true
|
|
66
|
+
|
|
67
|
+
[tool.pytest.ini_options]
|
|
68
|
+
testpaths = ["tests"]
|
|
69
|
+
python_files = ["test_*.py"]
|
|
70
|
+
python_classes = ["Test*"]
|
|
71
|
+
python_functions = ["test_*"]
|
|
72
|
+
|
|
73
|
+
[tool.coverage.run]
|
|
74
|
+
source = ["merkmal"]
|
|
75
|
+
omit = ["*/tests/*", "*/test_*"]
|
merkmal-0.1.0/setup.cfg
ADDED
|
merkmal-0.1.0/src/merkmal/__init__.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Public API for the merkmal package."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from merkmal.analysis import (
|
|
6
|
+
FeatureMatrix,
|
|
7
|
+
derive_class_features,
|
|
8
|
+
distance,
|
|
9
|
+
features_to_graphemes,
|
|
10
|
+
minimal_matrix,
|
|
11
|
+
tabulate_matrix,
|
|
12
|
+
valued_distance,
|
|
13
|
+
valued_matches,
|
|
14
|
+
)
|
|
15
|
+
from merkmal.audit import DatasetAuditReport, audit_dataset
|
|
16
|
+
from merkmal.dataset import FeatureDataset, dataset_from_rows, load_builtin_dataset, load_dataset
|
|
17
|
+
from merkmal.exporters import export_class_features, export_distances, export_matrix
|
|
18
|
+
from merkmal.geometry import DEFAULT_GEOMETRY, FeatureNode, GeometryNode
|
|
19
|
+
from merkmal.protocol import FeatureSystem
|
|
20
|
+
from merkmal.registry import (
|
|
21
|
+
Registry,
|
|
22
|
+
add_features,
|
|
23
|
+
create_registry,
|
|
24
|
+
feature_distance,
|
|
25
|
+
features_to_grapheme,
|
|
26
|
+
get_class_features,
|
|
27
|
+
get_class_representation,
|
|
28
|
+
get_features,
|
|
29
|
+
get_registry,
|
|
30
|
+
get_representation,
|
|
31
|
+
get_system,
|
|
32
|
+
is_class,
|
|
33
|
+
list_systems,
|
|
34
|
+
matches,
|
|
35
|
+
partial_match,
|
|
36
|
+
register,
|
|
37
|
+
reset_registry,
|
|
38
|
+
segment_distance,
|
|
39
|
+
set_default,
|
|
40
|
+
set_registry,
|
|
41
|
+
sound_distance,
|
|
42
|
+
)
|
|
43
|
+
from merkmal.representations import (
|
|
44
|
+
CategoricalFeatures,
|
|
45
|
+
FeatureRepresentation,
|
|
46
|
+
FeatureState,
|
|
47
|
+
ValuedFeatures,
|
|
48
|
+
)
|
|
49
|
+
from merkmal.segmentation import merge_tone_digits, parse_chao_digits
|
|
50
|
+
from merkmal.upa import adapt as adapt_upa
|
|
51
|
+
from merkmal.upa import adapt_segment as adapt_upa_segment
|
|
52
|
+
from merkmal.upa import segment_upa
|
|
53
|
+
from merkmal.systems.broad import BroadFeatureSystem
|
|
54
|
+
from merkmal.systems.descriptive import DescriptiveFeatureSystem
|
|
55
|
+
from merkmal.systems.distinctive import DistinctiveFeatureSystem
|
|
56
|
+
from merkmal.systems.pbase import PBaseFeatureSystem
|
|
57
|
+
from merkmal.systems.classfeat import ClassFeatFeatureSystem
|
|
58
|
+
from merkmal.systems.phoible import PhoibleFeatureSystem
|
|
59
|
+
|
|
60
|
+
__all__ = [
|
|
61
|
+
"__version__",
|
|
62
|
+
"DEFAULT_GEOMETRY",
|
|
63
|
+
"BroadFeatureSystem",
|
|
64
|
+
"DescriptiveFeatureSystem",
|
|
65
|
+
"DistinctiveFeatureSystem",
|
|
66
|
+
"ClassFeatFeatureSystem",
|
|
67
|
+
"CategoricalFeatures",
|
|
68
|
+
"DatasetAuditReport",
|
|
69
|
+
"FeatureDataset",
|
|
70
|
+
"FeatureMatrix",
|
|
71
|
+
"FeatureNode",
|
|
72
|
+
"FeatureRepresentation",
|
|
73
|
+
"FeatureState",
|
|
74
|
+
"FeatureSystem",
|
|
75
|
+
"GeometryNode",
|
|
76
|
+
"PBaseFeatureSystem",
|
|
77
|
+
"PhoibleFeatureSystem",
|
|
78
|
+
"Registry",
|
|
79
|
+
"ValuedFeatures",
|
|
80
|
+
"adapt_upa",
|
|
81
|
+
"adapt_upa_segment",
|
|
82
|
+
"add_features",
|
|
83
|
+
"audit_dataset",
|
|
84
|
+
"create_registry",
|
|
85
|
+
"dataset_from_rows",
|
|
86
|
+
"derive_class_features",
|
|
87
|
+
"distance",
|
|
88
|
+
"feature_distance",
|
|
89
|
+
"export_class_features",
|
|
90
|
+
"export_distances",
|
|
91
|
+
"export_matrix",
|
|
92
|
+
"features_to_grapheme",
|
|
93
|
+
"features_to_graphemes",
|
|
94
|
+
"get_class_features",
|
|
95
|
+
"get_class_representation",
|
|
96
|
+
"get_features",
|
|
97
|
+
"get_registry",
|
|
98
|
+
"get_representation",
|
|
99
|
+
"get_system",
|
|
100
|
+
"is_class",
|
|
101
|
+
"list_systems",
|
|
102
|
+
"matches",
|
|
103
|
+
"merge_tone_digits",
|
|
104
|
+
"load_builtin_dataset",
|
|
105
|
+
"load_dataset",
|
|
106
|
+
"minimal_matrix",
|
|
107
|
+
"parse_chao_digits",
|
|
108
|
+
"partial_match",
|
|
109
|
+
"register",
|
|
110
|
+
"reset_registry",
|
|
111
|
+
"segment_distance",
|
|
112
|
+
"set_default",
|
|
113
|
+
"segment_upa",
|
|
114
|
+
"set_registry",
|
|
115
|
+
"sound_distance",
|
|
116
|
+
"tabulate_matrix",
|
|
117
|
+
"valued_distance",
|
|
118
|
+
"valued_matches",
|
|
119
|
+
]
|