olaph 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
olaph-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Institute of Information Systems at Hof University
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
olaph-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: olaph
3
+ Version: 0.1.0
4
+ Summary: A multilingual phonemizer combining lexica, NLP, and probabilistic scoring for improved phonemization accuracy.
5
+ Author-email: Johannes Wirth <johannes.wirth.3@iisys.de>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/iisys-hof/olaph
8
+ Project-URL: Documentation, https://github.com/iisys-hof/olaph#readme
9
+ Project-URL: Issues, https://github.com/iisys-hof/olaph/issues
10
+ Keywords: phonemizer,text-to-speech,linguistics,NLP,multilingual
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Text Processing :: Linguistic
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Requires-Python: >=3.12
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: spacy
19
+ Requires-Dist: lingua-language-detector==2.1.1
20
+ Requires-Dist: num2words==0.5.14
21
+ Requires-Dist: requests==2.32.5
22
+ Requires-Dist: annotated-types==0.7.0
23
+ Requires-Dist: blis==1.3.0
24
+ Requires-Dist: catalogue==2.0.10
25
+ Requires-Dist: certifi==2025.10.5
26
+ Requires-Dist: charset-normalizer==3.4.3
27
+ Requires-Dist: click==8.3.0
28
+ Requires-Dist: cloudpathlib==0.22.0
29
+ Requires-Dist: colorama==0.4.6
30
+ Requires-Dist: confection==0.1.5
31
+ Requires-Dist: cymem==2.0.11
32
+ Requires-Dist: de-core-news-sm
33
+ Requires-Dist: docopt==0.6.2
34
+ Requires-Dist: en-core-web-sm
35
+ Requires-Dist: es-core-news-sm
36
+ Requires-Dist: fr-core-news-sm
37
+ Requires-Dist: idna==3.10
38
+ Requires-Dist: jinja2==3.1.6
39
+ Requires-Dist: langcodes==3.5.0
40
+ Requires-Dist: language-data==1.3.0
41
+ Requires-Dist: marisa-trie==1.3.1
42
+ Requires-Dist: markdown-it-py==4.0.0
43
+ Requires-Dist: markupsafe==3.0.3
44
+ Requires-Dist: mdurl==0.1.2
45
+ Requires-Dist: murmurhash==1.0.13
46
+ Requires-Dist: numpy==2.3.3
47
+ Requires-Dist: packaging==25.0
48
+ Requires-Dist: preshed==3.0.10
49
+ Requires-Dist: pydantic==2.11.10
50
+ Requires-Dist: pydantic-core==2.33.2
51
+ Requires-Dist: pygments==2.19.2
52
+ Requires-Dist: rich==14.1.0
53
+ Requires-Dist: setuptools==80.9.0
54
+ Requires-Dist: shellingham==1.5.4
55
+ Requires-Dist: smart-open==7.3.1
56
+ Requires-Dist: spacy-legacy==3.0.12
57
+ Requires-Dist: spacy-loggers==1.0.5
58
+ Requires-Dist: srsly==2.5.1
59
+ Requires-Dist: thinc==8.3.6
60
+ Requires-Dist: tqdm==4.67.1
61
+ Requires-Dist: typer==0.19.2
62
+ Requires-Dist: typing-inspection==0.4.2
63
+ Requires-Dist: typing-extensions==4.15.0
64
+ Requires-Dist: urllib3==2.5.0
65
+ Requires-Dist: wasabi==1.1.3
66
+ Requires-Dist: weasel==0.4.1
67
+ Requires-Dist: wrapt==1.17.3
68
+ Provides-Extra: dev
69
+ Requires-Dist: pytest; extra == "dev"
70
+ Requires-Dist: black; extra == "dev"
71
+ Requires-Dist: flake8; extra == "dev"
72
+ Dynamic: license-file
73
+
74
+ # OLaPh — Optimal Language Phonemizer
75
+
76
+ [![PyPI version](https://img.shields.io/pypi/v/olaph.svg?logo=pypi)](https://pypi.org/project/olaph/)
77
+ [![Python versions](https://img.shields.io/pypi/pyversions/olaph.svg)](https://pypi.org/project/olaph/)
78
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
79
+
80
+ **OLaPh (Optimal Language Phonemizer)** is a multilingual phonemization framework that converts text into phonemes surpassing the quality of comparable frameworks.
81
+
82
+ ---
83
+
84
+ ## Overview
85
+
86
+ Traditional phonemizers rely on simple rule-based mappings or lexicon lookups.
87
+ Neural and hybrid approaches improve generalization but still struggle with:
88
+
89
+ - Names and foreign words
90
+ - Abbreviations and acronyms
91
+ - Loanwords and compounds
92
+ - Ambiguous homographs
93
+
94
+ **OLaPh** tackles these challenges by combining:
95
+
96
+ - Extensive **language-specific dictionaries**
97
+ - **Abbreviation, number, and letter normalization**
98
+ - **Compound resolution with probabilistic scoring**
99
+ - **Cross-language handling**
100
+ - **NLP-based preprocessing** via [spaCy](https://spacy.io) and [Lingua](https://github.com/pemistahl/lingua-py)
101
+
102
+ Evaluations in **German** and **English** show improved accuracy and robustness over existing phonemizers, including on challenging multilingual datasets.
103
+
104
+ ---
105
+
106
+ ## Features
107
+
108
+ - Multilingual phonemization (DE, EN, FR, ES)
109
+ - Abbreviation and letter pronunciation dictionaries
110
+ - Number normalization
111
+ - Cross-language acronym detection
112
+ - Compound splitting with probabilistic scoring
113
+ - Freely available lexica for research and development derived from wiktionary.org.
114
+
115
+ ## Large Language Model
116
+ An LLM based on OLaPh output is also available. It is a GemmaX 2B model trained on ~10M sentences derived from the FineWeb corpus and phonemized with the OLaPh framework.
117
+
118
+ Find it here on [huggingface](https://huggingface.co/iisys-hof/olaph)
119
+
120
+ ---
121
+
122
+ ## Installation
123
+
124
+ ### From PyPI
125
+
126
+ ```bash
127
+ pip install olaph
128
+ ```
129
+
130
+ ### From source
131
+
132
+ ```bash
133
+ git clone https://github.com/iisys-hof/olaph.git
134
+ cd olaph
135
+ pip install -e .
136
+ ```
137
+
138
+ ## Example Usage
139
+
140
+ ```python
141
+ from olaph import Olaph
142
+
143
+ phonemizer = Olaph()
144
+
145
+ output = phonemizer.phonemize_text("He ordered a Brezel and a beer in a tavern near München.", lang="en")
146
+
147
+ print(output)
148
+ ```
149
+
150
+ ---
151
+
152
+ ## Dependencies
153
+
154
+ - [spaCy](https://spacy.io)
155
+ - [Lingua](https://github.com/pemistahl/lingua-py)
156
+ - [num2words](https://github.com/savoirfairelinux/num2words)
157
+ - [requests](https://requests.readthedocs.io)
158
+
159
+ ---
160
+
161
+ ## Research Summary
162
+
163
+ Phonemization, the conversion of text into phonemes, is a key step in text-to-speech. Traditional approaches use rule-based transformations and lexicon lookups, while more advanced methods apply preprocessing techniques or neural networks for improved accuracy on out-of-domain vocabulary. However, all systems struggle with names, loanwords, abbreviations, and homographs. This work presents OLaPh (Optimal Language Phonemizer), a framework that combines large lexica, multiple NLP techniques, and compound resolution with a probabilistic scoring function. Evaluations in German and English show improved accuracy over previous approaches, including on a challenging dataset. To further address unresolved cases, we train a large language model on OLaPh-generated data, which achieves even stronger generalization and performance. Together, the framework and LLM improve phonemization consistency and provide a freely available resource for future research.
164
+
165
+ ---
166
+
167
+ ## Citation
168
+
169
+ If you use OLaPh in academic work, please cite:
170
+
171
+ ### BibTeX
172
+ ```bibtex
173
+ @misc{wirth2025olaphoptimallanguagephonemizer,
174
+ title={OLaPh: Optimal Language Phonemizer},
175
+ author={Johannes Wirth},
176
+ year={2025},
177
+ eprint={2509.20086},
178
+ archivePrefix={arXiv},
179
+ primaryClass={cs.CL},
180
+ url={https://arxiv.org/abs/2509.20086},
181
+ }
182
+ ```
olaph-0.1.0/README.md ADDED
@@ -0,0 +1,109 @@
1
+ # OLaPh — Optimal Language Phonemizer
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/olaph.svg?logo=pypi)](https://pypi.org/project/olaph/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/olaph.svg)](https://pypi.org/project/olaph/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ **OLaPh (Optimal Language Phonemizer)** is a multilingual phonemization framework that converts text into phonemes surpassing the quality of comparable frameworks.
8
+
9
+ ---
10
+
11
+ ## Overview
12
+
13
+ Traditional phonemizers rely on simple rule-based mappings or lexicon lookups.
14
+ Neural and hybrid approaches improve generalization but still struggle with:
15
+
16
+ - Names and foreign words
17
+ - Abbreviations and acronyms
18
+ - Loanwords and compounds
19
+ - Ambiguous homographs
20
+
21
+ **OLaPh** tackles these challenges by combining:
22
+
23
+ - Extensive **language-specific dictionaries**
24
+ - **Abbreviation, number, and letter normalization**
25
+ - **Compound resolution with probabilistic scoring**
26
+ - **Cross-language handling**
27
+ - **NLP-based preprocessing** via [spaCy](https://spacy.io) and [Lingua](https://github.com/pemistahl/lingua-py)
28
+
29
+ Evaluations in **German** and **English** show improved accuracy and robustness over existing phonemizers, including on challenging multilingual datasets.
30
+
31
+ ---
32
+
33
+ ## Features
34
+
35
+ - Multilingual phonemization (DE, EN, FR, ES)
36
+ - Abbreviation and letter pronunciation dictionaries
37
+ - Number normalization
38
+ - Cross-language acronym detection
39
+ - Compound splitting with probabilistic scoring
40
+ - Freely available lexica for research and development derived from wiktionary.org.
41
+
42
+ ## Large Language Model
43
+ An LLM based on OLaPh output is also available. It is a GemmaX 2B model trained on ~10M sentences derived from the FineWeb corpus and phonemized with the OLaPh framework.
44
+
45
+ Find it here on [huggingface](https://huggingface.co/iisys-hof/olaph)
46
+
47
+ ---
48
+
49
+ ## Installation
50
+
51
+ ### From PyPI
52
+
53
+ ```bash
54
+ pip install olaph
55
+ ```
56
+
57
+ ### From source
58
+
59
+ ```bash
60
+ git clone https://github.com/iisys-hof/olaph.git
61
+ cd olaph
62
+ pip install -e .
63
+ ```
64
+
65
+ ## Example Usage
66
+
67
+ ```python
68
+ from olaph import Olaph
69
+
70
+ phonemizer = Olaph()
71
+
72
+ output = phonemizer.phonemize_text("He ordered a Brezel and a beer in a tavern near München.", lang="en")
73
+
74
+ print(output)
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Dependencies
80
+
81
+ - [spaCy](https://spacy.io)
82
+ - [Lingua](https://github.com/pemistahl/lingua-py)
83
+ - [num2words](https://github.com/savoirfairelinux/num2words)
84
+ - [requests](https://requests.readthedocs.io)
85
+
86
+ ---
87
+
88
+ ## Research Summary
89
+
90
+ Phonemization, the conversion of text into phonemes, is a key step in text-to-speech. Traditional approaches use rule-based transformations and lexicon lookups, while more advanced methods apply preprocessing techniques or neural networks for improved accuracy on out-of-domain vocabulary. However, all systems struggle with names, loanwords, abbreviations, and homographs. This work presents OLaPh (Optimal Language Phonemizer), a framework that combines large lexica, multiple NLP techniques, and compound resolution with a probabilistic scoring function. Evaluations in German and English show improved accuracy over previous approaches, including on a challenging dataset. To further address unresolved cases, we train a large language model on OLaPh-generated data, which achieves even stronger generalization and performance. Together, the framework and LLM improve phonemization consistency and provide a freely available resource for future research.
91
+
92
+ ---
93
+
94
+ ## Citation
95
+
96
+ If you use OLaPh in academic work, please cite:
97
+
98
+ ### BibTeX
99
+ ```bibtex
100
+ @misc{wirth2025olaphoptimallanguagephonemizer,
101
+ title={OLaPh: Optimal Language Phonemizer},
102
+ author={Johannes Wirth},
103
+ year={2025},
104
+ eprint={2509.20086},
105
+ archivePrefix={arXiv},
106
+ primaryClass={cs.CL},
107
+ url={https://arxiv.org/abs/2509.20086},
108
+ }
109
+ ```
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: olaph
3
+ Version: 0.1.0
4
+ Summary: A multilingual phonemizer combining lexica, NLP, and probabilistic scoring for improved phonemization accuracy.
5
+ Author-email: Johannes Wirth <johannes.wirth.3@iisys.de>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/iisys-hof/olaph
8
+ Project-URL: Documentation, https://github.com/iisys-hof/olaph#readme
9
+ Project-URL: Issues, https://github.com/iisys-hof/olaph/issues
10
+ Keywords: phonemizer,text-to-speech,linguistics,NLP,multilingual
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Text Processing :: Linguistic
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Requires-Python: >=3.12
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: spacy
19
+ Requires-Dist: lingua-language-detector==2.1.1
20
+ Requires-Dist: num2words==0.5.14
21
+ Requires-Dist: requests==2.32.5
22
+ Requires-Dist: annotated-types==0.7.0
23
+ Requires-Dist: blis==1.3.0
24
+ Requires-Dist: catalogue==2.0.10
25
+ Requires-Dist: certifi==2025.10.5
26
+ Requires-Dist: charset-normalizer==3.4.3
27
+ Requires-Dist: click==8.3.0
28
+ Requires-Dist: cloudpathlib==0.22.0
29
+ Requires-Dist: colorama==0.4.6
30
+ Requires-Dist: confection==0.1.5
31
+ Requires-Dist: cymem==2.0.11
32
+ Requires-Dist: de-core-news-sm
33
+ Requires-Dist: docopt==0.6.2
34
+ Requires-Dist: en-core-web-sm
35
+ Requires-Dist: es-core-news-sm
36
+ Requires-Dist: fr-core-news-sm
37
+ Requires-Dist: idna==3.10
38
+ Requires-Dist: jinja2==3.1.6
39
+ Requires-Dist: langcodes==3.5.0
40
+ Requires-Dist: language-data==1.3.0
41
+ Requires-Dist: marisa-trie==1.3.1
42
+ Requires-Dist: markdown-it-py==4.0.0
43
+ Requires-Dist: markupsafe==3.0.3
44
+ Requires-Dist: mdurl==0.1.2
45
+ Requires-Dist: murmurhash==1.0.13
46
+ Requires-Dist: numpy==2.3.3
47
+ Requires-Dist: packaging==25.0
48
+ Requires-Dist: preshed==3.0.10
49
+ Requires-Dist: pydantic==2.11.10
50
+ Requires-Dist: pydantic-core==2.33.2
51
+ Requires-Dist: pygments==2.19.2
52
+ Requires-Dist: rich==14.1.0
53
+ Requires-Dist: setuptools==80.9.0
54
+ Requires-Dist: shellingham==1.5.4
55
+ Requires-Dist: smart-open==7.3.1
56
+ Requires-Dist: spacy-legacy==3.0.12
57
+ Requires-Dist: spacy-loggers==1.0.5
58
+ Requires-Dist: srsly==2.5.1
59
+ Requires-Dist: thinc==8.3.6
60
+ Requires-Dist: tqdm==4.67.1
61
+ Requires-Dist: typer==0.19.2
62
+ Requires-Dist: typing-inspection==0.4.2
63
+ Requires-Dist: typing-extensions==4.15.0
64
+ Requires-Dist: urllib3==2.5.0
65
+ Requires-Dist: wasabi==1.1.3
66
+ Requires-Dist: weasel==0.4.1
67
+ Requires-Dist: wrapt==1.17.3
68
+ Provides-Extra: dev
69
+ Requires-Dist: pytest; extra == "dev"
70
+ Requires-Dist: black; extra == "dev"
71
+ Requires-Dist: flake8; extra == "dev"
72
+ Dynamic: license-file
73
+
74
+ # OLaPh — Optimal Language Phonemizer
75
+
76
+ [![PyPI version](https://img.shields.io/pypi/v/olaph.svg?logo=pypi)](https://pypi.org/project/olaph/)
77
+ [![Python versions](https://img.shields.io/pypi/pyversions/olaph.svg)](https://pypi.org/project/olaph/)
78
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
79
+
80
+ **OLaPh (Optimal Language Phonemizer)** is a multilingual phonemization framework that converts text into phonemes surpassing the quality of comparable frameworks.
81
+
82
+ ---
83
+
84
+ ## Overview
85
+
86
+ Traditional phonemizers rely on simple rule-based mappings or lexicon lookups.
87
+ Neural and hybrid approaches improve generalization but still struggle with:
88
+
89
+ - Names and foreign words
90
+ - Abbreviations and acronyms
91
+ - Loanwords and compounds
92
+ - Ambiguous homographs
93
+
94
+ **OLaPh** tackles these challenges by combining:
95
+
96
+ - Extensive **language-specific dictionaries**
97
+ - **Abbreviation, number, and letter normalization**
98
+ - **Compound resolution with probabilistic scoring**
99
+ - **Cross-language handling**
100
+ - **NLP-based preprocessing** via [spaCy](https://spacy.io) and [Lingua](https://github.com/pemistahl/lingua-py)
101
+
102
+ Evaluations in **German** and **English** show improved accuracy and robustness over existing phonemizers, including on challenging multilingual datasets.
103
+
104
+ ---
105
+
106
+ ## Features
107
+
108
+ - Multilingual phonemization (DE, EN, FR, ES)
109
+ - Abbreviation and letter pronunciation dictionaries
110
+ - Number normalization
111
+ - Cross-language acronym detection
112
+ - Compound splitting with probabilistic scoring
113
+ - Freely available lexica for research and development derived from wiktionary.org.
114
+
115
+ ## Large Language Model
116
+ An LLM based on OLaPh output is also available. It is a GemmaX 2B model trained on ~10M sentences derived from the FineWeb corpus and phonemized with the OLaPh framework.
117
+
118
+ Find it here on [huggingface](https://huggingface.co/iisys-hof/olaph)
119
+
120
+ ---
121
+
122
+ ## Installation
123
+
124
+ ### From PyPI
125
+
126
+ ```bash
127
+ pip install olaph
128
+ ```
129
+
130
+ ### From source
131
+
132
+ ```bash
133
+ git clone https://github.com/iisys-hof/olaph.git
134
+ cd olaph
135
+ pip install -e .
136
+ ```
137
+
138
+ ## Example Usage
139
+
140
+ ```python
141
+ from olaph import Olaph
142
+
143
+ phonemizer = Olaph()
144
+
145
+ output = phonemizer.phonemize_text("He ordered a Brezel and a beer in a tavern near München.", lang="en")
146
+
147
+ print(output)
148
+ ```
149
+
150
+ ---
151
+
152
+ ## Dependencies
153
+
154
+ - [spaCy](https://spacy.io)
155
+ - [Lingua](https://github.com/pemistahl/lingua-py)
156
+ - [num2words](https://github.com/savoirfairelinux/num2words)
157
+ - [requests](https://requests.readthedocs.io)
158
+
159
+ ---
160
+
161
+ ## Research Summary
162
+
163
+ Phonemization, the conversion of text into phonemes, is a key step in text-to-speech. Traditional approaches use rule-based transformations and lexicon lookups, while more advanced methods apply preprocessing techniques or neural networks for improved accuracy on out-of-domain vocabulary. However, all systems struggle with names, loanwords, abbreviations, and homographs. This work presents OLaPh (Optimal Language Phonemizer), a framework that combines large lexica, multiple NLP techniques, and compound resolution with a probabilistic scoring function. Evaluations in German and English show improved accuracy over previous approaches, including on a challenging dataset. To further address unresolved cases, we train a large language model on OLaPh-generated data, which achieves even stronger generalization and performance. Together, the framework and LLM improve phonemization consistency and provide a freely available resource for future research.
164
+
165
+ ---
166
+
167
+ ## Citation
168
+
169
+ If you use OLaPh in academic work, please cite:
170
+
171
+ ### BibTeX
172
+ ```bibtex
173
+ @misc{wirth2025olaphoptimallanguagephonemizer,
174
+ title={OLaPh: Optimal Language Phonemizer},
175
+ author={Johannes Wirth},
176
+ year={2025},
177
+ eprint={2509.20086},
178
+ archivePrefix={arXiv},
179
+ primaryClass={cs.CL},
180
+ url={https://arxiv.org/abs/2509.20086},
181
+ }
182
+ ```
@@ -0,0 +1,8 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ olaph.egg-info/PKG-INFO
5
+ olaph.egg-info/SOURCES.txt
6
+ olaph.egg-info/dependency_links.txt
7
+ olaph.egg-info/requires.txt
8
+ olaph.egg-info/top_level.txt
@@ -0,0 +1,55 @@
1
+ spacy
2
+ lingua-language-detector==2.1.1
3
+ num2words==0.5.14
4
+ requests==2.32.5
5
+ annotated-types==0.7.0
6
+ blis==1.3.0
7
+ catalogue==2.0.10
8
+ certifi==2025.10.5
9
+ charset-normalizer==3.4.3
10
+ click==8.3.0
11
+ cloudpathlib==0.22.0
12
+ colorama==0.4.6
13
+ confection==0.1.5
14
+ cymem==2.0.11
15
+ de-core-news-sm
16
+ docopt==0.6.2
17
+ en-core-web-sm
18
+ es-core-news-sm
19
+ fr-core-news-sm
20
+ idna==3.10
21
+ jinja2==3.1.6
22
+ langcodes==3.5.0
23
+ language-data==1.3.0
24
+ marisa-trie==1.3.1
25
+ markdown-it-py==4.0.0
26
+ markupsafe==3.0.3
27
+ mdurl==0.1.2
28
+ murmurhash==1.0.13
29
+ numpy==2.3.3
30
+ packaging==25.0
31
+ preshed==3.0.10
32
+ pydantic==2.11.10
33
+ pydantic-core==2.33.2
34
+ pygments==2.19.2
35
+ rich==14.1.0
36
+ setuptools==80.9.0
37
+ shellingham==1.5.4
38
+ smart-open==7.3.1
39
+ spacy-legacy==3.0.12
40
+ spacy-loggers==1.0.5
41
+ srsly==2.5.1
42
+ thinc==8.3.6
43
+ tqdm==4.67.1
44
+ typer==0.19.2
45
+ typing-inspection==0.4.2
46
+ typing-extensions==4.15.0
47
+ urllib3==2.5.0
48
+ wasabi==1.1.3
49
+ weasel==0.4.1
50
+ wrapt==1.17.3
51
+
52
+ [dev]
53
+ pytest
54
+ black
55
+ flake8
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,98 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "olaph"
7
+ version = "0.1.0"
8
+ description = "A multilingual phonemizer combining lexica, NLP, and probabilistic scoring for improved phonemization accuracy."
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = { text = "MIT" }
12
+
13
+ authors = [
14
+ { name = "Johannes Wirth", email = "johannes.wirth.3@iisys.de" }
15
+ ]
16
+
17
+ keywords = ["phonemizer", "text-to-speech", "linguistics", "NLP", "multilingual"]
18
+
19
+ classifiers = [
20
+ "Intended Audience :: Developers",
21
+ "Topic :: Text Processing :: Linguistic",
22
+ "License :: OSI Approved :: MIT License",
23
+ "Programming Language :: Python :: 3"
24
+ ]
25
+
26
+ dependencies = [
27
+ "spacy",
28
+ "lingua-language-detector==2.1.1",
29
+ "num2words==0.5.14",
30
+ "requests==2.32.5",
31
+ "annotated-types==0.7.0",
32
+ "blis==1.3.0",
33
+ "catalogue==2.0.10",
34
+ "certifi==2025.10.5",
35
+ "charset-normalizer==3.4.3",
36
+ "click==8.3.0",
37
+ "cloudpathlib==0.22.0",
38
+ "colorama==0.4.6",
39
+ "confection==0.1.5",
40
+ "cymem==2.0.11",
41
+ "de-core-news-sm",
42
+ "docopt==0.6.2",
43
+ "en-core-web-sm",
44
+ "es-core-news-sm",
45
+ "fr-core-news-sm",
46
+ "idna==3.10",
47
+ "jinja2==3.1.6",
48
+ "langcodes==3.5.0",
49
+ "language-data==1.3.0",
50
+ "marisa-trie==1.3.1",
51
+ "markdown-it-py==4.0.0",
52
+ "markupsafe==3.0.3",
53
+ "mdurl==0.1.2",
54
+ "murmurhash==1.0.13",
55
+ "numpy==2.3.3",
56
+ "packaging==25.0",
57
+ "preshed==3.0.10",
58
+ "pydantic==2.11.10",
59
+ "pydantic-core==2.33.2",
60
+ "pygments==2.19.2",
61
+ "rich==14.1.0",
62
+ "setuptools==80.9.0",
63
+ "shellingham==1.5.4",
64
+ "smart-open==7.3.1",
65
+ "spacy-legacy==3.0.12",
66
+ "spacy-loggers==1.0.5",
67
+ "srsly==2.5.1",
68
+ "thinc==8.3.6",
69
+ "tqdm==4.67.1",
70
+ "typer==0.19.2",
71
+ "typing-inspection==0.4.2",
72
+ "typing-extensions==4.15.0",
73
+ "urllib3==2.5.0",
74
+ "wasabi==1.1.3",
75
+ "weasel==0.4.1",
76
+ "wrapt==1.17.3",
77
+ ]
78
+
79
+ [project.urls]
80
+ Homepage = "https://github.com/iisys-hof/olaph"
81
+ Documentation = "https://github.com/iisys-hof/olaph#readme"
82
+ Issues = "https://github.com/iisys-hof/olaph/issues"
83
+
84
+ [tool.setuptools.packages.find]
85
+ where = ["."]
86
+ include = ["olaph*"]
87
+
88
+ [tool.setuptools.package-data]
89
+ "olaph" = ["dictionaries/**/*.txt"]
90
+
91
+ [tool.uv.sources]
92
+ de-core-news-sm = { url = "https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.8.0/de_core_news_sm-3.8.0-py3-none-any.whl" }
93
+ en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
94
+ es-core-news-sm = { url = "https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.8.0/es_core_news_sm-3.8.0-py3-none-any.whl" }
95
+ fr-core-news-sm = { url = "https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.8.0/fr_core_news_sm-3.8.0-py3-none-any.whl" }
96
+
97
+ [project.optional-dependencies]
98
+ dev = ["pytest", "black", "flake8"]
olaph-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+