grzegorz 0.6.0__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {grzegorz-0.6.0 → grzegorz-0.6.1}/PKG-INFO +7 -2
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/fetcher.py +14 -15
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/generator.py +7 -3
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/subcommands.py +3 -2
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/PKG-INFO +7 -2
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/requires.txt +1 -1
- {grzegorz-0.6.0 → grzegorz-0.6.1}/setup.cfg +2 -2
- {grzegorz-0.6.0 → grzegorz-0.6.1}/LICENSE +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/README.md +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/__init__.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/__main__.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/anki_integration.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/io.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/test.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/word.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/wordlist.py +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/SOURCES.txt +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/dependency_links.txt +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/entry_points.txt +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/top_level.txt +0 -0
- {grzegorz-0.6.0 → grzegorz-0.6.1}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: grzegorz
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: Minimal pair generator and phonetics tool
|
|
5
5
|
Home-page: https://github.com/xylous/grzegorz
|
|
6
6
|
Author: xylous
|
|
@@ -16,6 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
|
|
|
16
16
|
Requires-Python: >=3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
+
Requires-Dist: beautifulsoup4
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Requires-Dist: requests
|
|
22
|
+
Requires-Dist: genanki
|
|
23
|
+
Dynamic: license-file
|
|
19
24
|
|
|
20
25
|
# grzegorz
|
|
21
26
|
|
|
@@ -15,7 +15,8 @@
|
|
|
15
15
|
|
|
16
16
|
from grzegorz.word import Word
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
import requests
|
|
19
|
+
from bs4 import BeautifulSoup
|
|
19
20
|
import re
|
|
20
21
|
|
|
21
22
|
### HELPER FUNCTIONS ###
|
|
@@ -26,21 +27,19 @@ def get_ipa_for_word(word: str, language: str) -> Word:
|
|
|
26
27
|
and return a `Word` binding it to the letters. If no transcription was
|
|
27
28
|
found, then the `ipa` field of the result is empty.
|
|
28
29
|
"""
|
|
29
|
-
|
|
30
|
-
|
|
30
|
+
language = language.capitalize()
|
|
31
|
+
url = f"https://en.wiktionary.org/wiki/{word}"
|
|
32
|
+
webpage = requests.get(url)
|
|
33
|
+
soup= BeautifulSoup(webpage.text, "html.parser")
|
|
34
|
+
pronunciations= soup.select(f'li:has(sup:has(a[href="/wiki/Appendix:{language}_pronunciation"]))' )
|
|
35
|
+
|
|
31
36
|
ipa = ""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
ipa = first_ipa_pronunciation(text[0])
|
|
39
|
-
# Not all words have their IPAs on wiktionary, but they might have a
|
|
40
|
-
# "Rhymes" section (many German words do, for example). If we did fetch a
|
|
41
|
-
# rhyme, don't add it as a valid IPA
|
|
42
|
-
if len(ipa) and ipa[0] == '-':
|
|
43
|
-
ipa = ""
|
|
37
|
+
# maybe blindly choosing the first IPA transliteration is not the wisest
|
|
38
|
+
# choice in the world?
|
|
39
|
+
if len(pronunciations):
|
|
40
|
+
first_entry = pronunciations[0].find("span", {"class": "IPA"})
|
|
41
|
+
if first_entry is not None:
|
|
42
|
+
ipa = first_entry.text
|
|
44
43
|
|
|
45
44
|
return Word(word, ipa)
|
|
46
45
|
|
|
@@ -46,17 +46,21 @@ class MinPairGenerator:
|
|
|
46
46
|
lists_of_phonemes.append(line.replace(" ", "").split(","))
|
|
47
47
|
self.filter_pairs = phoneme_lists_to_phoneme_pairs(lists_of_phonemes)
|
|
48
48
|
|
|
49
|
-
def generate(self, words: list[Word]) -> list[WordPair]:
|
|
49
|
+
def generate(self, words: list[Word], silent: bool = True) -> list[WordPair]:
|
|
50
50
|
"""
|
|
51
51
|
Generate minimal pairs from the given parameters
|
|
52
52
|
"""
|
|
53
53
|
minpairs = []
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
progress_bar = tqdm(total=int(len(words) * (len(words) - 1) / 2), disable=silent)
|
|
56
|
+
for i in range(0, len(words)):
|
|
57
|
+
words_after = range(i+1, len(words))
|
|
58
|
+
for j in words_after:
|
|
57
59
|
pair = (words[i], words[j])
|
|
58
60
|
if self.check_minpair(pair):
|
|
59
61
|
minpairs.append(pair)
|
|
62
|
+
progress_bar.update(len(words_after))
|
|
63
|
+
progress_bar.close()
|
|
60
64
|
|
|
61
65
|
return minpairs
|
|
62
66
|
|
|
@@ -95,7 +95,7 @@ def wordlist_command(language: str, bounds: str, outfile: str) -> int:
|
|
|
95
95
|
else:
|
|
96
96
|
return 1
|
|
97
97
|
|
|
98
|
-
def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int =
|
|
98
|
+
def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 20) -> None:
|
|
99
99
|
"""
|
|
100
100
|
Given an input file containing a list of words separated, fetch the IPAs and
|
|
101
101
|
create a text file with their IPA spellings matched to their text
|
|
@@ -148,7 +148,8 @@ def generate_command(infile, outfile, nooptimise, no_phonemes, no_chronemes,
|
|
|
148
148
|
if no_stress:
|
|
149
149
|
print("Generator: syllable stress contrasts will be ignored")
|
|
150
150
|
|
|
151
|
-
|
|
151
|
+
print('Generating minimal pairs from:', len(words), 'words')
|
|
152
|
+
minpairs = g.generate(words, False)
|
|
152
153
|
writefile(outfile, encode_format(encode_minpair, minpairs))
|
|
153
154
|
print('Done! Generated', len(minpairs), 'minimal pairs')
|
|
154
155
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: grzegorz
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: Minimal pair generator and phonetics tool
|
|
5
5
|
Home-page: https://github.com/xylous/grzegorz
|
|
6
6
|
Author: xylous
|
|
@@ -16,6 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
|
|
|
16
16
|
Requires-Python: >=3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
+
Requires-Dist: beautifulsoup4
|
|
20
|
+
Requires-Dist: tqdm
|
|
21
|
+
Requires-Dist: requests
|
|
22
|
+
Requires-Dist: genanki
|
|
23
|
+
Dynamic: license-file
|
|
19
24
|
|
|
20
25
|
# grzegorz
|
|
21
26
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = grzegorz
|
|
3
|
-
version = 0.6.0
|
|
3
|
+
version = 0.6.1
|
|
4
4
|
author = xylous
|
|
5
5
|
author_email = xylous.e@gmail.com
|
|
6
6
|
description = Minimal pair generator and phonetics tool
|
|
@@ -23,7 +23,7 @@ packages =
|
|
|
23
23
|
grzegorz
|
|
24
24
|
python_requires = >=3.10
|
|
25
25
|
install_requires =
|
|
26
|
-
|
|
26
|
+
beautifulsoup4
|
|
27
27
|
tqdm
|
|
28
28
|
requests
|
|
29
29
|
genanki
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|