grzegorz 0.6.0__tar.gz → 0.6.1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: grzegorz
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: Minimal pair generator and phonetics tool
5
5
  Home-page: https://github.com/xylous/grzegorz
6
6
  Author: xylous
@@ -16,6 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
+ Requires-Dist: beautifulsoup4
20
+ Requires-Dist: tqdm
21
+ Requires-Dist: requests
22
+ Requires-Dist: genanki
23
+ Dynamic: license-file
19
24
 
20
25
  # grzegorz
21
26
 
@@ -15,7 +15,8 @@
15
15
 
16
16
  from grzegorz.word import Word
17
17
 
18
- from wiktionaryparser import WiktionaryParser
18
+ import requests
19
+ from bs4 import BeautifulSoup
19
20
  import re
20
21
 
21
22
  ### HELPER FUNCTIONS ###
@@ -26,21 +27,19 @@ def get_ipa_for_word(word: str, language: str) -> Word:
26
27
  and return a `Word` binding it to the letters. If no transcription was
27
28
  found, then the `ipa` field of the result is empty.
28
29
  """
29
- parser = WiktionaryParser()
30
- parser.set_default_language(language)
30
+ language = language.capitalize()
31
+ url = f"https://en.wiktionary.org/wiki/{word}"
32
+ webpage = requests.get(url)
33
+ soup= BeautifulSoup(webpage.text, "html.parser")
34
+ pronunciations= soup.select(f'li:has(sup:has(a[href="/wiki/Appendix:{language}_pronunciation"]))' )
35
+
31
36
  ipa = ""
32
- fetched = parser.fetch(word)
33
- if len(fetched):
34
- first_entry = fetched[0]
35
- pronunciations = first_entry.get('pronunciations')
36
- text = pronunciations.get('text')
37
- if len(text):
38
- ipa = first_ipa_pronunciation(text[0])
39
- # Not all words have their IPAs on wiktionary, but they might have a
40
- # "Rhymes" section (many German words do, for example). If we did fetch a
41
- # rhyme, don't add it as a valid IPA
42
- if len(ipa) and ipa[0] == '-':
43
- ipa = ""
37
+ # maybe blindly choosing the first IPA transliteration is not the wisest
38
+ # choice in the world?
39
+ if len(pronunciations):
40
+ first_entry = pronunciations[0].find("span", {"class": "IPA"})
41
+ if first_entry is not None:
42
+ ipa = first_entry.text
44
43
 
45
44
  return Word(word, ipa)
46
45
 
@@ -46,17 +46,21 @@ class MinPairGenerator:
46
46
  lists_of_phonemes.append(line.replace(" ", "").split(","))
47
47
  self.filter_pairs = phoneme_lists_to_phoneme_pairs(lists_of_phonemes)
48
48
 
49
- def generate(self, words: list[Word]) -> list[WordPair]:
49
+ def generate(self, words: list[Word], silent: bool = True) -> list[WordPair]:
50
50
  """
51
51
  Generate minimal pairs from the given parameters
52
52
  """
53
53
  minpairs = []
54
54
 
55
- for i in tqdm(range(0,len(words))):
56
- for j in range(i+1,len(words)):
55
+ progress_bar = tqdm(total=int(len(words) * (len(words) - 1) / 2), disable=silent)
56
+ for i in range(0, len(words)):
57
+ words_after = range(i+1, len(words))
58
+ for j in words_after:
57
59
  pair = (words[i], words[j])
58
60
  if self.check_minpair(pair):
59
61
  minpairs.append(pair)
62
+ progress_bar.update(len(words_after))
63
+ progress_bar.close()
60
64
 
61
65
  return minpairs
62
66
 
@@ -95,7 +95,7 @@ def wordlist_command(language: str, bounds: str, outfile: str) -> int:
95
95
  else:
96
96
  return 1
97
97
 
98
- def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 10) -> None:
98
+ def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 20) -> None:
99
99
  """
100
100
  Given an input file containing a list of words separated, fetch the IPAs and
101
101
  create a text file with their IPA spellings matched to their text
@@ -148,7 +148,8 @@ def generate_command(infile, outfile, nooptimise, no_phonemes, no_chronemes,
148
148
  if no_stress:
149
149
  print("Generator: syllable stress contrasts will be ignored")
150
150
 
151
- minpairs = g.generate(words)
151
+ print('Generating minimal pairs from:', len(words), 'words')
152
+ minpairs = g.generate(words, False)
152
153
  writefile(outfile, encode_format(encode_minpair, minpairs))
153
154
  print('Done! Generated', len(minpairs), 'minimal pairs')
154
155
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: grzegorz
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: Minimal pair generator and phonetics tool
5
5
  Home-page: https://github.com/xylous/grzegorz
6
6
  Author: xylous
@@ -16,6 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
+ Requires-Dist: beautifulsoup4
20
+ Requires-Dist: tqdm
21
+ Requires-Dist: requests
22
+ Requires-Dist: genanki
23
+ Dynamic: license-file
19
24
 
20
25
  # grzegorz
21
26
 
@@ -1,4 +1,4 @@
1
- wiktionaryparser
1
+ beautifulsoup4
2
2
  tqdm
3
3
  requests
4
4
  genanki
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = grzegorz
3
- version = 0.6.0
3
+ version = 0.6.1
4
4
  author = xylous
5
5
  author_email = xylous.e@gmail.com
6
6
  description = Minimal pair generator and phonetics tool
@@ -23,7 +23,7 @@ packages =
23
23
  grzegorz
24
24
  python_requires = >=3.10
25
25
  install_requires =
26
- wiktionaryparser
26
+ beautifulsoup4
27
27
  tqdm
28
28
  requests
29
29
  genanki
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes