grzegorz 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
grzegorz/fetcher.py CHANGED
@@ -15,7 +15,8 @@
15
15
 
16
16
  from grzegorz.word import Word
17
17
 
18
- from wiktionaryparser import WiktionaryParser
18
+ import requests
19
+ from bs4 import BeautifulSoup
19
20
  import re
20
21
 
21
22
  ### HELPER FUNCTIONS ###
@@ -26,21 +27,19 @@ def get_ipa_for_word(word: str, language: str) -> Word:
26
27
  and return a `Word` binding it to the letters. If no transcription was
27
28
  found, then the `ipa` field of the result is empty.
28
29
  """
29
- parser = WiktionaryParser()
30
- parser.set_default_language(language)
30
+ language = language.capitalize()
31
+ url = f"https://en.wiktionary.org/wiki/{word}"
32
+ webpage = requests.get(url)
33
+ soup= BeautifulSoup(webpage.text, "html.parser")
34
+ pronunciations= soup.select(f'li:has(sup:has(a[href="/wiki/Appendix:{language}_pronunciation"]))' )
35
+
31
36
  ipa = ""
32
- fetched = parser.fetch(word)
33
- if len(fetched):
34
- first_entry = fetched[0]
35
- pronunciations = first_entry.get('pronunciations')
36
- text = pronunciations.get('text')
37
- if len(text):
38
- ipa = first_ipa_pronunciation(text[0])
39
- # Not all words have their IPAs on wiktionary, but they might have a
40
- # "Rhymes" section (many German words do, for example). If we did fetch a
41
- # rhyme, don't add it as a valid IPA
42
- if len(ipa) and ipa[0] == '-':
43
- ipa = ""
37
+ # maybe blindly choosing the first IPA transliteration is not the wisest
38
+ # choice in the world?
39
+ if len(pronunciations):
40
+ first_entry = pronunciations[0].find("span", {"class": "IPA"})
41
+ if first_entry is not None:
42
+ ipa = first_entry.text
44
43
 
45
44
  return Word(word, ipa)
46
45
 
grzegorz/generator.py CHANGED
@@ -46,17 +46,21 @@ class MinPairGenerator:
46
46
  lists_of_phonemes.append(line.replace(" ", "").split(","))
47
47
  self.filter_pairs = phoneme_lists_to_phoneme_pairs(lists_of_phonemes)
48
48
 
49
- def generate(self, words: list[Word]) -> list[WordPair]:
49
+ def generate(self, words: list[Word], silent: bool = True) -> list[WordPair]:
50
50
  """
51
51
  Generate minimal pairs from the given parameters
52
52
  """
53
53
  minpairs = []
54
54
 
55
- for i in tqdm(range(0,len(words))):
56
- for j in range(i+1,len(words)):
55
+ progress_bar = tqdm(total=int(len(words) * (len(words) - 1) / 2), disable=silent)
56
+ for i in range(0, len(words)):
57
+ words_after = range(i+1, len(words))
58
+ for j in words_after:
57
59
  pair = (words[i], words[j])
58
60
  if self.check_minpair(pair):
59
61
  minpairs.append(pair)
62
+ progress_bar.update(len(words_after))
63
+ progress_bar.close()
60
64
 
61
65
  return minpairs
62
66
 
grzegorz/subcommands.py CHANGED
@@ -95,7 +95,7 @@ def wordlist_command(language: str, bounds: str, outfile: str) -> int:
95
95
  else:
96
96
  return 1
97
97
 
98
- def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 10) -> None:
98
+ def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 20) -> None:
99
99
  """
100
100
  Given an input file containing a list of words separated, fetch the IPAs and
101
101
  create a text file with their IPA spellings matched to their text
@@ -148,7 +148,8 @@ def generate_command(infile, outfile, nooptimise, no_phonemes, no_chronemes,
148
148
  if no_stress:
149
149
  print("Generator: syllable stress contrasts will be ignored")
150
150
 
151
- minpairs = g.generate(words)
151
+ print('Generating minimal pairs from:', len(words), 'words')
152
+ minpairs = g.generate(words, False)
152
153
  writefile(outfile, encode_format(encode_minpair, minpairs))
153
154
  print('Done! Generated', len(minpairs), 'minimal pairs')
154
155
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: grzegorz
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: Minimal pair generator and phonetics tool
5
5
  Home-page: https://github.com/xylous/grzegorz
6
6
  Author: xylous
@@ -16,10 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: wiktionaryparser
19
+ Requires-Dist: beautifulsoup4
20
20
  Requires-Dist: tqdm
21
21
  Requires-Dist: requests
22
22
  Requires-Dist: genanki
23
+ Dynamic: license-file
23
24
 
24
25
  # grzegorz
25
26
 
@@ -0,0 +1,16 @@
1
+ grzegorz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ grzegorz/__main__.py,sha256=jYpB9FB0uuJwrpxBpU-d13vIw1vhNg2kYX4yC_-UWrg,7133
3
+ grzegorz/anki_integration.py,sha256=eMFdFNd0NsqLxX23NtlEzinhGMCecEFyoklfFkMqQOk,3933
4
+ grzegorz/fetcher.py,sha256=oAZRDZVqH93HgLFFffJ-dl6Qc83aD43ZuNVK9boy7F0,1902
5
+ grzegorz/generator.py,sha256=oCz9TKg9wPN3VIGGa2H8L2Ex4Uf2_gX_XFrlxiB4RSw,9320
6
+ grzegorz/io.py,sha256=JM2pOKgECmnVxCZplgRt1gEiyYWXUn_Z6OanmGSaab0,2221
7
+ grzegorz/subcommands.py,sha256=QQQX1LraTi9Lfo28N1s4G1j-j_z4HtiUsAYsVNyt5FI,6101
8
+ grzegorz/test.py,sha256=znHJFiV0Q1qP0kJYtoweMTNqJH1eX9ZHWFZedOJIuGo,3866
9
+ grzegorz/word.py,sha256=bXNTq_sjrn7CTOWBGkKdQXky_j0c-OzxhhgJWDh0BR0,7899
10
+ grzegorz/wordlist.py,sha256=SqKkZoyY8Ol1vp8Rt0PeNWtxL8ND3qE_yWGl5yiKZ_M,4058
11
+ grzegorz-0.6.1.dist-info/licenses/LICENSE,sha256=STF0KkBB_RpcXwp43xCvRIKKe_4V-zrq1lU1OsTgapY,35148
12
+ grzegorz-0.6.1.dist-info/METADATA,sha256=sZkAm0W5qIsh8fcWOpiy-cV6L64p5RsTWm06VYKbf5U,3980
13
+ grzegorz-0.6.1.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
14
+ grzegorz-0.6.1.dist-info/entry_points.txt,sha256=rZ-JLt-sbS1rZ5YwodMyf9o80C6sN4AfuSCb0sFNVJ8,52
15
+ grzegorz-0.6.1.dist-info/top_level.txt,sha256=W2SodvLxGhkJfWfNhDO0Vh7prBehEXdE9sHWJ1mZXTA,9
16
+ grzegorz-0.6.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.0)
2
+ Generator: setuptools (80.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,16 +0,0 @@
1
- grzegorz/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- grzegorz/__main__.py,sha256=jYpB9FB0uuJwrpxBpU-d13vIw1vhNg2kYX4yC_-UWrg,7133
3
- grzegorz/anki_integration.py,sha256=eMFdFNd0NsqLxX23NtlEzinhGMCecEFyoklfFkMqQOk,3933
4
- grzegorz/fetcher.py,sha256=quIpygekLCtXDS5Yx_2eHkJUeDaJDMdNcqI2P5HPxos,1929
5
- grzegorz/generator.py,sha256=IQyqY_dPUHmkaCcWnuN5QCQfvRiCS9B_8oOLuiNs2dM,9096
6
- grzegorz/io.py,sha256=JM2pOKgECmnVxCZplgRt1gEiyYWXUn_Z6OanmGSaab0,2221
7
- grzegorz/subcommands.py,sha256=uO0StbCG081rXPp71DMm--anb2pyA2QHNBCwDEwh6vk,6029
8
- grzegorz/test.py,sha256=znHJFiV0Q1qP0kJYtoweMTNqJH1eX9ZHWFZedOJIuGo,3866
9
- grzegorz/word.py,sha256=bXNTq_sjrn7CTOWBGkKdQXky_j0c-OzxhhgJWDh0BR0,7899
10
- grzegorz/wordlist.py,sha256=SqKkZoyY8Ol1vp8Rt0PeNWtxL8ND3qE_yWGl5yiKZ_M,4058
11
- grzegorz-0.6.0.dist-info/LICENSE,sha256=STF0KkBB_RpcXwp43xCvRIKKe_4V-zrq1lU1OsTgapY,35148
12
- grzegorz-0.6.0.dist-info/METADATA,sha256=IbAHh2s0xdwqEhLehK8SAImnm14mHO3k8nTnNfyOGtg,3960
13
- grzegorz-0.6.0.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
14
- grzegorz-0.6.0.dist-info/entry_points.txt,sha256=rZ-JLt-sbS1rZ5YwodMyf9o80C6sN4AfuSCb0sFNVJ8,52
15
- grzegorz-0.6.0.dist-info/top_level.txt,sha256=W2SodvLxGhkJfWfNhDO0Vh7prBehEXdE9sHWJ1mZXTA,9
16
- grzegorz-0.6.0.dist-info/RECORD,,