grzegorz 0.6.1__tar.gz → 0.6.2__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: grzegorz
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: Minimal pair generator and phonetics tool
5
5
  Home-page: https://github.com/xylous/grzegorz
6
6
  Author: xylous
@@ -20,6 +20,7 @@ Requires-Dist: beautifulsoup4
20
20
  Requires-Dist: tqdm
21
21
  Requires-Dist: requests
22
22
  Requires-Dist: genanki
23
+ Requires-Dist: fake-useragent
23
24
  Dynamic: license-file
24
25
 
25
26
  # grzegorz
@@ -26,20 +26,17 @@ def create_argparser() -> argparse.ArgumentParser:
26
26
 
27
27
  # 'analyse' subcommand
28
28
  parser_analyse = subparsers.add_parser('analyse',
29
- help='Print the result of phonologically parsing of the given IPA transcription')
29
+ help='Parse the given IPA transcription')
30
30
  parser_analyse.add_argument('ipa',
31
- type=str,
32
- help="IPA transcription")
31
+ type=str)
33
32
 
34
33
  # 'check' subcommand
35
34
  parser_check = subparsers.add_parser('check',
36
- help='Check if the two given IPAs can form minimal pair')
37
- parser_check .add_argument('ipa_first',
38
- type=str,
39
- help="first IPA transcription")
40
- parser_check .add_argument('ipa_second',
41
- type=str,
42
- help="second IPA transcription")
35
+ help='Check if the two given IPAs can form a minimal pair')
36
+ parser_check.add_argument('ipa_first',
37
+ type=str)
38
+ parser_check.add_argument('ipa_second',
39
+ type=str)
43
40
 
44
41
  # 'list-languages' subcommand
45
42
  subparsers.add_parser('list-languages',
@@ -47,12 +44,12 @@ def create_argparser() -> argparse.ArgumentParser:
47
44
 
48
45
  # 'fullmake' command
49
46
  parser_fullmake = subparsers.add_parser('fullmake',
50
- help='Build an Anki deck for a language automatically')
47
+ help=f'Build an Anki deck for a language (equivalent of \'wordlist\', \'fetchipa\', \'generate\', \'makedeck\')')
51
48
  parser_fullmake.add_argument('language',
52
49
  type=str)
53
50
  parser_fullmake.add_argument('bounds',
54
51
  type=str,
55
- help='number of words to keep; alternatively, the range of words to keep, e.g. "1500:3000"')
52
+ help='number of words to keep, e.g. "5000"; alternatively, the range of words to keep, e.g. "1500:3000"')
56
53
  parser_fullmake.add_argument('--clean',
57
54
  dest='clean',
58
55
  action='store_true',
@@ -61,31 +58,30 @@ def create_argparser() -> argparse.ArgumentParser:
61
58
 
62
59
  # 'wordlist' command
63
60
  parser_wordlist = subparsers.add_parser('wordlist',
64
- help='Fetch the word list for a given language, containing a certain number of words')
61
+ help='Get the specified number of words from a frequency wordlist in the given language')
65
62
  parser_wordlist.add_argument('language',
66
63
  type=str,
67
64
  help='language of the wordlist')
68
65
  parser_wordlist.add_argument('bounds',
69
66
  type=str,
70
- help='number of words to keep; alternatively, the range of words to keep, e.g. "1500:3000"')
67
+ help='number of words to keep, e.g. "5000"; alternatively, the range of words to keep, e.g. "1500:3000"')
71
68
  parser_wordlist.add_argument('outfile',
72
69
  type=str,
73
70
  help='path where the wordlist should be stored')
74
71
 
75
72
  # 'fetchipa' subcommand
76
73
  parser_fetchipa = subparsers.add_parser('fetchipa',
77
- help='Fetch all IPA pronunciations for the words into a JSON file')
74
+ help='Fetch IPA pronunciations for words in a wordlist')
78
75
  parser_fetchipa.add_argument('infile',
79
76
  type=str,
80
- help='file containing the list of words')
77
+ help='wordlist output file')
81
78
  parser_fetchipa.add_argument('outfile',
82
- type=str,
83
- help='output file (JSON)')
79
+ type=str)
84
80
  parser_fetchipa.add_argument('--keep-failed',
85
81
  dest='keep_failed',
86
82
  action='store_true',
87
83
  default=False,
88
- help='Save the words for which no IPA was found in the output file (default: don\'t)')
84
+ help='In the output file, keep the words with no found IPA (default: don\'t)')
89
85
  parser_fetchipa.add_argument('--numproc',
90
86
  type=int,
91
87
  dest='numproc',
@@ -94,10 +90,10 @@ def create_argparser() -> argparse.ArgumentParser:
94
90
 
95
91
  # 'generate' subcommand
96
92
  parser_generate = subparsers.add_parser('generate',
97
- help='Create minimal pairs, given a JSON input file')
93
+ help='Find minimal pairs based on the output file of \'fetchipa\'')
98
94
  parser_generate.add_argument('infile',
99
95
  type=str,
100
- help='JSON file created by fetchipa')
96
+ help='file created by fetchipa')
101
97
  parser_generate.add_argument('outfile',
102
98
  type=str,
103
99
  help='path where the created minimal pairs will be stored')
@@ -105,7 +101,7 @@ def create_argparser() -> argparse.ArgumentParser:
105
101
  action='store_true',
106
102
  default=False,
107
103
  dest="nooptimise",
108
- help="generate all possible minimal pairs (default: optimise)")
104
+ help="generate all possible minimal pairs (default: similar sounds)")
109
105
  parser_generate.add_argument('--no-phonemes',
110
106
  action='store_true',
111
107
  default=False,
@@ -124,17 +120,17 @@ def create_argparser() -> argparse.ArgumentParser:
124
120
  parser_generate.add_argument('-f', '--filter-file',
125
121
  type=str,
126
122
  dest="path",
127
- help="path to the file whose contents determine the phones to keep when optimising")
123
+ help="path to file with rules for desired phoneme differences")
128
124
 
129
125
  # 'makedeck' subcommand
130
126
  parser_makedeck = subparsers.add_parser('makedeck',
131
- help='Create an Anki deck package containing all minimal pairs')
127
+ help='Create an Anki deck package file from the output of the \'generate\' command')
132
128
  parser_makedeck.add_argument('infile',
133
129
  type=str,
134
- help="Output file of 'generate'")
130
+ help="output file of 'generate'")
135
131
  parser_makedeck.add_argument('outfile',
136
132
  type=str,
137
- help="Output file; note that it should ideally have the .apkg extension")
133
+ help="(.apkg extension)")
138
134
 
139
135
  return parser
140
136
 
@@ -17,10 +17,10 @@ from grzegorz.word import Word
17
17
 
18
18
  import requests
19
19
  from bs4 import BeautifulSoup
20
+ from fake_useragent import UserAgent
20
21
  import re
21
22
 
22
23
  ### HELPER FUNCTIONS ###
23
-
24
24
  def get_ipa_for_word(word: str, language: str) -> Word:
25
25
  """
26
26
  Look for the IPA transliteration of the given word in the specified language
@@ -28,9 +28,16 @@ def get_ipa_for_word(word: str, language: str) -> Word:
28
28
  found, then the `ipa` field of the result is empty.
29
29
  """
30
30
  language = language.capitalize()
31
+ language = "Serbo-Croatian" if language in ["Croatian", "Serbian"] else language
31
32
  url = f"https://en.wiktionary.org/wiki/{word}"
32
- webpage = requests.get(url)
33
- soup= BeautifulSoup(webpage.text, "html.parser")
33
+
34
+ # wiktionary blocks requests with no/standard user-agent
35
+ # use a random one to bypass that
36
+ ua = UserAgent()
37
+ headers = {"User-Agent": ua.random}
38
+
39
+ webpage = requests.get(url, headers=headers)
40
+ soup = BeautifulSoup(webpage.text, "html.parser")
34
41
  pronunciations= soup.select(f'li:has(sup:has(a[href="/wiki/Appendix:{language}_pronunciation"]))' )
35
42
 
36
43
  ipa = ""
@@ -41,8 +48,15 @@ def get_ipa_for_word(word: str, language: str) -> Word:
41
48
  if first_entry is not None:
42
49
  ipa = first_entry.text
43
50
 
51
+ # in German, nouns are capitalized, but the wordlist we're using might not
52
+ # respect that. This accounts for that, but likely reduces performance for
53
+ # words without any wiktionary entry.
54
+ if language == "German" and ipa == "" and word != word.capitalize():
55
+ return get_ipa_for_word(word.capitalize(), language)
56
+
44
57
  return Word(word, ipa)
45
58
 
59
+
46
60
  def first_ipa_pronunciation(ipa_str: str) -> str:
47
61
  """Find the first IPA spelling in the given string"""
48
62
  result = re.findall(r"[/\[].*?[/\]]", ipa_str)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: grzegorz
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: Minimal pair generator and phonetics tool
5
5
  Home-page: https://github.com/xylous/grzegorz
6
6
  Author: xylous
@@ -20,6 +20,7 @@ Requires-Dist: beautifulsoup4
20
20
  Requires-Dist: tqdm
21
21
  Requires-Dist: requests
22
22
  Requires-Dist: genanki
23
+ Requires-Dist: fake-useragent
23
24
  Dynamic: license-file
24
25
 
25
26
  # grzegorz
@@ -2,3 +2,4 @@ beautifulsoup4
2
2
  tqdm
3
3
  requests
4
4
  genanki
5
+ fake-useragent
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = grzegorz
3
- version = 0.6.1
3
+ version = 0.6.2
4
4
  author = xylous
5
5
  author_email = xylous.e@gmail.com
6
6
  description = Minimal pair generator and phonetics tool
@@ -27,6 +27,7 @@ install_requires =
27
27
  tqdm
28
28
  requests
29
29
  genanki
30
+ fake-useragent
30
31
 
31
32
  [options.entry_points]
32
33
  console_scripts =
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes