PyPI - grzegorz - Versions diffs - 0.6.1__tar.gz → 0.6.2__tar.gz - Mend

grzegorz 0.6.1.tar.gz → 0.6.2.tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (21) hide show

{grzegorz-0.6.1 → grzegorz-0.6.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: grzegorz
-Version: 0.6.1
+Version: 0.6.2
 Summary: Minimal pair generator and phonetics tool
 Home-page: https://github.com/xylous/grzegorz
 Author: xylous
@@ -20,6 +20,7 @@ Requires-Dist: beautifulsoup4
 Requires-Dist: tqdm
 Requires-Dist: requests
 Requires-Dist: genanki
+Requires-Dist: fake-useragent
 Dynamic: license-file
 # grzegorz

{grzegorz-0.6.1 → grzegorz-0.6.2}/grzegorz/__main__.py RENAMED Viewed

@@ -26,20 +26,17 @@ def create_argparser() -> argparse.ArgumentParser:
     # 'analyse' subcommand
     parser_analyse = subparsers.add_parser('analyse',
-            help='Print the result of phonologically parsing of the given IPA transcription')
+            help='Parse the given IPA transcription')
     parser_analyse.add_argument('ipa',
-            type=str,
-            help="IPA transcription")
+            type=str)
     # 'check' subcommand
     parser_check = subparsers.add_parser('check',
-            help='Check if the two given IPAs can form minimal pair')
-    parser_check .add_argument('ipa_first',
-            type=str,
-            help="first IPA transcription")
-    parser_check .add_argument('ipa_second',
-            type=str,
-            help="second IPA transcription")
+            help='Check if the two given IPAs can form a minimal pair')
+    parser_check.add_argument('ipa_first',
+            type=str)
+    parser_check.add_argument('ipa_second',
+            type=str)
     # 'list-languages' subcommand
     subparsers.add_parser('list-languages',
@@ -47,12 +44,12 @@ def create_argparser() -> argparse.ArgumentParser:
     # 'fullmake' command
     parser_fullmake = subparsers.add_parser('fullmake',
-            help='Build an Anki deck for a language automatically')
+            help=f'Build an Anki deck for a language (equivalent of \'wordlist\', \'fetchipa\', \'generate\', \'makedeck\')')
     parser_fullmake.add_argument('language',
             type=str)
     parser_fullmake.add_argument('bounds',
             type=str,
-            help='number of words to keep; alternatively, the range of words to keep, e.g. "1500:3000"')
+            help='number of words to keep, e.g. "5000"; alternatively, the range of words to keep, e.g. "1500:3000"')
     parser_fullmake.add_argument('--clean',
             dest='clean',
             action='store_true',
@@ -61,31 +58,30 @@ def create_argparser() -> argparse.ArgumentParser:
     # 'wordlist' command
     parser_wordlist = subparsers.add_parser('wordlist',
-            help='Fetch the word list for a given language, containing a certain number of words')
+            help='Get the specified number of words from a frequency wordlist in the given language')
     parser_wordlist.add_argument('language',
             type=str,
             help='language of the wordlist')
     parser_wordlist.add_argument('bounds',
             type=str,
-            help='number of words to keep; alternatively, the range of words to keep, e.g. "1500:3000"')
+            help='number of words to keep, e.g. "5000"; alternatively, the range of words to keep, e.g. "1500:3000"')
     parser_wordlist.add_argument('outfile',
             type=str,
             help='path where the wordlist should be stored')
     # 'fetchipa' subcommand
     parser_fetchipa = subparsers.add_parser('fetchipa',
-            help='Fetch all IPA pronunciations for the words into a JSON file')
+            help='Fetch IPA pronunciations for words in a wordlist')
     parser_fetchipa.add_argument('infile',
             type=str,
-            help='file containing the list of words')
+            help='wordlist output file')
     parser_fetchipa.add_argument('outfile',
-            type=str,
-            help='output file (JSON)')
+            type=str)
     parser_fetchipa.add_argument('--keep-failed',
             dest='keep_failed',
             action='store_true',
             default=False,
-            help='Save the words for which no IPA was found in the output file (default: don\'t)')
+            help='In the output file, keep the words with no found IPA (default: don\'t)')
     parser_fetchipa.add_argument('--numproc',
             type=int,
             dest='numproc',
@@ -94,10 +90,10 @@ def create_argparser() -> argparse.ArgumentParser:
     # 'generate' subcommand
     parser_generate = subparsers.add_parser('generate',
-            help='Create minimal pairs, given a JSON input file')
+            help='Find minimal pairs based on the output file of \'fetchipa\'')
     parser_generate.add_argument('infile',
             type=str,
-            help='JSON file created by fetchipa')
+            help='file created by fetchipa')
     parser_generate.add_argument('outfile',
             type=str,
             help='path where the created minimal pairs will be stored')
@@ -105,7 +101,7 @@ def create_argparser() -> argparse.ArgumentParser:
             action='store_true',
             default=False,
             dest="nooptimise",
-            help="generate all possible minimal pairs (default: optimise)")
+            help="generate all possible minimal pairs (default: similar sounds)")
     parser_generate.add_argument('--no-phonemes',
             action='store_true',
             default=False,
@@ -124,17 +120,17 @@ def create_argparser() -> argparse.ArgumentParser:
     parser_generate.add_argument('-f', '--filter-file',
             type=str,
             dest="path",
-            help="path to the file whose contents determine the phones to keep when optimising")
+            help="path to file with rules for desired phoneme differences")
     # 'makedeck' subcommand
     parser_makedeck = subparsers.add_parser('makedeck',
-            help='Create an Anki deck package containing all minimal pairs')
+            help='Create an Anki deck package file from the output of the \'generate\' command')
     parser_makedeck.add_argument('infile',
             type=str,
-            help="Output file of 'generate'")
+            help="output file of 'generate'")
     parser_makedeck.add_argument('outfile',
             type=str,
-            help="Output file; note that it should ideally have the .apkg extension")
+            help="(.apkg extension)")
     return parser

{grzegorz-0.6.1 → grzegorz-0.6.2}/grzegorz/fetcher.py RENAMED Viewed

@@ -17,10 +17,10 @@ from grzegorz.word import Word
 import requests
 from bs4 import BeautifulSoup
+from fake_useragent import UserAgent
 import re
 ### HELPER FUNCTIONS ###
 def get_ipa_for_word(word: str, language: str) -> Word:
     """
     Look for the IPA transliteration of the given word in the specified language
@@ -28,9 +28,16 @@ def get_ipa_for_word(word: str, language: str) -> Word:
     found, then the `ipa` field of the result is empty.
     """
     language = language.capitalize()
+    language = "Serbo-Croatian" if language in ["Croatian", "Serbian"] else language
     url = f"https://en.wiktionary.org/wiki/{word}"
-    webpage = requests.get(url)
-    soup= BeautifulSoup(webpage.text, "html.parser")
+    # wiktionary blocks requests with no/standard user-agent
+    # use a random one to bypass that
+    ua = UserAgent()
+    headers = {"User-Agent": ua.random}
+    webpage = requests.get(url, headers=headers)
+    soup = BeautifulSoup(webpage.text, "html.parser")
     pronunciations= soup.select(f'li:has(sup:has(a[href="/wiki/Appendix:{language}_pronunciation"]))' )
     ipa = ""
@@ -41,8 +48,15 @@ def get_ipa_for_word(word: str, language: str) -> Word:
         if first_entry is not None:
             ipa = first_entry.text
+    # in German, nouns are capitalized, but the wordlist we're using might not
+    # respect that. This accounts for that, but likely reduces performance for
+    # words without any wiktionary entry.
+    if language == "German" and ipa == "" and word != word.capitalize():
+        return get_ipa_for_word(word.capitalize(), language)
     return Word(word, ipa)
 def first_ipa_pronunciation(ipa_str: str) -> str:
     """Find the first IPA spelling in the given string"""
     result = re.findall(r"[/\[].*?[/\]]", ipa_str)

{grzegorz-0.6.1 → grzegorz-0.6.2}/grzegorz.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: grzegorz
-Version: 0.6.1
+Version: 0.6.2
 Summary: Minimal pair generator and phonetics tool
 Home-page: https://github.com/xylous/grzegorz
 Author: xylous
@@ -20,6 +20,7 @@ Requires-Dist: beautifulsoup4
 Requires-Dist: tqdm
 Requires-Dist: requests
 Requires-Dist: genanki
+Requires-Dist: fake-useragent
 Dynamic: license-file
 # grzegorz

{grzegorz-0.6.1 → grzegorz-0.6.2}/grzegorz.egg-info/requires.txt RENAMED Viewed

@@ -2,3 +2,4 @@ beautifulsoup4
 tqdm
 requests
 genanki
+fake-useragent

{grzegorz-0.6.1 → grzegorz-0.6.2}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = grzegorz
-version = 0.6.1
+version = 0.6.2
 author = xylous
 author_email = xylous.e@gmail.com
 description = Minimal pair generator and phonetics tool
@@ -27,6 +27,7 @@ install_requires =
 	tqdm
 	requests
 	genanki
+	fake-useragent
 [options.entry_points]
 console_scripts =