PyPI - grzegorz - Versions diffs - 0.6.0__tar.gz → 0.6.1__tar.gz - Mend

grzegorz 0.6.0.tar.gz → 0.6.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{grzegorz-0.6.0 → grzegorz-0.6.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: grzegorz
-Version: 0.6.0
+Version: 0.6.1
 Summary: Minimal pair generator and phonetics tool
 Home-page: https://github.com/xylous/grzegorz
 Author: xylous
@@ -16,6 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: beautifulsoup4
+Requires-Dist: tqdm
+Requires-Dist: requests
+Requires-Dist: genanki
+Dynamic: license-file
 # grzegorz

{grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/fetcher.py RENAMED Viewed

@@ -15,7 +15,8 @@
 from grzegorz.word import Word
-from wiktionaryparser import WiktionaryParser
+import requests
+from bs4 import BeautifulSoup
 import re
 ### HELPER FUNCTIONS ###
@@ -26,21 +27,19 @@ def get_ipa_for_word(word: str, language: str) -> Word:
     and return a `Word` binding it to the letters. If no transcription was
     found, then the `ipa` field of the result is empty.
     """
-    parser = WiktionaryParser()
-    parser.set_default_language(language)
+    language = language.capitalize()
+    url = f"https://en.wiktionary.org/wiki/{word}"
+    webpage = requests.get(url)
+    soup= BeautifulSoup(webpage.text, "html.parser")
+    pronunciations= soup.select(f'li:has(sup:has(a[href="/wiki/Appendix:{language}_pronunciation"]))' )
     ipa = ""
-    fetched = parser.fetch(word)
-    if len(fetched):
-        first_entry = fetched[0]
-        pronunciations = first_entry.get('pronunciations')
-        text = pronunciations.get('text')
-        if len(text):
-            ipa = first_ipa_pronunciation(text[0])
-    # Not all words have their IPAs on wiktionary, but they might have a
-    # "Rhymes" section (many German words do, for example). If we did fetch a
-    # rhyme, don't add it as a valid IPA
-    if len(ipa) and ipa[0] == '-':
-        ipa = ""
+    # maybe blindly choosing the first IPA transliteration is not the wisest
+    # choice in the world?
+    if len(pronunciations):
+        first_entry = pronunciations[0].find("span", {"class": "IPA"})
+        if first_entry is not None:
+            ipa = first_entry.text
     return Word(word, ipa)

{grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/generator.py RENAMED Viewed

@@ -46,17 +46,21 @@ class MinPairGenerator:
                 lists_of_phonemes.append(line.replace(" ", "").split(","))
         self.filter_pairs = phoneme_lists_to_phoneme_pairs(lists_of_phonemes)
-    def generate(self, words: list[Word]) -> list[WordPair]:
+    def generate(self, words: list[Word], silent: bool = True) -> list[WordPair]:
         """
         Generate minimal pairs from the given parameters
         """
         minpairs = []
-        for i in tqdm(range(0,len(words))):
-            for j in range(i+1,len(words)):
+        progress_bar = tqdm(total=int(len(words) * (len(words) - 1) / 2), disable=silent)
+        for i in range(0, len(words)):
+            words_after = range(i+1, len(words))
+            for j in words_after:
                 pair = (words[i], words[j])
                 if self.check_minpair(pair):
                     minpairs.append(pair)
+            progress_bar.update(len(words_after))
+        progress_bar.close()
         return minpairs

{grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz/subcommands.py RENAMED Viewed

@@ -95,7 +95,7 @@ def wordlist_command(language: str, bounds: str, outfile: str) -> int:
     else:
         return 1
-def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 10) -> None:
+def fetchipa(infile: str, outfile: str, keep_failed: bool, numproc: int = 20) -> None:
     """
     Given an input file containing a list of words separated, fetch the IPAs and
     create a text file with their IPA spellings matched to their text
@@ -148,7 +148,8 @@ def generate_command(infile, outfile, nooptimise, no_phonemes, no_chronemes,
     if no_stress:
         print("Generator: syllable stress contrasts will be ignored")
-    minpairs = g.generate(words)
+    print('Generating minimal pairs from:', len(words), 'words')
+    minpairs = g.generate(words, False)
     writefile(outfile, encode_format(encode_minpair, minpairs))
     print('Done! Generated', len(minpairs), 'minimal pairs')

{grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: grzegorz
-Version: 0.6.0
+Version: 0.6.1
 Summary: Minimal pair generator and phonetics tool
 Home-page: https://github.com/xylous/grzegorz
 Author: xylous
@@ -16,6 +16,11 @@ Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: beautifulsoup4
+Requires-Dist: tqdm
+Requires-Dist: requests
+Requires-Dist: genanki
+Dynamic: license-file
 # grzegorz

{grzegorz-0.6.0 → grzegorz-0.6.1}/grzegorz.egg-info/requires.txt RENAMED Viewed

@@ -1,4 +1,4 @@
-wiktionaryparser
+beautifulsoup4
 tqdm
 requests
 genanki

{grzegorz-0.6.0 → grzegorz-0.6.1}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = grzegorz
-version = 0.6.0
+version = 0.6.1
 author = xylous
 author_email = xylous.e@gmail.com
 description = Minimal pair generator and phonetics tool
@@ -23,7 +23,7 @@ packages =
 	grzegorz
 python_requires = >=3.10
 install_requires =
-	wiktionaryparser
+	beautifulsoup4
 	tqdm
 	requests
 	genanki