PyPI - argos-translate-files-main - Versions diffs - 1.4.1__py3-none-any.whl - Mend

argos-translate-files-main 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

argos_translate_files_main-1.4.1.dist-info/METADATA +65 -0
argos_translate_files_main-1.4.1.dist-info/RECORD +23 -0
argos_translate_files_main-1.4.1.dist-info/WHEEL +5 -0
argos_translate_files_main-1.4.1.dist-info/licenses/LICENSE +661 -0
argos_translate_files_main-1.4.1.dist-info/top_level.txt +2 -0
argostranslatefiles/__init__.py +1 -0
argostranslatefiles/abstract_file.py +26 -0
argostranslatefiles/argostranslatefiles.py +59 -0
argostranslatefiles/formats/__init__.py +1 -0
argostranslatefiles/formats/abstract_xml.py +39 -0
argostranslatefiles/formats/epub.py +86 -0
argostranslatefiles/formats/html.py +43 -0
argostranslatefiles/formats/opendocument/__init__.py +0 -0
argostranslatefiles/formats/opendocument/odp.py +5 -0
argostranslatefiles/formats/opendocument/odt.py +52 -0
argostranslatefiles/formats/openxml/__init__.py +1 -0
argostranslatefiles/formats/openxml/docx.py +58 -0
argostranslatefiles/formats/openxml/pptx.py +53 -0
argostranslatefiles/formats/pdf.py +288 -0
argostranslatefiles/formats/srt.py +28 -0
argostranslatefiles/formats/txt.py +24 -0
tests/__init__.py +0 -0
tests/test_init.py +6 -0

argostranslatefiles/abstract_file.py ADDED Viewed

@@ -0,0 +1,26 @@
+import abc
+import os.path
+from argostranslate.translate import ITranslation
+class AbstractFile():
+    supported_file_extensions = []
+    def support(self, file_path: str):
+        file_ext = os.path.splitext(file_path)[1]
+        return file_ext in self.supported_file_extensions
+    def get_output_path(self, underlying_translation: ITranslation, file_path: str):
+        dir_path = os.path.dirname(file_path)
+        file_name, file_ext = os.path.splitext(os.path.basename(file_path))
+        to_code = underlying_translation.to_lang.code
+        return dir_path + "/" + file_name + '_' + to_code + file_ext
+    @abc.abstractmethod
+    def translate(self, underlying_translation: ITranslation, file_path: str): raise NotImplementedError
+    @abc.abstractmethod
+    def get_texts(self, file_path: str): raise NotImplementedError

argostranslatefiles/argostranslatefiles.py ADDED Viewed

@@ -0,0 +1,59 @@
+from argostranslate.translate import ITranslation
+from argostranslatefiles.formats.html import Html
+from argostranslatefiles.formats.opendocument.odp import Odp
+from argostranslatefiles.formats.opendocument.odt import Odt
+from argostranslatefiles.formats.openxml.docx import Docx
+from argostranslatefiles.formats.openxml.pptx import Pptx
+from argostranslatefiles.formats.txt import Txt
+from argostranslatefiles.formats.epub import Epub
+from argostranslatefiles.formats.srt import Srt
+from argostranslatefiles.formats.pdf import Pdf
+def get_supported_formats():
+    return [
+        Txt(),
+        Odt(),
+        Odp(),
+        Docx(),
+        Pptx(),
+        Epub(),
+        Html(),
+        Srt(),
+        Pdf(),
+    ]
+def translate_file(underlying_translation: ITranslation, file_path: str):
+    """Translate a file.
+    Args:
+        underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
+        file_path (str): file path
+    Returns:
+        file_path: Translated file
+    """
+    for supported_format in get_supported_formats():
+        if supported_format.support(file_path):
+            return supported_format.translate(underlying_translation, file_path)
+    return False
+def get_texts(file_path: str):
+    """Get the file contents.
+    Args:
+        file_path (str): file path
+    Returns:
+        texts: File contents
+    """
+    for supported_format in get_supported_formats():
+        if supported_format.support(file_path):
+            return supported_format.get_texts(file_path)
+    return False

argostranslatefiles/formats/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

argostranslatefiles/formats/abstract_xml.py ADDED Viewed

@@ -0,0 +1,39 @@
+import argostranslate
+import bs4
+from argostranslate.tags import Tag, ITag
+from argostranslatefiles.abstract_file import AbstractFile
+class AbstractXml(AbstractFile):
+    def is_translatable(self, soup):
+        return soup.text != ""
+    def itag_of_soup(self, soup):
+        """Returns an argostranslate.tags.ITag tree from a BeautifulSoup object.
+        Args:
+            soup (bs4.element.Navigablestring or bs4.element.Tag): Beautiful Soup object
+        Returns:
+            argostranslate.tags.ITag: Argos Translate ITag tree
+        """
+        if isinstance(soup, bs4.element.NavigableString):
+            return str(soup)
+        translatable = self.is_translatable(soup)
+        to_return = Tag([self.itag_of_soup(content) for content in soup.contents], translatable)
+        to_return.soup = soup
+        return to_return
+    def soup_of_itag(self, itag: ITag):
+        """Returns a BeautifulSoup object from an Argos Translate ITag.
+        Args:
+            itag (argostranslate.tags.ITag): ITag object to convert to Soup
+        Returns:
+            bs4.elements.BeautifulSoup: BeautifulSoup object
+        """
+        if type(itag) == str:
+            return bs4.element.NavigableString(itag)
+        soup = itag.soup
+        soup.contents = [self.soup_of_itag(child) for child in itag.children]
+        return soup

argostranslatefiles/formats/epub.py ADDED Viewed

@@ -0,0 +1,86 @@
+import re
+import zipfile
+import translatehtml
+from argostranslate.tags import translate_tags
+from argostranslate.translate import ITranslation
+from bs4 import BeautifulSoup
+from argostranslatefiles.formats.abstract_xml import AbstractXml
+class Epub(AbstractXml):
+    supported_file_extensions = ['.epub']
+    def is_translatable(self, soup):
+        return soup.text != ""
+    def translate(self, underlying_translation: ITranslation, file_path: str):
+        outzip_path = self.get_output_path(underlying_translation, file_path)
+        inzip = zipfile.ZipFile(file_path, "r")
+        outzip = zipfile.ZipFile(outzip_path, "w")
+        for inzipinfo in inzip.infolist():
+            with inzip.open(inzipinfo) as infile:
+                translatable_xml_filenames = ["OPS/content.opf", "OPS/toc.ncx", "OEBPS/content.opf", "OEBPS/toc.ncx"]
+                if inzipinfo.filename in translatable_xml_filenames:
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    itag = self.itag_of_soup(soup)
+                    translated_tag = translate_tags(underlying_translation, itag)
+                    translated_soup = self.soup_of_itag(translated_tag)
+                    outzip.writestr(inzipinfo.filename, str(translated_soup))
+                elif inzipinfo.filename.endswith('.html') or inzipinfo.filename.endswith('.xhtml'):
+                    head = '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>'
+                    content = str(infile.read(), 'utf-8')
+                    head_present = content.startswith(head)
+                    if head_present:
+                        content = content[len(head):]
+                    translated = str(translatehtml.translate_html(underlying_translation, content))
+                    if head_present:
+                        translated = str(head) + translated
+                    outzip.writestr(inzipinfo.filename, translated)
+                else:
+                    outzip.writestr(inzipinfo.filename, infile.read())
+        inzip.close()
+        outzip.close()
+        return outzip_path
+    def get_texts(self, file_path: str):
+        inzip = zipfile.ZipFile(file_path, "r")
+        texts = ""
+        for inzipinfo in inzip.infolist():
+            if len(texts) > 4096:
+                break
+            with inzip.open(inzipinfo) as infile:
+                translatable_xml_filenames = ["OPS/content.opf", "OPS/toc.ncx", "OEBPS/content.opf", "OEBPS/toc.ncx"]
+                if inzipinfo.filename in translatable_xml_filenames:
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    texts += self.itag_of_soup(soup).text()
+                elif inzipinfo.filename.endswith('.html') or inzipinfo.filename.endswith('.xhtml'):
+                    head = '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>'
+                    content = str(infile.read(), 'utf-8')
+                    head_present = content.startswith(head)
+                    if head_present:
+                        content = content[len(head):]
+                    texts += content
+                else:
+                    texts += infile.read().decode()
+        inzip.close()
+        return texts[:4096]

argostranslatefiles/formats/html.py ADDED Viewed

@@ -0,0 +1,43 @@
+import translatehtml
+from argostranslate.translate import ITranslation
+from argostranslatefiles.abstract_file import AbstractFile
+from bs4 import BeautifulSoup
+class Html(AbstractFile):
+    supported_file_extensions = ['.html']
+    def translate(self, underlying_translation: ITranslation, file_path: str):
+        outfile_path = self.get_output_path(underlying_translation, file_path)
+        infile = open(file_path, "r")
+        outfile = open(outfile_path, "w")
+        content = infile.read()
+        head = '<!DOCTYPE html>'
+        head_present = content.startswith(head)
+        if head_present:
+            content = content[len(head):]
+        translated = str(translatehtml.translate_html(underlying_translation, content))
+        if head_present:
+            translated = str(head) + translated
+        outfile.write(translated)
+        infile.close()
+        outfile.close()
+        return outfile_path
+    def get_texts(self, file_path: str):
+        infile = open(file_path, "r")
+        content = infile.read()
+        soup = BeautifulSoup(content, "html.parser")
+        return translatehtml.itag_of_soup(soup).text()[0:4096]

argostranslatefiles/formats/opendocument/__init__.py ADDED Viewed

File without changes

argostranslatefiles/formats/opendocument/odp.py ADDED Viewed

@@ -0,0 +1,5 @@
+from argostranslatefiles.formats.opendocument.odt import Odt
+class Odp(Odt):
+    supported_file_extensions = ['.odp']

argostranslatefiles/formats/opendocument/odt.py ADDED Viewed

@@ -0,0 +1,52 @@
+import zipfile
+from argostranslate.tags import translate_tags
+from argostranslate.translate import ITranslation
+from bs4 import BeautifulSoup
+from argostranslatefiles.formats.abstract_xml import AbstractXml
+class Odt(AbstractXml):
+    supported_file_extensions = ['.odt']
+    def translate(self, underlying_translation: ITranslation, file_path: str):
+        outzip_path = self.get_output_path(underlying_translation, file_path)
+        inzip = zipfile.ZipFile(file_path, "r")
+        outzip = zipfile.ZipFile(outzip_path, "w")
+        for inzipinfo in inzip.infolist():
+            with inzip.open(inzipinfo) as infile:
+                if inzipinfo.filename == "content.xml":
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    itag = self.itag_of_soup(soup)
+                    translated_tag = translate_tags(underlying_translation, itag)
+                    translated_soup = self.soup_of_itag(translated_tag)
+                    outzip.writestr(inzipinfo.filename, str(translated_soup))
+                else:
+                    outzip.writestr(inzipinfo.filename, infile.read())
+        inzip.close()
+        outzip.close()
+        return outzip_path
+    def get_texts(self, file_path: str):
+        inzip = zipfile.ZipFile(file_path, "r")
+        texts = ""
+        for inzipinfo in inzip.infolist():
+            if len(texts) > 4096:
+                break
+            with inzip.open(inzipinfo) as infile:
+                if inzipinfo.filename == "content.xml":
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    texts += self.itag_of_soup(soup).text()
+        inzip.close()
+        return texts[:4096]

argostranslatefiles/formats/openxml/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

argostranslatefiles/formats/openxml/docx.py ADDED Viewed

@@ -0,0 +1,58 @@
+import zipfile
+from argostranslate.tags import translate_tags
+from argostranslate.translate import ITranslation
+from bs4 import BeautifulSoup
+from argostranslatefiles.formats.abstract_xml import AbstractXml
+class Docx(AbstractXml):
+    supported_file_extensions = ['.docx']
+    def translate(self, underlying_translation: ITranslation, file_path: str):
+        outzip_path = self.get_output_path(underlying_translation, file_path)
+        inzip = zipfile.ZipFile(file_path, "r")
+        outzip = zipfile.ZipFile(outzip_path, "w")
+        for inzipinfo in inzip.infolist():
+            with inzip.open(inzipinfo) as infile:
+                if (inzipinfo.filename == "word/document.xml" or
+                    inzipinfo.filename.startswith("word/header") or
+                    inzipinfo.filename.startswith("word/footer")):
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    itag = self.itag_of_soup(soup)
+                    translated_tag = translate_tags(underlying_translation, itag)
+                    translated_soup = self.soup_of_itag(translated_tag)
+                    outzip.writestr(inzipinfo.filename, str(translated_soup))
+                else:
+                    outzip.writestr(inzipinfo.filename, infile.read())
+        inzip.close()
+        outzip.close()
+        return outzip_path
+    def get_texts(self, file_path: str):
+        inzip = zipfile.ZipFile(file_path, "r")
+        texts = ""
+        for inzipinfo in inzip.infolist():
+            if len(texts) > 4096:
+                break
+            with inzip.open(inzipinfo) as infile:
+                if (inzipinfo.filename == "word/document.xml" or
+                    inzipinfo.filename.startswith("word/header") or
+                    inzipinfo.filename.startswith("word/footer")):
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    texts += self.itag_of_soup(soup).text()
+        inzip.close()
+        return texts[:4096]

argostranslatefiles/formats/openxml/pptx.py ADDED Viewed

@@ -0,0 +1,53 @@
+import re
+import zipfile
+from argostranslate.tags import translate_tags
+from argostranslate.translate import ITranslation
+from bs4 import BeautifulSoup
+from argostranslatefiles.formats.abstract_xml import AbstractXml
+class Pptx(AbstractXml):
+    supported_file_extensions = ['.pptx']
+    def translate(self, underlying_translation: ITranslation, file_path: str):
+        outzip_path = self.get_output_path(underlying_translation, file_path)
+        inzip = zipfile.ZipFile(file_path, "r")
+        outzip = zipfile.ZipFile(outzip_path, "w")
+        for inzipinfo in inzip.infolist():
+            with inzip.open(inzipinfo) as infile:
+                if re.match(r"ppt\/slides\/slide[0-9]*\.xml", inzipinfo.filename):
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    itag = self.itag_of_soup(soup)
+                    translated_tag = translate_tags(underlying_translation, itag)
+                    translated_soup = self.soup_of_itag(translated_tag)
+                    outzip.writestr(inzipinfo.filename, str(translated_soup))
+                else:
+                    outzip.writestr(inzipinfo.filename, infile.read())
+        inzip.close()
+        outzip.close()
+        return outzip_path
+    def get_texts(self, file_path: str):
+        inzip = zipfile.ZipFile(file_path, "r")
+        texts = ""
+        for inzipinfo in inzip.infolist():
+            if len(texts) > 4096:
+                break
+            with inzip.open(inzipinfo) as infile:
+                if re.match(r"ppt\/slides\/slide[0-9]*\.xml", inzipinfo.filename):
+                    soup = BeautifulSoup(infile.read(), 'xml')
+                    texts += self.itag_of_soup(soup).text()
+        inzip.close()
+        return texts[:4096]