argos-translate-files-main 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ import abc
2
+ import os.path
3
+
4
+ from argostranslate.translate import ITranslation
5
+
6
+
7
class AbstractFile():
    """Base class for the file formats this package can translate.

    A concrete format lists the extensions it handles in
    ``supported_file_extensions`` and implements ``translate`` and
    ``get_texts``.

    NOTE: the class does not use ``abc.ABCMeta``, so ``abc.abstractmethod``
    is not enforced at instantiation time; calling an unimplemented method
    raises ``NotImplementedError`` instead.
    """

    # File extensions (with the leading dot, as returned by os.path.splitext)
    # handled by the format. Overridden by each concrete subclass.
    supported_file_extensions = []

    def support(self, file_path: str):
        """Tell whether this format handles the given file.

        Args:
            file_path (str): file path

        Returns:
            bool: True when the extension of ``file_path`` is listed in
                ``supported_file_extensions`` (exact, case-sensitive match)
        """
        file_ext = os.path.splitext(file_path)[1]

        return file_ext in self.supported_file_extensions

    def get_output_path(self, underlying_translation: ITranslation, file_path: str):
        """Build the path of the translated file.

        The output sits next to the input file and is named
        ``<name>_<target language code><extension>``.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the file to translate

        Returns:
            str: path of the translated file
        """
        dir_path = os.path.dirname(file_path)
        file_name, file_ext = os.path.splitext(os.path.basename(file_path))
        to_code = underlying_translation.to_lang.code

        # os.path.join ignores an empty dir_path. Concatenating with "/"
        # turned a bare file name such as "doc.txt" into "/doc_fr.txt",
        # i.e. a file in the filesystem root.
        return os.path.join(dir_path, file_name + '_' + to_code + file_ext)

    @abc.abstractmethod
    def translate(self, underlying_translation: ITranslation, file_path: str):
        """Translate the file; implemented by each concrete format.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the file to translate
        """
        raise NotImplementedError

    @abc.abstractmethod
    def get_texts(self, file_path: str):
        """Extract text from the file; implemented by each concrete format.

        Args:
            file_path (str): file path
        """
        raise NotImplementedError
@@ -0,0 +1,59 @@
1
+ from argostranslate.translate import ITranslation
2
+
3
+ from argostranslatefiles.formats.html import Html
4
+ from argostranslatefiles.formats.opendocument.odp import Odp
5
+ from argostranslatefiles.formats.opendocument.odt import Odt
6
+ from argostranslatefiles.formats.openxml.docx import Docx
7
+ from argostranslatefiles.formats.openxml.pptx import Pptx
8
+ from argostranslatefiles.formats.txt import Txt
9
+ from argostranslatefiles.formats.epub import Epub
10
+ from argostranslatefiles.formats.srt import Srt
11
+ from argostranslatefiles.formats.pdf import Pdf
12
+
13
def get_supported_formats():
    """Return a new list holding one handler instance per supported format.

    Returns:
        list: freshly created handlers, in lookup order (Txt, Odt, Odp,
            Docx, Pptx, Epub, Html, Srt, Pdf)
    """
    handler_classes = (Txt, Odt, Odp, Docx, Pptx, Epub, Html, Srt, Pdf)
    return [handler_class() for handler_class in handler_classes]
25
+
26
+
27
def translate_file(underlying_translation: ITranslation, file_path: str):
    """Translate a file with the first format handler that supports it.

    Args:
        underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
        file_path (str): file path

    Returns:
        The value returned by the handler's ``translate`` (the translated
        file path), or False when no handler supports ``file_path``.
    """
    candidates = (
        handler
        for handler in get_supported_formats()
        if handler.support(file_path)
    )
    chosen = next(candidates, None)

    if chosen is None:
        return False

    return chosen.translate(underlying_translation, file_path)
43
+
44
+
45
def get_texts(file_path: str):
    """Get the file contents using the first format handler that supports it.

    Args:
        file_path (str): file path

    Returns:
        The value returned by the handler's ``get_texts`` (the file
        contents), or False when no handler supports ``file_path``.
    """
    matching = [
        handler
        for handler in get_supported_formats()
        if handler.support(file_path)
    ][:1]

    if not matching:
        return False

    return matching[0].get_texts(file_path)
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,39 @@
1
+ import argostranslate
2
+ import bs4
3
+ from argostranslate.tags import Tag, ITag
4
+
5
+ from argostranslatefiles.abstract_file import AbstractFile
6
+
7
+
8
class AbstractXml(AbstractFile):
    """Helpers shared by XML-based formats.

    Converts between BeautifulSoup objects and Argos Translate ITag trees.
    """

    def is_translatable(self, soup):
        """Tell whether a soup node should be translated.

        Args:
            soup (bs4.element.Tag): Beautiful Soup object

        Returns:
            bool: True when the node's text is not empty
        """
        return soup.text != ""

    def itag_of_soup(self, soup):
        """Returns an argostranslate.tags.ITag tree from a BeautifulSoup object.

        Args:
            soup (bs4.element.Navigablestring or bs4.element.Tag): Beautiful Soup object

        Returns:
            argostranslate.tags.ITag: Argos Translate ITag tree
        """
        # Text nodes become plain strings (the leaves of the ITag tree).
        if isinstance(soup, bs4.element.NavigableString):
            return str(soup)

        translatable = self.is_translatable(soup)
        children = [self.itag_of_soup(content) for content in soup.contents]
        to_return = Tag(children, translatable)
        # Keep a reference to the source node so soup_of_itag can write the
        # translated children back into it.
        to_return.soup = soup
        return to_return

    def soup_of_itag(self, itag: ITag):
        """Returns a BeautifulSoup object from an Argos Translate ITag.

        Args:
            itag (argostranslate.tags.ITag): ITag object to convert to Soup

        Returns:
            bs4.elements.BeautifulSoup: BeautifulSoup object
        """
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses, which have no ``soup`` attribute to fall back on.
        if isinstance(itag, str):
            return bs4.element.NavigableString(itag)

        soup = itag.soup
        soup.contents = [self.soup_of_itag(child) for child in itag.children]
        return soup
@@ -0,0 +1,86 @@
1
+ import re
2
+ import zipfile
3
+
4
+ import translatehtml
5
+ from argostranslate.tags import translate_tags
6
+ from argostranslate.translate import ITranslation
7
+ from bs4 import BeautifulSoup
8
+
9
+ from argostranslatefiles.formats.abstract_xml import AbstractXml
10
+
11
+
12
class Epub(AbstractXml):
    """EPUB handler: translates the XML metadata and (X)HTML members of the archive."""

    supported_file_extensions = ['.epub']

    # Archive members parsed as XML and translated through the ITag tree.
    _translatable_xml_filenames = (
        "OPS/content.opf",
        "OPS/toc.ncx",
        "OEBPS/content.opf",
        "OEBPS/toc.ncx",
    )
    # Suffixes of the archive members handled as HTML.
    _html_suffixes = ('.html', '.xhtml')
    # Prolog removed before the HTML is processed and put back afterwards.
    _html_head = '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>'

    def is_translatable(self, soup):
        """Tell whether a soup node should be translated (non-empty text)."""
        return soup.text != ""

    def translate(self, underlying_translation: ITranslation, file_path: str):
        """Translate an EPUB file into a new archive next to the original.

        Members that are neither translatable XML nor (X)HTML are copied
        unchanged.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the EPUB file

        Returns:
            str: path of the translated EPUB file
        """
        outzip_path = self.get_output_path(underlying_translation, file_path)
        head = self._html_head

        # Context managers close both archives even when parsing or
        # translation raises.
        with zipfile.ZipFile(file_path, "r") as inzip, \
                zipfile.ZipFile(outzip_path, "w") as outzip:
            for inzipinfo in inzip.infolist():
                filename = inzipinfo.filename

                with inzip.open(inzipinfo) as infile:
                    raw = infile.read()

                if filename in self._translatable_xml_filenames:
                    soup = BeautifulSoup(raw, 'xml')

                    itag = self.itag_of_soup(soup)
                    translated_tag = translate_tags(underlying_translation, itag)
                    translated_soup = self.soup_of_itag(translated_tag)

                    outzip.writestr(filename, str(translated_soup))
                elif filename.endswith(self._html_suffixes):
                    content = str(raw, 'utf-8')
                    head_present = content.startswith(head)

                    if head_present:
                        content = content[len(head):]

                    translated = str(translatehtml.translate_html(underlying_translation, content))

                    if head_present:
                        translated = head + translated

                    outzip.writestr(filename, translated)
                else:
                    outzip.writestr(filename, raw)

        return outzip_path

    def get_texts(self, file_path: str):
        """Return a sample of at most 4096 characters from the EPUB.

        Text comes from the translatable XML members and the raw markup of
        the (X)HTML members. Other members are skipped: decoding binary
        members such as images as UTF-8 raised UnicodeDecodeError.

        Args:
            file_path (str): path of the EPUB file

        Returns:
            str: up to 4096 characters of content
        """
        texts = ""
        head = self._html_head

        with zipfile.ZipFile(file_path, "r") as inzip:
            for inzipinfo in inzip.infolist():
                # Stop reading members once the sample is large enough.
                if len(texts) > 4096:
                    break

                filename = inzipinfo.filename

                if filename in self._translatable_xml_filenames:
                    with inzip.open(inzipinfo) as infile:
                        soup = BeautifulSoup(infile.read(), 'xml')

                    texts += self.itag_of_soup(soup).text()
                elif filename.endswith(self._html_suffixes):
                    with inzip.open(inzipinfo) as infile:
                        content = str(infile.read(), 'utf-8')

                    if content.startswith(head):
                        content = content[len(head):]

                    texts += content

        return texts[:4096]
@@ -0,0 +1,43 @@
1
+ import translatehtml
2
+ from argostranslate.translate import ITranslation
3
+
4
+ from argostranslatefiles.abstract_file import AbstractFile
5
+ from bs4 import BeautifulSoup
6
+
7
+
8
class Html(AbstractFile):
    """HTML handler built on translatehtml."""

    supported_file_extensions = ['.html']

    def translate(self, underlying_translation: ITranslation, file_path: str):
        """Translate an HTML file into a new file next to the original.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the HTML file

        Returns:
            str: path of the translated HTML file
        """
        outfile_path = self.get_output_path(underlying_translation, file_path)

        with open(file_path, "r") as infile:
            content = infile.read()

        # A leading doctype is removed before translation and put back
        # unchanged afterwards.
        head = '<!DOCTYPE html>'
        head_present = content.startswith(head)

        if head_present:
            content = content[len(head):]

        translated = str(translatehtml.translate_html(underlying_translation, content))

        if head_present:
            translated = head + translated

        # The output is opened only after translation succeeded, so a failed
        # translation does not leave an empty file behind.
        with open(outfile_path, "w") as outfile:
            outfile.write(translated)

        return outfile_path

    def get_texts(self, file_path: str):
        """Return at most 4096 characters of text extracted from the HTML file.

        Args:
            file_path (str): path of the HTML file

        Returns:
            str: up to 4096 characters of text
        """
        # The context manager closes the file, which the previous code never did.
        with open(file_path, "r") as infile:
            content = infile.read()

        soup = BeautifulSoup(content, "html.parser")
        return translatehtml.itag_of_soup(soup).text()[0:4096]
File without changes
@@ -0,0 +1,5 @@
1
+ from argostranslatefiles.formats.opendocument.odt import Odt
2
+
3
+
4
class Odp(Odt):
    """OpenDocument presentation handler; reuses the Odt implementation."""

    supported_file_extensions = [".odp"]
@@ -0,0 +1,52 @@
1
+ import zipfile
2
+
3
+ from argostranslate.tags import translate_tags
4
+ from argostranslate.translate import ITranslation
5
+ from bs4 import BeautifulSoup
6
+
7
+ from argostranslatefiles.formats.abstract_xml import AbstractXml
8
+
9
+
10
class Odt(AbstractXml):
    """OpenDocument text handler: translates ``content.xml`` inside the archive."""

    supported_file_extensions = ['.odt']

    def translate(self, underlying_translation: ITranslation, file_path: str):
        """Translate the document into a new archive next to the original.

        Only ``content.xml`` is translated; every other member is copied
        unchanged.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the document

        Returns:
            str: path of the translated document
        """
        outzip_path = self.get_output_path(underlying_translation, file_path)

        # Context managers close both archives even when parsing or
        # translation raises.
        with zipfile.ZipFile(file_path, "r") as inzip, \
                zipfile.ZipFile(outzip_path, "w") as outzip:
            for inzipinfo in inzip.infolist():
                with inzip.open(inzipinfo) as infile:
                    raw = infile.read()

                if inzipinfo.filename == "content.xml":
                    soup = BeautifulSoup(raw, 'xml')

                    itag = self.itag_of_soup(soup)
                    translated_tag = translate_tags(underlying_translation, itag)
                    translated_soup = self.soup_of_itag(translated_tag)

                    outzip.writestr(inzipinfo.filename, str(translated_soup))
                else:
                    outzip.writestr(inzipinfo.filename, raw)

        return outzip_path

    def get_texts(self, file_path: str):
        """Return at most 4096 characters of text from ``content.xml``.

        Args:
            file_path (str): path of the document

        Returns:
            str: up to 4096 characters of text
        """
        texts = ""

        with zipfile.ZipFile(file_path, "r") as inzip:
            for inzipinfo in inzip.infolist():
                # Stop reading members once the sample is large enough.
                if len(texts) > 4096:
                    break

                if inzipinfo.filename == "content.xml":
                    with inzip.open(inzipinfo) as infile:
                        soup = BeautifulSoup(infile.read(), 'xml')

                    texts += self.itag_of_soup(soup).text()

        return texts[:4096]
@@ -0,0 +1,58 @@
1
+ import zipfile
2
+
3
+ from argostranslate.tags import translate_tags
4
+ from argostranslate.translate import ITranslation
5
+ from bs4 import BeautifulSoup
6
+
7
+ from argostranslatefiles.formats.abstract_xml import AbstractXml
8
+
9
+
10
class Docx(AbstractXml):
    """Word (.docx) handler: translates the document body, headers and footers."""

    supported_file_extensions = ['.docx']

    @staticmethod
    def _is_text_part(filename):
        """Tell whether an archive member is the body, a header or a footer."""
        return (filename == "word/document.xml" or
                filename.startswith("word/header") or
                filename.startswith("word/footer"))

    def translate(self, underlying_translation: ITranslation, file_path: str):
        """Translate the document into a new archive next to the original.

        Members other than the body, headers and footers are copied
        unchanged.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the document

        Returns:
            str: path of the translated document
        """
        outzip_path = self.get_output_path(underlying_translation, file_path)

        # Context managers close both archives even when parsing or
        # translation raises.
        with zipfile.ZipFile(file_path, "r") as inzip, \
                zipfile.ZipFile(outzip_path, "w") as outzip:
            for inzipinfo in inzip.infolist():
                with inzip.open(inzipinfo) as infile:
                    raw = infile.read()

                if self._is_text_part(inzipinfo.filename):
                    soup = BeautifulSoup(raw, 'xml')

                    itag = self.itag_of_soup(soup)
                    translated_tag = translate_tags(underlying_translation, itag)
                    translated_soup = self.soup_of_itag(translated_tag)

                    outzip.writestr(inzipinfo.filename, str(translated_soup))
                else:
                    outzip.writestr(inzipinfo.filename, raw)

        return outzip_path

    def get_texts(self, file_path: str):
        """Return at most 4096 characters of text from the body, headers and footers.

        Args:
            file_path (str): path of the document

        Returns:
            str: up to 4096 characters of text
        """
        texts = ""

        with zipfile.ZipFile(file_path, "r") as inzip:
            for inzipinfo in inzip.infolist():
                # Stop reading members once the sample is large enough.
                if len(texts) > 4096:
                    break

                if self._is_text_part(inzipinfo.filename):
                    with inzip.open(inzipinfo) as infile:
                        soup = BeautifulSoup(infile.read(), 'xml')

                    texts += self.itag_of_soup(soup).text()

        return texts[:4096]
@@ -0,0 +1,53 @@
1
+ import re
2
+ import zipfile
3
+
4
+ from argostranslate.tags import translate_tags
5
+ from argostranslate.translate import ITranslation
6
+ from bs4 import BeautifulSoup
7
+
8
+ from argostranslatefiles.formats.abstract_xml import AbstractXml
9
+
10
+
11
class Pptx(AbstractXml):
    """PowerPoint (.pptx) handler: translates the slide XML members."""

    supported_file_extensions = ['.pptx']

    # Matched against the start of each archive member name to pick slides.
    _slide_pattern = re.compile(r"ppt\/slides\/slide[0-9]*\.xml")

    def translate(self, underlying_translation: ITranslation, file_path: str):
        """Translate the presentation into a new archive next to the original.

        Members that are not slides are copied unchanged.

        Args:
            underlying_translation (argostranslate.translate.ITranslation): Argos Translate Translation
            file_path (str): path of the presentation

        Returns:
            str: path of the translated presentation
        """
        outzip_path = self.get_output_path(underlying_translation, file_path)

        # Context managers close both archives even when parsing or
        # translation raises.
        with zipfile.ZipFile(file_path, "r") as inzip, \
                zipfile.ZipFile(outzip_path, "w") as outzip:
            for inzipinfo in inzip.infolist():
                with inzip.open(inzipinfo) as infile:
                    raw = infile.read()

                if self._slide_pattern.match(inzipinfo.filename):
                    soup = BeautifulSoup(raw, 'xml')

                    itag = self.itag_of_soup(soup)
                    translated_tag = translate_tags(underlying_translation, itag)
                    translated_soup = self.soup_of_itag(translated_tag)

                    outzip.writestr(inzipinfo.filename, str(translated_soup))
                else:
                    outzip.writestr(inzipinfo.filename, raw)

        return outzip_path

    def get_texts(self, file_path: str):
        """Return at most 4096 characters of text from the slides.

        Args:
            file_path (str): path of the presentation

        Returns:
            str: up to 4096 characters of text
        """
        texts = ""

        with zipfile.ZipFile(file_path, "r") as inzip:
            for inzipinfo in inzip.infolist():
                # Stop reading members once the sample is large enough.
                if len(texts) > 4096:
                    break

                if self._slide_pattern.match(inzipinfo.filename):
                    with inzip.open(inzipinfo) as infile:
                        soup = BeautifulSoup(infile.read(), 'xml')

                    texts += self.itag_of_soup(soup).text()

        return texts[:4096]