docs-html-konwerter 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs_html_konwerter-0.1.0/PKG-INFO +6 -0
- docs_html_konwerter-0.1.0/README.md +1 -0
- docs_html_konwerter-0.1.0/pyproject.toml +17 -0
- docs_html_konwerter-0.1.0/setup.cfg +4 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter/__init__.py +5 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter/doc_konwerter.py +85 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter.egg-info/PKG-INFO +6 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter.egg-info/SOURCES.txt +10 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter.egg-info/dependency_links.txt +1 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter.egg-info/requires.txt +1 -0
- docs_html_konwerter-0.1.0/src/docs_html_konwerter.egg-info/top_level.txt +1 -0
- docs_html_konwerter-0.1.0/tests/test1.py +128 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# docs-html-konwerter
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "docs-html-konwerter"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
dependencies = [
|
|
10
|
+
"xml-konwerter",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
[project.urls]
|
|
14
|
+
Repository = "https://github.com/stanislawbartkowski/docs-html-konwerter"
|
|
15
|
+
|
|
16
|
+
[tool.setuptools.packages.find]
|
|
17
|
+
where = ["src"]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
from tempfile import NamedTemporaryFile
|
|
5
|
+
|
|
6
|
+
import xml.etree.ElementTree as et
|
|
7
|
+
|
|
8
|
+
from xml_konwerter import konwertujdok
|
|
9
|
+
|
|
10
|
+
# Marker suffixes: the first element of every pair in _htmlkeypairing.
# NOTE(review): presumably these identify repeated sections in the HTML
# template -- confirm against xml_konwerter.
_LINIE = ""
_LINIE1 = "1"
_LINIE2 = "2"
_LINIE3 = "3"
_LINIE4 = "4"
_LINIETPOD = "TPOD"
_LINIETNAD = "TNAD"

# Data-dictionary keys: the second element of every pair in _htmlkeypairing.
_LISTA = "linie"
_LISTA1 = "linie1"
_LISTA2 = "linie2"
_LISTA3 = "linie3"
_LISTA4 = "linie4"
_LISTATPOD = "linietpod"
_LISTATNAD = "linietnad"

# HTML entity names whose leading "&" is stripped before the conversion
# (_remove_meta) and put back afterwards (_restore_nbsp).
_LI_CH = ["nbsp", "oacute", "hellip", "Oacute"]

# (marker suffix, data key) pairs handed to konwertujdok as ``htmlkeypairing``.
_htmlkeypairing = [
    (_LINIE, _LISTA),
    (_LINIE1, _LISTA1),
    (_LINIE2, _LISTA2),
    (_LINIE3, _LISTA3),
    (_LINIE4, _LISTA4),
    (_LINIETPOD, _LISTATPOD),
    (_LINIETNAD, _LISTATNAD),
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _replace_ch(tname, ch):
    """Strip the leading ``&`` from every ``&<ch>;`` entity in file *tname*.

    The file is rewritten in place: ``&nbsp;`` becomes ``nbsp;`` when *ch* is
    ``"nbsp"``.  ``_replace_ch_back`` performs the opposite substitution.
    NOTE(review): presumably needed because the XML processing behind
    konwertujdok cannot resolve HTML named entities -- confirm.

    The substitution is done in-process on raw bytes instead of spawning
    ``sed -i``: no external tool is required, the file encoding is left
    untouched, and I/O errors surface as exceptions instead of being lost in
    an ignored exit status.

    Args:
        tname: Path of the file to modify.
        ch: Entity name without ``&`` and ``;``; it is matched literally.

    Raises:
        OSError: If the file cannot be read or written.
    """
    entity = f"&{ch};".encode()
    stripped = f"{ch};".encode()
    with open(tname, "rb") as src:
        data = src.read()
    with open(tname, "wb") as dst:
        dst.write(data.replace(entity, stripped))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _replace_ch_back(tname, ch):
    """Put the ``&`` back in front of every ``<ch>;`` in file *tname*.

    Reverses ``_replace_ch``: ``nbsp;`` becomes ``&nbsp;`` when *ch* is
    ``"nbsp"``.  The file is rewritten in place.

    NOTE: every literal occurrence of ``<ch>;`` is prefixed, including text
    that never was an entity; this matches the substitution the function has
    always performed.

    The substitution is done in-process on raw bytes instead of spawning
    ``sed -i``: no external tool is required, the file encoding is left
    untouched, and I/O errors surface as exceptions instead of being lost in
    an ignored exit status.

    Args:
        tname: Path of the file to modify.
        ch: Entity name without ``&`` and ``;``; it is matched literally.

    Raises:
        OSError: If the file cannot be read or written.
    """
    stripped = f"{ch};".encode()
    entity = f"&{ch};".encode()
    with open(tname, "rb") as src:
        data = src.read()
    with open(tname, "wb") as dst:
        dst.write(data.replace(stripped, entity))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _replace_tag(tname, tag):
    """Rewrite every ``<tag ...>`` in file *tname* as self-closed ``<tag .../>``.

    Used for void HTML elements (``meta``, ``img``, ``hr``) so that they are
    well-formed XML.  The file is rewritten in place, byte for byte apart
    from the inserted ``/``.

    Compared with the former ``sed`` one-liner this version:

    * leaves tags that are already self-closed (``<hr/>``, ``<hr />``)
      untouched instead of producing ``<hr//>``;
    * only matches the exact tag name, so ``img`` no longer matches
      ``<image ...>``;
    * also closes tags whose attributes are wrapped over several lines;
    * needs no external tool and raises on I/O errors instead of ignoring a
      failed subprocess.

    Matching stays case-sensitive, and a ``>`` inside an attribute value
    still ends the match early.

    Args:
        tname: Path of the file to modify.
        tag: Element name to self-close, matched literally.

    Raises:
        OSError: If the file cannot be read or written.
    """
    # Imported locally so the function does not depend on a module-level import.
    import re

    name = tag.encode()
    # The lookahead demands a delimiter right after the name (exact-name match).
    opening = re.compile(rb"<" + re.escape(name) + rb"(?=[\s/>])([^>]*)>")

    def _self_close(match):
        attrs = match.group(1)
        if attrs.rstrip().endswith(b"/"):
            # Already self-closed: keep the original text.
            return match.group(0)
        return b"<" + name + attrs + b"/>"

    with open(tname, "rb") as src:
        data = src.read()
    with open(tname, "wb") as dst:
        dst.write(opening.sub(_self_close, data))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _replace_page_break(tname):
    """Drop ``display:none;`` from page-break style declarations in *tname*.

    Every occurrence of ``page-break-before:always;display:none;`` is replaced
    by ``page-break-before:always;``.  The file is rewritten in place.

    The replacement is a literal byte substitution done in-process rather than
    through ``sed -i``: no external tool is required, and I/O errors surface
    as exceptions instead of being lost in an ignored exit status.

    Args:
        tname: Path of the file to modify.

    Raises:
        OSError: If the file cannot be read or written.
    """
    before = b"page-break-before:always;display:none;"
    after = b"page-break-before:always;"
    with open(tname, "rb") as src:
        data = src.read()
    with open(tname, "wb") as dst:
        dst.write(data.replace(before, after))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _remove_meta(htmltemplate, tname):
    """Copy *htmltemplate* to *tname* and pre-process the copy in place.

    Steps, in order: self-close the ``meta``, ``img`` and ``hr`` tags, rewrite
    the page-break style declaration, then strip the leading ``&`` from every
    entity named in ``_LI_CH``.  The template itself is never modified.
    """
    shutil.copyfile(htmltemplate, tname)

    for void_tag in ("meta", "img", "hr"):
        _replace_tag(tname, void_tag)

    _replace_page_break(tname)

    for entity in _LI_CH:
        _replace_ch(tname, entity)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _restore_nbsp(output):
    """Restore the ``&`` of every entity listed in ``_LI_CH`` in file *output*.

    Despite the name, all entries of ``_LI_CH`` are handled, not only ``nbsp``.
    """
    for entity in _LI_CH:
        _replace_ch_back(output, entity)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def parsehtml(htmltemplate: str, outputdir: str, outputhtml: str, d: dict):
    """Fill the HTML template with the data in *d* and write the result.

    A pre-processed copy of the template is made in a temporary file, passed
    to ``konwertujdok`` together with *d* and the module's key pairing, and
    finally the HTML entities stripped during pre-processing are restored in
    the produced file.

    Args:
        htmltemplate: Path of the source HTML template (left unmodified).
        outputdir: Directory of the result file.
        outputhtml: File name of the result inside *outputdir*.
        d: Data dictionary forwarded unchanged to ``konwertujdok``.
    """
    target = os.path.join(outputdir, outputhtml)
    with NamedTemporaryFile() as scratch:
        # Only the path is used; the temporary file is removed on exit.
        scratch_path = scratch.name
        _remove_meta(htmltemplate, scratch_path)
        konwertujdok(
            scratch_path,
            target,
            d,
            htmlkeypairing=_htmlkeypairing,
        )
        _restore_nbsp(target)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/docs_html_konwerter/__init__.py
|
|
4
|
+
src/docs_html_konwerter/doc_konwerter.py
|
|
5
|
+
src/docs_html_konwerter.egg-info/PKG-INFO
|
|
6
|
+
src/docs_html_konwerter.egg-info/SOURCES.txt
|
|
7
|
+
src/docs_html_konwerter.egg-info/dependency_links.txt
|
|
8
|
+
src/docs_html_konwerter.egg-info/requires.txt
|
|
9
|
+
src/docs_html_konwerter.egg-info/top_level.txt
|
|
10
|
+
tests/test1.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
xml-konwerter
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
docs_html_konwerter
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest
|
|
3
|
+
|
|
4
|
+
import helper as H
|
|
5
|
+
|
|
6
|
+
from docs_html_konwerter import parsehtml
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
_TESTDOKUMENT = "TestDokument1.html"
|
|
10
|
+
_DOKUMENTPROSTEPOWT = "ProstePowtorz1.html"
|
|
11
|
+
_TESTDOKUMENT2 = "TestDokument2.html"
|
|
12
|
+
_TESTDOKUMENT3 = "TestDokument3.html"
|
|
13
|
+
_TDOKUMENT = "TestDokumentTLinie.html"
|
|
14
|
+
_HELLOPAGE = "TestHelloPage.html"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestHtmlKonwerter(unittest.TestCase):
    """End-to-end tests running parsehtml on the HTML fixtures found by helper."""

    def _run(self, template_name, data):
        """Convert fixture *template_name* with *data*; return the output path."""
        source = H.html_file(template_name)
        print(source)
        target_dir = H.tmpdir()
        parsehtml(htmltemplate=source, outputdir=target_dir,
                  outputhtml=template_name, d=data)
        return os.path.join(target_dir, template_name)

    def _run_and_read(self, template_name, data):
        """Convert the fixture, then print and return the generated text."""
        with open(self._run(template_name, data)) as handle:
            content = handle.read()
        print(content)
        return content

    def _assert_all_in(self, content, *fragments):
        """Assert, in the given order, that each fragment occurs in *content*."""
        for fragment in fragments:
            self.assertIn(fragment, content)

    def _prosty_html(self, htmltemplate, lista=False):
        """Convert a simple document; return the generated text.

        The name fields are always checked; the single "linie" row is checked
        only when *lista* is true.
        """
        data = {
            "IMIE": "Juliusz",
            "NAZWISKO": "Cezar",
            "linietpod": [
                {"LPOD": f"To jest linia pod o numerz {no}"}
                for no in range(100)
            ],
            "linie": [
                {
                    "NAZWA": "Artykuł",
                    "KWOTA": "999 USD"
                }
            ]
        }
        xml = self._run_and_read(htmltemplate, data)
        self._assert_all_in(xml, "Juliusz", "Cezar")
        if lista:
            self._assert_all_in(xml, "Artyku", "999 USD")
        return xml

    def test_proste(self):
        self._prosty_html(_TESTDOKUMENT)

    def test_proste2(self):
        self._prosty_html(_TESTDOKUMENT2, lista=True)

    def test_proste3(self):
        self._prosty_html(_TESTDOKUMENT3, lista=True)

    def test_proste4(self):
        xml = self._prosty_html(_TDOKUMENT, lista=True)
        self.assertIn("To jest linia pod o numerz 99", xml)

    def test_proste_linie(self):
        data = {
            "linie": [
                {
                    "IMIE": "Juliusz",
                    "NAZWISKO": "Cezar",
                    "linie": [
                        {
                            "NAZWA": "Artykuł",
                            "KWOTA": "999 USD"
                        }
                    ]
                }
            ]
        }
        xml = self._run_and_read(_DOKUMENTPROSTEPOWT, data)
        self._assert_all_in(xml, "Juliusz", "Cezar", "Artyku", "999 USD")

    def test_proste_linie_2(self):
        linie = [
            {
                "NAZWA": "Artykuł",
                "KWOTA": "999 USD"
            },
            {
                "NAZWA": "Rower",
                "KWOTA": "888 USD"
            }
        ]
        osoby = [
            {"IMIE": f"Juliusz {no}", "NAZWISKO": f"Cezar {no}", "linie": linie}
            for no in range(100)
        ]
        xml = self._run_and_read(_DOKUMENTPROSTEPOWT, {"linie": osoby})
        self._assert_all_in(
            xml, "Juliusz 0", "Cezar", "Artyku", "999 USD", "Juliusz 99")

    def test_hello_page(self):
        # Smoke test only: it passes as long as parsehtml does not raise.
        self._run(_HELLOPAGE, {})
|