chgksuite 0.27.0__py3-none-any.whl → 0.27.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chgksuite/cli.py +0 -9
- chgksuite/common.py +3 -2
- chgksuite/composer/__init__.py +7 -9
- chgksuite/composer/chgksuite_parser.py +7 -16
- chgksuite/composer/db.py +2 -1
- chgksuite/composer/docx.py +1 -1
- chgksuite/composer/latex.py +2 -1
- chgksuite/composer/lj.py +2 -1
- chgksuite/composer/openquiz.py +2 -1
- chgksuite/composer/{markdown.py → reddit.py} +25 -35
- chgksuite/handouter/installer.py +0 -0
- chgksuite/handouter/runner.py +6 -224
- chgksuite/handouter/tex_internals.py +2 -12
- chgksuite/parser.py +24 -26
- chgksuite/parser_db.py +2 -1
- chgksuite/trello.py +8 -7
- chgksuite/typotools.py +4 -4
- chgksuite/version.py +1 -1
- {chgksuite-0.27.0.dist-info → chgksuite-0.27.0b3.dist-info}/METADATA +8 -6
- {chgksuite-0.27.0.dist-info → chgksuite-0.27.0b3.dist-info}/RECORD +23 -24
- {chgksuite-0.27.0.dist-info → chgksuite-0.27.0b3.dist-info}/WHEEL +2 -1
- chgksuite-0.27.0b3.dist-info/top_level.txt +1 -0
- chgksuite/_html2md.py +0 -90
- chgksuite/lastdir +0 -1
- {chgksuite-0.27.0.dist-info → chgksuite-0.27.0b3.dist-info}/entry_points.txt +0 -0
- {chgksuite-0.27.0.dist-info → chgksuite-0.27.0b3.dist-info}/licenses/LICENSE +0 -0
chgksuite/cli.py
CHANGED
|
@@ -596,15 +596,6 @@ class ArgparseBuilder:
|
|
|
596
596
|
caption="Имя 4s-файла",
|
|
597
597
|
filetypes=[("chgksuite markup files", "*.4s")],
|
|
598
598
|
)
|
|
599
|
-
cmdcompose_markdown = cmdcompose_filetype.add_parser("markdown")
|
|
600
|
-
self.add_argument(
|
|
601
|
-
cmdcompose_markdown,
|
|
602
|
-
"filename",
|
|
603
|
-
nargs="*",
|
|
604
|
-
help="file(s) to compose from.",
|
|
605
|
-
caption="Имя 4s-файла",
|
|
606
|
-
filetypes=[("chgksuite markup files", "*.4s")],
|
|
607
|
-
)
|
|
608
599
|
cmdcompose_pptx = cmdcompose_filetype.add_parser("pptx")
|
|
609
600
|
self.add_argument(
|
|
610
601
|
cmdcompose_pptx,
|
chgksuite/common.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
import argparse
|
|
4
|
+
import codecs
|
|
4
5
|
import csv
|
|
5
6
|
import itertools
|
|
6
7
|
import json
|
|
@@ -113,7 +114,7 @@ class DefaultArgs:
|
|
|
113
114
|
def set_lastdir(path):
|
|
114
115
|
chgksuite_dir = get_chgksuite_dir()
|
|
115
116
|
lastdir = os.path.join(chgksuite_dir, "lastdir")
|
|
116
|
-
with open(lastdir, "w",
|
|
117
|
+
with codecs.open(lastdir, "w", "utf8") as f:
|
|
117
118
|
f.write(path)
|
|
118
119
|
|
|
119
120
|
|
|
@@ -121,7 +122,7 @@ def get_lastdir():
|
|
|
121
122
|
chgksuite_dir = get_chgksuite_dir()
|
|
122
123
|
lastdir = os.path.join(chgksuite_dir, "lastdir")
|
|
123
124
|
if os.path.isfile(lastdir):
|
|
124
|
-
with open(lastdir, "r",
|
|
125
|
+
with codecs.open(lastdir, "r", "utf8") as f:
|
|
125
126
|
return f.read().rstrip()
|
|
126
127
|
return "."
|
|
127
128
|
|
chgksuite/composer/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
+
import codecs
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import shutil
|
|
@@ -21,7 +22,7 @@ from chgksuite.composer.docx import DocxExporter
|
|
|
21
22
|
from chgksuite.composer.latex import LatexExporter
|
|
22
23
|
from chgksuite.composer.lj import LjExporter
|
|
23
24
|
from chgksuite.composer.pptx import PptxExporter
|
|
24
|
-
from chgksuite.composer.
|
|
25
|
+
from chgksuite.composer.reddit import RedditExporter
|
|
25
26
|
from chgksuite.composer.stats import StatsAdder
|
|
26
27
|
from chgksuite.composer.telegram import TelegramExporter
|
|
27
28
|
from chgksuite.composer.openquiz import OpenquizExporter
|
|
@@ -74,13 +75,10 @@ def process_file_wrapper(filename, sourcedir, targetdir, args):
|
|
|
74
75
|
|
|
75
76
|
def parse_filepath(filepath, args=None):
|
|
76
77
|
args = args or DefaultArgs()
|
|
77
|
-
with open(filepath, "r",
|
|
78
|
+
with codecs.open(filepath, "r", "utf8") as input_file:
|
|
78
79
|
input_text = input_file.read()
|
|
79
80
|
input_text = input_text.replace("\r", "")
|
|
80
|
-
|
|
81
|
-
return parse_4s(
|
|
82
|
-
input_text, randomize=args.randomize, debug=args.debug, debug_dir=debug_dir
|
|
83
|
-
)
|
|
81
|
+
return parse_4s(input_text, randomize=args.randomize, debug=args.debug)
|
|
84
82
|
|
|
85
83
|
|
|
86
84
|
def make_merged_filename(filelist):
|
|
@@ -107,7 +105,7 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
|
|
|
107
105
|
targetdir,
|
|
108
106
|
make_filename(os.path.basename(filename), "dbg", args),
|
|
109
107
|
)
|
|
110
|
-
with open(debug_fn, "w",
|
|
108
|
+
with codecs.open(debug_fn, "w", "utf8") as output_file:
|
|
111
109
|
output_file.write(json.dumps(structure, indent=2, ensure_ascii=False))
|
|
112
110
|
|
|
113
111
|
if not args.filetype:
|
|
@@ -148,8 +146,8 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
|
|
|
148
146
|
outfilename = os.path.join(targetdir, make_filename(filename, "txt", args))
|
|
149
147
|
exporter.export(outfilename)
|
|
150
148
|
|
|
151
|
-
if args.filetype
|
|
152
|
-
exporter =
|
|
149
|
+
if args.filetype == "redditmd":
|
|
150
|
+
exporter = RedditExporter(structure, args, dir_kwargs)
|
|
153
151
|
outfilename = os.path.join(targetdir, make_filename(filename, "md", args))
|
|
154
152
|
exporter.export(outfilename)
|
|
155
153
|
|
|
@@ -1,15 +1,9 @@
|
|
|
1
|
-
import
|
|
1
|
+
import codecs
|
|
2
2
|
import random
|
|
3
3
|
import re
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
6
|
-
from chgksuite.common import
|
|
7
|
-
QUESTION_LABELS,
|
|
8
|
-
check_question,
|
|
9
|
-
get_chgksuite_dir,
|
|
10
|
-
init_logger,
|
|
11
|
-
log_wrap,
|
|
12
|
-
)
|
|
6
|
+
from chgksuite.common import QUESTION_LABELS, check_question, init_logger, log_wrap
|
|
13
7
|
from chgksuite.typotools import remove_excessive_whitespace as rew
|
|
14
8
|
|
|
15
9
|
REQUIRED_LABELS = set(["question", "answer"])
|
|
@@ -95,7 +89,7 @@ def replace_counters(string_):
|
|
|
95
89
|
return string_
|
|
96
90
|
|
|
97
91
|
|
|
98
|
-
def parse_4s(s, randomize=False, debug=False, logger=None
|
|
92
|
+
def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
99
93
|
logger = logger or init_logger("composer")
|
|
100
94
|
mapping = {
|
|
101
95
|
"#": "meta",
|
|
@@ -121,11 +115,8 @@ def parse_4s(s, randomize=False, debug=False, logger=None, debug_dir=None):
|
|
|
121
115
|
if s[0] == "\ufeff" and len(s) > 1:
|
|
122
116
|
s = s[1:]
|
|
123
117
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
debug_path = os.path.join(debug_dir, "raw.debug")
|
|
127
|
-
with open(debug_path, "w", encoding="utf-8") as debugf:
|
|
128
|
-
debugf.write(log_wrap(s.split("\n")))
|
|
118
|
+
with codecs.open("raw.debug", "w", "utf8") as debugf:
|
|
119
|
+
debugf.write(log_wrap(s.split("\n")))
|
|
129
120
|
|
|
130
121
|
s = replace_counters(s)
|
|
131
122
|
|
|
@@ -146,7 +137,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None, debug_dir=None):
|
|
|
146
137
|
counter = 1
|
|
147
138
|
|
|
148
139
|
if debug:
|
|
149
|
-
with open("debug1st.debug", "w",
|
|
140
|
+
with codecs.open("debug1st.debug", "w", "utf8") as debugf:
|
|
150
141
|
debugf.write(log_wrap(structure))
|
|
151
142
|
|
|
152
143
|
for element in structure:
|
|
@@ -239,7 +230,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None, debug_dir=None):
|
|
|
239
230
|
i += 1
|
|
240
231
|
|
|
241
232
|
if debug:
|
|
242
|
-
with open("debug.debug", "w",
|
|
233
|
+
with codecs.open("debug.debug", "w", "utf8") as debugf:
|
|
243
234
|
debugf.write(log_wrap(final_structure))
|
|
244
235
|
|
|
245
236
|
for element in final_structure:
|
chgksuite/composer/db.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import codecs
|
|
1
2
|
import datetime
|
|
2
3
|
import os
|
|
3
4
|
import re
|
|
@@ -210,7 +211,7 @@ class DbExporter(BaseExporter):
|
|
|
210
211
|
if res:
|
|
211
212
|
result.append(res)
|
|
212
213
|
text = "".join(result)
|
|
213
|
-
with open(outfilename, "w",
|
|
214
|
+
with codecs.open(outfilename, "w", "utf8") as f:
|
|
214
215
|
f.write(text)
|
|
215
216
|
self.logger.info("Output: {}".format(outfilename))
|
|
216
217
|
if self.args.clipboard:
|
chgksuite/composer/docx.py
CHANGED
|
@@ -102,7 +102,7 @@ def remove_square_brackets_standalone(s, regexes):
|
|
|
102
102
|
s = s.replace("\\[", "LEFTSQUAREBRACKET")
|
|
103
103
|
s = s.replace("\\]", "RIGHTSQUAREBRACKET")
|
|
104
104
|
# Use placeholder to preserve handout brackets during removal
|
|
105
|
-
s = re.sub(f"\\[
|
|
105
|
+
s = re.sub(f"\\[{hs}(.+?)\\]", "{HANDOUT_PLACEHOLDER\\1}", s, flags=re.DOTALL)
|
|
106
106
|
i = 0
|
|
107
107
|
while "[" in s and "]" in s and i < 10:
|
|
108
108
|
s = re.sub(" *\\[.+?\\]", "", s, flags=re.DOTALL)
|
chgksuite/composer/latex.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import codecs
|
|
1
2
|
import hashlib
|
|
2
3
|
import os
|
|
3
4
|
import re
|
|
@@ -220,7 +221,7 @@ class LatexExporter(BaseExporter):
|
|
|
220
221
|
|
|
221
222
|
tex += "\\end{document}"
|
|
222
223
|
|
|
223
|
-
with open(outfilename, "w",
|
|
224
|
+
with codecs.open(outfilename, "w", "utf8") as outfile:
|
|
224
225
|
outfile.write(tex)
|
|
225
226
|
cwd = os.getcwd()
|
|
226
227
|
os.chdir(self.dir_kwargs["tmp_dir"])
|
chgksuite/composer/lj.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import codecs
|
|
1
2
|
import datetime
|
|
2
3
|
import os
|
|
3
4
|
import random
|
|
@@ -239,7 +240,7 @@ class LjExporter(BaseExporter):
|
|
|
239
240
|
"general_impressions_text"
|
|
240
241
|
]
|
|
241
242
|
if self.args.debug:
|
|
242
|
-
with open("lj.debug", "w",
|
|
243
|
+
with codecs.open("lj.debug", "w", "utf8") as f:
|
|
243
244
|
f.write(log_wrap(final_structure))
|
|
244
245
|
return final_structure
|
|
245
246
|
|
chgksuite/composer/openquiz.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import codecs
|
|
1
2
|
import copy
|
|
2
3
|
import re
|
|
3
4
|
import json
|
|
@@ -174,5 +175,5 @@ class OpenquizExporter(BaseExporter):
|
|
|
174
175
|
result = []
|
|
175
176
|
for q in questions:
|
|
176
177
|
result.append(self.oq_format_question(q))
|
|
177
|
-
with open(outfilename, "w",
|
|
178
|
+
with codecs.open(outfilename, "w", "utf8") as f:
|
|
178
179
|
f.write(json.dumps(result, indent=2, ensure_ascii=False))
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import codecs
|
|
1
2
|
import os
|
|
2
3
|
|
|
3
4
|
from chgksuite.composer.composer_common import (
|
|
@@ -8,20 +9,20 @@ from chgksuite.composer.composer_common import (
|
|
|
8
9
|
)
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
class
|
|
12
|
+
class RedditExporter(BaseExporter):
|
|
12
13
|
def __init__(self, *args, **kwargs):
|
|
13
14
|
super().__init__(*args, **kwargs)
|
|
14
15
|
self.im = Imgur(self.args.imgur_client_id or IMGUR_CLIENT_ID)
|
|
15
16
|
self.qcount = 1
|
|
16
17
|
|
|
17
|
-
def
|
|
18
|
+
def reddityapper(self, e):
|
|
18
19
|
if isinstance(e, str):
|
|
19
|
-
return self.
|
|
20
|
+
return self.reddit_element_layout(e)
|
|
20
21
|
elif isinstance(e, list):
|
|
21
22
|
if not any(isinstance(x, list) for x in e):
|
|
22
|
-
return self.
|
|
23
|
+
return self.reddit_element_layout(e)
|
|
23
24
|
else:
|
|
24
|
-
return " \n".join([self.
|
|
25
|
+
return " \n".join([self.reddit_element_layout(x) for x in e])
|
|
25
26
|
|
|
26
27
|
def parse_and_upload_image(self, path):
|
|
27
28
|
parsed_image = parseimg(
|
|
@@ -36,13 +37,11 @@ class MarkdownExporter(BaseExporter):
|
|
|
36
37
|
imglink = uploaded_image["data"]["link"]
|
|
37
38
|
return imglink
|
|
38
39
|
|
|
39
|
-
def
|
|
40
|
+
def redditformat(self, s):
|
|
40
41
|
res = ""
|
|
41
42
|
for run in self.parse_4s_elem(s):
|
|
42
|
-
if run[0]
|
|
43
|
+
if run[0] in ("", "hyperlink"):
|
|
43
44
|
res += run[1]
|
|
44
|
-
if run[0] == "hyperlink":
|
|
45
|
-
res += "<{}>".format(run[1])
|
|
46
45
|
if run[0] == "screen":
|
|
47
46
|
res += run[1]["for_screen"]
|
|
48
47
|
if run[0] == "italic":
|
|
@@ -52,70 +51,61 @@ class MarkdownExporter(BaseExporter):
|
|
|
52
51
|
imglink = run[1]
|
|
53
52
|
else:
|
|
54
53
|
imglink = self.parse_and_upload_image(run[1])
|
|
55
|
-
|
|
56
|
-
res += "[картинка]({})".format(imglink)
|
|
57
|
-
else:
|
|
58
|
-
res += "".format(imglink)
|
|
54
|
+
res += "[картинка]({})".format(imglink)
|
|
59
55
|
while res.endswith("\n"):
|
|
60
56
|
res = res[:-1]
|
|
61
57
|
res = res.replace("\n", " \n")
|
|
62
58
|
return res
|
|
63
59
|
|
|
64
|
-
def
|
|
60
|
+
def reddit_element_layout(self, e):
|
|
65
61
|
res = ""
|
|
66
62
|
if isinstance(e, str):
|
|
67
|
-
res = self.
|
|
63
|
+
res = self.redditformat(e)
|
|
68
64
|
return res
|
|
69
65
|
if isinstance(e, list):
|
|
70
66
|
res = " \n".join(
|
|
71
67
|
[
|
|
72
|
-
"{}\\. {}".format(i + 1, self.
|
|
68
|
+
"{}\\. {}".format(i + 1, self.reddit_element_layout(x))
|
|
73
69
|
for i, x in enumerate(e)
|
|
74
70
|
]
|
|
75
71
|
)
|
|
76
72
|
return res
|
|
77
73
|
|
|
78
|
-
def
|
|
74
|
+
def reddit_format_element(self, pair):
|
|
79
75
|
if pair[0] == "Question":
|
|
80
|
-
return self.
|
|
76
|
+
return self.reddit_format_question(pair[1])
|
|
81
77
|
|
|
82
|
-
def
|
|
78
|
+
def reddit_format_question(self, q):
|
|
83
79
|
if "setcounter" in q:
|
|
84
80
|
self.qcount = int(q["setcounter"])
|
|
85
81
|
res = "__Вопрос {}__: {} \n".format(
|
|
86
82
|
self.qcount if "number" not in q else q["number"],
|
|
87
|
-
self.
|
|
83
|
+
self.reddityapper(q["question"]),
|
|
88
84
|
)
|
|
89
85
|
if "number" not in q:
|
|
90
86
|
self.qcount += 1
|
|
91
|
-
|
|
92
|
-
spoiler_end = "!<" if self.args.filetype == "redditmd" else ""
|
|
93
|
-
res += "__Ответ:__ {}{} \n".format(
|
|
94
|
-
spoiler_start, self.markdownyapper(q["answer"])
|
|
95
|
-
)
|
|
87
|
+
res += "__Ответ:__ >!{} \n".format(self.reddityapper(q["answer"]))
|
|
96
88
|
if "zachet" in q:
|
|
97
|
-
res += "__Зачёт:__ {} \n".format(self.
|
|
89
|
+
res += "__Зачёт:__ {} \n".format(self.reddityapper(q["zachet"]))
|
|
98
90
|
if "nezachet" in q:
|
|
99
|
-
res += "__Незачёт:__ {} \n".format(self.
|
|
91
|
+
res += "__Незачёт:__ {} \n".format(self.reddityapper(q["nezachet"]))
|
|
100
92
|
if "comment" in q:
|
|
101
|
-
res += "__Комментарий:__ {} \n".format(self.
|
|
93
|
+
res += "__Комментарий:__ {} \n".format(self.reddityapper(q["comment"]))
|
|
102
94
|
if "source" in q:
|
|
103
|
-
res += "__Источник:__ {} \n".format(self.
|
|
95
|
+
res += "__Источник:__ {} \n".format(self.reddityapper(q["source"]))
|
|
104
96
|
if "author" in q:
|
|
105
|
-
res += "
|
|
106
|
-
spoiler_end, self.markdownyapper(q["author"])
|
|
107
|
-
)
|
|
97
|
+
res += "!<\n__Автор:__ {} \n".format(self.reddityapper(q["author"]))
|
|
108
98
|
else:
|
|
109
|
-
res +=
|
|
99
|
+
res += "!<\n"
|
|
110
100
|
return res
|
|
111
101
|
|
|
112
102
|
def export(self, outfile):
|
|
113
103
|
result = []
|
|
114
104
|
for pair in self.structure:
|
|
115
|
-
res = self.
|
|
105
|
+
res = self.reddit_format_element(pair)
|
|
116
106
|
if res:
|
|
117
107
|
result.append(res)
|
|
118
108
|
text = "\n\n".join(result)
|
|
119
|
-
with open(outfile, "w",
|
|
109
|
+
with codecs.open(outfile, "w", "utf8") as f:
|
|
120
110
|
f.write(text)
|
|
121
111
|
self.logger.info("Output: {}".format(outfile))
|
chgksuite/handouter/installer.py
CHANGED
|
File without changes
|
chgksuite/handouter/runner.py
CHANGED
|
@@ -14,9 +14,6 @@ from chgksuite.handouter.gen import generate_handouts
|
|
|
14
14
|
from chgksuite.handouter.pack import pack_handouts
|
|
15
15
|
from chgksuite.handouter.installer import get_tectonic_path, install_tectonic
|
|
16
16
|
from chgksuite.handouter.tex_internals import (
|
|
17
|
-
EDGE_DASHED,
|
|
18
|
-
EDGE_NONE,
|
|
19
|
-
EDGE_SOLID,
|
|
20
17
|
GREYTEXT,
|
|
21
18
|
HEADER,
|
|
22
19
|
IMG,
|
|
@@ -64,28 +61,7 @@ class HandoutGenerator:
|
|
|
64
61
|
)
|
|
65
62
|
return GREYTEXT.replace("<GREYTEXT>", handout_text)
|
|
66
63
|
|
|
67
|
-
def make_tikzbox(self, block
|
|
68
|
-
"""
|
|
69
|
-
Create a TikZ box with configurable edge styles and extensions.
|
|
70
|
-
edges is a dict with keys 'top', 'bottom', 'left', 'right'
|
|
71
|
-
values are EDGE_DASHED or EDGE_SOLID
|
|
72
|
-
ext is a dict with edge extensions to close gaps at boundaries
|
|
73
|
-
"""
|
|
74
|
-
if edges is None:
|
|
75
|
-
edges = {
|
|
76
|
-
"top": EDGE_DASHED,
|
|
77
|
-
"bottom": EDGE_DASHED,
|
|
78
|
-
"left": EDGE_DASHED,
|
|
79
|
-
"right": EDGE_DASHED,
|
|
80
|
-
}
|
|
81
|
-
if ext is None:
|
|
82
|
-
ext = {
|
|
83
|
-
"top": ("0pt", "0pt"),
|
|
84
|
-
"bottom": ("0pt", "0pt"),
|
|
85
|
-
"left": ("0pt", "0pt"),
|
|
86
|
-
"right": ("0pt", "0pt"),
|
|
87
|
-
}
|
|
88
|
-
|
|
64
|
+
def make_tikzbox(self, block):
|
|
89
65
|
if block.get("no_center"):
|
|
90
66
|
align = ""
|
|
91
67
|
else:
|
|
@@ -103,206 +79,19 @@ class HandoutGenerator:
|
|
|
103
79
|
.replace("<ALIGN>", align)
|
|
104
80
|
.replace("<TEXTWIDTH>", textwidth)
|
|
105
81
|
.replace("<FONTSIZE>", fontsize)
|
|
106
|
-
.replace("<TOP>", edges["top"])
|
|
107
|
-
.replace("<BOTTOM>", edges["bottom"])
|
|
108
|
-
.replace("<LEFT>", edges["left"])
|
|
109
|
-
.replace("<RIGHT>", edges["right"])
|
|
110
|
-
.replace("<TOP_EXT_L>", ext["top"][0])
|
|
111
|
-
.replace("<TOP_EXT_R>", ext["top"][1])
|
|
112
|
-
.replace("<BOTTOM_EXT_L>", ext["bottom"][0])
|
|
113
|
-
.replace("<BOTTOM_EXT_R>", ext["bottom"][1])
|
|
114
|
-
.replace("<LEFT_EXT_T>", ext["left"][0])
|
|
115
|
-
.replace("<LEFT_EXT_B>", ext["left"][1])
|
|
116
|
-
.replace("<RIGHT_EXT_T>", ext["right"][0])
|
|
117
|
-
.replace("<RIGHT_EXT_B>", ext["right"][1])
|
|
118
82
|
)
|
|
119
83
|
|
|
120
84
|
def get_page_width(self):
|
|
121
85
|
return self.args.paperwidth - self.args.margin_left - self.args.margin_right - 2
|
|
122
86
|
|
|
123
|
-
def get_cut_direction(self, columns, num_rows, handouts_per_team):
|
|
124
|
-
"""
|
|
125
|
-
Determine whether to cut vertically or horizontally.
|
|
126
|
-
Returns (direction, team_size) where:
|
|
127
|
-
- direction is 'vertical', 'horizontal', or None
|
|
128
|
-
- team_size is the number of columns (vertical) or rows (horizontal) per team
|
|
129
|
-
|
|
130
|
-
Falls back to None if handouts can't be evenly divided into teams.
|
|
131
|
-
"""
|
|
132
|
-
total = columns * num_rows
|
|
133
|
-
|
|
134
|
-
# Check if total handouts can be evenly divided
|
|
135
|
-
if total % handouts_per_team != 0:
|
|
136
|
-
return None, None
|
|
137
|
-
|
|
138
|
-
num_teams = total // handouts_per_team
|
|
139
|
-
if num_teams < 2:
|
|
140
|
-
return None, None # Only 1 team, no cuts needed
|
|
141
|
-
|
|
142
|
-
# Try vertical layout (teams as column groups)
|
|
143
|
-
# Each team gets team_cols columns × all rows
|
|
144
|
-
if handouts_per_team % num_rows == 0:
|
|
145
|
-
team_cols = handouts_per_team // num_rows
|
|
146
|
-
if columns % team_cols == 0:
|
|
147
|
-
return "vertical", team_cols
|
|
148
|
-
|
|
149
|
-
# Try horizontal layout (teams as row groups)
|
|
150
|
-
# Each team gets all columns × team_rows rows
|
|
151
|
-
if handouts_per_team % columns == 0:
|
|
152
|
-
team_rows = handouts_per_team // columns
|
|
153
|
-
if num_rows % team_rows == 0:
|
|
154
|
-
return "horizontal", team_rows
|
|
155
|
-
|
|
156
|
-
return None, None
|
|
157
|
-
|
|
158
|
-
def get_edge_styles(
|
|
159
|
-
self, row_idx, col_idx, num_rows, columns, cut_direction, team_size
|
|
160
|
-
):
|
|
161
|
-
"""
|
|
162
|
-
Determine edge styles and extensions for a box at position (row_idx, col_idx).
|
|
163
|
-
Outer edges of team rectangles are solid (thicker), inner edges are dashed.
|
|
164
|
-
Extensions are used to close gaps in ALL solid lines.
|
|
165
|
-
Duplicate dashed edges are skipped to avoid double lines.
|
|
166
|
-
|
|
167
|
-
team_size is the number of columns (vertical) or rows (horizontal) per team.
|
|
168
|
-
"""
|
|
169
|
-
# Default: all dashed, no extension
|
|
170
|
-
edges = {
|
|
171
|
-
"top": EDGE_DASHED,
|
|
172
|
-
"bottom": EDGE_DASHED,
|
|
173
|
-
"left": EDGE_DASHED,
|
|
174
|
-
"right": EDGE_DASHED,
|
|
175
|
-
}
|
|
176
|
-
ext = {
|
|
177
|
-
"top": ("0pt", "0pt"),
|
|
178
|
-
"bottom": ("0pt", "0pt"),
|
|
179
|
-
"left": ("0pt", "0pt"),
|
|
180
|
-
"right": ("0pt", "0pt"),
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
# Gap sizes (half of spacing to extend into)
|
|
184
|
-
h_gap = "0.75mm" # half of SPACE (1.5mm)
|
|
185
|
-
v_gap = "0.5mm" # half of vspace (1mm)
|
|
186
|
-
|
|
187
|
-
# Helper functions to check if position is at a team boundary
|
|
188
|
-
def is_at_right_team_boundary():
|
|
189
|
-
"""Is this box at the right edge of its team (but not at grid edge)?"""
|
|
190
|
-
if cut_direction != "vertical" or not team_size:
|
|
191
|
-
return False
|
|
192
|
-
return (col_idx + 1) % team_size == 0 and col_idx < columns - 1
|
|
193
|
-
|
|
194
|
-
def is_at_left_team_boundary():
|
|
195
|
-
"""Is this box at the left edge of its team (but not at grid edge)?"""
|
|
196
|
-
if cut_direction != "vertical" or not team_size:
|
|
197
|
-
return False
|
|
198
|
-
return col_idx % team_size == 0 and col_idx > 0
|
|
199
|
-
|
|
200
|
-
def is_at_bottom_team_boundary():
|
|
201
|
-
"""Is this box at the bottom edge of its team (but not at grid edge)?"""
|
|
202
|
-
if cut_direction != "horizontal" or not team_size:
|
|
203
|
-
return False
|
|
204
|
-
return (row_idx + 1) % team_size == 0 and row_idx < num_rows - 1
|
|
205
|
-
|
|
206
|
-
def is_at_top_team_boundary():
|
|
207
|
-
"""Is this box at the top edge of its team (but not at grid edge)?"""
|
|
208
|
-
if cut_direction != "horizontal" or not team_size:
|
|
209
|
-
return False
|
|
210
|
-
return row_idx % team_size == 0 and row_idx > 0
|
|
211
|
-
|
|
212
|
-
# Determine which edges are solid
|
|
213
|
-
# Only apply solid edges if we have a valid cut direction
|
|
214
|
-
# Otherwise fall back to all-dashed (default)
|
|
215
|
-
if cut_direction is not None:
|
|
216
|
-
# Outer edges of the entire grid
|
|
217
|
-
if row_idx == 0:
|
|
218
|
-
edges["top"] = EDGE_SOLID
|
|
219
|
-
if row_idx == num_rows - 1:
|
|
220
|
-
edges["bottom"] = EDGE_SOLID
|
|
221
|
-
if col_idx == 0:
|
|
222
|
-
edges["left"] = EDGE_SOLID
|
|
223
|
-
if col_idx == columns - 1:
|
|
224
|
-
edges["right"] = EDGE_SOLID
|
|
225
|
-
|
|
226
|
-
# Team boundary edges
|
|
227
|
-
if is_at_right_team_boundary():
|
|
228
|
-
edges["right"] = EDGE_SOLID
|
|
229
|
-
if is_at_left_team_boundary():
|
|
230
|
-
edges["left"] = EDGE_SOLID
|
|
231
|
-
if is_at_bottom_team_boundary():
|
|
232
|
-
edges["bottom"] = EDGE_SOLID
|
|
233
|
-
if is_at_top_team_boundary():
|
|
234
|
-
edges["top"] = EDGE_SOLID
|
|
235
|
-
|
|
236
|
-
# Skip duplicate dashed edges (to avoid double lines between adjacent boxes)
|
|
237
|
-
if edges["left"] == EDGE_DASHED and col_idx > 0:
|
|
238
|
-
edges["left"] = EDGE_NONE
|
|
239
|
-
|
|
240
|
-
if edges["top"] == EDGE_DASHED and row_idx > 0:
|
|
241
|
-
edges["top"] = EDGE_NONE
|
|
242
|
-
|
|
243
|
-
# Calculate extensions for solid edges to close gaps
|
|
244
|
-
# But don't extend into team boundary gaps!
|
|
245
|
-
|
|
246
|
-
if edges["top"] == EDGE_SOLID:
|
|
247
|
-
at_left_boundary = is_at_left_team_boundary()
|
|
248
|
-
ext_left = "-" + h_gap if col_idx > 0 and not at_left_boundary else "0pt"
|
|
249
|
-
at_right_boundary = is_at_right_team_boundary()
|
|
250
|
-
ext_right = (
|
|
251
|
-
h_gap if col_idx < columns - 1 and not at_right_boundary else "0pt"
|
|
252
|
-
)
|
|
253
|
-
ext["top"] = (ext_left, ext_right)
|
|
254
|
-
|
|
255
|
-
if edges["bottom"] == EDGE_SOLID:
|
|
256
|
-
at_left_boundary = is_at_left_team_boundary()
|
|
257
|
-
ext_left = "-" + h_gap if col_idx > 0 and not at_left_boundary else "0pt"
|
|
258
|
-
at_right_boundary = is_at_right_team_boundary()
|
|
259
|
-
ext_right = (
|
|
260
|
-
h_gap if col_idx < columns - 1 and not at_right_boundary else "0pt"
|
|
261
|
-
)
|
|
262
|
-
ext["bottom"] = (ext_left, ext_right)
|
|
263
|
-
|
|
264
|
-
if edges["left"] == EDGE_SOLID:
|
|
265
|
-
at_top_boundary = is_at_top_team_boundary()
|
|
266
|
-
ext_top = v_gap if row_idx > 0 and not at_top_boundary else "0pt"
|
|
267
|
-
at_bottom_boundary = is_at_bottom_team_boundary()
|
|
268
|
-
ext_bottom = (
|
|
269
|
-
"-" + v_gap
|
|
270
|
-
if row_idx < num_rows - 1 and not at_bottom_boundary
|
|
271
|
-
else "0pt"
|
|
272
|
-
)
|
|
273
|
-
ext["left"] = (ext_top, ext_bottom)
|
|
274
|
-
|
|
275
|
-
if edges["right"] == EDGE_SOLID:
|
|
276
|
-
at_top_boundary = is_at_top_team_boundary()
|
|
277
|
-
ext_top = v_gap if row_idx > 0 and not at_top_boundary else "0pt"
|
|
278
|
-
at_bottom_boundary = is_at_bottom_team_boundary()
|
|
279
|
-
ext_bottom = (
|
|
280
|
-
"-" + v_gap
|
|
281
|
-
if row_idx < num_rows - 1 and not at_bottom_boundary
|
|
282
|
-
else "0pt"
|
|
283
|
-
)
|
|
284
|
-
ext["right"] = (ext_top, ext_bottom)
|
|
285
|
-
|
|
286
|
-
return edges, ext
|
|
287
|
-
|
|
288
87
|
def generate_regular_block(self, block_):
|
|
289
88
|
block = block_.copy()
|
|
290
89
|
if not (block.get("image") or block.get("text")):
|
|
291
90
|
return
|
|
292
91
|
columns = block["columns"]
|
|
293
|
-
|
|
294
|
-
handouts_per_team = block.get("handouts_per_team") or 3
|
|
295
|
-
|
|
296
|
-
# Determine cut direction
|
|
297
|
-
cut_direction, cut_after = self.get_cut_direction(
|
|
298
|
-
columns, num_rows, handouts_per_team
|
|
299
|
-
)
|
|
300
|
-
if self.args.debug:
|
|
301
|
-
print(f"cut_direction: {cut_direction}, cut_after: {cut_after}")
|
|
302
|
-
|
|
303
|
-
spaces = columns - 1
|
|
92
|
+
spaces = block["columns"] - 1
|
|
304
93
|
boxwidth = self.args.boxwidth or round(
|
|
305
|
-
(self.get_page_width() - spaces * self.SPACE) / columns,
|
|
94
|
+
(self.get_page_width() - spaces * self.SPACE) / block["columns"],
|
|
306
95
|
3,
|
|
307
96
|
)
|
|
308
97
|
total_width = boxwidth * columns + spaces * self.SPACE
|
|
@@ -315,6 +104,7 @@ class HandoutGenerator:
|
|
|
315
104
|
r"\setlength{\boxwidth}{<Q>mm}%".replace("<Q>", str(boxwidth)),
|
|
316
105
|
r"\setlength{\boxwidthinner}{<Q>mm}%".replace("<Q>", str(boxwidthinner)),
|
|
317
106
|
]
|
|
107
|
+
rows = []
|
|
318
108
|
contents = []
|
|
319
109
|
if block.get("image"):
|
|
320
110
|
img_qwidth = block.get("resize_image") or 1.0
|
|
@@ -329,18 +119,10 @@ class HandoutGenerator:
|
|
|
329
119
|
block["centering"] = ""
|
|
330
120
|
else:
|
|
331
121
|
block["centering"] = "\\centering"
|
|
332
|
-
|
|
333
|
-
rows = []
|
|
334
|
-
for row_idx in range(num_rows):
|
|
335
|
-
row_boxes = []
|
|
336
|
-
for col_idx in range(columns):
|
|
337
|
-
edges, ext = self.get_edge_styles(
|
|
338
|
-
row_idx, col_idx, num_rows, columns, cut_direction, cut_after
|
|
339
|
-
)
|
|
340
|
-
row_boxes.append(self.make_tikzbox(block, edges, ext))
|
|
122
|
+
for _ in range(block.get("rows") or 1):
|
|
341
123
|
row = (
|
|
342
124
|
TIKZBOX_START.replace("<CENTERING>", block["centering"])
|
|
343
|
-
+ "\n".join(
|
|
125
|
+
+ "\n".join([self.make_tikzbox(block)] * block["columns"])
|
|
344
126
|
+ TIKZBOX_END
|
|
345
127
|
)
|
|
346
128
|
rows.append(row)
|
|
@@ -13,7 +13,7 @@ HEADER = r"""
|
|
|
13
13
|
\begin{document}
|
|
14
14
|
\fontsize{14pt}{16pt}\selectfont
|
|
15
15
|
\setlength\parindent{0pt}
|
|
16
|
-
\tikzstyle{box}=[rectangle, inner sep=<TIKZ_MM>mm]
|
|
16
|
+
\tikzstyle{box}=[draw, dashed, rectangle, inner sep=<TIKZ_MM>mm]
|
|
17
17
|
\raggedright
|
|
18
18
|
\raggedbottom
|
|
19
19
|
""".strip()
|
|
@@ -25,20 +25,10 @@ TIKZBOX_START = r"""{<CENTERING>
|
|
|
25
25
|
|
|
26
26
|
TIKZBOX_INNER = r"""
|
|
27
27
|
\begin{tikzpicture}
|
|
28
|
-
\node[box, minimum width=\boxwidth<TEXTWIDTH><ALIGN>]
|
|
29
|
-
\useasboundingbox (b.south west) rectangle (b.north east);
|
|
30
|
-
\draw[<TOP>] ([xshift=<TOP_EXT_L>]b.north west) -- ([xshift=<TOP_EXT_R>]b.north east);
|
|
31
|
-
\draw[<BOTTOM>] ([xshift=<BOTTOM_EXT_L>]b.south west) -- ([xshift=<BOTTOM_EXT_R>]b.south east);
|
|
32
|
-
\draw[<LEFT>] ([yshift=<LEFT_EXT_T>]b.north west) -- ([yshift=<LEFT_EXT_B>]b.south west);
|
|
33
|
-
\draw[<RIGHT>] ([yshift=<RIGHT_EXT_T>]b.north east) -- ([yshift=<RIGHT_EXT_B>]b.south east);
|
|
28
|
+
\node[box, minimum width=\boxwidth<TEXTWIDTH><ALIGN>] {<FONTSIZE><CONTENTS>\par};
|
|
34
29
|
\end{tikzpicture}
|
|
35
30
|
""".strip()
|
|
36
31
|
|
|
37
|
-
# Line styles for box edges
|
|
38
|
-
EDGE_SOLID = "line width=0.8pt"
|
|
39
|
-
EDGE_DASHED = "dashed"
|
|
40
|
-
EDGE_NONE = "draw=none" # Don't draw this edge (to avoid double dashed lines)
|
|
41
|
-
|
|
42
32
|
TIKZBOX_END = "\n}"
|
|
43
33
|
|
|
44
34
|
IMG = r"""\includegraphics<IMGWIDTH>{<IMGPATH>}"""
|
chgksuite/parser.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
import base64
|
|
4
|
+
import codecs
|
|
4
5
|
import datetime
|
|
5
6
|
import hashlib
|
|
6
7
|
import itertools
|
|
@@ -17,9 +18,8 @@ import time
|
|
|
17
18
|
|
|
18
19
|
import bs4
|
|
19
20
|
import chardet
|
|
21
|
+
import dashtable
|
|
20
22
|
import mammoth
|
|
21
|
-
|
|
22
|
-
from chgksuite._html2md import html2md
|
|
23
23
|
import pypandoc
|
|
24
24
|
import requests
|
|
25
25
|
import toml
|
|
@@ -61,7 +61,7 @@ def partition(alist, indices):
|
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def load_regexes(regexfile):
|
|
64
|
-
with open(regexfile, "r",
|
|
64
|
+
with codecs.open(regexfile, "r", "utf8") as f:
|
|
65
65
|
regexes = json.loads(f.read())
|
|
66
66
|
return {k: re.compile(v) for k, v in regexes.items()}
|
|
67
67
|
|
|
@@ -526,7 +526,7 @@ class ChgkParser:
|
|
|
526
526
|
)
|
|
527
527
|
|
|
528
528
|
if debug:
|
|
529
|
-
with open("debug_0.txt", "w",
|
|
529
|
+
with codecs.open("debug_0.txt", "w", "utf8") as f:
|
|
530
530
|
f.write(text)
|
|
531
531
|
|
|
532
532
|
# 1.
|
|
@@ -558,7 +558,7 @@ class ChgkParser:
|
|
|
558
558
|
i = 0
|
|
559
559
|
|
|
560
560
|
if debug:
|
|
561
|
-
with open("debug_1.json", "w",
|
|
561
|
+
with codecs.open("debug_1.json", "w", "utf8") as f:
|
|
562
562
|
f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
|
|
563
563
|
|
|
564
564
|
self.process_single_number_lines()
|
|
@@ -574,7 +574,7 @@ class ChgkParser:
|
|
|
574
574
|
element[0] = "question"
|
|
575
575
|
|
|
576
576
|
if debug:
|
|
577
|
-
with open("debug_1a.json", "w",
|
|
577
|
+
with codecs.open("debug_1a.json", "w", "utf8") as f:
|
|
578
578
|
f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
|
|
579
579
|
|
|
580
580
|
# 2.
|
|
@@ -584,7 +584,7 @@ class ChgkParser:
|
|
|
584
584
|
self.merge_to_x_until_nextfield("comment")
|
|
585
585
|
|
|
586
586
|
if debug:
|
|
587
|
-
with open("debug_2.json", "w",
|
|
587
|
+
with codecs.open("debug_2.json", "w", "utf8") as f:
|
|
588
588
|
f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
|
|
589
589
|
|
|
590
590
|
# 3.
|
|
@@ -647,7 +647,7 @@ class ChgkParser:
|
|
|
647
647
|
self.merge_to_x_until_nextfield("nezachet")
|
|
648
648
|
|
|
649
649
|
if debug:
|
|
650
|
-
with open("debug_3.json", "w",
|
|
650
|
+
with codecs.open("debug_3.json", "w", "utf8") as f:
|
|
651
651
|
f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
|
|
652
652
|
|
|
653
653
|
# 4.
|
|
@@ -660,7 +660,7 @@ class ChgkParser:
|
|
|
660
660
|
self.merge_to_next(0)
|
|
661
661
|
|
|
662
662
|
if debug:
|
|
663
|
-
with open("debug_3a.json", "w",
|
|
663
|
+
with codecs.open("debug_3a.json", "w", "utf8") as f:
|
|
664
664
|
f.write(
|
|
665
665
|
json.dumps(
|
|
666
666
|
list(enumerate(self.structure)), ensure_ascii=False, indent=4
|
|
@@ -718,7 +718,7 @@ class ChgkParser:
|
|
|
718
718
|
idx += 1
|
|
719
719
|
|
|
720
720
|
if debug:
|
|
721
|
-
with open("debug_4.json", "w",
|
|
721
|
+
with codecs.open("debug_4.json", "w", "utf8") as f:
|
|
722
722
|
f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
|
|
723
723
|
|
|
724
724
|
# 5.
|
|
@@ -798,7 +798,7 @@ class ChgkParser:
|
|
|
798
798
|
)
|
|
799
799
|
|
|
800
800
|
if debug:
|
|
801
|
-
with open("debug_5.json", "w", encoding="utf-8") as f:
|
|
801
|
+
with codecs.open("debug_5.json", "w", "utf8") as f:
|
|
802
802
|
f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
|
|
803
803
|
|
|
804
804
|
# 6.
|
|
@@ -853,7 +853,7 @@ class ChgkParser:
|
|
|
853
853
|
final_structure.append(["Question", current_question])
|
|
854
854
|
|
|
855
855
|
if debug:
|
|
856
|
-
with open("debug_6.json", "w", encoding="utf-8") as f:
|
|
856
|
+
with codecs.open("debug_6.json", "w", "utf8") as f:
|
|
857
857
|
f.write(json.dumps(final_structure, ensure_ascii=False, indent=4))
|
|
858
858
|
|
|
859
859
|
# 7.
|
|
@@ -899,7 +899,7 @@ class ChgkParser:
|
|
|
899
899
|
element[1] = self._process_images_in_text(element[1])
|
|
900
900
|
|
|
901
901
|
if debug:
|
|
902
|
-
with open("debug_final.json", "w", encoding="utf-8") as f:
|
|
902
|
+
with codecs.open("debug_final.json", "w", "utf8") as f:
|
|
903
903
|
f.write(json.dumps(final_structure, ensure_ascii=False, indent=4))
|
|
904
904
|
return final_structure
|
|
905
905
|
|
|
@@ -982,8 +982,8 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
|
|
|
982
982
|
with open(docxfile, "rb") as docx_file:
|
|
983
983
|
html = mammoth.convert_to_html(docx_file).value
|
|
984
984
|
if args.debug:
|
|
985
|
-
with open(
|
|
986
|
-
os.path.join(target_dir, "debugdebug.pydocx"), "w", encoding="utf-8"
|
|
985
|
+
with codecs.open(
|
|
986
|
+
os.path.join(target_dir, "debugdebug.pydocx"), "w", "utf8"
|
|
987
987
|
) as dbg:
|
|
988
988
|
dbg.write(html)
|
|
989
989
|
input_docx = (
|
|
@@ -994,8 +994,8 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
|
|
|
994
994
|
bsoup = BeautifulSoup(input_docx, "html.parser")
|
|
995
995
|
|
|
996
996
|
if args.debug:
|
|
997
|
-
with open(
|
|
998
|
-
os.path.join(target_dir, "debug.pydocx"), "w", encoding="utf-8"
|
|
997
|
+
with codecs.open(
|
|
998
|
+
os.path.join(target_dir, "debug.pydocx"), "w", "utf8"
|
|
999
999
|
) as dbg:
|
|
1000
1000
|
dbg.write(input_docx)
|
|
1001
1001
|
|
|
@@ -1065,7 +1065,7 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
|
|
|
1065
1065
|
ensure_line_breaks(tag)
|
|
1066
1066
|
for tag in bsoup.find_all("table"):
|
|
1067
1067
|
try:
|
|
1068
|
-
table = html2md(str(tag))
|
|
1068
|
+
table = dashtable.html2md(str(tag))
|
|
1069
1069
|
tag.insert_before(table)
|
|
1070
1070
|
except (TypeError, ValueError):
|
|
1071
1071
|
logger.error(f"couldn't parse html table: {str(tag)}")
|
|
@@ -1096,12 +1096,12 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
|
|
|
1096
1096
|
tag.unwrap()
|
|
1097
1097
|
|
|
1098
1098
|
if args.debug:
|
|
1099
|
-
with open(
|
|
1100
|
-
os.path.join(target_dir, "debug_raw.html"), "w", encoding="utf-8"
|
|
1099
|
+
with codecs.open(
|
|
1100
|
+
os.path.join(target_dir, "debug_raw.html"), "w", "utf8"
|
|
1101
1101
|
) as dbg:
|
|
1102
1102
|
dbg.write(str(bsoup))
|
|
1103
|
-
with open(
|
|
1104
|
-
os.path.join(target_dir, "debug.html"), "w", encoding="utf-8"
|
|
1103
|
+
with codecs.open(
|
|
1104
|
+
os.path.join(target_dir, "debug.html"), "w", "utf8"
|
|
1105
1105
|
) as dbg:
|
|
1106
1106
|
dbg.write(bsoup.prettify())
|
|
1107
1107
|
|
|
@@ -1139,9 +1139,7 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
|
|
|
1139
1139
|
txt = txt.replace(f"IMGPATH({i})", elem)
|
|
1140
1140
|
|
|
1141
1141
|
if args.debug:
|
|
1142
|
-
with open(
|
|
1143
|
-
os.path.join(target_dir, "debug.debug"), "w", encoding="utf-8"
|
|
1144
|
-
) as dbg:
|
|
1142
|
+
with codecs.open(os.path.join(target_dir, "debug.debug"), "w", "utf8") as dbg:
|
|
1145
1143
|
dbg.write(txt)
|
|
1146
1144
|
|
|
1147
1145
|
final_structure = chgk_parse(txt, defaultauthor=defaultauthor, args=args)
|
|
@@ -1175,7 +1173,7 @@ def chgk_parse_wrapper(path, args, logger=None):
|
|
|
1175
1173
|
sys.exit()
|
|
1176
1174
|
outfilename = os.path.join(target_dir, make_filename(abspath, "4s", args))
|
|
1177
1175
|
logger.info("Output: {}".format(os.path.abspath(outfilename)))
|
|
1178
|
-
with open(outfilename, "w", encoding="utf-8") as output_file:
|
|
1176
|
+
with codecs.open(outfilename, "w", "utf8") as output_file:
|
|
1179
1177
|
output_file.write(compose_4s(final_structure, args=args))
|
|
1180
1178
|
return outfilename
|
|
1181
1179
|
|
chgksuite/parser_db.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
|
|
4
|
+
import codecs
|
|
4
5
|
import json
|
|
5
6
|
import os
|
|
6
7
|
import re
|
|
@@ -435,7 +436,7 @@ def chgk_parse_db(text, debug=False, logger=False):
|
|
|
435
436
|
append_question(lexer)
|
|
436
437
|
|
|
437
438
|
if debug:
|
|
438
|
-
with open("debug_final.json", "w", encoding="utf-8") as f:
|
|
439
|
+
with codecs.open("debug_final.json", "w", "utf8") as f:
|
|
439
440
|
f.write(json.dumps(lexer.structure, ensure_ascii=False, indent=4))
|
|
440
441
|
|
|
441
442
|
return lexer.structure
|
chgksuite/trello.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
+
import codecs
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import pdb
|
|
@@ -51,7 +52,7 @@ def upload_file(filepath, trello, list_name=None):
|
|
|
51
52
|
assert lid is not None
|
|
52
53
|
print(f"uploading to list '{list_['name']}'")
|
|
53
54
|
content = ""
|
|
54
|
-
with open(filepath, "r", encoding="utf-8") as f:
|
|
55
|
+
with codecs.open(filepath, "r", "utf8") as f:
|
|
55
56
|
content = f.read()
|
|
56
57
|
cards = re.split(r"(\r?\n){2,}", content)
|
|
57
58
|
cards = [x for x in cards if x != "" and x != "\n" and x != "\r\n"]
|
|
@@ -254,7 +255,7 @@ def gui_trello_download(args):
|
|
|
254
255
|
|
|
255
256
|
board_id_path = os.path.join(args.folder, ".board_id")
|
|
256
257
|
if os.path.isfile(board_id_path):
|
|
257
|
-
with open(board_id_path, "r", encoding="utf-8") as f:
|
|
258
|
+
with codecs.open(board_id_path, "r", "utf8") as f:
|
|
258
259
|
board_id = f.read().rstrip()
|
|
259
260
|
else:
|
|
260
261
|
board_id = get_board_id(path=args.folder)
|
|
@@ -400,14 +401,14 @@ def gui_trello_download(args):
|
|
|
400
401
|
result.extend(_lists[_list["name"]])
|
|
401
402
|
filename = "singlefile.4s"
|
|
402
403
|
print("outputting {}".format(filename))
|
|
403
|
-
with open(filename, "w", encoding="utf-8") as f:
|
|
404
|
+
with codecs.open(filename, "w", "utf8") as f:
|
|
404
405
|
for item in result:
|
|
405
406
|
f.write("\n" + item + "\n")
|
|
406
407
|
else:
|
|
407
408
|
for _list in _lists:
|
|
408
409
|
filename = "{}.4s".format(_list)
|
|
409
410
|
print("outputting {}".format(filename))
|
|
410
|
-
with open(filename, "w", encoding="utf-8") as f:
|
|
411
|
+
with codecs.open(filename, "w", "utf8") as f:
|
|
411
412
|
for item in _lists[_list]:
|
|
412
413
|
f.write("\n" + item + "\n")
|
|
413
414
|
|
|
@@ -425,7 +426,7 @@ def get_board_id(path=None):
|
|
|
425
426
|
if "trello.com" in board_id:
|
|
426
427
|
board_id = re_bi.search(board_id).group(1)
|
|
427
428
|
if path:
|
|
428
|
-
with open(os.path.join(path, ".board_id"), "w", encoding="utf-8") as f:
|
|
429
|
+
with codecs.open(os.path.join(path, ".board_id"), "w", "utf8") as f:
|
|
429
430
|
f.write(board_id)
|
|
430
431
|
return board_id
|
|
431
432
|
|
|
@@ -436,7 +437,7 @@ def get_token(tokenpath, args):
|
|
|
436
437
|
else:
|
|
437
438
|
webbrowser.open(TRELLO_URL)
|
|
438
439
|
token = input("Please paste the obtained token: ").rstrip()
|
|
439
|
-
with open(tokenpath, "w", encoding="utf-8") as f:
|
|
440
|
+
with codecs.open(tokenpath, "w", "utf8") as f:
|
|
440
441
|
f.write(token)
|
|
441
442
|
return token
|
|
442
443
|
|
|
@@ -451,7 +452,7 @@ def gui_trello(args):
|
|
|
451
452
|
if not os.path.isfile(tokenpath):
|
|
452
453
|
token = get_token(tokenpath, args)
|
|
453
454
|
else:
|
|
454
|
-
with open(tokenpath, "r", encoding="utf-8") as f:
|
|
455
|
+
with codecs.open(tokenpath, "r", "utf8") as f:
|
|
455
456
|
token = f.read().rstrip()
|
|
456
457
|
|
|
457
458
|
with open(os.path.join(resourcedir, "trello.json")) as f:
|
chgksuite/typotools.py
CHANGED
|
@@ -92,13 +92,13 @@ def uni_normalize(k):
|
|
|
92
92
|
|
|
93
93
|
|
|
94
94
|
def cyr_lat_check_char(i, char, word):
|
|
95
|
-
if char
|
|
95
|
+
if char in CYRILLIC_CHARS:
|
|
96
96
|
return
|
|
97
97
|
if not (
|
|
98
|
-
(i == 0 or word[i - 1]
|
|
98
|
+
(i == 0 or word[i - 1] in CYRILLIC_CHARS or not word[i - 1].isalpha())
|
|
99
99
|
and (
|
|
100
100
|
i == len(word) - 1
|
|
101
|
-
or word[i + 1]
|
|
101
|
+
or word[i + 1] in CYRILLIC_CHARS
|
|
102
102
|
or not word[i + 1].isalpha()
|
|
103
103
|
)
|
|
104
104
|
):
|
|
@@ -121,7 +121,7 @@ def cyr_lat_check_word(word):
|
|
|
121
121
|
if check_result:
|
|
122
122
|
replacements[char] = check_result
|
|
123
123
|
elif (
|
|
124
|
-
char
|
|
124
|
+
char in CYRILLIC_CHARS
|
|
125
125
|
and i < len(word) - 1
|
|
126
126
|
and word[i + 1] in ACCENTS_TO_FIX
|
|
127
127
|
):
|
chgksuite/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.27.0"
|
|
1
|
+
__version__ = "0.27.0b3"
|
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chgksuite
|
|
3
|
-
Version: 0.27.0
|
|
3
|
+
Version: 0.27.0b3
|
|
4
4
|
Summary: A package for chgk automation
|
|
5
|
-
Project-URL: Homepage, https://gitlab.com/peczony/chgksuite
|
|
6
5
|
Author-email: Alexander Pecheny <ap@pecheny.me>
|
|
7
6
|
License-Expression: MIT
|
|
8
|
-
|
|
9
|
-
Classifier: Operating System :: OS Independent
|
|
7
|
+
Project-URL: Homepage, https://gitlab.com/peczony/chgksuite
|
|
10
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
11
10
|
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
12
13
|
Requires-Dist: beautifulsoup4
|
|
13
14
|
Requires-Dist: chardet
|
|
15
|
+
Requires-Dist: dashtable
|
|
14
16
|
Requires-Dist: dateparser
|
|
15
17
|
Requires-Dist: mammoth
|
|
16
18
|
Requires-Dist: openpyxl
|
|
17
19
|
Requires-Dist: parse
|
|
18
|
-
Requires-Dist:
|
|
20
|
+
Requires-Dist: Pillow
|
|
19
21
|
Requires-Dist: ply
|
|
20
22
|
Requires-Dist: pypandoc
|
|
21
23
|
Requires-Dist: pypdf
|
|
@@ -27,7 +29,7 @@ Requires-Dist: requests
|
|
|
27
29
|
Requires-Dist: toml
|
|
28
30
|
Requires-Dist: urllib3>=2.6.2
|
|
29
31
|
Requires-Dist: watchdog
|
|
30
|
-
|
|
32
|
+
Dynamic: license-file
|
|
31
33
|
|
|
32
34
|
**chgksuite** is an utility that helps chgk editors.
|
|
33
35
|
|
|
@@ -1,25 +1,23 @@
|
|
|
1
1
|
chgksuite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
chgksuite/__main__.py,sha256=0-_jfloveTW3SZYW5XEagbyaHKGCiDhGNgcLxsT_dMs,140
|
|
3
|
-
chgksuite/
|
|
4
|
-
chgksuite/
|
|
5
|
-
chgksuite/
|
|
6
|
-
chgksuite/
|
|
7
|
-
chgksuite/
|
|
8
|
-
chgksuite/
|
|
9
|
-
chgksuite/
|
|
10
|
-
chgksuite/typotools.py,sha256=J2AEQbfcR0HHSu5WCALpj4Ya_ngOMXRh7esJgcybWQM,12797
|
|
11
|
-
chgksuite/version.py,sha256=Hjsc9XQ3AkqXm-5cdlX-H8iKjURzi1FQeWNi2xx34Uo,23
|
|
3
|
+
chgksuite/cli.py,sha256=95Xj1jrnybtso9qSCAwgv6utCrhJF43oFXlH5LWyHKA,40918
|
|
4
|
+
chgksuite/common.py,sha256=M4mYjVAzF7uk9ElF0XmO3eggxSFf2bPlKinT2nEZPXA,11350
|
|
5
|
+
chgksuite/parser.py,sha256=2UL-2dJtCFYEjakUfeh3n1E4epMKPo0pjdaC_cBZplM,45775
|
|
6
|
+
chgksuite/parser_db.py,sha256=gh5AM80UR_hQ0r_-AwWLbqZCl2V0j3HAgj6BilImXyc,11116
|
|
7
|
+
chgksuite/trello.py,sha256=I1JmIbZVJjJuuRO0g26fNt8deRo9hDlk1xBIf8w31Rs,14724
|
|
8
|
+
chgksuite/typotools.py,sha256=0fQOjt5H3NRl0JeGTp2x-k-2ZN68E9Z9WX8D7lCwXmA,12765
|
|
9
|
+
chgksuite/version.py,sha256=1PfujVHYVF7srJSn6DXPFkpAJHTDurCFxLyRIehyqJk,25
|
|
12
10
|
chgksuite/vulture_whitelist.py,sha256=P__p_X0zt10ivddIf81uyxsobV14vFg8uS2lt4foYpc,3582
|
|
13
|
-
chgksuite/composer/__init__.py,sha256=
|
|
14
|
-
chgksuite/composer/chgksuite_parser.py,sha256=
|
|
11
|
+
chgksuite/composer/__init__.py,sha256=MAOVZIYXmZmD6nNQSo9DueV6b5RgxF7_HGeLvsAhMJs,6490
|
|
12
|
+
chgksuite/composer/chgksuite_parser.py,sha256=XxopZLVY4DQxgxw2a6NT70ga_2XZHzuvn58RnlriiqE,8847
|
|
15
13
|
chgksuite/composer/composer_common.py,sha256=kc-_Tc9NjevfXGj4fXoa9fye9AO0EuMSnEPJnS0n-aQ,16281
|
|
16
|
-
chgksuite/composer/db.py,sha256=
|
|
17
|
-
chgksuite/composer/docx.py,sha256=
|
|
18
|
-
chgksuite/composer/latex.py,sha256=
|
|
19
|
-
chgksuite/composer/lj.py,sha256=
|
|
20
|
-
chgksuite/composer/
|
|
21
|
-
chgksuite/composer/openquiz.py,sha256=BF506tH6b1IoocC61l5xBU969l38S1IjqMi6mqhG_HI,6990
|
|
14
|
+
chgksuite/composer/db.py,sha256=71cINE_V8s6YidvqpmBmmlWbcXraUEGZA1xpVFAUENw,8173
|
|
15
|
+
chgksuite/composer/docx.py,sha256=W46glwffZFpIATrwe_100vl5ypobMwXADSfoaApyHl8,23716
|
|
16
|
+
chgksuite/composer/latex.py,sha256=_IKylzdDcokgXYvvxsVSiq-Ba5fVirWcfCp6eOyx6zQ,9242
|
|
17
|
+
chgksuite/composer/lj.py,sha256=nty3Zs3N1H0gNK378U04aAHo71_5cABhCM1Mm9jiUEA,15213
|
|
18
|
+
chgksuite/composer/openquiz.py,sha256=D9q7lcEgUGwR1UF6Qp3I-wBJucy9vMnrxoSOst4V45Q,7001
|
|
22
19
|
chgksuite/composer/pptx.py,sha256=9O0tfx2xpyx9Y4ceatVIXdiwvslnj8gliZIlxsDe5Ow,23971
|
|
20
|
+
chgksuite/composer/reddit.py,sha256=-Eg4CqMHhyGGfCteVwdQdtE1pfUXQ42XcP5OYUrBXmo,3878
|
|
23
21
|
chgksuite/composer/stats.py,sha256=GbraSrjaZ8Mc2URs5aGAsI4ekboAKzlJJOqsbe96ELA,3995
|
|
24
22
|
chgksuite/composer/telegram.py,sha256=KM9Bnkf2bxdJNMrhjCCw2xx-sWcIQioBc1FdSY4OX-g,47431
|
|
25
23
|
chgksuite/composer/telegram_bot.py,sha256=xT5D39m4zGmIbHV_ZfyQ9Rc8PAmG2V5FGUeDKpkgyTw,3767
|
|
@@ -27,8 +25,8 @@ chgksuite/handouter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
|
27
25
|
chgksuite/handouter/gen.py,sha256=CJfGl8R2vnElnsjBo0n4u-y8KD-l2qxrIuUHj5WZBuI,5241
|
|
28
26
|
chgksuite/handouter/installer.py,sha256=u4jQKeCn0VjOnaDFezx35g8oRjji5edvYGj5xSHCEW4,7574
|
|
29
27
|
chgksuite/handouter/pack.py,sha256=H-Ln1JqKK2u3jFI5wwsh7pQdJBpQJ-s8gV9iECQ3kgU,2504
|
|
30
|
-
chgksuite/handouter/runner.py,sha256=
|
|
31
|
-
chgksuite/handouter/tex_internals.py,sha256=
|
|
28
|
+
chgksuite/handouter/runner.py,sha256=QPqLKbfNDL0ucJ365lvKCwRcPKrhxRBJ-YFSbQLU9_8,8641
|
|
29
|
+
chgksuite/handouter/tex_internals.py,sha256=pxvMXkqlMfzBbAlTkFCJBlcT7t1XS0Z9XOzVboc2f2Y,966
|
|
32
30
|
chgksuite/handouter/utils.py,sha256=0RoECvHzfmwWnRL2jj4WKh22oTCPh2MXid_a9ShplDA,2243
|
|
33
31
|
chgksuite/resources/cheader.tex,sha256=Jfe3LESk0VIV0HCObbajSQpEMljaIDAIEGSs6YY9rTk,3454
|
|
34
32
|
chgksuite/resources/fix-unnumbered-sections.sty,sha256=FN6ZSWC6MvoRoThPm5AxCF98DdgcxbxyBYG6YImM05s,1409
|
|
@@ -56,8 +54,9 @@ chgksuite/resources/regexes_uz_cyr.json,sha256=D4AyaEPEY753I47Ky2Fwol_4kxQsl-Yu9
|
|
|
56
54
|
chgksuite/resources/template.docx,sha256=Do29TAsg3YbH0rRSaXhVzKEoh4pwXkklW_idWA34HVE,11189
|
|
57
55
|
chgksuite/resources/template.pptx,sha256=hEFWqE-yYpwZ8ejrMCJIPEyoMT3eDqaqtiEeQ7I4fyk,29777
|
|
58
56
|
chgksuite/resources/trello.json,sha256=M5Q9JR-AAJF1u16YtNAxDX-7c7VoVTXuq4POTqYvq8o,555
|
|
59
|
-
chgksuite-0.27.
|
|
60
|
-
chgksuite-0.27.
|
|
61
|
-
chgksuite-0.27.
|
|
62
|
-
chgksuite-0.27.
|
|
63
|
-
chgksuite-0.27.
|
|
57
|
+
chgksuite-0.27.0b3.dist-info/licenses/LICENSE,sha256=_a1yfntuPmctLsuiE_08xMSORuCfGS8X5hQph2U_PUw,1081
|
|
58
|
+
chgksuite-0.27.0b3.dist-info/METADATA,sha256=muUWnuAXvRsCjuwnkkKsES747wqhlLWTquzhmOm9ymk,1201
|
|
59
|
+
chgksuite-0.27.0b3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
60
|
+
chgksuite-0.27.0b3.dist-info/entry_points.txt,sha256=lqjX6ULQZGDt0rgouTXBuwEPiwKkDQkSiNsT877A_Jg,54
|
|
61
|
+
chgksuite-0.27.0b3.dist-info/top_level.txt,sha256=cSWiRBOGZW9nIO6Rv1IrEfwPgV2ZWs87QV9wPXeBGqM,10
|
|
62
|
+
chgksuite-0.27.0b3.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
chgksuite
|
chgksuite/_html2md.py
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
"""Simple HTML table to Markdown converter.
|
|
2
|
-
|
|
3
|
-
Replaces dashtable.html2md with a minimal implementation that avoids
|
|
4
|
-
deprecated BeautifulSoup methods.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from bs4 import BeautifulSoup
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def html2md(html_string: str) -> str:
|
|
11
|
-
"""Convert an HTML table to a Markdown table string.
|
|
12
|
-
|
|
13
|
-
Parameters
|
|
14
|
-
----------
|
|
15
|
-
html_string : str
|
|
16
|
-
HTML string containing a table
|
|
17
|
-
|
|
18
|
-
Returns
|
|
19
|
-
-------
|
|
20
|
-
str
|
|
21
|
-
The table formatted as Markdown
|
|
22
|
-
"""
|
|
23
|
-
soup = BeautifulSoup(html_string, "html.parser")
|
|
24
|
-
table = soup.find("table")
|
|
25
|
-
|
|
26
|
-
if not table:
|
|
27
|
-
return ""
|
|
28
|
-
|
|
29
|
-
rows = table.find_all("tr")
|
|
30
|
-
if not rows:
|
|
31
|
-
return ""
|
|
32
|
-
|
|
33
|
-
# Extract all rows as lists of cell texts
|
|
34
|
-
data = []
|
|
35
|
-
for row in rows:
|
|
36
|
-
# Check for header cells first, then data cells
|
|
37
|
-
cells = row.find_all("th")
|
|
38
|
-
if not cells:
|
|
39
|
-
cells = row.find_all("td")
|
|
40
|
-
|
|
41
|
-
row_data = []
|
|
42
|
-
for cell in cells:
|
|
43
|
-
# Get text, normalize whitespace
|
|
44
|
-
text = " ".join(cell.get_text().split())
|
|
45
|
-
row_data.append(text)
|
|
46
|
-
if row_data:
|
|
47
|
-
data.append(row_data)
|
|
48
|
-
|
|
49
|
-
if not data:
|
|
50
|
-
return ""
|
|
51
|
-
|
|
52
|
-
# Normalize row lengths (pad shorter rows)
|
|
53
|
-
max_cols = max(len(row) for row in data)
|
|
54
|
-
for row in data:
|
|
55
|
-
while len(row) < max_cols:
|
|
56
|
-
row.append("")
|
|
57
|
-
|
|
58
|
-
# Calculate column widths (minimum 3 for markdown separator)
|
|
59
|
-
# Add 2 for space cushions on each side
|
|
60
|
-
widths = []
|
|
61
|
-
for col in range(max_cols):
|
|
62
|
-
width = max(len(row[col]) for row in data)
|
|
63
|
-
widths.append(max(width + 2, 3))
|
|
64
|
-
|
|
65
|
-
# Build markdown table
|
|
66
|
-
lines = []
|
|
67
|
-
|
|
68
|
-
# Header row (centered, with padding)
|
|
69
|
-
header = "|" + "|".join(_center(cell, widths[i]) for i, cell in enumerate(data[0])) + "|"
|
|
70
|
-
lines.append(header)
|
|
71
|
-
|
|
72
|
-
# Separator row (no spaces to avoid typotools converting --- to em-dash)
|
|
73
|
-
separator = "|" + "|".join("-" * w for w in widths) + "|"
|
|
74
|
-
lines.append(separator)
|
|
75
|
-
|
|
76
|
-
# Data rows (centered, with padding)
|
|
77
|
-
for row in data[1:]:
|
|
78
|
-
line = "|" + "|".join(_center(cell, widths[i]) for i, cell in enumerate(row)) + "|"
|
|
79
|
-
lines.append(line)
|
|
80
|
-
|
|
81
|
-
return "\n".join(lines)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def _center(text: str, width: int) -> str:
|
|
85
|
-
"""Center text within width, with space padding."""
|
|
86
|
-
text = text.strip()
|
|
87
|
-
padding = width - len(text)
|
|
88
|
-
left = padding // 2
|
|
89
|
-
right = padding - left
|
|
90
|
-
return " " * left + text + " " * right
|
chgksuite/lastdir
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
/Users/pecheny/chgksuite1/tmpz_2mf3o8/tmptke0lqfv
|
|
File without changes
|
|
File without changes
|