chgksuite 0.26.1__py3-none-any.whl → 0.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chgksuite/_html2md.py +90 -0
- chgksuite/cli.py +21 -8
- chgksuite/common.py +16 -12
- chgksuite/composer/__init__.py +9 -7
- chgksuite/composer/chgksuite_parser.py +16 -7
- chgksuite/composer/composer_common.py +14 -5
- chgksuite/composer/db.py +1 -2
- chgksuite/composer/docx.py +29 -8
- chgksuite/composer/latex.py +1 -2
- chgksuite/composer/lj.py +1 -2
- chgksuite/composer/{reddit.py → markdown.py} +35 -25
- chgksuite/composer/openquiz.py +2 -3
- chgksuite/composer/pptx.py +2 -2
- chgksuite/composer/telegram.py +2 -1
- chgksuite/handouter/gen.py +11 -7
- chgksuite/handouter/installer.py +0 -0
- chgksuite/handouter/runner.py +234 -10
- chgksuite/handouter/tex_internals.py +12 -13
- chgksuite/lastdir +1 -0
- chgksuite/parser.py +32 -33
- chgksuite/parser_db.py +4 -6
- chgksuite/resources/labels_az.toml +22 -0
- chgksuite/resources/labels_by.toml +1 -2
- chgksuite/resources/labels_by_tar.toml +1 -2
- chgksuite/resources/labels_en.toml +1 -2
- chgksuite/resources/labels_kz_cyr.toml +1 -2
- chgksuite/resources/labels_ru.toml +1 -2
- chgksuite/resources/labels_sr.toml +1 -2
- chgksuite/resources/labels_ua.toml +1 -2
- chgksuite/resources/labels_uz.toml +0 -3
- chgksuite/resources/labels_uz_cyr.toml +1 -2
- chgksuite/resources/regexes_az.json +17 -0
- chgksuite/resources/regexes_by.json +3 -2
- chgksuite/resources/regexes_by_tar.json +17 -0
- chgksuite/resources/regexes_en.json +3 -2
- chgksuite/resources/regexes_kz_cyr.json +3 -2
- chgksuite/resources/regexes_ru.json +3 -2
- chgksuite/resources/regexes_sr.json +3 -2
- chgksuite/resources/regexes_ua.json +3 -2
- chgksuite/resources/regexes_uz.json +16 -0
- chgksuite/resources/regexes_uz_cyr.json +3 -2
- chgksuite/trello.py +8 -9
- chgksuite/typotools.py +9 -8
- chgksuite/version.py +1 -1
- {chgksuite-0.26.1.dist-info → chgksuite-0.27.0.dist-info}/METADATA +10 -19
- chgksuite-0.27.0.dist-info/RECORD +63 -0
- {chgksuite-0.26.1.dist-info → chgksuite-0.27.0.dist-info}/WHEEL +1 -2
- chgksuite/composer/telegram_parser.py +0 -230
- chgksuite-0.26.1.dist-info/RECORD +0 -59
- chgksuite-0.26.1.dist-info/top_level.txt +0 -1
- {chgksuite-0.26.1.dist-info → chgksuite-0.27.0.dist-info}/entry_points.txt +0 -0
- {chgksuite-0.26.1.dist-info → chgksuite-0.27.0.dist-info}/licenses/LICENSE +0 -0
chgksuite/_html2md.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Simple HTML table to Markdown converter.
|
|
2
|
+
|
|
3
|
+
Replaces dashtable.html2md with a minimal implementation that avoids
|
|
4
|
+
deprecated BeautifulSoup methods.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from bs4 import BeautifulSoup
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def html2md(html_string: str) -> str:
    """Convert the first HTML table in ``html_string`` to a Markdown table.

    The first row that contains cells becomes the Markdown header row; every
    following row becomes a data row. Cell text is whitespace-normalized and
    centered within its column.

    Parameters
    ----------
    html_string : str
        HTML string containing a table

    Returns
    -------
    str
        The table formatted as Markdown, or an empty string when the input
        has no table, no rows, or no cells.
    """
    soup = BeautifulSoup(html_string, "html.parser")
    table = soup.find("table")
    if table is None:
        return ""

    # Extract all rows as lists of cell texts.
    data = []
    for row in table.find_all("tr"):
        # Collect header and data cells together, in document order, so a
        # row such as <tr><th>Name</th><td>value</td></tr> keeps both cells
        # instead of dropping the <td> cells whenever a <th> is present.
        cells = row.find_all(["th", "td"])
        # Collapse any run of whitespace (including newlines) to one space.
        row_data = [" ".join(cell.get_text().split()) for cell in cells]
        if row_data:
            data.append(row_data)

    if not data:
        return ""

    # Normalize row lengths (pad shorter rows with empty cells).
    max_cols = max(len(row) for row in data)
    for row in data:
        row.extend([""] * (max_cols - len(row)))

    # Column width is the longest cell plus one space of cushion on each
    # side, and never less than the 3 dashes a Markdown separator needs.
    widths = [
        max(max(len(row[col]) for row in data) + 2, 3)
        for col in range(max_cols)
    ]

    def render(row):
        # Every row has exactly max_cols cells after padding, so zip() pairs
        # each cell with its column width.
        return "|" + "|".join(_center(cell, w) for cell, w in zip(row, widths)) + "|"

    # Header row (centered, with padding).
    lines = [render(data[0])]

    # Separator row (no spaces to avoid typotools converting --- to em-dash).
    lines.append("|" + "|".join("-" * w for w in widths) + "|")

    # Data rows (centered, with padding).
    lines.extend(render(row) for row in data[1:])

    return "\n".join(lines)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _center(text: str, width: int) -> str:
|
|
85
|
+
"""Center text within width, with space padding."""
|
|
86
|
+
text = text.strip()
|
|
87
|
+
padding = width - len(text)
|
|
88
|
+
left = padding // 2
|
|
89
|
+
right = padding - left
|
|
90
|
+
return " " * left + text + " " * right
|
chgksuite/cli.py
CHANGED
|
@@ -13,12 +13,14 @@ from chgksuite.common import (
|
|
|
13
13
|
)
|
|
14
14
|
from chgksuite.composer import gui_compose
|
|
15
15
|
from chgksuite.handouter.runner import gui_handouter
|
|
16
|
-
from chgksuite.handouter.tex_internals import GREYTEXT_LANGS
|
|
17
16
|
from chgksuite.parser import gui_parse
|
|
18
17
|
from chgksuite.trello import gui_trello
|
|
19
18
|
from chgksuite.version import __version__
|
|
20
19
|
|
|
21
|
-
LANGS = ["by", "by_tar", "en", "kz_cyr", "ru", "sr", "ua", "uz", "uz_cyr"] + [
|
|
20
|
+
LANGS = ["az", "by", "by_tar", "en", "kz_cyr", "ru", "sr", "ua", "uz", "uz_cyr"] + [
|
|
21
|
+
"custom"
|
|
22
|
+
]
|
|
23
|
+
HANDOUT_LANGS = [lang for lang in LANGS if lang != "custom"]
|
|
22
24
|
|
|
23
25
|
debug = False
|
|
24
26
|
|
|
@@ -594,6 +596,15 @@ class ArgparseBuilder:
|
|
|
594
596
|
caption="Имя 4s-файла",
|
|
595
597
|
filetypes=[("chgksuite markup files", "*.4s")],
|
|
596
598
|
)
|
|
599
|
+
cmdcompose_markdown = cmdcompose_filetype.add_parser("markdown")
|
|
600
|
+
self.add_argument(
|
|
601
|
+
cmdcompose_markdown,
|
|
602
|
+
"filename",
|
|
603
|
+
nargs="*",
|
|
604
|
+
help="file(s) to compose from.",
|
|
605
|
+
caption="Имя 4s-файла",
|
|
606
|
+
filetypes=[("chgksuite markup files", "*.4s")],
|
|
607
|
+
)
|
|
597
608
|
cmdcompose_pptx = cmdcompose_filetype.add_parser("pptx")
|
|
598
609
|
self.add_argument(
|
|
599
610
|
cmdcompose_pptx,
|
|
@@ -889,10 +900,11 @@ class ArgparseBuilder:
|
|
|
889
900
|
)
|
|
890
901
|
self.add_argument(
|
|
891
902
|
cmdhandouts_run,
|
|
892
|
-
"--
|
|
903
|
+
"--language",
|
|
904
|
+
"-lang",
|
|
893
905
|
default="ru",
|
|
894
906
|
argtype="radiobutton",
|
|
895
|
-
choices=sorted(
|
|
907
|
+
choices=sorted(HANDOUT_LANGS),
|
|
896
908
|
help="language",
|
|
897
909
|
caption="Язык",
|
|
898
910
|
advanced=True,
|
|
@@ -1012,12 +1024,13 @@ class ArgparseBuilder:
|
|
|
1012
1024
|
)
|
|
1013
1025
|
self.add_argument(
|
|
1014
1026
|
cmdhandouts_generate,
|
|
1015
|
-
"--
|
|
1027
|
+
"--language",
|
|
1028
|
+
"-lang",
|
|
1016
1029
|
default="ru",
|
|
1017
1030
|
help="language",
|
|
1018
1031
|
caption="Язык",
|
|
1019
1032
|
argtype="radiobutton",
|
|
1020
|
-
choices=sorted(
|
|
1033
|
+
choices=sorted(HANDOUT_LANGS),
|
|
1021
1034
|
advanced=True,
|
|
1022
1035
|
)
|
|
1023
1036
|
self.add_argument(
|
|
@@ -1084,9 +1097,9 @@ def single_action(args, use_wrapper, resourcedir):
|
|
|
1084
1097
|
args.console_mode = True
|
|
1085
1098
|
|
|
1086
1099
|
if args.language in LANGS:
|
|
1100
|
+
args.regexes_file = os.path.join(resourcedir, f"regexes_{args.language}.json")
|
|
1087
1101
|
if args.action == "parse":
|
|
1088
|
-
|
|
1089
|
-
args.regexes = os.path.join(resourcedir, f"regexes_{regex_lang}.json")
|
|
1102
|
+
args.regexes = args.regexes_file
|
|
1090
1103
|
args.labels_file = os.path.join(resourcedir, f"labels_{args.language}.toml")
|
|
1091
1104
|
if not args.docx_template:
|
|
1092
1105
|
args.docx_template = os.path.join(resourcedir, "template.docx")
|
chgksuite/common.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
import argparse
|
|
4
|
-
import codecs
|
|
5
4
|
import csv
|
|
6
5
|
import itertools
|
|
7
6
|
import json
|
|
@@ -29,16 +28,28 @@ QUESTION_LABELS = [
|
|
|
29
28
|
"setcounter",
|
|
30
29
|
]
|
|
31
30
|
SEP = os.linesep
|
|
32
|
-
|
|
31
|
+
try:
|
|
32
|
+
ENC = sys.stdout.encoding or "utf8"
|
|
33
|
+
except AttributeError:
|
|
34
|
+
ENC = "utf8"
|
|
33
35
|
|
|
34
36
|
lastdir = os.path.join(os.path.dirname(os.path.abspath("__file__")), "lastdir")
|
|
35
37
|
|
|
36
38
|
|
|
39
|
+
def get_chgksuite_dir():
    """Return the path of the ``.chgksuite`` directory in the user's home.

    The directory is created if it does not exist yet.

    Returns:
        str: path to ``~/.chgksuite`` with ``~`` expanded.
    """
    chgksuite_dir = os.path.join(os.path.expanduser("~"), ".chgksuite")
    # exist_ok=True avoids the check-then-create race of isdir() + mkdir()
    # when two chgksuite processes start at the same time.
    os.makedirs(chgksuite_dir, exist_ok=True)
    return chgksuite_dir
|
|
44
|
+
|
|
45
|
+
|
|
37
46
|
def init_logger(logger_name, debug=False):
|
|
38
47
|
logger = logging.getLogger(logger_name)
|
|
39
48
|
if not logger.handlers:
|
|
40
49
|
logger.setLevel(logging.DEBUG)
|
|
41
|
-
|
|
50
|
+
log_dir = get_chgksuite_dir()
|
|
51
|
+
log_path = os.path.join(log_dir, f"{logger_name}.log")
|
|
52
|
+
fh = logging.FileHandler(log_path, encoding="utf8")
|
|
42
53
|
fh.setLevel(logging.DEBUG)
|
|
43
54
|
ch = logging.StreamHandler()
|
|
44
55
|
if debug:
|
|
@@ -53,13 +64,6 @@ def init_logger(logger_name, debug=False):
|
|
|
53
64
|
return logger
|
|
54
65
|
|
|
55
66
|
|
|
56
|
-
def get_chgksuite_dir():
|
|
57
|
-
chgksuite_dir = os.path.join(os.path.expanduser("~"), ".chgksuite")
|
|
58
|
-
if not os.path.isdir(chgksuite_dir):
|
|
59
|
-
os.mkdir(chgksuite_dir)
|
|
60
|
-
return chgksuite_dir
|
|
61
|
-
|
|
62
|
-
|
|
63
67
|
def load_settings():
|
|
64
68
|
chgksuite_dir = get_chgksuite_dir()
|
|
65
69
|
settings_file = os.path.join(chgksuite_dir, "settings.toml")
|
|
@@ -109,7 +113,7 @@ class DefaultArgs:
|
|
|
109
113
|
def set_lastdir(path):
|
|
110
114
|
chgksuite_dir = get_chgksuite_dir()
|
|
111
115
|
lastdir = os.path.join(chgksuite_dir, "lastdir")
|
|
112
|
-
with
|
|
116
|
+
with open(lastdir, "w", encoding="utf-8") as f:
|
|
113
117
|
f.write(path)
|
|
114
118
|
|
|
115
119
|
|
|
@@ -117,7 +121,7 @@ def get_lastdir():
|
|
|
117
121
|
chgksuite_dir = get_chgksuite_dir()
|
|
118
122
|
lastdir = os.path.join(chgksuite_dir, "lastdir")
|
|
119
123
|
if os.path.isfile(lastdir):
|
|
120
|
-
with
|
|
124
|
+
with open(lastdir, "r", encoding="utf-8") as f:
|
|
121
125
|
return f.read().rstrip()
|
|
122
126
|
return "."
|
|
123
127
|
|
chgksuite/composer/__init__.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
#!usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
-
import codecs
|
|
4
3
|
import json
|
|
5
4
|
import os
|
|
6
5
|
import shutil
|
|
@@ -22,7 +21,7 @@ from chgksuite.composer.docx import DocxExporter
|
|
|
22
21
|
from chgksuite.composer.latex import LatexExporter
|
|
23
22
|
from chgksuite.composer.lj import LjExporter
|
|
24
23
|
from chgksuite.composer.pptx import PptxExporter
|
|
25
|
-
from chgksuite.composer.
|
|
24
|
+
from chgksuite.composer.markdown import MarkdownExporter
|
|
26
25
|
from chgksuite.composer.stats import StatsAdder
|
|
27
26
|
from chgksuite.composer.telegram import TelegramExporter
|
|
28
27
|
from chgksuite.composer.openquiz import OpenquizExporter
|
|
@@ -75,10 +74,13 @@ def process_file_wrapper(filename, sourcedir, targetdir, args):
|
|
|
75
74
|
|
|
76
75
|
def parse_filepath(filepath, args=None):
|
|
77
76
|
args = args or DefaultArgs()
|
|
78
|
-
with
|
|
77
|
+
with open(filepath, "r", encoding="utf-8") as input_file:
|
|
79
78
|
input_text = input_file.read()
|
|
80
79
|
input_text = input_text.replace("\r", "")
|
|
81
|
-
|
|
80
|
+
debug_dir = os.path.dirname(os.path.abspath(filepath))
|
|
81
|
+
return parse_4s(
|
|
82
|
+
input_text, randomize=args.randomize, debug=args.debug, debug_dir=debug_dir
|
|
83
|
+
)
|
|
82
84
|
|
|
83
85
|
|
|
84
86
|
def make_merged_filename(filelist):
|
|
@@ -105,7 +107,7 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
|
|
|
105
107
|
targetdir,
|
|
106
108
|
make_filename(os.path.basename(filename), "dbg", args),
|
|
107
109
|
)
|
|
108
|
-
with
|
|
110
|
+
with open(debug_fn, "w", encoding="utf-8") as output_file:
|
|
109
111
|
output_file.write(json.dumps(structure, indent=2, ensure_ascii=False))
|
|
110
112
|
|
|
111
113
|
if not args.filetype:
|
|
@@ -146,8 +148,8 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
|
|
|
146
148
|
outfilename = os.path.join(targetdir, make_filename(filename, "txt", args))
|
|
147
149
|
exporter.export(outfilename)
|
|
148
150
|
|
|
149
|
-
if args.filetype
|
|
150
|
-
exporter =
|
|
151
|
+
if args.filetype in ("redditmd", "markdown"):
|
|
152
|
+
exporter = MarkdownExporter(structure, args, dir_kwargs)
|
|
151
153
|
outfilename = os.path.join(targetdir, make_filename(filename, "md", args))
|
|
152
154
|
exporter.export(outfilename)
|
|
153
155
|
|
|
@@ -1,9 +1,15 @@
|
|
|
1
|
-
import
|
|
1
|
+
import os
|
|
2
2
|
import random
|
|
3
3
|
import re
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
6
|
-
from chgksuite.common import
|
|
6
|
+
from chgksuite.common import (
|
|
7
|
+
QUESTION_LABELS,
|
|
8
|
+
check_question,
|
|
9
|
+
get_chgksuite_dir,
|
|
10
|
+
init_logger,
|
|
11
|
+
log_wrap,
|
|
12
|
+
)
|
|
7
13
|
from chgksuite.typotools import remove_excessive_whitespace as rew
|
|
8
14
|
|
|
9
15
|
REQUIRED_LABELS = set(["question", "answer"])
|
|
@@ -89,7 +95,7 @@ def replace_counters(string_):
|
|
|
89
95
|
return string_
|
|
90
96
|
|
|
91
97
|
|
|
92
|
-
def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
98
|
+
def parse_4s(s, randomize=False, debug=False, logger=None, debug_dir=None):
|
|
93
99
|
logger = logger or init_logger("composer")
|
|
94
100
|
mapping = {
|
|
95
101
|
"#": "meta",
|
|
@@ -115,8 +121,11 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
|
115
121
|
if s[0] == "\ufeff" and len(s) > 1:
|
|
116
122
|
s = s[1:]
|
|
117
123
|
|
|
118
|
-
|
|
119
|
-
|
|
124
|
+
if debug:
|
|
125
|
+
debug_dir = debug_dir or get_chgksuite_dir()
|
|
126
|
+
debug_path = os.path.join(debug_dir, "raw.debug")
|
|
127
|
+
with open(debug_path, "w", encoding="utf-8") as debugf:
|
|
128
|
+
debugf.write(log_wrap(s.split("\n")))
|
|
120
129
|
|
|
121
130
|
s = replace_counters(s)
|
|
122
131
|
|
|
@@ -137,7 +146,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
|
137
146
|
counter = 1
|
|
138
147
|
|
|
139
148
|
if debug:
|
|
140
|
-
with
|
|
149
|
+
with open("debug1st.debug", "w", encoding="utf-8") as debugf:
|
|
141
150
|
debugf.write(log_wrap(structure))
|
|
142
151
|
|
|
143
152
|
for element in structure:
|
|
@@ -230,7 +239,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
|
230
239
|
i += 1
|
|
231
240
|
|
|
232
241
|
if debug:
|
|
233
|
-
with
|
|
242
|
+
with open("debug.debug", "w", encoding="utf-8") as debugf:
|
|
234
243
|
debugf.write(log_wrap(final_structure))
|
|
235
244
|
|
|
236
245
|
for element in final_structure:
|
|
@@ -59,8 +59,8 @@ def backtick_replace(el):
|
|
|
59
59
|
return el
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
def remove_accents_standalone(s,
|
|
63
|
-
hs =
|
|
62
|
+
def remove_accents_standalone(s, regexes):
|
|
63
|
+
hs = regexes["handout_short"]
|
|
64
64
|
re_hs = re.compile(f"\\[{hs}(.+?)\\]", flags=re.DOTALL)
|
|
65
65
|
replacements = {}
|
|
66
66
|
n_handouts = 0
|
|
@@ -413,6 +413,8 @@ class BaseExporter:
|
|
|
413
413
|
self.dir_kwargs = args[2]
|
|
414
414
|
with open(self.args.labels_file, encoding="utf8") as f:
|
|
415
415
|
self.labels = toml.load(f)
|
|
416
|
+
with open(self.args.regexes_file, encoding="utf8") as f:
|
|
417
|
+
self.regexes = json.load(f)
|
|
416
418
|
logger = kwargs.get("logger")
|
|
417
419
|
if logger:
|
|
418
420
|
self.logger = logger
|
|
@@ -451,10 +453,11 @@ class BaseExporter:
|
|
|
451
453
|
return self.labels["question_labels"][field]
|
|
452
454
|
|
|
453
455
|
def remove_square_brackets(self, s):
|
|
454
|
-
hs = self.
|
|
456
|
+
hs = self.regexes["handout_short"]
|
|
455
457
|
s = s.replace("\\[", "LEFTSQUAREBRACKET")
|
|
456
458
|
s = s.replace("\\]", "RIGHTSQUAREBRACKET")
|
|
457
|
-
|
|
459
|
+
# Use placeholder to preserve handout brackets during removal
|
|
460
|
+
s = re.sub(f"\\[{hs}(.+?)\\]", "{HANDOUT_PLACEHOLDER\\1}", s, flags=re.DOTALL)
|
|
458
461
|
i = 0
|
|
459
462
|
while "[" in s and "]" in s and i < 10:
|
|
460
463
|
s = re.sub(" *\\[.+?\\]", "", s, flags=re.DOTALL)
|
|
@@ -464,7 +467,13 @@ class BaseExporter:
|
|
|
464
467
|
sys.stderr.write(
|
|
465
468
|
f"Error replacing square brackets on question: {s}, retries exceeded\n"
|
|
466
469
|
)
|
|
467
|
-
|
|
470
|
+
# Restore handout brackets - get the original matched text from the placeholder
|
|
471
|
+
s = re.sub(
|
|
472
|
+
r"\{HANDOUT_PLACEHOLDER(.+?)\}",
|
|
473
|
+
lambda m: "[" + m.group(1) + "]",
|
|
474
|
+
s,
|
|
475
|
+
flags=re.DOTALL,
|
|
476
|
+
)
|
|
468
477
|
s = s.replace("LEFTSQUAREBRACKET", "[")
|
|
469
478
|
s = s.replace("RIGHTSQUAREBRACKET", "]")
|
|
470
479
|
return s
|
chgksuite/composer/db.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import codecs
|
|
2
1
|
import datetime
|
|
3
2
|
import os
|
|
4
3
|
import re
|
|
@@ -211,7 +210,7 @@ class DbExporter(BaseExporter):
|
|
|
211
210
|
if res:
|
|
212
211
|
result.append(res)
|
|
213
212
|
text = "".join(result)
|
|
214
|
-
with
|
|
213
|
+
with open(outfilename, "w", encoding="utf-8") as f:
|
|
215
214
|
f.write(text)
|
|
216
215
|
self.logger.info("Output: {}".format(outfilename))
|
|
217
216
|
if self.args.clipboard:
|
chgksuite/composer/docx.py
CHANGED
|
@@ -96,12 +96,13 @@ def get_label_standalone(
|
|
|
96
96
|
return labels["question_labels"][field]
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
def remove_square_brackets_standalone(s,
|
|
99
|
+
def remove_square_brackets_standalone(s, regexes):
|
|
100
100
|
"""Standalone version of remove_square_brackets"""
|
|
101
|
-
hs =
|
|
101
|
+
hs = regexes["handout_short"]
|
|
102
102
|
s = s.replace("\\[", "LEFTSQUAREBRACKET")
|
|
103
103
|
s = s.replace("\\]", "RIGHTSQUAREBRACKET")
|
|
104
|
-
|
|
104
|
+
# Use placeholder to preserve handout brackets during removal
|
|
105
|
+
s = re.sub(f"\\[({hs}.+?)\\]", "{HANDOUT_PLACEHOLDER\\1}", s, flags=re.DOTALL)
|
|
105
106
|
i = 0
|
|
106
107
|
while "[" in s and "]" in s and i < 10:
|
|
107
108
|
s = re.sub(" *\\[.+?\\]", "", s, flags=re.DOTALL)
|
|
@@ -111,7 +112,13 @@ def remove_square_brackets_standalone(s, labels):
|
|
|
111
112
|
sys.stderr.write(
|
|
112
113
|
f"Error replacing square brackets on question: {s}, retries exceeded\n"
|
|
113
114
|
)
|
|
114
|
-
|
|
115
|
+
# Restore handout brackets - get the original matched text from the placeholder
|
|
116
|
+
s = re.sub(
|
|
117
|
+
r"\{HANDOUT_PLACEHOLDER(.+?)\}",
|
|
118
|
+
lambda m: "[" + m.group(1) + "]",
|
|
119
|
+
s,
|
|
120
|
+
flags=re.DOTALL,
|
|
121
|
+
)
|
|
115
122
|
s = s.replace("LEFTSQUAREBRACKET", "[")
|
|
116
123
|
s = s.replace("RIGHTSQUAREBRACKET", "]")
|
|
117
124
|
return s
|
|
@@ -140,6 +147,7 @@ def format_docx_element(
|
|
|
140
147
|
spoilers="none",
|
|
141
148
|
logger=None,
|
|
142
149
|
labels=None,
|
|
150
|
+
regexes=None,
|
|
143
151
|
language="ru",
|
|
144
152
|
remove_accents=False,
|
|
145
153
|
remove_brackets=False,
|
|
@@ -157,6 +165,7 @@ def format_docx_element(
|
|
|
157
165
|
spoilers: Spoiler handling mode ("none", "whiten", "dots", "pagebreak")
|
|
158
166
|
logger: Logger instance
|
|
159
167
|
labels: Labels dictionary
|
|
168
|
+
regexes: Regexes dictionary (for handout_short)
|
|
160
169
|
language: Language code
|
|
161
170
|
remove_accents: Whether to remove accents
|
|
162
171
|
remove_brackets: Whether to remove square brackets
|
|
@@ -176,6 +185,7 @@ def format_docx_element(
|
|
|
176
185
|
spoilers,
|
|
177
186
|
logger,
|
|
178
187
|
labels,
|
|
188
|
+
regexes,
|
|
179
189
|
language,
|
|
180
190
|
remove_accents,
|
|
181
191
|
remove_brackets,
|
|
@@ -194,6 +204,7 @@ def format_docx_element(
|
|
|
194
204
|
spoilers,
|
|
195
205
|
logger,
|
|
196
206
|
labels,
|
|
207
|
+
regexes,
|
|
197
208
|
language,
|
|
198
209
|
remove_accents,
|
|
199
210
|
remove_brackets,
|
|
@@ -213,6 +224,7 @@ def format_docx_element(
|
|
|
213
224
|
spoilers,
|
|
214
225
|
logger,
|
|
215
226
|
labels,
|
|
227
|
+
regexes,
|
|
216
228
|
language,
|
|
217
229
|
remove_accents,
|
|
218
230
|
remove_brackets,
|
|
@@ -223,10 +235,10 @@ def format_docx_element(
|
|
|
223
235
|
if isinstance(el, str):
|
|
224
236
|
logger.debug("parsing element {}:".format(log_wrap(el)))
|
|
225
237
|
|
|
226
|
-
if remove_accents and
|
|
227
|
-
el = remove_accents_standalone(el,
|
|
228
|
-
if remove_brackets and
|
|
229
|
-
el = remove_square_brackets_standalone(el,
|
|
238
|
+
if remove_accents and regexes:
|
|
239
|
+
el = remove_accents_standalone(el, regexes)
|
|
240
|
+
if remove_brackets and regexes:
|
|
241
|
+
el = remove_square_brackets_standalone(el, regexes)
|
|
230
242
|
else:
|
|
231
243
|
el = replace_escaped(el)
|
|
232
244
|
|
|
@@ -309,6 +321,7 @@ def add_question_to_docx(
|
|
|
309
321
|
doc,
|
|
310
322
|
question_data,
|
|
311
323
|
labels,
|
|
324
|
+
regexes=None,
|
|
312
325
|
qcount=None,
|
|
313
326
|
skip_qcount=False,
|
|
314
327
|
screen_mode=False,
|
|
@@ -329,6 +342,7 @@ def add_question_to_docx(
|
|
|
329
342
|
doc: docx Document object
|
|
330
343
|
question_data: Dictionary containing question data
|
|
331
344
|
labels: Labels dictionary
|
|
345
|
+
regexes: Regexes dictionary (for handout_short)
|
|
332
346
|
qcount: Current question count (will be incremented if not skip_qcount)
|
|
333
347
|
skip_qcount: Whether to skip incrementing question count
|
|
334
348
|
screen_mode: Whether to use screen mode formatting
|
|
@@ -392,6 +406,7 @@ def add_question_to_docx(
|
|
|
392
406
|
spoilers,
|
|
393
407
|
logger,
|
|
394
408
|
labels,
|
|
409
|
+
regexes,
|
|
395
410
|
language,
|
|
396
411
|
remove_accents=screen_mode,
|
|
397
412
|
remove_brackets=screen_mode,
|
|
@@ -411,6 +426,7 @@ def add_question_to_docx(
|
|
|
411
426
|
spoilers,
|
|
412
427
|
logger,
|
|
413
428
|
labels,
|
|
429
|
+
regexes,
|
|
414
430
|
language,
|
|
415
431
|
remove_accents=screen_mode,
|
|
416
432
|
remove_brackets=screen_mode,
|
|
@@ -453,6 +469,7 @@ def add_question_to_docx(
|
|
|
453
469
|
spoilers,
|
|
454
470
|
logger,
|
|
455
471
|
labels,
|
|
472
|
+
regexes,
|
|
456
473
|
language,
|
|
457
474
|
remove_accents=screen_mode,
|
|
458
475
|
replace_no_break_spaces=True,
|
|
@@ -481,6 +498,7 @@ def add_question_to_docx(
|
|
|
481
498
|
spoilers,
|
|
482
499
|
logger,
|
|
483
500
|
labels,
|
|
501
|
+
regexes,
|
|
484
502
|
language,
|
|
485
503
|
remove_accents=screen_mode,
|
|
486
504
|
remove_brackets=screen_mode,
|
|
@@ -514,6 +532,7 @@ class DocxExporter(BaseExporter):
|
|
|
514
532
|
spoilers=self.args.spoilers,
|
|
515
533
|
logger=self.logger,
|
|
516
534
|
labels=self.labels,
|
|
535
|
+
regexes=self.regexes,
|
|
517
536
|
language=self.args.language,
|
|
518
537
|
**kwargs,
|
|
519
538
|
)
|
|
@@ -528,6 +547,7 @@ class DocxExporter(BaseExporter):
|
|
|
528
547
|
spoilers=self.args.spoilers,
|
|
529
548
|
logger=self.logger,
|
|
530
549
|
labels=self.labels,
|
|
550
|
+
regexes=self.regexes,
|
|
531
551
|
language=self.args.language,
|
|
532
552
|
**kwargs,
|
|
533
553
|
)
|
|
@@ -542,6 +562,7 @@ class DocxExporter(BaseExporter):
|
|
|
542
562
|
self.doc,
|
|
543
563
|
element[1],
|
|
544
564
|
self.labels,
|
|
565
|
+
self.regexes,
|
|
545
566
|
self.qcount,
|
|
546
567
|
skip_qcount,
|
|
547
568
|
screen_mode,
|
chgksuite/composer/latex.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import codecs
|
|
2
1
|
import hashlib
|
|
3
2
|
import os
|
|
4
3
|
import re
|
|
@@ -221,7 +220,7 @@ class LatexExporter(BaseExporter):
|
|
|
221
220
|
|
|
222
221
|
tex += "\\end{document}"
|
|
223
222
|
|
|
224
|
-
with
|
|
223
|
+
with open(outfilename, "w", encoding="utf-8") as outfile:
|
|
225
224
|
outfile.write(tex)
|
|
226
225
|
cwd = os.getcwd()
|
|
227
226
|
os.chdir(self.dir_kwargs["tmp_dir"])
|
chgksuite/composer/lj.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import codecs
|
|
2
1
|
import datetime
|
|
3
2
|
import os
|
|
4
3
|
import random
|
|
@@ -240,7 +239,7 @@ class LjExporter(BaseExporter):
|
|
|
240
239
|
"general_impressions_text"
|
|
241
240
|
]
|
|
242
241
|
if self.args.debug:
|
|
243
|
-
with
|
|
242
|
+
with open("lj.debug", "w", encoding="utf-8") as f:
|
|
244
243
|
f.write(log_wrap(final_structure))
|
|
245
244
|
return final_structure
|
|
246
245
|
|