chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chgksuite/_html2md.py +90 -0
- chgksuite/cli.py +38 -8
- chgksuite/common.py +16 -12
- chgksuite/composer/__init__.py +9 -7
- chgksuite/composer/chgksuite_parser.py +20 -9
- chgksuite/composer/composer_common.py +30 -3
- chgksuite/composer/db.py +1 -2
- chgksuite/composer/docx.py +542 -292
- chgksuite/composer/latex.py +3 -4
- chgksuite/composer/lj.py +1 -2
- chgksuite/composer/{reddit.py → markdown.py} +35 -25
- chgksuite/composer/openquiz.py +2 -3
- chgksuite/composer/pptx.py +18 -6
- chgksuite/composer/telegram.py +22 -10
- chgksuite/handouter/gen.py +11 -7
- chgksuite/handouter/installer.py +0 -0
- chgksuite/handouter/runner.py +237 -10
- chgksuite/handouter/tex_internals.py +12 -13
- chgksuite/handouter/utils.py +22 -1
- chgksuite/lastdir +1 -0
- chgksuite/parser.py +218 -37
- chgksuite/parser_db.py +4 -6
- chgksuite/resources/labels_az.toml +22 -0
- chgksuite/resources/labels_by.toml +1 -2
- chgksuite/resources/labels_by_tar.toml +1 -2
- chgksuite/resources/labels_en.toml +1 -2
- chgksuite/resources/labels_kz_cyr.toml +1 -2
- chgksuite/resources/labels_ru.toml +1 -2
- chgksuite/resources/labels_sr.toml +1 -2
- chgksuite/resources/labels_ua.toml +1 -2
- chgksuite/resources/labels_uz.toml +0 -3
- chgksuite/resources/labels_uz_cyr.toml +1 -2
- chgksuite/resources/regexes_az.json +17 -0
- chgksuite/resources/regexes_by.json +3 -2
- chgksuite/resources/regexes_by_tar.json +17 -0
- chgksuite/resources/regexes_en.json +3 -2
- chgksuite/resources/regexes_kz_cyr.json +3 -2
- chgksuite/resources/regexes_ru.json +3 -2
- chgksuite/resources/regexes_sr.json +3 -2
- chgksuite/resources/regexes_ua.json +3 -2
- chgksuite/resources/regexes_uz.json +16 -0
- chgksuite/resources/regexes_uz_cyr.json +3 -2
- chgksuite/trello.py +8 -9
- chgksuite/typotools.py +9 -8
- chgksuite/version.py +1 -1
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/METADATA +10 -19
- chgksuite-0.27.0.dist-info/RECORD +63 -0
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/WHEEL +1 -2
- chgksuite/composer/telegram_parser.py +0 -230
- chgksuite-0.26.0b11.dist-info/RECORD +0 -59
- chgksuite-0.26.0b11.dist-info/top_level.txt +0 -1
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/entry_points.txt +0 -0
- {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/licenses/LICENSE +0 -0
chgksuite/_html2md.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Simple HTML table to Markdown converter.
|
|
2
|
+
|
|
3
|
+
Replaces dashtable.html2md with a minimal implementation that avoids
|
|
4
|
+
deprecated BeautifulSoup methods.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from bs4 import BeautifulSoup
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def html2md(html_string: str) -> str:
    """Convert the first HTML table found in *html_string* to Markdown.

    Parameters
    ----------
    html_string : str
        HTML string containing a table.

    Returns
    -------
    str
        The table formatted as a Markdown pipe table. The first row is used
        as the header row. An empty string is returned when there is no
        ``<table>``, or when no row contains any cell.
    """
    soup = BeautifulSoup(html_string, "html.parser")
    table = soup.find("table")
    if not table:
        return ""

    # One list of whitespace-normalised cell texts per <tr>.
    # <th> and <td> are collected together, in document order, so a row that
    # mixes a header cell with data cells (e.g. a row-header column) keeps
    # every cell instead of silently dropping its <td> cells.
    # NOTE: find_all() searches recursively, so rows/cells of nested tables
    # are picked up as well.
    data = []
    for row in table.find_all("tr"):
        row_data = [
            " ".join(cell.get_text().split())
            for cell in row.find_all(["th", "td"])
        ]
        if row_data:
            data.append(row_data)

    if not data:
        return ""

    # Normalize row lengths (pad shorter rows with empty cells).
    max_cols = max(len(row) for row in data)
    for row in data:
        row.extend([""] * (max_cols - len(row)))

    # Column width = longest cell + 2 (one space cushion on each side),
    # with a minimum of 3 so the separator is a valid Markdown rule.
    widths = [
        max(max(len(row[col]) for row in data) + 2, 3)
        for col in range(max_cols)
    ]

    def render(row):
        # Every cell is centered within its column width.
        cells = (_center(cell, widths[i]) for i, cell in enumerate(row))
        return "|" + "|".join(cells) + "|"

    # Separator row has no spaces, to avoid typotools converting
    # " --- " to an em-dash.
    separator = "|" + "|".join("-" * w for w in widths) + "|"

    lines = [render(data[0]), separator]
    lines.extend(render(row) for row in data[1:])
    return "\n".join(lines)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _center(text: str, width: int) -> str:
|
|
85
|
+
"""Center text within width, with space padding."""
|
|
86
|
+
text = text.strip()
|
|
87
|
+
padding = width - len(text)
|
|
88
|
+
left = padding // 2
|
|
89
|
+
right = padding - left
|
|
90
|
+
return " " * left + text + " " * right
|
chgksuite/cli.py
CHANGED
|
@@ -13,12 +13,14 @@ from chgksuite.common import (
|
|
|
13
13
|
)
|
|
14
14
|
from chgksuite.composer import gui_compose
|
|
15
15
|
from chgksuite.handouter.runner import gui_handouter
|
|
16
|
-
from chgksuite.handouter.tex_internals import GREYTEXT_LANGS
|
|
17
16
|
from chgksuite.parser import gui_parse
|
|
18
17
|
from chgksuite.trello import gui_trello
|
|
19
18
|
from chgksuite.version import __version__
|
|
20
19
|
|
|
21
|
-
LANGS = ["by", "by_tar", "en", "kz_cyr", "ru", "sr", "ua", "uz", "uz_cyr"] + [
|
|
20
|
+
LANGS = ["az", "by", "by_tar", "en", "kz_cyr", "ru", "sr", "ua", "uz", "uz_cyr"] + [
|
|
21
|
+
"custom"
|
|
22
|
+
]
|
|
23
|
+
HANDOUT_LANGS = [lang for lang in LANGS if lang != "custom"]
|
|
22
24
|
|
|
23
25
|
debug = False
|
|
24
26
|
|
|
@@ -277,6 +279,14 @@ class ArgparseBuilder:
|
|
|
277
279
|
advanced=True,
|
|
278
280
|
argtype="radiobutton",
|
|
279
281
|
)
|
|
282
|
+
self.add_argument(
|
|
283
|
+
cmdparse,
|
|
284
|
+
"--download_images",
|
|
285
|
+
action="store_true",
|
|
286
|
+
help="download images from direct URLs and replace with local references",
|
|
287
|
+
caption="Скачивать изображения по прямым ссылкам",
|
|
288
|
+
advanced=True,
|
|
289
|
+
)
|
|
280
290
|
self.add_argument(
|
|
281
291
|
cmdparse,
|
|
282
292
|
"--add_ts",
|
|
@@ -436,6 +446,15 @@ class ArgparseBuilder:
|
|
|
436
446
|
advanced=True,
|
|
437
447
|
caption='Без переноса строки после "Вопрос N."',
|
|
438
448
|
)
|
|
449
|
+
|
|
450
|
+
self.add_argument(
|
|
451
|
+
cmdcompose_docx,
|
|
452
|
+
"--only_question_number",
|
|
453
|
+
action="store_true",
|
|
454
|
+
help="only show question number",
|
|
455
|
+
advanced=True,
|
|
456
|
+
caption='Без слова "Вопрос" в "Вопрос N."',
|
|
457
|
+
)
|
|
439
458
|
self.add_argument(
|
|
440
459
|
cmdcompose_docx,
|
|
441
460
|
"--randomize",
|
|
@@ -577,6 +596,15 @@ class ArgparseBuilder:
|
|
|
577
596
|
caption="Имя 4s-файла",
|
|
578
597
|
filetypes=[("chgksuite markup files", "*.4s")],
|
|
579
598
|
)
|
|
599
|
+
cmdcompose_markdown = cmdcompose_filetype.add_parser("markdown")
|
|
600
|
+
self.add_argument(
|
|
601
|
+
cmdcompose_markdown,
|
|
602
|
+
"filename",
|
|
603
|
+
nargs="*",
|
|
604
|
+
help="file(s) to compose from.",
|
|
605
|
+
caption="Имя 4s-файла",
|
|
606
|
+
filetypes=[("chgksuite markup files", "*.4s")],
|
|
607
|
+
)
|
|
580
608
|
cmdcompose_pptx = cmdcompose_filetype.add_parser("pptx")
|
|
581
609
|
self.add_argument(
|
|
582
610
|
cmdcompose_pptx,
|
|
@@ -872,10 +900,11 @@ class ArgparseBuilder:
|
|
|
872
900
|
)
|
|
873
901
|
self.add_argument(
|
|
874
902
|
cmdhandouts_run,
|
|
875
|
-
"--
|
|
903
|
+
"--language",
|
|
904
|
+
"-lang",
|
|
876
905
|
default="ru",
|
|
877
906
|
argtype="radiobutton",
|
|
878
|
-
choices=sorted(
|
|
907
|
+
choices=sorted(HANDOUT_LANGS),
|
|
879
908
|
help="language",
|
|
880
909
|
caption="Язык",
|
|
881
910
|
advanced=True,
|
|
@@ -995,12 +1024,13 @@ class ArgparseBuilder:
|
|
|
995
1024
|
)
|
|
996
1025
|
self.add_argument(
|
|
997
1026
|
cmdhandouts_generate,
|
|
998
|
-
"--
|
|
1027
|
+
"--language",
|
|
1028
|
+
"-lang",
|
|
999
1029
|
default="ru",
|
|
1000
1030
|
help="language",
|
|
1001
1031
|
caption="Язык",
|
|
1002
1032
|
argtype="radiobutton",
|
|
1003
|
-
choices=sorted(
|
|
1033
|
+
choices=sorted(HANDOUT_LANGS),
|
|
1004
1034
|
advanced=True,
|
|
1005
1035
|
)
|
|
1006
1036
|
self.add_argument(
|
|
@@ -1067,9 +1097,9 @@ def single_action(args, use_wrapper, resourcedir):
|
|
|
1067
1097
|
args.console_mode = True
|
|
1068
1098
|
|
|
1069
1099
|
if args.language in LANGS:
|
|
1100
|
+
args.regexes_file = os.path.join(resourcedir, f"regexes_{args.language}.json")
|
|
1070
1101
|
if args.action == "parse":
|
|
1071
|
-
|
|
1072
|
-
args.regexes = os.path.join(resourcedir, f"regexes_{regex_lang}.json")
|
|
1102
|
+
args.regexes = args.regexes_file
|
|
1073
1103
|
args.labels_file = os.path.join(resourcedir, f"labels_{args.language}.toml")
|
|
1074
1104
|
if not args.docx_template:
|
|
1075
1105
|
args.docx_template = os.path.join(resourcedir, "template.docx")
|
chgksuite/common.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
import argparse
|
|
4
|
-
import codecs
|
|
5
4
|
import csv
|
|
6
5
|
import itertools
|
|
7
6
|
import json
|
|
@@ -29,16 +28,28 @@ QUESTION_LABELS = [
|
|
|
29
28
|
"setcounter",
|
|
30
29
|
]
|
|
31
30
|
SEP = os.linesep
|
|
32
|
-
|
|
31
|
+
try:
|
|
32
|
+
ENC = sys.stdout.encoding or "utf8"
|
|
33
|
+
except AttributeError:
|
|
34
|
+
ENC = "utf8"
|
|
33
35
|
|
|
34
36
|
lastdir = os.path.join(os.path.dirname(os.path.abspath("__file__")), "lastdir")
|
|
35
37
|
|
|
36
38
|
|
|
39
|
+
def get_chgksuite_dir():
    """Return the path of the per-user ``.chgksuite`` directory.

    The directory lives directly under the user's home directory and is
    created on first use.

    Returns:
        str: path to ``~/.chgksuite`` (with ``~`` expanded).

    Raises:
        OSError: if the directory cannot be created (for example, a regular
            file with that name already exists).
    """
    chgksuite_dir = os.path.join(os.path.expanduser("~"), ".chgksuite")
    # exist_ok=True avoids the check-then-create race of isdir() + mkdir():
    # two processes starting at the same time no longer fail with
    # FileExistsError.
    os.makedirs(chgksuite_dir, exist_ok=True)
    return chgksuite_dir
|
|
44
|
+
|
|
45
|
+
|
|
37
46
|
def init_logger(logger_name, debug=False):
|
|
38
47
|
logger = logging.getLogger(logger_name)
|
|
39
48
|
if not logger.handlers:
|
|
40
49
|
logger.setLevel(logging.DEBUG)
|
|
41
|
-
|
|
50
|
+
log_dir = get_chgksuite_dir()
|
|
51
|
+
log_path = os.path.join(log_dir, f"{logger_name}.log")
|
|
52
|
+
fh = logging.FileHandler(log_path, encoding="utf8")
|
|
42
53
|
fh.setLevel(logging.DEBUG)
|
|
43
54
|
ch = logging.StreamHandler()
|
|
44
55
|
if debug:
|
|
@@ -53,13 +64,6 @@ def init_logger(logger_name, debug=False):
|
|
|
53
64
|
return logger
|
|
54
65
|
|
|
55
66
|
|
|
56
|
-
def get_chgksuite_dir():
|
|
57
|
-
chgksuite_dir = os.path.join(os.path.expanduser("~"), ".chgksuite")
|
|
58
|
-
if not os.path.isdir(chgksuite_dir):
|
|
59
|
-
os.mkdir(chgksuite_dir)
|
|
60
|
-
return chgksuite_dir
|
|
61
|
-
|
|
62
|
-
|
|
63
67
|
def load_settings():
|
|
64
68
|
chgksuite_dir = get_chgksuite_dir()
|
|
65
69
|
settings_file = os.path.join(chgksuite_dir, "settings.toml")
|
|
@@ -109,7 +113,7 @@ class DefaultArgs:
|
|
|
109
113
|
def set_lastdir(path):
|
|
110
114
|
chgksuite_dir = get_chgksuite_dir()
|
|
111
115
|
lastdir = os.path.join(chgksuite_dir, "lastdir")
|
|
112
|
-
with
|
|
116
|
+
with open(lastdir, "w", encoding="utf-8") as f:
|
|
113
117
|
f.write(path)
|
|
114
118
|
|
|
115
119
|
|
|
@@ -117,7 +121,7 @@ def get_lastdir():
|
|
|
117
121
|
chgksuite_dir = get_chgksuite_dir()
|
|
118
122
|
lastdir = os.path.join(chgksuite_dir, "lastdir")
|
|
119
123
|
if os.path.isfile(lastdir):
|
|
120
|
-
with
|
|
124
|
+
with open(lastdir, "r", encoding="utf-8") as f:
|
|
121
125
|
return f.read().rstrip()
|
|
122
126
|
return "."
|
|
123
127
|
|
chgksuite/composer/__init__.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
#!usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
-
import codecs
|
|
4
3
|
import json
|
|
5
4
|
import os
|
|
6
5
|
import shutil
|
|
@@ -22,7 +21,7 @@ from chgksuite.composer.docx import DocxExporter
|
|
|
22
21
|
from chgksuite.composer.latex import LatexExporter
|
|
23
22
|
from chgksuite.composer.lj import LjExporter
|
|
24
23
|
from chgksuite.composer.pptx import PptxExporter
|
|
25
|
-
from chgksuite.composer.
|
|
24
|
+
from chgksuite.composer.markdown import MarkdownExporter
|
|
26
25
|
from chgksuite.composer.stats import StatsAdder
|
|
27
26
|
from chgksuite.composer.telegram import TelegramExporter
|
|
28
27
|
from chgksuite.composer.openquiz import OpenquizExporter
|
|
@@ -75,10 +74,13 @@ def process_file_wrapper(filename, sourcedir, targetdir, args):
|
|
|
75
74
|
|
|
76
75
|
def parse_filepath(filepath, args=None):
|
|
77
76
|
args = args or DefaultArgs()
|
|
78
|
-
with
|
|
77
|
+
with open(filepath, "r", encoding="utf-8") as input_file:
|
|
79
78
|
input_text = input_file.read()
|
|
80
79
|
input_text = input_text.replace("\r", "")
|
|
81
|
-
|
|
80
|
+
debug_dir = os.path.dirname(os.path.abspath(filepath))
|
|
81
|
+
return parse_4s(
|
|
82
|
+
input_text, randomize=args.randomize, debug=args.debug, debug_dir=debug_dir
|
|
83
|
+
)
|
|
82
84
|
|
|
83
85
|
|
|
84
86
|
def make_merged_filename(filelist):
|
|
@@ -105,7 +107,7 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
|
|
|
105
107
|
targetdir,
|
|
106
108
|
make_filename(os.path.basename(filename), "dbg", args),
|
|
107
109
|
)
|
|
108
|
-
with
|
|
110
|
+
with open(debug_fn, "w", encoding="utf-8") as output_file:
|
|
109
111
|
output_file.write(json.dumps(structure, indent=2, ensure_ascii=False))
|
|
110
112
|
|
|
111
113
|
if not args.filetype:
|
|
@@ -146,8 +148,8 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
|
|
|
146
148
|
outfilename = os.path.join(targetdir, make_filename(filename, "txt", args))
|
|
147
149
|
exporter.export(outfilename)
|
|
148
150
|
|
|
149
|
-
if args.filetype
|
|
150
|
-
exporter =
|
|
151
|
+
if args.filetype in ("redditmd", "markdown"):
|
|
152
|
+
exporter = MarkdownExporter(structure, args, dir_kwargs)
|
|
151
153
|
outfilename = os.path.join(targetdir, make_filename(filename, "md", args))
|
|
152
154
|
exporter.export(outfilename)
|
|
153
155
|
|
|
@@ -1,9 +1,15 @@
|
|
|
1
|
-
import
|
|
1
|
+
import os
|
|
2
2
|
import random
|
|
3
3
|
import re
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
6
|
-
from chgksuite.common import
|
|
6
|
+
from chgksuite.common import (
|
|
7
|
+
QUESTION_LABELS,
|
|
8
|
+
check_question,
|
|
9
|
+
get_chgksuite_dir,
|
|
10
|
+
init_logger,
|
|
11
|
+
log_wrap,
|
|
12
|
+
)
|
|
7
13
|
from chgksuite.typotools import remove_excessive_whitespace as rew
|
|
8
14
|
|
|
9
15
|
REQUIRED_LABELS = set(["question", "answer"])
|
|
@@ -64,7 +70,9 @@ def process_list(element):
|
|
|
64
70
|
|
|
65
71
|
|
|
66
72
|
RE_COUNTER = "4SCOUNTER(?P<counter_id>[0-9a-zA-Z_]*)"
|
|
67
|
-
RE_SET_COUNTER =
|
|
73
|
+
# Pattern for an explicit counter assignment, e.g. "set 4SCOUNTERabc = 12".
# The value is one or more digits: the quantifier belongs outside the
# character class ("[0-9]+"), otherwise only a single character is captured.
RE_SET_COUNTER = (
    "set 4SCOUNTER(?P<counter_id_set>[0-9a-zA-Z_]*) = (?P<counter_value>[0-9]+)"
)
|
|
68
76
|
RE_COUNTER_UNIFY = re.compile(f"({RE_COUNTER}|{RE_SET_COUNTER})")
|
|
69
77
|
|
|
70
78
|
|
|
@@ -83,11 +91,11 @@ def replace_counters(string_):
|
|
|
83
91
|
counter_id = match.group("counter_id")
|
|
84
92
|
string_ = string_[: span[0]] + str(dd[counter_id]) + string_[span[1] :]
|
|
85
93
|
dd[counter_id] += 1
|
|
86
|
-
match = RE_COUNTER_UNIFY.search(string_)
|
|
94
|
+
match = RE_COUNTER_UNIFY.search(string_)
|
|
87
95
|
return string_
|
|
88
96
|
|
|
89
97
|
|
|
90
|
-
def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
98
|
+
def parse_4s(s, randomize=False, debug=False, logger=None, debug_dir=None):
|
|
91
99
|
logger = logger or init_logger("composer")
|
|
92
100
|
mapping = {
|
|
93
101
|
"#": "meta",
|
|
@@ -113,8 +121,11 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
|
113
121
|
if s[0] == "\ufeff" and len(s) > 1:
|
|
114
122
|
s = s[1:]
|
|
115
123
|
|
|
116
|
-
|
|
117
|
-
|
|
124
|
+
if debug:
|
|
125
|
+
debug_dir = debug_dir or get_chgksuite_dir()
|
|
126
|
+
debug_path = os.path.join(debug_dir, "raw.debug")
|
|
127
|
+
with open(debug_path, "w", encoding="utf-8") as debugf:
|
|
128
|
+
debugf.write(log_wrap(s.split("\n")))
|
|
118
129
|
|
|
119
130
|
s = replace_counters(s)
|
|
120
131
|
|
|
@@ -135,7 +146,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
|
135
146
|
counter = 1
|
|
136
147
|
|
|
137
148
|
if debug:
|
|
138
|
-
with
|
|
149
|
+
with open("debug1st.debug", "w", encoding="utf-8") as debugf:
|
|
139
150
|
debugf.write(log_wrap(structure))
|
|
140
151
|
|
|
141
152
|
for element in structure:
|
|
@@ -228,7 +239,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
|
|
|
228
239
|
i += 1
|
|
229
240
|
|
|
230
241
|
if debug:
|
|
231
|
-
with
|
|
242
|
+
with open("debug.debug", "w", encoding="utf-8") as debugf:
|
|
232
243
|
debugf.write(log_wrap(final_structure))
|
|
233
244
|
|
|
234
245
|
for element in final_structure:
|
|
@@ -59,6 +59,22 @@ def backtick_replace(el):
|
|
|
59
59
|
return el
|
|
60
60
|
|
|
61
61
|
|
|
62
|
+
def remove_accents_standalone(s, regexes):
    """Strip combining acute accents (U+0301) from *s*, except inside handouts.

    A handout is any span matching ``[<handout_short>...]``, where
    ``<handout_short>`` is ``regexes["handout_short"]`` (inserted into the
    pattern as-is, i.e. treated as a regular expression). Handout spans are
    copied through untouched; accents are removed everywhere else.

    Args:
        s: text to process.
        regexes: mapping that provides the ``"handout_short"`` pattern.

    Returns:
        The text with U+0301 removed outside of handout spans.
    """
    hs = regexes["handout_short"]
    re_hs = re.compile(f"\\[{hs}(.+?)\\]", flags=re.DOTALL)
    # Walk the handout spans once and clean only the text between them.
    # No placeholder tokens are substituted into the text, so input that
    # already contains something like "HANDOUT_001" cannot be corrupted and
    # there is no collision between placeholder numbers.
    pieces = []
    pos = 0
    for match in re_hs.finditer(s):
        pieces.append(s[pos : match.start()].replace("\u0301", ""))
        pieces.append(match.group(0))
        pos = match.end()
    pieces.append(s[pos:].replace("\u0301", ""))
    return "".join(pieces)
|
|
76
|
+
|
|
77
|
+
|
|
62
78
|
def unquote(bytestring):
|
|
63
79
|
return urllib.parse.unquote(bytestring.decode("utf8")).encode("utf8")
|
|
64
80
|
|
|
@@ -397,6 +413,8 @@ class BaseExporter:
|
|
|
397
413
|
self.dir_kwargs = args[2]
|
|
398
414
|
with open(self.args.labels_file, encoding="utf8") as f:
|
|
399
415
|
self.labels = toml.load(f)
|
|
416
|
+
with open(self.args.regexes_file, encoding="utf8") as f:
|
|
417
|
+
self.regexes = json.load(f)
|
|
400
418
|
logger = kwargs.get("logger")
|
|
401
419
|
if logger:
|
|
402
420
|
self.logger = logger
|
|
@@ -415,6 +433,8 @@ class BaseExporter:
|
|
|
415
433
|
return _parse_4s_elem(*args, **kwargs)
|
|
416
434
|
|
|
417
435
|
def get_label(self, question, field, number=None):
|
|
436
|
+
if field == "question" and self.args.only_question_number:
|
|
437
|
+
return str(question.get("number") or number)
|
|
418
438
|
if field in ("question", "tour"):
|
|
419
439
|
lbl = (question.get("overrides") or {}).get(field) or self.labels[
|
|
420
440
|
"question_labels"
|
|
@@ -433,10 +453,11 @@ class BaseExporter:
|
|
|
433
453
|
return self.labels["question_labels"][field]
|
|
434
454
|
|
|
435
455
|
def remove_square_brackets(self, s):
|
|
436
|
-
hs = self.
|
|
456
|
+
hs = self.regexes["handout_short"]
|
|
437
457
|
s = s.replace("\\[", "LEFTSQUAREBRACKET")
|
|
438
458
|
s = s.replace("\\]", "RIGHTSQUAREBRACKET")
|
|
439
|
-
|
|
459
|
+
# Use placeholder to preserve handout brackets during removal
|
|
460
|
+
s = re.sub(f"\\[{hs}(.+?)\\]", "{HANDOUT_PLACEHOLDER\\1}", s, flags=re.DOTALL)
|
|
440
461
|
i = 0
|
|
441
462
|
while "[" in s and "]" in s and i < 10:
|
|
442
463
|
s = re.sub(" *\\[.+?\\]", "", s, flags=re.DOTALL)
|
|
@@ -446,7 +467,13 @@ class BaseExporter:
|
|
|
446
467
|
sys.stderr.write(
|
|
447
468
|
f"Error replacing square brackets on question: {s}, retries exceeded\n"
|
|
448
469
|
)
|
|
449
|
-
|
|
470
|
+
# Restore handout brackets - get the original matched text from the placeholder
|
|
471
|
+
s = re.sub(
|
|
472
|
+
r"\{HANDOUT_PLACEHOLDER(.+?)\}",
|
|
473
|
+
lambda m: "[" + m.group(1) + "]",
|
|
474
|
+
s,
|
|
475
|
+
flags=re.DOTALL,
|
|
476
|
+
)
|
|
450
477
|
s = s.replace("LEFTSQUAREBRACKET", "[")
|
|
451
478
|
s = s.replace("RIGHTSQUAREBRACKET", "]")
|
|
452
479
|
return s
|
chgksuite/composer/db.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import codecs
|
|
2
1
|
import datetime
|
|
3
2
|
import os
|
|
4
3
|
import re
|
|
@@ -211,7 +210,7 @@ class DbExporter(BaseExporter):
|
|
|
211
210
|
if res:
|
|
212
211
|
result.append(res)
|
|
213
212
|
text = "".join(result)
|
|
214
|
-
with
|
|
213
|
+
with open(outfilename, "w", encoding="utf-8") as f:
|
|
215
214
|
f.write(text)
|
|
216
215
|
self.logger.info("Output: {}".format(outfilename))
|
|
217
216
|
if self.args.clipboard:
|