chgksuite 0.25.1__py3-none-any.whl → 0.26.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chgksuite/cli.py +292 -31
- chgksuite/composer/composer_common.py +2 -0
- chgksuite/composer/docx.py +520 -292
- chgksuite/composer/pptx.py +16 -4
- chgksuite/composer/telegram.py +68 -46
- chgksuite/handouter/__init__.py +0 -0
- chgksuite/handouter/gen.py +143 -0
- chgksuite/handouter/installer.py +245 -0
- chgksuite/handouter/pack.py +79 -0
- chgksuite/handouter/runner.py +237 -0
- chgksuite/handouter/tex_internals.py +47 -0
- chgksuite/handouter/utils.py +88 -0
- chgksuite/parser.py +210 -17
- chgksuite/resources/regexes_by.json +1 -1
- chgksuite/resources/regexes_en.json +1 -1
- chgksuite/resources/regexes_kz_cyr.json +1 -1
- chgksuite/resources/regexes_ru.json +2 -2
- chgksuite/resources/regexes_sr.json +1 -1
- chgksuite/resources/regexes_ua.json +1 -1
- chgksuite/resources/regexes_uz_cyr.json +1 -1
- chgksuite/version.py +1 -1
- {chgksuite-0.25.1.dist-info → chgksuite-0.26.0.dist-info}/METADATA +4 -2
- {chgksuite-0.25.1.dist-info → chgksuite-0.26.0.dist-info}/RECORD +27 -21
- {chgksuite-0.25.1.dist-info → chgksuite-0.26.0.dist-info}/WHEEL +1 -1
- chgksuite/resources/template_shorin.pptx +0 -0
- {chgksuite-0.25.1.dist-info → chgksuite-0.26.0.dist-info}/entry_points.txt +0 -0
- {chgksuite-0.25.1.dist-info → chgksuite-0.26.0.dist-info}/licenses/LICENSE +0 -0
- {chgksuite-0.25.1.dist-info → chgksuite-0.26.0.dist-info}/top_level.txt +0 -0
chgksuite/composer/docx.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import re
|
|
2
3
|
import shutil
|
|
3
4
|
import sys
|
|
4
5
|
import tempfile
|
|
@@ -12,8 +13,14 @@ from docx.oxml.ns import qn
|
|
|
12
13
|
from docx.shared import Inches
|
|
13
14
|
from docx.shared import Pt as DocxPt
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
from chgksuite.
|
|
16
|
+
import chgksuite.typotools as typotools
|
|
17
|
+
from chgksuite.common import DummyLogger, get_chgksuite_dir, log_wrap, replace_escaped
|
|
18
|
+
from chgksuite.composer.composer_common import (
|
|
19
|
+
BaseExporter,
|
|
20
|
+
_parse_4s_elem,
|
|
21
|
+
backtick_replace,
|
|
22
|
+
parseimg,
|
|
23
|
+
)
|
|
17
24
|
|
|
18
25
|
WHITEN = {
|
|
19
26
|
"handout": False,
|
|
@@ -59,6 +66,430 @@ def replace_font_in_docx(template_path, new_font):
|
|
|
59
66
|
return temp_template
|
|
60
67
|
|
|
61
68
|
|
|
69
|
+
def replace_no_break_standalone(s, replace_spaces=True, replace_hyphens=True):
    """Apply ``typotools.replace_no_break`` to *s* without needing an exporter.

    Args:
        s: Text to process.
        replace_spaces: Forwarded to ``typotools.replace_no_break`` as ``spaces``.
        replace_hyphens: Forwarded to ``typotools.replace_no_break`` as ``hyphens``.

    Returns:
        Whatever ``typotools.replace_no_break`` returns for these arguments.
    """
    options = {"spaces": replace_spaces, "hyphens": replace_hyphens}
    return typotools.replace_no_break(s, **options)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_label_standalone(
    question, field, labels, language="ru", only_question_number=False, number=None
):
    """Return the display label for *field* of *question*.

    Args:
        question: Question dict. Reads the optional keys ``"number"``,
            ``"overrides"`` (a dict of per-field label overrides) and
            ``"source"``.
        field: Name of the field the label is for (e.g. ``"question"``,
            ``"tour"``, ``"answer"``, ``"source"``).
        labels: Labels dict; the label text is looked up in
            ``labels["question_labels"]``.
        language: Language code; controls how number and label are joined
            for the ``"question"`` and ``"tour"`` fields.
        only_question_number: For ``field == "question"``, return only the
            number as a string.
        number: Fallback number used when the question carries no number.

    Returns:
        The label string.

    Raises:
        KeyError: If the required entry is missing from
            ``labels["question_labels"]``.
    """
    overrides = question.get("overrides") or {}

    # Only a missing/empty number falls back to ``number``; an explicit 0
    # (a "zero" question) is a real number and must be kept.
    own_number = question.get("number")
    num = number if own_number is None or own_number == "" else own_number

    if field == "question" and only_question_number:
        return str(num)

    if field in ("question", "tour"):
        lbl = overrides.get(field) or labels["question_labels"][field]
        if language in ("uz", "uz_cyr"):
            return f"{num} – {lbl}"
        elif language == "kz":
            return f"{num}-{lbl}"
        else:
            return f"{lbl} {num}"

    if field in overrides:
        return overrides[field]

    # A list of sources gets the plural label.
    if field == "source" and isinstance(question.get("source"), list):
        return labels["question_labels"]["sources"]
    return labels["question_labels"][field]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def remove_square_brackets_standalone(s, labels):
    """Strip square-bracketed fragments from *s*, keeping handout markers.

    Fragments of the form ``[<handout_short>...]`` are preserved, where
    ``<handout_short>`` is ``labels["question_labels"]["handout_short"]``.
    Backslash-escaped brackets (``\\[`` and ``\\]``) are kept as literal
    brackets. Every other ``[...]`` fragment is removed together with the
    spaces immediately before it.

    Args:
        s: Text to clean.
        labels: Labels dict providing ``question_labels.handout_short``.

    Returns:
        The cleaned, stripped text.
    """
    hs = labels["question_labels"]["handout_short"]
    # The label is literal text, not a pattern: escape it so characters such
    # as "." or "(" in a localized label cannot match unrelated brackets.
    hs_pattern = re.escape(hs)

    # Hide escaped brackets behind placeholders so they survive the removal.
    s = s.replace("\\[", "LEFTSQUAREBRACKET")
    s = s.replace("\\]", "RIGHTSQUAREBRACKET")

    # Temporarily turn handout markers into {...} so the removal loop below
    # does not see them. A callable replacement keeps the label verbatim
    # (no backslash/group-reference interpretation).
    s = re.sub(
        r"\[" + hs_pattern + r"(.+?)\]",
        lambda m: "{" + hs + m.group(1) + "}",
        s,
        flags=re.DOTALL,
    )

    # Repeat the removal a bounded number of times to peel nested brackets.
    i = 0
    while "[" in s and "]" in s and i < 10:
        s = re.sub(r" *\[.+?\]", "", s, flags=re.DOTALL)
        s = s.strip()
        i += 1
    if i == 10:
        sys.stderr.write(
            f"Error replacing square brackets on question: {s}, retries exceeded\n"
        )

    # Restore handout markers and escaped brackets.
    s = re.sub(
        r"\{" + hs_pattern + r"(.+?)\}",
        lambda m: "[" + hs + m.group(1) + "]",
        s,
        flags=re.DOTALL,
    )
    s = s.replace("LEFTSQUAREBRACKET", "[")
    s = s.replace("RIGHTSQUAREBRACKET", "]")
    return s
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def add_hyperlink_to_docx(doc, paragraph, text, url):
    """Append a run with *text* to *paragraph*, wrapped in a hyperlink to *url*.

    Args:
        doc: Document whose ``"Hyperlink"`` style is applied to the run.
        paragraph: Paragraph that receives the hyperlink.
        text: Visible link text.
        url: Link target, registered as an external relationship.

    Returns:
        The ``w:hyperlink`` XML element appended to the paragraph
        (not a run object).
    """
    link_run = paragraph.add_run(text)
    link_run.style = doc.styles["Hyperlink"]

    # Register the URL on the paragraph's part to get a relationship id.
    relationship_id = paragraph.part.relate_to(
        url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    link_element = docx.oxml.shared.OxmlElement("w:hyperlink")
    link_element.set(docx.oxml.shared.qn("r:id"), relationship_id)
    # Move the run's XML under the hyperlink element, then attach the
    # hyperlink to the paragraph.
    link_element.append(link_run._r)
    paragraph._p.append(link_element)
    return link_element
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def format_docx_element(
    doc,
    el,
    para,
    whiten,
    spoilers="none",
    logger=None,
    labels=None,
    language="ru",
    remove_accents=False,
    remove_brackets=False,
    replace_no_break_spaces=False,
    **kwargs,
):
    """
    Standalone version of docx_format that can be used outside DocxExporter.

    A list element is rendered as a numbered list ("\\n1. ", "\\n2. ", ...);
    when its second item is itself a list, the first item is rendered as a
    preamble and the second as the numbered items. A string element is
    parsed with ``_parse_4s_elem`` and each resulting run is added to the
    paragraph. Elements of any other type are ignored.

    Args:
        doc: docx Document object
        el: Element to format (str, or list as described above)
        para: Paragraph object to add content to
        whiten: Whether to apply whitening
        spoilers: Spoiler handling mode ("none", "whiten", "dots", "pagebreak")
        logger: Logger instance; a DummyLogger is used when omitted
        labels: Labels dictionary (needed for bracket removal)
        language: Language code
        remove_accents: Whether to remove accents (U+0301)
        remove_brackets: Whether to remove square brackets; only applied
            when ``labels`` is also given
        replace_no_break_spaces: Whether to replace non-breaking spaces
        **kwargs: Additional arguments; ``tmp_dir`` and ``targetdir`` are
            forwarded to ``parseimg``

    Returns:
        None. Content is appended to ``para`` / ``doc``.
    """
    if logger is None:
        logger = DummyLogger()

    def _recurse(child):
        # Format a nested element with exactly the same settings.
        format_docx_element(
            doc,
            child,
            para,
            whiten,
            spoilers,
            logger,
            labels,
            language,
            remove_accents,
            remove_brackets,
            replace_no_break_spaces,
            **kwargs,
        )

    if isinstance(el, list):
        if len(el) > 1 and isinstance(el[1], list):
            _recurse(el[0])
            items = el[1]
        else:
            items = el
        for licount, li in enumerate(items, start=1):
            para.add_run("\n{}. ".format(licount))
            _recurse(li)

    if not isinstance(el, str):
        return

    logger.debug("parsing element {}:".format(log_wrap(el)))

    if remove_accents:
        el = el.replace("\u0301", "")
    if remove_brackets and labels:
        el = remove_square_brackets_standalone(el, labels)
    else:
        el = replace_escaped(el)

    el = backtick_replace(el)

    apply_whitening = bool(whiten) and spoilers == "whiten"

    for run in _parse_4s_elem(el, logger=logger):
        kind = run[0]

        if kind == "pagebreak":
            if spoilers == "dots":
                for _ in range(30):
                    para = doc.add_paragraph()
                    para.add_run(".")
                para = doc.add_paragraph()
            else:
                para = doc.add_page_break()
            # No run was created for this item: skip the styling below so it
            # never touches an unbound or stale ``r``.
            continue

        if kind == "linebreak":
            para.add_run("\n")
            continue  # same reason as above

        if kind == "screen":
            if remove_accents or remove_brackets:
                text = run[1]["for_screen"]
            else:
                text = run[1]["for_print"]
            if replace_no_break_spaces:
                text = replace_no_break_standalone(text)
            r = para.add_run(text)
        elif kind == "hyperlink" and not apply_whitening:
            # Whitened hyperlinks fall through to the plain-text branch.
            r = add_hyperlink_to_docx(doc, para, run[1], run[1])
        elif kind == "img":
            if run[1].endswith(".shtml"):
                r = para.add_run("(ТУТ БЫЛА ССЫЛКА НА ПРОТУХШУЮ КАРТИНКУ)\n")
                continue
            parsed_image = parseimg(
                run[1],
                dimensions="inches",
                tmp_dir=kwargs.get("tmp_dir"),
                targetdir=kwargs.get("targetdir"),
            )
            imgfile = parsed_image["imgfile"]
            width = parsed_image["width"]
            height = parsed_image["height"]
            inline = parsed_image["inline"]
            # Non-inline images start on their own line.
            r = para.add_run("" if inline else "\n")

            try:
                if inline:
                    r.add_picture(imgfile, height=Inches(1.0 / 6))
                else:
                    r.add_picture(
                        imgfile, width=Inches(width), height=Inches(height)
                    )
            except UnrecognizedImageError:
                sys.stderr.write(
                    f"python-docx can't recognize header for {imgfile}\n"
                )
            # NOTE(review): nesting reconstructed from a flattened source --
            # confirm the trailing newline/continue sit after the try block.
            if not inline:
                r = para.add_run("\n")
            continue
        else:
            text = run[1]
            if replace_no_break_spaces:
                text = replace_no_break_standalone(text)
            r = para.add_run(text)

        # Run-level styling; the run kind may combine several style names.
        if "italic" in kind:
            r.italic = True
        if "bold" in kind:
            r.bold = True
        if "underline" in kind:
            r.underline = True
        if kind == "strike":
            r.font.strike = True
        if kind == "sc":
            r.small_caps = True
        if apply_whitening:
            r.style = "Whitened"
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def add_question_to_docx(
    doc,
    question_data,
    labels,
    qcount=None,
    skip_qcount=False,
    screen_mode=False,
    external_para=None,
    noparagraph=False,
    noanswers=False,
    spoilers="none",
    language="ru",
    only_question_number=False,
    add_question_label=True,
    logger=None,
    **kwargs,
):
    """
    Write one question (and, unless disabled, its answer block) into a docx.

    Args:
        doc: docx Document object
        question_data: Dictionary containing question data; must have
            "question" and (unless noanswers) "answer"
        labels: Labels dictionary
        qcount: Current question count; treated as 1 when None
        skip_qcount: Whether to skip incrementing question count
        screen_mode: Whether to use screen mode formatting (removes accents
            and square brackets)
        external_para: Paragraph to write into instead of creating new ones;
            paragraph breaks then become newline runs
        noparagraph: Whether to skip the newline before the question text
        noanswers: Whether to skip answers and the remaining fields
        spoilers: Spoiler handling mode ("none", "whiten", "dots", "pagebreak")
        language: Language code
        only_question_number: Whether to show only question numbers
        add_question_label: Whether to emit the bold question label (and
            the larger spacing before the paragraph)
        logger: Logger instance; a DummyLogger is used when omitted
        **kwargs: Additional arguments passed to format_docx_element;
            "tmp_dir" and "targetdir" get defaults when missing

    Returns:
        Updated question count
    """
    if not kwargs.get("tmp_dir"):
        kwargs["tmp_dir"] = tempfile.mkdtemp()
    if not kwargs.get("targetdir"):
        kwargs["targetdir"] = os.getcwd()
    if logger is None:
        logger = DummyLogger()

    q = question_data
    use_own_paragraphs = external_para is None

    def _break(current):
        # Start a new paragraph, or emulate one with a newline run when the
        # caller supplied the paragraph to write into.
        if use_own_paragraphs:
            return doc.add_paragraph()
        current.add_run("\n")
        return current

    def _render(element, whiten_flag, **options):
        # Shared call into format_docx_element with the common arguments.
        format_docx_element(
            doc,
            element,
            p,
            whiten_flag,
            spoilers,
            logger,
            labels,
            language,
            **options,
            **kwargs,
        )

    p = doc.add_paragraph() if use_own_paragraphs else external_para
    if add_question_label:
        p.paragraph_format.space_before = DocxPt(18)
    # NOTE(review): nesting reconstructed from a flattened source -- confirm
    # keep_together is set regardless of add_question_label.
    p.paragraph_format.keep_together = True

    # Question numbering
    has_own_number = "number" in q
    if qcount is None:
        qcount = 1
    if not has_own_number and not skip_qcount:
        qcount += 1
    if "setcounter" in q:
        qcount = int(q["setcounter"])

    # Question label
    if add_question_label:
        shown_number = q["number"] if has_own_number else qcount
        question_label = get_label_standalone(
            q,
            "question",
            labels,
            language,
            only_question_number,
            number=shown_number,
        )
        p.add_run(f"{question_label}. ").bold = True

    # Handout, if present
    if "handout" in q:
        handout_label = get_label_standalone(q, "handout", labels, language)
        p.add_run(f"\n[{handout_label}: ")
        _render(
            q["handout"],
            WHITEN["handout"],
            remove_accents=screen_mode,
            remove_brackets=screen_mode,
        )
        p.add_run("\n]")

    if not noparagraph:
        p.add_run("\n")

    # Question text
    _render(
        q["question"],
        False,
        remove_accents=screen_mode,
        remove_brackets=screen_mode,
        replace_no_break_spaces=True,
    )

    if noanswers:
        return qcount

    # Separate the answer block from the question according to spoiler mode.
    if spoilers == "pagebreak":
        p = doc.add_page_break()
    elif spoilers == "dots":
        for _ in range(30):
            p = _break(p)
            p.add_run(".")
        p = _break(p)
    else:
        p = _break(p)

    p.paragraph_format.keep_together = True
    p.paragraph_format.space_before = DocxPt(6)

    # Answer
    answer_label = get_label_standalone(q, "answer", labels, language)
    p.add_run(f"{answer_label}: ").bold = True
    _render(
        q["answer"],
        True,
        remove_accents=screen_mode,
        replace_no_break_spaces=True,
    )

    # Remaining fields
    for field in ["zachet", "nezachet", "comment", "source", "author"]:
        if field not in q:
            continue
        if field == "source" and use_own_paragraphs:
            # Sources start their own paragraph.
            p = doc.add_paragraph()
            p.paragraph_format.keep_together = True
        else:
            p.add_run("\n")

        field_label = get_label_standalone(q, field, labels, language)
        p.add_run(f"{field_label}: ").bold = True
        _render(
            q[field],
            WHITEN[field],
            remove_accents=screen_mode,
            remove_brackets=screen_mode,
            replace_no_break_spaces=field != "source",
        )

    return qcount
|
|
491
|
+
|
|
492
|
+
|
|
62
493
|
class DocxExporter(BaseExporter):
|
|
63
494
|
def __init__(self, *args, **kwargs):
|
|
64
495
|
super().__init__(*args, **kwargs)
|
|
@@ -76,230 +507,53 @@ class DocxExporter(BaseExporter):
|
|
|
76
507
|
|
|
77
508
|
def _docx_format(self, *args, **kwargs):
|
|
78
509
|
kwargs.update(self.dir_kwargs)
|
|
79
|
-
return
|
|
510
|
+
return format_docx_element(
|
|
511
|
+
self.doc,
|
|
512
|
+
*args,
|
|
513
|
+
spoilers=self.args.spoilers,
|
|
514
|
+
logger=self.logger,
|
|
515
|
+
labels=self.labels,
|
|
516
|
+
language=self.args.language,
|
|
517
|
+
**kwargs,
|
|
518
|
+
)
|
|
80
519
|
|
|
81
520
|
def docx_format(self, el, para, whiten, **kwargs):
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
licount += 1
|
|
95
|
-
|
|
96
|
-
para.add_run("\n{}. ".format(licount))
|
|
97
|
-
self.docx_format(li, para, whiten, **kwargs)
|
|
98
|
-
|
|
99
|
-
if isinstance(el, str):
|
|
100
|
-
self.logger.debug("parsing element {}:".format(log_wrap(el)))
|
|
101
|
-
|
|
102
|
-
if kwargs.get("remove_accents"):
|
|
103
|
-
el = el.replace("\u0301", "")
|
|
104
|
-
if kwargs.get("remove_brackets"):
|
|
105
|
-
el = self.remove_square_brackets(el)
|
|
106
|
-
else:
|
|
107
|
-
el = replace_escaped(el)
|
|
108
|
-
|
|
109
|
-
el = backtick_replace(el)
|
|
110
|
-
|
|
111
|
-
for run in self.parse_4s_elem(el):
|
|
112
|
-
if run[0] == "pagebreak":
|
|
113
|
-
if self.args.spoilers == "dots":
|
|
114
|
-
for _ in range(30):
|
|
115
|
-
para = self.doc.add_paragraph()
|
|
116
|
-
para.add_run(".")
|
|
117
|
-
para = self.doc.add_paragraph()
|
|
118
|
-
else:
|
|
119
|
-
para = self.doc.add_page_break()
|
|
120
|
-
elif run[0] == "linebreak":
|
|
121
|
-
para.add_run("\n")
|
|
122
|
-
elif run[0] == "screen":
|
|
123
|
-
if kwargs.get("remove_accents") or kwargs.get("remove_brackets"):
|
|
124
|
-
text = run[1]["for_screen"]
|
|
125
|
-
else:
|
|
126
|
-
text = run[1]["for_print"]
|
|
127
|
-
if kwargs.get("replace_no_break_spaces"):
|
|
128
|
-
text = self._replace_no_break(text)
|
|
129
|
-
r = para.add_run(text)
|
|
130
|
-
elif run[0] == "hyperlink" and not (
|
|
131
|
-
whiten and self.args.spoilers == "whiten"
|
|
132
|
-
):
|
|
133
|
-
r = self.add_hyperlink(para, run[1], run[1])
|
|
134
|
-
elif run[0] == "img":
|
|
135
|
-
if run[1].endswith(".shtml"):
|
|
136
|
-
r = para.add_run(
|
|
137
|
-
"(ТУТ БЫЛА ССЫЛКА НА ПРОТУХШУЮ КАРТИНКУ)\n"
|
|
138
|
-
) # TODO: добавить возможность пропускать кривые картинки опцией
|
|
139
|
-
continue
|
|
140
|
-
parsed_image = parseimg(
|
|
141
|
-
run[1],
|
|
142
|
-
dimensions="inches",
|
|
143
|
-
tmp_dir=kwargs.get("tmp_dir"),
|
|
144
|
-
targetdir=kwargs.get("targetdir"),
|
|
145
|
-
)
|
|
146
|
-
imgfile = parsed_image["imgfile"]
|
|
147
|
-
width = parsed_image["width"]
|
|
148
|
-
height = parsed_image["height"]
|
|
149
|
-
inline = parsed_image["inline"]
|
|
150
|
-
if inline:
|
|
151
|
-
r = para.add_run("")
|
|
152
|
-
else:
|
|
153
|
-
r = para.add_run("\n")
|
|
154
|
-
|
|
155
|
-
try:
|
|
156
|
-
if inline:
|
|
157
|
-
r.add_picture(
|
|
158
|
-
imgfile,
|
|
159
|
-
height=Inches(
|
|
160
|
-
1.0 / 6
|
|
161
|
-
), # Height is based on docx template
|
|
162
|
-
)
|
|
163
|
-
else:
|
|
164
|
-
r.add_picture(
|
|
165
|
-
imgfile, width=Inches(width), height=Inches(height)
|
|
166
|
-
)
|
|
167
|
-
except UnrecognizedImageError:
|
|
168
|
-
sys.stderr.write(
|
|
169
|
-
f"python-docx can't recognize header for {imgfile}\n"
|
|
170
|
-
)
|
|
171
|
-
if not inline:
|
|
172
|
-
r = para.add_run("\n")
|
|
173
|
-
continue
|
|
174
|
-
else:
|
|
175
|
-
text = run[1]
|
|
176
|
-
if kwargs.get("replace_no_break_spaces"):
|
|
177
|
-
text = self._replace_no_break(text)
|
|
178
|
-
r = para.add_run(text)
|
|
179
|
-
if "italic" in run[0]:
|
|
180
|
-
r.italic = True
|
|
181
|
-
if "bold" in run[0]:
|
|
182
|
-
r.bold = True
|
|
183
|
-
if "underline" in run[0]:
|
|
184
|
-
r.underline = True
|
|
185
|
-
if run[0] == "strike":
|
|
186
|
-
r.font.strike = True
|
|
187
|
-
if run[0] == "sc":
|
|
188
|
-
r.small_caps = True
|
|
189
|
-
if whiten and self.args.spoilers == "whiten":
|
|
190
|
-
r.style = "Whitened"
|
|
521
|
+
# Redirect to standalone function
|
|
522
|
+
return format_docx_element(
|
|
523
|
+
self.doc,
|
|
524
|
+
el,
|
|
525
|
+
para,
|
|
526
|
+
whiten,
|
|
527
|
+
spoilers=self.args.spoilers,
|
|
528
|
+
logger=self.logger,
|
|
529
|
+
labels=self.labels,
|
|
530
|
+
language=self.args.language,
|
|
531
|
+
**kwargs,
|
|
532
|
+
)
|
|
191
533
|
|
|
192
534
|
def add_hyperlink(self, paragraph, text, url):
|
|
193
|
-
|
|
194
|
-
doc = self.doc
|
|
195
|
-
run = paragraph.add_run(text)
|
|
196
|
-
run.style = doc.styles["Hyperlink"]
|
|
197
|
-
part = paragraph.part
|
|
198
|
-
r_id = part.relate_to(
|
|
199
|
-
url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
|
|
200
|
-
)
|
|
201
|
-
hyperlink = docx.oxml.shared.OxmlElement("w:hyperlink")
|
|
202
|
-
hyperlink.set(docx.oxml.shared.qn("r:id"), r_id)
|
|
203
|
-
hyperlink.append(run._r)
|
|
204
|
-
paragraph._p.append(hyperlink)
|
|
205
|
-
return hyperlink
|
|
535
|
+
return add_hyperlink_to_docx(self.doc, paragraph, text, url)
|
|
206
536
|
|
|
207
537
|
def add_question(
|
|
208
538
|
self, element, skip_qcount=False, screen_mode=False, external_para=None
|
|
209
539
|
):
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
self.
|
|
219
|
-
|
|
220
|
-
self.
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
"question",
|
|
226
|
-
number=self.qcount if "number" not in q else q["number"],
|
|
227
|
-
)
|
|
228
|
-
)
|
|
229
|
-
).bold = True
|
|
230
|
-
|
|
231
|
-
if "handout" in q:
|
|
232
|
-
p.add_run("\n[{handout}: ".format(handout=self.get_label(q, "handout")))
|
|
233
|
-
self._docx_format(
|
|
234
|
-
q["handout"],
|
|
235
|
-
p,
|
|
236
|
-
WHITEN["handout"],
|
|
237
|
-
remove_accents=screen_mode,
|
|
238
|
-
remove_brackets=screen_mode,
|
|
239
|
-
)
|
|
240
|
-
p.add_run("\n]")
|
|
241
|
-
if not self.args.noparagraph:
|
|
242
|
-
p.add_run("\n")
|
|
243
|
-
|
|
244
|
-
self._docx_format(
|
|
245
|
-
q["question"],
|
|
246
|
-
p,
|
|
247
|
-
False,
|
|
248
|
-
remove_accents=screen_mode,
|
|
249
|
-
remove_brackets=screen_mode,
|
|
250
|
-
replace_no_break_spaces=True,
|
|
540
|
+
self.qcount = add_question_to_docx(
|
|
541
|
+
self.doc,
|
|
542
|
+
element[1],
|
|
543
|
+
self.labels,
|
|
544
|
+
self.qcount,
|
|
545
|
+
skip_qcount,
|
|
546
|
+
screen_mode,
|
|
547
|
+
external_para,
|
|
548
|
+
self.args.noparagraph,
|
|
549
|
+
self.args.noanswers,
|
|
550
|
+
self.args.spoilers,
|
|
551
|
+
self.args.language,
|
|
552
|
+
self.args.only_question_number,
|
|
553
|
+
self.logger,
|
|
554
|
+
**self.dir_kwargs,
|
|
251
555
|
)
|
|
252
556
|
|
|
253
|
-
if not self.args.noanswers:
|
|
254
|
-
if self.args.spoilers == "pagebreak":
|
|
255
|
-
p = self.doc.add_page_break()
|
|
256
|
-
elif self.args.spoilers == "dots":
|
|
257
|
-
for _ in range(30):
|
|
258
|
-
if external_para is None:
|
|
259
|
-
p = self.doc.add_paragraph()
|
|
260
|
-
else:
|
|
261
|
-
p.add_run("\n")
|
|
262
|
-
p.add_run(".")
|
|
263
|
-
if external_para is None:
|
|
264
|
-
p = self.doc.add_paragraph()
|
|
265
|
-
else:
|
|
266
|
-
p.add_run("\n")
|
|
267
|
-
else:
|
|
268
|
-
if external_para is None:
|
|
269
|
-
p = self.doc.add_paragraph()
|
|
270
|
-
else:
|
|
271
|
-
p.add_run("\n")
|
|
272
|
-
p.paragraph_format.keep_together = True
|
|
273
|
-
p.paragraph_format.space_before = DocxPt(6)
|
|
274
|
-
p.add_run(f"{self.get_label(q, 'answer')}: ").bold = True
|
|
275
|
-
self._docx_format(
|
|
276
|
-
q["answer"],
|
|
277
|
-
p,
|
|
278
|
-
True,
|
|
279
|
-
remove_accents=screen_mode,
|
|
280
|
-
replace_no_break_spaces=True,
|
|
281
|
-
)
|
|
282
|
-
|
|
283
|
-
for field in ["zachet", "nezachet", "comment", "source", "author"]:
|
|
284
|
-
if field in q:
|
|
285
|
-
if field == "source":
|
|
286
|
-
if external_para is None:
|
|
287
|
-
p = self.doc.add_paragraph()
|
|
288
|
-
p.paragraph_format.keep_together = True
|
|
289
|
-
else:
|
|
290
|
-
p.add_run("\n")
|
|
291
|
-
else:
|
|
292
|
-
p.add_run("\n")
|
|
293
|
-
p.add_run(f"{self.get_label(q, field)}: ").bold = True
|
|
294
|
-
self._docx_format(
|
|
295
|
-
q[field],
|
|
296
|
-
p,
|
|
297
|
-
WHITEN[field],
|
|
298
|
-
remove_accents=screen_mode,
|
|
299
|
-
remove_brackets=screen_mode,
|
|
300
|
-
replace_no_break_spaces=field != "source",
|
|
301
|
-
)
|
|
302
|
-
|
|
303
557
|
def _add_question_columns(self, element):
|
|
304
558
|
table = self.doc.add_table(rows=1, cols=2)
|
|
305
559
|
table.autofit = True
|
|
@@ -332,82 +586,6 @@ class DocxExporter(BaseExporter):
|
|
|
332
586
|
|
|
333
587
|
self.doc.add_paragraph()
|
|
334
588
|
|
|
335
|
-
def _add_question_content(self, q, p, skip_qcount=False, screen_mode=False):
|
|
336
|
-
"""Helper method to add question content to a paragraph"""
|
|
337
|
-
if "number" not in q and not skip_qcount:
|
|
338
|
-
self.qcount += 1
|
|
339
|
-
if "setcounter" in q:
|
|
340
|
-
self.qcount = int(q["setcounter"])
|
|
341
|
-
p.add_run(
|
|
342
|
-
"{question}. ".format(
|
|
343
|
-
question=self.get_label(
|
|
344
|
-
q,
|
|
345
|
-
"question",
|
|
346
|
-
number=self.qcount if "number" not in q else q["number"],
|
|
347
|
-
)
|
|
348
|
-
)
|
|
349
|
-
).bold = True
|
|
350
|
-
|
|
351
|
-
if "handout" in q:
|
|
352
|
-
p.add_run("\n[{handout}: ".format(handout=self.get_label(q, "handout")))
|
|
353
|
-
self._docx_format(
|
|
354
|
-
q["handout"],
|
|
355
|
-
p,
|
|
356
|
-
WHITEN["handout"],
|
|
357
|
-
remove_accents=screen_mode,
|
|
358
|
-
remove_brackets=screen_mode,
|
|
359
|
-
)
|
|
360
|
-
p.add_run("\n]")
|
|
361
|
-
if not self.args.noparagraph:
|
|
362
|
-
p.add_run("\n")
|
|
363
|
-
|
|
364
|
-
self._docx_format(
|
|
365
|
-
q["question"],
|
|
366
|
-
p,
|
|
367
|
-
False,
|
|
368
|
-
remove_accents=screen_mode,
|
|
369
|
-
remove_brackets=screen_mode,
|
|
370
|
-
replace_no_break_spaces=True,
|
|
371
|
-
)
|
|
372
|
-
|
|
373
|
-
if not self.args.noanswers:
|
|
374
|
-
if self.args.spoilers == "pagebreak":
|
|
375
|
-
p = self.doc.add_page_break()
|
|
376
|
-
elif self.args.spoilers == "dots":
|
|
377
|
-
for _ in range(30):
|
|
378
|
-
p = self.doc.add_paragraph()
|
|
379
|
-
p.add_run(".")
|
|
380
|
-
p = self.doc.add_paragraph()
|
|
381
|
-
else:
|
|
382
|
-
p = self.doc.add_paragraph()
|
|
383
|
-
p.paragraph_format.keep_together = True
|
|
384
|
-
p.paragraph_format.space_before = DocxPt(6)
|
|
385
|
-
p.add_run(f"{self.get_label(q, 'answer')}: ").bold = True
|
|
386
|
-
self._docx_format(
|
|
387
|
-
q["answer"],
|
|
388
|
-
p,
|
|
389
|
-
True,
|
|
390
|
-
remove_accents=screen_mode,
|
|
391
|
-
replace_no_break_spaces=True,
|
|
392
|
-
)
|
|
393
|
-
|
|
394
|
-
for field in ["zachet", "nezachet", "comment", "source", "author"]:
|
|
395
|
-
if field in q:
|
|
396
|
-
if field == "source":
|
|
397
|
-
p = self.doc.add_paragraph()
|
|
398
|
-
p.paragraph_format.keep_together = True
|
|
399
|
-
else:
|
|
400
|
-
p.add_run("\n")
|
|
401
|
-
p.add_run(f"{self.get_label(q, field)}: ").bold = True
|
|
402
|
-
self._docx_format(
|
|
403
|
-
q[field],
|
|
404
|
-
p,
|
|
405
|
-
WHITEN[field],
|
|
406
|
-
remove_accents=screen_mode,
|
|
407
|
-
remove_brackets=screen_mode,
|
|
408
|
-
replace_no_break_spaces=field != "source",
|
|
409
|
-
)
|
|
410
|
-
|
|
411
589
|
def export(self, outfilename):
|
|
412
590
|
self.logger.debug(self.args.docx_template)
|
|
413
591
|
self.doc = Document(self.args.docx_template)
|
|
@@ -473,3 +651,53 @@ class DocxExporter(BaseExporter):
|
|
|
473
651
|
|
|
474
652
|
self.doc.save(outfilename)
|
|
475
653
|
self.logger.info("Output: {}".format(outfilename))
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
# Example usage of the extracted DOCX functions:
|
|
657
|
+
"""
|
|
658
|
+
from docx import Document
|
|
659
|
+
import toml
|
|
660
|
+
from chgksuite.composer.docx import add_question_to_docx, format_docx_element
|
|
661
|
+
|
|
662
|
+
# Load labels
|
|
663
|
+
with open("labels.toml", encoding="utf8") as f:
|
|
664
|
+
labels = toml.load(f)
|
|
665
|
+
|
|
666
|
+
# Create a new document
|
|
667
|
+
doc = Document()
|
|
668
|
+
|
|
669
|
+
# Example question data
|
|
670
|
+
question_data = {
|
|
671
|
+
"question": "What is the capital of France?",
|
|
672
|
+
"answer": "Paris",
|
|
673
|
+
"comment": "This is a basic geography question",
|
|
674
|
+
"source": "World Geography Book"
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
# Add question to document
|
|
678
|
+
qcount = add_question_to_docx(
|
|
679
|
+
doc=doc,
|
|
680
|
+
question_data=question_data,
|
|
681
|
+
labels=labels,
|
|
682
|
+
qcount=0, # Starting question count
|
|
683
|
+
noanswers=False, # Include answers
|
|
684
|
+
spoilers="none", # No spoiler handling
|
|
685
|
+
language="en",
|
|
686
|
+
only_question_number=False
|
|
687
|
+
)
|
|
688
|
+
|
|
689
|
+
# Or use the lower-level formatting function directly
|
|
690
|
+
paragraph = doc.add_paragraph()
|
|
691
|
+
format_docx_element(
|
|
692
|
+
doc=doc,
|
|
693
|
+
el="This is **bold text** and _italic text_",
|
|
694
|
+
para=paragraph,
|
|
695
|
+
whiten=False,
|
|
696
|
+
spoilers="none",
|
|
697
|
+
labels=labels,
|
|
698
|
+
language="en"
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
# Save the document
|
|
702
|
+
doc.save("example_output.docx")
|
|
703
|
+
"""
|