chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. chgksuite/_html2md.py +90 -0
  2. chgksuite/cli.py +38 -8
  3. chgksuite/common.py +16 -12
  4. chgksuite/composer/__init__.py +9 -7
  5. chgksuite/composer/chgksuite_parser.py +20 -9
  6. chgksuite/composer/composer_common.py +30 -3
  7. chgksuite/composer/db.py +1 -2
  8. chgksuite/composer/docx.py +542 -292
  9. chgksuite/composer/latex.py +3 -4
  10. chgksuite/composer/lj.py +1 -2
  11. chgksuite/composer/{reddit.py → markdown.py} +35 -25
  12. chgksuite/composer/openquiz.py +2 -3
  13. chgksuite/composer/pptx.py +18 -6
  14. chgksuite/composer/telegram.py +22 -10
  15. chgksuite/handouter/gen.py +11 -7
  16. chgksuite/handouter/installer.py +0 -0
  17. chgksuite/handouter/runner.py +237 -10
  18. chgksuite/handouter/tex_internals.py +12 -13
  19. chgksuite/handouter/utils.py +22 -1
  20. chgksuite/lastdir +1 -0
  21. chgksuite/parser.py +218 -37
  22. chgksuite/parser_db.py +4 -6
  23. chgksuite/resources/labels_az.toml +22 -0
  24. chgksuite/resources/labels_by.toml +1 -2
  25. chgksuite/resources/labels_by_tar.toml +1 -2
  26. chgksuite/resources/labels_en.toml +1 -2
  27. chgksuite/resources/labels_kz_cyr.toml +1 -2
  28. chgksuite/resources/labels_ru.toml +1 -2
  29. chgksuite/resources/labels_sr.toml +1 -2
  30. chgksuite/resources/labels_ua.toml +1 -2
  31. chgksuite/resources/labels_uz.toml +0 -3
  32. chgksuite/resources/labels_uz_cyr.toml +1 -2
  33. chgksuite/resources/regexes_az.json +17 -0
  34. chgksuite/resources/regexes_by.json +3 -2
  35. chgksuite/resources/regexes_by_tar.json +17 -0
  36. chgksuite/resources/regexes_en.json +3 -2
  37. chgksuite/resources/regexes_kz_cyr.json +3 -2
  38. chgksuite/resources/regexes_ru.json +3 -2
  39. chgksuite/resources/regexes_sr.json +3 -2
  40. chgksuite/resources/regexes_ua.json +3 -2
  41. chgksuite/resources/regexes_uz.json +16 -0
  42. chgksuite/resources/regexes_uz_cyr.json +3 -2
  43. chgksuite/trello.py +8 -9
  44. chgksuite/typotools.py +9 -8
  45. chgksuite/version.py +1 -1
  46. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/METADATA +10 -19
  47. chgksuite-0.27.0.dist-info/RECORD +63 -0
  48. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/WHEEL +1 -2
  49. chgksuite/composer/telegram_parser.py +0 -230
  50. chgksuite-0.26.0b11.dist-info/RECORD +0 -59
  51. chgksuite-0.26.0b11.dist-info/top_level.txt +0 -1
  52. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/entry_points.txt +0 -0
  53. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import re
2
3
  import shutil
3
4
  import sys
4
5
  import tempfile
@@ -12,8 +13,15 @@ from docx.oxml.ns import qn
12
13
  from docx.shared import Inches
13
14
  from docx.shared import Pt as DocxPt
14
15
 
15
- from chgksuite.common import log_wrap, replace_escaped
16
- from chgksuite.composer.composer_common import BaseExporter, backtick_replace, parseimg
16
+ import chgksuite.typotools as typotools
17
+ from chgksuite.common import DummyLogger, log_wrap, replace_escaped
18
+ from chgksuite.composer.composer_common import (
19
+ BaseExporter,
20
+ _parse_4s_elem,
21
+ backtick_replace,
22
+ parseimg,
23
+ remove_accents_standalone,
24
+ )
17
25
 
18
26
  WHITEN = {
19
27
  "handout": False,
@@ -59,6 +67,448 @@ def replace_font_in_docx(template_path, new_font):
59
67
  return temp_template
60
68
 
61
69
 
70
def replace_no_break_standalone(s, replace_spaces=True, replace_hyphens=True):
    """Run ``typotools.replace_no_break`` on ``s`` without an exporter instance.

    Args:
        s: Text to process.
        replace_spaces: Forwarded to ``typotools.replace_no_break`` as ``spaces``.
        replace_hyphens: Forwarded to ``typotools.replace_no_break`` as ``hyphens``.

    Returns:
        The value returned by ``typotools.replace_no_break``.
    """
    options = {"spaces": replace_spaces, "hyphens": replace_hyphens}
    return typotools.replace_no_break(s, **options)
73
+
74
+
75
def get_label_standalone(
    question, field, labels, language="ru", only_question_number=False, number=None
):
    """Return the display label for ``field`` of ``question``.

    Args:
        question: Question dictionary. The keys read here are ``"number"``,
            ``"overrides"`` and ``"source"``.
        field: Name of the field whose label is requested.
        labels: Labels dictionary; ``labels["question_labels"]`` is consulted.
        language: Language code. ``"uz"``/``"uz_cyr"`` and ``"kz"`` put the
            number before the label for the ``"question"`` and ``"tour"`` fields.
        only_question_number: When true and ``field == "question"``, return
            just the number as a string (``labels`` is not consulted).
        number: Fallback number used when ``question`` has no truthy ``"number"``.

    Returns:
        The label string.

    Raises:
        KeyError: If the required label is missing from ``labels``.
    """
    if field == "question" and only_question_number:
        return str(question.get("number") or number)

    # A missing or empty "overrides" entry behaves like an empty mapping.
    overrides = question.get("overrides") or {}

    if field in ("question", "tour"):
        lbl = overrides.get(field) or labels["question_labels"][field]
        num = question.get("number") or number
        if language in ("uz", "uz_cyr"):
            return f"{num} – {lbl}"
        if language == "kz":
            return f"{num}-{lbl}"
        return f"{lbl} {num}"

    if field in overrides:
        return overrides[field]

    # A list of sources gets the plural label.
    if field == "source" and isinstance(question.get("source"), list):
        return labels["question_labels"]["sources"]
    return labels["question_labels"][field]
97
+
98
+
99
def remove_square_brackets_standalone(s, regexes):
    """Strip ``[bracketed]`` fragments from ``s`` while keeping handout blocks.

    Backslash-escaped brackets (``\\[`` and ``\\]``) come back as literal
    brackets. Bracketed fragments whose content starts with the
    ``regexes["handout_short"]`` pattern are preserved; every other bracketed
    fragment, together with the spaces directly before it, is removed.

    Args:
        s: Text to clean.
        regexes: Regexes dictionary; ``regexes["handout_short"]`` is inserted
            into a regular expression as-is.

    Returns:
        The cleaned text.
    """
    hs = regexes["handout_short"]
    s = s.replace("\\[", "LEFTSQUAREBRACKET")
    s = s.replace("\\]", "RIGHTSQUAREBRACKET")

    # Park handout contents in a list and leave only an index token in the
    # text. Restoring by index keeps handouts intact even when they contain
    # "}" or other characters a textual placeholder would trip over.
    handouts = []

    def _stash(match):
        handouts.append(match.group(1))
        return "\x00HANDOUT{}\x00".format(len(handouts) - 1)

    s = re.sub(f"\\[({hs}.+?)\\]", _stash, s, flags=re.DOTALL)

    retries = 0
    while "[" in s and "]" in s and retries < 10:
        s = re.sub(" *\\[.+?\\]", "", s, flags=re.DOTALL)
        s = s.strip()
        retries += 1
    # Only a failure if brackets are still present after the last attempt.
    gave_up = retries == 10 and "[" in s and "]" in s

    def _restore(match):
        index = int(match.group(1))
        if index >= len(handouts):
            # Not one of our tokens; leave the text untouched.
            return match.group(0)
        return "[" + handouts[index] + "]"

    s = re.sub(r"\x00HANDOUT(\d+)\x00", _restore, s)
    s = s.replace("LEFTSQUAREBRACKET", "[")
    s = s.replace("RIGHTSQUAREBRACKET", "]")

    if gave_up:
        sys.stderr.write(
            f"Error replacing square brackets on question: {s}, retries exceeded\n"
        )
    return s
125
+
126
+
127
def add_hyperlink_to_docx(doc, paragraph, text, url):
    """Append a run showing ``text`` that links to ``url`` to ``paragraph``.

    Args:
        doc: Document whose ``"Hyperlink"`` style is applied to the run.
        paragraph: Paragraph that receives the link.
        text: Visible link text.
        url: Link target, registered as an external relationship.

    Returns:
        The created ``w:hyperlink`` XML element.
    """
    link_run = paragraph.add_run(text)
    link_run.style = doc.styles["Hyperlink"]

    # Register the target on the paragraph's part to obtain a relationship id.
    relationship_id = paragraph.part.relate_to(
        url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    link_element = docx.oxml.shared.OxmlElement("w:hyperlink")
    link_element.set(docx.oxml.shared.qn("r:id"), relationship_id)
    link_element.append(link_run._r)
    paragraph._p.append(link_element)
    return link_element
140
+
141
+
142
def format_docx_element(
    doc,
    el,
    para,
    whiten,
    spoilers="none",
    logger=None,
    labels=None,
    regexes=None,
    language="ru",
    remove_accents=False,
    remove_brackets=False,
    replace_no_break_spaces=False,
    **kwargs,
):
    """
    Standalone version of docx_format that can be used outside DocxExporter.

    A list element is rendered as a numbered list inside ``para``. If its
    second item is itself a list, the first item is rendered as a lead-in and
    the second item supplies the list entries. A string element is parsed
    with ``_parse_4s_elem`` and each resulting run is added to ``para``.
    Any other type is ignored.

    Args:
        doc: docx Document object
        el: Element to format (string or list)
        para: Paragraph object to add content to
        whiten: Whether the runs of this element are spoiler content; they
            get the "Whitened" style only when ``spoilers == "whiten"``
        spoilers: Spoiler handling mode ("none", "whiten", "dots", "pagebreak")
        logger: Logger instance; a DummyLogger is used when omitted
        labels: Labels dictionary (only passed on to recursive calls)
        regexes: Regexes dictionary; accent and bracket removal are skipped
            when it is empty or None
        language: Language code (only passed on to recursive calls)
        remove_accents: Whether to remove accents
        remove_brackets: Whether to remove square brackets
        replace_no_break_spaces: Whether to run the no-break replacement on
            text runs
        **kwargs: Additional arguments; ``tmp_dir`` and ``targetdir`` are
            forwarded to ``parseimg``

    Returns:
        None. Paragraphs created for page breaks are not handed back to the
        caller.
    """
    if logger is None:
        logger = DummyLogger()

    def _format_child(child):
        # Recurse with exactly the options this call received.
        format_docx_element(
            doc,
            child,
            para,
            whiten,
            spoilers,
            logger,
            labels,
            regexes,
            language,
            remove_accents,
            remove_brackets,
            replace_no_break_spaces,
            **kwargs,
        )

    if isinstance(el, list):
        if len(el) > 1 and isinstance(el[1], list):
            # [lead-in, [item, item, ...]]
            _format_child(el[0])
            items = el[1]
        else:
            items = el
        for licount, li in enumerate(items, start=1):
            para.add_run("\n{}. ".format(licount))
            _format_child(li)
        return

    if not isinstance(el, str):
        return

    logger.debug("parsing element {}:".format(log_wrap(el)))

    if remove_accents and regexes:
        el = remove_accents_standalone(el, regexes)
    if remove_brackets and regexes:
        el = remove_square_brackets_standalone(el, regexes)
    else:
        el = replace_escaped(el)

    el = backtick_replace(el)

    whiten_runs = whiten and spoilers == "whiten"

    for run in _parse_4s_elem(el, logger=logger):
        kind = run[0]
        # Reset for every run: page breaks and line breaks create no run to
        # style, and without the reset the styling below would hit an unbound
        # name (first run) or a run left over from an earlier iteration.
        r = None

        if kind == "pagebreak":
            if spoilers == "dots":
                for _ in range(30):
                    para = doc.add_paragraph()
                    para.add_run(".")
                para = doc.add_paragraph()
            else:
                para = doc.add_page_break()
        elif kind == "linebreak":
            para.add_run("\n")
        elif kind == "screen":
            if remove_accents or remove_brackets:
                text = run[1]["for_screen"]
            else:
                text = run[1]["for_print"]
            if replace_no_break_spaces:
                text = replace_no_break_standalone(text)
            r = para.add_run(text)
        elif kind == "hyperlink" and not whiten_runs:
            r = add_hyperlink_to_docx(doc, para, run[1], run[1])
        elif kind == "img":
            if run[1].endswith(".shtml"):
                # Such references get a fixed placeholder text instead of a picture.
                para.add_run("(ТУТ БЫЛА ССЫЛКА НА ПРОТУХШУЮ КАРТИНКУ)\n")
                continue
            parsed_image = parseimg(
                run[1],
                dimensions="inches",
                tmp_dir=kwargs.get("tmp_dir"),
                targetdir=kwargs.get("targetdir"),
            )
            imgfile = parsed_image["imgfile"]
            width = parsed_image["width"]
            height = parsed_image["height"]
            inline = parsed_image["inline"]
            r = para.add_run("" if inline else "\n")

            try:
                if inline:
                    r.add_picture(imgfile, height=Inches(1.0 / 6))
                else:
                    r.add_picture(
                        imgfile, width=Inches(width), height=Inches(height)
                    )
            except UnrecognizedImageError:
                sys.stderr.write(
                    f"python-docx can't recognize header for {imgfile}\n"
                )
            if not inline:
                para.add_run("\n")
            continue
        else:
            # Plain or styled text; also hyperlinks inside whitened content.
            text = run[1]
            if replace_no_break_spaces:
                text = replace_no_break_standalone(text)
            r = para.add_run(text)

        if r is None:
            continue

        if "italic" in kind:
            r.italic = True
        if "bold" in kind:
            r.bold = True
        if "underline" in kind:
            r.underline = True
        if kind == "strike":
            r.font.strike = True
        if kind == "sc":
            r.small_caps = True
        if whiten_runs:
            r.style = "Whitened"
318
+
319
+
320
def add_question_to_docx(
    doc,
    question_data,
    labels,
    regexes=None,
    qcount=None,
    skip_qcount=False,
    screen_mode=False,
    external_para=None,
    noparagraph=False,
    noanswers=False,
    spoilers="none",
    language="ru",
    only_question_number=False,
    add_question_label=True,
    logger=None,
    **kwargs,
):
    """
    Standalone function to add a question to a docx document.

    Args:
        doc: docx Document object
        question_data: Dictionary containing question data; ``"question"`` is
            required, and ``"answer"`` is required unless ``noanswers`` is set
        labels: Labels dictionary
        regexes: Regexes dictionary (for handout_short)
        qcount: Current question count. It is incremented when the question
            has no ``"number"`` and ``skip_qcount`` is false, and replaced by
            ``question_data["setcounter"]`` when that key is present.
            NOTE(review): ``None`` starts from 1, so the first unnumbered
            question is labelled 2 -- confirm whether 0 was intended.
        skip_qcount: Whether to skip incrementing question count
        screen_mode: Whether to use screen mode formatting (accent and
            bracket removal)
        external_para: External paragraph to use instead of creating new ones
        noparagraph: Whether to skip the line break after a handout
        noanswers: Whether to skip adding answers and the remaining fields
        spoilers: Spoiler handling mode ("none", "whiten", "dots", "pagebreak")
        language: Language code
        only_question_number: Whether to show only question numbers
        add_question_label: Whether to write the bold question label and
            apply question spacing to the first paragraph
        logger: Logger instance; a DummyLogger is used when omitted
        **kwargs: Additional arguments passed to format_docx_element.
            ``tmp_dir`` defaults to a fresh temporary directory that is
            removed before returning; ``targetdir`` defaults to the current
            working directory

    Returns:
        Updated question count
    """
    created_tmp_dir = None
    if not kwargs.get("tmp_dir"):
        # Remember the directory so it can be removed again on the way out.
        created_tmp_dir = tempfile.mkdtemp()
        kwargs["tmp_dir"] = created_tmp_dir
    if not kwargs.get("targetdir"):
        kwargs["targetdir"] = os.getcwd()
    if logger is None:
        logger = DummyLogger()

    q = question_data

    def _format(el, para, whiten_field, **options):
        # Options shared by every field of this question.
        format_docx_element(
            doc,
            el,
            para,
            whiten_field,
            spoilers,
            logger,
            labels,
            regexes,
            language,
            **options,
            **kwargs,
        )

    def _next_block(current):
        # New paragraph in document mode, line break inside an external one.
        if external_para is None:
            return doc.add_paragraph()
        current.add_run("\n")
        return current

    try:
        p = doc.add_paragraph() if external_para is None else external_para
        if add_question_label:
            p.paragraph_format.space_before = DocxPt(18)
        p.paragraph_format.keep_together = True

        # Handle question numbering
        if qcount is None:
            qcount = 1
        if "number" not in q and not skip_qcount:
            qcount += 1
        if "setcounter" in q:
            qcount = int(q["setcounter"])

        # Add question label
        if add_question_label:
            question_label = get_label_standalone(
                q,
                "question",
                labels,
                language,
                only_question_number,
                number=qcount if "number" not in q else q["number"],
            )
            p.add_run(f"{question_label}. ").bold = True

        # Add handout if present
        if "handout" in q:
            handout_label = get_label_standalone(q, "handout", labels, language)
            p.add_run(f"\n[{handout_label}: ")
            _format(
                q["handout"],
                p,
                WHITEN["handout"],
                remove_accents=screen_mode,
                remove_brackets=screen_mode,
            )
            p.add_run("\n]")

        if not noparagraph:
            p.add_run("\n")

        # Add question text
        _format(
            q["question"],
            p,
            False,
            remove_accents=screen_mode,
            remove_brackets=screen_mode,
            replace_no_break_spaces=True,
        )

        if noanswers:
            return qcount

        # Separate the answer block from the question text
        if spoilers == "pagebreak":
            p = doc.add_page_break()
        elif spoilers == "dots":
            for _ in range(30):
                p = _next_block(p)
                p.add_run(".")
            p = _next_block(p)
        else:
            p = _next_block(p)

        p.paragraph_format.keep_together = True
        p.paragraph_format.space_before = DocxPt(6)

        # Add answer
        answer_label = get_label_standalone(q, "answer", labels, language)
        p.add_run(f"{answer_label}: ").bold = True
        _format(
            q["answer"],
            p,
            True,
            remove_accents=screen_mode,
            replace_no_break_spaces=True,
        )

        # Add other fields
        for field in ["zachet", "nezachet", "comment", "source", "author"]:
            if field not in q:
                continue
            if field == "source":
                # Sources start their own paragraph in document mode.
                p = _next_block(p)
                if external_para is None:
                    p.paragraph_format.keep_together = True
            else:
                p.add_run("\n")

            field_label = get_label_standalone(q, field, labels, language)
            p.add_run(f"{field_label}: ").bold = True
            _format(
                q[field],
                p,
                WHITEN[field],
                remove_accents=screen_mode,
                remove_brackets=screen_mode,
                replace_no_break_spaces=field != "source",
            )

        return qcount
    finally:
        if created_tmp_dir is not None:
            shutil.rmtree(created_tmp_dir, ignore_errors=True)
510
+
511
+
62
512
  class DocxExporter(BaseExporter):
63
513
  def __init__(self, *args, **kwargs):
64
514
  super().__init__(*args, **kwargs)
@@ -76,230 +526,56 @@ class DocxExporter(BaseExporter):
76
526
 
77
527
  def _docx_format(self, *args, **kwargs):
78
528
  kwargs.update(self.dir_kwargs)
79
- return self.docx_format(*args, **kwargs)
529
+ return format_docx_element(
530
+ self.doc,
531
+ *args,
532
+ spoilers=self.args.spoilers,
533
+ logger=self.logger,
534
+ labels=self.labels,
535
+ regexes=self.regexes,
536
+ language=self.args.language,
537
+ **kwargs,
538
+ )
80
539
 
81
540
  def docx_format(self, el, para, whiten, **kwargs):
82
- if isinstance(el, list):
83
- if len(el) > 1 and isinstance(el[1], list):
84
- self.docx_format(el[0], para, whiten, **kwargs)
85
- licount = 0
86
- for li in el[1]:
87
- licount += 1
88
-
89
- para.add_run("\n{}. ".format(licount))
90
- self.docx_format(li, para, whiten, **kwargs)
91
- else:
92
- licount = 0
93
- for li in el:
94
- licount += 1
95
-
96
- para.add_run("\n{}. ".format(licount))
97
- self.docx_format(li, para, whiten, **kwargs)
98
-
99
- if isinstance(el, str):
100
- self.logger.debug("parsing element {}:".format(log_wrap(el)))
101
-
102
- if kwargs.get("remove_accents"):
103
- el = el.replace("\u0301", "")
104
- if kwargs.get("remove_brackets"):
105
- el = self.remove_square_brackets(el)
106
- else:
107
- el = replace_escaped(el)
108
-
109
- el = backtick_replace(el)
110
-
111
- for run in self.parse_4s_elem(el):
112
- if run[0] == "pagebreak":
113
- if self.args.spoilers == "dots":
114
- for _ in range(30):
115
- para = self.doc.add_paragraph()
116
- para.add_run(".")
117
- para = self.doc.add_paragraph()
118
- else:
119
- para = self.doc.add_page_break()
120
- elif run[0] == "linebreak":
121
- para.add_run("\n")
122
- elif run[0] == "screen":
123
- if kwargs.get("remove_accents") or kwargs.get("remove_brackets"):
124
- text = run[1]["for_screen"]
125
- else:
126
- text = run[1]["for_print"]
127
- if kwargs.get("replace_no_break_spaces"):
128
- text = self._replace_no_break(text)
129
- r = para.add_run(text)
130
- elif run[0] == "hyperlink" and not (
131
- whiten and self.args.spoilers == "whiten"
132
- ):
133
- r = self.add_hyperlink(para, run[1], run[1])
134
- elif run[0] == "img":
135
- if run[1].endswith(".shtml"):
136
- r = para.add_run(
137
- "(ТУТ БЫЛА ССЫЛКА НА ПРОТУХШУЮ КАРТИНКУ)\n"
138
- ) # TODO: добавить возможность пропускать кривые картинки опцией
139
- continue
140
- parsed_image = parseimg(
141
- run[1],
142
- dimensions="inches",
143
- tmp_dir=kwargs.get("tmp_dir"),
144
- targetdir=kwargs.get("targetdir"),
145
- )
146
- imgfile = parsed_image["imgfile"]
147
- width = parsed_image["width"]
148
- height = parsed_image["height"]
149
- inline = parsed_image["inline"]
150
- if inline:
151
- r = para.add_run("")
152
- else:
153
- r = para.add_run("\n")
154
-
155
- try:
156
- if inline:
157
- r.add_picture(
158
- imgfile,
159
- height=Inches(
160
- 1.0 / 6
161
- ), # Height is based on docx template
162
- )
163
- else:
164
- r.add_picture(
165
- imgfile, width=Inches(width), height=Inches(height)
166
- )
167
- except UnrecognizedImageError:
168
- sys.stderr.write(
169
- f"python-docx can't recognize header for {imgfile}\n"
170
- )
171
- if not inline:
172
- r = para.add_run("\n")
173
- continue
174
- else:
175
- text = run[1]
176
- if kwargs.get("replace_no_break_spaces"):
177
- text = self._replace_no_break(text)
178
- r = para.add_run(text)
179
- if "italic" in run[0]:
180
- r.italic = True
181
- if "bold" in run[0]:
182
- r.bold = True
183
- if "underline" in run[0]:
184
- r.underline = True
185
- if run[0] == "strike":
186
- r.font.strike = True
187
- if run[0] == "sc":
188
- r.small_caps = True
189
- if whiten and self.args.spoilers == "whiten":
190
- r.style = "Whitened"
541
+ # Redirect to standalone function
542
+ return format_docx_element(
543
+ self.doc,
544
+ el,
545
+ para,
546
+ whiten,
547
+ spoilers=self.args.spoilers,
548
+ logger=self.logger,
549
+ labels=self.labels,
550
+ regexes=self.regexes,
551
+ language=self.args.language,
552
+ **kwargs,
553
+ )
191
554
 
192
555
  def add_hyperlink(self, paragraph, text, url):
193
- # adapted from https://github.com/python-openxml/python-docx/issues/610
194
- doc = self.doc
195
- run = paragraph.add_run(text)
196
- run.style = doc.styles["Hyperlink"]
197
- part = paragraph.part
198
- r_id = part.relate_to(
199
- url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
200
- )
201
- hyperlink = docx.oxml.shared.OxmlElement("w:hyperlink")
202
- hyperlink.set(docx.oxml.shared.qn("r:id"), r_id)
203
- hyperlink.append(run._r)
204
- paragraph._p.append(hyperlink)
205
- return hyperlink
556
+ return add_hyperlink_to_docx(self.doc, paragraph, text, url)
206
557
 
207
558
  def add_question(
208
559
  self, element, skip_qcount=False, screen_mode=False, external_para=None
209
560
  ):
210
- q = element[1]
211
- if external_para is None:
212
- p = self.doc.add_paragraph()
213
- else:
214
- p = external_para
215
- p.paragraph_format.space_before = DocxPt(18)
216
- p.paragraph_format.keep_together = True
217
- if "number" not in q and not skip_qcount:
218
- self.qcount += 1
219
- if "setcounter" in q:
220
- self.qcount = int(q["setcounter"])
221
- p.add_run(
222
- "{question}. ".format(
223
- question=self.get_label(
224
- q,
225
- "question",
226
- number=self.qcount if "number" not in q else q["number"],
227
- )
228
- )
229
- ).bold = True
230
-
231
- if "handout" in q:
232
- p.add_run("\n[{handout}: ".format(handout=self.get_label(q, "handout")))
233
- self._docx_format(
234
- q["handout"],
235
- p,
236
- WHITEN["handout"],
237
- remove_accents=screen_mode,
238
- remove_brackets=screen_mode,
239
- )
240
- p.add_run("\n]")
241
- if not self.args.noparagraph:
242
- p.add_run("\n")
243
-
244
- self._docx_format(
245
- q["question"],
246
- p,
247
- False,
248
- remove_accents=screen_mode,
249
- remove_brackets=screen_mode,
250
- replace_no_break_spaces=True,
561
+ self.qcount = add_question_to_docx(
562
+ self.doc,
563
+ element[1],
564
+ self.labels,
565
+ self.regexes,
566
+ self.qcount,
567
+ skip_qcount,
568
+ screen_mode,
569
+ external_para,
570
+ self.args.noparagraph,
571
+ self.args.noanswers,
572
+ self.args.spoilers,
573
+ self.args.language,
574
+ self.args.only_question_number,
575
+ self.logger,
576
+ **self.dir_kwargs,
251
577
  )
252
578
 
253
- if not self.args.noanswers:
254
- if self.args.spoilers == "pagebreak":
255
- p = self.doc.add_page_break()
256
- elif self.args.spoilers == "dots":
257
- for _ in range(30):
258
- if external_para is None:
259
- p = self.doc.add_paragraph()
260
- else:
261
- p.add_run("\n")
262
- p.add_run(".")
263
- if external_para is None:
264
- p = self.doc.add_paragraph()
265
- else:
266
- p.add_run("\n")
267
- else:
268
- if external_para is None:
269
- p = self.doc.add_paragraph()
270
- else:
271
- p.add_run("\n")
272
- p.paragraph_format.keep_together = True
273
- p.paragraph_format.space_before = DocxPt(6)
274
- p.add_run(f"{self.get_label(q, 'answer')}: ").bold = True
275
- self._docx_format(
276
- q["answer"],
277
- p,
278
- True,
279
- remove_accents=screen_mode,
280
- replace_no_break_spaces=True,
281
- )
282
-
283
- for field in ["zachet", "nezachet", "comment", "source", "author"]:
284
- if field in q:
285
- if field == "source":
286
- if external_para is None:
287
- p = self.doc.add_paragraph()
288
- p.paragraph_format.keep_together = True
289
- else:
290
- p.add_run("\n")
291
- else:
292
- p.add_run("\n")
293
- p.add_run(f"{self.get_label(q, field)}: ").bold = True
294
- self._docx_format(
295
- q[field],
296
- p,
297
- WHITEN[field],
298
- remove_accents=screen_mode,
299
- remove_brackets=screen_mode,
300
- replace_no_break_spaces=field != "source",
301
- )
302
-
303
579
  def _add_question_columns(self, element):
304
580
  table = self.doc.add_table(rows=1, cols=2)
305
581
  table.autofit = True
@@ -332,82 +608,6 @@ class DocxExporter(BaseExporter):
332
608
 
333
609
  self.doc.add_paragraph()
334
610
 
335
- def _add_question_content(self, q, p, skip_qcount=False, screen_mode=False):
336
- """Helper method to add question content to a paragraph"""
337
- if "number" not in q and not skip_qcount:
338
- self.qcount += 1
339
- if "setcounter" in q:
340
- self.qcount = int(q["setcounter"])
341
- p.add_run(
342
- "{question}. ".format(
343
- question=self.get_label(
344
- q,
345
- "question",
346
- number=self.qcount if "number" not in q else q["number"],
347
- )
348
- )
349
- ).bold = True
350
-
351
- if "handout" in q:
352
- p.add_run("\n[{handout}: ".format(handout=self.get_label(q, "handout")))
353
- self._docx_format(
354
- q["handout"],
355
- p,
356
- WHITEN["handout"],
357
- remove_accents=screen_mode,
358
- remove_brackets=screen_mode,
359
- )
360
- p.add_run("\n]")
361
- if not self.args.noparagraph:
362
- p.add_run("\n")
363
-
364
- self._docx_format(
365
- q["question"],
366
- p,
367
- False,
368
- remove_accents=screen_mode,
369
- remove_brackets=screen_mode,
370
- replace_no_break_spaces=True,
371
- )
372
-
373
- if not self.args.noanswers:
374
- if self.args.spoilers == "pagebreak":
375
- p = self.doc.add_page_break()
376
- elif self.args.spoilers == "dots":
377
- for _ in range(30):
378
- p = self.doc.add_paragraph()
379
- p.add_run(".")
380
- p = self.doc.add_paragraph()
381
- else:
382
- p = self.doc.add_paragraph()
383
- p.paragraph_format.keep_together = True
384
- p.paragraph_format.space_before = DocxPt(6)
385
- p.add_run(f"{self.get_label(q, 'answer')}: ").bold = True
386
- self._docx_format(
387
- q["answer"],
388
- p,
389
- True,
390
- remove_accents=screen_mode,
391
- replace_no_break_spaces=True,
392
- )
393
-
394
- for field in ["zachet", "nezachet", "comment", "source", "author"]:
395
- if field in q:
396
- if field == "source":
397
- p = self.doc.add_paragraph()
398
- p.paragraph_format.keep_together = True
399
- else:
400
- p.add_run("\n")
401
- p.add_run(f"{self.get_label(q, field)}: ").bold = True
402
- self._docx_format(
403
- q[field],
404
- p,
405
- WHITEN[field],
406
- remove_accents=screen_mode,
407
- remove_brackets=screen_mode,
408
- replace_no_break_spaces=field != "source",
409
- )
410
-
411
611
  def export(self, outfilename):
412
612
  self.logger.debug(self.args.docx_template)
413
613
  self.doc = Document(self.args.docx_template)
@@ -473,3 +673,53 @@ class DocxExporter(BaseExporter):
473
673
 
474
674
  self.doc.save(outfilename)
475
675
  self.logger.info("Output: {}".format(outfilename))
676
+
677
+
678
+ # Example usage of the extracted DOCX functions:
679
+ """
680
+ from docx import Document
681
+ import toml
682
+ from chgksuite.composer.docx import add_question_to_docx, format_docx_element
683
+
684
+ # Load labels
685
+ with open("labels.toml", encoding="utf8") as f:
686
+ labels = toml.load(f)
687
+
688
+ # Create a new document
689
+ doc = Document()
690
+
691
+ # Example question data
692
+ question_data = {
693
+ "question": "What is the capital of France?",
694
+ "answer": "Paris",
695
+ "comment": "This is a basic geography question",
696
+ "source": "World Geography Book"
697
+ }
698
+
699
+ # Add question to document
700
+ qcount = add_question_to_docx(
701
+ doc=doc,
702
+ question_data=question_data,
703
+ labels=labels,
704
+ qcount=0, # Starting question count
705
+ noanswers=False, # Include answers
706
+ spoilers="none", # No spoiler handling
707
+ language="en",
708
+ only_question_number=False
709
+ )
710
+
711
+ # Or use the lower-level formatting function directly
712
+ paragraph = doc.add_paragraph()
713
+ format_docx_element(
714
+ doc=doc,
715
+ el="This is **bold text** and _italic text_",
716
+ para=paragraph,
717
+ whiten=False,
718
+ spoilers="none",
719
+ labels=labels,
720
+ language="en"
721
+ )
722
+
723
+ # Save the document
724
+ doc.save("example_output.docx")
725
+ """