chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. chgksuite/_html2md.py +90 -0
  2. chgksuite/cli.py +38 -8
  3. chgksuite/common.py +16 -12
  4. chgksuite/composer/__init__.py +9 -7
  5. chgksuite/composer/chgksuite_parser.py +20 -9
  6. chgksuite/composer/composer_common.py +30 -3
  7. chgksuite/composer/db.py +1 -2
  8. chgksuite/composer/docx.py +542 -292
  9. chgksuite/composer/latex.py +3 -4
  10. chgksuite/composer/lj.py +1 -2
  11. chgksuite/composer/{reddit.py → markdown.py} +35 -25
  12. chgksuite/composer/openquiz.py +2 -3
  13. chgksuite/composer/pptx.py +18 -6
  14. chgksuite/composer/telegram.py +22 -10
  15. chgksuite/handouter/gen.py +11 -7
  16. chgksuite/handouter/installer.py +0 -0
  17. chgksuite/handouter/runner.py +237 -10
  18. chgksuite/handouter/tex_internals.py +12 -13
  19. chgksuite/handouter/utils.py +22 -1
  20. chgksuite/lastdir +1 -0
  21. chgksuite/parser.py +218 -37
  22. chgksuite/parser_db.py +4 -6
  23. chgksuite/resources/labels_az.toml +22 -0
  24. chgksuite/resources/labels_by.toml +1 -2
  25. chgksuite/resources/labels_by_tar.toml +1 -2
  26. chgksuite/resources/labels_en.toml +1 -2
  27. chgksuite/resources/labels_kz_cyr.toml +1 -2
  28. chgksuite/resources/labels_ru.toml +1 -2
  29. chgksuite/resources/labels_sr.toml +1 -2
  30. chgksuite/resources/labels_ua.toml +1 -2
  31. chgksuite/resources/labels_uz.toml +0 -3
  32. chgksuite/resources/labels_uz_cyr.toml +1 -2
  33. chgksuite/resources/regexes_az.json +17 -0
  34. chgksuite/resources/regexes_by.json +3 -2
  35. chgksuite/resources/regexes_by_tar.json +17 -0
  36. chgksuite/resources/regexes_en.json +3 -2
  37. chgksuite/resources/regexes_kz_cyr.json +3 -2
  38. chgksuite/resources/regexes_ru.json +3 -2
  39. chgksuite/resources/regexes_sr.json +3 -2
  40. chgksuite/resources/regexes_ua.json +3 -2
  41. chgksuite/resources/regexes_uz.json +16 -0
  42. chgksuite/resources/regexes_uz_cyr.json +3 -2
  43. chgksuite/trello.py +8 -9
  44. chgksuite/typotools.py +9 -8
  45. chgksuite/version.py +1 -1
  46. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/METADATA +10 -19
  47. chgksuite-0.27.0.dist-info/RECORD +63 -0
  48. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/WHEEL +1 -2
  49. chgksuite/composer/telegram_parser.py +0 -230
  50. chgksuite-0.26.0b11.dist-info/RECORD +0 -59
  51. chgksuite-0.26.0b11.dist-info/top_level.txt +0 -1
  52. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/entry_points.txt +0 -0
  53. {chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/licenses/LICENSE +0 -0
chgksuite/parser.py CHANGED
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
  import base64
4
- import codecs
5
4
  import datetime
5
+ import hashlib
6
6
  import itertools
7
7
  import json
8
8
  import os
@@ -13,12 +13,15 @@ import subprocess
13
13
  import sys
14
14
  import tempfile
15
15
  import urllib
16
+ import time
16
17
 
17
18
  import bs4
18
19
  import chardet
19
- import dashtable
20
20
  import mammoth
21
+
22
+ from chgksuite._html2md import html2md
21
23
  import pypandoc
24
+ import requests
22
25
  import toml
23
26
  from bs4 import BeautifulSoup
24
27
  from parse import parse
@@ -26,11 +29,11 @@ from parse import parse
26
29
  import chgksuite.typotools as typotools
27
30
  from chgksuite.common import (
28
31
  QUESTION_LABELS,
29
- DefaultArgs,
30
32
  DefaultNamespace,
31
33
  DummyLogger,
32
34
  check_question,
33
35
  compose_4s,
36
+ get_chgksuite_dir,
34
37
  get_lastdir,
35
38
  init_logger,
36
39
  load_settings,
@@ -40,9 +43,10 @@ from chgksuite.common import (
40
43
  from chgksuite.composer import gui_compose
41
44
  from chgksuite.composer.composer_common import make_filename
42
45
  from chgksuite.parser_db import chgk_parse_db
46
+ from chgksuite.typotools import re_url
43
47
  from chgksuite.typotools import remove_excessive_whitespace as rew
44
48
 
45
- ENC = sys.stdout.encoding or "utf8"
49
+
46
50
  SEP = os.linesep
47
51
  EDITORS = {
48
52
  "win32": "notepad",
@@ -57,7 +61,7 @@ def partition(alist, indices):
57
61
 
58
62
 
59
63
def load_regexes(regexfile):
    """Load a JSON file of named regex sources and compile each of them.

    Args:
        regexfile: Path to a UTF-8 encoded JSON file whose top-level value
            is an object mapping a name to a regular-expression string.

    Returns:
        A dict mapping every name from the file to its ``re.compile``-d
        pattern.
    """
    with open(regexfile, "r", encoding="utf-8") as f:
        # json.load parses straight from the file object; no need to read
        # the whole text into an intermediate string first.
        raw_patterns = json.load(f)
    return {name: re.compile(source) for name, source in raw_patterns.items()}
63
67
 
@@ -107,7 +111,7 @@ class ChgkParser:
107
111
 
108
112
  def __init__(self, defaultauthor=None, args=None, logger=None):
109
113
  self.defaultauthor = defaultauthor
110
- args = args or DefaultArgs()
114
+ args = args or DefaultNamespace()
111
115
  self.regexes = load_regexes(args.regexes)
112
116
  self.logger = logger or init_logger("parser")
113
117
  self.args = args
@@ -121,6 +125,148 @@ class ChgkParser:
121
125
  if self.args.language == "en":
122
126
  self.args.typography_quotes = "off"
123
127
 
128
def _setup_image_cache(self):
    """Prepare the image download cache and load its index (once).

    On the first call this sets three attributes on the instance:

    * ``image_cache_dir`` -- ``<chgksuite dir>/downloaded_images``; the
      directory is created if it does not exist yet;
    * ``image_cache_file`` -- ``<chgksuite dir>/image_download_cache.json``;
    * ``_image_cache`` -- the dict loaded from ``image_cache_file``, or an
      empty dict when the file is missing, unreadable, malformed, or does
      not contain a JSON object.

    Later calls return immediately: the presence of ``_image_cache`` is
    what marks the cache as initialised.
    """
    if hasattr(self, "_image_cache"):
        return

    base_dir = get_chgksuite_dir()
    self.image_cache_dir = os.path.join(base_dir, "downloaded_images")
    os.makedirs(self.image_cache_dir, exist_ok=True)
    self.image_cache_file = os.path.join(base_dir, "image_download_cache.json")

    cache = {}
    if os.path.isfile(self.image_cache_file):
        try:
            with open(self.image_cache_file, encoding="utf8") as f:
                loaded = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError as well as
            # UnicodeDecodeError (a cache file that is not valid UTF-8);
            # a damaged index must never abort parsing.
            loaded = None
        # The index is used with item lookup/assignment by key, so anything
        # other than a JSON object is treated as a corrupted cache.
        if isinstance(loaded, dict):
            cache = loaded
    self._image_cache = cache
147
+
148
def _download_image(self, url):
    """Download the image at *url* into the local cache.

    The cache key is the first 20 hex digits of the SHA-256 of the URL
    (after all backslashes have been stripped from it). If the index
    already maps that key to a file that still exists on disk, the cached
    path is returned and no request is made.

    Args:
        url: Address of the image. Backslashes are removed before use
            (presumably they are escape characters left over from the
            source markup -- confirm against callers).

    Returns:
        The path of the local file, or ``None`` if anything went wrong
        while downloading or recording the download (the failure is logged
        as a warning, never raised).
    """
    image_exts = (".jpg", ".jpeg", ".png", ".webp", ".gif", ".bmp", ".svg")

    self._setup_image_cache()
    url = url.replace("\\", "")

    # Check cache first
    url_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()[:20]
    cached_filename = self._image_cache.get(url_hash)
    if cached_filename:
        cached_path = os.path.join(self.image_cache_dir, cached_filename)
        if os.path.isfile(cached_path):
            return cached_path

    # Determine file extension: prefer a real suffix of the URL path, then
    # any known extension appearing inside the path, then fall back to .jpg.
    path_lower = urllib.parse.urlparse(url).path.lower()
    ext = next((e for e in image_exts if path_lower.endswith(e)), None)
    if ext is None:
        ext = next((e for e in image_exts if e in path_lower), ".jpg")

    filename = url_hash + ext
    filepath = os.path.join(self.image_cache_dir, filename)
    # Stream into a temporary sibling so that an interrupted download never
    # leaves a truncated file under the final cache name.
    partial_path = filepath + ".part"

    try:
        self.logger.info(f"Downloading image from {url}")
        # No explicit Accept-Encoding: requests supplies a default that only
        # lists encodings it is actually able to decode. Advertising "br"
        # unconditionally risks saving still-compressed bytes as the image.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Accept": "image/png,image/jpeg,image/webp,image/gif,image/*,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        }
        # The context manager releases the streamed connection even if
        # reading or writing fails half-way.
        with requests.get(
            url, timeout=30, stream=True, headers=headers
        ) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, filepath)
        time.sleep(0.5)  # rate limiting

        # Update cache
        self._image_cache[url_hash] = filename
        with open(self.image_cache_file, "w", encoding="utf8") as f:
            json.dump(self._image_cache, f, indent=2, sort_keys=True)

        return filepath

    except Exception as e:
        # Deliberately broad: image download is best-effort and must not
        # abort parsing of the document.
        self.logger.warning(f"Failed to download image from {url}: {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing was written, or the partial file is already gone.
            pass
        return None
230
+
231
def _process_images_in_text(self, text):
    """Replace direct image URLs in *text* with local ``(img ...)`` references.

    Nothing is done (and *text* is returned as is) when *text* is falsy or
    when ``self.args.download_images`` is missing or falsy.

    Args:
        text: A string, a list (processed item by item, recursively), or
            any other value (returned unchanged).

    Returns:
        For a string: the text in which every ``re_url`` match ending in a
        known image extension that could be downloaded is replaced by
        ``(img <local path>)``; URLs that are not images, or whose download
        failed, are left untouched. For a list: a new list of processed
        items.
    """
    if not text or not getattr(self.args, "download_images", False):
        return text

    if isinstance(text, list):
        return [self._process_images_in_text(item) for item in text]

    if not isinstance(text, str):
        return text

    image_exts = (".jpg", ".jpeg", ".png", ".webp", ".gif", ".bmp", ".svg")
    # url -> replacement text; a URL repeated in the same text is resolved
    # only once (this also avoids retrying a failed download per occurrence).
    resolved = {}

    def replace_match(match):
        url = match.group(0)
        if url not in resolved:
            replacement = url
            # Check if it's a direct image URL
            if url.lower().endswith(image_exts):
                local_filename = self._download_image(url)
                if local_filename:
                    # Replace URL with chgksuite image syntax
                    replacement = f"(img {local_filename})"
            resolved[url] = replacement
        return resolved[url]

    # Substituting match by match rewrites exactly the spans recognised as
    # URLs; a plain str.replace would also hit the same characters where
    # they are merely a fragment of a different, longer URL.
    return re_url.sub(replace_match, text)
259
+
260
def _process_question_images(self, question):
    """Run image processing over every field of *question* except ``source``.

    The dict is updated in place: each value other than the one stored
    under ``"source"`` is replaced by the result of
    ``self._process_images_in_text``. Does nothing when
    ``self.args.download_images`` is missing or falsy. Returns ``None``.
    """
    if getattr(self.args, "download_images", False):
        for key in question:
            if key == "source":
                # The source field is left exactly as written.
                continue
            question[key] = self._process_images_in_text(question[key])
269
+
124
270
  def merge_to_previous(self, index):
125
271
  target = index - 1
126
272
  if self.structure[target][1]:
@@ -181,7 +327,7 @@ class ChgkParser:
181
327
  regex,
182
328
  regexes[regex].search(self.remove_formatting(st[i][1])).start(0),
183
329
  )
184
- for regex in set(regexes) - {"number", "date2"}
330
+ for regex in set(regexes) - {"number", "date2", "handout_short"}
185
331
  if regexes[regex].search(self.remove_formatting(st[i][1]))
186
332
  }
187
333
 
@@ -380,7 +526,7 @@ class ChgkParser:
380
526
  )
381
527
 
382
528
  if debug:
383
- with codecs.open("debug_0.txt", "w", "utf8") as f:
529
+ with open("debug_0.txt", "w", encoding="utf-8") as f:
384
530
  f.write(text)
385
531
 
386
532
  # 1.
@@ -412,7 +558,7 @@ class ChgkParser:
412
558
  i = 0
413
559
 
414
560
  if debug:
415
- with codecs.open("debug_1.json", "w", "utf8") as f:
561
+ with open("debug_1.json", "w", encoding="utf-8") as f:
416
562
  f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
417
563
 
418
564
  self.process_single_number_lines()
@@ -420,15 +566,15 @@ class ChgkParser:
420
566
  # hack for https://gitlab.com/peczony/chgksuite/-/issues/23; TODO: make less hacky
421
567
  for i, element in enumerate(self.structure):
422
568
  if (
423
- "дуплет." in element[1].lower().split()
424
- or "блиц." in element[1].lower().split()
569
+ "Дуплет." in element[1].split()
570
+ or "Блиц." in element[1].split()
425
571
  and element[0] != "question"
426
572
  and (i == 0 or self.structure[i - 1][0] != "question")
427
573
  ):
428
574
  element[0] = "question"
429
575
 
430
576
  if debug:
431
- with codecs.open("debug_1a.json", "w", "utf8") as f:
577
+ with open("debug_1a.json", "w", encoding="utf-8") as f:
432
578
  f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
433
579
 
434
580
  # 2.
@@ -438,7 +584,7 @@ class ChgkParser:
438
584
  self.merge_to_x_until_nextfield("comment")
439
585
 
440
586
  if debug:
441
- with codecs.open("debug_2.json", "w", "utf8") as f:
587
+ with open("debug_2.json", "w", encoding="utf-8") as f:
442
588
  f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
443
589
 
444
590
  # 3.
@@ -501,7 +647,7 @@ class ChgkParser:
501
647
  self.merge_to_x_until_nextfield("nezachet")
502
648
 
503
649
  if debug:
504
- with codecs.open("debug_3.json", "w", "utf8") as f:
650
+ with open("debug_3.json", "w", encoding="utf-8") as f:
505
651
  f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
506
652
 
507
653
  # 4.
@@ -513,7 +659,19 @@ class ChgkParser:
513
659
  ):
514
660
  self.merge_to_next(0)
515
661
 
516
- for _id, element in enumerate(self.structure):
662
+ if debug:
663
+ with open("debug_3a.json", "w", encoding="utf-8") as f:
664
+ f.write(
665
+ json.dumps(
666
+ list(enumerate(self.structure)), ensure_ascii=False, indent=4
667
+ )
668
+ )
669
+
670
+ idx = 0
671
+ cycle = -1
672
+ while idx < len(self.structure):
673
+ cycle += 1
674
+ element = self.structure[idx]
517
675
  if element[0] == "":
518
676
  element[0] = "meta"
519
677
  if element[0] in regexes and element[0] not in [
@@ -525,34 +683,42 @@ class ChgkParser:
525
683
  try:
526
684
  num = regexes["question"].search(element[1])
527
685
  if num and num.group("number"):
528
- self.structure.insert(_id, ["number", num.group("number")])
686
+ self.structure.insert(idx, ["number", num.group("number")])
687
+ idx += 1
529
688
  except Exception as e:
689
+ num = None
530
690
  sys.stderr.write(
531
- f"exception at line 445 of parser: {type(e)} {e}\n"
691
+ f"exception at setting number: {type(e)} {e}\nQuestion: {element[1]}\n"
532
692
  )
533
- if (
534
- not num
535
- and ("нулевой вопрос" in element[1].lower())
693
+ if (num is None or num and not num.group("number")) and (
694
+ ("нулевой вопрос" in element[1].lower())
536
695
  or ("разминочный вопрос" in element[1].lower())
537
696
  ):
538
- self.structure.insert(_id, ["number", "0"])
697
+ self.structure.insert(idx, ["number", "0"])
698
+ idx += 1
539
699
  if element[0] == "question":
540
700
  lines = element[1].split(SEP)
541
701
  for i, line in enumerate(lines):
542
702
  if regexes["question"].search(line):
543
703
  lines[i] = regexes["question"].sub("", line, 1)
544
704
  element[1] = SEP.join([x.strip() for x in lines if x.strip()])
705
+ before_replacement = None
545
706
  else:
546
707
  before_replacement = element[1]
547
708
  element[1] = regexes[element[0]].sub("", element[1], 1)
548
709
  if element[1].startswith(SEP):
549
710
  element[1] = element[1][len(SEP) :]
550
711
  # TODO: переделать корявую обработку авторки на нормальную
551
- if element[0] == "author" and "авторка:" in before_replacement.lower():
712
+ if (
713
+ element[0] == "author"
714
+ and before_replacement
715
+ and "авторка:" in before_replacement.lower()
716
+ ):
552
717
  element[1] = "!!Авторка" + element[1]
718
+ idx += 1
553
719
 
554
720
  if debug:
555
- with codecs.open("debug_4.json", "w", "utf8") as f:
721
+ with open("debug_4.json", "w", encoding="utf-8") as f:
556
722
  f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
557
723
 
558
724
  # 5.
@@ -632,7 +798,7 @@ class ChgkParser:
632
798
  )
633
799
 
634
800
  if debug:
635
- with codecs.open("debug_5.json", "w", "utf8") as f:
801
+ with open("debug_5.json", "w", encoding="utf-8") as f:
636
802
  f.write(json.dumps(self.structure, ensure_ascii=False, indent=4))
637
803
 
638
804
  # 6.
@@ -648,6 +814,7 @@ class ChgkParser:
648
814
  ):
649
815
  if self.defaultauthor and "author" not in current_question:
650
816
  current_question["author"] = self.defaultauthor
817
+ self._process_question_images(current_question)
651
818
  check_question(current_question, logger=logger)
652
819
  final_structure.append(["Question", current_question])
653
820
  current_question = {}
@@ -681,11 +848,12 @@ class ChgkParser:
681
848
  if current_question != {}:
682
849
  if self.defaultauthor and "author" not in current_question:
683
850
  current_question["author"] = self.defaultauthor
851
+ self._process_question_images(current_question)
684
852
  check_question(current_question, logger=logger)
685
853
  final_structure.append(["Question", current_question])
686
854
 
687
855
  if debug:
688
- with codecs.open("debug_6.json", "w", "utf8") as f:
856
+ with open("debug_6.json", "w", encoding="utf-8") as f:
689
857
  f.write(json.dumps(final_structure, ensure_ascii=False, indent=4))
690
858
 
691
859
  # 7.
@@ -724,16 +892,22 @@ class ChgkParser:
724
892
  elif element[0] == "tour" and self.args.tour_numbers_as_words == "on":
725
893
  element[1] = f"{self.TOUR_NUMBERS_AS_WORDS[tour_cnt]} тур"
726
894
  tour_cnt += 1
895
+ elif element[0] not in ["Question", "source"] and getattr(
896
+ self.args, "download_images", False
897
+ ):
898
+ # Process images in metadata fields (excluding source)
899
+ element[1] = self._process_images_in_text(element[1])
727
900
 
728
901
  if debug:
729
- with codecs.open("debug_final.json", "w", "utf8") as f:
902
+ with open("debug_final.json", "w", encoding="utf-8") as f:
730
903
  f.write(json.dumps(final_structure, ensure_ascii=False, indent=4))
731
904
  return final_structure
732
905
 
733
906
 
734
907
def chgk_parse(text, defaultauthor=None, args=None):
    """Parse *text* with a freshly constructed ``ChgkParser``.

    Args:
        text: The text handed to ``ChgkParser.parse``.
        defaultauthor: Forwarded to the ``ChgkParser`` constructor.
        args: Forwarded to the ``ChgkParser`` constructor.

    Returns:
        Whatever ``ChgkParser.parse`` returns for *text*.
    """
    return ChgkParser(defaultauthor=defaultauthor, args=args).parse(text)
737
911
 
738
912
 
739
913
  class UnknownEncodingException(Exception):
@@ -779,7 +953,7 @@ def ensure_line_breaks(tag):
779
953
 
780
954
  def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
781
955
  logger = logger or DummyLogger()
782
- args = args or DefaultArgs()
956
+ args = args or DefaultNamespace()
783
957
  for_ol = {}
784
958
 
785
959
  def get_number(tag):
@@ -807,6 +981,11 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
807
981
  else:
808
982
  with open(docxfile, "rb") as docx_file:
809
983
  html = mammoth.convert_to_html(docx_file).value
984
+ if args.debug:
985
+ with open(
986
+ os.path.join(target_dir, "debugdebug.pydocx"), "w", encoding="utf-8"
987
+ ) as dbg:
988
+ dbg.write(html)
810
989
  input_docx = (
811
990
  html.replace("</strong><strong>", "")
812
991
  .replace("</em><em>", "")
@@ -815,8 +994,8 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
815
994
  bsoup = BeautifulSoup(input_docx, "html.parser")
816
995
 
817
996
  if args.debug:
818
- with codecs.open(
819
- os.path.join(target_dir, "debug.pydocx"), "w", "utf8"
997
+ with open(
998
+ os.path.join(target_dir, "debug.pydocx"), "w", encoding="utf-8"
820
999
  ) as dbg:
821
1000
  dbg.write(input_docx)
822
1001
 
@@ -886,7 +1065,7 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
886
1065
  ensure_line_breaks(tag)
887
1066
  for tag in bsoup.find_all("table"):
888
1067
  try:
889
- table = dashtable.html2md(str(tag))
1068
+ table = html2md(str(tag))
890
1069
  tag.insert_before(table)
891
1070
  except (TypeError, ValueError):
892
1071
  logger.error(f"couldn't parse html table: {str(tag)}")
@@ -917,12 +1096,12 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
917
1096
  tag.unwrap()
918
1097
 
919
1098
  if args.debug:
920
- with codecs.open(
921
- os.path.join(target_dir, "debug_raw.html"), "w", "utf8"
1099
+ with open(
1100
+ os.path.join(target_dir, "debug_raw.html"), "w", encoding="utf-8"
922
1101
  ) as dbg:
923
1102
  dbg.write(str(bsoup))
924
- with codecs.open(
925
- os.path.join(target_dir, "debug.html"), "w", "utf8"
1103
+ with open(
1104
+ os.path.join(target_dir, "debug.html"), "w", encoding="utf-8"
926
1105
  ) as dbg:
927
1106
  dbg.write(bsoup.prettify())
928
1107
 
@@ -960,7 +1139,9 @@ def chgk_parse_docx(docxfile, defaultauthor="", args=None, logger=None):
960
1139
  txt = txt.replace(f"IMGPATH({i})", elem)
961
1140
 
962
1141
  if args.debug:
963
- with codecs.open(os.path.join(target_dir, "debug.debug"), "w", "utf8") as dbg:
1142
+ with open(
1143
+ os.path.join(target_dir, "debug.debug"), "w", encoding="utf-8"
1144
+ ) as dbg:
964
1145
  dbg.write(txt)
965
1146
 
966
1147
  final_structure = chgk_parse(txt, defaultauthor=defaultauthor, args=args)
@@ -994,7 +1175,7 @@ def chgk_parse_wrapper(path, args, logger=None):
994
1175
  sys.exit()
995
1176
  outfilename = os.path.join(target_dir, make_filename(abspath, "4s", args))
996
1177
  logger.info("Output: {}".format(os.path.abspath(outfilename)))
997
- with codecs.open(outfilename, "w", "utf8") as output_file:
1178
+ with open(outfilename, "w", encoding="utf-8") as output_file:
998
1179
  output_file.write(compose_4s(final_structure, args=args))
999
1180
  return outfilename
1000
1181
 
chgksuite/parser_db.py CHANGED
@@ -1,7 +1,6 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- import codecs
5
4
  import json
6
5
  import os
7
6
  import re
@@ -128,7 +127,7 @@ def t_ANSWER(t):
128
127
  t.lexer.text = ""
129
128
  if t.lexer.question["answer"]:
130
129
  logger.warning(
131
- "Bad format: several Answer fields. " "Previous Answer was:" " '%s'",
130
+ "Bad format: several Answer fields. Previous Answer was: '%s'",
132
131
  t.lexer.question["answer"],
133
132
  )
134
133
 
@@ -151,7 +150,7 @@ def t_COMMENT(t):
151
150
  t.lexer.text = ""
152
151
  if t.lexer.question["comment"]:
153
152
  logger.warning(
154
- "Bad format: several Comment fields. " "Previous Comment was:" " '%s'",
153
+ "Bad format: several Comment fields. Previous Comment was: '%s'",
155
154
  t.lexer.question["comment"],
156
155
  )
157
156
 
@@ -162,7 +161,7 @@ def t_SOURCE(t):
162
161
  t.lexer.text = ""
163
162
  if t.lexer.question["source"]:
164
163
  logger.warning(
165
- "Bad format: several Source fields. " "Previous Source was:" " '%s'",
164
+ "Bad format: several Source fields. Previous Source was: '%s'",
166
165
  t.lexer.question["source"],
167
166
  )
168
167
 
@@ -417,7 +416,6 @@ def replace_handouts(match_handout):
417
416
 
418
417
 
419
418
  def chgk_parse_db(text, debug=False, logger=False):
420
-
421
419
  if not logger:
422
420
  logger = init_logger("parser_db", debug=debug)
423
421
 
@@ -437,7 +435,7 @@ def chgk_parse_db(text, debug=False, logger=False):
437
435
  append_question(lexer)
438
436
 
439
437
  if debug:
440
- with codecs.open("debug_final.json", "w", "utf8") as f:
438
+ with open("debug_final.json", "w", encoding="utf-8") as f:
441
439
  f.write(json.dumps(lexer.structure, ensure_ascii=False, indent=4))
442
440
 
443
441
  return lexer.structure
@@ -0,0 +1,22 @@
1
+ [question_labels]
2
+ question = "Sual"
3
+ answer = "Cavab"
4
+ zachet = "Sayılma meyarı"
5
+ nezachet = "Sayılmır"
6
+ comment = "Şərh"
7
+ source = "Mənbə"
8
+ sources = "Mənbələr"
9
+ author = "Müəllif"
10
+ authors = "Müəlliflər"
11
+ handout = "Paylama materialı"
12
+
13
+ [general]
14
+ section = "Tur"
15
+ editor = "Redaktor"
16
+ date = "Tarix"
17
+ questions_in_comments = "Suallar şərh bölməsindədir."
18
+ general_impressions_caption = "Ümumi təəssüratlar"
19
+ handout_for_question = "Paylama materialı sual {} üçün"
20
+ general_impressions_text = "Paket barədə ümumi təəssüratlar — bu postun altına şərh kimi yazın."
21
+ right_answers_for_stats = "Doğru cavablar"
22
+ cf_image = "Şəklə baxın"
@@ -9,7 +9,6 @@ sources = "Крыніцы"
9
9
  author = "Аўтар"
10
10
  authors = "Аўтары"
11
11
  handout = "Раздаткавы матэрыял"
12
- handout_short = "Раздат"
13
12
 
14
13
  [general]
15
14
  section = "Тур"
@@ -19,4 +18,4 @@ questions_in_comments = "Пытанні ў каментарах."
19
18
  handout_for_question = "Раздаткавы матэрыял да пытання {}"
20
19
  general_impressions_caption = "Агульныя ўражанні"
21
20
  general_impressions_text = "Агульныя ўражанні ад пакета — у каментарах да гэтага паста."
22
- right_answers_for_stats = "Правільных адказаў"
21
+ right_answers_for_stats = "Правільных адказаў"
@@ -9,7 +9,6 @@ sources = "Крыніцы"
9
9
  author = "Аўтар"
10
10
  authors = "Аўтары"
11
11
  handout = "Раздаткавы матэрыял"
12
- handout_short = "Раздат"
13
12
 
14
13
  [general]
15
14
  section = "Тур"
@@ -19,4 +18,4 @@ questions_in_comments = "Пытаньні ў каментарыях."
19
18
  handout_for_question = "Раздатачны матэрыял да пытаньня {}"
20
19
  general_impressions_caption = "Агульныя ўражаньні"
21
20
  general_impressions_text = "Агульныя ўражаньні ад пакета — у каментарыях да гэтага паста."
22
- right_answers_for_stats = "Правільных адказаў"
21
+ right_answers_for_stats = "Правільных адказаў"
@@ -9,7 +9,6 @@ sources = "Sources"
9
9
  author = "Author"
10
10
  authors = "Authors"
11
11
  handout = "Handout"
12
- handout_short = "Handout"
13
12
 
14
13
  [general]
15
14
  section = "Block"
@@ -19,4 +18,4 @@ questions_in_comments = "Questions are in the comments."
19
18
  handout_for_question = "Handout for question {}"
20
19
  general_impressions_caption = "General impression"
21
20
  general_impressions_text = "Please share your general impression of the packet in the comments to this post."
22
- right_answers_for_stats = "Correct answers"
21
+ right_answers_for_stats = "Correct answers"
@@ -9,7 +9,6 @@ sources = "Дереккөздер"
9
9
  author = "Автор"
10
10
  authors = "Авторлар"
11
11
  handout = "Үлестіру материалы"
12
- handout_short = "Материал"
13
12
 
14
13
  [general]
15
14
  section = "Тур"
@@ -20,4 +19,4 @@ general_impressions_caption = "Жалпы әсер"
20
19
  handout_for_question = "{}-сұрақтың үлестіру материалы"
21
20
  general_impressions_text = "Пакеттен алған жалпы әсер — осы пост астындағы комментарийлерде."
22
21
  right_answers_for_stats = "Алған сұрақтар"
23
- cf_image = "суретті қараңыз"
22
+ cf_image = "суретті қараңыз"
@@ -9,7 +9,6 @@ sources = "Источники"
9
9
  author = "Автор"
10
10
  authors = "Авторы"
11
11
  handout = "Раздаточный материал"
12
- handout_short = "Раздат"
13
12
 
14
13
  [general]
15
14
  section = "Тур"
@@ -20,4 +19,4 @@ general_impressions_caption = "Общие впечатления"
20
19
  handout_for_question = "Раздаточный материал к вопросу {}"
21
20
  general_impressions_text = "Общее впечатление от пакета — в комментариях к этому посту."
22
21
  right_answers_for_stats = "Взятия"
23
- cf_image = "см. изображение"
22
+ cf_image = "см. изображение"
@@ -9,7 +9,6 @@ sources = "Izvori"
9
9
  author = "Autor"
10
10
  authors = "Autori"
11
11
  handout = "Materijal za deljenje"
12
- handout_short = "Podeljeno"
13
12
 
14
13
  [general]
15
14
  section = "Runda"
@@ -20,4 +19,4 @@ general_impressions_caption = "Opšti utisci"
20
19
  handout_for_question = "Materijal za deljenje uz pitanje {}"
21
20
  general_impressions_text = "Opšti utisak od paketa — u komentarima na ovu objavu."
22
21
  right_answers_for_stats = "Pogodak"
23
- cf_image = "vidi sliku"
22
+ cf_image = "vidi sliku"
@@ -9,7 +9,6 @@ sources = "Джерела"
9
9
  author = "Автор"
10
10
  authors = "Автори"
11
11
  handout = "Роздатковий матеріал"
12
- handout_short = "Роздат"
13
12
 
14
13
  [general]
15
14
  section = "Тур"
@@ -19,4 +18,4 @@ questions_in_comments = "Запитання у коментарях."
19
18
  handout_for_question = "Роздатковий матеріал до запитання {}"
20
19
  general_impressions_caption = "Загальні враження"
21
20
  general_impressions_text = "Загальні враження про пакет — у коментарях до цього посту."
22
- right_answers_for_stats = "Взяття"
21
+ right_answers_for_stats = "Взяття"
@@ -9,7 +9,6 @@ sources = "Manbalar"
9
9
  author = "Muallif"
10
10
  authors = "Mualliflar"
11
11
  handout = "Tarqatma material"
12
- handout_short = "Tarqat"
13
12
 
14
13
  [general]
15
14
  section = "Tur"
@@ -20,5 +19,3 @@ handout_for_question = "{} savolga tarqatma material."
20
19
  general_impressions_caption = "Umumiy taasurotlar"
21
20
  general_impressions_text = "To‘plamdan umumiy taasurotlar — ushbu postning izohlarida."
22
21
  right_answers_for_stats = "To‘g‘ri javoblar foizi"
23
-
24
-