PyPI - chgksuite - Versions diffs - 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl - Mend

chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

chgksuite/_html2md.py +90 -0
chgksuite/cli.py +38 -8
chgksuite/common.py +16 -12
chgksuite/composer/__init__.py +9 -7
chgksuite/composer/chgksuite_parser.py +20 -9
chgksuite/composer/composer_common.py +30 -3
chgksuite/composer/db.py +1 -2
chgksuite/composer/docx.py +542 -292
chgksuite/composer/latex.py +3 -4
chgksuite/composer/lj.py +1 -2
chgksuite/composer/{reddit.py → markdown.py} +35 -25
chgksuite/composer/openquiz.py +2 -3
chgksuite/composer/pptx.py +18 -6
chgksuite/composer/telegram.py +22 -10
chgksuite/handouter/gen.py +11 -7
chgksuite/handouter/installer.py +0 -0
chgksuite/handouter/runner.py +237 -10
chgksuite/handouter/tex_internals.py +12 -13
chgksuite/handouter/utils.py +22 -1
chgksuite/lastdir +1 -0
chgksuite/parser.py +218 -37
chgksuite/parser_db.py +4 -6
chgksuite/resources/labels_az.toml +22 -0
chgksuite/resources/labels_by.toml +1 -2
chgksuite/resources/labels_by_tar.toml +1 -2
chgksuite/resources/labels_en.toml +1 -2
chgksuite/resources/labels_kz_cyr.toml +1 -2
chgksuite/resources/labels_ru.toml +1 -2
chgksuite/resources/labels_sr.toml +1 -2
chgksuite/resources/labels_ua.toml +1 -2
chgksuite/resources/labels_uz.toml +0 -3
chgksuite/resources/labels_uz_cyr.toml +1 -2
chgksuite/resources/regexes_az.json +17 -0
chgksuite/resources/regexes_by.json +3 -2
chgksuite/resources/regexes_by_tar.json +17 -0
chgksuite/resources/regexes_en.json +3 -2
chgksuite/resources/regexes_kz_cyr.json +3 -2
chgksuite/resources/regexes_ru.json +3 -2
chgksuite/resources/regexes_sr.json +3 -2
chgksuite/resources/regexes_ua.json +3 -2
chgksuite/resources/regexes_uz.json +16 -0
chgksuite/resources/regexes_uz_cyr.json +3 -2
chgksuite/trello.py +8 -9
chgksuite/typotools.py +9 -8
chgksuite/version.py +1 -1
{chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/METADATA +10 -19
chgksuite-0.27.0.dist-info/RECORD +63 -0
{chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/WHEEL +1 -2
chgksuite/composer/telegram_parser.py +0 -230
chgksuite-0.26.0b11.dist-info/RECORD +0 -59
chgksuite-0.26.0b11.dist-info/top_level.txt +0 -1
{chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/entry_points.txt +0 -0
{chgksuite-0.26.0b11.dist-info → chgksuite-0.27.0.dist-info}/licenses/LICENSE +0 -0

chgksuite/_html2md.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""Simple HTML table to Markdown converter.
+Replaces dashtable.html2md with a minimal implementation that avoids
+deprecated BeautifulSoup methods.
+"""
+from bs4 import BeautifulSoup
+def html2md(html_string: str) -> str:
+    """Convert an HTML table to a Markdown table string.
+    Parameters
+    ----------
+    html_string : str
+        HTML string containing a table
+    Returns
+    -------
+    str
+        The table formatted as Markdown
+    """
+    soup = BeautifulSoup(html_string, "html.parser")
+    table = soup.find("table")
+    if not table:
+        return ""
+    rows = table.find_all("tr")
+    if not rows:
+        return ""
+    # Extract all rows as lists of cell texts
+    data = []
+    for row in rows:
+        # Check for header cells first, then data cells
+        cells = row.find_all("th")
+        if not cells:
+            cells = row.find_all("td")
+        row_data = []
+        for cell in cells:
+            # Get text, normalize whitespace
+            text = " ".join(cell.get_text().split())
+            row_data.append(text)
+        if row_data:
+            data.append(row_data)
+    if not data:
+        return ""
+    # Normalize row lengths (pad shorter rows)
+    max_cols = max(len(row) for row in data)
+    for row in data:
+        while len(row) < max_cols:
+            row.append("")
+    # Calculate column widths (minimum 3 for markdown separator)
+    # Add 2 for space cushions on each side
+    widths = []
+    for col in range(max_cols):
+        width = max(len(row[col]) for row in data)
+        widths.append(max(width + 2, 3))
+    # Build markdown table
+    lines = []
+    # Header row (centered, with padding)
+    header = "|" + "|".join(_center(cell, widths[i]) for i, cell in enumerate(data[0])) + "|"
+    lines.append(header)
+    # Separator row (no spaces to avoid typotools converting --- to em-dash)
+    separator = "|" + "|".join("-" * w for w in widths) + "|"
+    lines.append(separator)
+    # Data rows (centered, with padding)
+    for row in data[1:]:
+        line = "|" + "|".join(_center(cell, widths[i]) for i, cell in enumerate(row)) + "|"
+        lines.append(line)
+    return "\n".join(lines)
+def _center(text: str, width: int) -> str:
+    """Center text within width, with space padding."""
+    text = text.strip()
+    padding = width - len(text)
+    left = padding // 2
+    right = padding - left
+    return " " * left + text + " " * right

chgksuite/cli.py CHANGED Viewed

@@ -13,12 +13,14 @@ from chgksuite.common import (
 )
 from chgksuite.composer import gui_compose
 from chgksuite.handouter.runner import gui_handouter
-from chgksuite.handouter.tex_internals import GREYTEXT_LANGS
 from chgksuite.parser import gui_parse
 from chgksuite.trello import gui_trello
 from chgksuite.version import __version__
-LANGS = ["by", "by_tar", "en", "kz_cyr", "ru", "sr", "ua", "uz", "uz_cyr"] + ["custom"]
+LANGS = ["az", "by", "by_tar", "en", "kz_cyr", "ru", "sr", "ua", "uz", "uz_cyr"] + [
+    "custom"
+]
+HANDOUT_LANGS = [lang for lang in LANGS if lang != "custom"]
 debug = False
@@ -277,6 +279,14 @@ class ArgparseBuilder:
             advanced=True,
             argtype="radiobutton",
         )
+        self.add_argument(
+            cmdparse,
+            "--download_images",
+            action="store_true",
+            help="download images from direct URLs and replace with local references",
+            caption="Скачивать изображения по прямым ссылкам",
+            advanced=True,
+        )
         self.add_argument(
             cmdparse,
             "--add_ts",
@@ -436,6 +446,15 @@ class ArgparseBuilder:
             advanced=True,
             caption='Без переноса строки после "Вопрос N."',
         )
+        self.add_argument(
+            cmdcompose_docx,
+            "--only_question_number",
+            action="store_true",
+            help="only show question number",
+            advanced=True,
+            caption='Без слова "Вопрос" в "Вопрос N."',
+        )
         self.add_argument(
             cmdcompose_docx,
             "--randomize",
@@ -577,6 +596,15 @@ class ArgparseBuilder:
             caption="Имя 4s-файла",
             filetypes=[("chgksuite markup files", "*.4s")],
         )
+        cmdcompose_markdown = cmdcompose_filetype.add_parser("markdown")
+        self.add_argument(
+            cmdcompose_markdown,
+            "filename",
+            nargs="*",
+            help="file(s) to compose from.",
+            caption="Имя 4s-файла",
+            filetypes=[("chgksuite markup files", "*.4s")],
+        )
         cmdcompose_pptx = cmdcompose_filetype.add_parser("pptx")
         self.add_argument(
             cmdcompose_pptx,
@@ -872,10 +900,11 @@ class ArgparseBuilder:
         )
         self.add_argument(
             cmdhandouts_run,
-            "--lang",
+            "--language",
+            "-lang",
             default="ru",
             argtype="radiobutton",
-            choices=sorted(GREYTEXT_LANGS.keys()),
+            choices=sorted(HANDOUT_LANGS),
             help="language",
             caption="Язык",
             advanced=True,
@@ -995,12 +1024,13 @@ class ArgparseBuilder:
         )
         self.add_argument(
             cmdhandouts_generate,
-            "--lang",
+            "--language",
+            "-lang",
             default="ru",
             help="language",
             caption="Язык",
             argtype="radiobutton",
-            choices=sorted(GREYTEXT_LANGS.keys()),
+            choices=sorted(HANDOUT_LANGS),
             advanced=True,
         )
         self.add_argument(
@@ -1067,9 +1097,9 @@ def single_action(args, use_wrapper, resourcedir):
         args.console_mode = True
     if args.language in LANGS:
+        args.regexes_file = os.path.join(resourcedir, f"regexes_{args.language}.json")
         if args.action == "parse":
-            regex_lang = "by" if args.language == "by_tar" else args.language
-            args.regexes = os.path.join(resourcedir, f"regexes_{regex_lang}.json")
+            args.regexes = args.regexes_file
         args.labels_file = os.path.join(resourcedir, f"labels_{args.language}.toml")
     if not args.docx_template:
         args.docx_template = os.path.join(resourcedir, "template.docx")

chgksuite/common.py CHANGED Viewed

@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import argparse
-import codecs
 import csv
 import itertools
 import json
@@ -29,16 +28,28 @@ QUESTION_LABELS = [
     "setcounter",
 ]
 SEP = os.linesep
-ENC = sys.stdout.encoding or "utf8"
+try:
+    ENC = sys.stdout.encoding or "utf8"
+except AttributeError:
+    ENC = "utf8"
 lastdir = os.path.join(os.path.dirname(os.path.abspath("__file__")), "lastdir")
+def get_chgksuite_dir():
+    chgksuite_dir = os.path.join(os.path.expanduser("~"), ".chgksuite")
+    if not os.path.isdir(chgksuite_dir):
+        os.mkdir(chgksuite_dir)
+    return chgksuite_dir
 def init_logger(logger_name, debug=False):
     logger = logging.getLogger(logger_name)
     if not logger.handlers:
         logger.setLevel(logging.DEBUG)
-        fh = logging.FileHandler(f"{logger_name}.log", encoding="utf8")
+        log_dir = get_chgksuite_dir()
+        log_path = os.path.join(log_dir, f"{logger_name}.log")
+        fh = logging.FileHandler(log_path, encoding="utf8")
         fh.setLevel(logging.DEBUG)
         ch = logging.StreamHandler()
         if debug:
@@ -53,13 +64,6 @@ def init_logger(logger_name, debug=False):
     return logger
-def get_chgksuite_dir():
-    chgksuite_dir = os.path.join(os.path.expanduser("~"), ".chgksuite")
-    if not os.path.isdir(chgksuite_dir):
-        os.mkdir(chgksuite_dir)
-    return chgksuite_dir
 def load_settings():
     chgksuite_dir = get_chgksuite_dir()
     settings_file = os.path.join(chgksuite_dir, "settings.toml")
@@ -109,7 +113,7 @@ class DefaultArgs:
 def set_lastdir(path):
     chgksuite_dir = get_chgksuite_dir()
     lastdir = os.path.join(chgksuite_dir, "lastdir")
-    with codecs.open(lastdir, "w", "utf8") as f:
+    with open(lastdir, "w", encoding="utf-8") as f:
         f.write(path)
@@ -117,7 +121,7 @@ def get_lastdir():
     chgksuite_dir = get_chgksuite_dir()
     lastdir = os.path.join(chgksuite_dir, "lastdir")
     if os.path.isfile(lastdir):
-        with codecs.open(lastdir, "r", "utf8") as f:
+        with open(lastdir, "r", encoding="utf-8") as f:
             return f.read().rstrip()
     return "."

chgksuite/composer/__init__.py CHANGED Viewed

@@ -1,6 +1,5 @@
 #!usr/bin/env python
 # -*- coding: utf-8 -*-
-import codecs
 import json
 import os
 import shutil
@@ -22,7 +21,7 @@ from chgksuite.composer.docx import DocxExporter
 from chgksuite.composer.latex import LatexExporter
 from chgksuite.composer.lj import LjExporter
 from chgksuite.composer.pptx import PptxExporter
-from chgksuite.composer.reddit import RedditExporter
+from chgksuite.composer.markdown import MarkdownExporter
 from chgksuite.composer.stats import StatsAdder
 from chgksuite.composer.telegram import TelegramExporter
 from chgksuite.composer.openquiz import OpenquizExporter
@@ -75,10 +74,13 @@ def process_file_wrapper(filename, sourcedir, targetdir, args):
 def parse_filepath(filepath, args=None):
     args = args or DefaultArgs()
-    with codecs.open(filepath, "r", "utf8") as input_file:
+    with open(filepath, "r", encoding="utf-8") as input_file:
         input_text = input_file.read()
     input_text = input_text.replace("\r", "")
-    return parse_4s(input_text, randomize=args.randomize, debug=args.debug)
+    debug_dir = os.path.dirname(os.path.abspath(filepath))
+    return parse_4s(
+        input_text, randomize=args.randomize, debug=args.debug, debug_dir=debug_dir
+    )
 def make_merged_filename(filelist):
@@ -105,7 +107,7 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
             targetdir,
             make_filename(os.path.basename(filename), "dbg", args),
         )
-        with codecs.open(debug_fn, "w", "utf8") as output_file:
+        with open(debug_fn, "w", encoding="utf-8") as output_file:
             output_file.write(json.dumps(structure, indent=2, ensure_ascii=False))
     if not args.filetype:
@@ -146,8 +148,8 @@ def process_file(filename, tmp_dir, targetdir, args=None, logger=None):
         outfilename = os.path.join(targetdir, make_filename(filename, "txt", args))
         exporter.export(outfilename)
-    if args.filetype == "redditmd":
-        exporter = RedditExporter(structure, args, dir_kwargs)
+    if args.filetype in ("redditmd", "markdown"):
+        exporter = MarkdownExporter(structure, args, dir_kwargs)
         outfilename = os.path.join(targetdir, make_filename(filename, "md", args))
         exporter.export(outfilename)

chgksuite/composer/chgksuite_parser.py CHANGED Viewed

@@ -1,9 +1,15 @@
-import codecs
+import os
 import random
 import re
 from collections import defaultdict
-from chgksuite.common import QUESTION_LABELS, check_question, init_logger, log_wrap
+from chgksuite.common import (
+    QUESTION_LABELS,
+    check_question,
+    get_chgksuite_dir,
+    init_logger,
+    log_wrap,
+)
 from chgksuite.typotools import remove_excessive_whitespace as rew
 REQUIRED_LABELS = set(["question", "answer"])
@@ -64,7 +70,9 @@ def process_list(element):
 RE_COUNTER = "4SCOUNTER(?P<counter_id>[0-9a-zA-Z_]*)"
-RE_SET_COUNTER = "set 4SCOUNTER(?P<counter_id_set>[0-9a-zA-Z_]*) = (?P<counter_value>[0-9+])"
+RE_SET_COUNTER = (
+    "set 4SCOUNTER(?P<counter_id_set>[0-9a-zA-Z_]*) = (?P<counter_value>[0-9+])"
+)
 RE_COUNTER_UNIFY = re.compile(f"({RE_COUNTER}|{RE_SET_COUNTER})")
@@ -83,11 +91,11 @@ def replace_counters(string_):
             counter_id = match.group("counter_id")
             string_ = string_[: span[0]] + str(dd[counter_id]) + string_[span[1] :]
             dd[counter_id] += 1
-        match = RE_COUNTER_UNIFY.search(string_)
+        match = RE_COUNTER_UNIFY.search(string_)
     return string_
-def parse_4s(s, randomize=False, debug=False, logger=None):
+def parse_4s(s, randomize=False, debug=False, logger=None, debug_dir=None):
     logger = logger or init_logger("composer")
     mapping = {
         "#": "meta",
@@ -113,8 +121,11 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
     if s[0] == "\ufeff" and len(s) > 1:
         s = s[1:]
-    with codecs.open("raw.debug", "w", "utf8") as debugf:
-        debugf.write(log_wrap(s.split("\n")))
+    if debug:
+        debug_dir = debug_dir or get_chgksuite_dir()
+        debug_path = os.path.join(debug_dir, "raw.debug")
+        with open(debug_path, "w", encoding="utf-8") as debugf:
+            debugf.write(log_wrap(s.split("\n")))
     s = replace_counters(s)
@@ -135,7 +146,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
     counter = 1
     if debug:
-        with codecs.open("debug1st.debug", "w", "utf8") as debugf:
+        with open("debug1st.debug", "w", encoding="utf-8") as debugf:
             debugf.write(log_wrap(structure))
     for element in structure:
@@ -228,7 +239,7 @@ def parse_4s(s, randomize=False, debug=False, logger=None):
                 i += 1
     if debug:
-        with codecs.open("debug.debug", "w", "utf8") as debugf:
+        with open("debug.debug", "w", encoding="utf-8") as debugf:
             debugf.write(log_wrap(final_structure))
     for element in final_structure:

chgksuite/composer/composer_common.py CHANGED Viewed

@@ -59,6 +59,22 @@ def backtick_replace(el):
     return el
+def remove_accents_standalone(s, regexes):
+    hs = regexes["handout_short"]
+    re_hs = re.compile(f"\\[{hs}(.+?)\\]", flags=re.DOTALL)
+    replacements = {}
+    n_handouts = 0
+    while match := re_hs.search(s):
+        original = match.group(0)
+        n_handouts += 1
+        replacements[original] = f"HANDOUT_{str(n_handouts).zfill(3)}"
+        s = s.replace(original, replacements[original])
+    s = s.replace("\u0301", "")
+    for k, v in replacements.items():
+        s = s.replace(v, k)
+    return s
 def unquote(bytestring):
     return urllib.parse.unquote(bytestring.decode("utf8")).encode("utf8")
@@ -397,6 +413,8 @@ class BaseExporter:
         self.dir_kwargs = args[2]
         with open(self.args.labels_file, encoding="utf8") as f:
             self.labels = toml.load(f)
+        with open(self.args.regexes_file, encoding="utf8") as f:
+            self.regexes = json.load(f)
         logger = kwargs.get("logger")
         if logger:
             self.logger = logger
@@ -415,6 +433,8 @@ class BaseExporter:
         return _parse_4s_elem(*args, **kwargs)
     def get_label(self, question, field, number=None):
+        if field == "question" and self.args.only_question_number:
+            return str(question.get("number") or number)
         if field in ("question", "tour"):
             lbl = (question.get("overrides") or {}).get(field) or self.labels[
                 "question_labels"
@@ -433,10 +453,11 @@ class BaseExporter:
         return self.labels["question_labels"][field]
     def remove_square_brackets(self, s):
-        hs = self.labels["question_labels"]["handout_short"]
+        hs = self.regexes["handout_short"]
         s = s.replace("\\[", "LEFTSQUAREBRACKET")
         s = s.replace("\\]", "RIGHTSQUAREBRACKET")
-        s = re.sub(f"\\[{hs}(.+?)\\]", "{" + hs + "\\1}", s, flags=re.DOTALL)
+        # Use placeholder to preserve handout brackets during removal
+        s = re.sub(f"\\[{hs}(.+?)\\]", "{HANDOUT_PLACEHOLDER\\1}", s, flags=re.DOTALL)
         i = 0
         while "[" in s and "]" in s and i < 10:
             s = re.sub(" *\\[.+?\\]", "", s, flags=re.DOTALL)
@@ -446,7 +467,13 @@ class BaseExporter:
             sys.stderr.write(
                 f"Error replacing square brackets on question: {s}, retries exceeded\n"
             )
-        s = re.sub("\\{" + hs + "(.+?)\\}", "[" + hs + "\\1]", s, flags=re.DOTALL)
+        # Restore handout brackets - get the original matched text from the placeholder
+        s = re.sub(
+            r"\{HANDOUT_PLACEHOLDER(.+?)\}",
+            lambda m: "[" + m.group(1) + "]",
+            s,
+            flags=re.DOTALL,
+        )
         s = s.replace("LEFTSQUAREBRACKET", "[")
         s = s.replace("RIGHTSQUAREBRACKET", "]")
         return s

chgksuite/composer/db.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import codecs
 import datetime
 import os
 import re
@@ -211,7 +210,7 @@ class DbExporter(BaseExporter):
             if res:
                 result.append(res)
         text = "".join(result)
-        with codecs.open(outfilename, "w", "utf8") as f:
+        with open(outfilename, "w", encoding="utf-8") as f:
             f.write(text)
         self.logger.info("Output: {}".format(outfilename))
         if self.args.clipboard:

chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl

chgksuite 0.26.0b11__py3-none-any.whl → 0.27.0__py3-none-any.whl